Compare commits
2 Commits
debug_form
...
copilot/co
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9e806cb3f7 | ||
|
|
f267af38bd |
18
.github/copilot-instructions.md
vendored
18
.github/copilot-instructions.md
vendored
@@ -55,26 +55,22 @@ ninja build/<mode>/test/boost/<test_name>
|
||||
ninja build/<mode>/scylla
|
||||
|
||||
# Run all tests in a file
|
||||
./test.py --mode=<mode> test/<suite>/<test_name>.py
|
||||
./test.py --mode=<mode> <test_path>
|
||||
|
||||
# Run a single test case from a file
|
||||
./test.py --mode=<mode> test/<suite>/<test_name>.py::<test_function_name>
|
||||
|
||||
# Run all tests in a directory
|
||||
./test.py --mode=<mode> test/<suite>/
|
||||
./test.py --mode=<mode> <test_path>::<test_function_name>
|
||||
|
||||
# Examples
|
||||
./test.py --mode=dev test/alternator/
|
||||
./test.py --mode=dev test/cluster/test_raft_voters.py::test_raft_limited_voters_retain_coordinator
|
||||
./test.py --mode=dev test/cqlpy/test_json.py
|
||||
./test.py --mode=dev alternator/
|
||||
./test.py --mode=dev cluster/test_raft_voters::test_raft_limited_voters_retain_coordinator
|
||||
|
||||
# Optional flags
|
||||
./test.py --mode=dev test/cluster/test_raft_no_quorum.py -v # Verbose output
|
||||
./test.py --mode=dev test/cluster/test_raft_no_quorum.py --repeat 5 # Repeat test 5 times
|
||||
./test.py --mode=dev cluster/test_raft_no_quorum -v # Verbose output
|
||||
./test.py --mode=dev cluster/test_raft_no_quorum --repeat 5 # Repeat test 5 times
|
||||
```
|
||||
|
||||
**Important:**
|
||||
- Use full path with `.py` extension (e.g., `test/cluster/test_raft_no_quorum.py`, not `cluster/test_raft_no_quorum`)
|
||||
- Use path without `.py` extension (e.g., `cluster/test_raft_no_quorum`, not `cluster/test_raft_no_quorum.py`)
|
||||
- To run a single test case, append `::<test_function_name>` to the file path
|
||||
- Add `-v` for verbose output
|
||||
- Add `--repeat <num>` to repeat a test multiple times
|
||||
|
||||
2
.github/dependabot.yml
vendored
2
.github/dependabot.yml
vendored
@@ -1,6 +1,6 @@
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "uv"
|
||||
- package-ecosystem: "pip"
|
||||
directory: "/docs"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
|
||||
@@ -8,9 +8,6 @@ on:
|
||||
jobs:
|
||||
check-fixes-prefix:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
issues: write
|
||||
steps:
|
||||
- name: Check PR body for "Fixes" prefix patterns
|
||||
uses: actions/github-script@v7
|
||||
|
||||
53
.github/workflows/call_backport_with_jira.yaml
vendored
53
.github/workflows/call_backport_with_jira.yaml
vendored
@@ -1,53 +0,0 @@
|
||||
name: Backport with Jira Integration
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- next-*.*
|
||||
- branch-*.*
|
||||
pull_request_target:
|
||||
types: [labeled, closed]
|
||||
branches:
|
||||
- master
|
||||
- next
|
||||
- next-*.*
|
||||
- branch-*.*
|
||||
|
||||
jobs:
|
||||
backport-on-push:
|
||||
if: github.event_name == 'push'
|
||||
uses: scylladb/github-automation/.github/workflows/backport-with-jira.yaml@main
|
||||
with:
|
||||
event_type: 'push'
|
||||
base_branch: ${{ github.ref }}
|
||||
commits: ${{ github.event.before }}..${{ github.sha }}
|
||||
secrets:
|
||||
gh_token: ${{ secrets.AUTO_BACKPORT_TOKEN }}
|
||||
jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
||||
|
||||
backport-on-label:
|
||||
if: github.event_name == 'pull_request_target' && github.event.action == 'labeled'
|
||||
uses: scylladb/github-automation/.github/workflows/backport-with-jira.yaml@main
|
||||
with:
|
||||
event_type: 'labeled'
|
||||
base_branch: refs/heads/${{ github.event.pull_request.base.ref }}
|
||||
pull_request_number: ${{ github.event.pull_request.number }}
|
||||
head_commit: ${{ github.event.pull_request.base.sha }}
|
||||
label_name: ${{ github.event.label.name }}
|
||||
pr_state: ${{ github.event.pull_request.state }}
|
||||
secrets:
|
||||
gh_token: ${{ secrets.AUTO_BACKPORT_TOKEN }}
|
||||
jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
||||
|
||||
backport-chain:
|
||||
if: github.event_name == 'pull_request_target' && github.event.action == 'closed' && github.event.pull_request.merged == true
|
||||
uses: scylladb/github-automation/.github/workflows/backport-with-jira.yaml@main
|
||||
with:
|
||||
event_type: 'chain'
|
||||
base_branch: refs/heads/${{ github.event.pull_request.base.ref }}
|
||||
pull_request_number: ${{ github.event.pull_request.number }}
|
||||
pr_body: ${{ github.event.pull_request.body }}
|
||||
secrets:
|
||||
gh_token: ${{ secrets.AUTO_BACKPORT_TOKEN }}
|
||||
jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
||||
35
.github/workflows/call_jira_sync.yml
vendored
35
.github/workflows/call_jira_sync.yml
vendored
@@ -1,8 +1,8 @@
|
||||
name: Sync Jira Based on PR Events
|
||||
name: Sync Jira Based on PR Events
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
types: [opened, edited, ready_for_review, review_requested, labeled, unlabeled, closed]
|
||||
types: [opened, ready_for_review, review_requested, labeled, unlabeled, closed]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -10,9 +10,32 @@ permissions:
|
||||
issues: write
|
||||
|
||||
jobs:
|
||||
jira-sync:
|
||||
uses: scylladb/github-automation/.github/workflows/main_pr_events_jira_sync.yml@main
|
||||
with:
|
||||
caller_action: ${{ github.event.action }}
|
||||
jira-sync-pr-opened:
|
||||
if: github.event.action == 'opened'
|
||||
uses: scylladb/github-automation/.github/workflows/main_jira_sync_pr_opened.yml@main
|
||||
secrets:
|
||||
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
||||
|
||||
jira-sync-in-review:
|
||||
if: github.event.action == 'ready_for_review' || github.event.action == 'review_requested'
|
||||
uses: scylladb/github-automation/.github/workflows/main_jira_sync_in_review.yml@main
|
||||
secrets:
|
||||
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
||||
|
||||
jira-sync-add-label:
|
||||
if: github.event.action == 'labeled'
|
||||
uses: scylladb/github-automation/.github/workflows/main_jira_sync_add_label.yml@main
|
||||
secrets:
|
||||
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
||||
|
||||
jira-status-remove-label:
|
||||
if: github.event.action == 'unlabeled'
|
||||
uses: scylladb/github-automation/.github/workflows/main_jira_sync_remove_label.yml@main
|
||||
secrets:
|
||||
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
||||
|
||||
jira-status-pr-closed:
|
||||
if: github.event.action == 'closed'
|
||||
uses: scylladb/github-automation/.github/workflows/main_jira_sync_pr_closed.yml@main
|
||||
secrets:
|
||||
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
||||
|
||||
@@ -1,22 +0,0 @@
|
||||
name: Sync Jira Based on PR Milestone Events
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
types: [milestoned, demilestoned]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: read
|
||||
|
||||
jobs:
|
||||
jira-sync-milestone-set:
|
||||
if: github.event.action == 'milestoned'
|
||||
uses: scylladb/github-automation/.github/workflows/main_jira_sync_pr_milestone_set.yml@main
|
||||
secrets:
|
||||
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
||||
|
||||
jira-sync-milestone-removed:
|
||||
if: github.event.action == 'demilestoned'
|
||||
uses: scylladb/github-automation/.github/workflows/main_jira_sync_pr_milestone_removed.yml@main
|
||||
secrets:
|
||||
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
||||
@@ -2,13 +2,13 @@ name: Call Jira release creation for new milestone
|
||||
|
||||
on:
|
||||
milestone:
|
||||
types: [created, closed]
|
||||
types: [created]
|
||||
|
||||
jobs:
|
||||
sync-milestone-to-jira:
|
||||
uses: scylladb/github-automation/.github/workflows/main_sync_milestone_to_jira_release.yml@main
|
||||
with:
|
||||
# Comma-separated list of Jira project keys
|
||||
jira_project_keys: "SCYLLADB,CUSTOMER,SMI,RELENG,VECTOR"
|
||||
jira_project_keys: "SCYLLADB,CUSTOMER"
|
||||
secrets:
|
||||
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
||||
|
||||
@@ -1,62 +0,0 @@
|
||||
name: Close issues created by Scylla associates
|
||||
|
||||
on:
|
||||
issues:
|
||||
types: [opened, reopened]
|
||||
|
||||
permissions:
|
||||
issues: write
|
||||
|
||||
jobs:
|
||||
comment-and-close:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Comment and close if author email is scylladb.com
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
const issue = context.payload.issue;
|
||||
const actor = context.actor;
|
||||
|
||||
// Get user data (only public email is available)
|
||||
const { data: user } = await github.rest.users.getByUsername({
|
||||
username: actor,
|
||||
});
|
||||
|
||||
const email = user.email || "";
|
||||
console.log(`Actor: ${actor}, public email: ${email || "<none>"}`);
|
||||
|
||||
// Only continue if email exists and ends with @scylladb.com
|
||||
if (!email || !email.toLowerCase().endsWith("@scylladb.com")) {
|
||||
console.log("User is not a scylladb.com email (or email not public); skipping.");
|
||||
return;
|
||||
}
|
||||
|
||||
const owner = context.repo.owner;
|
||||
const repo = context.repo.repo;
|
||||
const issue_number = issue.number;
|
||||
|
||||
const body = "Issues in this repository are closed automatically. Scylla associates should use Jira to manage issues.\nPlease move this issue to Jira https://scylladb.atlassian.net/jira/software/c/projects/SCYLLADB/list";
|
||||
|
||||
// Add the comment
|
||||
await github.rest.issues.createComment({
|
||||
owner,
|
||||
repo,
|
||||
issue_number,
|
||||
body,
|
||||
});
|
||||
|
||||
console.log(`Comment added to #${issue_number}`);
|
||||
|
||||
// Close the issue
|
||||
await github.rest.issues.update({
|
||||
owner,
|
||||
repo,
|
||||
issue_number,
|
||||
state: "closed",
|
||||
state_reason: "not_planned"
|
||||
});
|
||||
|
||||
console.log(`Issue #${issue_number} closed.`);
|
||||
6
.github/workflows/docs-pages.yaml
vendored
6
.github/workflows/docs-pages.yaml
vendored
@@ -19,8 +19,6 @@ on:
|
||||
jobs:
|
||||
release:
|
||||
permissions:
|
||||
pages: write
|
||||
id-token: write
|
||||
contents: write
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
@@ -33,9 +31,7 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v6
|
||||
python-version: "3.10"
|
||||
- name: Set up env
|
||||
run: make -C docs FLAG="${{ env.FLAG }}" setupenv
|
||||
- name: Build docs
|
||||
|
||||
4
.github/workflows/docs-pr.yaml
vendored
4
.github/workflows/docs-pr.yaml
vendored
@@ -29,9 +29,7 @@ jobs:
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@v6
|
||||
python-version: "3.10"
|
||||
- name: Set up env
|
||||
run: make -C docs FLAG="${{ env.FLAG }}" setupenv
|
||||
- name: Build docs
|
||||
|
||||
50
.github/workflows/trigger-scylla-ci.yaml
vendored
50
.github/workflows/trigger-scylla-ci.yaml
vendored
@@ -1,6 +1,4 @@
|
||||
name: Trigger Scylla CI Route
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
on:
|
||||
issue_comment:
|
||||
@@ -11,56 +9,16 @@ on:
|
||||
|
||||
jobs:
|
||||
trigger-jenkins:
|
||||
if: (github.event_name == 'issue_comment' && github.event.comment.user.login != 'scylladbbot') || github.event.label.name == 'conflicts'
|
||||
if: (github.event.comment.user.login != 'scylladbbot' && contains(github.event.comment.body, '@scylladbbot') && contains(github.event.comment.body, 'trigger-ci')) || github.event.label.name == 'conflicts'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Verify Org Membership
|
||||
id: verify_author
|
||||
env:
|
||||
EVENT_NAME: ${{ github.event_name }}
|
||||
PR_AUTHOR: ${{ github.event.pull_request.user.login }}
|
||||
PR_ASSOCIATION: ${{ github.event.pull_request.author_association }}
|
||||
COMMENT_AUTHOR: ${{ github.event.comment.user.login }}
|
||||
COMMENT_ASSOCIATION: ${{ github.event.comment.author_association }}
|
||||
shell: bash
|
||||
run: |
|
||||
if [[ "$EVENT_NAME" == "pull_request_target" ]]; then
|
||||
AUTHOR="$PR_AUTHOR"
|
||||
ASSOCIATION="$PR_ASSOCIATION"
|
||||
else
|
||||
AUTHOR="$COMMENT_AUTHOR"
|
||||
ASSOCIATION="$COMMENT_ASSOCIATION"
|
||||
fi
|
||||
if [[ "$ASSOCIATION" == "MEMBER" || "$ASSOCIATION" == "OWNER" ]]; then
|
||||
echo "member=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "::warning::${AUTHOR} is not a member of scylladb (association: ${ASSOCIATION}); skipping CI trigger."
|
||||
echo "member=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Validate Comment Trigger
|
||||
if: github.event_name == 'issue_comment'
|
||||
id: verify_comment
|
||||
env:
|
||||
COMMENT_BODY: ${{ github.event.comment.body }}
|
||||
shell: bash
|
||||
run: |
|
||||
CLEAN_BODY=$(echo "$COMMENT_BODY" | grep -v '^[[:space:]]*>')
|
||||
|
||||
if echo "$CLEAN_BODY" | grep -qi '@scylladbbot' && echo "$CLEAN_BODY" | grep -qi 'trigger-ci'; then
|
||||
echo "trigger=true" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "trigger=false" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Trigger Scylla-CI-Route Jenkins Job
|
||||
if: steps.verify_author.outputs.member == 'true' && (github.event_name == 'pull_request_target' || steps.verify_comment.outputs.trigger == 'true')
|
||||
env:
|
||||
JENKINS_USER: ${{ secrets.JENKINS_USERNAME }}
|
||||
JENKINS_API_TOKEN: ${{ secrets.JENKINS_TOKEN }}
|
||||
JENKINS_URL: "https://jenkins.scylladb.com"
|
||||
PR_NUMBER: "${{ github.event.issue.number || github.event.pull_request.number }}"
|
||||
PR_REPO_NAME: "${{ github.event.repository.full_name }}"
|
||||
run: |
|
||||
PR_NUMBER=${{ github.event.issue.number }}
|
||||
PR_REPO_NAME=${{ github.event.repository.full_name }}
|
||||
curl -X POST "$JENKINS_URL/job/releng/job/Scylla-CI-Route/buildWithParameters?PR_NUMBER=$PR_NUMBER&PR_REPO_NAME=$PR_REPO_NAME" \
|
||||
--user "$JENKINS_USER:$JENKINS_API_TOKEN" --fail
|
||||
--user "$JENKINS_USER:$JENKINS_API_TOKEN" --fail -i -v
|
||||
|
||||
3
.github/workflows/trigger_jenkins.yaml
vendored
3
.github/workflows/trigger_jenkins.yaml
vendored
@@ -1,8 +1,5 @@
|
||||
name: Trigger next gating
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
|
||||
@@ -300,6 +300,7 @@ add_subdirectory(locator)
|
||||
add_subdirectory(message)
|
||||
add_subdirectory(mutation)
|
||||
add_subdirectory(mutation_writer)
|
||||
add_subdirectory(node_ops)
|
||||
add_subdirectory(readers)
|
||||
add_subdirectory(replica)
|
||||
add_subdirectory(raft)
|
||||
|
||||
@@ -43,7 +43,7 @@ For further information, please see:
|
||||
|
||||
[developer documentation]: HACKING.md
|
||||
[build documentation]: docs/dev/building.md
|
||||
[docker image build documentation]: dist/docker/redhat/README.md
|
||||
[docker image build documentation]: dist/docker/debian/README.md
|
||||
|
||||
## Running Scylla
|
||||
|
||||
|
||||
@@ -13,8 +13,7 @@
|
||||
#include <string_view>
|
||||
#include "alternator/auth.hh"
|
||||
#include <fmt/format.h>
|
||||
#include "db/consistency_level_type.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "auth/password_authenticator.hh"
|
||||
#include "service/storage_proxy.hh"
|
||||
#include "alternator/executor.hh"
|
||||
#include "cql3/selection/selection.hh"
|
||||
@@ -26,8 +25,8 @@ namespace alternator {
|
||||
|
||||
static logging::logger alogger("alternator-auth");
|
||||
|
||||
future<std::string> get_key_from_roles(service::storage_proxy& proxy, std::string username) {
|
||||
schema_ptr schema = proxy.data_dictionary().find_schema(db::system_keyspace::NAME, "roles");
|
||||
future<std::string> get_key_from_roles(service::storage_proxy& proxy, auth::service& as, std::string username) {
|
||||
schema_ptr schema = proxy.data_dictionary().find_schema(auth::get_auth_ks_name(as.query_processor()), "roles");
|
||||
partition_key pk = partition_key::from_single_value(*schema, utf8_type->decompose(username));
|
||||
dht::partition_range_vector partition_ranges{dht::partition_range(dht::decorate_key(*schema, pk))};
|
||||
std::vector<query::clustering_range> bounds{query::clustering_range::make_open_ended_both_sides()};
|
||||
@@ -40,7 +39,7 @@ future<std::string> get_key_from_roles(service::storage_proxy& proxy, std::strin
|
||||
auto partition_slice = query::partition_slice(std::move(bounds), {}, query::column_id_vector{salted_hash_col->id, can_login_col->id}, selection->get_query_options());
|
||||
auto command = ::make_lw_shared<query::read_command>(schema->id(), schema->version(), partition_slice,
|
||||
proxy.get_max_result_size(partition_slice), query::tombstone_limit(proxy.get_tombstone_limit()));
|
||||
auto cl = db::consistency_level::LOCAL_ONE;
|
||||
auto cl = auth::password_authenticator::consistency_for_user(username);
|
||||
|
||||
service::client_state client_state{service::client_state::internal_tag()};
|
||||
service::storage_proxy::coordinator_query_result qr = co_await proxy.query(schema, std::move(command), std::move(partition_ranges), cl,
|
||||
|
||||
@@ -20,6 +20,6 @@ namespace alternator {
|
||||
|
||||
using key_cache = utils::loading_cache<std::string, std::string, 1>;
|
||||
|
||||
future<std::string> get_key_from_roles(service::storage_proxy& proxy, std::string username);
|
||||
future<std::string> get_key_from_roles(service::storage_proxy& proxy, auth::service& as, std::string username);
|
||||
|
||||
}
|
||||
|
||||
@@ -244,7 +244,10 @@ static bool is_set_of(const rjson::value& type1, const rjson::value& type2) {
|
||||
|
||||
// Check if two JSON-encoded values match with the CONTAINS relation
|
||||
bool check_CONTAINS(const rjson::value* v1, const rjson::value& v2, bool v1_from_query, bool v2_from_query) {
|
||||
if (!v1) {
|
||||
if (!v1 || !v1->IsObject() || v1->MemberCount() == 0) {
|
||||
return false;
|
||||
}
|
||||
if (!v2.IsObject() || v2.MemberCount() == 0) {
|
||||
return false;
|
||||
}
|
||||
const auto& kv1 = *v1->MemberBegin();
|
||||
@@ -618,7 +621,7 @@ conditional_operator_type get_conditional_operator(const rjson::value& req) {
|
||||
// Check if the existing values of the item (previous_item) match the
|
||||
// conditions given by the Expected and ConditionalOperator parameters
|
||||
// (if they exist) in the request (an UpdateItem, PutItem or DeleteItem).
|
||||
// This function can throw a ValidationException API error if there
|
||||
// This function can throw an ValidationException API error if there
|
||||
// are errors in the format of the condition itself.
|
||||
bool verify_expected(const rjson::value& req, const rjson::value* previous_item) {
|
||||
const rjson::value* expected = rjson::find(req, "Expected");
|
||||
|
||||
@@ -53,7 +53,9 @@ void consumed_capacity_counter::add_consumed_capacity_to_response_if_needed(rjso
|
||||
}
|
||||
|
||||
static uint64_t calculate_half_units(uint64_t unit_block_size, uint64_t total_bytes, bool is_quorum) {
|
||||
uint64_t half_units = (total_bytes + unit_block_size -1) / unit_block_size; //divide by unit_block_size and round up
|
||||
// Avoid potential integer overflow when total_bytes is close to UINT64_MAX
|
||||
// by using division with modulo instead of addition before division
|
||||
uint64_t half_units = total_bytes / unit_block_size + (total_bytes % unit_block_size != 0 ? 1 : 0);
|
||||
|
||||
if (is_quorum) {
|
||||
half_units *= 2;
|
||||
|
||||
@@ -63,7 +63,6 @@
|
||||
#include "types/types.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "cql3/statements/ks_prop_defs.hh"
|
||||
#include "alternator/ttl_tag.hh"
|
||||
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
@@ -165,7 +164,7 @@ static map_type attrs_type() {
|
||||
|
||||
static const column_definition& attrs_column(const schema& schema) {
|
||||
const column_definition* cdef = schema.get_column_definition(bytes(executor::ATTRS_COLUMN_NAME));
|
||||
throwing_assert(cdef);
|
||||
SCYLLA_ASSERT(cdef);
|
||||
return *cdef;
|
||||
}
|
||||
|
||||
@@ -238,7 +237,7 @@ static void validate_is_object(const rjson::value& value, const char* caller) {
|
||||
}
|
||||
|
||||
// This function assumes the given value is an object and returns requested member value.
|
||||
// If it is not possible, an api_error::validation is thrown.
|
||||
// If it is not possible an api_error::validation is thrown.
|
||||
static const rjson::value& get_member(const rjson::value& obj, const char* member_name, const char* caller) {
|
||||
validate_is_object(obj, caller);
|
||||
const rjson::value* ret = rjson::find(obj, member_name);
|
||||
@@ -250,7 +249,7 @@ static const rjson::value& get_member(const rjson::value& obj, const char* membe
|
||||
|
||||
|
||||
// This function assumes the given value is an object with a single member, and returns this member.
|
||||
// In case the requirements are not met, an api_error::validation is thrown.
|
||||
// In case the requirements are not met an api_error::validation is thrown.
|
||||
static const rjson::value::Member& get_single_member(const rjson::value& v, const char* caller) {
|
||||
if (!v.IsObject() || v.MemberCount() != 1) {
|
||||
throw api_error::validation(format("{}: expected an object with a single member.", caller));
|
||||
@@ -683,7 +682,7 @@ static std::optional<int> get_int_attribute(const rjson::value& value, std::stri
|
||||
}
|
||||
|
||||
// Sets a KeySchema object inside the given JSON parent describing the key
|
||||
// attributes of the given schema as being either HASH or RANGE keys.
|
||||
// attributes of the the given schema as being either HASH or RANGE keys.
|
||||
// Additionally, adds to a given map mappings between the key attribute
|
||||
// names and their type (as a DynamoDB type string).
|
||||
void executor::describe_key_schema(rjson::value& parent, const schema& schema, std::unordered_map<std::string,std::string>* attribute_types, const std::map<sstring, sstring> *tags) {
|
||||
@@ -835,11 +834,13 @@ future<> executor::fill_table_size(rjson::value &table_description, schema_ptr s
|
||||
total_size = co_await _ss.estimate_total_sstable_volume(schema->id(), service::storage_service::ignore_errors::yes);
|
||||
const auto expiry = std::chrono::seconds{ _proxy.data_dictionary().get_config().alternator_describe_table_info_cache_validity_in_seconds() };
|
||||
// Note: we don't care when the notification of other shards will finish, as long as it will be done
|
||||
// it's possible to get into race condition (next DescribeTable comes to other shard, that new shard doesn't have
|
||||
// the size yet, so it will calculate it again) - this is not a problem, because it will call cache_newly_calculated_size_on_all_shards
|
||||
// with expiry, which is extremely unlikely to be exactly the same as the previous one, all shards will keep the size coming with expiry that is further into the future.
|
||||
// In case of the same expiry, some shards will have different size, which means DescribeTable will return different values depending on the shard
|
||||
// which is also fine, as the specification doesn't give precision guarantees of any kind.
|
||||
// A race condition is possible: if a DescribeTable request arrives on a different shard before
|
||||
// that shard receives the cached size, it will recalculate independently. This is acceptable because:
|
||||
// 1. Both calculations will cache their results with an expiry time
|
||||
// 2. Expiry times are unlikely to be identical, so eventually all shards converge to the most recent value
|
||||
// 3. Even if expiry times match, different shards may briefly return different table sizes
|
||||
// 4. This temporary inconsistency is acceptable per DynamoDB specification, which doesn't guarantee
|
||||
// exact precision for DescribeTable size information
|
||||
co_await cache_newly_calculated_size_on_all_shards(schema, total_size, expiry);
|
||||
}
|
||||
}
|
||||
@@ -917,7 +918,7 @@ future<rjson::value> executor::fill_table_description(schema_ptr schema, table_s
|
||||
sstring index_name = cf_name.substr(delim_it + 1);
|
||||
rjson::add(view_entry, "IndexName", rjson::from_string(index_name));
|
||||
rjson::add(view_entry, "IndexArn", generate_arn_for_index(*schema, index_name));
|
||||
// Add index's KeySchema and collect types for AttributeDefinitions:
|
||||
// Add indexes's KeySchema and collect types for AttributeDefinitions:
|
||||
executor::describe_key_schema(view_entry, *vptr, key_attribute_types, db::get_tags_of_table(vptr));
|
||||
// Add projection type
|
||||
rjson::value projection = rjson::empty_object();
|
||||
@@ -1650,7 +1651,7 @@ static future<> mark_view_schemas_as_built(utils::chunked_vector<mutation>& out,
|
||||
}
|
||||
|
||||
future<executor::request_return_type> executor::create_table_on_shard0(service::client_state&& client_state, tracing::trace_state_ptr trace_state, rjson::value request, bool enforce_authorization, bool warn_authorization, const db::tablets_mode_t::mode tablets_mode) {
|
||||
throwing_assert(this_shard_id() == 0);
|
||||
SCYLLA_ASSERT(this_shard_id() == 0);
|
||||
|
||||
// We begin by parsing and validating the content of the CreateTable
|
||||
// command. We can't inspect the current database schema at this point
|
||||
@@ -2436,7 +2437,7 @@ std::unordered_map<bytes, std::string> si_key_attributes(data_dictionary::table
|
||||
// case, this function simply won't be called for this attribute.)
|
||||
//
|
||||
// This function checks if the given attribute update is an update to some
|
||||
// GSI's key, and if the value is unsuitable, an api_error::validation is
|
||||
// GSI's key, and if the value is unsuitable, a api_error::validation is
|
||||
// thrown. The checking here is similar to the checking done in
|
||||
// get_key_from_typed_value() for the base table's key columns.
|
||||
//
|
||||
@@ -2838,12 +2839,14 @@ future<executor::request_return_type> rmw_operation::execute(service::storage_pr
|
||||
}
|
||||
} else if (_write_isolation != write_isolation::LWT_ALWAYS) {
|
||||
std::optional<mutation> m = apply(nullptr, api::new_timestamp(), cdc_opts);
|
||||
throwing_assert(m); // !needs_read_before_write, so apply() did not check a condition
|
||||
SCYLLA_ASSERT(m); // !needs_read_before_write, so apply() did not check a condition
|
||||
return proxy.mutate(utils::chunked_vector<mutation>{std::move(*m)}, db::consistency_level::LOCAL_QUORUM, executor::default_timeout(), trace_state, std::move(permit), db::allow_per_partition_rate_limit::yes, false, std::move(cdc_opts)).then([this, &wcu_total] () mutable {
|
||||
return rmw_operation_return(std::move(_return_attributes), _consumed_capacity, wcu_total);
|
||||
});
|
||||
}
|
||||
throwing_assert(cas_shard);
|
||||
if (!cas_shard) {
|
||||
on_internal_error(elogger, "cas_shard is not set");
|
||||
}
|
||||
// If we're still here, we need to do this write using LWT:
|
||||
global_stats.write_using_lwt++;
|
||||
per_table_stats.write_using_lwt++;
|
||||
@@ -3463,11 +3466,7 @@ future<executor::request_return_type> executor::batch_write_item(client_state& c
|
||||
if (should_add_wcu) {
|
||||
rjson::add(ret, "ConsumedCapacity", std::move(consumed_capacity));
|
||||
}
|
||||
auto duration = std::chrono::steady_clock::now() - start_time;
|
||||
_stats.api_operations.batch_write_item_latency.mark(duration);
|
||||
for (const auto& w : per_table_wcu) {
|
||||
w.first->api_operations.batch_write_item_latency.mark(duration);
|
||||
}
|
||||
_stats.api_operations.batch_write_item_latency.mark(std::chrono::steady_clock::now() - start_time);
|
||||
co_return rjson::print(std::move(ret));
|
||||
}
|
||||
|
||||
@@ -3551,7 +3550,7 @@ static bool hierarchy_filter(rjson::value& val, const attribute_path_map_node<T>
|
||||
return true;
|
||||
}
|
||||
|
||||
// Add a path to an attribute_path_map. Throws a validation error if the path
|
||||
// Add a path to a attribute_path_map. Throws a validation error if the path
|
||||
// "overlaps" with one already in the filter (one is a sub-path of the other)
|
||||
// or "conflicts" with it (both a member and index is requested).
|
||||
template<typename T>
|
||||
@@ -4978,12 +4977,7 @@ future<executor::request_return_type> executor::batch_get_item(client_state& cli
|
||||
if (!some_succeeded && eptr) {
|
||||
co_await coroutine::return_exception_ptr(std::move(eptr));
|
||||
}
|
||||
auto duration = std::chrono::steady_clock::now() - start_time;
|
||||
_stats.api_operations.batch_get_item_latency.mark(duration);
|
||||
for (const table_requests& rs : requests) {
|
||||
lw_shared_ptr<stats> per_table_stats = get_stats_from_schema(_proxy, *rs.schema);
|
||||
per_table_stats->api_operations.batch_get_item_latency.mark(duration);
|
||||
}
|
||||
_stats.api_operations.batch_get_item_latency.mark(std::chrono::steady_clock::now() - start_time);
|
||||
if (is_big(response)) {
|
||||
co_return make_streamed(std::move(response));
|
||||
} else {
|
||||
@@ -5421,7 +5415,7 @@ static future<executor::request_return_type> do_query(service::storage_proxy& pr
|
||||
}
|
||||
|
||||
static dht::token token_for_segment(int segment, int total_segments) {
|
||||
throwing_assert(total_segments > 1 && segment >= 0 && segment < total_segments);
|
||||
SCYLLA_ASSERT(total_segments > 1 && segment >= 0 && segment < total_segments);
|
||||
uint64_t delta = std::numeric_limits<uint64_t>::max() / total_segments;
|
||||
return dht::token::from_int64(std::numeric_limits<int64_t>::min() + delta * segment);
|
||||
}
|
||||
|
||||
@@ -50,7 +50,7 @@ public:
|
||||
_operators.emplace_back(i);
|
||||
check_depth_limit();
|
||||
}
|
||||
void add_dot(std::string name) {
|
||||
void add_dot(std::string(name)) {
|
||||
_operators.emplace_back(std::move(name));
|
||||
check_depth_limit();
|
||||
}
|
||||
@@ -85,7 +85,7 @@ struct constant {
|
||||
}
|
||||
};
|
||||
|
||||
// "value" is a value used in the right hand side of an assignment
|
||||
// "value" is is a value used in the right hand side of an assignment
|
||||
// expression, "SET a = ...". It can be a constant (a reference to a value
|
||||
// included in the request, e.g., ":val"), a path to an attribute from the
|
||||
// existing item (e.g., "a.b[3].c"), or a function of other such values.
|
||||
@@ -205,7 +205,7 @@ public:
|
||||
// The supported primitive conditions are:
|
||||
// 1. Binary operators - v1 OP v2, where OP is =, <>, <, <=, >, or >= and
|
||||
// v1 and v2 are values - from the item (an attribute path), the query
|
||||
// (a ":val" reference), or a function of the above (only the size()
|
||||
// (a ":val" reference), or a function of the the above (only the size()
|
||||
// function is supported).
|
||||
// 2. Ternary operator - v1 BETWEEN v2 and v3 (means v1 >= v2 AND v1 <= v3).
|
||||
// 3. N-ary operator - v1 IN ( v2, v3, ... )
|
||||
|
||||
@@ -55,7 +55,7 @@ partition_key pk_from_json(const rjson::value& item, schema_ptr schema);
|
||||
clustering_key ck_from_json(const rjson::value& item, schema_ptr schema);
|
||||
position_in_partition pos_from_json(const rjson::value& item, schema_ptr schema);
|
||||
|
||||
// If v encodes a number (i.e., it is a {"N": [...]}), returns an object representing it. Otherwise,
|
||||
// If v encodes a number (i.e., it is a {"N": [...]}, returns an object representing it. Otherwise,
|
||||
// raises ValidationException with diagnostic.
|
||||
big_decimal unwrap_number(const rjson::value& v, std::string_view diagnostic);
|
||||
|
||||
|
||||
@@ -411,8 +411,8 @@ future<std::string> server::verify_signature(const request& req, const chunked_c
|
||||
}
|
||||
}
|
||||
|
||||
auto cache_getter = [&proxy = _proxy] (std::string username) {
|
||||
return get_key_from_roles(proxy, std::move(username));
|
||||
auto cache_getter = [&proxy = _proxy, &as = _auth_service] (std::string username) {
|
||||
return get_key_from_roles(proxy, as, std::move(username));
|
||||
};
|
||||
return _key_cache.get_ptr(user, cache_getter).then_wrapped([this, &req, &content,
|
||||
user = std::move(user),
|
||||
@@ -710,7 +710,7 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
|
||||
++_executor._stats.requests_blocked_memory;
|
||||
}
|
||||
auto units = co_await std::move(units_fut);
|
||||
throwing_assert(req->content_stream);
|
||||
SCYLLA_ASSERT(req->content_stream);
|
||||
chunked_content content = co_await read_entire_stream(*req->content_stream, request_content_length_limit);
|
||||
// If the request had no Content-Length, we reserved too many units
|
||||
// so need to return some
|
||||
@@ -771,7 +771,7 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
|
||||
if (!username.empty()) {
|
||||
client_state.set_login(auth::authenticated_user(username));
|
||||
}
|
||||
client_state.maybe_update_per_service_level_params();
|
||||
co_await client_state.maybe_update_per_service_level_params();
|
||||
|
||||
tracing::trace_state_ptr trace_state = maybe_trace_query(client_state, username, op, content, _max_users_query_size_in_trace_output.get());
|
||||
tracing::trace(trace_state, "{}", op);
|
||||
|
||||
@@ -14,6 +14,20 @@
|
||||
namespace alternator {
|
||||
|
||||
const char* ALTERNATOR_METRICS = "alternator";
|
||||
// Converts a utils::estimated_histogram into the seastar metrics histogram
// representation. One output bucket is produced per entry in bucket_offsets;
// each output bucket's upper bound is the matching offset and its count is
// cumulative, i.e. the sum of all source buckets up to and including it.
static seastar::metrics::histogram estimated_histogram_to_metrics(const utils::estimated_histogram& histogram) {
    seastar::metrics::histogram out;
    out.buckets.resize(histogram.bucket_offsets.size());
    out.sample_count = histogram._count;
    out.sample_sum = histogram._sample_sum;

    // Running total of the source bucket counts seen so far.
    uint64_t running_total = 0;
    const size_t bucket_count = out.buckets.size();
    for (size_t idx = 0; idx != bucket_count; ++idx) {
        running_total += histogram.buckets[idx];
        auto& bucket = out.buckets[idx];
        bucket.upper_bound = histogram.bucket_offsets[idx];
        bucket.count = running_total;
    }
    return out;
}
|
||||
|
||||
static seastar::metrics::label column_family_label("cf");
|
||||
static seastar::metrics::label keyspace_label("ks");
|
||||
@@ -137,21 +151,21 @@ static void register_metrics_with_optional_table(seastar::metrics::metric_groups
|
||||
seastar::metrics::make_counter("batch_item_count", seastar::metrics::description("The total number of items processed across all batches"), labels,
|
||||
stats.api_operations.batch_get_item_batch_total)(op("BatchGetItem")).aggregate(aggregate_labels).set_skip_when_empty(),
|
||||
seastar::metrics::make_histogram("batch_item_count_histogram", seastar::metrics::description("Histogram of the number of items in a batch request"), labels,
|
||||
[&stats]{ return to_metrics_histogram(stats.api_operations.batch_get_item_histogram);})(op("BatchGetItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
[&stats]{ return estimated_histogram_to_metrics(stats.api_operations.batch_get_item_histogram);})(op("BatchGetItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
seastar::metrics::make_histogram("batch_item_count_histogram", seastar::metrics::description("Histogram of the number of items in a batch request"), labels,
|
||||
[&stats]{ return to_metrics_histogram(stats.api_operations.batch_write_item_histogram);})(op("BatchWriteItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
[&stats]{ return estimated_histogram_to_metrics(stats.api_operations.batch_write_item_histogram);})(op("BatchWriteItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
seastar::metrics::make_histogram("operation_size_kb", seastar::metrics::description("Histogram of item sizes involved in a request"), labels,
|
||||
[&stats]{ return to_metrics_histogram(stats.operation_sizes.get_item_op_size_kb);})(op("GetItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
[&stats]{ return estimated_histogram_to_metrics(stats.operation_sizes.get_item_op_size_kb);})(op("GetItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
seastar::metrics::make_histogram("operation_size_kb", seastar::metrics::description("Histogram of item sizes involved in a request"), labels,
|
||||
[&stats]{ return to_metrics_histogram(stats.operation_sizes.put_item_op_size_kb);})(op("PutItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
[&stats]{ return estimated_histogram_to_metrics(stats.operation_sizes.put_item_op_size_kb);})(op("PutItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
seastar::metrics::make_histogram("operation_size_kb", seastar::metrics::description("Histogram of item sizes involved in a request"), labels,
|
||||
[&stats]{ return to_metrics_histogram(stats.operation_sizes.delete_item_op_size_kb);})(op("DeleteItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
[&stats]{ return estimated_histogram_to_metrics(stats.operation_sizes.delete_item_op_size_kb);})(op("DeleteItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
seastar::metrics::make_histogram("operation_size_kb", seastar::metrics::description("Histogram of item sizes involved in a request"), labels,
|
||||
[&stats]{ return to_metrics_histogram(stats.operation_sizes.update_item_op_size_kb);})(op("UpdateItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
[&stats]{ return estimated_histogram_to_metrics(stats.operation_sizes.update_item_op_size_kb);})(op("UpdateItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
seastar::metrics::make_histogram("operation_size_kb", seastar::metrics::description("Histogram of item sizes involved in a request"), labels,
|
||||
[&stats]{ return to_metrics_histogram(stats.operation_sizes.batch_get_item_op_size_kb);})(op("BatchGetItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
[&stats]{ return estimated_histogram_to_metrics(stats.operation_sizes.batch_get_item_op_size_kb);})(op("BatchGetItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
seastar::metrics::make_histogram("operation_size_kb", seastar::metrics::description("Histogram of item sizes involved in a request"), labels,
|
||||
[&stats]{ return to_metrics_histogram(stats.operation_sizes.batch_write_item_op_size_kb);})(op("BatchWriteItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
[&stats]{ return estimated_histogram_to_metrics(stats.operation_sizes.batch_write_item_op_size_kb);})(op("BatchWriteItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
});
|
||||
|
||||
seastar::metrics::label expression_label("expression");
|
||||
|
||||
@@ -16,8 +16,6 @@
|
||||
#include "cql3/stats.hh"
|
||||
|
||||
namespace alternator {
|
||||
using batch_histogram = utils::estimated_histogram_with_max<128>;
|
||||
using op_size_histogram = utils::estimated_histogram_with_max<512>;
|
||||
|
||||
// Object holding per-shard statistics related to Alternator.
|
||||
// While this object is alive, these metrics are also registered to be
|
||||
@@ -78,34 +76,34 @@ public:
|
||||
utils::timed_rate_moving_average_summary_and_histogram batch_get_item_latency;
|
||||
utils::timed_rate_moving_average_summary_and_histogram get_records_latency;
|
||||
|
||||
batch_histogram batch_get_item_histogram;
|
||||
batch_histogram batch_write_item_histogram;
|
||||
utils::estimated_histogram batch_get_item_histogram{22}; // a histogram that covers the range 1 - 100
|
||||
utils::estimated_histogram batch_write_item_histogram{22}; // a histogram that covers the range 1 - 100
|
||||
} api_operations;
|
||||
// Operation size metrics
|
||||
struct {
|
||||
// Item size statistics collected per table and aggregated per node.
|
||||
// Each histogram covers the range 0 - 512. Resolves #25143.
|
||||
// Each histogram covers the range 0 - 446. Resolves #25143.
|
||||
// A size is the retrieved item's size.
|
||||
op_size_histogram get_item_op_size_kb;
|
||||
utils::estimated_histogram get_item_op_size_kb{30};
|
||||
// A size is the maximum of the new item's size and the old item's size.
|
||||
op_size_histogram put_item_op_size_kb;
|
||||
utils::estimated_histogram put_item_op_size_kb{30};
|
||||
// A size is the deleted item's size. If the deleted item's size is
|
||||
// unknown (i.e. read-before-write wasn't necessary and it wasn't
|
||||
// forced by a configuration option), it won't be recorded on the
|
||||
// histogram.
|
||||
op_size_histogram delete_item_op_size_kb;
|
||||
utils::estimated_histogram delete_item_op_size_kb{30};
|
||||
// A size is the maximum of existing item's size and the estimated size
|
||||
// of the update. This will be changed to the maximum of the existing item's
|
||||
// size and the new item's size in a subsequent PR.
|
||||
op_size_histogram update_item_op_size_kb;
|
||||
utils::estimated_histogram update_item_op_size_kb{30};
|
||||
|
||||
// A size is the sum of the sizes of all items per table. This means
|
||||
// that a single BatchGetItem / BatchWriteItem updates the histogram
|
||||
// for each table that it has items in.
|
||||
// The sizes are the retrieved items' sizes grouped per table.
|
||||
op_size_histogram batch_get_item_op_size_kb;
|
||||
utils::estimated_histogram batch_get_item_op_size_kb{30};
|
||||
// The sizes are the written items' sizes grouped per table.
|
||||
op_size_histogram batch_write_item_op_size_kb;
|
||||
utils::estimated_histogram batch_write_item_op_size_kb{30};
|
||||
} operation_sizes;
|
||||
// Count of authentication and authorization failures, counted if either
|
||||
// alternator_enforce_authorization or alternator_warn_authorization are
|
||||
@@ -142,7 +140,7 @@ public:
|
||||
cql3::cql_stats cql_stats;
|
||||
|
||||
// Enumeration of expression types only for stats
|
||||
// if needed it can be extended e.g. per operation
|
||||
// if needed it can be extended e.g. per operation
|
||||
enum expression_types {
|
||||
UPDATE_EXPRESSION,
|
||||
CONDITION_EXPRESSION,
|
||||
@@ -166,7 +164,7 @@ struct table_stats {
|
||||
void register_metrics(seastar::metrics::metric_groups& metrics, const stats& stats);
|
||||
|
||||
// Converts a size in bytes to kilobytes (1 KB = 1024 bytes), rounding up:
// 0 bytes -> 0 KB, 1..1024 bytes -> 1 KB, 1025 bytes -> 2 KB, and so on.
// The result is computed from the quotient and remainder rather than as
// (bytes + 1023) / 1024, so it cannot wrap around for inputs close to the
// maximum uint64_t value.
inline uint64_t bytes_to_kb_ceil(uint64_t bytes) {
    return bytes / 1024 + (bytes % 1024 != 0 ? 1 : 0);
}
|
||||
|
||||
}
|
||||
|
||||
@@ -33,8 +33,6 @@
|
||||
#include "data_dictionary/data_dictionary.hh"
|
||||
#include "utils/rjson.hh"
|
||||
|
||||
static logging::logger elogger("alternator-streams");
|
||||
|
||||
/**
|
||||
* Base template type to implement rapidjson::internal::TypeHelper<...>:s
|
||||
* for types that are ostreamable/string constructible/castable.
|
||||
@@ -430,25 +428,6 @@ using namespace std::chrono_literals;
|
||||
// Dynamo docs says no data shall live longer than 24h.
|
||||
static constexpr auto dynamodb_streams_max_window = 24h;
|
||||
|
||||
// find the parent shard in previous generation for the given child shard
|
||||
// takes care of wrap-around case in vnodes
|
||||
// prev_streams must be sorted by token
|
||||
const cdc::stream_id& find_parent_shard_in_previous_generation(db_clock::time_point prev_timestamp, const utils::chunked_vector<cdc::stream_id> &prev_streams, const cdc::stream_id &child) {
|
||||
if (prev_streams.empty()) {
|
||||
// something is really wrong - streams are empty
|
||||
// let's try internal_error in hope it will be notified and fixed
|
||||
on_internal_error(elogger, fmt::format("streams are empty for cdc generation at {} ({})", prev_timestamp, prev_timestamp.time_since_epoch().count()));
|
||||
}
|
||||
auto it = std::lower_bound(prev_streams.begin(), prev_streams.end(), child.token(), [](const cdc::stream_id& id, const dht::token& t) {
|
||||
return id.token() < t;
|
||||
});
|
||||
if (it == prev_streams.end()) {
|
||||
// wrap around case - take first
|
||||
it = prev_streams.begin();
|
||||
}
|
||||
return *it;
|
||||
}
|
||||
|
||||
future<executor::request_return_type> executor::describe_stream(client_state& client_state, service_permit permit, rjson::value request) {
|
||||
_stats.api_operations.describe_stream++;
|
||||
|
||||
@@ -599,8 +578,16 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
|
||||
auto shard = rjson::empty_object();
|
||||
|
||||
if (prev != e) {
|
||||
auto &pid = find_parent_shard_in_previous_generation(prev->first, prev->second.streams, id);
|
||||
rjson::add(shard, "ParentShardId", shard_id(prev->first, pid));
|
||||
auto& pids = prev->second.streams;
|
||||
auto pid = std::upper_bound(pids.begin(), pids.end(), id.token(), [](const dht::token& t, const cdc::stream_id& id) {
|
||||
return t < id.token();
|
||||
});
|
||||
if (pid != pids.begin()) {
|
||||
pid = std::prev(pid);
|
||||
}
|
||||
if (pid != pids.end()) {
|
||||
rjson::add(shard, "ParentShardId", shard_id(prev->first, *pid));
|
||||
}
|
||||
}
|
||||
|
||||
last.emplace(ts, id);
|
||||
|
||||
@@ -46,7 +46,6 @@
|
||||
#include "alternator/executor.hh"
|
||||
#include "alternator/controller.hh"
|
||||
#include "alternator/serialization.hh"
|
||||
#include "alternator/ttl_tag.hh"
|
||||
#include "dht/sharder.hh"
|
||||
#include "db/config.hh"
|
||||
#include "db/tags/utils.hh"
|
||||
@@ -58,10 +57,19 @@ static logging::logger tlogger("alternator_ttl");
|
||||
|
||||
namespace alternator {
|
||||
|
||||
// We write the expiration-time attribute enabled on a table in a
|
||||
// tag TTL_TAG_KEY.
|
||||
// Currently, the *value* of this tag is simply the name of the attribute,
|
||||
// and the expiration scanner interprets it as an Alternator attribute name -
|
||||
// It can refer to a real column or if that doesn't exist, to a member of
|
||||
// the ":attrs" map column. Although this is designed for Alternator, it may
|
||||
// be good enough for CQL as well (there, the ":attrs" column won't exist).
|
||||
extern const sstring TTL_TAG_KEY;
|
||||
|
||||
future<executor::request_return_type> executor::update_time_to_live(client_state& client_state, service_permit permit, rjson::value request) {
|
||||
_stats.api_operations.update_time_to_live++;
|
||||
if (!_proxy.features().alternator_ttl) {
|
||||
co_return api_error::unknown_operation("UpdateTimeToLive not yet supported. Upgrade all nodes to a version that supports it.");
|
||||
co_return api_error::unknown_operation("UpdateTimeToLive not yet supported. Experimental support is available if the 'alternator-ttl' experimental feature is enabled on all nodes.");
|
||||
}
|
||||
|
||||
schema_ptr schema = get_table(_proxy, request);
|
||||
@@ -133,7 +141,7 @@ future<executor::request_return_type> executor::describe_time_to_live(client_sta
|
||||
|
||||
// expiration_service is a sharded service responsible for cleaning up expired
|
||||
// items in all tables with per-item expiration enabled. Currently, this means
|
||||
// Alternator tables with TTL configured via an UpdateTimeToLive request.
|
||||
// Alternator tables with TTL configured via a UpdateTimeToLive request.
|
||||
//
|
||||
// Here is a brief overview of how the expiration service works:
|
||||
//
|
||||
@@ -316,7 +324,9 @@ static future<std::vector<std::pair<dht::token_range, locator::host_id>>> get_se
|
||||
const auto& tm = *erm->get_token_metadata_ptr();
|
||||
const auto& sorted_tokens = tm.sorted_tokens();
|
||||
std::vector<std::pair<dht::token_range, locator::host_id>> ret;
|
||||
throwing_assert(!sorted_tokens.empty());
|
||||
if (sorted_tokens.empty()) {
|
||||
on_internal_error(tlogger, "Token metadata is empty");
|
||||
}
|
||||
auto prev_tok = sorted_tokens.back();
|
||||
for (const auto& tok : sorted_tokens) {
|
||||
co_await coroutine::maybe_yield();
|
||||
@@ -553,7 +563,7 @@ static future<> scan_table_ranges(
|
||||
expiration_service::stats& expiration_stats)
|
||||
{
|
||||
const schema_ptr& s = scan_ctx.s;
|
||||
throwing_assert(partition_ranges.size() == 1); // otherwise issue #9167 will cause incorrect results.
|
||||
SCYLLA_ASSERT (partition_ranges.size() == 1); // otherwise issue #9167 will cause incorrect results.
|
||||
auto p = service::pager::query_pagers::pager(proxy, s, scan_ctx.selection, *scan_ctx.query_state_ptr,
|
||||
*scan_ctx.query_options, scan_ctx.command, std::move(partition_ranges), nullptr);
|
||||
while (!p->is_exhausted()) {
|
||||
@@ -583,7 +593,7 @@ static future<> scan_table_ranges(
|
||||
if (retries >= 10) {
|
||||
// Don't get stuck forever asking the same page, maybe there's
|
||||
// a bug or a real problem in several replicas. Give up on
|
||||
// this scan and retry the scan from a random position later,
|
||||
// this scan an retry the scan from a random position later,
|
||||
// in the next scan period.
|
||||
throw runtime_exception("scanner thread failed after too many timeouts for the same page");
|
||||
}
|
||||
@@ -630,38 +640,13 @@ static future<> scan_table_ranges(
|
||||
}
|
||||
} else {
|
||||
// For a real column to contain an expiration time, it
|
||||
// must be a numeric type. We currently support decimal
|
||||
// (used by Alternator TTL) as well as bigint, int and
|
||||
// timestamp (used by CQL per-row TTL).
|
||||
switch (meta[*expiration_column]->type->get_kind()) {
|
||||
case abstract_type::kind::decimal:
|
||||
// Used by Alternator TTL for key columns not stored
|
||||
// in the map. The value is in seconds, fractional
|
||||
// part is ignored.
|
||||
expired = is_expired(value_cast<big_decimal>(v), now);
|
||||
break;
|
||||
case abstract_type::kind::long_kind:
|
||||
// Used by CQL per-row TTL. The value is in seconds.
|
||||
expired = is_expired(gc_clock::time_point(std::chrono::seconds(value_cast<int64_t>(v))), now);
|
||||
break;
|
||||
case abstract_type::kind::int32:
|
||||
// Used by CQL per-row TTL. The value is in seconds.
|
||||
// Using int type is not recommended because it will
|
||||
// overflow in 2038, but we support it to allow users
|
||||
// to use existing int columns for expiration.
|
||||
expired = is_expired(gc_clock::time_point(std::chrono::seconds(value_cast<int32_t>(v))), now);
|
||||
break;
|
||||
case abstract_type::kind::timestamp:
|
||||
// Used by CQL per-row TTL. The value is in milliseconds
|
||||
// but we truncate it to gc_clock's precision (whole seconds).
|
||||
expired = is_expired(gc_clock::time_point(std::chrono::duration_cast<gc_clock::duration>(value_cast<db_clock::time_point>(v).time_since_epoch())), now);
|
||||
break;
|
||||
default:
|
||||
// Should never happen - we verified the column's type
|
||||
// before starting the scan.
|
||||
[[unlikely]]
|
||||
on_internal_error(tlogger, format("expiration scanner value of unsupported type {} in column {}", meta[*expiration_column]->type->cql3_type_name(), scan_ctx.column_name) );
|
||||
}
|
||||
// must be a numeric type.
|
||||
// FIXME: Currently we only support decimal_type (which is
|
||||
// what Alternator uses), but other numeric types can be
|
||||
// supported as well to make this feature more useful in CQL.
|
||||
// Note that kind::decimal is also checked above.
|
||||
big_decimal n = value_cast<big_decimal>(v);
|
||||
expired = is_expired(n, now);
|
||||
}
|
||||
if (expired) {
|
||||
expiration_stats.items_deleted++;
|
||||
@@ -723,12 +708,16 @@ static future<bool> scan_table(
|
||||
co_return false;
|
||||
}
|
||||
// attribute_name may be one of the schema's columns (in Alternator, this
|
||||
// means a key column, in CQL it's a regular column), or an element in
|
||||
// Alternator's attrs map encoded in Alternator's JSON encoding (which we
|
||||
// decode). If attribute_name is a real column, in Alternator it will have
|
||||
// the type decimal, counting seconds since the UNIX epoch, while in CQL
|
||||
// it will be one of the types bigint or int (counting seconds) or timestamp
|
||||
// (counting milliseconds).
|
||||
// means it's a key column), or an element in Alternator's attrs map
|
||||
// encoded in Alternator's JSON encoding.
|
||||
// FIXME: To make this less Alternators-specific, we should encode in the
|
||||
// single key's value three things:
|
||||
// 1. The name of a column
|
||||
// 2. Optionally if column is a map, a member in the map
|
||||
// 3. The deserializer for the value: CQL or Alternator (JSON).
|
||||
// The deserializer can be guessed: If the given column or map item is
|
||||
// numeric, it can be used directly. If it is a "bytes" type, it needs to
|
||||
// be deserialized using Alternator's deserializer.
|
||||
bytes column_name = to_bytes(*attribute_name);
|
||||
const column_definition *cd = s->get_column_definition(column_name);
|
||||
std::optional<std::string> member;
|
||||
@@ -747,14 +736,11 @@ static future<bool> scan_table(
|
||||
data_type column_type = cd->type;
|
||||
// Verify that the column has the right type: If "member" exists
|
||||
// the column must be a map, and if it doesn't, the column must
|
||||
// be decimal_type (Alternator), bigint, int or timestamp (CQL).
|
||||
// If the column has the wrong type nothing can get expired in
|
||||
// this table, and it's pointless to scan it.
|
||||
// (currently) be a decimal_type. If the column has the wrong type
|
||||
// nothing can get expired in this table, and it's pointless to
|
||||
// scan it.
|
||||
if ((member && column_type->get_kind() != abstract_type::kind::map) ||
|
||||
(!member && column_type->get_kind() != abstract_type::kind::decimal &&
|
||||
column_type->get_kind() != abstract_type::kind::long_kind &&
|
||||
column_type->get_kind() != abstract_type::kind::int32 &&
|
||||
column_type->get_kind() != abstract_type::kind::timestamp)) {
|
||||
(!member && column_type->get_kind() != abstract_type::kind::decimal)) {
|
||||
tlogger.info("table {} TTL column has unsupported type, not scanning", s->cf_name());
|
||||
co_return false;
|
||||
}
|
||||
@@ -781,7 +767,7 @@ static future<bool> scan_table(
|
||||
// by tasking another node to take over scanning of the dead node's primary
|
||||
// ranges. What we do here is that this node will also check expiration
|
||||
// on its *secondary* ranges - but only those whose primary owner is down.
|
||||
auto tablet_secondary_replica = tablet_map.get_secondary_replica(*tablet, erm->get_topology()); // throws if no secondary replica
|
||||
auto tablet_secondary_replica = tablet_map.get_secondary_replica(*tablet); // throws if no secondary replica
|
||||
if (tablet_secondary_replica.host == my_host_id && tablet_secondary_replica.shard == this_shard_id()) {
|
||||
if (!gossiper.is_alive(tablet_primary_replica.host)) {
|
||||
co_await scan_tablet(*tablet, proxy, abort_source, page_sem, expiration_stats, scan_ctx, tablet_map);
|
||||
@@ -892,10 +878,12 @@ future<> expiration_service::run() {
|
||||
future<> expiration_service::start() {
|
||||
// Called by main() on each shard to start the expiration-service
|
||||
// thread. Just runs run() in the background and allows stop().
|
||||
if (!shutting_down()) {
|
||||
_end = run().handle_exception([] (std::exception_ptr ep) {
|
||||
tlogger.error("expiration_service failed: {}", ep);
|
||||
});
|
||||
if (_db.features().alternator_ttl) {
|
||||
if (!shutting_down()) {
|
||||
_end = run().handle_exception([] (std::exception_ptr ep) {
|
||||
tlogger.error("expiration_service failed: {}", ep);
|
||||
});
|
||||
}
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
@@ -30,7 +30,7 @@ namespace alternator {
|
||||
|
||||
// expiration_service is a sharded service responsible for cleaning up expired
|
||||
// items in all tables with per-item expiration enabled. Currently, this means
|
||||
// Alternator tables with TTL configured via an UpdateTimeToLive request.
|
||||
// Alternator tables with TTL configured via a UpdateTimeToLeave request.
|
||||
class expiration_service final : public seastar::peering_sharded_service<expiration_service> {
|
||||
public:
|
||||
// Object holding per-shard statistics related to the expiration service.
|
||||
@@ -52,7 +52,7 @@ private:
|
||||
data_dictionary::database _db;
|
||||
service::storage_proxy& _proxy;
|
||||
gms::gossiper& _gossiper;
|
||||
// _end is set by start(), and resolves when the background service
|
||||
// _end is set by start(), and resolves when the the background service
|
||||
// started by it ends. To ask the background service to end, _abort_source
|
||||
// should be triggered. stop() below uses both _abort_source and _end.
|
||||
std::optional<future<>> _end;
|
||||
|
||||
@@ -1,26 +0,0 @@
|
||||
/*
|
||||
* Copyright 2026-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "seastarx.hh"
|
||||
#include <seastar/core/sstring.hh>
|
||||
|
||||
namespace alternator {
|
||||
// We use the table tag TTL_TAG_KEY ("system:ttl_attribute") to remember
|
||||
// which attribute was chosen as the expiration-time attribute for
|
||||
// Alternator's TTL and CQL's per-row TTL features.
|
||||
// Currently, the *value* of this tag is simply the name of the attribute:
|
||||
// It can refer to a real column or if that doesn't exist, to a member of
|
||||
// the ":attrs" map column (which Alternator uses).
|
||||
extern const sstring TTL_TAG_KEY;
|
||||
} // namespace alternator
|
||||
|
||||
// let users use TTL_TAG_KEY without the "alternator::" prefix,
|
||||
// to make it easier to move it to a different namespace later.
|
||||
using alternator::TTL_TAG_KEY;
|
||||
@@ -12,7 +12,7 @@
|
||||
"operations":[
|
||||
{
|
||||
"method":"POST",
|
||||
"summary":"Resets authorized prepared statements cache",
|
||||
"summary":"Reset cache",
|
||||
"type":"void",
|
||||
"nickname":"authorization_cache_reset",
|
||||
"produces":[
|
||||
|
||||
@@ -243,7 +243,7 @@
|
||||
"GOSSIP_DIGEST_SYN",
|
||||
"GOSSIP_DIGEST_ACK2",
|
||||
"GOSSIP_SHUTDOWN",
|
||||
"UNUSED__DEFINITIONS_UPDATE",
|
||||
"DEFINITIONS_UPDATE",
|
||||
"TRUNCATE",
|
||||
"UNUSED__REPLICATION_FINISHED",
|
||||
"MIGRATION_REQUEST",
|
||||
|
||||
@@ -1295,45 +1295,6 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/storage_service/logstor_compaction",
|
||||
"operations":[
|
||||
{
|
||||
"method":"POST",
|
||||
"summary":"Trigger compaction of the key-value storage",
|
||||
"type":"void",
|
||||
"nickname":"logstor_compaction",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"major",
|
||||
"description":"When true, perform a major compaction",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"boolean",
|
||||
"paramType":"query"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/storage_service/logstor_flush",
|
||||
"operations":[
|
||||
{
|
||||
"method":"POST",
|
||||
"summary":"Trigger flush of logstor storage",
|
||||
"type":"void",
|
||||
"nickname":"logstor_flush",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/storage_service/active_repair/",
|
||||
"operations":[
|
||||
@@ -3124,48 +3085,6 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
{
|
||||
"path":"/storage_service/tablets/snapshots",
|
||||
"operations":[
|
||||
{
|
||||
"method":"POST",
|
||||
"summary":"Takes the snapshot for the given keyspaces/tables. A snapshot name must be specified.",
|
||||
"type":"void",
|
||||
"nickname":"take_cluster_snapshot",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"tag",
|
||||
"description":"the tag given to the snapshot",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"keyspace",
|
||||
"description":"Keyspace(s) to snapshot. Multiple keyspaces can be provided using a comma-separated list. If omitted, snapshot all keyspaces.",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"table",
|
||||
"description":"Table(s) to snapshot. Multiple tables (in a single keyspace) can be provided using a comma-separated list. If omitted, snapshot all tables in the given keyspace(s).",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
{
|
||||
"path":"/storage_service/quiesce_topology",
|
||||
"operations":[
|
||||
@@ -3268,38 +3187,6 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/storage_service/logstor_info",
|
||||
"operations":[
|
||||
{
|
||||
"method":"GET",
|
||||
"summary":"Logstor segment information for one table",
|
||||
"type":"table_logstor_info",
|
||||
"nickname":"logstor_info",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"keyspace",
|
||||
"description":"The keyspace",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"table",
|
||||
"description":"table name",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/storage_service/retrain_dict",
|
||||
"operations":[
|
||||
@@ -3708,47 +3595,6 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"logstor_hist_bucket":{
|
||||
"id":"logstor_hist_bucket",
|
||||
"properties":{
|
||||
"bucket":{
|
||||
"type":"long"
|
||||
},
|
||||
"count":{
|
||||
"type":"long"
|
||||
},
|
||||
"min_data_size":{
|
||||
"type":"long"
|
||||
},
|
||||
"max_data_size":{
|
||||
"type":"long"
|
||||
}
|
||||
}
|
||||
},
|
||||
"table_logstor_info":{
|
||||
"id":"table_logstor_info",
|
||||
"description":"Per-table logstor segment distribution",
|
||||
"properties":{
|
||||
"keyspace":{
|
||||
"type":"string"
|
||||
},
|
||||
"table":{
|
||||
"type":"string"
|
||||
},
|
||||
"compaction_groups":{
|
||||
"type":"long"
|
||||
},
|
||||
"segments":{
|
||||
"type":"long"
|
||||
},
|
||||
"data_size_histogram":{
|
||||
"type":"array",
|
||||
"items":{
|
||||
"$ref":"logstor_hist_bucket"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tablet_repair_result":{
|
||||
"id":"tablet_repair_result",
|
||||
"description":"Tablet repair result",
|
||||
|
||||
@@ -209,21 +209,6 @@
|
||||
"parameters":[]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/system/chosen_sstable_version",
|
||||
"operations":[
|
||||
{
|
||||
"method":"GET",
|
||||
"summary":"Get sstable version currently chosen for use in new sstables",
|
||||
"type":"string",
|
||||
"nickname":"get_chosen_sstable_version",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -122,9 +122,9 @@ future<> unset_thrift_controller(http_context& ctx) {
|
||||
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_thrift_controller(ctx, r); });
|
||||
}
|
||||
|
||||
future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, sharded<db::snapshot_ctl>& ssc, service::raft_group0_client& group0_client) {
|
||||
return ctx.http_server.set_routes([&ctx, &ss, &ssc, &group0_client] (routes& r) {
|
||||
set_storage_service(ctx, r, ss, ssc, group0_client);
|
||||
future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, service::raft_group0_client& group0_client) {
|
||||
return ctx.http_server.set_routes([&ctx, &ss, &group0_client] (routes& r) {
|
||||
set_storage_service(ctx, r, ss, group0_client);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
25
api/api.hh
25
api/api.hh
@@ -23,6 +23,31 @@
|
||||
|
||||
namespace api {
|
||||
|
||||
template<class T>
|
||||
std::vector<T> map_to_key_value(const std::map<sstring, sstring>& map) {
|
||||
std::vector<T> res;
|
||||
res.reserve(map.size());
|
||||
|
||||
for (const auto& [key, value] : map) {
|
||||
res.push_back(T());
|
||||
res.back().key = key;
|
||||
res.back().value = value;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
template<class T, class MAP>
|
||||
std::vector<T>& map_to_key_value(const MAP& map, std::vector<T>& res) {
|
||||
res.reserve(res.size() + std::size(map));
|
||||
|
||||
for (const auto& [key, value] : map) {
|
||||
T val;
|
||||
val.key = fmt::to_string(key);
|
||||
val.value = fmt::to_string(value);
|
||||
res.push_back(val);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
template <typename T, typename S = T>
|
||||
T map_sum(T&& dest, const S& src) {
|
||||
for (const auto& i : src) {
|
||||
|
||||
@@ -98,7 +98,7 @@ future<> set_server_config(http_context& ctx, db::config& cfg);
|
||||
future<> unset_server_config(http_context& ctx);
|
||||
future<> set_server_snitch(http_context& ctx, sharded<locator::snitch_ptr>& snitch);
|
||||
future<> unset_server_snitch(http_context& ctx);
|
||||
future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, sharded<db::snapshot_ctl>&, service::raft_group0_client&);
|
||||
future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, service::raft_group0_client&);
|
||||
future<> unset_server_storage_service(http_context& ctx);
|
||||
future<> set_server_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr);
|
||||
future<> unset_server_client_routes(http_context& ctx);
|
||||
|
||||
@@ -18,9 +18,7 @@
|
||||
#include "utils/assert.hh"
|
||||
#include "utils/estimated_histogram.hh"
|
||||
#include <algorithm>
|
||||
#include <sstream>
|
||||
#include "db/data_listeners.hh"
|
||||
#include "utils/hash.hh"
|
||||
#include "storage_service.hh"
|
||||
#include "compaction/compaction_manager.hh"
|
||||
#include "unimplemented.hh"
|
||||
@@ -344,56 +342,6 @@ uint64_t accumulate_on_active_memtables(replica::table& t, noncopyable_function<
|
||||
return ret;
|
||||
}
|
||||
|
||||
static
|
||||
future<json::json_return_type>
|
||||
rest_toppartitions_generic(sharded<replica::database>& db, std::unique_ptr<http::request> req) {
|
||||
bool filters_provided = false;
|
||||
|
||||
std::unordered_set<std::tuple<sstring, sstring>, utils::tuple_hash> table_filters {};
|
||||
if (auto filters = req->get_query_param("table_filters"); !filters.empty()) {
|
||||
filters_provided = true;
|
||||
std::stringstream ss { filters };
|
||||
std::string filter;
|
||||
while (!filters.empty() && ss.good()) {
|
||||
std::getline(ss, filter, ',');
|
||||
table_filters.emplace(parse_fully_qualified_cf_name(filter));
|
||||
}
|
||||
}
|
||||
|
||||
std::unordered_set<sstring> keyspace_filters {};
|
||||
if (auto filters = req->get_query_param("keyspace_filters"); !filters.empty()) {
|
||||
filters_provided = true;
|
||||
std::stringstream ss { filters };
|
||||
std::string filter;
|
||||
while (!filters.empty() && ss.good()) {
|
||||
std::getline(ss, filter, ',');
|
||||
keyspace_filters.emplace(std::move(filter));
|
||||
}
|
||||
}
|
||||
|
||||
// when the query is empty return immediately
|
||||
if (filters_provided && table_filters.empty() && keyspace_filters.empty()) {
|
||||
apilog.debug("toppartitions query: processing results");
|
||||
cf::toppartitions_query_results results;
|
||||
|
||||
results.read_cardinality = 0;
|
||||
results.write_cardinality = 0;
|
||||
|
||||
return make_ready_future<json::json_return_type>(results);
|
||||
}
|
||||
|
||||
api::req_param<std::chrono::milliseconds, unsigned> duration{*req, "duration", 1000ms};
|
||||
api::req_param<unsigned> capacity(*req, "capacity", 256);
|
||||
api::req_param<unsigned> list_size(*req, "list_size", 10);
|
||||
|
||||
apilog.info("toppartitions query: #table_filters={} #keyspace_filters={} duration={} list_size={} capacity={}",
|
||||
!table_filters.empty() ? std::to_string(table_filters.size()) : "all", !keyspace_filters.empty() ? std::to_string(keyspace_filters.size()) : "all", duration.value, list_size.value, capacity.value);
|
||||
|
||||
return seastar::do_with(db::toppartitions_query(db, std::move(table_filters), std::move(keyspace_filters), duration.value, list_size, capacity), [] (db::toppartitions_query& q) {
|
||||
return run_toppartitions_query(q);
|
||||
});
|
||||
}
|
||||
|
||||
void set_column_family(http_context& ctx, routes& r, sharded<replica::database>& db) {
|
||||
cf::get_column_family_name.set(r, [&db] (const_req req){
|
||||
std::vector<sstring> res;
|
||||
@@ -1099,10 +1047,6 @@ void set_column_family(http_context& ctx, routes& r, sharded<replica::database>&
|
||||
});
|
||||
});
|
||||
|
||||
ss::toppartitions_generic.set(r, [&db] (std::unique_ptr<http::request> req) {
|
||||
return rest_toppartitions_generic(db, std::move(req));
|
||||
});
|
||||
|
||||
cf::force_major_compaction.set(r, [&ctx, &db](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
if (!req->get_query_param("split_output").empty()) {
|
||||
fail(unimplemented::cause::API);
|
||||
@@ -1269,7 +1213,6 @@ void unset_column_family(http_context& ctx, routes& r) {
|
||||
cf::get_sstable_count_per_level.unset(r);
|
||||
cf::get_sstables_for_key.unset(r);
|
||||
cf::toppartitions.unset(r);
|
||||
ss::toppartitions_generic.unset(r);
|
||||
cf::force_major_compaction.unset(r);
|
||||
ss::get_load.unset(r);
|
||||
ss::get_metrics_load.unset(r);
|
||||
|
||||
@@ -17,7 +17,9 @@
|
||||
#include "gms/feature_service.hh"
|
||||
#include "schema/schema_builder.hh"
|
||||
#include "sstables/sstables_manager.hh"
|
||||
#include "utils/hash.hh"
|
||||
#include <optional>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <time.h>
|
||||
#include <algorithm>
|
||||
@@ -513,15 +515,6 @@ void set_sstables_loader(http_context& ctx, routes& r, sharded<sstables_loader>&
|
||||
auto sstables = parsed.GetArray() |
|
||||
std::views::transform([] (const auto& s) { return sstring(rjson::to_string_view(s)); }) |
|
||||
std::ranges::to<std::vector>();
|
||||
apilog.info("Restore invoked with following parameters: keyspace={}, table={}, endpoint={}, bucket={}, prefix={}, sstables_count={}, scope={}, primary_replica_only={}",
|
||||
keyspace,
|
||||
table,
|
||||
endpoint,
|
||||
bucket,
|
||||
prefix,
|
||||
sstables.size(),
|
||||
scope,
|
||||
primary_replica_only);
|
||||
auto task_id = co_await sst_loader.local().download_new_sstables(keyspace, table, prefix, std::move(sstables), endpoint, bucket, scope, primary_replica_only);
|
||||
co_return json::json_return_type(fmt::to_string(task_id));
|
||||
});
|
||||
@@ -534,15 +527,13 @@ void unset_sstables_loader(http_context& ctx, routes& r) {
|
||||
}
|
||||
|
||||
void set_view_builder(http_context& ctx, routes& r, sharded<db::view::view_builder>& vb, sharded<gms::gossiper>& g) {
|
||||
ss::view_build_statuses.set(r, [&ctx, &vb, &g] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
ss::view_build_statuses.set(r, [&ctx, &vb, &g] (std::unique_ptr<http::request> req) {
|
||||
auto keyspace = validate_keyspace(ctx, req);
|
||||
auto view = req->get_path_param("view");
|
||||
co_return json::json_return_type(stream_range_as_array(co_await vb.local().view_build_statuses(std::move(keyspace), std::move(view), g.local()), [] (const auto& i) {
|
||||
storage_service_json::mapper res;
|
||||
res.key = i.first;
|
||||
res.value = i.second;
|
||||
return res;
|
||||
}));
|
||||
return vb.local().view_build_statuses(std::move(keyspace), std::move(view), g.local()).then([] (std::unordered_map<sstring, sstring> status) {
|
||||
std::vector<storage_service_json::mapper> res;
|
||||
return make_ready_future<json::json_return_type>(map_to_key_value(std::move(status), res));
|
||||
});
|
||||
});
|
||||
|
||||
cf::get_built_indexes.set(r, [&vb](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
@@ -580,16 +571,6 @@ static future<json::json_return_type> describe_ring_as_json_for_table(const shar
|
||||
co_return json::json_return_type(stream_range_as_array(co_await ss.local().describe_ring_for_table(keyspace, table), token_range_endpoints_to_json));
|
||||
}
|
||||
|
||||
namespace {
|
||||
template <typename Key, typename Value>
|
||||
storage_service_json::mapper map_to_json(const std::pair<Key, Value>& i) {
|
||||
storage_service_json::mapper val;
|
||||
val.key = fmt::to_string(i.first);
|
||||
val.value = fmt::to_string(i.second);
|
||||
return val;
|
||||
}
|
||||
}
|
||||
|
||||
static
|
||||
future<json::json_return_type>
|
||||
rest_get_token_endpoint(http_context& ctx, sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
|
||||
@@ -607,7 +588,62 @@ rest_get_token_endpoint(http_context& ctx, sharded<service::storage_service>& ss
|
||||
throw bad_param_exception("Either provide both keyspace and table (for tablet table) or neither (for vnodes)");
|
||||
}
|
||||
|
||||
co_return json::json_return_type(stream_range_as_array(token_endpoints, &map_to_json<dht::token, gms::inet_address>));
|
||||
co_return json::json_return_type(stream_range_as_array(token_endpoints, [](const auto& i) {
|
||||
storage_service_json::mapper val;
|
||||
val.key = fmt::to_string(i.first);
|
||||
val.value = fmt::to_string(i.second);
|
||||
return val;
|
||||
}));
|
||||
}
|
||||
|
||||
static
|
||||
future<json::json_return_type>
|
||||
rest_toppartitions_generic(http_context& ctx, std::unique_ptr<http::request> req) {
|
||||
bool filters_provided = false;
|
||||
|
||||
std::unordered_set<std::tuple<sstring, sstring>, utils::tuple_hash> table_filters {};
|
||||
if (auto filters = req->get_query_param("table_filters"); !filters.empty()) {
|
||||
filters_provided = true;
|
||||
std::stringstream ss { filters };
|
||||
std::string filter;
|
||||
while (!filters.empty() && ss.good()) {
|
||||
std::getline(ss, filter, ',');
|
||||
table_filters.emplace(parse_fully_qualified_cf_name(filter));
|
||||
}
|
||||
}
|
||||
|
||||
std::unordered_set<sstring> keyspace_filters {};
|
||||
if (auto filters = req->get_query_param("keyspace_filters"); !filters.empty()) {
|
||||
filters_provided = true;
|
||||
std::stringstream ss { filters };
|
||||
std::string filter;
|
||||
while (!filters.empty() && ss.good()) {
|
||||
std::getline(ss, filter, ',');
|
||||
keyspace_filters.emplace(std::move(filter));
|
||||
}
|
||||
}
|
||||
|
||||
// when the query is empty return immediately
|
||||
if (filters_provided && table_filters.empty() && keyspace_filters.empty()) {
|
||||
apilog.debug("toppartitions query: processing results");
|
||||
httpd::column_family_json::toppartitions_query_results results;
|
||||
|
||||
results.read_cardinality = 0;
|
||||
results.write_cardinality = 0;
|
||||
|
||||
return make_ready_future<json::json_return_type>(results);
|
||||
}
|
||||
|
||||
api::req_param<std::chrono::milliseconds, unsigned> duration{*req, "duration", 1000ms};
|
||||
api::req_param<unsigned> capacity(*req, "capacity", 256);
|
||||
api::req_param<unsigned> list_size(*req, "list_size", 10);
|
||||
|
||||
apilog.info("toppartitions query: #table_filters={} #keyspace_filters={} duration={} list_size={} capacity={}",
|
||||
!table_filters.empty() ? std::to_string(table_filters.size()) : "all", !keyspace_filters.empty() ? std::to_string(keyspace_filters.size()) : "all", duration.value, list_size.value, capacity.value);
|
||||
|
||||
return seastar::do_with(db::toppartitions_query(ctx.db, std::move(table_filters), std::move(keyspace_filters), duration.value, list_size, capacity), [] (db::toppartitions_query& q) {
|
||||
return run_toppartitions_query(q);
|
||||
});
|
||||
}
|
||||
|
||||
static
|
||||
@@ -641,6 +677,7 @@ rest_get_range_to_endpoint_map(http_context& ctx, sharded<service::storage_servi
|
||||
table_id = validate_table(ctx.db.local(), keyspace, table);
|
||||
}
|
||||
|
||||
std::vector<ss::maplist_mapper> res;
|
||||
co_return stream_range_as_array(co_await ss.local().get_range_to_address_map(keyspace, table_id),
|
||||
[](const std::pair<dht::token_range, inet_address_vector_replica_set>& entry){
|
||||
ss::maplist_mapper m;
|
||||
@@ -731,13 +768,17 @@ rest_cleanup_all(http_context& ctx, sharded<service::storage_service>& ss, std::
|
||||
|
||||
apilog.info("cleanup_all global={}", global);
|
||||
|
||||
if (global) {
|
||||
co_await ss.invoke_on(0, [] (service::storage_service& ss) -> future<> {
|
||||
co_return co_await ss.do_clusterwide_vnodes_cleanup();
|
||||
});
|
||||
auto done = !global ? false : co_await ss.invoke_on(0, [] (service::storage_service& ss) -> future<bool> {
|
||||
if (!ss.is_topology_coordinator_enabled()) {
|
||||
co_return false;
|
||||
}
|
||||
co_await ss.do_clusterwide_vnodes_cleanup();
|
||||
co_return true;
|
||||
});
|
||||
if (done) {
|
||||
co_return json::json_return_type(0);
|
||||
}
|
||||
// fall back to the local cleanup if local cleanup is requested
|
||||
// fall back to the local cleanup if topology coordinator is not enabled or local cleanup is requested
|
||||
auto& db = ctx.db;
|
||||
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
|
||||
auto task = co_await compaction_module.make_and_start_task<compaction::global_cleanup_compaction_task_impl>({}, db);
|
||||
@@ -745,7 +786,9 @@ rest_cleanup_all(http_context& ctx, sharded<service::storage_service>& ss, std::
|
||||
|
||||
// Mark this node as clean
|
||||
co_await ss.invoke_on(0, [] (service::storage_service& ss) -> future<> {
|
||||
co_await ss.reset_cleanup_needed();
|
||||
if (ss.is_topology_coordinator_enabled()) {
|
||||
co_await ss.reset_cleanup_needed();
|
||||
}
|
||||
});
|
||||
|
||||
co_return json::json_return_type(0);
|
||||
@@ -756,6 +799,9 @@ future<json::json_return_type>
|
||||
rest_reset_cleanup_needed(http_context& ctx, sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
|
||||
apilog.info("reset_cleanup_needed");
|
||||
co_await ss.invoke_on(0, [] (service::storage_service& ss) {
|
||||
if (!ss.is_topology_coordinator_enabled()) {
|
||||
throw std::runtime_error("mark_node_as_clean is only supported when topology over raft is enabled");
|
||||
}
|
||||
return ss.reset_cleanup_needed();
|
||||
});
|
||||
co_return json_void();
|
||||
@@ -783,31 +829,9 @@ rest_force_keyspace_flush(http_context& ctx, std::unique_ptr<http::request> req)
|
||||
|
||||
static
|
||||
future<json::json_return_type>
|
||||
rest_logstor_compaction(http_context& ctx, std::unique_ptr<http::request> req) {
|
||||
bool major = false;
|
||||
if (auto major_param = req->get_query_param("major"); !major_param.empty()) {
|
||||
major = validate_bool(major_param);
|
||||
}
|
||||
apilog.info("logstor_compaction: major={}", major);
|
||||
auto& db = ctx.db;
|
||||
co_await replica::database::trigger_logstor_compaction_on_all_shards(db, major);
|
||||
co_return json_void();
|
||||
}
|
||||
|
||||
static
|
||||
future<json::json_return_type>
|
||||
rest_logstor_flush(http_context& ctx, std::unique_ptr<http::request> req) {
|
||||
apilog.info("logstor_flush");
|
||||
auto& db = ctx.db;
|
||||
co_await replica::database::flush_logstor_separator_on_all_shards(db);
|
||||
co_return json_void();
|
||||
}
|
||||
|
||||
static
|
||||
future<json::json_return_type>
|
||||
rest_decommission(sharded<service::storage_service>& ss, sharded<db::snapshot_ctl>& ssc, std::unique_ptr<http::request> req) {
|
||||
rest_decommission(sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
|
||||
apilog.info("decommission");
|
||||
return ss.local().decommission(ssc).then([] {
|
||||
return ss.local().decommission().then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
}
|
||||
@@ -1284,7 +1308,10 @@ rest_get_ownership(http_context& ctx, sharded<service::storage_service>& ss, std
|
||||
throw httpd::bad_param_exception("storage_service/ownership cannot be used when a keyspace uses tablets");
|
||||
}
|
||||
|
||||
co_return json::json_return_type(stream_range_as_array(co_await ss.local().get_ownership(), &map_to_json<gms::inet_address, float>));
|
||||
return ss.local().get_ownership().then([] (auto&& ownership) {
|
||||
std::vector<storage_service_json::mapper> res;
|
||||
return make_ready_future<json::json_return_type>(map_to_key_value(ownership, res));
|
||||
});
|
||||
}
|
||||
|
||||
static
|
||||
@@ -1301,7 +1328,10 @@ rest_get_effective_ownership(http_context& ctx, sharded<service::storage_service
|
||||
}
|
||||
}
|
||||
|
||||
co_return json::json_return_type(stream_range_as_array(co_await ss.local().effective_ownership(keyspace_name, table_name), &map_to_json<gms::inet_address, float>));
|
||||
return ss.local().effective_ownership(keyspace_name, table_name).then([] (auto&& ownership) {
|
||||
std::vector<storage_service_json::mapper> res;
|
||||
return make_ready_future<json::json_return_type>(map_to_key_value(ownership, res));
|
||||
});
|
||||
}
|
||||
|
||||
static
|
||||
@@ -1311,7 +1341,7 @@ rest_estimate_compression_ratios(http_context& ctx, sharded<service::storage_ser
|
||||
apilog.warn("estimate_compression_ratios: called before the cluster feature was enabled");
|
||||
throw std::runtime_error("estimate_compression_ratios requires all nodes to support the SSTABLE_COMPRESSION_DICTS cluster feature");
|
||||
}
|
||||
auto ticket = co_await get_units(ss.local().get_do_sample_sstables_concurrency_limiter(), 1);
|
||||
auto ticket = get_units(ss.local().get_do_sample_sstables_concurrency_limiter(), 1);
|
||||
auto ks = api::req_param<sstring>(*req, "keyspace", {}).value;
|
||||
auto cf = api::req_param<sstring>(*req, "cf", {}).value;
|
||||
apilog.debug("estimate_compression_ratios: called with ks={} cf={}", ks, cf);
|
||||
@@ -1377,7 +1407,7 @@ rest_retrain_dict(http_context& ctx, sharded<service::storage_service>& ss, serv
|
||||
apilog.warn("retrain_dict: called before the cluster feature was enabled");
|
||||
throw std::runtime_error("retrain_dict requires all nodes to support the SSTABLE_COMPRESSION_DICTS cluster feature");
|
||||
}
|
||||
auto ticket = co_await get_units(ss.local().get_do_sample_sstables_concurrency_limiter(), 1);
|
||||
auto ticket = get_units(ss.local().get_do_sample_sstables_concurrency_limiter(), 1);
|
||||
auto ks = api::req_param<sstring>(*req, "keyspace", {}).value;
|
||||
auto cf = api::req_param<sstring>(*req, "cf", {}).value;
|
||||
apilog.debug("retrain_dict: called with ks={} cf={}", ks, cf);
|
||||
@@ -1523,54 +1553,6 @@ rest_sstable_info(http_context& ctx, std::unique_ptr<http::request> req) {
|
||||
});
|
||||
}
|
||||
|
||||
static
|
||||
future<json::json_return_type>
|
||||
rest_logstor_info(http_context& ctx, std::unique_ptr<http::request> req) {
|
||||
auto keyspace = api::req_param<sstring>(*req, "keyspace", {}).value;
|
||||
auto table = api::req_param<sstring>(*req, "table", {}).value;
|
||||
if (table.empty()) {
|
||||
table = api::req_param<sstring>(*req, "cf", {}).value;
|
||||
}
|
||||
|
||||
if (keyspace.empty()) {
|
||||
throw bad_param_exception("The query parameter 'keyspace' is required");
|
||||
}
|
||||
if (table.empty()) {
|
||||
throw bad_param_exception("The query parameter 'table' is required");
|
||||
}
|
||||
|
||||
keyspace = validate_keyspace(ctx, keyspace);
|
||||
auto tid = validate_table(ctx.db.local(), keyspace, table);
|
||||
|
||||
auto& cf = ctx.db.local().find_column_family(tid);
|
||||
if (!cf.uses_logstor()) {
|
||||
throw bad_param_exception(fmt::format("Table {}.{} does not use logstor", keyspace, table));
|
||||
}
|
||||
|
||||
return do_with(replica::logstor::table_segment_stats{}, [keyspace = std::move(keyspace), table = std::move(table), tid, &ctx] (replica::logstor::table_segment_stats& merged_stats) {
|
||||
return ctx.db.map_reduce([&merged_stats](replica::logstor::table_segment_stats&& shard_stats) {
|
||||
merged_stats += shard_stats;
|
||||
}, [tid](const replica::database& db) {
|
||||
return db.get_logstor_table_segment_stats(tid);
|
||||
}).then([&merged_stats, keyspace = std::move(keyspace), table = std::move(table)] {
|
||||
ss::table_logstor_info result;
|
||||
result.keyspace = keyspace;
|
||||
result.table = table;
|
||||
result.compaction_groups = merged_stats.compaction_group_count;
|
||||
result.segments = merged_stats.segment_count;
|
||||
|
||||
for (const auto& bucket : merged_stats.histogram) {
|
||||
ss::logstor_hist_bucket hist;
|
||||
hist.count = bucket.count;
|
||||
hist.max_data_size = bucket.max_data_size;
|
||||
result.data_size_histogram.push(std::move(hist));
|
||||
}
|
||||
|
||||
return make_ready_future<json::json_return_type>(stream_object(result));
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
static
|
||||
future<json::json_return_type>
|
||||
rest_reload_raft_topology_state(sharded<service::storage_service>& ss, service::raft_group0_client& group0_client, std::unique_ptr<http::request> req) {
|
||||
@@ -1583,14 +1565,26 @@ rest_reload_raft_topology_state(sharded<service::storage_service>& ss, service::
|
||||
static
|
||||
future<json::json_return_type>
|
||||
rest_upgrade_to_raft_topology(sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
|
||||
apilog.info("Requested to schedule upgrade to raft topology, but this version does not need it since it uses raft topology by default.");
|
||||
apilog.info("Requested to schedule upgrade to raft topology");
|
||||
try {
|
||||
co_await ss.invoke_on(0, [] (auto& ss) {
|
||||
return ss.start_upgrade_to_raft_topology();
|
||||
});
|
||||
} catch (...) {
|
||||
auto ex = std::current_exception();
|
||||
apilog.error("Failed to schedule upgrade to raft topology: {}", ex);
|
||||
std::rethrow_exception(std::move(ex));
|
||||
}
|
||||
co_return json_void();
|
||||
}
|
||||
|
||||
static
|
||||
future<json::json_return_type>
|
||||
rest_raft_topology_upgrade_status(sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
|
||||
co_return sstring("done");
|
||||
const auto ustate = co_await ss.invoke_on(0, [] (auto& ss) {
|
||||
return ss.get_topology_upgrade_state();
|
||||
});
|
||||
co_return sstring(format("{}", ustate));
|
||||
}
|
||||
|
||||
static
|
||||
@@ -1800,8 +1794,9 @@ rest_bind(FuncType func, BindArgs&... args) {
|
||||
return std::bind_front(func, std::ref(args)...);
|
||||
}
|
||||
|
||||
void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_service>& ss, sharded<db::snapshot_ctl>& ssc, service::raft_group0_client& group0_client) {
|
||||
void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_service>& ss, service::raft_group0_client& group0_client) {
|
||||
ss::get_token_endpoint.set(r, rest_bind(rest_get_token_endpoint, ctx, ss));
|
||||
ss::toppartitions_generic.set(r, rest_bind(rest_toppartitions_generic, ctx));
|
||||
ss::get_release_version.set(r, rest_bind(rest_get_release_version, ss));
|
||||
ss::get_scylla_release_version.set(r, rest_bind(rest_get_scylla_release_version, ss));
|
||||
ss::get_schema_version.set(r, rest_bind(rest_get_schema_version, ss));
|
||||
@@ -1816,9 +1811,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
ss::reset_cleanup_needed.set(r, rest_bind(rest_reset_cleanup_needed, ctx, ss));
|
||||
ss::force_flush.set(r, rest_bind(rest_force_flush, ctx));
|
||||
ss::force_keyspace_flush.set(r, rest_bind(rest_force_keyspace_flush, ctx));
|
||||
ss::decommission.set(r, rest_bind(rest_decommission, ss, ssc));
|
||||
ss::logstor_compaction.set(r, rest_bind(rest_logstor_compaction, ctx));
|
||||
ss::logstor_flush.set(r, rest_bind(rest_logstor_flush, ctx));
|
||||
ss::decommission.set(r, rest_bind(rest_decommission, ss));
|
||||
ss::move.set(r, rest_bind(rest_move, ss));
|
||||
ss::remove_node.set(r, rest_bind(rest_remove_node, ss));
|
||||
ss::exclude_node.set(r, rest_bind(rest_exclude_node, ss));
|
||||
@@ -1867,7 +1860,6 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
ss::retrain_dict.set(r, rest_bind(rest_retrain_dict, ctx, ss, group0_client));
|
||||
ss::estimate_compression_ratios.set(r, rest_bind(rest_estimate_compression_ratios, ctx, ss));
|
||||
ss::sstable_info.set(r, rest_bind(rest_sstable_info, ctx));
|
||||
ss::logstor_info.set(r, rest_bind(rest_logstor_info, ctx));
|
||||
ss::reload_raft_topology_state.set(r, rest_bind(rest_reload_raft_topology_state, ss, group0_client));
|
||||
ss::upgrade_to_raft_topology.set(r, rest_bind(rest_upgrade_to_raft_topology, ss));
|
||||
ss::raft_topology_upgrade_status.set(r, rest_bind(rest_raft_topology_upgrade_status, ss));
|
||||
@@ -1884,6 +1876,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
|
||||
void unset_storage_service(http_context& ctx, routes& r) {
|
||||
ss::get_token_endpoint.unset(r);
|
||||
ss::toppartitions_generic.unset(r);
|
||||
ss::get_release_version.unset(r);
|
||||
ss::get_scylla_release_version.unset(r);
|
||||
ss::get_schema_version.unset(r);
|
||||
@@ -1897,8 +1890,6 @@ void unset_storage_service(http_context& ctx, routes& r) {
|
||||
ss::reset_cleanup_needed.unset(r);
|
||||
ss::force_flush.unset(r);
|
||||
ss::force_keyspace_flush.unset(r);
|
||||
ss::logstor_compaction.unset(r);
|
||||
ss::logstor_flush.unset(r);
|
||||
ss::decommission.unset(r);
|
||||
ss::move.unset(r);
|
||||
ss::remove_node.unset(r);
|
||||
@@ -1946,7 +1937,6 @@ void unset_storage_service(http_context& ctx, routes& r) {
|
||||
ss::get_ownership.unset(r);
|
||||
ss::get_effective_ownership.unset(r);
|
||||
ss::sstable_info.unset(r);
|
||||
ss::logstor_info.unset(r);
|
||||
ss::reload_raft_topology_state.unset(r);
|
||||
ss::upgrade_to_raft_topology.unset(r);
|
||||
ss::raft_topology_upgrade_status.unset(r);
|
||||
@@ -2026,8 +2016,6 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
|
||||
auto tag = req->get_query_param("tag");
|
||||
auto column_families = split(req->get_query_param("cf"), ",");
|
||||
auto sfopt = req->get_query_param("sf");
|
||||
auto tcopt = req->get_query_param("tc");
|
||||
|
||||
db::snapshot_options opts = {
|
||||
.skip_flush = strcasecmp(sfopt.c_str(), "true") == 0,
|
||||
};
|
||||
@@ -2052,27 +2040,6 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
|
||||
}
|
||||
});
|
||||
|
||||
ss::take_cluster_snapshot.set(r, [&snap_ctl](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
apilog.info("take_cluster_snapshot: {}", req->get_query_params());
|
||||
auto tag = req->get_query_param("tag");
|
||||
auto column_families = split(req->get_query_param("table"), ",");
|
||||
// Note: not published/active. Retain as internal option, but...
|
||||
auto sfopt = req->get_query_param("skip_flush");
|
||||
|
||||
db::snapshot_options opts = {
|
||||
.skip_flush = strcasecmp(sfopt.c_str(), "true") == 0,
|
||||
};
|
||||
|
||||
std::vector<sstring> keynames = split(req->get_query_param("keyspace"), ",");
|
||||
try {
|
||||
co_await snap_ctl.local().take_cluster_column_family_snapshot(keynames, column_families, tag, opts);
|
||||
co_return json_void();
|
||||
} catch (...) {
|
||||
apilog.error("take_cluster_snapshot failed: {}", std::current_exception());
|
||||
throw;
|
||||
}
|
||||
});
|
||||
|
||||
ss::del_snapshot.set(r, [&snap_ctl](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
apilog.info("del_snapshot: {}", req->get_query_params());
|
||||
auto tag = req->get_query_param("tag");
|
||||
@@ -2163,7 +2130,6 @@ void unset_snapshot(http_context& ctx, routes& r) {
|
||||
ss::start_backup.unset(r);
|
||||
cf::get_true_snapshots_size.unset(r);
|
||||
cf::get_all_true_snapshots_size.unset(r);
|
||||
ss::decommission.unset(r);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -66,7 +66,7 @@ struct scrub_info {
|
||||
|
||||
scrub_info parse_scrub_options(const http_context& ctx, std::unique_ptr<http::request> req);
|
||||
|
||||
void set_storage_service(http_context& ctx, httpd::routes& r, sharded<service::storage_service>& ss, sharded<db::snapshot_ctl>&, service::raft_group0_client&);
|
||||
void set_storage_service(http_context& ctx, httpd::routes& r, sharded<service::storage_service>& ss, service::raft_group0_client&);
|
||||
void unset_storage_service(http_context& ctx, httpd::routes& r);
|
||||
void set_sstables_loader(http_context& ctx, httpd::routes& r, sharded<sstables_loader>& sst_loader);
|
||||
void unset_sstables_loader(http_context& ctx, httpd::routes& r);
|
||||
|
||||
@@ -190,13 +190,6 @@ void set_system(http_context& ctx, routes& r) {
|
||||
return make_ready_future<json::json_return_type>(seastar::to_sstring(format));
|
||||
});
|
||||
});
|
||||
|
||||
hs::get_chosen_sstable_version.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return smp::submit_to(0, [&ctx] {
|
||||
auto format = ctx.db.local().get_user_sstables_manager().get_preferred_sstable_version();
|
||||
return make_ready_future<json::json_return_type>(seastar::to_sstring(format));
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -17,14 +17,15 @@ target_sources(scylla_auth
|
||||
password_authenticator.cc
|
||||
passwords.cc
|
||||
permission.cc
|
||||
permissions_cache.cc
|
||||
resource.cc
|
||||
role_or_anonymous.cc
|
||||
roles-metadata.cc
|
||||
sasl_challenge.cc
|
||||
saslauthd_authenticator.cc
|
||||
service.cc
|
||||
standard_role_manager.cc
|
||||
transitional.cc
|
||||
maintenance_socket_authenticator.cc
|
||||
maintenance_socket_role_manager.cc)
|
||||
target_include_directories(scylla_auth
|
||||
PUBLIC
|
||||
@@ -48,4 +49,4 @@ if (Scylla_USE_PRECOMPILED_HEADER_USE)
|
||||
target_precompile_headers(scylla_auth REUSE_FROM scylla-precompiled-header)
|
||||
endif()
|
||||
check_headers(check-headers scylla_auth
|
||||
GLOB_RECURSE ${CMAKE_CURRENT_SOURCE_DIR}/*.hh)
|
||||
GLOB_RECURSE ${CMAKE_CURRENT_SOURCE_DIR}/*.hh)
|
||||
@@ -9,9 +9,19 @@
|
||||
#include "auth/allow_all_authenticator.hh"
|
||||
|
||||
#include "service/migration_manager.hh"
|
||||
#include "utils/class_registrator.hh"
|
||||
|
||||
namespace auth {
|
||||
|
||||
constexpr std::string_view allow_all_authenticator_name("org.apache.cassandra.auth.AllowAllAuthenticator");
|
||||
|
||||
// To ensure correct initialization order, we unfortunately need to use a string literal.
|
||||
static const class_registrator<
|
||||
authenticator,
|
||||
allow_all_authenticator,
|
||||
cql3::query_processor&,
|
||||
::service::raft_group0_client&,
|
||||
::service::migration_manager&,
|
||||
cache&> registration("org.apache.cassandra.auth.AllowAllAuthenticator");
|
||||
|
||||
}
|
||||
|
||||
@@ -9,9 +9,18 @@
|
||||
#include "auth/allow_all_authorizer.hh"
|
||||
|
||||
#include "auth/common.hh"
|
||||
#include "utils/class_registrator.hh"
|
||||
|
||||
namespace auth {
|
||||
|
||||
constexpr std::string_view allow_all_authorizer_name("org.apache.cassandra.auth.AllowAllAuthorizer");
|
||||
|
||||
// To ensure correct initialization order, we unfortunately need to use a string literal.
|
||||
static const class_registrator<
|
||||
authorizer,
|
||||
allow_all_authorizer,
|
||||
cql3::query_processor&,
|
||||
::service::raft_group0_client&,
|
||||
::service::migration_manager&> registration("org.apache.cassandra.auth.AllowAllAuthorizer");
|
||||
|
||||
}
|
||||
|
||||
@@ -26,7 +26,7 @@ extern const std::string_view allow_all_authorizer_name;
|
||||
|
||||
class allow_all_authorizer final : public authorizer {
|
||||
public:
|
||||
allow_all_authorizer(cql3::query_processor&) {
|
||||
allow_all_authorizer(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&) {
|
||||
}
|
||||
|
||||
virtual future<> start() override {
|
||||
|
||||
199
auth/cache.cc
199
auth/cache.cc
@@ -8,7 +8,6 @@
|
||||
|
||||
#include "auth/cache.hh"
|
||||
#include "auth/common.hh"
|
||||
#include "auth/role_or_anonymous.hh"
|
||||
#include "auth/roles-metadata.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "cql3/untyped_result_set.hh"
|
||||
@@ -19,8 +18,6 @@
|
||||
#include <seastar/core/abort_source.hh>
|
||||
#include <seastar/coroutine/maybe_yield.hh>
|
||||
#include <seastar/core/format.hh>
|
||||
#include <seastar/core/metrics.hh>
|
||||
#include <seastar/core/do_with.hh>
|
||||
|
||||
namespace auth {
|
||||
|
||||
@@ -30,24 +27,10 @@ cache::cache(cql3::query_processor& qp, abort_source& as) noexcept
|
||||
: _current_version(0)
|
||||
, _qp(qp)
|
||||
, _loading_sem(1)
|
||||
, _as(as)
|
||||
, _permission_loader(nullptr)
|
||||
, _permission_loader_sem(8) {
|
||||
namespace sm = seastar::metrics;
|
||||
_metrics.add_group("auth_cache", {
|
||||
sm::make_gauge("roles", [this] { return _roles.size(); },
|
||||
sm::description("Number of roles currently cached")),
|
||||
sm::make_gauge("permissions", [this] {
|
||||
return _cached_permissions_count;
|
||||
}, sm::description("Total number of permission sets currently cached across all roles"))
|
||||
});
|
||||
, _as(as) {
|
||||
}
|
||||
|
||||
void cache::set_permission_loader(permission_loader_func loader) {
|
||||
_permission_loader = std::move(loader);
|
||||
}
|
||||
|
||||
lw_shared_ptr<const cache::role_record> cache::get(std::string_view role) const noexcept {
|
||||
lw_shared_ptr<const cache::role_record> cache::get(const role_name_t& role) const noexcept {
|
||||
auto it = _roles.find(role);
|
||||
if (it == _roles.end()) {
|
||||
return {};
|
||||
@@ -55,93 +38,6 @@ lw_shared_ptr<const cache::role_record> cache::get(std::string_view role) const
|
||||
return it->second;
|
||||
}
|
||||
|
||||
void cache::for_each_role(const std::function<void(const role_name_t&, const role_record&)>& func) const {
|
||||
for (const auto& [name, record] : _roles) {
|
||||
func(name, *record);
|
||||
}
|
||||
}
|
||||
|
||||
size_t cache::roles_count() const noexcept {
|
||||
return _roles.size();
|
||||
}
|
||||
|
||||
future<permission_set> cache::get_permissions(const role_or_anonymous& role, const resource& r) {
|
||||
std::unordered_map<resource, permission_set>* perms_cache;
|
||||
lw_shared_ptr<role_record> role_ptr;
|
||||
|
||||
if (is_anonymous(role)) {
|
||||
perms_cache = &_anonymous_permissions;
|
||||
} else {
|
||||
const auto& role_name = *role.name;
|
||||
auto role_it = _roles.find(role_name);
|
||||
if (role_it == _roles.end()) {
|
||||
// Role might have been deleted but there are some connections
|
||||
// left which reference it. They should no longer have access to anything.
|
||||
return make_ready_future<permission_set>(permissions::NONE);
|
||||
}
|
||||
role_ptr = role_it->second;
|
||||
perms_cache = &role_ptr->cached_permissions;
|
||||
}
|
||||
|
||||
if (auto it = perms_cache->find(r); it != perms_cache->end()) {
|
||||
return make_ready_future<permission_set>(it->second);
|
||||
}
|
||||
// keep alive role_ptr as it holds perms_cache (except anonymous)
|
||||
return do_with(std::move(role_ptr), [this, &role, &r, perms_cache] (auto& role_ptr) {
|
||||
return load_permissions(role, r, perms_cache);
|
||||
});
|
||||
}
|
||||
|
||||
future<permission_set> cache::load_permissions(const role_or_anonymous& role, const resource& r, std::unordered_map<resource, permission_set>* perms_cache) {
|
||||
SCYLLA_ASSERT(_permission_loader);
|
||||
auto units = co_await get_units(_permission_loader_sem, 1, _as);
|
||||
|
||||
// Check again, perhaps we were blocked and other call loaded
|
||||
// the permissions already. This is a protection against misses storm.
|
||||
if (auto it = perms_cache->find(r); it != perms_cache->end()) {
|
||||
co_return it->second;
|
||||
}
|
||||
auto perms = co_await _permission_loader(role, r);
|
||||
add_permissions(*perms_cache, r, perms);
|
||||
co_return perms;
|
||||
}
|
||||
|
||||
future<> cache::prune(const resource& r) {
|
||||
auto units = co_await get_units(_loading_sem, 1, _as);
|
||||
_anonymous_permissions.erase(r);
|
||||
for (auto& it : _roles) {
|
||||
// Prunning can run concurrently with other functions but it
|
||||
// can only cause cached_permissions extra reload via get_permissions.
|
||||
remove_permissions(it.second->cached_permissions, r);
|
||||
co_await coroutine::maybe_yield();
|
||||
}
|
||||
}
|
||||
|
||||
future<> cache::reload_all_permissions() noexcept {
|
||||
SCYLLA_ASSERT(_permission_loader);
|
||||
auto units = co_await get_units(_loading_sem, 1, _as);
|
||||
auto copy_keys = [] (const std::unordered_map<resource, permission_set>& m) {
|
||||
std::vector<resource> keys;
|
||||
keys.reserve(m.size());
|
||||
for (const auto& [res, _] : m) {
|
||||
keys.push_back(res);
|
||||
}
|
||||
return keys;
|
||||
};
|
||||
const role_or_anonymous anon;
|
||||
for (const auto& res : copy_keys(_anonymous_permissions)) {
|
||||
_anonymous_permissions[res] = co_await _permission_loader(anon, res);
|
||||
}
|
||||
for (auto& [role, entry] : _roles) {
|
||||
auto& perms_cache = entry->cached_permissions;
|
||||
auto r = role_or_anonymous(role);
|
||||
for (const auto& res : copy_keys(perms_cache)) {
|
||||
perms_cache[res] = co_await _permission_loader(r, res);
|
||||
}
|
||||
}
|
||||
logger.debug("Reloaded auth cache with {} entries", _roles.size());
|
||||
}
|
||||
|
||||
future<lw_shared_ptr<cache::role_record>> cache::fetch_role(const role_name_t& role) const {
|
||||
auto rec = make_lw_shared<role_record>();
|
||||
rec->version = _current_version;
|
||||
@@ -209,7 +105,7 @@ future<lw_shared_ptr<cache::role_record>> cache::fetch_role(const role_name_t& r
|
||||
future<> cache::prune_all() noexcept {
|
||||
for (auto it = _roles.begin(); it != _roles.end(); ) {
|
||||
if (it->second->version != _current_version) {
|
||||
remove_role(it++);
|
||||
_roles.erase(it++);
|
||||
co_await coroutine::maybe_yield();
|
||||
} else {
|
||||
++it;
|
||||
@@ -219,6 +115,9 @@ future<> cache::prune_all() noexcept {
|
||||
}
|
||||
|
||||
future<> cache::load_all() {
|
||||
if (legacy_mode(_qp)) {
|
||||
co_return;
|
||||
}
|
||||
SCYLLA_ASSERT(this_shard_id() == 0);
|
||||
auto units = co_await get_units(_loading_sem, 1, _as);
|
||||
|
||||
@@ -230,7 +129,7 @@ future<> cache::load_all() {
|
||||
const auto name = r.get_as<sstring>("role");
|
||||
auto role = co_await fetch_role(name);
|
||||
if (role) {
|
||||
add_role(name, role);
|
||||
_roles[name] = role;
|
||||
}
|
||||
co_return stop_iteration::no;
|
||||
};
|
||||
@@ -243,71 +142,39 @@ future<> cache::load_all() {
|
||||
co_await distribute_role(name, role);
|
||||
}
|
||||
co_await container().invoke_on_others([this](cache& c) -> future<> {
|
||||
auto units = co_await get_units(c._loading_sem, 1, c._as);
|
||||
c._current_version = _current_version;
|
||||
co_await c.prune_all();
|
||||
});
|
||||
}
|
||||
|
||||
future<> cache::gather_inheriting_roles(std::unordered_set<role_name_t>& roles, lw_shared_ptr<cache::role_record> role, const role_name_t& name) {
|
||||
if (!role) {
|
||||
// Role might have been removed or not yet added, either way
|
||||
// their members will be handled by another top call to this function.
|
||||
future<> cache::load_roles(std::unordered_set<role_name_t> roles) {
|
||||
if (legacy_mode(_qp)) {
|
||||
co_return;
|
||||
}
|
||||
for (const auto& member_name : role->members) {
|
||||
bool is_new = roles.insert(member_name).second;
|
||||
if (!is_new) {
|
||||
continue;
|
||||
}
|
||||
lw_shared_ptr<cache::role_record> member_role;
|
||||
auto r = _roles.find(member_name);
|
||||
if (r != _roles.end()) {
|
||||
member_role = r->second;
|
||||
}
|
||||
co_await gather_inheriting_roles(roles, member_role, member_name);
|
||||
}
|
||||
}
|
||||
|
||||
future<> cache::load_roles(std::unordered_set<role_name_t> roles) {
|
||||
SCYLLA_ASSERT(this_shard_id() == 0);
|
||||
auto units = co_await get_units(_loading_sem, 1, _as);
|
||||
|
||||
std::unordered_set<role_name_t> roles_to_clear_perms;
|
||||
for (const auto& name : roles) {
|
||||
logger.info("Loading role {}", name);
|
||||
auto role = co_await fetch_role(name);
|
||||
if (role) {
|
||||
add_role(name, role);
|
||||
co_await gather_inheriting_roles(roles_to_clear_perms, role, name);
|
||||
_roles[name] = role;
|
||||
} else {
|
||||
if (auto it = _roles.find(name); it != _roles.end()) {
|
||||
auto old_role = it->second;
|
||||
remove_role(it);
|
||||
co_await gather_inheriting_roles(roles_to_clear_perms, old_role, name);
|
||||
}
|
||||
_roles.erase(name);
|
||||
}
|
||||
co_await distribute_role(name, role);
|
||||
}
|
||||
|
||||
co_await container().invoke_on_all([&roles_to_clear_perms] (cache& c) -> future<> {
|
||||
for (const auto& name : roles_to_clear_perms) {
|
||||
c.clear_role_permissions(name);
|
||||
co_await coroutine::maybe_yield();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
future<> cache::distribute_role(const role_name_t& name, lw_shared_ptr<role_record> role) {
|
||||
auto role_ptr = role.get();
|
||||
co_await container().invoke_on_others([&name, role_ptr](cache& c) -> future<> {
|
||||
auto units = co_await get_units(c._loading_sem, 1, c._as);
|
||||
co_await container().invoke_on_others([&name, role_ptr](cache& c) {
|
||||
if (!role_ptr) {
|
||||
c.remove_role(name);
|
||||
co_return;
|
||||
c._roles.erase(name);
|
||||
return;
|
||||
}
|
||||
auto role_copy = make_lw_shared<role_record>(*role_ptr);
|
||||
c.add_role(name, std::move(role_copy));
|
||||
c._roles[name] = std::move(role_copy);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -318,40 +185,4 @@ bool cache::includes_table(const table_id& id) noexcept {
|
||||
|| id == db::system_keyspace::role_permissions()->id();
|
||||
}
|
||||
|
||||
void cache::add_role(const role_name_t& name, lw_shared_ptr<role_record> role) {
|
||||
if (auto it = _roles.find(name); it != _roles.end()) {
|
||||
_cached_permissions_count -= it->second->cached_permissions.size();
|
||||
}
|
||||
_cached_permissions_count += role->cached_permissions.size();
|
||||
_roles[name] = std::move(role);
|
||||
}
|
||||
|
||||
void cache::remove_role(const role_name_t& name) {
|
||||
if (auto it = _roles.find(name); it != _roles.end()) {
|
||||
remove_role(it);
|
||||
}
|
||||
}
|
||||
|
||||
void cache::remove_role(roles_map::iterator it) {
|
||||
_cached_permissions_count -= it->second->cached_permissions.size();
|
||||
_roles.erase(it);
|
||||
}
|
||||
|
||||
void cache::clear_role_permissions(const role_name_t& name) {
|
||||
if (auto it = _roles.find(name); it != _roles.end()) {
|
||||
_cached_permissions_count -= it->second->cached_permissions.size();
|
||||
it->second->cached_permissions.clear();
|
||||
}
|
||||
}
|
||||
|
||||
void cache::add_permissions(std::unordered_map<resource, permission_set>& cache, const resource& r, permission_set perms) {
|
||||
if (cache.emplace(r, perms).second) {
|
||||
++_cached_permissions_count;
|
||||
}
|
||||
}
|
||||
|
||||
void cache::remove_permissions(std::unordered_map<resource, permission_set>& cache, const resource& r) {
|
||||
_cached_permissions_count -= cache.erase(r);
|
||||
}
|
||||
|
||||
} // namespace auth
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <seastar/core/abort_source.hh>
|
||||
#include <string_view>
|
||||
#include <unordered_set>
|
||||
#include <unordered_map>
|
||||
|
||||
@@ -18,14 +17,11 @@
|
||||
#include <seastar/core/sharded.hh>
|
||||
#include <seastar/core/shared_ptr.hh>
|
||||
#include <seastar/core/semaphore.hh>
|
||||
#include <seastar/core/metrics_registration.hh>
|
||||
|
||||
#include "absl-flat_hash_map.hh"
|
||||
#include <absl/container/flat_hash_map.h>
|
||||
|
||||
#include "auth/permission.hh"
|
||||
#include "auth/common.hh"
|
||||
#include "auth/resource.hh"
|
||||
#include "auth/role_or_anonymous.hh"
|
||||
|
||||
namespace cql3 { class query_processor; }
|
||||
|
||||
@@ -35,7 +31,6 @@ class cache : public peering_sharded_service<cache> {
|
||||
public:
|
||||
using role_name_t = sstring;
|
||||
using version_tag_t = char;
|
||||
using permission_loader_func = std::function<future<permission_set>(const role_or_anonymous&, const resource&)>;
|
||||
|
||||
struct role_record {
|
||||
bool can_login = false;
|
||||
@@ -43,60 +38,28 @@ public:
|
||||
std::unordered_set<role_name_t> member_of;
|
||||
std::unordered_set<role_name_t> members;
|
||||
sstring salted_hash;
|
||||
std::unordered_map<sstring, sstring, sstring_hash, sstring_eq> attributes;
|
||||
std::unordered_map<sstring, permission_set, sstring_hash, sstring_eq> permissions;
|
||||
private:
|
||||
friend cache;
|
||||
// cached permissions include effects of role's inheritance
|
||||
std::unordered_map<resource, permission_set> cached_permissions;
|
||||
std::unordered_map<sstring, sstring> attributes;
|
||||
std::unordered_map<sstring, permission_set> permissions;
|
||||
version_tag_t version; // used for seamless cache reloads
|
||||
};
|
||||
|
||||
explicit cache(cql3::query_processor& qp, abort_source& as) noexcept;
|
||||
lw_shared_ptr<const role_record> get(std::string_view role) const noexcept;
|
||||
void set_permission_loader(permission_loader_func loader);
|
||||
future<permission_set> get_permissions(const role_or_anonymous& role, const resource& r);
|
||||
future<> prune(const resource& r);
|
||||
future<> reload_all_permissions() noexcept;
|
||||
lw_shared_ptr<const role_record> get(const role_name_t& role) const noexcept;
|
||||
future<> load_all();
|
||||
future<> load_roles(std::unordered_set<role_name_t> roles);
|
||||
static bool includes_table(const table_id&) noexcept;
|
||||
|
||||
// Returns the number of roles in the cache.
|
||||
size_t roles_count() const noexcept;
|
||||
|
||||
// The callback doesn't suspend (no co_await) so it observes the state
|
||||
// of the cache atomically.
|
||||
void for_each_role(const std::function<void(const role_name_t&, const role_record&)>& func) const;
|
||||
|
||||
private:
|
||||
using roles_map = absl::flat_hash_map<role_name_t, lw_shared_ptr<role_record>, sstring_hash, sstring_eq>;
|
||||
using roles_map = absl::flat_hash_map<role_name_t, lw_shared_ptr<role_record>>;
|
||||
roles_map _roles;
|
||||
// anonymous permissions map exists mainly due to compatibility with
|
||||
// higher layers which use role_or_anonymous to get permissions.
|
||||
std::unordered_map<resource, permission_set> _anonymous_permissions;
|
||||
version_tag_t _current_version;
|
||||
cql3::query_processor& _qp;
|
||||
semaphore _loading_sem; // protects iteration of _roles map
|
||||
semaphore _loading_sem;
|
||||
abort_source& _as;
|
||||
permission_loader_func _permission_loader;
|
||||
semaphore _permission_loader_sem; // protects against reload storms on a single role change
|
||||
metrics::metric_groups _metrics;
|
||||
size_t _cached_permissions_count = 0;
|
||||
|
||||
future<lw_shared_ptr<role_record>> fetch_role(const role_name_t& role) const;
|
||||
future<> prune_all() noexcept;
|
||||
future<> distribute_role(const role_name_t& name, const lw_shared_ptr<role_record> role);
|
||||
future<> gather_inheriting_roles(std::unordered_set<role_name_t>& roles, lw_shared_ptr<cache::role_record> role, const role_name_t& name);
|
||||
|
||||
void add_role(const role_name_t& name, lw_shared_ptr<role_record> role);
|
||||
void remove_role(const role_name_t& name);
|
||||
void remove_role(roles_map::iterator it);
|
||||
void clear_role_permissions(const role_name_t& name);
|
||||
void add_permissions(std::unordered_map<resource, permission_set>& cache, const resource& r, permission_set perms);
|
||||
void remove_permissions(std::unordered_map<resource, permission_set>& cache, const resource& r);
|
||||
|
||||
future<permission_set> load_permissions(const role_or_anonymous& role, const resource& r, std::unordered_map<resource, permission_set>* perms_cache);
|
||||
};
|
||||
|
||||
} // namespace auth
|
||||
|
||||
@@ -13,11 +13,14 @@
|
||||
#include <boost/regex.hpp>
|
||||
#include <fmt/ranges.h>
|
||||
|
||||
#include "utils/class_registrator.hh"
|
||||
#include "utils/to_string.hh"
|
||||
#include "data_dictionary/data_dictionary.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "db/config.hh"
|
||||
|
||||
static const auto CERT_AUTH_NAME = "com.scylladb.auth.CertificateAuthenticator";
|
||||
const std::string_view auth::certificate_authenticator_name(CERT_AUTH_NAME);
|
||||
|
||||
static logging::logger clogger("certificate_authenticator");
|
||||
|
||||
@@ -27,6 +30,13 @@ static const std::string cfg_query_attr = "query";
|
||||
static const std::string cfg_source_subject = "SUBJECT";
|
||||
static const std::string cfg_source_altname = "ALTNAME";
|
||||
|
||||
static const class_registrator<auth::authenticator
|
||||
, auth::certificate_authenticator
|
||||
, cql3::query_processor&
|
||||
, ::service::raft_group0_client&
|
||||
, ::service::migration_manager&
|
||||
, auth::cache&> cert_auth_reg(CERT_AUTH_NAME);
|
||||
|
||||
enum class auth::certificate_authenticator::query_source {
|
||||
subject, altname
|
||||
};
|
||||
@@ -89,7 +99,7 @@ future<> auth::certificate_authenticator::stop() {
|
||||
}
|
||||
|
||||
std::string_view auth::certificate_authenticator::qualified_java_name() const {
|
||||
return "com.scylladb.auth.CertificateAuthenticator";
|
||||
return certificate_authenticator_name;
|
||||
}
|
||||
|
||||
bool auth::certificate_authenticator::require_authentication() const {
|
||||
|
||||
@@ -27,6 +27,8 @@ namespace auth {
|
||||
|
||||
class cache;
|
||||
|
||||
extern const std::string_view certificate_authenticator_name;
|
||||
|
||||
class certificate_authenticator : public authenticator {
|
||||
enum class query_source;
|
||||
std::vector<std::pair<query_source, boost::regex>> _queries;
|
||||
|
||||
110
auth/common.cc
110
auth/common.cc
@@ -14,11 +14,18 @@
|
||||
#include <seastar/core/sharded.hh>
|
||||
|
||||
#include "mutation/canonical_mutation.hh"
|
||||
#include "schema/schema_fwd.hh"
|
||||
#include "mutation/timestamp.hh"
|
||||
#include "utils/assert.hh"
|
||||
#include "utils/exponential_backoff_retry.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "cql3/statements/create_table_statement.hh"
|
||||
#include "schema/schema_builder.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
#include "service/raft/group0_state_machine.hh"
|
||||
#include "timeout_config.hh"
|
||||
#include "utils/error_injection.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
|
||||
namespace auth {
|
||||
|
||||
@@ -26,14 +33,22 @@ namespace meta {
|
||||
|
||||
namespace legacy {
|
||||
constinit const std::string_view AUTH_KS("system_auth");
|
||||
constinit const std::string_view USERS_CF("users");
|
||||
} // namespace legacy
|
||||
constinit const std::string_view AUTH_PACKAGE_NAME("org.apache.cassandra.auth.");
|
||||
} // namespace meta
|
||||
|
||||
static logging::logger auth_log("auth");
|
||||
|
||||
std::string default_superuser(cql3::query_processor& qp) {
|
||||
return qp.db().get_config().auth_superuser_name();
|
||||
bool legacy_mode(cql3::query_processor& qp) {
|
||||
return qp.auth_version < db::auth_version_t::v2;
|
||||
}
|
||||
|
||||
std::string_view get_auth_ks_name(cql3::query_processor& qp) {
|
||||
if (legacy_mode(qp)) {
|
||||
return meta::legacy::AUTH_KS;
|
||||
}
|
||||
return db::system_keyspace::NAME;
|
||||
}
|
||||
|
||||
// Func must support being invoked more than once.
|
||||
@@ -50,6 +65,47 @@ future<> do_after_system_ready(seastar::abort_source& as, seastar::noncopyable_f
|
||||
}).discard_result();
|
||||
}
|
||||
|
||||
static future<> create_legacy_metadata_table_if_missing_impl(
|
||||
std::string_view table_name,
|
||||
cql3::query_processor& qp,
|
||||
std::string_view cql,
|
||||
::service::migration_manager& mm) {
|
||||
SCYLLA_ASSERT(this_shard_id() == 0); // once_among_shards makes sure a function is executed on shard 0 only
|
||||
|
||||
auto db = qp.db();
|
||||
auto parsed_statement = cql3::query_processor::parse_statement(cql, cql3::dialect{});
|
||||
auto& parsed_cf_statement = static_cast<cql3::statements::raw::cf_statement&>(*parsed_statement);
|
||||
|
||||
parsed_cf_statement.prepare_keyspace(meta::legacy::AUTH_KS);
|
||||
|
||||
auto statement = static_pointer_cast<cql3::statements::create_table_statement>(
|
||||
parsed_cf_statement.prepare(db, qp.get_cql_stats())->statement);
|
||||
|
||||
const auto schema = statement->get_cf_meta_data(qp.db());
|
||||
const auto uuid = generate_legacy_id(schema->ks_name(), schema->cf_name());
|
||||
|
||||
schema_builder b(schema);
|
||||
b.set_uuid(uuid);
|
||||
schema_ptr table = b.build();
|
||||
|
||||
if (!db.has_schema(table->ks_name(), table->cf_name())) {
|
||||
auto group0_guard = co_await mm.start_group0_operation();
|
||||
auto ts = group0_guard.write_timestamp();
|
||||
try {
|
||||
co_return co_await mm.announce(co_await ::service::prepare_new_column_family_announcement(qp.proxy(), table, ts),
|
||||
std::move(group0_guard), format("auth: create {} metadata table", table->cf_name()));
|
||||
} catch (const exceptions::already_exists_exception&) {}
|
||||
}
|
||||
}
|
||||
|
||||
future<> create_legacy_metadata_table_if_missing(
|
||||
std::string_view table_name,
|
||||
cql3::query_processor& qp,
|
||||
std::string_view cql,
|
||||
::service::migration_manager& mm) noexcept {
|
||||
return futurize_invoke(create_legacy_metadata_table_if_missing_impl, table_name, qp, cql, mm);
|
||||
}
|
||||
|
||||
::service::query_state& internal_distributed_query_state() noexcept {
|
||||
#ifdef DEBUG
|
||||
// Give the much slower debug tests more headroom for completing auth queries.
|
||||
@@ -84,6 +140,56 @@ static future<> announce_mutations_with_guard(
|
||||
return group0_client.add_entry(std::move(group0_cmd), std::move(group0_guard), as, timeout);
|
||||
}
|
||||
|
||||
future<> announce_mutations_with_batching(
|
||||
::service::raft_group0_client& group0_client,
|
||||
start_operation_func_t start_operation_func,
|
||||
std::function<::service::mutations_generator(api::timestamp_type t)> gen,
|
||||
seastar::abort_source& as,
|
||||
std::optional<::service::raft_timeout> timeout) {
|
||||
// account for command's overhead, it's better to use smaller threshold than constantly bounce off the limit
|
||||
size_t memory_threshold = group0_client.max_command_size() * 0.75;
|
||||
utils::get_local_injector().inject("auth_announce_mutations_command_max_size",
|
||||
[&memory_threshold] {
|
||||
memory_threshold = 1000;
|
||||
});
|
||||
|
||||
size_t memory_usage = 0;
|
||||
utils::chunked_vector<canonical_mutation> muts;
|
||||
|
||||
// guard has to be taken before we execute code in gen as
|
||||
// it can do read-before-write and we want announce_mutations
|
||||
// operation to be linearizable with other such calls,
|
||||
// for instance if we do select and then delete in gen
|
||||
// we want both to operate on the same data or fail
|
||||
// if someone else modified it in the middle
|
||||
std::optional<::service::group0_guard> group0_guard;
|
||||
group0_guard = co_await start_operation_func(as);
|
||||
auto timestamp = group0_guard->write_timestamp();
|
||||
|
||||
auto g = gen(timestamp);
|
||||
while (auto mut = co_await g()) {
|
||||
muts.push_back(canonical_mutation{*mut});
|
||||
memory_usage += muts.back().representation().size();
|
||||
if (memory_usage >= memory_threshold) {
|
||||
if (!group0_guard) {
|
||||
group0_guard = co_await start_operation_func(as);
|
||||
timestamp = group0_guard->write_timestamp();
|
||||
}
|
||||
co_await announce_mutations_with_guard(group0_client, std::move(muts), std::move(*group0_guard), as, timeout);
|
||||
group0_guard = std::nullopt;
|
||||
memory_usage = 0;
|
||||
muts = {};
|
||||
}
|
||||
}
|
||||
if (!muts.empty()) {
|
||||
if (!group0_guard) {
|
||||
group0_guard = co_await start_operation_func(as);
|
||||
timestamp = group0_guard->write_timestamp();
|
||||
}
|
||||
co_await announce_mutations_with_guard(group0_client, std::move(muts), std::move(*group0_guard), as, timeout);
|
||||
}
|
||||
}
|
||||
|
||||
future<> announce_mutations(
|
||||
cql3::query_processor& qp,
|
||||
::service::raft_group0_client& group0_client,
|
||||
|
||||
@@ -21,7 +21,12 @@
|
||||
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
namespace replica {
|
||||
class database;
|
||||
}
|
||||
|
||||
namespace service {
|
||||
class migration_manager;
|
||||
class query_state;
|
||||
}
|
||||
|
||||
@@ -35,8 +40,10 @@ namespace meta {
|
||||
|
||||
namespace legacy {
|
||||
extern constinit const std::string_view AUTH_KS;
|
||||
extern constinit const std::string_view USERS_CF;
|
||||
} // namespace legacy
|
||||
|
||||
constexpr std::string_view DEFAULT_SUPERUSER_NAME("cassandra");
|
||||
extern constinit const std::string_view AUTH_PACKAGE_NAME;
|
||||
|
||||
} // namespace meta
|
||||
@@ -45,7 +52,12 @@ constexpr std::string_view PERMISSIONS_CF = "role_permissions";
|
||||
constexpr std::string_view ROLE_MEMBERS_CF = "role_members";
|
||||
constexpr std::string_view ROLE_ATTRIBUTES_CF = "role_attributes";
|
||||
|
||||
std::string default_superuser(cql3::query_processor& qp);
|
||||
// This is a helper to check whether auth-v2 is on.
|
||||
bool legacy_mode(cql3::query_processor& qp);
|
||||
|
||||
// We have legacy implementation using different keyspace
|
||||
// and need to parametrize depending on runtime feature.
|
||||
std::string_view get_auth_ks_name(cql3::query_processor& qp);
|
||||
|
||||
template <class Task>
|
||||
future<> once_among_shards(Task&& f) {
|
||||
@@ -59,6 +71,12 @@ future<> once_among_shards(Task&& f) {
|
||||
// Func must support being invoked more than once.
|
||||
future<> do_after_system_ready(seastar::abort_source& as, seastar::noncopyable_function<future<>()> func);
|
||||
|
||||
future<> create_legacy_metadata_table_if_missing(
|
||||
std::string_view table_name,
|
||||
cql3::query_processor&,
|
||||
std::string_view cql,
|
||||
::service::migration_manager&) noexcept;
|
||||
|
||||
///
|
||||
/// Time-outs for internal, non-local CQL queries.
|
||||
///
|
||||
@@ -66,6 +84,20 @@ future<> do_after_system_ready(seastar::abort_source& as, seastar::noncopyable_f
|
||||
|
||||
::service::raft_timeout get_raft_timeout() noexcept;
|
||||
|
||||
// Execute update query via group0 mechanism, mutations will be applied on all nodes.
|
||||
// Use this function when need to perform read before write on a single guard or if
|
||||
// you have more than one mutation and potentially exceed single command size limit.
|
||||
using start_operation_func_t = std::function<future<::service::group0_guard>(abort_source&)>;
|
||||
future<> announce_mutations_with_batching(
|
||||
::service::raft_group0_client& group0_client,
|
||||
// since we can operate also in topology coordinator context where we need stronger
|
||||
// guarantees than start_operation from group0_client gives we allow to inject custom
|
||||
// function here
|
||||
start_operation_func_t start_operation_func,
|
||||
std::function<::service::mutations_generator(api::timestamp_type t)> gen,
|
||||
seastar::abort_source& as,
|
||||
std::optional<::service::raft_timeout> timeout);
|
||||
|
||||
// Execute update query via group0 mechanism, mutations will be applied on all nodes.
|
||||
future<> announce_mutations(
|
||||
cql3::query_processor& qp,
|
||||
|
||||
@@ -26,6 +26,7 @@ extern "C" {
|
||||
#include "cql3/untyped_result_set.hh"
|
||||
#include "exceptions/exceptions.hh"
|
||||
#include "utils/log.hh"
|
||||
#include "utils/class_registrator.hh"
|
||||
|
||||
namespace auth {
|
||||
|
||||
@@ -39,14 +40,111 @@ static constexpr std::string_view PERMISSIONS_NAME = "permissions";
|
||||
|
||||
static logging::logger alogger("default_authorizer");
|
||||
|
||||
default_authorizer::default_authorizer(cql3::query_processor& qp)
|
||||
: _qp(qp) {
|
||||
// To ensure correct initialization order, we unfortunately need to use a string literal.
|
||||
static const class_registrator<
|
||||
authorizer,
|
||||
default_authorizer,
|
||||
cql3::query_processor&,
|
||||
::service::raft_group0_client&,
|
||||
::service::migration_manager&> password_auth_reg("org.apache.cassandra.auth.CassandraAuthorizer");
|
||||
|
||||
default_authorizer::default_authorizer(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm)
|
||||
: _qp(qp)
|
||||
, _migration_manager(mm) {
|
||||
}
|
||||
|
||||
default_authorizer::~default_authorizer() {
|
||||
}
|
||||
|
||||
static const sstring legacy_table_name{"permissions"};
|
||||
|
||||
bool default_authorizer::legacy_metadata_exists() const {
|
||||
return _qp.db().has_schema(meta::legacy::AUTH_KS, legacy_table_name);
|
||||
}
|
||||
|
||||
future<bool> default_authorizer::legacy_any_granted() const {
|
||||
static const sstring query = seastar::format("SELECT * FROM {}.{} LIMIT 1", meta::legacy::AUTH_KS, PERMISSIONS_CF);
|
||||
|
||||
return _qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::LOCAL_ONE,
|
||||
{},
|
||||
cql3::query_processor::cache_internal::yes).then([](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
return !results->empty();
|
||||
});
|
||||
}
|
||||
|
||||
future<> default_authorizer::migrate_legacy_metadata() {
|
||||
alogger.info("Starting migration of legacy permissions metadata.");
|
||||
static const sstring query = seastar::format("SELECT * FROM {}.{}", meta::legacy::AUTH_KS, legacy_table_name);
|
||||
|
||||
return _qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::LOCAL_ONE,
|
||||
cql3::query_processor::cache_internal::no).then([this](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
|
||||
return do_with(
|
||||
row.get_as<sstring>("username"),
|
||||
parse_resource(row.get_as<sstring>(RESOURCE_NAME)),
|
||||
::service::group0_batch::unused(),
|
||||
[this, &row](const auto& username, const auto& r, auto& mc) {
|
||||
const permission_set perms = permissions::from_strings(row.get_set<sstring>(PERMISSIONS_NAME));
|
||||
return grant(username, perms, r, mc);
|
||||
});
|
||||
}).finally([results] {});
|
||||
}).then([] {
|
||||
alogger.info("Finished migrating legacy permissions metadata.");
|
||||
}).handle_exception([](std::exception_ptr ep) {
|
||||
alogger.error("Encountered an error during migration!");
|
||||
std::rethrow_exception(ep);
|
||||
});
|
||||
}
|
||||
|
||||
future<> default_authorizer::start_legacy() {
|
||||
static const sstring create_table = fmt::format(
|
||||
"CREATE TABLE {}.{} ("
|
||||
"{} text,"
|
||||
"{} text,"
|
||||
"{} set<text>,"
|
||||
"PRIMARY KEY({}, {})"
|
||||
") WITH gc_grace_seconds={}",
|
||||
meta::legacy::AUTH_KS,
|
||||
PERMISSIONS_CF,
|
||||
ROLE_NAME,
|
||||
RESOURCE_NAME,
|
||||
PERMISSIONS_NAME,
|
||||
ROLE_NAME,
|
||||
RESOURCE_NAME,
|
||||
90 * 24 * 60 * 60); // 3 months.
|
||||
|
||||
return once_among_shards([this] {
|
||||
return create_legacy_metadata_table_if_missing(
|
||||
PERMISSIONS_CF,
|
||||
_qp,
|
||||
create_table,
|
||||
_migration_manager).then([this] {
|
||||
_finished = do_after_system_ready(_as, [this] {
|
||||
return async([this] {
|
||||
_migration_manager.wait_for_schema_agreement(_qp.db().real_database(), db::timeout_clock::time_point::max(), &_as).get();
|
||||
|
||||
if (legacy_metadata_exists()) {
|
||||
if (!legacy_any_granted().get()) {
|
||||
migrate_legacy_metadata().get();
|
||||
return;
|
||||
}
|
||||
|
||||
alogger.warn("Ignoring legacy permissions metadata since role permissions exist.");
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
future<> default_authorizer::start() {
|
||||
if (legacy_mode(_qp)) {
|
||||
return start_legacy();
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
@@ -63,7 +161,7 @@ default_authorizer::authorize(const role_or_anonymous& maybe_role, const resourc
|
||||
|
||||
const sstring query = seastar::format("SELECT {} FROM {}.{} WHERE {} = ? AND {} = ?",
|
||||
PERMISSIONS_NAME,
|
||||
db::system_keyspace::NAME,
|
||||
get_auth_ks_name(_qp),
|
||||
PERMISSIONS_CF,
|
||||
ROLE_NAME,
|
||||
RESOURCE_NAME);
|
||||
@@ -87,13 +185,21 @@ default_authorizer::modify(
|
||||
std::string_view op,
|
||||
::service::group0_batch& mc) {
|
||||
const sstring query = seastar::format("UPDATE {}.{} SET {} = {} {} ? WHERE {} = ? AND {} = ?",
|
||||
db::system_keyspace::NAME,
|
||||
get_auth_ks_name(_qp),
|
||||
PERMISSIONS_CF,
|
||||
PERMISSIONS_NAME,
|
||||
PERMISSIONS_NAME,
|
||||
op,
|
||||
ROLE_NAME,
|
||||
RESOURCE_NAME);
|
||||
if (legacy_mode(_qp)) {
|
||||
co_return co_await _qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::ONE,
|
||||
internal_distributed_query_state(),
|
||||
{permissions::to_strings(set), sstring(role_name), resource.name()},
|
||||
cql3::query_processor::cache_internal::no).discard_result();
|
||||
}
|
||||
co_await collect_mutations(_qp, mc, query,
|
||||
{permissions::to_strings(set), sstring(role_name), resource.name()});
|
||||
}
|
||||
@@ -112,7 +218,7 @@ future<std::vector<permission_details>> default_authorizer::list_all() const {
|
||||
ROLE_NAME,
|
||||
RESOURCE_NAME,
|
||||
PERMISSIONS_NAME,
|
||||
db::system_keyspace::NAME,
|
||||
get_auth_ks_name(_qp),
|
||||
PERMISSIONS_CF);
|
||||
|
||||
const auto results = co_await _qp.execute_internal(
|
||||
@@ -137,16 +243,74 @@ future<std::vector<permission_details>> default_authorizer::list_all() const {
|
||||
future<> default_authorizer::revoke_all(std::string_view role_name, ::service::group0_batch& mc) {
|
||||
try {
|
||||
const sstring query = seastar::format("DELETE FROM {}.{} WHERE {} = ?",
|
||||
db::system_keyspace::NAME,
|
||||
get_auth_ks_name(_qp),
|
||||
PERMISSIONS_CF,
|
||||
ROLE_NAME);
|
||||
co_await collect_mutations(_qp, mc, query, {sstring(role_name)});
|
||||
if (legacy_mode(_qp)) {
|
||||
co_await _qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::ONE,
|
||||
internal_distributed_query_state(),
|
||||
{sstring(role_name)},
|
||||
cql3::query_processor::cache_internal::no).discard_result();
|
||||
} else {
|
||||
co_await collect_mutations(_qp, mc, query, {sstring(role_name)});
|
||||
}
|
||||
} catch (const exceptions::request_execution_exception& e) {
|
||||
alogger.warn("CassandraAuthorizer failed to revoke all permissions of {}: {}", role_name, e);
|
||||
}
|
||||
}
|
||||
|
||||
future<> default_authorizer::revoke_all_legacy(const resource& resource) {
|
||||
static const sstring query = seastar::format("SELECT {} FROM {}.{} WHERE {} = ? ALLOW FILTERING",
|
||||
ROLE_NAME,
|
||||
get_auth_ks_name(_qp),
|
||||
PERMISSIONS_CF,
|
||||
RESOURCE_NAME);
|
||||
|
||||
return _qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::LOCAL_ONE,
|
||||
{resource.name()},
|
||||
cql3::query_processor::cache_internal::no).then_wrapped([this, resource](future<::shared_ptr<cql3::untyped_result_set>> f) {
|
||||
try {
|
||||
auto res = f.get();
|
||||
return parallel_for_each(
|
||||
res->begin(),
|
||||
res->end(),
|
||||
[this, res, resource](const cql3::untyped_result_set::row& r) {
|
||||
static const sstring query = seastar::format("DELETE FROM {}.{} WHERE {} = ? AND {} = ?",
|
||||
get_auth_ks_name(_qp),
|
||||
PERMISSIONS_CF,
|
||||
ROLE_NAME,
|
||||
RESOURCE_NAME);
|
||||
|
||||
return _qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::LOCAL_ONE,
|
||||
{r.get_as<sstring>(ROLE_NAME), resource.name()},
|
||||
cql3::query_processor::cache_internal::no).discard_result().handle_exception(
|
||||
[resource](auto ep) {
|
||||
try {
|
||||
std::rethrow_exception(ep);
|
||||
} catch (const exceptions::request_execution_exception& e) {
|
||||
alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
|
||||
}
|
||||
|
||||
});
|
||||
});
|
||||
} catch (const exceptions::request_execution_exception& e) {
|
||||
alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
|
||||
return make_ready_future();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
future<> default_authorizer::revoke_all(const resource& resource, ::service::group0_batch& mc) {
|
||||
if (legacy_mode(_qp)) {
|
||||
co_return co_await revoke_all_legacy(resource);
|
||||
}
|
||||
|
||||
if (resource.kind() == resource_kind::data &&
|
||||
data_resource_view(resource).is_keyspace()) {
|
||||
revoke_all_keyspace_resources(resource, mc);
|
||||
@@ -157,7 +321,7 @@ future<> default_authorizer::revoke_all(const resource& resource, ::service::gro
|
||||
auto gen = [this, name] (api::timestamp_type t) -> ::service::mutations_generator {
|
||||
const sstring query = seastar::format("SELECT {} FROM {}.{} WHERE {} = ? ALLOW FILTERING",
|
||||
ROLE_NAME,
|
||||
db::system_keyspace::NAME,
|
||||
get_auth_ks_name(_qp),
|
||||
PERMISSIONS_CF,
|
||||
RESOURCE_NAME);
|
||||
auto res = co_await _qp.execute_internal(
|
||||
@@ -167,7 +331,7 @@ future<> default_authorizer::revoke_all(const resource& resource, ::service::gro
|
||||
cql3::query_processor::cache_internal::no);
|
||||
for (const auto& r : *res) {
|
||||
const sstring query = seastar::format("DELETE FROM {}.{} WHERE {} = ? AND {} = ?",
|
||||
db::system_keyspace::NAME,
|
||||
get_auth_ks_name(_qp),
|
||||
PERMISSIONS_CF,
|
||||
ROLE_NAME,
|
||||
RESOURCE_NAME);
|
||||
@@ -192,7 +356,7 @@ void default_authorizer::revoke_all_keyspace_resources(const resource& ks_resour
|
||||
const sstring query = seastar::format("SELECT {}, {} FROM {}.{}",
|
||||
ROLE_NAME,
|
||||
RESOURCE_NAME,
|
||||
db::system_keyspace::NAME,
|
||||
get_auth_ks_name(_qp),
|
||||
PERMISSIONS_CF);
|
||||
auto res = co_await _qp.execute_internal(
|
||||
query,
|
||||
@@ -207,7 +371,7 @@ void default_authorizer::revoke_all_keyspace_resources(const resource& ks_resour
|
||||
continue;
|
||||
}
|
||||
const sstring query = seastar::format("DELETE FROM {}.{} WHERE {} = ? AND {} = ?",
|
||||
db::system_keyspace::NAME,
|
||||
get_auth_ks_name(_qp),
|
||||
PERMISSIONS_CF,
|
||||
ROLE_NAME,
|
||||
RESOURCE_NAME);
|
||||
|
||||
@@ -27,12 +27,14 @@ namespace auth {
|
||||
class default_authorizer : public authorizer {
|
||||
cql3::query_processor& _qp;
|
||||
|
||||
::service::migration_manager& _migration_manager;
|
||||
|
||||
abort_source _as{};
|
||||
|
||||
future<> _finished{make_ready_future<>()};
|
||||
|
||||
public:
|
||||
default_authorizer(cql3::query_processor&);
|
||||
default_authorizer(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&);
|
||||
|
||||
~default_authorizer();
|
||||
|
||||
@@ -57,6 +59,16 @@ public:
|
||||
virtual const resource_set& protected_resources() const override;
|
||||
|
||||
private:
|
||||
future<> start_legacy();
|
||||
|
||||
bool legacy_metadata_exists() const;
|
||||
|
||||
future<> revoke_all_legacy(const resource&);
|
||||
|
||||
future<bool> legacy_any_granted() const;
|
||||
|
||||
future<> migrate_legacy_metadata();
|
||||
|
||||
future<> modify(std::string_view, permission_set, const resource&, std::string_view, ::service::group0_batch&);
|
||||
|
||||
void revoke_all_keyspace_resources(const resource& ks_resource, ::service::group0_batch& mc);
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "exceptions/exceptions.hh"
|
||||
#include "seastarx.hh"
|
||||
#include "service/raft/raft_group0_client.hh"
|
||||
#include "utils/class_registrator.hh"
|
||||
#include "db/config.hh"
|
||||
#include "utils/exponential_backoff_retry.hh"
|
||||
|
||||
@@ -32,7 +33,7 @@ namespace {
|
||||
logger mylog{"ldap_role_manager"}; // `log` is taken by math.
|
||||
|
||||
struct url_desc_deleter {
|
||||
void operator()(LDAPURLDesc* p) {
|
||||
void operator()(LDAPURLDesc *p) {
|
||||
ldap_free_urldesc(p);
|
||||
}
|
||||
};
|
||||
@@ -40,7 +41,7 @@ struct url_desc_deleter {
|
||||
using url_desc_ptr = std::unique_ptr<LDAPURLDesc, url_desc_deleter>;
|
||||
|
||||
url_desc_ptr parse_url(std::string_view url) {
|
||||
LDAPURLDesc* desc = nullptr;
|
||||
LDAPURLDesc *desc = nullptr;
|
||||
if (ldap_url_parse(url.data(), &desc)) {
|
||||
mylog.error("error in ldap_url_parse({})", url);
|
||||
}
|
||||
@@ -53,12 +54,8 @@ std::vector<sstring> get_attr_values(LDAP* ld, LDAPMessage* res, const char* att
|
||||
mylog.debug("Analyzing search results");
|
||||
for (auto e = ldap_first_entry(ld, res); e; e = ldap_next_entry(ld, e)) {
|
||||
struct deleter {
|
||||
void operator()(berval** p) {
|
||||
ldap_value_free_len(p);
|
||||
}
|
||||
void operator()(char* p) {
|
||||
ldap_memfree(p);
|
||||
}
|
||||
void operator()(berval** p) { ldap_value_free_len(p); }
|
||||
void operator()(char* p) { ldap_memfree(p); }
|
||||
};
|
||||
const std::unique_ptr<char, deleter> dname(ldap_get_dn(ld, e));
|
||||
mylog.debug("Analyzing entry {}", dname.get());
|
||||
@@ -75,37 +72,42 @@ std::vector<sstring> get_attr_values(LDAP* ld, LDAPMessage* res, const char* att
|
||||
return values;
|
||||
}
|
||||
|
||||
const char* ldap_role_manager_full_name = "com.scylladb.auth.LDAPRoleManager";
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
namespace auth {
|
||||
|
||||
ldap_role_manager::ldap_role_manager(std::string_view query_template, std::string_view target_attr, std::string_view bind_name, std::string_view bind_password,
|
||||
uint32_t permissions_update_interval_in_ms, utils::observer<uint32_t> permissions_update_interval_in_ms_observer, cql3::query_processor& qp,
|
||||
::service::raft_group0_client& rg0c, ::service::migration_manager& mm, cache& cache)
|
||||
: _std_mgr(qp, rg0c, mm, cache)
|
||||
, _group0_client(rg0c)
|
||||
, _query_template(query_template)
|
||||
, _target_attr(target_attr)
|
||||
, _bind_name(bind_name)
|
||||
, _bind_password(bind_password)
|
||||
, _permissions_update_interval_in_ms(permissions_update_interval_in_ms)
|
||||
, _permissions_update_interval_in_ms_observer(std::move(permissions_update_interval_in_ms_observer))
|
||||
, _connection_factory(bind(std::mem_fn(&ldap_role_manager::reconnect), std::ref(*this)))
|
||||
, _cache(cache)
|
||||
, _cache_pruner(make_ready_future<>()) {
|
||||
static const class_registrator<
|
||||
role_manager,
|
||||
ldap_role_manager,
|
||||
cql3::query_processor&,
|
||||
::service::raft_group0_client&,
|
||||
::service::migration_manager&,
|
||||
cache&> registration(ldap_role_manager_full_name);
|
||||
|
||||
ldap_role_manager::ldap_role_manager(
|
||||
std::string_view query_template, std::string_view target_attr, std::string_view bind_name, std::string_view bind_password,
|
||||
cql3::query_processor& qp, ::service::raft_group0_client& rg0c, ::service::migration_manager& mm, cache& cache)
|
||||
: _std_mgr(qp, rg0c, mm, cache), _group0_client(rg0c), _query_template(query_template), _target_attr(target_attr), _bind_name(bind_name)
|
||||
, _bind_password(bind_password)
|
||||
, _connection_factory(bind(std::mem_fn(&ldap_role_manager::reconnect), std::ref(*this))) {
|
||||
}
|
||||
|
||||
ldap_role_manager::ldap_role_manager(cql3::query_processor& qp, ::service::raft_group0_client& rg0c, ::service::migration_manager& mm, cache& cache)
|
||||
: ldap_role_manager(qp.db().get_config().ldap_url_template(), qp.db().get_config().ldap_attr_role(), qp.db().get_config().ldap_bind_dn(),
|
||||
qp.db().get_config().ldap_bind_passwd(), qp.db().get_config().permissions_update_interval_in_ms(),
|
||||
qp.db().get_config().permissions_update_interval_in_ms.observe([this](const uint32_t& v) {
|
||||
_permissions_update_interval_in_ms = v;
|
||||
}),
|
||||
qp, rg0c, mm, cache) {
|
||||
: ldap_role_manager(
|
||||
qp.db().get_config().ldap_url_template(),
|
||||
qp.db().get_config().ldap_attr_role(),
|
||||
qp.db().get_config().ldap_bind_dn(),
|
||||
qp.db().get_config().ldap_bind_passwd(),
|
||||
qp,
|
||||
rg0c,
|
||||
mm,
|
||||
cache) {
|
||||
}
|
||||
|
||||
std::string_view ldap_role_manager::qualified_java_name() const noexcept {
|
||||
return "com.scylladb.auth.LDAPRoleManager";
|
||||
return ldap_role_manager_full_name;
|
||||
}
|
||||
|
||||
const resource_set& ldap_role_manager::protected_resources() const {
|
||||
@@ -114,24 +116,9 @@ const resource_set& ldap_role_manager::protected_resources() const {
|
||||
|
||||
future<> ldap_role_manager::start() {
|
||||
if (!parse_url(get_url("dummy-user"))) { // Just need host and port -- any user should do.
|
||||
return make_exception_future(std::runtime_error(fmt::format("error getting LDAP server address from template {}", _query_template)));
|
||||
return make_exception_future(
|
||||
std::runtime_error(fmt::format("error getting LDAP server address from template {}", _query_template)));
|
||||
}
|
||||
_cache_pruner = futurize_invoke([this]() -> future<> {
|
||||
while (true) {
|
||||
try {
|
||||
co_await seastar::sleep_abortable(std::chrono::milliseconds(_permissions_update_interval_in_ms), _as);
|
||||
} catch (const seastar::sleep_aborted&) {
|
||||
co_return; // ignore
|
||||
}
|
||||
co_await _cache.container().invoke_on_all([](cache& c) -> future<> {
|
||||
try {
|
||||
co_await c.reload_all_permissions();
|
||||
} catch (...) {
|
||||
mylog.warn("Cache reload all permissions failed: {}", std::current_exception());
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
return _std_mgr.start();
|
||||
}
|
||||
|
||||
@@ -165,7 +152,7 @@ future<conn_ptr> ldap_role_manager::connect() {
|
||||
future<conn_ptr> ldap_role_manager::reconnect() {
|
||||
unsigned retries_left = 5;
|
||||
using namespace std::literals::chrono_literals;
|
||||
conn_ptr conn = co_await exponential_backoff_retry::do_until_value(1s, 32s, _as, [this, &retries_left]() -> future<std::optional<conn_ptr>> {
|
||||
conn_ptr conn = co_await exponential_backoff_retry::do_until_value(1s, 32s, _as, [this, &retries_left] () -> future<std::optional<conn_ptr>> {
|
||||
if (!retries_left) {
|
||||
co_return conn_ptr{};
|
||||
}
|
||||
@@ -188,13 +175,7 @@ future<conn_ptr> ldap_role_manager::reconnect() {
|
||||
|
||||
future<> ldap_role_manager::stop() {
|
||||
_as.request_abort();
|
||||
return std::move(_cache_pruner)
|
||||
.then([this] {
|
||||
return _std_mgr.stop();
|
||||
})
|
||||
.then([this] {
|
||||
return _connection_factory.stop();
|
||||
});
|
||||
return _std_mgr.stop().then([this] { return _connection_factory.stop(); });
|
||||
}
|
||||
|
||||
future<> ldap_role_manager::create(std::string_view name, const role_config& config, ::service::group0_batch& mc) {
|
||||
@@ -223,42 +204,43 @@ future<role_set> ldap_role_manager::query_granted(std::string_view grantee_name,
|
||||
if (!desc) {
|
||||
return make_exception_future<role_set>(std::runtime_error(format("Error parsing URL {}", url)));
|
||||
}
|
||||
return _connection_factory.with_connection(
|
||||
[this, desc = std::move(desc), grantee_name_ = sstring(grantee_name)](ldap_connection& conn) -> future<role_set> {
|
||||
sstring grantee_name = std::move(grantee_name_);
|
||||
ldap_msg_ptr res = co_await conn.search(desc->lud_dn, desc->lud_scope, desc->lud_filter, desc->lud_attrs,
|
||||
/*attrsonly=*/0, /*serverctrls=*/nullptr, /*clientctrls=*/nullptr,
|
||||
/*timeout=*/nullptr, /*sizelimit=*/0);
|
||||
mylog.trace("query_granted: got search results");
|
||||
const auto mtype = ldap_msgtype(res.get());
|
||||
if (mtype != LDAP_RES_SEARCH_ENTRY && mtype != LDAP_RES_SEARCH_RESULT && mtype != LDAP_RES_SEARCH_REFERENCE) {
|
||||
mylog.error("ldap search yielded result {} of type {}", static_cast<const void*>(res.get()), mtype);
|
||||
co_return coroutine::exception(std::make_exception_ptr(std::runtime_error("ldap_role_manager: search result has wrong type")));
|
||||
return _connection_factory.with_connection([this, desc = std::move(desc), grantee_name_ = sstring(grantee_name)]
|
||||
(ldap_connection& conn) -> future<role_set> {
|
||||
sstring grantee_name = std::move(grantee_name_);
|
||||
ldap_msg_ptr res = co_await conn.search(desc->lud_dn, desc->lud_scope, desc->lud_filter, desc->lud_attrs,
|
||||
/*attrsonly=*/0, /*serverctrls=*/nullptr, /*clientctrls=*/nullptr,
|
||||
/*timeout=*/nullptr, /*sizelimit=*/0);
|
||||
mylog.trace("query_granted: got search results");
|
||||
const auto mtype = ldap_msgtype(res.get());
|
||||
if (mtype != LDAP_RES_SEARCH_ENTRY && mtype != LDAP_RES_SEARCH_RESULT && mtype != LDAP_RES_SEARCH_REFERENCE) {
|
||||
mylog.error("ldap search yielded result {} of type {}", static_cast<const void*>(res.get()), mtype);
|
||||
co_return coroutine::exception(std::make_exception_ptr(std::runtime_error("ldap_role_manager: search result has wrong type")));
|
||||
}
|
||||
std::vector<sstring> values = get_attr_values(conn.get_ldap(), res.get(), _target_attr.c_str());
|
||||
auth::role_set valid_roles{grantee_name};
|
||||
|
||||
// Each value is a role to be granted.
|
||||
co_await parallel_for_each(values, [this, &valid_roles] (const sstring& ldap_role) {
|
||||
return _std_mgr.exists(ldap_role).then([&valid_roles, &ldap_role] (bool exists) {
|
||||
if (exists) {
|
||||
valid_roles.insert(ldap_role);
|
||||
} else {
|
||||
mylog.error("unrecognized role received from LDAP: {}", ldap_role);
|
||||
}
|
||||
std::vector<sstring> values = get_attr_values(conn.get_ldap(), res.get(), _target_attr.c_str());
|
||||
auth::role_set valid_roles{grantee_name};
|
||||
|
||||
// Each value is a role to be granted.
|
||||
co_await parallel_for_each(values, [this, &valid_roles](const sstring& ldap_role) {
|
||||
return _std_mgr.exists(ldap_role).then([&valid_roles, &ldap_role](bool exists) {
|
||||
if (exists) {
|
||||
valid_roles.insert(ldap_role);
|
||||
} else {
|
||||
mylog.error("unrecognized role received from LDAP: {}", ldap_role);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
co_return std::move(valid_roles);
|
||||
});
|
||||
});
|
||||
|
||||
co_return std::move(valid_roles);
|
||||
});
|
||||
}
|
||||
|
||||
future<role_to_directly_granted_map> ldap_role_manager::query_all_directly_granted(::service::query_state& qs) {
|
||||
future<role_to_directly_granted_map>
|
||||
ldap_role_manager::query_all_directly_granted(::service::query_state& qs) {
|
||||
role_to_directly_granted_map result;
|
||||
auto roles = co_await query_all(qs);
|
||||
for (auto& role : roles) {
|
||||
for (auto& role: roles) {
|
||||
auto granted_set = co_await query_granted(role, recursive_role_query::no);
|
||||
for (auto& granted : granted_set) {
|
||||
for (auto& granted: granted_set) {
|
||||
if (granted != role) {
|
||||
result.insert({role, granted});
|
||||
}
|
||||
@@ -272,7 +254,7 @@ future<role_set> ldap_role_manager::query_all(::service::query_state& qs) {
|
||||
}
|
||||
|
||||
future<> ldap_role_manager::create_role(std::string_view role_name) {
|
||||
return smp::submit_to(0, [this, role_name]() -> future<> {
|
||||
return smp::submit_to(0, [this, role_name] () -> future<> {
|
||||
int retries = 10;
|
||||
while (true) {
|
||||
auto guard = co_await _group0_client.start_operation(_as, ::service::raft_timeout{});
|
||||
@@ -284,8 +266,8 @@ future<> ldap_role_manager::create_role(std::string_view role_name) {
|
||||
} catch (const role_already_exists&) {
|
||||
// ok
|
||||
} catch (const ::service::group0_concurrent_modification& ex) {
|
||||
mylog.warn("Failed to auto-create role \"{}\" due to guard conflict.{}.", role_name,
|
||||
retries ? " Retrying" : " Number of retries exceeded, giving up");
|
||||
mylog.warn("Failed to auto-create role \"{}\" due to guard conflict.{}.",
|
||||
role_name, retries ? " Retrying" : " Number of retries exceeded, giving up");
|
||||
if (retries--) {
|
||||
continue;
|
||||
}
|
||||
@@ -330,7 +312,8 @@ future<bool> ldap_role_manager::can_login(std::string_view role_name) {
|
||||
return _std_mgr.can_login(role_name);
|
||||
}
|
||||
|
||||
future<std::optional<sstring>> ldap_role_manager::get_attribute(std::string_view role_name, std::string_view attribute_name, ::service::query_state& qs) {
|
||||
future<std::optional<sstring>> ldap_role_manager::get_attribute(
|
||||
std::string_view role_name, std::string_view attribute_name, ::service::query_state& qs) {
|
||||
return _std_mgr.get_attribute(role_name, attribute_name, qs);
|
||||
}
|
||||
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <seastar/core/abort_source.hh>
|
||||
#include <seastar/core/future.hh>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "ent/ldap/ldap_connection.hh"
|
||||
@@ -35,29 +34,22 @@ class ldap_role_manager : public role_manager {
|
||||
seastar::sstring _target_attr; ///< LDAP entry attribute containing the Scylla role name.
|
||||
seastar::sstring _bind_name; ///< Username for LDAP simple bind.
|
||||
seastar::sstring _bind_password; ///< Password for LDAP simple bind.
|
||||
|
||||
uint32_t _permissions_update_interval_in_ms;
|
||||
utils::observer<uint32_t> _permissions_update_interval_in_ms_observer;
|
||||
|
||||
mutable ldap_reuser _connection_factory; // Potentially modified by query_granted().
|
||||
seastar::abort_source _as;
|
||||
cache& _cache;
|
||||
seastar::future<> _cache_pruner;
|
||||
public:
|
||||
ldap_role_manager(
|
||||
std::string_view query_template, ///< LDAP query template as described in Scylla documentation.
|
||||
std::string_view target_attr, ///< LDAP entry attribute containing the Scylla role name.
|
||||
std::string_view bind_name, ///< LDAP bind credentials.
|
||||
std::string_view bind_password, ///< LDAP bind credentials.
|
||||
uint32_t permissions_update_interval_in_ms,
|
||||
utils::observer<uint32_t> permissions_update_interval_in_ms_observer,
|
||||
cql3::query_processor& qp, ///< Passed to standard_role_manager.
|
||||
::service::raft_group0_client& rg0c, ///< Passed to standard_role_manager.
|
||||
::service::migration_manager& mm, ///< Passed to standard_role_manager.
|
||||
cache& cache ///< Passed to standard_role_manager.
|
||||
);
|
||||
|
||||
/// Retrieves LDAP configuration entries from qp and invokes the other constructor.
|
||||
/// Retrieves LDAP configuration entries from qp and invokes the other constructor. Required by
|
||||
/// class_registrator<role_manager>.
|
||||
ldap_role_manager(cql3::query_processor& qp, ::service::raft_group0_client& rg0c, ::service::migration_manager& mm, cache& cache);
|
||||
|
||||
/// Thrown when query-template parsing fails.
|
||||
|
||||
@@ -1,31 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2026-present ScyllaDB
|
||||
*
|
||||
* Modified by ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
|
||||
*/
|
||||
|
||||
#include "auth/maintenance_socket_authenticator.hh"
|
||||
|
||||
|
||||
namespace auth {
|
||||
|
||||
maintenance_socket_authenticator::~maintenance_socket_authenticator() {
|
||||
}
|
||||
|
||||
future<> maintenance_socket_authenticator::start() {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
future<> maintenance_socket_authenticator::ensure_superuser_is_created() const {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
bool maintenance_socket_authenticator::require_authentication() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace auth
|
||||
@@ -1,36 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2026-present ScyllaDB
|
||||
*
|
||||
* Modified by ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <seastar/core/shared_future.hh>
|
||||
|
||||
#include "password_authenticator.hh"
|
||||
|
||||
namespace auth {
|
||||
|
||||
// maintenance_socket_authenticator is used for clients connecting to the
|
||||
// maintenance socket. It does not require authentication,
|
||||
// while still allowing the managing of roles and their credentials.
|
||||
class maintenance_socket_authenticator : public password_authenticator {
|
||||
public:
|
||||
using password_authenticator::password_authenticator;
|
||||
|
||||
virtual ~maintenance_socket_authenticator();
|
||||
|
||||
virtual future<> start() override;
|
||||
|
||||
virtual future<> ensure_superuser_is_created() const override;
|
||||
|
||||
bool require_authentication() const override;
|
||||
};
|
||||
|
||||
} // namespace auth
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2026-present ScyllaDB
|
||||
*
|
||||
* Modified by ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "auth/default_authorizer.hh"
|
||||
#include "auth/permission.hh"
|
||||
|
||||
namespace auth {
|
||||
|
||||
// maintenance_socket_authorizer is used for clients connecting to the
|
||||
// maintenance socket. It grants all permissions unconditionally (like
|
||||
// AllowAllAuthorizer) while still supporting grant/revoke operations
|
||||
// (delegated to the underlying CassandraAuthorizer / default_authorizer).
|
||||
class maintenance_socket_authorizer : public default_authorizer {
|
||||
public:
|
||||
using default_authorizer::default_authorizer;
|
||||
|
||||
~maintenance_socket_authorizer() override = default;
|
||||
|
||||
future<> start() override {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
future<permission_set> authorize(const role_or_anonymous&, const resource&) const override {
|
||||
return make_ready_future<permission_set>(permissions::ALL);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace auth
|
||||
@@ -13,48 +13,23 @@
|
||||
#include <string_view>
|
||||
#include "auth/cache.hh"
|
||||
#include "cql3/description.hh"
|
||||
#include "utils/log.hh"
|
||||
#include "utils/on_internal_error.hh"
|
||||
#include "utils/class_registrator.hh"
|
||||
|
||||
namespace auth {
|
||||
|
||||
static logging::logger log("maintenance_socket_role_manager");
|
||||
constexpr std::string_view maintenance_socket_role_manager_name = "com.scylladb.auth.MaintenanceSocketRoleManager";
|
||||
|
||||
future<> maintenance_socket_role_manager::ensure_role_operations_are_enabled() {
|
||||
if (_is_maintenance_mode) {
|
||||
on_internal_error(log, "enabling role operations not allowed in maintenance mode");
|
||||
}
|
||||
static const class_registrator<
|
||||
role_manager,
|
||||
maintenance_socket_role_manager,
|
||||
cql3::query_processor&,
|
||||
::service::raft_group0_client&,
|
||||
::service::migration_manager&,
|
||||
cache&> registration(sstring{maintenance_socket_role_manager_name});
|
||||
|
||||
if (_std_mgr.has_value()) {
|
||||
on_internal_error(log, "role operations are already enabled");
|
||||
}
|
||||
|
||||
_std_mgr.emplace(_qp, _group0_client, _migration_manager, _cache);
|
||||
return _std_mgr->start();
|
||||
}
|
||||
|
||||
void maintenance_socket_role_manager::set_maintenance_mode() {
|
||||
if (_std_mgr.has_value()) {
|
||||
on_internal_error(log, "cannot enter maintenance mode after role operations have been enabled");
|
||||
}
|
||||
_is_maintenance_mode = true;
|
||||
}
|
||||
|
||||
maintenance_socket_role_manager::maintenance_socket_role_manager(
|
||||
cql3::query_processor& qp,
|
||||
::service::raft_group0_client& rg0c,
|
||||
::service::migration_manager& mm,
|
||||
cache& c)
|
||||
: _qp(qp)
|
||||
, _group0_client(rg0c)
|
||||
, _migration_manager(mm)
|
||||
, _cache(c)
|
||||
, _std_mgr(std::nullopt)
|
||||
, _is_maintenance_mode(false) {
|
||||
}
|
||||
|
||||
std::string_view maintenance_socket_role_manager::qualified_java_name() const noexcept {
|
||||
return "com.scylladb.auth.MaintenanceSocketRoleManager";
|
||||
return maintenance_socket_role_manager_name;
|
||||
}
|
||||
|
||||
const resource_set& maintenance_socket_role_manager::protected_resources() const {
|
||||
@@ -68,161 +43,81 @@ future<> maintenance_socket_role_manager::start() {
|
||||
}
|
||||
|
||||
future<> maintenance_socket_role_manager::stop() {
|
||||
return _std_mgr ? _std_mgr->stop() : make_ready_future<>();
|
||||
}
|
||||
|
||||
future<> maintenance_socket_role_manager::ensure_superuser_is_created() {
|
||||
return _std_mgr ? _std_mgr->ensure_superuser_is_created() : make_ready_future<>();
|
||||
}
|
||||
|
||||
template<typename T = void>
|
||||
future<T> operation_not_available_in_maintenance_mode_exception(std::string_view operation) {
|
||||
return make_exception_future<T>(
|
||||
std::runtime_error(fmt::format("role manager: {} operation not available through maintenance socket in maintenance mode", operation)));
|
||||
}
|
||||
|
||||
template<typename T = void>
|
||||
future<T> manager_not_ready_exception(std::string_view operation) {
|
||||
return make_exception_future<T>(
|
||||
std::runtime_error(fmt::format("role manager: {} operation not available because manager not ready yet (role operations not enabled)", operation)));
|
||||
}
|
||||
|
||||
future<> maintenance_socket_role_manager::validate_operation(std::string_view name) const {
|
||||
if (_is_maintenance_mode) {
|
||||
return operation_not_available_in_maintenance_mode_exception(name);
|
||||
}
|
||||
if (!_std_mgr) {
|
||||
return manager_not_ready_exception(name);
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
future<> maintenance_socket_role_manager::create(std::string_view role_name, const role_config& c, ::service::group0_batch& mc) {
|
||||
auto f = validate_operation("CREATE");
|
||||
if (f.failed()) {
|
||||
return f;
|
||||
}
|
||||
return _std_mgr->create(role_name, c, mc);
|
||||
future<> maintenance_socket_role_manager::ensure_superuser_is_created() {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
template<typename T = void>
|
||||
future<T> operation_not_supported_exception(std::string_view operation) {
|
||||
return make_exception_future<T>(
|
||||
std::runtime_error(fmt::format("role manager: {} operation not supported through maintenance socket", operation)));
|
||||
}
|
||||
|
||||
future<> maintenance_socket_role_manager::create(std::string_view role_name, const role_config&, ::service::group0_batch&) {
|
||||
return operation_not_supported_exception("CREATE");
|
||||
}
|
||||
|
||||
future<> maintenance_socket_role_manager::drop(std::string_view role_name, ::service::group0_batch& mc) {
|
||||
auto f = validate_operation("DROP");
|
||||
if (f.failed()) {
|
||||
return f;
|
||||
}
|
||||
return _std_mgr->drop(role_name, mc);
|
||||
return operation_not_supported_exception("DROP");
|
||||
}
|
||||
|
||||
future<> maintenance_socket_role_manager::alter(std::string_view role_name, const role_config_update& u, ::service::group0_batch& mc) {
|
||||
auto f = validate_operation("ALTER");
|
||||
if (f.failed()) {
|
||||
return f;
|
||||
}
|
||||
return _std_mgr->alter(role_name, u, mc);
|
||||
future<> maintenance_socket_role_manager::alter(std::string_view role_name, const role_config_update&, ::service::group0_batch&) {
|
||||
return operation_not_supported_exception("ALTER");
|
||||
}
|
||||
|
||||
future<> maintenance_socket_role_manager::grant(std::string_view grantee_name, std::string_view role_name, ::service::group0_batch& mc) {
|
||||
auto f = validate_operation("GRANT");
|
||||
if (f.failed()) {
|
||||
return f;
|
||||
}
|
||||
return _std_mgr->grant(grantee_name, role_name, mc);
|
||||
return operation_not_supported_exception("GRANT");
|
||||
}
|
||||
|
||||
future<> maintenance_socket_role_manager::revoke(std::string_view revokee_name, std::string_view role_name, ::service::group0_batch& mc) {
|
||||
auto f = validate_operation("REVOKE");
|
||||
if (f.failed()) {
|
||||
return f;
|
||||
}
|
||||
return _std_mgr->revoke(revokee_name, role_name, mc);
|
||||
return operation_not_supported_exception("REVOKE");
|
||||
}
|
||||
|
||||
future<role_set> maintenance_socket_role_manager::query_granted(std::string_view grantee_name, recursive_role_query m) {
|
||||
auto f = validate_operation("QUERY GRANTED");
|
||||
if (f.failed()) {
|
||||
return make_exception_future<role_set>(f.get_exception());
|
||||
}
|
||||
return _std_mgr->query_granted(grantee_name, m);
|
||||
future<role_set> maintenance_socket_role_manager::query_granted(std::string_view grantee_name, recursive_role_query) {
|
||||
return operation_not_supported_exception<role_set>("QUERY GRANTED");
|
||||
}
|
||||
|
||||
future<role_to_directly_granted_map> maintenance_socket_role_manager::query_all_directly_granted(::service::query_state& qs) {
|
||||
auto f = validate_operation("QUERY ALL DIRECTLY GRANTED");
|
||||
if (f.failed()) {
|
||||
return make_exception_future<role_to_directly_granted_map>(f.get_exception());
|
||||
}
|
||||
return _std_mgr->query_all_directly_granted(qs);
|
||||
future<role_to_directly_granted_map> maintenance_socket_role_manager::query_all_directly_granted(::service::query_state&) {
|
||||
return operation_not_supported_exception<role_to_directly_granted_map>("QUERY ALL DIRECTLY GRANTED");
|
||||
}
|
||||
|
||||
future<role_set> maintenance_socket_role_manager::query_all(::service::query_state& qs) {
|
||||
auto f = validate_operation("QUERY ALL");
|
||||
if (f.failed()) {
|
||||
return make_exception_future<role_set>(f.get_exception());
|
||||
}
|
||||
return _std_mgr->query_all(qs);
|
||||
future<role_set> maintenance_socket_role_manager::query_all(::service::query_state&) {
|
||||
return operation_not_supported_exception<role_set>("QUERY ALL");
|
||||
}
|
||||
|
||||
future<bool> maintenance_socket_role_manager::exists(std::string_view role_name) {
|
||||
auto f = validate_operation("EXISTS");
|
||||
if (f.failed()) {
|
||||
return make_exception_future<bool>(f.get_exception());
|
||||
}
|
||||
return _std_mgr->exists(role_name);
|
||||
return operation_not_supported_exception<bool>("EXISTS");
|
||||
}
|
||||
|
||||
future<bool> maintenance_socket_role_manager::is_superuser(std::string_view role_name) {
|
||||
auto f = validate_operation("IS SUPERUSER");
|
||||
if (f.failed()) {
|
||||
return make_exception_future<bool>(f.get_exception());
|
||||
}
|
||||
return _std_mgr->is_superuser(role_name);
|
||||
return make_ready_future<bool>(true);
|
||||
}
|
||||
|
||||
future<bool> maintenance_socket_role_manager::can_login(std::string_view role_name) {
|
||||
auto f = validate_operation("CAN LOGIN");
|
||||
if (f.failed()) {
|
||||
return make_exception_future<bool>(f.get_exception());
|
||||
}
|
||||
return _std_mgr->can_login(role_name);
|
||||
return make_ready_future<bool>(true);
|
||||
}
|
||||
|
||||
future<std::optional<sstring>> maintenance_socket_role_manager::get_attribute(std::string_view role_name, std::string_view attribute_name, ::service::query_state& qs) {
|
||||
auto f = validate_operation("GET ATTRIBUTE");
|
||||
if (f.failed()) {
|
||||
return make_exception_future<std::optional<sstring>>(f.get_exception());
|
||||
}
|
||||
return _std_mgr->get_attribute(role_name, attribute_name, qs);
|
||||
future<std::optional<sstring>> maintenance_socket_role_manager::get_attribute(std::string_view role_name, std::string_view attribute_name, ::service::query_state&) {
|
||||
return operation_not_supported_exception<std::optional<sstring>>("GET ATTRIBUTE");
|
||||
}
|
||||
|
||||
future<role_manager::attribute_vals> maintenance_socket_role_manager::query_attribute_for_all(std::string_view attribute_name, ::service::query_state& qs) {
|
||||
auto f = validate_operation("QUERY ATTRIBUTE FOR ALL");
|
||||
if (f.failed()) {
|
||||
return make_exception_future<role_manager::attribute_vals>(f.get_exception());
|
||||
}
|
||||
return _std_mgr->query_attribute_for_all(attribute_name, qs);
|
||||
future<role_manager::attribute_vals> maintenance_socket_role_manager::query_attribute_for_all(std::string_view attribute_name, ::service::query_state&) {
|
||||
return operation_not_supported_exception<role_manager::attribute_vals>("QUERY ATTRIBUTE");
|
||||
}
|
||||
|
||||
future<> maintenance_socket_role_manager::set_attribute(std::string_view role_name, std::string_view attribute_name, std::string_view attribute_value, ::service::group0_batch& mc) {
|
||||
auto f = validate_operation("SET ATTRIBUTE");
|
||||
if (f.failed()) {
|
||||
return f;
|
||||
}
|
||||
return _std_mgr->set_attribute(role_name, attribute_name, attribute_value, mc);
|
||||
return operation_not_supported_exception("SET ATTRIBUTE");
|
||||
}
|
||||
|
||||
future<> maintenance_socket_role_manager::remove_attribute(std::string_view role_name, std::string_view attribute_name, ::service::group0_batch& mc) {
|
||||
auto f = validate_operation("REMOVE ATTRIBUTE");
|
||||
if (f.failed()) {
|
||||
return f;
|
||||
}
|
||||
return _std_mgr->remove_attribute(role_name, attribute_name, mc);
|
||||
return operation_not_supported_exception("REMOVE ATTRIBUTE");
|
||||
}
|
||||
|
||||
future<std::vector<cql3::description>> maintenance_socket_role_manager::describe_role_grants() {
|
||||
auto f = validate_operation("DESCRIBE ROLE GRANTS");
|
||||
if (f.failed()) {
|
||||
return make_exception_future<std::vector<cql3::description>>(f.get_exception());
|
||||
}
|
||||
return _std_mgr->describe_role_grants();
|
||||
return operation_not_supported_exception<std::vector<cql3::description>>("DESCRIBE SCHEMA WITH INTERNALS");
|
||||
}
|
||||
|
||||
} // namespace auth
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
#include "auth/cache.hh"
|
||||
#include "auth/resource.hh"
|
||||
#include "auth/role_manager.hh"
|
||||
#include "auth/standard_role_manager.hh"
|
||||
#include <seastar/core/future.hh>
|
||||
|
||||
namespace cql3 {
|
||||
@@ -25,26 +24,13 @@ class raft_group0_client;
|
||||
|
||||
namespace auth {
|
||||
|
||||
// This role manager is used by the maintenance socket. It has disabled all role management operations
|
||||
// in maintenance mode. In normal mode it delegates all operations to a standard_role_manager,
|
||||
// which is created on demand when the node joins the cluster.
|
||||
extern const std::string_view maintenance_socket_role_manager_name;
|
||||
|
||||
// This role manager is used by the maintenance socket. It has disabled all role management operations to not depend on
|
||||
// system_auth keyspace, which may be not yet created when the maintenance socket starts listening.
|
||||
class maintenance_socket_role_manager final : public role_manager {
|
||||
cql3::query_processor& _qp;
|
||||
::service::raft_group0_client& _group0_client;
|
||||
::service::migration_manager& _migration_manager;
|
||||
cache& _cache;
|
||||
std::optional<standard_role_manager> _std_mgr;
|
||||
bool _is_maintenance_mode;
|
||||
|
||||
public:
|
||||
void set_maintenance_mode() override;
|
||||
|
||||
// Ensures role management operations are enabled.
|
||||
// It must be called once the node has joined the cluster.
|
||||
// In the meantime all role management operations will fail.
|
||||
future<> ensure_role_operations_are_enabled() override;
|
||||
|
||||
maintenance_socket_role_manager(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&);
|
||||
maintenance_socket_role_manager(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&) {}
|
||||
|
||||
virtual std::string_view qualified_java_name() const noexcept override;
|
||||
|
||||
@@ -56,21 +42,21 @@ public:
|
||||
|
||||
virtual future<> ensure_superuser_is_created() override;
|
||||
|
||||
virtual future<> create(std::string_view role_name, const role_config& c, ::service::group0_batch& mc) override;
|
||||
virtual future<> create(std::string_view role_name, const role_config&, ::service::group0_batch&) override;
|
||||
|
||||
virtual future<> drop(std::string_view role_name, ::service::group0_batch& mc) override;
|
||||
|
||||
virtual future<> alter(std::string_view role_name, const role_config_update& u, ::service::group0_batch& mc) override;
|
||||
virtual future<> alter(std::string_view role_name, const role_config_update&, ::service::group0_batch&) override;
|
||||
|
||||
virtual future<> grant(std::string_view grantee_name, std::string_view role_name, ::service::group0_batch& mc) override;
|
||||
|
||||
virtual future<> revoke(std::string_view revokee_name, std::string_view role_name, ::service::group0_batch& mc) override;
|
||||
|
||||
virtual future<role_set> query_granted(std::string_view grantee_name, recursive_role_query m) override;
|
||||
virtual future<role_set> query_granted(std::string_view grantee_name, recursive_role_query) override;
|
||||
|
||||
virtual future<role_to_directly_granted_map> query_all_directly_granted(::service::query_state& qs) override;
|
||||
virtual future<role_to_directly_granted_map> query_all_directly_granted(::service::query_state&) override;
|
||||
|
||||
virtual future<role_set> query_all(::service::query_state& qs) override;
|
||||
virtual future<role_set> query_all(::service::query_state&) override;
|
||||
|
||||
virtual future<bool> exists(std::string_view role_name) override;
|
||||
|
||||
@@ -78,19 +64,15 @@ public:
|
||||
|
||||
virtual future<bool> can_login(std::string_view role_name) override;
|
||||
|
||||
virtual future<std::optional<sstring>> get_attribute(std::string_view role_name, std::string_view attribute_name, ::service::query_state& qs) override;
|
||||
virtual future<std::optional<sstring>> get_attribute(std::string_view role_name, std::string_view attribute_name, ::service::query_state&) override;
|
||||
|
||||
virtual future<role_manager::attribute_vals> query_attribute_for_all(std::string_view attribute_name, ::service::query_state& qs) override;
|
||||
virtual future<role_manager::attribute_vals> query_attribute_for_all(std::string_view attribute_name, ::service::query_state&) override;
|
||||
|
||||
virtual future<> set_attribute(std::string_view role_name, std::string_view attribute_name, std::string_view attribute_value, ::service::group0_batch& mc) override;
|
||||
|
||||
virtual future<> remove_attribute(std::string_view role_name, std::string_view attribute_name, ::service::group0_batch& mc) override;
|
||||
|
||||
virtual future<std::vector<cql3::description>> describe_role_grants() override;
|
||||
|
||||
private:
|
||||
future<> validate_operation(std::string_view name) const;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -26,9 +26,10 @@
|
||||
#include "cql3/untyped_result_set.hh"
|
||||
#include "utils/log.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
#include "utils/class_registrator.hh"
|
||||
#include "replica/database.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "db/config.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
|
||||
namespace auth {
|
||||
|
||||
@@ -36,10 +37,29 @@ constexpr std::string_view password_authenticator_name("org.apache.cassandra.aut
|
||||
|
||||
// name of the hash column.
|
||||
static constexpr std::string_view SALTED_HASH = "salted_hash";
|
||||
static constexpr std::string_view DEFAULT_USER_NAME = meta::DEFAULT_SUPERUSER_NAME;
|
||||
static const sstring DEFAULT_USER_PASSWORD = sstring(meta::DEFAULT_SUPERUSER_NAME);
|
||||
|
||||
static logging::logger plogger("password_authenticator");
|
||||
|
||||
// To ensure correct initialization order, we unfortunately need to use a string literal.
|
||||
static const class_registrator<
|
||||
authenticator,
|
||||
password_authenticator,
|
||||
cql3::query_processor&,
|
||||
::service::raft_group0_client&,
|
||||
::service::migration_manager&,
|
||||
cache&> password_auth_reg("org.apache.cassandra.auth.PasswordAuthenticator");
|
||||
|
||||
static thread_local auto rng_for_salt = std::default_random_engine(std::random_device{}());
|
||||
|
||||
// Picks the effective value of a string option: `value` when it is non-empty,
// otherwise the fallback `def`. The returned view aliases whichever argument
// was selected, so it is only valid as long as that argument's storage is.
static std::string_view get_config_value(std::string_view value, std::string_view def) {
    if (!value.empty()) {
        return value;
    }
    return def;
}
|
||||
// Name of the default superuser: the `auth_superuser_name` option from `cfg`
// when it is non-empty, DEFAULT_USER_NAME otherwise. Returned as an owning string.
std::string password_authenticator::default_superuser(const db::config& cfg) {
    // Bind by const reference so the view handed to get_config_value stays valid
    // for the whole statement regardless of how the option accessor returns.
    const auto& configured_name = cfg.auth_superuser_name();
    return std::string(get_config_value(configured_name, DEFAULT_USER_NAME));
}
|
||||
|
||||
password_authenticator::~password_authenticator() {
|
||||
}
|
||||
|
||||
@@ -49,6 +69,7 @@ password_authenticator::password_authenticator(cql3::query_processor& qp, ::serv
|
||||
, _migration_manager(mm)
|
||||
, _cache(cache)
|
||||
, _stopped(make_ready_future<>())
|
||||
, _superuser(default_superuser(qp.db().get_config()))
|
||||
{}
|
||||
|
||||
static bool has_salted_hash(const cql3::untyped_result_set_row& row) {
|
||||
@@ -57,18 +78,76 @@ static bool has_salted_hash(const cql3::untyped_result_set_row& row) {
|
||||
|
||||
sstring password_authenticator::update_row_query() const {
|
||||
return seastar::format("UPDATE {}.{} SET {} = ? WHERE {} = ?",
|
||||
db::system_keyspace::NAME,
|
||||
get_auth_ks_name(_qp),
|
||||
meta::roles_table::name,
|
||||
SALTED_HASH,
|
||||
meta::roles_table::role_col_name);
|
||||
}
|
||||
|
||||
static const sstring legacy_table_name{"credentials"};
|
||||
|
||||
bool password_authenticator::legacy_metadata_exists() const {
|
||||
return _qp.db().has_schema(meta::legacy::AUTH_KS, legacy_table_name);
|
||||
}
|
||||
|
||||
// Copies password hashes out of the legacy credentials table.
//
// Every row of the legacy table is read (QUORUM), and for each one the
// `username` / salted-hash pair is written into the roles table of the legacy
// auth keyspace, one row at a time and in result order. Start and completion
// are logged at info level; on any failure an error is logged and the
// exception is propagated to the caller through the returned future.
future<> password_authenticator::migrate_legacy_metadata() const {
    plogger.info("Starting migration of legacy authentication metadata.");
    static const sstring select_query = seastar::format("SELECT * FROM {}.{}", meta::legacy::AUTH_KS, legacy_table_name);

    try {
        // Held for the whole loop so the rows stay alive while they are processed.
        const auto legacy_rows = co_await _qp.execute_internal(
                select_query,
                db::consistency_level::QUORUM,
                internal_distributed_query_state(),
                cql3::query_processor::cache_internal::no);

        for (const cql3::untyped_result_set_row& row : *legacy_rows) {
            auto username = row.get_as<sstring>("username");
            auto salted_hash = row.get_as<sstring>(SALTED_HASH);
            // Built lazily on the first migrated row, then reused.
            static const auto update_query = seastar::format("UPDATE {}.{} SET {} = ? WHERE {} = ?",
                    meta::legacy::AUTH_KS,
                    meta::roles_table::name,
                    SALTED_HASH,
                    meta::roles_table::role_col_name);
            co_await _qp.execute_internal(
                    update_query,
                    consistency_for_user(username),
                    internal_distributed_query_state(),
                    {std::move(salted_hash), username},
                    cql3::query_processor::cache_internal::no).discard_result();
        }

        plogger.info("Finished migrating legacy authentication metadata.");
    } catch (...) {
        plogger.error("Encountered an error during migration!");
        throw;
    }
}
|
||||
|
||||
// Legacy-mode helper: makes sure the default superuser row carries a password hash.
//
// Does nothing when the row for `_superuser` already satisfies has_salted_hash.
// Otherwise the hash comes from the `auth_superuser_salted_password` option, or,
// when that is empty, from hashing DEFAULT_USER_PASSWORD with `_scheme`. The hash
// is written to the roles table of the legacy auth keyspace at QUORUM.
future<> password_authenticator::legacy_create_default_if_missing() {
    if (co_await legacy::default_role_row_satisfies(_qp, &has_salted_hash, _superuser)) {
        co_return;
    }

    std::string hashed_password(get_config_value(_qp.db().get_config().auth_superuser_salted_password(), ""));
    if (hashed_password.empty()) {
        // No pre-hashed password configured: fall back to the built-in default password.
        hashed_password = passwords::hash(DEFAULT_USER_PASSWORD, rng_for_salt, _scheme);
    }

    const auto update_query = seastar::format("UPDATE {}.{} SET {} = ? WHERE {} = ?",
            meta::legacy::AUTH_KS,
            meta::roles_table::name,
            SALTED_HASH,
            meta::roles_table::role_col_name);

    co_await _qp.execute_internal(
            update_query,
            db::consistency_level::QUORUM,
            internal_distributed_query_state(),
            {hashed_password, _superuser},
            cql3::query_processor::cache_internal::no);

    plogger.info("Created default superuser authentication record.");
}
|
||||
|
||||
future<> password_authenticator::maybe_create_default_password() {
|
||||
auto needs_password = [this] () -> future<bool> {
|
||||
if (default_superuser(_qp).empty()) {
|
||||
co_return false;
|
||||
}
|
||||
const sstring query = seastar::format("SELECT * FROM {}.{} WHERE is_superuser = true ALLOW FILTERING", db::system_keyspace::NAME, meta::roles_table::name);
|
||||
const sstring query = seastar::format("SELECT * FROM {}.{} WHERE is_superuser = true ALLOW FILTERING", get_auth_ks_name(_qp), meta::roles_table::name);
|
||||
auto results = co_await _qp.execute_internal(query,
|
||||
db::consistency_level::LOCAL_ONE,
|
||||
internal_distributed_query_state(), cql3::query_processor::cache_internal::yes);
|
||||
@@ -78,7 +157,7 @@ future<> password_authenticator::maybe_create_default_password() {
|
||||
bool has_default = false;
|
||||
bool has_superuser_with_password = false;
|
||||
for (auto& result : *results) {
|
||||
if (result.get_as<sstring>(meta::roles_table::role_col_name) == default_superuser(_qp)) {
|
||||
if (result.get_as<sstring>(meta::roles_table::role_col_name) == _superuser) {
|
||||
has_default = true;
|
||||
}
|
||||
if (has_salted_hash(result)) {
|
||||
@@ -99,12 +178,12 @@ future<> password_authenticator::maybe_create_default_password() {
|
||||
co_return;
|
||||
}
|
||||
// Set default superuser's password.
|
||||
std::string salted_pwd(_qp.db().get_config().auth_superuser_salted_password());
|
||||
std::string salted_pwd(get_config_value(_qp.db().get_config().auth_superuser_salted_password(), ""));
|
||||
if (salted_pwd.empty()) {
|
||||
co_return;
|
||||
salted_pwd = passwords::hash(DEFAULT_USER_PASSWORD, rng_for_salt, _scheme);
|
||||
}
|
||||
const auto update_query = update_row_query();
|
||||
co_await collect_mutations(_qp, batch, update_query, {salted_pwd, default_superuser(_qp)});
|
||||
co_await collect_mutations(_qp, batch, update_query, {salted_pwd, _superuser});
|
||||
co_await std::move(batch).commit(_group0_client, _as, get_raft_timeout());
|
||||
plogger.info("Created default superuser authentication record.");
|
||||
}
|
||||
@@ -137,14 +216,58 @@ future<> password_authenticator::start() {
|
||||
|
||||
_stopped = do_after_system_ready(_as, [this] {
|
||||
return async([this] {
|
||||
if (legacy_mode(_qp)) {
|
||||
if (!_superuser_created_promise.available()) {
|
||||
// Counterintuitively, we mark promise as ready before any startup work
|
||||
// because wait_for_schema_agreement() below will block indefinitely
|
||||
// without cluster majority. In that case, blocking node startup
|
||||
// would lead to a cluster deadlock.
|
||||
_superuser_created_promise.set_value();
|
||||
}
|
||||
_migration_manager.wait_for_schema_agreement(_qp.db().real_database(), db::timeout_clock::time_point::max(), &_as).get();
|
||||
|
||||
if (legacy::any_nondefault_role_row_satisfies(_qp, &has_salted_hash, _superuser).get()) {
|
||||
if (legacy_metadata_exists()) {
|
||||
plogger.warn("Ignoring legacy authentication metadata since nondefault data already exist.");
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (legacy_metadata_exists()) {
|
||||
migrate_legacy_metadata().get();
|
||||
return;
|
||||
}
|
||||
legacy_create_default_if_missing().get();
|
||||
}
|
||||
utils::get_local_injector().inject("password_authenticator_start_pause", utils::wait_for_message(5min)).get();
|
||||
maybe_create_default_password_with_retries().get();
|
||||
if (!_superuser_created_promise.available()) {
|
||||
_superuser_created_promise.set_value();
|
||||
if (!legacy_mode(_qp)) {
|
||||
maybe_create_default_password_with_retries().get();
|
||||
if (!_superuser_created_promise.available()) {
|
||||
_superuser_created_promise.set_value();
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
if (legacy_mode(_qp)) {
|
||||
static const sstring create_roles_query = fmt::format(
|
||||
"CREATE TABLE {}.{} ("
|
||||
" {} text PRIMARY KEY,"
|
||||
" can_login boolean,"
|
||||
" is_superuser boolean,"
|
||||
" member_of set<text>,"
|
||||
" salted_hash text"
|
||||
")",
|
||||
meta::legacy::AUTH_KS,
|
||||
meta::roles_table::name,
|
||||
meta::roles_table::role_col_name);
|
||||
return create_legacy_metadata_table_if_missing(
|
||||
meta::roles_table::name,
|
||||
_qp,
|
||||
create_roles_query,
|
||||
_migration_manager);
|
||||
}
|
||||
return make_ready_future<>();
|
||||
});
|
||||
}
|
||||
@@ -154,6 +277,15 @@ future<> password_authenticator::stop() {
|
||||
return _stopped.handle_exception_type([] (const sleep_aborted&) { }).handle_exception_type([](const abort_requested_exception&) {});
|
||||
}
|
||||
|
||||
// Consistency level used for queries about `role_name`: QUORUM when the name
// equals the hardcoded DEFAULT_USER_NAME, LOCAL_ONE for every other role.
//
// TODO: the special case is questionable. Only the hardcoded default name gets
// QUORUM, while e.g. a user-created superuser is still handled with plain LOCAL_ONE.
db::consistency_level password_authenticator::consistency_for_user(std::string_view role_name) {
    return role_name == DEFAULT_USER_NAME
            ? db::consistency_level::QUORUM
            : db::consistency_level::LOCAL_ONE;
}
|
||||
|
||||
std::string_view password_authenticator::qualified_java_name() const {
|
||||
return password_authenticator_name;
|
||||
}
|
||||
@@ -183,12 +315,20 @@ future<authenticated_user> password_authenticator::authenticate(
|
||||
const sstring password = credentials.at(PASSWORD_KEY);
|
||||
|
||||
try {
|
||||
auto role = _cache.get(username);
|
||||
if (!role || role->salted_hash.empty()) {
|
||||
throw exceptions::authentication_exception("Username and/or password are incorrect");
|
||||
std::optional<sstring> salted_hash;
|
||||
if (legacy_mode(_qp)) {
|
||||
salted_hash = co_await get_password_hash(username);
|
||||
if (!salted_hash) {
|
||||
throw exceptions::authentication_exception("Username and/or password are incorrect");
|
||||
}
|
||||
} else {
|
||||
auto role = _cache.get(username);
|
||||
if (!role || role->salted_hash.empty()) {
|
||||
throw exceptions::authentication_exception("Username and/or password are incorrect");
|
||||
}
|
||||
salted_hash = role->salted_hash;
|
||||
}
|
||||
const auto& salted_hash = role->salted_hash;
|
||||
const bool password_match = co_await passwords::check(password, salted_hash);
|
||||
const bool password_match = co_await passwords::check(password, *salted_hash);
|
||||
if (!password_match) {
|
||||
throw exceptions::authentication_exception("Username and/or password are incorrect");
|
||||
}
|
||||
@@ -227,7 +367,16 @@ future<> password_authenticator::create(std::string_view role_name, const authen
|
||||
}
|
||||
|
||||
const auto query = update_row_query();
|
||||
co_await collect_mutations(_qp, mc, query, {std::move(*maybe_hash), sstring(role_name)});
|
||||
if (legacy_mode(_qp)) {
|
||||
co_await _qp.execute_internal(
|
||||
query,
|
||||
consistency_for_user(role_name),
|
||||
internal_distributed_query_state(),
|
||||
{std::move(*maybe_hash), sstring(role_name)},
|
||||
cql3::query_processor::cache_internal::no).discard_result();
|
||||
} else {
|
||||
co_await collect_mutations(_qp, mc, query, {std::move(*maybe_hash), sstring(role_name)});
|
||||
}
|
||||
}
|
||||
|
||||
future<> password_authenticator::alter(std::string_view role_name, const authentication_options& options, ::service::group0_batch& mc) {
|
||||
@@ -238,21 +387,38 @@ future<> password_authenticator::alter(std::string_view role_name, const authent
|
||||
const auto password = std::get<password_option>(*options.credentials).password;
|
||||
|
||||
const sstring query = seastar::format("UPDATE {}.{} SET {} = ? WHERE {} = ?",
|
||||
db::system_keyspace::NAME,
|
||||
get_auth_ks_name(_qp),
|
||||
meta::roles_table::name,
|
||||
SALTED_HASH,
|
||||
meta::roles_table::role_col_name);
|
||||
co_await collect_mutations(_qp, mc, query,
|
||||
{passwords::hash(password, rng_for_salt, _scheme), sstring(role_name)});
|
||||
if (legacy_mode(_qp)) {
|
||||
co_await _qp.execute_internal(
|
||||
query,
|
||||
consistency_for_user(role_name),
|
||||
internal_distributed_query_state(),
|
||||
{passwords::hash(password, rng_for_salt, _scheme), sstring(role_name)},
|
||||
cql3::query_processor::cache_internal::no).discard_result();
|
||||
} else {
|
||||
co_await collect_mutations(_qp, mc, query,
|
||||
{passwords::hash(password, rng_for_salt, _scheme), sstring(role_name)});
|
||||
}
|
||||
}
|
||||
|
||||
future<> password_authenticator::drop(std::string_view name, ::service::group0_batch& mc) {
|
||||
const sstring query = seastar::format("DELETE {} FROM {}.{} WHERE {} = ?",
|
||||
SALTED_HASH,
|
||||
db::system_keyspace::NAME,
|
||||
get_auth_ks_name(_qp),
|
||||
meta::roles_table::name,
|
||||
meta::roles_table::role_col_name);
|
||||
co_await collect_mutations(_qp, mc, query, {sstring(name)});
|
||||
if (legacy_mode(_qp)) {
|
||||
co_await _qp.execute_internal(
|
||||
query, consistency_for_user(name),
|
||||
internal_distributed_query_state(),
|
||||
{sstring(name)},
|
||||
cql3::query_processor::cache_internal::no).discard_result();
|
||||
} else {
|
||||
co_await collect_mutations(_qp, mc, query, {sstring(name)});
|
||||
}
|
||||
}
|
||||
|
||||
future<custom_options> password_authenticator::query_custom_options(std::string_view role_name) const {
|
||||
@@ -271,13 +437,13 @@ future<std::optional<sstring>> password_authenticator::get_password_hash(std::st
|
||||
// that a map lookup string->statement is not gonna kill us much.
|
||||
const sstring query = seastar::format("SELECT {} FROM {}.{} WHERE {} = ?",
|
||||
SALTED_HASH,
|
||||
db::system_keyspace::NAME,
|
||||
get_auth_ks_name(_qp),
|
||||
meta::roles_table::name,
|
||||
meta::roles_table::role_col_name);
|
||||
|
||||
const auto res = co_await _qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::LOCAL_ONE,
|
||||
consistency_for_user(role_name),
|
||||
internal_distributed_query_state(),
|
||||
{role_name},
|
||||
cql3::query_processor::cache_internal::yes);
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#include <seastar/core/abort_source.hh>
|
||||
#include <seastar/core/shared_future.hh>
|
||||
|
||||
#include "db/consistency_level_type.hh"
|
||||
#include "auth/authenticator.hh"
|
||||
#include "auth/passwords.hh"
|
||||
#include "auth/cache.hh"
|
||||
@@ -43,11 +44,15 @@ class password_authenticator : public authenticator {
|
||||
cache& _cache;
|
||||
future<> _stopped;
|
||||
abort_source _as;
|
||||
std::string _superuser; // default superuser name from the config (may or may not be present in roles table)
|
||||
shared_promise<> _superuser_created_promise;
|
||||
// We used to also support bcrypt, SHA-256, and MD5 (ref. scylladb#24524).
|
||||
constexpr static auth::passwords::scheme _scheme = passwords::scheme::sha_512;
|
||||
|
||||
public:
|
||||
static db::consistency_level consistency_for_user(std::string_view role_name);
|
||||
static std::string default_superuser(const db::config&);
|
||||
|
||||
password_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&);
|
||||
|
||||
~password_authenticator();
|
||||
@@ -85,6 +90,12 @@ public:
|
||||
virtual future<> ensure_superuser_is_created() const override;
|
||||
|
||||
private:
|
||||
bool legacy_metadata_exists() const;
|
||||
|
||||
future<> migrate_legacy_metadata() const;
|
||||
|
||||
future<> legacy_create_default_if_missing();
|
||||
|
||||
future<> maybe_create_default_password();
|
||||
future<> maybe_create_default_password_with_retries();
|
||||
|
||||
|
||||
38
auth/permissions_cache.cc
Normal file
38
auth/permissions_cache.cc
Normal file
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
* Copyright (C) 2017-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#include "auth/permissions_cache.hh"
|
||||
|
||||
#include <fmt/ranges.h>
|
||||
#include "auth/authorizer.hh"
|
||||
#include "auth/service.hh"
|
||||
|
||||
namespace auth {
|
||||
|
||||
permissions_cache::permissions_cache(const utils::loading_cache_config& c, service& ser, logging::logger& log)
|
||||
: _cache(c, log, [&ser, &log](const key_type& k) {
|
||||
log.debug("Refreshing permissions for {}", k.first);
|
||||
return ser.get_uncached_permissions(k.first, k.second);
|
||||
}) {
|
||||
}
|
||||
|
||||
bool permissions_cache::update_config(utils::loading_cache_config c) {
|
||||
return _cache.update_config(std::move(c));
|
||||
}
|
||||
|
||||
void permissions_cache::reset() {
|
||||
_cache.reset();
|
||||
}
|
||||
|
||||
future<permission_set> permissions_cache::get(const role_or_anonymous& maybe_role, const resource& r) {
|
||||
return do_with(key_type(maybe_role, r), [this](const auto& k) {
|
||||
return _cache.get(k);
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
66
auth/permissions_cache.hh
Normal file
66
auth/permissions_cache.hh
Normal file
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (C) 2017-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <utility>
|
||||
|
||||
#include <fmt/core.h>
|
||||
#include <seastar/core/future.hh>
|
||||
|
||||
#include "auth/permission.hh"
|
||||
#include "auth/resource.hh"
|
||||
#include "auth/role_or_anonymous.hh"
|
||||
#include "utils/log.hh"
|
||||
#include "utils/hash.hh"
|
||||
#include "utils/loading_cache.hh"
|
||||
|
||||
namespace std {
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& os, const pair<auth::role_or_anonymous, auth::resource>& p) {
|
||||
fmt::print(os, "{{role: {}, resource: {}}}", p.first, p.second);
|
||||
return os;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
namespace db {
|
||||
class config;
|
||||
}
|
||||
|
||||
namespace auth {
|
||||
|
||||
class service;
|
||||
|
||||
class permissions_cache final {
|
||||
using cache_type = utils::loading_cache<
|
||||
std::pair<role_or_anonymous, resource>,
|
||||
permission_set,
|
||||
1,
|
||||
utils::loading_cache_reload_enabled::yes,
|
||||
utils::simple_entry_size<permission_set>,
|
||||
utils::tuple_hash>;
|
||||
|
||||
using key_type = typename cache_type::key_type;
|
||||
|
||||
cache_type _cache;
|
||||
|
||||
public:
|
||||
explicit permissions_cache(const utils::loading_cache_config&, service&, logging::logger&);
|
||||
|
||||
future <> stop() {
|
||||
return _cache.stop();
|
||||
}
|
||||
|
||||
bool update_config(utils::loading_cache_config);
|
||||
void reset();
|
||||
future<permission_set> get(const role_or_anonymous&, const resource&);
|
||||
};
|
||||
|
||||
}
|
||||
@@ -112,11 +112,6 @@ public:
|
||||
|
||||
virtual future<> stop() = 0;
|
||||
|
||||
///
|
||||
/// Notify that the maintenance mode is starting.
|
||||
///
|
||||
virtual void set_maintenance_mode() {}
|
||||
|
||||
///
|
||||
/// Ensure that superuser role exists.
|
||||
///
|
||||
@@ -124,11 +119,6 @@ public:
|
||||
///
|
||||
virtual future<> ensure_superuser_is_created() = 0;
|
||||
|
||||
///
|
||||
/// Ensure role management operations are enabled. Some role managers may defer initialization.
|
||||
///
|
||||
virtual future<> ensure_role_operations_are_enabled() { return make_ready_future<>(); }
|
||||
|
||||
///
|
||||
/// \returns an exceptional future with \ref role_already_exists for a role that has previously been created.
|
||||
///
|
||||
|
||||
68
auth/roles-metadata.cc
Normal file
68
auth/roles-metadata.cc
Normal file
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
* Copyright (C) 2018-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#include "auth/roles-metadata.hh"
|
||||
|
||||
#include <seastar/core/format.hh>
|
||||
#include <seastar/core/shared_ptr.hh>
|
||||
#include <seastar/core/sstring.hh>
|
||||
|
||||
#include "auth/common.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "cql3/untyped_result_set.hh"
|
||||
|
||||
namespace auth {
|
||||
|
||||
namespace legacy {
|
||||
|
||||
// Evaluates predicate `p` on the roles-table row of the default role.
//
// The row is looked up by `rolename`, or by meta::DEFAULT_SUPERUSER_NAME when no
// name is given, in the legacy auth keyspace. The lookup is tried at consistency
// ONE first and then at QUORUM; the first non-empty result decides the answer.
// Resolves to false when neither attempt returns a row.
future<bool> default_role_row_satisfies(
        cql3::query_processor& qp,
        std::function<bool(const cql3::untyped_result_set_row&)> p,
        std::optional<std::string> rolename) {
    const sstring query = seastar::format("SELECT * FROM {}.{} WHERE {} = ?",
            auth::meta::legacy::AUTH_KS,
            meta::roles_table::name,
            meta::roles_table::role_col_name);

    // The bound role name is the same for every attempt, so resolve it once.
    std::string target_role = rolename.value_or(std::string(auth::meta::DEFAULT_SUPERUSER_NAME));

    for (auto level : { db::consistency_level::ONE, db::consistency_level::QUORUM }) {
        auto rows = co_await qp.execute_internal(
                query,
                level,
                internal_distributed_query_state(),
                {target_role},
                cql3::query_processor::cache_internal::yes);
        if (rows->empty()) {
            continue;
        }
        co_return p(rows->one());
    }
    co_return false;
}
|
||||
|
||||
// Checks whether predicate `p` holds for at least one roles-table row whose role
// name differs from the default one (`rolename`, or meta::DEFAULT_SUPERUSER_NAME
// when no name is given). Reads the whole roles table of the legacy auth keyspace
// at QUORUM. Resolves to false when the table is empty or no such row matches.
future<bool> any_nondefault_role_row_satisfies(
        cql3::query_processor& qp,
        std::function<bool(const cql3::untyped_result_set_row&)> p,
        std::optional<std::string> rolename) {
    const sstring query = seastar::format("SELECT * FROM {}.{}", auth::meta::legacy::AUTH_KS, meta::roles_table::name);

    auto rows = co_await qp.execute_internal(
            query,
            db::consistency_level::QUORUM,
            internal_distributed_query_state(),
            cql3::query_processor::cache_internal::no);
    if (rows->empty()) {
        co_return false;
    }
    static const sstring col_name = sstring(meta::roles_table::role_col_name);

    // The name to exclude does not depend on the row, so compute it up front.
    const std::string_view default_name = rolename ? std::string_view(*rolename) : meta::DEFAULT_SUPERUSER_NAME;

    co_return std::ranges::any_of(*rows, [&p, default_name](const cql3::untyped_result_set_row& row) {
        // The predicate is only consulted for rows that are not the default role.
        return row.get_as<sstring>(col_name) != default_name && p(row);
    });
}
|
||||
|
||||
} // namespace legacy
|
||||
|
||||
} // namespace auth
|
||||
@@ -8,7 +8,18 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
#include <string_view>
|
||||
#include <functional>
|
||||
|
||||
#include <seastar/core/future.hh>
|
||||
|
||||
#include "seastarx.hh"
|
||||
|
||||
namespace cql3 {
|
||||
class query_processor;
|
||||
class untyped_result_set_row;
|
||||
}
|
||||
|
||||
namespace auth {
|
||||
|
||||
@@ -24,4 +35,26 @@ constexpr std::string_view role_col_name{"role", 4};
|
||||
|
||||
} // namespace meta
|
||||
|
||||
namespace legacy {
|
||||
|
||||
///
|
||||
/// Check that the default role satisfies a predicate, or `false` if the default role does not exist.
|
||||
///
|
||||
future<bool> default_role_row_satisfies(
|
||||
cql3::query_processor&,
|
||||
std::function<bool(const cql3::untyped_result_set_row&)>,
|
||||
std::optional<std::string> rolename = {}
|
||||
);
|
||||
|
||||
///
|
||||
/// Check that any nondefault role satisfies a predicate. `false` if no nondefault roles exist.
|
||||
///
|
||||
future<bool> any_nondefault_role_row_satisfies(
|
||||
cql3::query_processor&,
|
||||
std::function<bool(const cql3::untyped_result_set_row&)>,
|
||||
std::optional<std::string> rolename = {}
|
||||
);
|
||||
|
||||
} // namespace legacy
|
||||
|
||||
} // namespace auth
|
||||
|
||||
@@ -22,11 +22,21 @@
|
||||
#include "db/config.hh"
|
||||
#include "utils/log.hh"
|
||||
#include "seastarx.hh"
|
||||
#include "utils/class_registrator.hh"
|
||||
|
||||
namespace auth {
|
||||
|
||||
static logging::logger mylog("saslauthd_authenticator");
|
||||
|
||||
// To ensure correct initialization order, we unfortunately need to use a string literal.
|
||||
static const class_registrator<
|
||||
authenticator,
|
||||
saslauthd_authenticator,
|
||||
cql3::query_processor&,
|
||||
::service::raft_group0_client&,
|
||||
::service::migration_manager&,
|
||||
cache&> saslauthd_auth_reg("com.scylladb.auth.SaslauthdAuthenticator");
|
||||
|
||||
saslauthd_authenticator::saslauthd_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, cache&)
|
||||
: _socket_path(qp.db().get_config().saslauthd_socket_path())
|
||||
{}
|
||||
|
||||
382
auth/service.cc
382
auth/service.cc
@@ -16,8 +16,6 @@
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
|
||||
#include <seastar/core/future-util.hh>
|
||||
#include <seastar/core/shard_id.hh>
|
||||
#include <seastar/core/sharded.hh>
|
||||
@@ -25,18 +23,8 @@
|
||||
|
||||
#include "auth/allow_all_authenticator.hh"
|
||||
#include "auth/allow_all_authorizer.hh"
|
||||
#include "auth/certificate_authenticator.hh"
|
||||
#include "auth/common.hh"
|
||||
#include "auth/default_authorizer.hh"
|
||||
#include "auth/ldap_role_manager.hh"
|
||||
#include "auth/maintenance_socket_authenticator.hh"
|
||||
#include "auth/maintenance_socket_authorizer.hh"
|
||||
#include "auth/maintenance_socket_role_manager.hh"
|
||||
#include "auth/password_authenticator.hh"
|
||||
#include "auth/role_or_anonymous.hh"
|
||||
#include "auth/saslauthd_authenticator.hh"
|
||||
#include "auth/standard_role_manager.hh"
|
||||
#include "auth/transitional.hh"
|
||||
#include "cql3/functions/functions.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "cql3/description.hh"
|
||||
@@ -55,6 +43,7 @@
|
||||
#include "service/raft/raft_group0_client.hh"
|
||||
#include "mutation/timestamp.hh"
|
||||
#include "utils/assert.hh"
|
||||
#include "utils/class_registrator.hh"
|
||||
#include "locator/abstract_replication_strategy.hh"
|
||||
#include "data_dictionary/keyspace_metadata.hh"
|
||||
#include "service/storage_service.hh"
|
||||
@@ -74,6 +63,91 @@ static const sstring superuser_col_name("super");
|
||||
|
||||
static logging::logger log("auth_service");
|
||||
|
||||
class auth_migration_listener final : public ::service::migration_listener {
|
||||
authorizer& _authorizer;
|
||||
cql3::query_processor& _qp;
|
||||
|
||||
public:
|
||||
explicit auth_migration_listener(authorizer& a, cql3::query_processor& qp) : _authorizer(a), _qp(qp) {
|
||||
}
|
||||
|
||||
private:
|
||||
void on_create_keyspace(const sstring& ks_name) override {}
|
||||
void on_create_column_family(const sstring& ks_name, const sstring& cf_name) override {}
|
||||
void on_create_user_type(const sstring& ks_name, const sstring& type_name) override {}
|
||||
void on_create_function(const sstring& ks_name, const sstring& function_name) override {}
|
||||
void on_create_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
|
||||
void on_create_view(const sstring& ks_name, const sstring& view_name) override {}
|
||||
|
||||
void on_update_keyspace(const sstring& ks_name) override {}
|
||||
void on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool) override {}
|
||||
void on_update_user_type(const sstring& ks_name, const sstring& type_name) override {}
|
||||
void on_update_function(const sstring& ks_name, const sstring& function_name) override {}
|
||||
void on_update_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {}
|
||||
void on_update_view(const sstring& ks_name, const sstring& view_name, bool columns_changed) override {}
|
||||
|
||||
void on_drop_keyspace(const sstring& ks_name) override {
|
||||
if (!legacy_mode(_qp)) {
|
||||
// in non legacy path revoke is part of schema change statement execution
|
||||
return;
|
||||
}
|
||||
// Do it in the background.
|
||||
(void)do_with(::service::group0_batch::unused(), [this, &ks_name] (auto& mc) mutable {
|
||||
return _authorizer.revoke_all(auth::make_data_resource(ks_name), mc);
|
||||
}).handle_exception([] (std::exception_ptr e) {
|
||||
log.error("Unexpected exception while revoking all permissions on dropped keyspace: {}", e);
|
||||
});
|
||||
|
||||
(void)do_with(::service::group0_batch::unused(), [this, &ks_name] (auto& mc) mutable {
|
||||
return _authorizer.revoke_all(auth::make_functions_resource(ks_name), mc);
|
||||
}).handle_exception([] (std::exception_ptr e) {
|
||||
log.error("Unexpected exception while revoking all permissions on functions in dropped keyspace: {}", e);
|
||||
});
|
||||
}
|
||||
|
||||
void on_drop_column_family(const sstring& ks_name, const sstring& cf_name) override {
|
||||
if (!legacy_mode(_qp)) {
|
||||
// in non legacy path revoke is part of schema change statement execution
|
||||
return;
|
||||
}
|
||||
// Do it in the background.
|
||||
(void)do_with(::service::group0_batch::unused(), [this, &ks_name, &cf_name] (auto& mc) mutable {
|
||||
return _authorizer.revoke_all(
|
||||
auth::make_data_resource(ks_name, cf_name), mc);
|
||||
}).handle_exception([] (std::exception_ptr e) {
|
||||
log.error("Unexpected exception while revoking all permissions on dropped table: {}", e);
|
||||
});
|
||||
}
|
||||
|
||||
void on_drop_user_type(const sstring& ks_name, const sstring& type_name) override {}
|
||||
void on_drop_function(const sstring& ks_name, const sstring& function_name) override {
|
||||
if (!legacy_mode(_qp)) {
|
||||
// in non legacy path revoke is part of schema change statement execution
|
||||
return;
|
||||
}
|
||||
// Do it in the background.
|
||||
(void)do_with(::service::group0_batch::unused(), [this, &ks_name, &function_name] (auto& mc) mutable {
|
||||
return _authorizer.revoke_all(
|
||||
auth::make_functions_resource(ks_name, function_name), mc);
|
||||
}).handle_exception([] (std::exception_ptr e) {
|
||||
log.error("Unexpected exception while revoking all permissions on dropped function: {}", e);
|
||||
});
|
||||
}
|
||||
void on_drop_aggregate(const sstring& ks_name, const sstring& aggregate_name) override {
|
||||
if (!legacy_mode(_qp)) {
|
||||
// in non legacy path revoke is part of schema change statement execution
|
||||
return;
|
||||
}
|
||||
(void)do_with(::service::group0_batch::unused(), [this, &ks_name, &aggregate_name] (auto& mc) mutable {
|
||||
return _authorizer.revoke_all(
|
||||
auth::make_functions_resource(ks_name, aggregate_name), mc);
|
||||
}).handle_exception([] (std::exception_ptr e) {
|
||||
log.error("Unexpected exception while revoking all permissions on dropped aggregate: {}", e);
|
||||
});
|
||||
}
|
||||
void on_drop_view(const sstring& ks_name, const sstring& view_name) override {}
|
||||
};
|
||||
|
||||
static future<> validate_role_exists(const service& ser, std::string_view role_name) {
|
||||
return ser.underlying_role_manager().exists(role_name).then([role_name](bool exists) {
|
||||
if (!exists) {
|
||||
@@ -83,36 +157,50 @@ static future<> validate_role_exists(const service& ser, std::string_view role_n
|
||||
}
|
||||
|
||||
service::service(
|
||||
utils::loading_cache_config c,
|
||||
cache& cache,
|
||||
cql3::query_processor& qp,
|
||||
::service::raft_group0_client& g0,
|
||||
::service::migration_notifier& mn,
|
||||
std::unique_ptr<authorizer> z,
|
||||
std::unique_ptr<authenticator> a,
|
||||
std::unique_ptr<role_manager> r,
|
||||
maintenance_socket_enabled used_by_maintenance_socket)
|
||||
: _cache(cache)
|
||||
: _loading_cache_config(std::move(c))
|
||||
, _permissions_cache(nullptr)
|
||||
, _cache(cache)
|
||||
, _qp(qp)
|
||||
, _group0_client(g0)
|
||||
, _mnotifier(mn)
|
||||
, _authorizer(std::move(z))
|
||||
, _authenticator(std::move(a))
|
||||
, _role_manager(std::move(r))
|
||||
, _migration_listener(std::make_unique<auth_migration_listener>(*_authorizer, qp))
|
||||
, _permissions_cache_cfg_cb([this] (uint32_t) { (void) _permissions_cache_config_action.trigger_later(); })
|
||||
, _permissions_cache_config_action([this] { update_cache_config(); return make_ready_future<>(); })
|
||||
, _permissions_cache_max_entries_observer(_qp.db().get_config().permissions_cache_max_entries.observe(_permissions_cache_cfg_cb))
|
||||
, _permissions_cache_update_interval_in_ms_observer(_qp.db().get_config().permissions_update_interval_in_ms.observe(_permissions_cache_cfg_cb))
|
||||
, _permissions_cache_validity_in_ms_observer(_qp.db().get_config().permissions_validity_in_ms.observe(_permissions_cache_cfg_cb))
|
||||
, _used_by_maintenance_socket(used_by_maintenance_socket) {}
|
||||
|
||||
service::service(
|
||||
utils::loading_cache_config c,
|
||||
cql3::query_processor& qp,
|
||||
::service::raft_group0_client& g0,
|
||||
authorizer_factory authorizer_factory,
|
||||
authenticator_factory authenticator_factory,
|
||||
role_manager_factory role_manager_factory,
|
||||
::service::migration_notifier& mn,
|
||||
::service::migration_manager& mm,
|
||||
const service_config& sc,
|
||||
maintenance_socket_enabled used_by_maintenance_socket,
|
||||
cache& cache)
|
||||
: service(
|
||||
std::move(c),
|
||||
cache,
|
||||
qp,
|
||||
g0,
|
||||
authorizer_factory(),
|
||||
authenticator_factory(),
|
||||
role_manager_factory(),
|
||||
mn,
|
||||
create_object<authorizer>(sc.authorizer_java_name, qp, g0, mm),
|
||||
create_object<authenticator>(sc.authenticator_java_name, qp, g0, mm, cache),
|
||||
create_object<role_manager>(sc.role_manager_java_name, qp, g0, mm, cache),
|
||||
used_by_maintenance_socket) {
|
||||
}
|
||||
|
||||
@@ -145,6 +233,9 @@ future<> service::create_legacy_keyspace_if_missing(::service::migration_manager
|
||||
}
|
||||
|
||||
future<> service::start(::service::migration_manager& mm, db::system_keyspace& sys_ks) {
|
||||
auto auth_version = co_await sys_ks.get_auth_version();
|
||||
// version is set in query processor to be easily available in various places we call auth::legacy_mode check.
|
||||
_qp.auth_version = auth_version;
|
||||
if (this_shard_id() == 0) {
|
||||
co_await _cache.load_all();
|
||||
}
|
||||
@@ -166,20 +257,25 @@ future<> service::start(::service::migration_manager& mm, db::system_keyspace& s
|
||||
co_await _role_manager->ensure_superuser_is_created();
|
||||
}
|
||||
co_await when_all_succeed(_authorizer->start(), _authenticator->start()).discard_result();
|
||||
if (!_used_by_maintenance_socket) {
|
||||
// Maintenance socket mode can't cache permissions because it has
|
||||
// different authorizer. We can't mix cached permissions, they could be
|
||||
// different in normal mode.
|
||||
_cache.set_permission_loader(std::bind(
|
||||
&service::get_uncached_permissions,
|
||||
this, std::placeholders::_1, std::placeholders::_2));
|
||||
}
|
||||
_permissions_cache = std::make_unique<permissions_cache>(_loading_cache_config, *this, log);
|
||||
co_await once_among_shards([this] {
|
||||
_mnotifier.register_listener(_migration_listener.get());
|
||||
return make_ready_future<>();
|
||||
});
|
||||
}
|
||||
|
||||
future<> service::stop() {
|
||||
_as.request_abort();
|
||||
_cache.set_permission_loader(nullptr);
|
||||
return when_all_succeed(_role_manager->stop(), _authorizer->stop(), _authenticator->stop()).discard_result();
|
||||
// Only one of the shards has the listener registered, but let's try to
|
||||
// unregister on each one just to make sure.
|
||||
return _mnotifier.unregister_listener(_migration_listener.get()).then([this] {
|
||||
if (_permissions_cache) {
|
||||
return _permissions_cache->stop();
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}).then([this] {
|
||||
return when_all_succeed(_role_manager->stop(), _authorizer->stop(), _authenticator->stop()).discard_result();
|
||||
});
|
||||
}
|
||||
|
||||
future<> service::ensure_superuser_is_created() {
|
||||
@@ -187,8 +283,21 @@ future<> service::ensure_superuser_is_created() {
|
||||
co_await _authenticator->ensure_superuser_is_created();
|
||||
}
|
||||
|
||||
void service::update_cache_config() {
|
||||
auto db = _qp.db();
|
||||
|
||||
utils::loading_cache_config perm_cache_config;
|
||||
perm_cache_config.max_size = db.get_config().permissions_cache_max_entries();
|
||||
perm_cache_config.expiry = std::chrono::milliseconds(db.get_config().permissions_validity_in_ms());
|
||||
perm_cache_config.refresh = std::chrono::milliseconds(db.get_config().permissions_update_interval_in_ms());
|
||||
|
||||
if (!_permissions_cache->update_config(std::move(perm_cache_config))) {
|
||||
log.error("Failed to apply permissions cache changes. Please read the documentation of these parameters");
|
||||
}
|
||||
}
|
||||
|
||||
void service::reset_authorization_cache() {
|
||||
_permissions_cache->reset();
|
||||
_qp.reset_cache();
|
||||
}
|
||||
|
||||
@@ -213,14 +322,7 @@ service::get_uncached_permissions(const role_or_anonymous& maybe_role, const res
|
||||
}
|
||||
|
||||
future<permission_set> service::get_permissions(const role_or_anonymous& maybe_role, const resource& r) const {
|
||||
if (_used_by_maintenance_socket) {
|
||||
return get_uncached_permissions(maybe_role, r);
|
||||
}
|
||||
return _cache.get_permissions(maybe_role, r);
|
||||
}
|
||||
|
||||
void service::set_maintenance_mode() {
|
||||
_role_manager->set_maintenance_mode();
|
||||
return _permissions_cache->get(maybe_role, r);
|
||||
}
|
||||
|
||||
future<bool> service::has_superuser(std::string_view role_name, const role_set& roles) const {
|
||||
@@ -258,10 +360,6 @@ static void validate_authentication_options_are_supported(
|
||||
}
|
||||
}
|
||||
|
||||
future<> service::ensure_role_operations_are_enabled() {
|
||||
return _role_manager->ensure_role_operations_are_enabled();
|
||||
}
|
||||
|
||||
future<> service::create_role(std::string_view name,
|
||||
const role_config& config,
|
||||
const authentication_options& options,
|
||||
@@ -279,6 +377,11 @@ future<> service::create_role(std::string_view name,
|
||||
ep = std::current_exception();
|
||||
}
|
||||
if (ep) {
|
||||
// Rollback only in legacy mode as normally mutations won't be
|
||||
// applied in case exception is raised
|
||||
if (legacy_mode(_qp)) {
|
||||
co_await underlying_role_manager().drop(name, mc);
|
||||
}
|
||||
std::rethrow_exception(std::move(ep));
|
||||
}
|
||||
}
|
||||
@@ -344,11 +447,6 @@ future<bool> service::exists(const resource& r) const {
|
||||
return make_ready_future<bool>(false);
|
||||
}
|
||||
|
||||
future<> service::revoke_all(const resource& r, ::service::group0_batch& mc) const {
|
||||
co_await _authorizer->revoke_all(r, mc);
|
||||
co_await _cache.prune(r);
|
||||
}
|
||||
|
||||
future<std::vector<cql3::description>> service::describe_roles(bool with_hashed_passwords) {
|
||||
std::vector<cql3::description> result{};
|
||||
|
||||
@@ -357,11 +455,11 @@ future<std::vector<cql3::description>> service::describe_roles(bool with_hashed_
|
||||
|
||||
const bool authenticator_uses_password_hashes = _authenticator->uses_password_hashes();
|
||||
|
||||
const auto default_su = cql3::util::maybe_quote(default_superuser(_qp));
|
||||
|
||||
auto produce_create_statement = [&default_su, with_hashed_passwords] (const sstring& formatted_role_name,
|
||||
auto produce_create_statement = [with_hashed_passwords] (const sstring& formatted_role_name,
|
||||
const std::optional<sstring>& maybe_hashed_password, bool can_login, bool is_superuser) {
|
||||
const sstring role_part = formatted_role_name == default_su
|
||||
// Even after applying formatting to a role, `formatted_role_name` can only equal `meta::DEFAULT_SUPERUSER_NAME`
|
||||
// if the original identifier was equal to it.
|
||||
const sstring role_part = formatted_role_name == meta::DEFAULT_SUPERUSER_NAME
|
||||
? seastar::format("IF NOT EXISTS {}", formatted_role_name)
|
||||
: formatted_role_name;
|
||||
|
||||
@@ -574,10 +672,6 @@ future<std::vector<cql3::description>> service::describe_auth(bool with_hashed_p
|
||||
// Free functions.
|
||||
//
|
||||
|
||||
void set_maintenance_mode(service& ser) {
|
||||
ser.set_maintenance_mode();
|
||||
}
|
||||
|
||||
future<bool> has_superuser(const service& ser, const authenticated_user& u) {
|
||||
if (is_anonymous(u)) {
|
||||
return make_ready_future<bool>(false);
|
||||
@@ -586,10 +680,6 @@ future<bool> has_superuser(const service& ser, const authenticated_user& u) {
|
||||
return ser.has_superuser(*u.name);
|
||||
}
|
||||
|
||||
future<> ensure_role_operations_are_enabled(service& ser) {
|
||||
return ser.underlying_role_manager().ensure_role_operations_are_enabled();
|
||||
}
|
||||
|
||||
future<role_set> get_roles(const service& ser, const authenticated_user& u) {
|
||||
if (is_anonymous(u)) {
|
||||
return make_ready_future<role_set>();
|
||||
@@ -711,7 +801,7 @@ future<> revoke_permissions(
|
||||
}
|
||||
|
||||
future<> revoke_all(const service& ser, const resource& r, ::service::group0_batch& mc) {
|
||||
return ser.revoke_all(r, mc);
|
||||
return ser.underlying_authorizer().revoke_all(r, mc);
|
||||
}
|
||||
|
||||
future<std::vector<permission_details>> list_filtered_permissions(
|
||||
@@ -772,115 +862,83 @@ future<> commit_mutations(service& ser, ::service::group0_batch&& mc) {
|
||||
return ser.commit_mutations(std::move(mc));
|
||||
}
|
||||
|
||||
namespace {
|
||||
future<> migrate_to_auth_v2(db::system_keyspace& sys_ks, ::service::raft_group0_client& g0, start_operation_func_t start_operation_func, abort_source& as) {
|
||||
// FIXME: if this function fails it may leave partial data in the new tables
|
||||
// that should be cleared
|
||||
auto gen = [&sys_ks] (api::timestamp_type ts) -> ::service::mutations_generator {
|
||||
auto& qp = sys_ks.query_processor();
|
||||
for (const auto& cf_name : std::vector<sstring>{
|
||||
"roles", "role_members", "role_attributes", "role_permissions"}) {
|
||||
schema_ptr schema;
|
||||
try {
|
||||
schema = qp.db().find_schema(meta::legacy::AUTH_KS, cf_name);
|
||||
} catch (const data_dictionary::no_such_column_family&) {
|
||||
continue; // some tables might not have been created if they were not used
|
||||
}
|
||||
|
||||
/// Return the part of `name` that follows its last '.' character.
///
/// A name without any '.' is returned unchanged; a name ending in '.' yields an empty view.
/// The result is a view into `name`, so it is only valid while the underlying characters are.
std::string_view get_short_name(std::string_view name) {
    const auto last_dot = name.rfind('.');
    return last_dot == std::string_view::npos ? name : name.substr(last_dot + 1);
}
|
||||
std::vector<sstring> col_names;
|
||||
for (const auto& col : schema->all_columns()) {
|
||||
col_names.push_back(col.name_as_cql_string());
|
||||
}
|
||||
sstring val_binders_str = "?";
|
||||
for (size_t i = 1; i < col_names.size(); ++i) {
|
||||
val_binders_str += ", ?";
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
std::vector<mutation> collected;
|
||||
// use longer than usual timeout as we scan the whole table
|
||||
// but not infinite or very long as we want to fail reasonably fast
|
||||
const auto t = 5min;
|
||||
const timeout_config tc{t, t, t, t, t, t, t};
|
||||
::service::client_state cs(::service::client_state::internal_tag{}, tc);
|
||||
::service::query_state qs(cs, empty_service_permit());
|
||||
|
||||
authorizer_factory make_authorizer_factory(
|
||||
std::string_view name,
|
||||
sharded<cql3::query_processor>& qp) {
|
||||
std::string_view short_name = get_short_name(name);
|
||||
co_await qp.query_internal(
|
||||
seastar::format("SELECT * FROM {}.{}", meta::legacy::AUTH_KS, cf_name),
|
||||
db::consistency_level::ALL,
|
||||
{},
|
||||
1000,
|
||||
[&qp, &cf_name, &col_names, &val_binders_str, &schema, ts, &collected] (const cql3::untyped_result_set::row& row) -> future<stop_iteration> {
|
||||
std::vector<data_value_or_unset> values;
|
||||
for (const auto& col : schema->all_columns()) {
|
||||
if (row.has(col.name_as_text())) {
|
||||
values.push_back(
|
||||
col.type->deserialize(row.get_blob_unfragmented(col.name_as_text())));
|
||||
} else {
|
||||
values.push_back(unset_value{});
|
||||
}
|
||||
}
|
||||
auto muts = co_await qp.get_mutations_internal(
|
||||
seastar::format("INSERT INTO {}.{} ({}) VALUES ({})",
|
||||
db::system_keyspace::NAME,
|
||||
cf_name,
|
||||
fmt::join(col_names, ", "),
|
||||
val_binders_str),
|
||||
internal_distributed_query_state(),
|
||||
ts,
|
||||
std::move(values));
|
||||
if (muts.size() != 1) {
|
||||
on_internal_error(log,
|
||||
format("expecting single insert mutation, got {}", muts.size()));
|
||||
}
|
||||
|
||||
if (boost::iequals(short_name, "AllowAllAuthorizer")) {
|
||||
return [&qp] {
|
||||
return std::make_unique<allow_all_authorizer>(qp.local());
|
||||
};
|
||||
} else if (boost::iequals(short_name, "CassandraAuthorizer")) {
|
||||
return [&qp] {
|
||||
return std::make_unique<default_authorizer>(qp.local());
|
||||
};
|
||||
} else if (boost::iequals(short_name, "TransitionalAuthorizer")) {
|
||||
return [&qp] {
|
||||
return std::make_unique<transitional_authorizer>(qp.local());
|
||||
};
|
||||
}
|
||||
throw std::invalid_argument(fmt::format("Unknown authorizer: {}", name));
|
||||
}
|
||||
collected.push_back(std::move(muts[0]));
|
||||
co_return stop_iteration::no;
|
||||
},
|
||||
std::move(qs));
|
||||
|
||||
/// Build a factory that creates the authenticator selected by `name`.
///
/// `name` is reduced with `get_short_name` and then compared case-insensitively against the
/// known authenticator class names.
///
/// The returned factory captures `qp`, `g0`, `mm` and `auth_cache` by reference and reads the
/// `.local()` instance of the sharded ones each time it is invoked, so all four must outlive it.
///
/// \throws std::invalid_argument if `name` does not match any known authenticator.
authenticator_factory make_authenticator_factory(
        std::string_view name,
        sharded<cql3::query_processor>& qp,
        ::service::raft_group0_client& g0,
        sharded<::service::migration_manager>& mm,
        sharded<cache>& auth_cache) {
    const std::string_view short_name = get_short_name(name);
    const auto is = [short_name] (std::string_view candidate) {
        return boost::iequals(short_name, candidate);
    };

    if (is("AllowAllAuthenticator")) {
        return [&qp, &g0, &mm, &auth_cache] {
            return std::make_unique<allow_all_authenticator>(qp.local(), g0, mm.local(), auth_cache.local());
        };
    }
    if (is("PasswordAuthenticator")) {
        return [&qp, &g0, &mm, &auth_cache] {
            return std::make_unique<password_authenticator>(qp.local(), g0, mm.local(), auth_cache.local());
        };
    }
    if (is("CertificateAuthenticator")) {
        return [&qp, &g0, &mm, &auth_cache] {
            return std::make_unique<certificate_authenticator>(qp.local(), g0, mm.local(), auth_cache.local());
        };
    }
    if (is("SaslauthdAuthenticator")) {
        return [&qp, &g0, &mm, &auth_cache] {
            return std::make_unique<saslauthd_authenticator>(qp.local(), g0, mm.local(), auth_cache.local());
        };
    }
    if (is("TransitionalAuthenticator")) {
        return [&qp, &g0, &mm, &auth_cache] {
            return std::make_unique<transitional_authenticator>(qp.local(), g0, mm.local(), auth_cache.local());
        };
    }

    throw std::invalid_argument(fmt::format("Unknown authenticator: {}", name));
}
|
||||
|
||||
/// Build a factory that creates the role manager selected by `name`.
///
/// `name` is reduced with `get_short_name` and then compared case-insensitively against
/// "CassandraRoleManager" and "LDAPRoleManager".
///
/// The returned factory captures `qp`, `g0`, `mm` and `auth_cache` by reference and reads the
/// `.local()` instance of the sharded ones each time it is invoked, so all four must outlive it.
///
/// \throws std::invalid_argument if `name` does not match any known role manager.
role_manager_factory make_role_manager_factory(
        std::string_view name,
        sharded<cql3::query_processor>& qp,
        ::service::raft_group0_client& g0,
        sharded<::service::migration_manager>& mm,
        sharded<cache>& auth_cache) {
    const std::string_view short_name = get_short_name(name);
    const auto is = [short_name] (std::string_view candidate) {
        return boost::iequals(short_name, candidate);
    };

    if (is("CassandraRoleManager")) {
        return [&qp, &g0, &mm, &auth_cache] {
            return std::make_unique<standard_role_manager>(qp.local(), g0, mm.local(), auth_cache.local());
        };
    }
    if (is("LDAPRoleManager")) {
        return [&qp, &g0, &mm, &auth_cache] {
            return std::make_unique<ldap_role_manager>(qp.local(), g0, mm.local(), auth_cache.local());
        };
    }

    throw std::invalid_argument(fmt::format("Unknown role manager: {}", name));
}
|
||||
|
||||
/// Build a factory that creates a `maintenance_socket_authenticator`.
///
/// The factory captures all four arguments by reference and reads the `.local()` instance of
/// the sharded ones when it is invoked, so the arguments must outlive the returned factory.
authenticator_factory make_maintenance_socket_authenticator_factory(
        sharded<cql3::query_processor>& qp,
        ::service::raft_group0_client& g0,
        sharded<::service::migration_manager>& mm,
        sharded<cache>& auth_cache) {
    auto make_instance = [&qp, &g0, &mm, &auth_cache] {
        return std::make_unique<maintenance_socket_authenticator>(qp.local(), g0, mm.local(), auth_cache.local());
    };
    return make_instance;
}
|
||||
|
||||
/// Build a factory that creates a `maintenance_socket_authorizer`.
///
/// The factory captures `qp` by reference and reads `qp.local()` when it is invoked, so `qp`
/// must outlive the returned factory.
authorizer_factory make_maintenance_socket_authorizer_factory(sharded<cql3::query_processor>& qp) {
    auto make_instance = [&qp] {
        return std::make_unique<maintenance_socket_authorizer>(qp.local());
    };
    return make_instance;
}
|
||||
|
||||
role_manager_factory make_maintenance_socket_role_manager_factory(
|
||||
sharded<cql3::query_processor>& qp,
|
||||
::service::raft_group0_client& g0,
|
||||
sharded<::service::migration_manager>& mm,
|
||||
sharded<cache>& auth_cache) {
|
||||
return [&qp, &g0, &mm, &auth_cache] {
|
||||
return std::make_unique<maintenance_socket_role_manager>(qp.local(), g0, mm.local(), auth_cache.local());
|
||||
for (auto& m : collected) {
|
||||
co_yield std::move(m);
|
||||
}
|
||||
}
|
||||
co_yield co_await sys_ks.make_auth_version_mutation(ts,
|
||||
db::system_keyspace::auth_version_t::v2);
|
||||
};
|
||||
co_await announce_mutations_with_batching(g0,
|
||||
start_operation_func,
|
||||
std::move(gen),
|
||||
as,
|
||||
std::nullopt);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
120
auth/service.hh
120
auth/service.hh
@@ -12,7 +12,6 @@
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
|
||||
#include <seastar/core/coroutine.hh>
|
||||
#include <seastar/core/future.hh>
|
||||
#include <seastar/core/sstring.hh>
|
||||
#include <seastar/util/bool_class.hh>
|
||||
@@ -21,6 +20,7 @@
|
||||
#include "auth/authenticator.hh"
|
||||
#include "auth/authorizer.hh"
|
||||
#include "auth/permission.hh"
|
||||
#include "auth/permissions_cache.hh"
|
||||
#include "auth/cache.hh"
|
||||
#include "auth/role_manager.hh"
|
||||
#include "auth/common.hh"
|
||||
@@ -37,16 +37,19 @@ class query_processor;
|
||||
|
||||
namespace service {
|
||||
class migration_manager;
|
||||
class migration_notifier;
|
||||
class migration_listener;
|
||||
}
|
||||
|
||||
namespace auth {
|
||||
|
||||
class role_or_anonymous;
|
||||
|
||||
/// Factory function types for creating auth module instances on each shard.
|
||||
using authorizer_factory = std::function<std::unique_ptr<authorizer>()>;
|
||||
using authenticator_factory = std::function<std::unique_ptr<authenticator>()>;
|
||||
using role_manager_factory = std::function<std::unique_ptr<role_manager>()>;
|
||||
struct service_config final {
|
||||
sstring authorizer_java_name;
|
||||
sstring authenticator_java_name;
|
||||
sstring role_manager_java_name;
|
||||
};
|
||||
|
||||
///
|
||||
/// Due to poor (in this author's opinion) decisions of Apache Cassandra, certain choices of one role-manager,
|
||||
@@ -72,27 +75,43 @@ public:
|
||||
/// peering_sharded_service inheritance is needed to be able to access shard local authentication service
|
||||
/// given an object from another shard. Used for bouncing lwt requests to correct shard.
|
||||
class service final : public seastar::peering_sharded_service<service> {
|
||||
utils::loading_cache_config _loading_cache_config;
|
||||
std::unique_ptr<permissions_cache> _permissions_cache;
|
||||
cache& _cache;
|
||||
|
||||
cql3::query_processor& _qp;
|
||||
|
||||
::service::raft_group0_client& _group0_client;
|
||||
|
||||
::service::migration_notifier& _mnotifier;
|
||||
|
||||
authorizer::ptr_type _authorizer;
|
||||
|
||||
authenticator::ptr_type _authenticator;
|
||||
|
||||
role_manager::ptr_type _role_manager;
|
||||
|
||||
// Only one of these should be registered, so we end up with some unused instances. Not the end of the world.
|
||||
std::unique_ptr<::service::migration_listener> _migration_listener;
|
||||
|
||||
std::function<void(uint32_t)> _permissions_cache_cfg_cb;
|
||||
serialized_action _permissions_cache_config_action;
|
||||
|
||||
utils::observer<uint32_t> _permissions_cache_max_entries_observer;
|
||||
utils::observer<uint32_t> _permissions_cache_update_interval_in_ms_observer;
|
||||
utils::observer<uint32_t> _permissions_cache_validity_in_ms_observer;
|
||||
|
||||
maintenance_socket_enabled _used_by_maintenance_socket;
|
||||
|
||||
abort_source _as;
|
||||
|
||||
public:
|
||||
service(
|
||||
utils::loading_cache_config,
|
||||
cache& cache,
|
||||
cql3::query_processor&,
|
||||
::service::raft_group0_client&,
|
||||
::service::migration_notifier&,
|
||||
std::unique_ptr<authorizer>,
|
||||
std::unique_ptr<authenticator>,
|
||||
std::unique_ptr<role_manager>,
|
||||
@@ -100,15 +119,16 @@ public:
|
||||
|
||||
///
|
||||
/// This constructor is intended to be used when the class is sharded via \ref seastar::sharded. In that case, the
|
||||
/// arguments must be copyable, which is why we delay construction with instance-construction factories instead
|
||||
/// arguments must be copyable, which is why we delay construction with instance-construction instructions instead
|
||||
/// of the instances themselves.
|
||||
///
|
||||
service(
|
||||
utils::loading_cache_config,
|
||||
cql3::query_processor&,
|
||||
::service::raft_group0_client&,
|
||||
authorizer_factory,
|
||||
authenticator_factory,
|
||||
role_manager_factory,
|
||||
::service::migration_notifier&,
|
||||
::service::migration_manager&,
|
||||
const service_config&,
|
||||
maintenance_socket_enabled,
|
||||
cache&);
|
||||
|
||||
@@ -118,6 +138,8 @@ public:
|
||||
|
||||
future<> ensure_superuser_is_created();
|
||||
|
||||
void update_cache_config();
|
||||
|
||||
void reset_authorization_cache();
|
||||
|
||||
///
|
||||
@@ -130,11 +152,6 @@ public:
|
||||
///
|
||||
future<permission_set> get_uncached_permissions(const role_or_anonymous&, const resource&) const;
|
||||
|
||||
///
|
||||
/// Notify the service that the node is entering maintenance mode.
|
||||
///
|
||||
void set_maintenance_mode();
|
||||
|
||||
///
|
||||
/// Query whether the named role has been granted a role that is a superuser.
|
||||
///
|
||||
@@ -144,11 +161,6 @@ public:
|
||||
///
|
||||
future<bool> has_superuser(std::string_view role_name) const;
|
||||
|
||||
///
|
||||
/// Ensure that the role operations are enabled. Some role managers defer initialization.
|
||||
///
|
||||
future<> ensure_role_operations_are_enabled();
|
||||
|
||||
///
|
||||
/// Create a role with optional authentication information.
|
||||
///
|
||||
@@ -169,13 +181,6 @@ public:
|
||||
|
||||
future<bool> exists(const resource&) const;
|
||||
|
||||
///
|
||||
/// Revoke all permissions granted to any role for a particular resource.
|
||||
///
|
||||
/// \throws \ref unsupported_authorization_operation if revoking permissions is not supported.
|
||||
///
|
||||
future<> revoke_all(const resource&, ::service::group0_batch&) const;
|
||||
|
||||
///
|
||||
/// Produces descriptions that can be used to restore the state of auth. That encompasses
|
||||
/// roles, role grants, and permission grants.
|
||||
@@ -194,9 +199,12 @@ public:
|
||||
return *_role_manager;
|
||||
}
|
||||
|
||||
cql3::query_processor& query_processor() const noexcept {
|
||||
return _qp;
|
||||
}
|
||||
|
||||
future<> commit_mutations(::service::group0_batch&& mc) {
|
||||
co_await std::move(mc).commit(_group0_client, _as, ::service::raft_timeout{});
|
||||
co_await _group0_client.send_group0_read_barrier_to_live_members();
|
||||
return std::move(mc).commit(_group0_client, _as, ::service::raft_timeout{});
|
||||
}
|
||||
|
||||
private:
|
||||
@@ -207,12 +215,8 @@ private:
|
||||
future<std::vector<cql3::description>> describe_permissions() const;
|
||||
};
|
||||
|
||||
void set_maintenance_mode(service&);
|
||||
|
||||
future<bool> has_superuser(const service&, const authenticated_user&);
|
||||
|
||||
future<> ensure_role_operations_are_enabled(service&);
|
||||
|
||||
future<role_set> get_roles(const service&, const authenticated_user&);
|
||||
|
||||
future<permission_set> get_permissions(const service&, const authenticated_user&, const resource&);
|
||||
@@ -396,55 +400,7 @@ future<std::vector<permission_details>> list_filtered_permissions(
|
||||
// Finalizes write operations performed in auth by committing mutations via raft group0.
|
||||
future<> commit_mutations(service& ser, ::service::group0_batch&& mc);
|
||||
|
||||
///
|
||||
/// Factory helper functions for creating auth module instances.
|
||||
/// These are intended for use with sharded<service>::start() where copyable arguments are required.
|
||||
/// The returned factories capture the sharded references and call .local() when invoked on each shard.
|
||||
///
|
||||
|
||||
/// Creates an authorizer factory for config-selectable authorizer types.
|
||||
/// @param name The authorizer class name (e.g., "CassandraAuthorizer", "AllowAllAuthorizer")
|
||||
authorizer_factory make_authorizer_factory(
|
||||
std::string_view name,
|
||||
sharded<cql3::query_processor>& qp);
|
||||
|
||||
/// Creates an authenticator factory for config-selectable authenticator types.
|
||||
/// @param name The authenticator class name (e.g., "PasswordAuthenticator", "AllowAllAuthenticator")
|
||||
authenticator_factory make_authenticator_factory(
|
||||
std::string_view name,
|
||||
sharded<cql3::query_processor>& qp,
|
||||
::service::raft_group0_client& g0,
|
||||
sharded<::service::migration_manager>& mm,
|
||||
sharded<cache>& cache);
|
||||
|
||||
/// Creates a role_manager factory for config-selectable role manager types.
|
||||
/// @param name The role manager class name (e.g., "CassandraRoleManager")
|
||||
role_manager_factory make_role_manager_factory(
|
||||
std::string_view name,
|
||||
sharded<cql3::query_processor>& qp,
|
||||
::service::raft_group0_client& g0,
|
||||
sharded<::service::migration_manager>& mm,
|
||||
sharded<cache>& cache);
|
||||
|
||||
/// Creates a factory for the maintenance socket authenticator.
|
||||
/// This authenticator is not config-selectable and is only used for the maintenance socket.
|
||||
authenticator_factory make_maintenance_socket_authenticator_factory(
|
||||
sharded<cql3::query_processor>& qp,
|
||||
::service::raft_group0_client& g0,
|
||||
sharded<::service::migration_manager>& mm,
|
||||
sharded<cache>& cache);
|
||||
|
||||
/// Creates a factory for the maintenance socket authorizer.
|
||||
/// This authorizer is not config-selectable and is only used for the maintenance socket.
|
||||
/// It grants all permissions unconditionally while delegating grant/revoke to the default authorizer.
|
||||
authorizer_factory make_maintenance_socket_authorizer_factory(sharded<cql3::query_processor>& qp);
|
||||
|
||||
/// Creates a factory for the maintenance socket role manager.
|
||||
/// This role manager is not config-selectable and is only used for the maintenance socket.
|
||||
role_manager_factory make_maintenance_socket_role_manager_factory(
|
||||
sharded<cql3::query_processor>& qp,
|
||||
::service::raft_group0_client& g0,
|
||||
sharded<::service::migration_manager>& mm,
|
||||
sharded<cache>& cache);
|
||||
// Migrates data from old keyspace to new one which supports linearizable writes via raft.
|
||||
future<> migrate_to_auth_v2(db::system_keyspace& sys_ks, ::service::raft_group0_client& g0, start_operation_func_t start_operation_func, abort_source& as);
|
||||
|
||||
}
|
||||
|
||||
@@ -28,14 +28,15 @@
|
||||
#include "cql3/untyped_result_set.hh"
|
||||
#include "cql3/util.hh"
|
||||
#include "db/consistency_level_type.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "exceptions/exceptions.hh"
|
||||
#include "utils/error_injection.hh"
|
||||
#include "utils/log.hh"
|
||||
#include <seastar/core/loop.hh>
|
||||
#include <seastar/coroutine/maybe_yield.hh>
|
||||
#include "service/raft/raft_group0_client.hh"
|
||||
#include "utils/class_registrator.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
#include "password_authenticator.hh"
|
||||
#include "utils/managed_string.hh"
|
||||
|
||||
namespace auth {
|
||||
@@ -43,13 +44,59 @@ namespace auth {
|
||||
|
||||
static logging::logger log("standard_role_manager");
|
||||
|
||||
static const class_registrator<
|
||||
role_manager,
|
||||
standard_role_manager,
|
||||
cql3::query_processor&,
|
||||
::service::raft_group0_client&,
|
||||
::service::migration_manager&,
|
||||
cache&> registration("org.apache.cassandra.auth.CassandraRoleManager");
|
||||
|
||||
/// Selects the consistency level for an internal query keyed by \p role_name.
///
/// \returns QUORUM when the name equals meta::DEFAULT_SUPERUSER_NAME, LOCAL_ONE otherwise.
static db::consistency_level consistency_for_role(std::string_view role_name) noexcept {
    const bool is_default_superuser = (role_name == meta::DEFAULT_SUPERUSER_NAME);
    return is_default_superuser
            ? db::consistency_level::QUORUM
            : db::consistency_level::LOCAL_ONE;
}
|
||||
|
||||
/// Looks up one role by querying the roles table of the current auth keyspace directly.
///
/// The query runs at the consistency level chosen by consistency_for_role().
///
/// \param role_name name of the role to look up.
/// \returns the populated record, or an empty optional when the query yields no rows.
///          Missing `is_superuser` / `can_login` values default to false, and a missing
///          `member_of` column yields an empty role set.
future<std::optional<standard_role_manager::record>> standard_role_manager::legacy_find_record(std::string_view role_name) {
    const sstring select_role = seastar::format("SELECT * FROM {}.{} WHERE {} = ?",
            get_auth_ks_name(_qp),
            meta::roles_table::name,
            meta::roles_table::role_col_name);

    const auto rows = co_await _qp.execute_internal(
            select_role,
            consistency_for_role(role_name),
            internal_distributed_query_state(),
            {sstring(role_name)},
            cql3::query_processor::cache_internal::yes);

    if (rows->empty()) {
        co_return std::optional<record>();
    }

    const cql3::untyped_result_set_row& role_row = rows->one();

    // Columns are read in the same order as the fields of `record`.
    auto name = role_row.get_as<sstring>(sstring(meta::roles_table::role_col_name));
    const bool is_superuser = role_row.get_or<bool>("is_superuser", false);
    const bool can_login = role_row.get_or<bool>("can_login", false);

    role_set granted;
    if (role_row.has("member_of")) {
        granted = role_row.get_set<sstring>("member_of");
    }

    co_return std::make_optional(record{
            std::move(name),
            is_superuser,
            can_login,
            std::move(granted)});
}
|
||||
|
||||
future<std::optional<standard_role_manager::record>> standard_role_manager::find_record(std::string_view role_name) {
|
||||
auto role = _cache.get(role_name);
|
||||
if (legacy_mode(_qp)) {
|
||||
return legacy_find_record(role_name);
|
||||
}
|
||||
auto name = sstring(role_name);
|
||||
auto role = _cache.get(name);
|
||||
if (!role) {
|
||||
return make_ready_future<std::optional<record>>(std::nullopt);
|
||||
}
|
||||
return make_ready_future<std::optional<record>>(std::make_optional(record{
|
||||
.name = sstring(role_name),
|
||||
.name = std::move(name),
|
||||
.is_superuser = role->is_superuser,
|
||||
.can_login = role->can_login,
|
||||
.member_of = role->member_of
|
||||
@@ -76,6 +123,7 @@ standard_role_manager::standard_role_manager(cql3::query_processor& qp, ::servic
|
||||
, _migration_manager(mm)
|
||||
, _cache(cache)
|
||||
, _stopped(make_ready_future<>())
|
||||
, _superuser(password_authenticator::default_superuser(qp.db().get_config()))
|
||||
{}
|
||||
|
||||
std::string_view standard_role_manager::qualified_java_name() const noexcept {
|
||||
@@ -90,12 +138,79 @@ const resource_set& standard_role_manager::protected_resources() const {
|
||||
return resources;
|
||||
}
|
||||
|
||||
future<> standard_role_manager::maybe_create_default_role() {
|
||||
if (default_superuser(_qp).empty()) {
|
||||
co_return;
|
||||
/// Issues the CREATE TABLE statements for the roles, role-members and role-attributes
/// tables of the legacy auth keyspace, delegating each one to
/// create_legacy_metadata_table_if_missing().
///
/// The statements are formatted once and kept in function-local statics.
///
/// \returns a future that resolves when all three helper calls have succeeded.
future<> standard_role_manager::create_legacy_metadata_tables_if_missing() const {
    static const sstring roles_ddl = fmt::format(
            "CREATE TABLE {}.{} ("
            " {} text PRIMARY KEY,"
            " can_login boolean,"
            " is_superuser boolean,"
            " member_of set<text>,"
            " salted_hash text"
            ")",
            meta::legacy::AUTH_KS,
            meta::roles_table::name,
            meta::roles_table::role_col_name);

    static const sstring role_members_ddl = fmt::format(
            "CREATE TABLE {}.{} ("
            " role text,"
            " member text,"
            " PRIMARY KEY (role, member)"
            ")",
            meta::legacy::AUTH_KS,
            ROLE_MEMBERS_CF);

    static const sstring role_attributes_ddl = seastar::format(
            "CREATE TABLE {}.{} ("
            " role text,"
            " name text,"
            " value text,"
            " PRIMARY KEY(role, name)"
            ")",
            meta::legacy::AUTH_KS,
            ROLE_ATTRIBUTES_CF);

    // Start all three creations before waiting on any of them.
    auto roles_done = create_legacy_metadata_table_if_missing(
            meta::roles_table::name, _qp, roles_ddl, _migration_manager);
    auto role_members_done = create_legacy_metadata_table_if_missing(
            ROLE_MEMBERS_CF, _qp, role_members_ddl, _migration_manager);
    auto role_attributes_done = create_legacy_metadata_table_if_missing(
            ROLE_ATTRIBUTES_CF, _qp, role_attributes_ddl, _migration_manager);

    return when_all_succeed(
            std::move(roles_done),
            std::move(role_members_done),
            std::move(role_attributes_done)).discard_result();
}
|
||||
|
||||
/// Inserts the default superuser role row into the legacy auth keyspace unless it is
/// already there.
///
/// Returns without writing when legacy::default_role_row_satisfies() reports that the
/// row for `_superuser` passes the has_can_login predicate. Otherwise inserts the row
/// with `is_superuser` and `can_login` set to true at QUORUM consistency and logs the
/// creation.
///
/// \throws exceptions::unavailable_exception after logging a warning; the original
///         exception object is propagated unchanged.
future<> standard_role_manager::legacy_create_default_role_if_missing() {
    try {
        const auto exists = co_await legacy::default_role_row_satisfies(_qp, &has_can_login, _superuser);
        if (exists) {
            co_return;
        }

        const sstring query = seastar::format("INSERT INTO {}.{} ({}, is_superuser, can_login) VALUES (?, true, true)",
                meta::legacy::AUTH_KS,
                meta::roles_table::name,
                meta::roles_table::role_col_name);

        co_await _qp.execute_internal(
                query,
                db::consistency_level::QUORUM,
                internal_distributed_query_state(),
                {_superuser},
                cql3::query_processor::cache_internal::no).discard_result();

        log.info("Created default superuser role '{}'.", _superuser);
    } catch (const exceptions::unavailable_exception&) {
        log.warn("Skipped default role setup: some nodes were not ready; will retry");
        // Rethrow the in-flight exception itself. `throw e;` would copy-construct a new
        // exception from the handler's static type, slicing any more-derived exception.
        throw;
    }
}
|
||||
|
||||
future<> standard_role_manager::maybe_create_default_role() {
|
||||
auto has_superuser = [this] () -> future<bool> {
|
||||
const sstring query = seastar::format("SELECT * FROM {}.{} WHERE is_superuser = true ALLOW FILTERING", db::system_keyspace::NAME, meta::roles_table::name);
|
||||
const sstring query = seastar::format("SELECT * FROM {}.{} WHERE is_superuser = true ALLOW FILTERING", get_auth_ks_name(_qp), meta::roles_table::name);
|
||||
auto results = co_await _qp.execute_internal(query, db::consistency_level::LOCAL_ONE,
|
||||
internal_distributed_query_state(), cql3::query_processor::cache_internal::yes);
|
||||
for (const auto& result : *results) {
|
||||
@@ -119,12 +234,12 @@ future<> standard_role_manager::maybe_create_default_role() {
|
||||
// There is no superuser which has can_login field - create default role.
|
||||
// Note that we don't check if can_login is set to true.
|
||||
const sstring insert_query = seastar::format("INSERT INTO {}.{} ({}, is_superuser, can_login) VALUES (?, true, true)",
|
||||
db::system_keyspace::NAME,
|
||||
get_auth_ks_name(_qp),
|
||||
meta::roles_table::name,
|
||||
meta::roles_table::role_col_name);
|
||||
co_await collect_mutations(_qp, batch, insert_query, {default_superuser(_qp)});
|
||||
co_await collect_mutations(_qp, batch, insert_query, {_superuser});
|
||||
co_await std::move(batch).commit(_group0_client, _as, get_raft_timeout());
|
||||
log.info("Created default superuser role '{}'.", default_superuser(_qp));
|
||||
log.info("Created default superuser role '{}'.", _superuser);
|
||||
}
|
||||
|
||||
future<> standard_role_manager::maybe_create_default_role_with_retries() {
|
||||
@@ -147,12 +262,78 @@ future<> standard_role_manager::maybe_create_default_role_with_retries() {
|
||||
}
|
||||
}
|
||||
|
||||
static const sstring legacy_table_name{"users"};
|
||||
|
||||
bool standard_role_manager::legacy_metadata_exists() {
|
||||
return _qp.db().has_schema(meta::legacy::AUTH_KS, legacy_table_name);
|
||||
}
|
||||
|
||||
/// Copies every row of the legacy table named by `legacy_table_name` into the roles table
/// of the legacy auth keyspace.
///
/// Each source row becomes a role that can log in; its superuser flag is taken from the
/// row's `super` column (false when absent). Rows are written one at a time through
/// create_or_replace() with an unused group0 batch.
///
/// Any failure is logged and then propagated to the caller.
future<> standard_role_manager::migrate_legacy_metadata() {
    log.info("Starting migration of legacy user metadata.");
    static const sstring query = seastar::format("SELECT * FROM {}.{}", meta::legacy::AUTH_KS, legacy_table_name);

    try {
        // Holding the result set in a local keeps the rows alive across every co_await below.
        const auto legacy_users = co_await _qp.execute_internal(
                query,
                db::consistency_level::QUORUM,
                internal_distributed_query_state(),
                cql3::query_processor::cache_internal::no);

        for (const cql3::untyped_result_set_row& user_row : *legacy_users) {
            role_config migrated;
            migrated.is_superuser = user_row.get_or<bool>("super", false);
            migrated.can_login = true;

            const auto user_name = user_row.get_as<sstring>("name");
            auto unused_batch = ::service::group0_batch::unused();
            co_await create_or_replace(meta::legacy::AUTH_KS, user_name, migrated, unused_batch);
        }

        log.info("Finished migrating legacy user metadata.");
    } catch (...) {
        log.error("Encountered an error during migration!");
        throw;
    }
}
|
||||
|
||||
future<> standard_role_manager::start() {
|
||||
return once_among_shards([this] () -> future<> {
|
||||
if (legacy_mode(_qp)) {
|
||||
co_await create_legacy_metadata_tables_if_missing();
|
||||
}
|
||||
|
||||
auto handler = [this] () -> future<> {
|
||||
co_await maybe_create_default_role_with_retries();
|
||||
if (!_superuser_created_promise.available()) {
|
||||
_superuser_created_promise.set_value();
|
||||
const bool legacy = legacy_mode(_qp);
|
||||
if (legacy) {
|
||||
if (!_superuser_created_promise.available()) {
|
||||
// Counterintuitively, we mark promise as ready before any startup work
|
||||
// because wait_for_schema_agreement() below will block indefinitely
|
||||
// without cluster majority. In that case, blocking node startup
|
||||
// would lead to a cluster deadlock.
|
||||
_superuser_created_promise.set_value();
|
||||
}
|
||||
co_await _migration_manager.wait_for_schema_agreement(_qp.db().real_database(), db::timeout_clock::time_point::max(), &_as);
|
||||
|
||||
if (co_await legacy::any_nondefault_role_row_satisfies(_qp, &has_can_login)) {
|
||||
if (legacy_metadata_exists()) {
|
||||
log.warn("Ignoring legacy user metadata since nondefault roles already exist.");
|
||||
}
|
||||
co_return;
|
||||
}
|
||||
|
||||
if (legacy_metadata_exists()) {
|
||||
co_await migrate_legacy_metadata();
|
||||
co_return;
|
||||
}
|
||||
co_await legacy_create_default_role_if_missing();
|
||||
}
|
||||
if (!legacy) {
|
||||
co_await maybe_create_default_role_with_retries();
|
||||
if (!_superuser_created_promise.available()) {
|
||||
_superuser_created_promise.set_value();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -171,12 +352,21 @@ future<> standard_role_manager::ensure_superuser_is_created() {
|
||||
return _superuser_created_promise.get_shared_future();
|
||||
}
|
||||
|
||||
future<> standard_role_manager::create_or_replace(std::string_view role_name, const role_config& c, ::service::group0_batch& mc) {
|
||||
future<> standard_role_manager::create_or_replace(std::string_view auth_ks_name, std::string_view role_name, const role_config& c, ::service::group0_batch& mc) {
|
||||
const sstring query = seastar::format("INSERT INTO {}.{} ({}, is_superuser, can_login) VALUES (?, ?, ?)",
|
||||
db::system_keyspace::NAME,
|
||||
auth_ks_name,
|
||||
meta::roles_table::name,
|
||||
meta::roles_table::role_col_name);
|
||||
co_await collect_mutations(_qp, mc, query, {sstring(role_name), c.is_superuser, c.can_login});
|
||||
if (auth_ks_name == meta::legacy::AUTH_KS) {
|
||||
co_await _qp.execute_internal(
|
||||
query,
|
||||
consistency_for_role(role_name),
|
||||
internal_distributed_query_state(),
|
||||
{sstring(role_name), c.is_superuser, c.can_login},
|
||||
cql3::query_processor::cache_internal::yes).discard_result();
|
||||
} else {
|
||||
co_await collect_mutations(_qp, mc, query, {sstring(role_name), c.is_superuser, c.can_login});
|
||||
}
|
||||
}
|
||||
|
||||
future<>
|
||||
@@ -186,7 +376,7 @@ standard_role_manager::create(std::string_view role_name, const role_config& c,
|
||||
throw role_already_exists(role_name);
|
||||
}
|
||||
|
||||
return create_or_replace(role_name, c, mc);
|
||||
return create_or_replace(get_auth_ks_name(_qp), role_name, c, mc);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -211,11 +401,20 @@ standard_role_manager::alter(std::string_view role_name, const role_config_updat
|
||||
return make_ready_future<>();
|
||||
}
|
||||
const sstring query = seastar::format("UPDATE {}.{} SET {} WHERE {} = ?",
|
||||
db::system_keyspace::NAME,
|
||||
get_auth_ks_name(_qp),
|
||||
meta::roles_table::name,
|
||||
build_column_assignments(u),
|
||||
meta::roles_table::role_col_name);
|
||||
return collect_mutations(_qp, mc, std::move(query), {sstring(role_name)});
|
||||
if (legacy_mode(_qp)) {
|
||||
return _qp.execute_internal(
|
||||
std::move(query),
|
||||
consistency_for_role(role_name),
|
||||
internal_distributed_query_state(),
|
||||
{sstring(role_name)},
|
||||
cql3::query_processor::cache_internal::no).discard_result();
|
||||
} else {
|
||||
return collect_mutations(_qp, mc, std::move(query), {sstring(role_name)});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -226,11 +425,11 @@ future<> standard_role_manager::drop(std::string_view role_name, ::service::grou
|
||||
// First, revoke this role from all roles that are members of it.
|
||||
const auto revoke_from_members = [this, role_name, &mc] () -> future<> {
|
||||
const sstring query = seastar::format("SELECT member FROM {}.{} WHERE role = ?",
|
||||
db::system_keyspace::NAME,
|
||||
get_auth_ks_name(_qp),
|
||||
ROLE_MEMBERS_CF);
|
||||
const auto members = co_await _qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::LOCAL_ONE,
|
||||
consistency_for_role(role_name),
|
||||
internal_distributed_query_state(),
|
||||
{sstring(role_name)},
|
||||
cql3::query_processor::cache_internal::no);
|
||||
@@ -258,33 +457,102 @@ future<> standard_role_manager::drop(std::string_view role_name, ::service::grou
|
||||
// Delete all attributes for that role
|
||||
const auto remove_attributes_of = [this, role_name, &mc] () -> future<> {
|
||||
const sstring query = seastar::format("DELETE FROM {}.{} WHERE role = ?",
|
||||
db::system_keyspace::NAME,
|
||||
get_auth_ks_name(_qp),
|
||||
ROLE_ATTRIBUTES_CF);
|
||||
co_await collect_mutations(_qp, mc, query, {sstring(role_name)});
|
||||
if (legacy_mode(_qp)) {
|
||||
co_await _qp.execute_internal(query, {sstring(role_name)},
|
||||
cql3::query_processor::cache_internal::yes).discard_result();
|
||||
} else {
|
||||
co_await collect_mutations(_qp, mc, query, {sstring(role_name)});
|
||||
}
|
||||
};
|
||||
// Finally, delete the role itself.
|
||||
const auto delete_role = [this, role_name, &mc] () -> future<> {
|
||||
const sstring query = seastar::format("DELETE FROM {}.{} WHERE {} = ?",
|
||||
db::system_keyspace::NAME,
|
||||
get_auth_ks_name(_qp),
|
||||
meta::roles_table::name,
|
||||
meta::roles_table::role_col_name);
|
||||
|
||||
co_await collect_mutations(_qp, mc, query, {sstring(role_name)});
|
||||
if (legacy_mode(_qp)) {
|
||||
co_await _qp.execute_internal(
|
||||
query,
|
||||
consistency_for_role(role_name),
|
||||
internal_distributed_query_state(),
|
||||
{sstring(role_name)},
|
||||
cql3::query_processor::cache_internal::no).discard_result();
|
||||
} else {
|
||||
co_await collect_mutations(_qp, mc, query, {sstring(role_name)});
|
||||
}
|
||||
};
|
||||
|
||||
co_await when_all_succeed(revoke_from_members, revoke_members_of, remove_attributes_of);
|
||||
co_await delete_role();
|
||||
}
|
||||
|
||||
/// Grants or revokes \p role_name to/from \p grantee_name by writing directly to the
/// tables of the current auth keyspace.
///
/// Two writes are issued together and both must succeed:
///  - the grantee's `member_of` set in the roles table gains or loses \p role_name;
///  - the (role, member) row in the role-members table is inserted or deleted.
///
/// \param grantee_name role whose membership changes.
/// \param role_name role being granted or revoked.
/// \param ch whether the membership is added or removed.
future<>
standard_role_manager::legacy_modify_membership(
        std::string_view grantee_name,
        std::string_view role_name,
        membership_change ch) {
    const auto update_member_of = [this, role_name, grantee_name, ch] () -> future<> {
        const char set_operator = (ch == membership_change::add) ? '+' : '-';
        const auto update_query = seastar::format(
                "UPDATE {}.{} SET member_of = member_of {} ? WHERE {} = ?",
                get_auth_ks_name(_qp),
                meta::roles_table::name,
                set_operator,
                meta::roles_table::role_col_name);
        co_await _qp.execute_internal(
                update_query,
                consistency_for_role(grantee_name),
                internal_distributed_query_state(),
                {role_set{sstring(role_name)}, sstring(grantee_name)},
                cql3::query_processor::cache_internal::no).discard_result();
    };

    const auto update_role_members = [this, role_name, grantee_name, ch] () -> future<> {
        if (ch == membership_change::add) {
            const sstring insert_query = seastar::format("INSERT INTO {}.{} (role, member) VALUES (?, ?)",
                    get_auth_ks_name(_qp),
                    ROLE_MEMBERS_CF);
            co_await _qp.execute_internal(
                    insert_query,
                    consistency_for_role(role_name),
                    internal_distributed_query_state(),
                    {sstring(role_name), sstring(grantee_name)},
                    cql3::query_processor::cache_internal::no).discard_result();
            co_return;
        } else if (ch == membership_change::remove) {
            const sstring delete_query = seastar::format("DELETE FROM {}.{} WHERE role = ? AND member = ?",
                    get_auth_ks_name(_qp),
                    ROLE_MEMBERS_CF);
            co_await _qp.execute_internal(
                    delete_query,
                    consistency_for_role(role_name),
                    internal_distributed_query_state(),
                    {sstring(role_name), sstring(grantee_name)},
                    cql3::query_processor::cache_internal::no).discard_result();
            co_return;
        }
        // Any other value of `ch` performs no write, as with the original switch.
    };

    // The lambdas are locals of this coroutine frame, so they outlive the co_await.
    co_await when_all_succeed(update_member_of, update_role_members).discard_result();
}
|
||||
|
||||
future<>
|
||||
standard_role_manager::modify_membership(
|
||||
std::string_view grantee_name,
|
||||
std::string_view role_name,
|
||||
membership_change ch,
|
||||
::service::group0_batch& mc) {
|
||||
if (legacy_mode(_qp)) {
|
||||
co_return co_await legacy_modify_membership(grantee_name, role_name, ch);
|
||||
}
|
||||
|
||||
const auto modify_roles = seastar::format(
|
||||
"UPDATE {}.{} SET member_of = member_of {} ? WHERE {} = ?",
|
||||
db::system_keyspace::NAME,
|
||||
get_auth_ks_name(_qp),
|
||||
meta::roles_table::name,
|
||||
(ch == membership_change::add ? '+' : '-'),
|
||||
meta::roles_table::role_col_name);
|
||||
@@ -295,12 +563,12 @@ standard_role_manager::modify_membership(
|
||||
switch (ch) {
|
||||
case membership_change::add:
|
||||
modify_role_members = seastar::format("INSERT INTO {}.{} (role, member) VALUES (?, ?)",
|
||||
db::system_keyspace::NAME,
|
||||
get_auth_ks_name(_qp),
|
||||
ROLE_MEMBERS_CF);
|
||||
break;
|
||||
case membership_change::remove:
|
||||
modify_role_members = seastar::format("DELETE FROM {}.{} WHERE role = ? AND member = ?",
|
||||
db::system_keyspace::NAME,
|
||||
get_auth_ks_name(_qp),
|
||||
ROLE_MEMBERS_CF);
|
||||
break;
|
||||
default:
|
||||
@@ -392,21 +660,57 @@ future<role_set> standard_role_manager::query_granted(std::string_view grantee_n
|
||||
}
|
||||
|
||||
future<role_to_directly_granted_map> standard_role_manager::query_all_directly_granted(::service::query_state& qs) {
|
||||
const sstring query = seastar::format("SELECT * FROM {}.{}",
|
||||
get_auth_ks_name(_qp),
|
||||
ROLE_MEMBERS_CF);
|
||||
|
||||
const auto results = co_await _qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::ONE,
|
||||
qs,
|
||||
cql3::query_processor::cache_internal::yes);
|
||||
|
||||
role_to_directly_granted_map roles_map;
|
||||
_cache.for_each_role([&roles_map] (const cache::role_name_t& name, const cache::role_record& record) {
|
||||
for (const auto& granted_role : record.member_of) {
|
||||
roles_map.emplace(name, granted_role);
|
||||
}
|
||||
});
|
||||
std::transform(
|
||||
results->begin(),
|
||||
results->end(),
|
||||
std::inserter(roles_map, roles_map.begin()),
|
||||
[] (const cql3::untyped_result_set_row& row) {
|
||||
return std::make_pair(row.get_as<sstring>("member"), row.get_as<sstring>("role")); }
|
||||
);
|
||||
|
||||
co_return roles_map;
|
||||
}
|
||||
|
||||
future<role_set> standard_role_manager::query_all(::service::query_state& qs) {
|
||||
const sstring query = seastar::format("SELECT {} FROM {}.{}",
|
||||
meta::roles_table::role_col_name,
|
||||
get_auth_ks_name(_qp),
|
||||
meta::roles_table::name);
|
||||
|
||||
// To avoid many copies of a view.
|
||||
static const auto role_col_name_string = sstring(meta::roles_table::role_col_name);
|
||||
|
||||
if (utils::get_local_injector().enter("standard_role_manager_fail_legacy_query")) {
|
||||
if (legacy_mode(_qp)) {
|
||||
throw std::runtime_error("standard_role_manager::query_all: failed due to error injection");
|
||||
}
|
||||
}
|
||||
|
||||
const auto results = co_await _qp.execute_internal(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
qs,
|
||||
cql3::query_processor::cache_internal::yes);
|
||||
|
||||
role_set roles;
|
||||
roles.reserve(_cache.roles_count());
|
||||
_cache.for_each_role([&roles] (const cache::role_name_t& name, const cache::role_record&) {
|
||||
roles.insert(name);
|
||||
});
|
||||
std::transform(
|
||||
results->begin(),
|
||||
results->end(),
|
||||
std::inserter(roles, roles.begin()),
|
||||
[] (const cql3::untyped_result_set_row& row) {
|
||||
return row.get_as<sstring>(role_col_name_string);}
|
||||
);
|
||||
co_return roles;
|
||||
}
|
||||
|
||||
@@ -429,26 +733,31 @@ future<bool> standard_role_manager::can_login(std::string_view role_name) {
|
||||
}
|
||||
|
||||
future<std::optional<sstring>> standard_role_manager::get_attribute(std::string_view role_name, std::string_view attribute_name, ::service::query_state& qs) {
|
||||
auto role = _cache.get(role_name);
|
||||
if (!role) {
|
||||
co_return std::nullopt;
|
||||
const sstring query = seastar::format("SELECT name, value FROM {}.{} WHERE role = ? AND name = ?",
|
||||
get_auth_ks_name(_qp),
|
||||
ROLE_ATTRIBUTES_CF);
|
||||
const auto result_set = co_await _qp.execute_internal(query, db::consistency_level::ONE, qs, {sstring(role_name), sstring(attribute_name)}, cql3::query_processor::cache_internal::yes);
|
||||
if (!result_set->empty()) {
|
||||
const cql3::untyped_result_set_row &row = result_set->one();
|
||||
co_return std::optional<sstring>(row.get_as<sstring>("value"));
|
||||
}
|
||||
auto it = role->attributes.find(attribute_name);
|
||||
if (it != role->attributes.end()) {
|
||||
co_return it->second;
|
||||
}
|
||||
co_return std::nullopt;
|
||||
co_return std::optional<sstring>{};
|
||||
}
|
||||
|
||||
future<role_manager::attribute_vals> standard_role_manager::query_attribute_for_all(std::string_view attribute_name, ::service::query_state& qs) {
|
||||
attribute_vals result;
|
||||
_cache.for_each_role([&result, attribute_name] (const cache::role_name_t& name, const cache::role_record& record) {
|
||||
auto it = record.attributes.find(attribute_name);
|
||||
if (it != record.attributes.end()) {
|
||||
result.emplace(name, it->second);
|
||||
}
|
||||
future<role_manager::attribute_vals> standard_role_manager::query_attribute_for_all (std::string_view attribute_name, ::service::query_state& qs) {
|
||||
return query_all(qs).then([this, attribute_name, &qs] (role_set roles) {
|
||||
return do_with(attribute_vals{}, [this, attribute_name, roles = std::move(roles), &qs] (attribute_vals &role_to_att_val) {
|
||||
return parallel_for_each(roles.begin(), roles.end(), [this, &role_to_att_val, attribute_name, &qs] (sstring role) {
|
||||
return get_attribute(role, attribute_name, qs).then([&role_to_att_val, role] (std::optional<sstring> att_val) {
|
||||
if (att_val) {
|
||||
role_to_att_val.emplace(std::move(role), std::move(*att_val));
|
||||
}
|
||||
});
|
||||
}).then([&role_to_att_val] () {
|
||||
return make_ready_future<attribute_vals>(std::move(role_to_att_val));
|
||||
});
|
||||
});
|
||||
});
|
||||
co_return result;
|
||||
}
|
||||
|
||||
future<> standard_role_manager::set_attribute(std::string_view role_name, std::string_view attribute_name, std::string_view attribute_value, ::service::group0_batch& mc) {
|
||||
@@ -456,10 +765,14 @@ future<> standard_role_manager::set_attribute(std::string_view role_name, std::s
|
||||
throw auth::nonexistant_role(role_name);
|
||||
}
|
||||
const sstring query = seastar::format("INSERT INTO {}.{} (role, name, value) VALUES (?, ?, ?)",
|
||||
db::system_keyspace::NAME,
|
||||
get_auth_ks_name(_qp),
|
||||
ROLE_ATTRIBUTES_CF);
|
||||
co_await collect_mutations(_qp, mc, query,
|
||||
{sstring(role_name), sstring(attribute_name), sstring(attribute_value)});
|
||||
if (legacy_mode(_qp)) {
|
||||
co_await _qp.execute_internal(query, {sstring(role_name), sstring(attribute_name), sstring(attribute_value)}, cql3::query_processor::cache_internal::yes).discard_result();
|
||||
} else {
|
||||
co_await collect_mutations(_qp, mc, query,
|
||||
{sstring(role_name), sstring(attribute_name), sstring(attribute_value)});
|
||||
}
|
||||
}
|
||||
|
||||
future<> standard_role_manager::remove_attribute(std::string_view role_name, std::string_view attribute_name, ::service::group0_batch& mc) {
|
||||
@@ -467,10 +780,14 @@ future<> standard_role_manager::remove_attribute(std::string_view role_name, std
|
||||
throw auth::nonexistant_role(role_name);
|
||||
}
|
||||
const sstring query = seastar::format("DELETE FROM {}.{} WHERE role = ? AND name = ?",
|
||||
db::system_keyspace::NAME,
|
||||
get_auth_ks_name(_qp),
|
||||
ROLE_ATTRIBUTES_CF);
|
||||
co_await collect_mutations(_qp, mc, query,
|
||||
{sstring(role_name), sstring(attribute_name)});
|
||||
if (legacy_mode(_qp)) {
|
||||
co_await _qp.execute_internal(query, {sstring(role_name), sstring(attribute_name)}, cql3::query_processor::cache_internal::yes).discard_result();
|
||||
} else {
|
||||
co_await collect_mutations(_qp, mc, query,
|
||||
{sstring(role_name), sstring(attribute_name)});
|
||||
}
|
||||
}
|
||||
|
||||
future<std::vector<cql3::description>> standard_role_manager::describe_role_grants() {
|
||||
|
||||
@@ -40,6 +40,7 @@ class standard_role_manager final : public role_manager {
|
||||
cache& _cache;
|
||||
future<> _stopped;
|
||||
abort_source _as;
|
||||
std::string _superuser;
|
||||
shared_promise<> _superuser_created_promise;
|
||||
|
||||
public:
|
||||
@@ -96,13 +97,24 @@ private:
|
||||
role_set member_of;
|
||||
};
|
||||
|
||||
future<> create_legacy_metadata_tables_if_missing() const;
|
||||
|
||||
bool legacy_metadata_exists();
|
||||
|
||||
future<> migrate_legacy_metadata();
|
||||
|
||||
future<> legacy_create_default_role_if_missing();
|
||||
|
||||
future<> maybe_create_default_role();
|
||||
future<> maybe_create_default_role_with_retries();
|
||||
|
||||
future<> create_or_replace(std::string_view role_name, const role_config&, ::service::group0_batch&);
|
||||
future<> create_or_replace(std::string_view auth_ks_name, std::string_view role_name, const role_config&, ::service::group0_batch&);
|
||||
|
||||
future<> legacy_modify_membership(std::string_view role_name, std::string_view grantee_name, membership_change);
|
||||
|
||||
future<> modify_membership(std::string_view role_name, std::string_view grantee_name, membership_change, ::service::group0_batch& mc);
|
||||
|
||||
future<std::optional<record>> legacy_find_record(std::string_view role_name);
|
||||
future<std::optional<record>> find_record(std::string_view role_name);
|
||||
future<record> require_record(std::string_view role_name);
|
||||
future<> collect_roles(
|
||||
|
||||
@@ -8,200 +8,244 @@
|
||||
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
|
||||
*/
|
||||
|
||||
#include "auth/transitional.hh"
|
||||
#include "auth/authenticated_user.hh"
|
||||
#include "auth/authenticator.hh"
|
||||
#include "auth/authorizer.hh"
|
||||
#include "auth/default_authorizer.hh"
|
||||
#include "auth/password_authenticator.hh"
|
||||
#include "auth/cache.hh"
|
||||
#include "auth/permission.hh"
|
||||
#include "service/raft/raft_group0_client.hh"
|
||||
#include "utils/class_registrator.hh"
|
||||
|
||||
namespace auth {
|
||||
|
||||
transitional_authenticator::transitional_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache)
|
||||
: transitional_authenticator(std::make_unique<password_authenticator>(qp, g0, mm, cache)) {
|
||||
static const sstring PACKAGE_NAME("com.scylladb.auth.");
|
||||
|
||||
static const sstring& transitional_authenticator_name() {
|
||||
static const sstring name = PACKAGE_NAME + "TransitionalAuthenticator";
|
||||
return name;
|
||||
}
|
||||
|
||||
transitional_authenticator::transitional_authenticator(std::unique_ptr<authenticator> a)
|
||||
: _authenticator(std::move(a)) {
|
||||
static const sstring& transitional_authorizer_name() {
|
||||
static const sstring name = PACKAGE_NAME + "TransitionalAuthorizer";
|
||||
return name;
|
||||
}
|
||||
|
||||
future<> transitional_authenticator::start() {
|
||||
return _authenticator->start();
|
||||
}
|
||||
class transitional_authenticator : public authenticator {
|
||||
std::unique_ptr<authenticator> _authenticator;
|
||||
|
||||
future<> transitional_authenticator::stop() {
|
||||
return _authenticator->stop();
|
||||
}
|
||||
public:
|
||||
static const sstring PASSWORD_AUTHENTICATOR_NAME;
|
||||
|
||||
std::string_view transitional_authenticator::qualified_java_name() const {
|
||||
return "com.scylladb.auth.TransitionalAuthenticator";
|
||||
}
|
||||
|
||||
bool transitional_authenticator::require_authentication() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
authentication_option_set transitional_authenticator::supported_options() const {
|
||||
return _authenticator->supported_options();
|
||||
}
|
||||
|
||||
authentication_option_set transitional_authenticator::alterable_options() const {
|
||||
return _authenticator->alterable_options();
|
||||
}
|
||||
|
||||
future<authenticated_user> transitional_authenticator::authenticate(const credentials_map& credentials) const {
|
||||
auto i = credentials.find(authenticator::USERNAME_KEY);
|
||||
if ((i == credentials.end() || i->second.empty())
|
||||
&& (!credentials.contains(PASSWORD_KEY) || credentials.at(PASSWORD_KEY).empty())) {
|
||||
// return anon user
|
||||
return make_ready_future<authenticated_user>(anonymous_user());
|
||||
transitional_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache)
|
||||
: transitional_authenticator(std::make_unique<password_authenticator>(qp, g0, mm, cache)) {
|
||||
}
|
||||
return make_ready_future().then([this, &credentials] {
|
||||
return _authenticator->authenticate(credentials);
|
||||
}).handle_exception([](auto ep) {
|
||||
try {
|
||||
std::rethrow_exception(ep);
|
||||
} catch (const exceptions::authentication_exception&) {
|
||||
transitional_authenticator(std::unique_ptr<authenticator> a)
|
||||
: _authenticator(std::move(a)) {
|
||||
}
|
||||
|
||||
virtual future<> start() override {
|
||||
return _authenticator->start();
|
||||
}
|
||||
|
||||
virtual future<> stop() override {
|
||||
return _authenticator->stop();
|
||||
}
|
||||
|
||||
virtual std::string_view qualified_java_name() const override {
|
||||
return transitional_authenticator_name();
|
||||
}
|
||||
|
||||
virtual bool require_authentication() const override {
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual authentication_option_set supported_options() const override {
|
||||
return _authenticator->supported_options();
|
||||
}
|
||||
|
||||
virtual authentication_option_set alterable_options() const override {
|
||||
return _authenticator->alterable_options();
|
||||
}
|
||||
|
||||
virtual future<authenticated_user> authenticate(const credentials_map& credentials) const override {
|
||||
auto i = credentials.find(authenticator::USERNAME_KEY);
|
||||
if ((i == credentials.end() || i->second.empty())
|
||||
&& (!credentials.contains(PASSWORD_KEY) || credentials.at(PASSWORD_KEY).empty())) {
|
||||
// return anon user
|
||||
return make_ready_future<authenticated_user>(anonymous_user());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
future<> transitional_authenticator::create(std::string_view role_name, const authentication_options& options, ::service::group0_batch& mc) {
|
||||
return _authenticator->create(role_name, options, mc);
|
||||
}
|
||||
|
||||
future<> transitional_authenticator::alter(std::string_view role_name, const authentication_options& options, ::service::group0_batch& mc) {
|
||||
return _authenticator->alter(role_name, options, mc);
|
||||
}
|
||||
|
||||
future<> transitional_authenticator::drop(std::string_view role_name, ::service::group0_batch& mc) {
|
||||
return _authenticator->drop(role_name, mc);
|
||||
}
|
||||
|
||||
future<custom_options> transitional_authenticator::query_custom_options(std::string_view role_name) const {
|
||||
return _authenticator->query_custom_options(role_name);
|
||||
}
|
||||
|
||||
bool transitional_authenticator::uses_password_hashes() const {
|
||||
return _authenticator->uses_password_hashes();
|
||||
}
|
||||
|
||||
future<std::optional<sstring>> transitional_authenticator::get_password_hash(std::string_view role_name) const {
|
||||
return _authenticator->get_password_hash(role_name);
|
||||
}
|
||||
|
||||
const resource_set& transitional_authenticator::protected_resources() const {
|
||||
return _authenticator->protected_resources();
|
||||
}
|
||||
|
||||
::shared_ptr<sasl_challenge> transitional_authenticator::new_sasl_challenge() const {
|
||||
class sasl_wrapper : public sasl_challenge {
|
||||
public:
|
||||
sasl_wrapper(::shared_ptr<sasl_challenge> sasl)
|
||||
: _sasl(std::move(sasl)) {
|
||||
}
|
||||
|
||||
virtual bytes evaluate_response(bytes_view client_response) override {
|
||||
return make_ready_future().then([this, &credentials] {
|
||||
return _authenticator->authenticate(credentials);
|
||||
}).handle_exception([](auto ep) {
|
||||
try {
|
||||
return _sasl->evaluate_response(client_response);
|
||||
std::rethrow_exception(ep);
|
||||
} catch (const exceptions::authentication_exception&) {
|
||||
_complete = true;
|
||||
return {};
|
||||
// return anon user
|
||||
return make_ready_future<authenticated_user>(anonymous_user());
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
virtual bool is_complete() const override {
|
||||
return _complete || _sasl->is_complete();
|
||||
}
|
||||
virtual future<> create(std::string_view role_name, const authentication_options& options, ::service::group0_batch& mc) override {
|
||||
return _authenticator->create(role_name, options, mc);
|
||||
}
|
||||
|
||||
virtual future<authenticated_user> get_authenticated_user() const override {
|
||||
return futurize_invoke([this] {
|
||||
return _sasl->get_authenticated_user().handle_exception([](auto ep) {
|
||||
try {
|
||||
std::rethrow_exception(ep);
|
||||
} catch (const exceptions::authentication_exception&) {
|
||||
// return anon user
|
||||
return make_ready_future<authenticated_user>(anonymous_user());
|
||||
}
|
||||
virtual future<> alter(std::string_view role_name, const authentication_options& options, ::service::group0_batch& mc) override {
|
||||
return _authenticator->alter(role_name, options, mc);
|
||||
}
|
||||
|
||||
virtual future<> drop(std::string_view role_name, ::service::group0_batch& mc) override {
|
||||
return _authenticator->drop(role_name, mc);
|
||||
}
|
||||
|
||||
virtual future<custom_options> query_custom_options(std::string_view role_name) const override {
|
||||
return _authenticator->query_custom_options(role_name);
|
||||
}
|
||||
|
||||
virtual bool uses_password_hashes() const override {
|
||||
return _authenticator->uses_password_hashes();
|
||||
}
|
||||
|
||||
virtual future<std::optional<sstring>> get_password_hash(std::string_view role_name) const override {
|
||||
return _authenticator->get_password_hash(role_name);
|
||||
}
|
||||
|
||||
virtual const resource_set& protected_resources() const override {
|
||||
return _authenticator->protected_resources();
|
||||
}
|
||||
|
||||
virtual ::shared_ptr<sasl_challenge> new_sasl_challenge() const override {
|
||||
class sasl_wrapper : public sasl_challenge {
|
||||
public:
|
||||
sasl_wrapper(::shared_ptr<sasl_challenge> sasl)
|
||||
: _sasl(std::move(sasl)) {
|
||||
}
|
||||
|
||||
virtual bytes evaluate_response(bytes_view client_response) override {
|
||||
try {
|
||||
return _sasl->evaluate_response(client_response);
|
||||
} catch (const exceptions::authentication_exception&) {
|
||||
_complete = true;
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
virtual bool is_complete() const override {
|
||||
return _complete || _sasl->is_complete();
|
||||
}
|
||||
|
||||
virtual future<authenticated_user> get_authenticated_user() const override {
|
||||
return futurize_invoke([this] {
|
||||
return _sasl->get_authenticated_user().handle_exception([](auto ep) {
|
||||
try {
|
||||
std::rethrow_exception(ep);
|
||||
} catch (const exceptions::authentication_exception&) {
|
||||
// return anon user
|
||||
return make_ready_future<authenticated_user>(anonymous_user());
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
const sstring& get_username() const override {
|
||||
return _sasl->get_username();
|
||||
}
|
||||
const sstring& get_username() const override {
|
||||
return _sasl->get_username();
|
||||
}
|
||||
|
||||
private:
|
||||
::shared_ptr<sasl_challenge> _sasl;
|
||||
private:
|
||||
::shared_ptr<sasl_challenge> _sasl;
|
||||
|
||||
bool _complete = false;
|
||||
};
|
||||
return ::make_shared<sasl_wrapper>(_authenticator->new_sasl_challenge());
|
||||
}
|
||||
bool _complete = false;
|
||||
};
|
||||
return ::make_shared<sasl_wrapper>(_authenticator->new_sasl_challenge());
|
||||
}
|
||||
|
||||
future<> transitional_authenticator::ensure_superuser_is_created() const {
|
||||
return _authenticator->ensure_superuser_is_created();
|
||||
}
|
||||
virtual future<> ensure_superuser_is_created() const override {
|
||||
return _authenticator->ensure_superuser_is_created();
|
||||
}
|
||||
};
|
||||
|
||||
transitional_authorizer::transitional_authorizer(cql3::query_processor& qp)
|
||||
: transitional_authorizer(std::make_unique<default_authorizer>(qp)) {
|
||||
}
|
||||
class transitional_authorizer : public authorizer {
|
||||
std::unique_ptr<authorizer> _authorizer;
|
||||
|
||||
transitional_authorizer::transitional_authorizer(std::unique_ptr<authorizer> a)
|
||||
: _authorizer(std::move(a)) {
|
||||
}
|
||||
public:
|
||||
transitional_authorizer(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm)
|
||||
: transitional_authorizer(std::make_unique<default_authorizer>(qp, g0, mm)) {
|
||||
}
|
||||
transitional_authorizer(std::unique_ptr<authorizer> a)
|
||||
: _authorizer(std::move(a)) {
|
||||
}
|
||||
|
||||
transitional_authorizer::~transitional_authorizer() {
|
||||
}
|
||||
~transitional_authorizer() {
|
||||
}
|
||||
|
||||
future<> transitional_authorizer::start() {
|
||||
return _authorizer->start();
|
||||
}
|
||||
virtual future<> start() override {
|
||||
return _authorizer->start();
|
||||
}
|
||||
|
||||
future<> transitional_authorizer::stop() {
|
||||
return _authorizer->stop();
|
||||
}
|
||||
virtual future<> stop() override {
|
||||
return _authorizer->stop();
|
||||
}
|
||||
|
||||
std::string_view transitional_authorizer::qualified_java_name() const {
|
||||
return "com.scylladb.auth.TransitionalAuthorizer";
|
||||
}
|
||||
virtual std::string_view qualified_java_name() const override {
|
||||
return transitional_authorizer_name();
|
||||
}
|
||||
|
||||
future<permission_set> transitional_authorizer::authorize(const role_or_anonymous&, const resource&) const {
|
||||
static const permission_set transitional_permissions =
|
||||
permission_set::of<
|
||||
permission::CREATE,
|
||||
permission::ALTER,
|
||||
permission::DROP,
|
||||
permission::SELECT,
|
||||
permission::MODIFY>();
|
||||
virtual future<permission_set> authorize(const role_or_anonymous&, const resource&) const override {
|
||||
static const permission_set transitional_permissions =
|
||||
permission_set::of<
|
||||
permission::CREATE,
|
||||
permission::ALTER,
|
||||
permission::DROP,
|
||||
permission::SELECT,
|
||||
permission::MODIFY>();
|
||||
|
||||
return make_ready_future<permission_set>(transitional_permissions);
|
||||
}
|
||||
return make_ready_future<permission_set>(transitional_permissions);
|
||||
}
|
||||
|
||||
future<> transitional_authorizer::grant(std::string_view s, permission_set ps, const resource& r, ::service::group0_batch& mc) {
|
||||
return _authorizer->grant(s, std::move(ps), r, mc);
|
||||
}
|
||||
virtual future<> grant(std::string_view s, permission_set ps, const resource& r, ::service::group0_batch& mc) override {
|
||||
return _authorizer->grant(s, std::move(ps), r, mc);
|
||||
}
|
||||
|
||||
future<> transitional_authorizer::revoke(std::string_view s, permission_set ps, const resource& r, ::service::group0_batch& mc) {
|
||||
return _authorizer->revoke(s, std::move(ps), r, mc);
|
||||
}
|
||||
virtual future<> revoke(std::string_view s, permission_set ps, const resource& r, ::service::group0_batch& mc) override {
|
||||
return _authorizer->revoke(s, std::move(ps), r, mc);
|
||||
}
|
||||
|
||||
future<std::vector<permission_details>> transitional_authorizer::list_all() const {
|
||||
return _authorizer->list_all();
|
||||
}
|
||||
virtual future<std::vector<permission_details>> list_all() const override {
|
||||
return _authorizer->list_all();
|
||||
}
|
||||
|
||||
future<> transitional_authorizer::revoke_all(std::string_view s, ::service::group0_batch& mc) {
|
||||
return _authorizer->revoke_all(s, mc);
|
||||
}
|
||||
virtual future<> revoke_all(std::string_view s, ::service::group0_batch& mc) override {
|
||||
return _authorizer->revoke_all(s, mc);
|
||||
}
|
||||
|
||||
future<> transitional_authorizer::revoke_all(const resource& r, ::service::group0_batch& mc) {
|
||||
return _authorizer->revoke_all(r, mc);
|
||||
}
|
||||
virtual future<> revoke_all(const resource& r, ::service::group0_batch& mc) override {
|
||||
return _authorizer->revoke_all(r, mc);
|
||||
}
|
||||
|
||||
const resource_set& transitional_authorizer::protected_resources() const {
|
||||
return _authorizer->protected_resources();
|
||||
}
|
||||
virtual const resource_set& protected_resources() const override {
|
||||
return _authorizer->protected_resources();
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
//
|
||||
// To ensure correct initialization order, we unfortunately need to use string literals.
|
||||
//
|
||||
|
||||
static const class_registrator<
|
||||
auth::authenticator,
|
||||
auth::transitional_authenticator,
|
||||
cql3::query_processor&,
|
||||
::service::raft_group0_client&,
|
||||
::service::migration_manager&,
|
||||
auth::cache&> transitional_authenticator_reg(auth::PACKAGE_NAME + "TransitionalAuthenticator");
|
||||
|
||||
static const class_registrator<
|
||||
auth::authorizer,
|
||||
auth::transitional_authorizer,
|
||||
cql3::query_processor&,
|
||||
::service::raft_group0_client&,
|
||||
::service::migration_manager&> transitional_authorizer_reg(auth::PACKAGE_NAME + "TransitionalAuthorizer");
|
||||
|
||||
@@ -1,81 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2026-present ScyllaDB
|
||||
*
|
||||
* Modified by ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "auth/authenticator.hh"
|
||||
#include "auth/authorizer.hh"
|
||||
#include "auth/cache.hh"
|
||||
|
||||
namespace cql3 {
|
||||
class query_processor;
|
||||
}
|
||||
|
||||
namespace service {
|
||||
class raft_group0_client;
|
||||
class migration_manager;
|
||||
}
|
||||
|
||||
namespace auth {
|
||||
|
||||
///
|
||||
/// Transitional authenticator that allows anonymous access when credentials are not provided
|
||||
/// or authentication fails. Used for migration scenarios.
|
||||
///
|
||||
class transitional_authenticator : public authenticator {
|
||||
std::unique_ptr<authenticator> _authenticator;
|
||||
|
||||
public:
|
||||
transitional_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache);
|
||||
transitional_authenticator(std::unique_ptr<authenticator> a);
|
||||
|
||||
virtual future<> start() override;
|
||||
virtual future<> stop() override;
|
||||
virtual std::string_view qualified_java_name() const override;
|
||||
virtual bool require_authentication() const override;
|
||||
virtual authentication_option_set supported_options() const override;
|
||||
virtual authentication_option_set alterable_options() const override;
|
||||
virtual future<authenticated_user> authenticate(const credentials_map& credentials) const override;
|
||||
virtual future<> create(std::string_view role_name, const authentication_options& options, ::service::group0_batch& mc) override;
|
||||
virtual future<> alter(std::string_view role_name, const authentication_options& options, ::service::group0_batch& mc) override;
|
||||
virtual future<> drop(std::string_view role_name, ::service::group0_batch& mc) override;
|
||||
virtual future<custom_options> query_custom_options(std::string_view role_name) const override;
|
||||
virtual bool uses_password_hashes() const override;
|
||||
virtual future<std::optional<sstring>> get_password_hash(std::string_view role_name) const override;
|
||||
virtual const resource_set& protected_resources() const override;
|
||||
virtual ::shared_ptr<sasl_challenge> new_sasl_challenge() const override;
|
||||
virtual future<> ensure_superuser_is_created() const override;
|
||||
};
|
||||
|
||||
///
|
||||
/// Transitional authorizer that grants a fixed set of permissions to all users.
|
||||
/// Used for migration scenarios.
|
||||
///
|
||||
class transitional_authorizer : public authorizer {
|
||||
std::unique_ptr<authorizer> _authorizer;
|
||||
|
||||
public:
|
||||
transitional_authorizer(cql3::query_processor& qp);
|
||||
transitional_authorizer(std::unique_ptr<authorizer> a);
|
||||
~transitional_authorizer();
|
||||
|
||||
virtual future<> start() override;
|
||||
virtual future<> stop() override;
|
||||
virtual std::string_view qualified_java_name() const override;
|
||||
virtual future<permission_set> authorize(const role_or_anonymous&, const resource&) const override;
|
||||
virtual future<> grant(std::string_view s, permission_set ps, const resource& r, ::service::group0_batch& mc) override;
|
||||
virtual future<> revoke(std::string_view s, permission_set ps, const resource& r, ::service::group0_batch& mc) override;
|
||||
virtual future<std::vector<permission_details>> list_all() const override;
|
||||
virtual future<> revoke_all(std::string_view s, ::service::group0_batch& mc) override;
|
||||
virtual future<> revoke_all(const resource& r, ::service::group0_batch& mc) override;
|
||||
virtual const resource_set& protected_resources() const override;
|
||||
};
|
||||
|
||||
} // namespace auth
|
||||
@@ -10,15 +10,24 @@
|
||||
#include <random>
|
||||
#include <unordered_set>
|
||||
#include <algorithm>
|
||||
#include <seastar/core/sleep.hh>
|
||||
#include <seastar/core/coroutine.hh>
|
||||
#include <seastar/coroutine/maybe_yield.hh>
|
||||
#include <seastar/util/later.hh>
|
||||
|
||||
#include "gms/endpoint_state.hh"
|
||||
#include "gms/versioned_value.hh"
|
||||
#include "keys/keys.hh"
|
||||
#include "replica/database.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "db/system_distributed_keyspace.hh"
|
||||
#include "dht/token-sharding.hh"
|
||||
#include "locator/token_metadata.hh"
|
||||
#include "types/set.hh"
|
||||
#include "gms/application_state.hh"
|
||||
#include "gms/inet_address.hh"
|
||||
#include "gms/gossiper.hh"
|
||||
#include "gms/feature_service.hh"
|
||||
#include "utils/assert.hh"
|
||||
#include "utils/error_injection.hh"
|
||||
#include "utils/UUID_gen.hh"
|
||||
@@ -32,6 +41,16 @@
|
||||
|
||||
extern logging::logger cdc_log;
|
||||
|
||||
// Reads the SHARD_COUNT application state that the gossiper holds for
// `endpoint` and parses it as an integer.
// Returns -1 when the gossiper has no such state for the endpoint.
static int get_shard_count(const locator::host_id& endpoint, const gms::gossiper& g) {
    const auto shard_count_state = g.get_application_state_ptr(endpoint, gms::application_state::SHARD_COUNT);
    if (!shard_count_state) {
        return -1;
    }
    return std::stoi(shard_count_state->value());
}
|
||||
|
||||
// Reads the IGNORE_MSB_BITS application state that the gossiper holds for
// `endpoint` and parses it as an integer.
// Returns 0 when the gossiper has no such state for the endpoint.
static unsigned get_sharding_ignore_msb(const locator::host_id& endpoint, const gms::gossiper& g) {
    const auto ignore_msb_state = g.get_application_state_ptr(endpoint, gms::application_state::IGNORE_MSB_BITS);
    if (!ignore_msb_state) {
        return 0;
    }
    return std::stoi(ignore_msb_state->value());
}
|
||||
|
||||
namespace db {
|
||||
extern thread_local data_type cdc_streams_set_type;
|
||||
}
|
||||
@@ -206,6 +225,12 @@ static std::vector<stream_id> create_stream_ids(
|
||||
return result;
|
||||
}
|
||||
|
||||
// Walks the gossiper's endpoint states, asking to stop at the first one whose
// host id is greater than `my_host_id`. Returns true when the walk reports
// stop_iteration::no.
// NOTE(review): presumably that means no visited endpoint had a greater host
// id - confirm against for_each_endpoint_state_until().
bool should_propose_first_generation(const locator::host_id& my_host_id, const gms::gossiper& g) {
    const auto walk_result = g.for_each_endpoint_state_until([&my_host_id] (const gms::endpoint_state& eps) {
        const bool other_is_greater = my_host_id < eps.get_host_id();
        return stop_iteration(other_is_greater);
    });
    return walk_result == stop_iteration::no;
}
|
||||
|
||||
bool is_cdc_generation_optimal(const cdc::topology_description& gen, const locator::token_metadata& tm) {
|
||||
if (tm.sorted_tokens().size() != gen.entries().size()) {
|
||||
// We probably have garbage streams from old generations
|
||||
@@ -305,6 +330,38 @@ future<utils::chunked_vector<mutation>> get_cdc_generation_mutations_v3(
|
||||
co_return co_await get_common_cdc_generation_mutations(s, pkey, std::move(get_ckey), desc, mutation_size_threshold, ts);
|
||||
}
|
||||
|
||||
// non-static for testing
|
||||
size_t limit_of_streams_in_topology_description() {
    // A stream takes 16 bytes and the whole description is capped at 4MB,
    // which allows at most 262144 streams.
    constexpr size_t max_description_bytes = 4 * 1024 * 1024;
    constexpr size_t bytes_per_stream = 16;
    return max_description_bytes / bytes_per_stream;
}
|
||||
|
||||
// non-static for testing
|
||||
// Caps the total number of streams in `desc`.
//
// The cap is limit_of_streams_in_topology_description(), raised to the number
// of entries when there are more entries than that. If the description already
// fits, it is returned unchanged. Otherwise every entry holding more than its
// equal share of the cap gets its streams regenerated with exactly that share.
topology_description limit_number_of_streams_if_needed(topology_description&& desc) {
    uint64_t total_streams = 0;
    for (const auto& range_desc : desc.entries()) {
        total_streams += range_desc.streams.size();
    }

    const size_t vnode_count = desc.entries().size();
    const size_t max_streams = std::max(limit_of_streams_in_topology_description(), vnode_count);
    if (total_streams <= max_streams) {
        return std::move(desc);
    }

    const size_t per_vnode_cap = max_streams / vnode_count;
    auto entries = std::move(desc).entries();

    // Each entry's range starts where the previous one ended; the first entry
    // starts at the end of the last one.
    auto range_start = entries.back().token_range_end;
    for (size_t idx = 0; idx < entries.size(); ++idx) {
        auto& entry = entries[idx];
        const auto range_end = entry.token_range_end;
        if (entry.streams.size() > per_vnode_cap) {
            entry.streams = create_stream_ids(idx, range_start, range_end, per_vnode_cap, entry.sharding_ignore_msb);
        }
        range_start = range_end;
    }
    return topology_description(std::move(entries));
}
|
||||
|
||||
// Compute a set of tokens that split the token ring into vnodes.
|
||||
static auto get_tokens(const std::unordered_set<dht::token>& bootstrap_tokens, const locator::token_metadata_ptr tmptr) {
|
||||
auto tokens = tmptr->sorted_tokens();
|
||||
@@ -362,6 +419,364 @@ db_clock::time_point new_generation_timestamp(bool add_delay, std::chrono::milli
|
||||
return ts;
|
||||
}
|
||||
|
||||
// Creates a new CDC generation, stores its description in the distributed
// system keyspace and returns the id of the new generation.
//
// bootstrap_tokens - tokens treated as owned by this node; their sharding
//                    parameters are taken from the local configuration.
// add_delay        - forwarded to new_generation_timestamp().
//
// The V2 id/storage format is used when the CDC_GENERATIONS_V2 feature is
// enabled; otherwise the generation is written in the old V1 format.
// Throws std::runtime_error if a token has no known endpoint.
future<cdc::generation_id> generation_service::legacy_make_new_generation(const std::unordered_set<dht::token>& bootstrap_tokens, bool add_delay) {
    const locator::token_metadata_ptr tmptr = _token_metadata.get();

    // Sharding parameters (shard count, ignore_msb bits) of the node owning
    // the vnode that ends at the given token. Remote nodes are looked up
    // through their gossiped application states.
    auto get_sharding_info = [&] (dht::token end) -> std::pair<size_t, uint8_t> {
        if (bootstrap_tokens.contains(end)) {
            return {smp::count, _cfg.ignore_msb_bits};
        }
        auto owner = tmptr->get_endpoint(end);
        if (!owner) {
            throw std::runtime_error(
                    format("Can't find endpoint for token {}", end));
        }
        auto shard_count = get_shard_count(*owner, _gossiper);
        return {shard_count > 0 ? shard_count : 1, get_sharding_ignore_msb(*owner, _gossiper)};
    };

    auto generation_uuid = utils::make_random_uuid();
    auto description = make_new_generation_description(bootstrap_tokens, get_sharding_info, tmptr);

    // The caller is expected to guarantee that the ring has normal tokens.
    auto owner_count = tmptr->count_normal_token_owners();
    SCYLLA_ASSERT(owner_count);

    if (!_feature_service.cdc_generations_v2) {
        // CDC_GENERATIONS_V2 is not enabled, so some nodes may not understand
        // the V2 format yet and the generation must use the old one.
        //
        // The V1 format keeps the whole generation in a single row. In a large
        // cluster the generation may contain so many streams that the row
        // grows up to 32MB, far above the limit imposed by
        // commitlog_segment_size_in_mb; such a write would fail and the new
        // node could not join. To keep the row below 4MB, some streams are
        // dropped from each vnode when the total number of streams is too large.
        description = limit_number_of_streams_if_needed(std::move(description));

        cdc_log.warn(
                "Creating a new CDC generation in the old storage format due to a partially upgraded cluster:"
                " the CDC_GENERATIONS_V2 feature is known by this node, but not enabled in the cluster."
                " The old storage format forces us to create a suboptimal generation."
                " It is recommended to finish the upgrade and then create a new generation either by bootstrapping"
                " a new node or running the checkAndRepairCdcStreams nodetool command.");

        // Begin the race.
        cdc::generation_id_v1 legacy_id{new_generation_timestamp(add_delay, _cfg.ring_delay)};

        co_await _sys_dist_ks.local().insert_cdc_topology_description(legacy_id, std::move(description), { owner_count });

        cdc_log.info("New CDC generation: {}", legacy_id);
        co_return legacy_id;
    }

    cdc_log.info("Inserting new generation data at UUID {}", generation_uuid);
    // This may take a while.
    co_await _sys_dist_ks.local().insert_cdc_generation(generation_uuid, description, { owner_count });

    // Begin the race.
    cdc::generation_id_v2 new_id{new_generation_timestamp(add_delay, _cfg.ring_delay), generation_uuid};

    cdc_log.info("New CDC generation: {}", new_id);
    co_return new_id;
}
|
||||
|
||||
/* Retrieves CDC streams generation timestamp from the given endpoint's application state (broadcasted through gossip).
|
||||
* We might be in the middle of a rolling upgrade, so the timestamp might not be there (if the other node didn't upgrade yet),
|
||||
* but if the cluster already supports CDC, then every newly joining node will propose a new CDC generation,
|
||||
* which means it will gossip the generation's timestamp.
|
||||
*/
|
||||
// Extracts the CDC generation id from the CDC_GENERATION_ID application state
// of `eps`. Returns std::nullopt when that state is absent; `endpoint` is only
// used for trace logging.
static std::optional<cdc::generation_id> get_generation_id_for(const locator::host_id& endpoint, const gms::endpoint_state& eps) {
    const auto* state = eps.get_application_state_ptr(gms::application_state::CDC_GENERATION_ID);
    if (state == nullptr) {
        return std::nullopt;
    }

    const auto& gen_id_string = state->value();
    cdc_log.trace("endpoint={}, gen_id_string={}", endpoint, gen_id_string);
    return gms::versioned_value::cdc_generation_id_from_string(gen_id_string);
}
|
||||
|
||||
// Reads the description of a V2 generation. The distributed system keyspace is
// consulted first; the local system keyspace is used as a fallback, but only
// when the generation uuid is a timeuuid.
static future<std::optional<cdc::topology_description>> retrieve_generation_data_v2(
        cdc::generation_id_v2 id,
        db::system_keyspace& sys_ks,
        db::system_distributed_keyspace& sys_dist_ks) {
    auto description = co_await sys_dist_ks.read_cdc_generation(id.id);

    // No fallback needed if the data was found. If the id is not a timeuuid,
    // the fallback is pointless: system.cdc_generations_v3 stores generation
    // ids as timeuuids, so the generation cannot be there and the query would
    // fail with a marshaling error.
    if (description || !id.id.is_timestamp()) {
        co_return description;
    }

    // If we entered legacy mode due to recovery, we (or some other node) might
    // gossip about a generation that was previously propagated through raft.
    // In that case it sits in the system.cdc_generations_v3 table.
    description = co_await sys_ks.read_cdc_generation_opt(id.id);
    co_return description;
}
|
||||
|
||||
// Reads the description of the generation identified by `gen_id`, dispatching
// on the id format: V1 ids are read as a topology description from the
// distributed system keyspace, V2 ids go through retrieve_generation_data_v2().
static future<std::optional<cdc::topology_description>> retrieve_generation_data(
        cdc::generation_id gen_id,
        db::system_keyspace& sys_ks,
        db::system_distributed_keyspace& sys_dist_ks,
        db::system_distributed_keyspace::context ctx) {
    auto read_v1 = [&] (const cdc::generation_id_v1& id) {
        return sys_dist_ks.read_cdc_topology_description(id, ctx);
    };
    auto read_v2 = [&] (const cdc::generation_id_v2& id) {
        return retrieve_generation_data_v2(id, sys_ks, sys_dist_ks);
    };
    return std::visit(make_visitor(std::move(read_v1), std::move(read_v2)), gen_id);
}
|
||||
|
||||
// Makes sure the CDC description table has an entry for generation `gen_id`.
// Does nothing beyond logging if the entry already exists; otherwise the
// generation data is read and the entry is created from it.
// Throws no_generation_data_exception when the generation data cannot be found.
static future<> do_update_streams_description(
        cdc::generation_id gen_id,
        db::system_keyspace& sys_ks,
        db::system_distributed_keyspace& sys_dist_ks,
        db::system_distributed_keyspace::context ctx) {
    const bool already_described = co_await sys_dist_ks.cdc_desc_exists(get_ts(gen_id), ctx);
    if (already_described) {
        cdc_log.info("Generation {}: streams description table already updated.", gen_id);
        co_return;
    }

    // Another node may be inserting the same description concurrently; that is
    // fine because the operation is idempotent.
    auto generation_data = co_await retrieve_generation_data(gen_id, sys_ks, sys_dist_ks, ctx);
    if (!generation_data) {
        throw no_generation_data_exception(gen_id);
    }

    co_await sys_dist_ks.create_cdc_desc(get_ts(gen_id), *generation_data, ctx);
    cdc_log.info("CDC description table successfully updated with generation {}.", gen_id);
}
|
||||
|
||||
/* Inform CDC users about a generation of streams (identified by the given timestamp)
|
||||
* by inserting it into the cdc_streams table.
|
||||
*
|
||||
* Assumes that the cdc_generation_descriptions table contains this generation.
|
||||
*
|
||||
* Returning from this function does not mean that the table update was successful: the function
|
||||
* might run an asynchronous task in the background.
|
||||
*/
|
||||
static future<> update_streams_description(
|
||||
cdc::generation_id gen_id,
|
||||
db::system_keyspace& sys_ks,
|
||||
shared_ptr<db::system_distributed_keyspace> sys_dist_ks,
|
||||
noncopyable_function<unsigned()> get_num_token_owners,
|
||||
abort_source& abort_src) {
|
||||
try {
|
||||
co_await do_update_streams_description(gen_id, sys_ks, *sys_dist_ks, { get_num_token_owners() });
|
||||
} catch (...) {
|
||||
cdc_log.warn(
|
||||
"Could not update CDC description table with generation {}: {}. Will retry in the background.",
|
||||
gen_id, std::current_exception());
|
||||
|
||||
// It is safe to discard this future: we keep system distributed keyspace alive.
|
||||
(void)(([] (cdc::generation_id gen_id,
|
||||
db::system_keyspace& sys_ks,
|
||||
shared_ptr<db::system_distributed_keyspace> sys_dist_ks,
|
||||
noncopyable_function<unsigned()> get_num_token_owners,
|
||||
abort_source& abort_src) -> future<> {
|
||||
while (true) {
|
||||
try {
|
||||
co_await sleep_abortable(std::chrono::seconds(60), abort_src);
|
||||
} catch (seastar::sleep_aborted&) {
|
||||
cdc_log.warn( "Aborted update CDC description table with generation {}", gen_id);
|
||||
co_return;
|
||||
}
|
||||
try {
|
||||
co_await do_update_streams_description(gen_id, sys_ks, *sys_dist_ks, { get_num_token_owners() });
|
||||
co_return;
|
||||
} catch (...) {
|
||||
cdc_log.warn(
|
||||
"Could not update CDC description table with generation {}: {}. Will try again.",
|
||||
gen_id, std::current_exception());
|
||||
}
|
||||
}
|
||||
})(gen_id, sys_ks, std::move(sys_dist_ks), std::move(get_num_token_owners), abort_src));
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a db_clock time point from the value returned by
// utils::UUID_gen::unix_timestamp() for `uuid`.
static db_clock::time_point as_timepoint(const utils::UUID& uuid) {
    const auto since_epoch = utils::UUID_gen::unix_timestamp(uuid);
    return db_clock::time_point(since_epoch);
}
|
||||
|
||||
// Reads the generation timestamps via sys_dist_ks.get_cdc_desc_v1_timestamps().
//
// Any failure is logged and the read is retried after a 60 second abortable sleep,
// so the function only returns once a read succeeds. An exception thrown by the
// sleep itself (e.g. on abort) is not caught here and propagates to the caller.
static future<std::vector<db_clock::time_point>> get_cdc_desc_v1_timestamps(
        db::system_distributed_keyspace& sys_dist_ks,
        abort_source& abort_src,
        const noncopyable_function<unsigned()>& get_num_token_owners) {
    const auto retry_delay = std::chrono::seconds(60);

    for (;;) {
        try {
            co_return co_await sys_dist_ks.get_cdc_desc_v1_timestamps({ get_num_token_owners() });
        } catch (...) {
            cdc_log.warn(
                    "Failed to retrieve generation timestamps for rewriting: {}. Retrying in 60s.",
                    std::current_exception());
        }

        co_await sleep_abortable(retry_delay, abort_src);
    }
}
|
||||
|
||||
// Contains a CDC log table's creation time (extracted from its schema's id)
|
||||
// and its CDC TTL setting.
|
||||
struct time_and_ttl {
|
||||
db_clock::time_point creation_time;
|
||||
int ttl;
|
||||
};
|
||||
|
||||
/*
|
||||
* See `maybe_rewrite_streams_descriptions`.
|
||||
* This is the long-running-in-the-background part of that function.
|
||||
* It returns the timestamp of the last rewritten generation (if any).
|
||||
*/
|
||||
static future<std::optional<cdc::generation_id_v1>> rewrite_streams_descriptions(
|
||||
std::vector<time_and_ttl> times_and_ttls,
|
||||
db::system_keyspace& sys_ks,
|
||||
shared_ptr<db::system_distributed_keyspace> sys_dist_ks,
|
||||
noncopyable_function<unsigned()> get_num_token_owners,
|
||||
abort_source& abort_src) {
|
||||
cdc_log.info("Retrieving generation timestamps for rewriting...");
|
||||
auto tss = co_await get_cdc_desc_v1_timestamps(*sys_dist_ks, abort_src, get_num_token_owners);
|
||||
cdc_log.info("Generation timestamps retrieved.");
|
||||
|
||||
// Find first generation timestamp such that some CDC log table may contain data before this timestamp.
|
||||
// This predicate is monotonic w.r.t the timestamps.
|
||||
auto now = db_clock::now();
|
||||
std::sort(tss.begin(), tss.end());
|
||||
auto first = std::partition_point(tss.begin(), tss.end(), [&] (db_clock::time_point ts) {
|
||||
// partition_point finds first element that does *not* satisfy the predicate.
|
||||
return std::none_of(times_and_ttls.begin(), times_and_ttls.end(),
|
||||
[&] (const time_and_ttl& tat) {
|
||||
// In this CDC log table there are no entries older than the table's creation time
|
||||
// or (now - the table's ttl). We subtract 10s to account for some possible clock drift.
|
||||
// If ttl is set to 0 then entries in this table never expire. In that case we look
|
||||
// only at the table's creation time.
|
||||
auto no_entries_older_than =
|
||||
(tat.ttl == 0 ? tat.creation_time : std::max(tat.creation_time, now - std::chrono::seconds(tat.ttl)))
|
||||
- std::chrono::seconds(10);
|
||||
return no_entries_older_than < ts;
|
||||
});
|
||||
});
|
||||
|
||||
// Find first generation timestamp such that some CDC log table may contain data in this generation.
|
||||
// This and all later generations need to be written to the new streams table.
|
||||
if (first != tss.begin()) {
|
||||
--first;
|
||||
}
|
||||
|
||||
if (first == tss.end()) {
|
||||
cdc_log.info("No generations to rewrite.");
|
||||
co_return std::nullopt;
|
||||
}
|
||||
|
||||
cdc_log.info("First generation to rewrite: {}", *first);
|
||||
|
||||
bool each_success = true;
|
||||
co_await max_concurrent_for_each(first, tss.end(), 10, [&] (db_clock::time_point ts) -> future<> {
|
||||
while (true) {
|
||||
try {
|
||||
co_return co_await do_update_streams_description(cdc::generation_id_v1{ts}, sys_ks, *sys_dist_ks, { get_num_token_owners() });
|
||||
} catch (const no_generation_data_exception& e) {
|
||||
cdc_log.error("Failed to rewrite streams for generation {}: {}. Giving up.", ts, e);
|
||||
each_success = false;
|
||||
co_return;
|
||||
} catch (...) {
|
||||
cdc_log.warn("Failed to rewrite streams for generation {}: {}. Retrying in 60s.", ts, std::current_exception());
|
||||
}
|
||||
co_await sleep_abortable(std::chrono::seconds(60), abort_src);
|
||||
}
|
||||
});
|
||||
|
||||
if (each_success) {
|
||||
cdc_log.info("Rewriting stream tables finished successfully.");
|
||||
} else {
|
||||
cdc_log.info("Rewriting stream tables finished, but some generations could not be rewritten (check the logs).");
|
||||
}
|
||||
|
||||
if (first != tss.end()) {
|
||||
co_return cdc::generation_id_v1{*std::prev(tss.end())};
|
||||
}
|
||||
|
||||
co_return std::nullopt;
|
||||
}
|
||||
|
||||
// Rewrites old (V1) stream descriptions into the new streams table, if that is still needed.
//
// Returns early, without rewriting, when:
//  - the CDC_DESC_V1 table does not exist,
//  - the local node already recorded that the rewrite happened,
//  - rewriting is disabled through the `dont_rewrite_streams` config flag, or
//  - no CDC log tables exist (in which case the rewrite is recorded as done).
// Otherwise waits 10 seconds, runs rewrite_streams_descriptions() and records its result.
future<> generation_service::maybe_rewrite_streams_descriptions() {
    const bool has_v1_table = _db.has_schema(_sys_dist_ks.local().NAME, _sys_dist_ks.local().CDC_DESC_V1);
    if (!has_v1_table) {
        // This cluster never went through a Scylla version which used this table
        // or the user deleted the table. Nothing to do.
        co_return;
    }

    if (co_await _sys_ks.local().cdc_is_rewritten()) {
        co_return;
    }

    if (_cfg.dont_rewrite_streams) {
        cdc_log.warn("Stream rewriting disabled. Manual administrator intervention may be required...");
        co_return;
    }

    // For each CDC log table get the TTL setting (from CDC options) and the table's creation time
    std::vector<time_and_ttl> times_and_ttls;
    _db.get_tables_metadata().for_each_table([&] (table_id, lw_shared_ptr<replica::table> tbl) {
        const auto& log_schema = *tbl->schema();
        auto base_schema = cdc::get_base_table(_db, log_schema.ks_name(), log_schema.cf_name());
        if (!base_schema) {
            // Not a CDC log table.
            return;
        }
        const auto& base_cdc_opts = base_schema->cdc_options();
        if (!base_cdc_opts.enabled()) {
            // This table is named like a CDC log table but it's not one.
            return;
        }

        times_and_ttls.push_back(time_and_ttl{as_timepoint(log_schema.id().uuid()), base_cdc_opts.ttl()});
    });

    if (times_and_ttls.empty()) {
        // There's no point in rewriting old generations' streams (they don't contain any data).
        cdc_log.info("No CDC log tables present, not rewriting stream tables.");
        co_await _sys_ks.local().cdc_set_rewritten(std::nullopt);
        co_return;
    }

    auto get_num_token_owners = [tm = _token_metadata.get()] { return tm->count_normal_token_owners(); };

    // This code is racing with node startup. At this point, we're most likely still waiting for gossip to settle
    // and some nodes that are UP may still be marked as DOWN by us.
    // Let's sleep a bit to increase the chance that the first attempt at rewriting succeeds (it's still ok if
    // it doesn't - we'll retry - but it's nice if we succeed without any warnings).
    co_await sleep_abortable(std::chrono::seconds(10), _abort_src);

    cdc_log.info("Rewriting stream tables in the background...");
    auto last_rewritten = co_await rewrite_streams_descriptions(
            std::move(times_and_ttls),
            _sys_ks.local(),
            _sys_dist_ks.local_shared(),
            std::move(get_num_token_owners),
            _abort_src);

    co_await _sys_ks.local().cdc_set_rewritten(last_rewritten);
}
|
||||
|
||||
static void assert_shard_zero(const sstring& where) {
|
||||
if (this_shard_id() != 0) {
|
||||
on_internal_error(cdc_log, format("`{}`: must be run on shard 0", where));
|
||||
}
|
||||
}
|
||||
|
||||
class and_reducer {
|
||||
private:
|
||||
bool _result = true;
|
||||
@@ -388,26 +803,206 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
// Marks a failure to handle a CDC generation as retryable: the handlers in this file
// that catch this type schedule (or continue) a background retry, while any other
// exception type makes them give up.
class generation_handling_nonfatal_exception : public std::runtime_error {
    // Reuse std::runtime_error's constructors (message only).
    using std::runtime_error::runtime_error;
};
|
||||
|
||||
// Shared log format for failures to retrieve CDC streams on a gossip event.
// Placeholders, in order: generation id/timestamp, failure reason, action taken.
constexpr char could_not_retrieve_msg_template[] =
        "Could not retrieve CDC streams with timestamp {} upon gossip event. Reason: \"{}\". Action: {}.";
|
||||
|
||||
// Stores the configuration and references to the services this object depends on.
// No work is performed here.
//
// `raft_topology_change_enabled` is consulted by on_change(): when it returns true,
// gossip-driven CDC generation handling is skipped.
//
// Note: the stale `config cfg,` and `replica::database& db)` parameter lines that duplicated
// parts of the parameter list have been dropped; every member initialized below has exactly
// one matching parameter.
generation_service::generation_service(
            config cfg, gms::gossiper& g, sharded<db::system_distributed_keyspace>& sys_dist_ks,
            sharded<db::system_keyspace>& sys_ks,
            abort_source& abort_src, const locator::shared_token_metadata& stm, gms::feature_service& f,
            replica::database& db,
            std::function<bool()> raft_topology_change_enabled)
        : _cfg(std::move(cfg))
        , _gossiper(g)
        , _sys_dist_ks(sys_dist_ks)
        , _sys_ks(sys_ks)
        , _abort_src(abort_src)
        , _token_metadata(stm)
        , _feature_service(f)
        , _db(db)
        , _raft_topology_change_enabled(std::move(raft_topology_change_enabled))
{
}
|
||||
|
||||
// Shuts the service down.
//
// Waits for the background stream-description rewrite started by after_join();
// a failure of that task is logged and otherwise ignored. If the node had joined,
// shard 0 additionally unregisters from gossip through leave_ring(). Finally the
// service is marked as stopped, which the destructor asserts.
future<> generation_service::stop() {
    try {
        co_await std::move(_cdc_streams_rewrite_complete);
    } catch (...) {
        // The format string needs a `{}` placeholder, otherwise the exception
        // argument is silently left out of the log line.
        cdc_log.error("CDC stream rewrite failed: {}", std::current_exception());
    }

    if (_joined && (this_shard_id() == 0)) {
        co_await leave_ring();
    }

    // This function is a coroutine, so it completes by falling off the end;
    // a plain `return make_ready_future<>()` is not allowed here.
    _stopped = true;
}
|
||||
|
||||
// The service must have been stopped (see stop(), which sets `_stopped`)
// before it is destroyed.
generation_service::~generation_service() {
    SCYLLA_ASSERT(_stopped);
}
|
||||
|
||||
// Called once the node has joined; must run on shard 0.
//
// Records the generation id known at startup, subscribes this service to gossip
// notifications, marks the service as joined, processes the newest generation currently
// visible in gossip and finally starts the stream-description rewrite in the background.
//
// Note: the dangling `handle_cdc_generation(cdc::generation_id gen_id) {` opener that
// preceded this definition has been dropped; it was never closed and would have
// swallowed this function.
future<> generation_service::after_join(std::optional<cdc::generation_id>&& startup_gen_id) {
    assert_shard_zero(__PRETTY_FUNCTION__);

    _gen_id = std::move(startup_gen_id);
    _gossiper.register_(shared_from_this());

    _joined = true;

    // Retrieve the latest CDC generation seen in gossip (if any).
    co_await legacy_scan_cdc_generations();

    // Ensure that the new CDC stream description table has all required streams.
    // See the function's comment for details.
    //
    // Since this depends on the entire cluster (and therefore we cannot guarantee
    // timely completion), run it in the background and wait for it in stop().
    _cdc_streams_rewrite_complete = maybe_rewrite_streams_descriptions();
}
|
||||
|
||||
// Reverses after_join(): clears the joined flag and unsubscribes this service
// from gossip notifications. Must run on shard 0.
future<> generation_service::leave_ring() {
    assert_shard_zero(__PRETTY_FUNCTION__);

    _joined = false;

    auto self = shared_from_this();
    co_await _gossiper.unregister_(std::move(self));
}
|
||||
|
||||
// Gossip callback for a joining endpoint: its whole application state map is
// handled exactly like a state change (see on_change()).
future<> generation_service::on_join(gms::inet_address ep, locator::host_id id, gms::endpoint_state_ptr ep_state, gms::permit_id pid) {
    const auto& all_states = ep_state->get_application_state_map();
    return on_change(ep, id, all_states, pid);
}
|
||||
|
||||
// Gossip callback for changed application states; must run on shard 0.
//
// Does nothing when raft-based topology changes are enabled. Otherwise, if `states`
// carries a CDC_GENERATION_ID entry, the id is parsed and passed to
// legacy_handle_cdc_generation().
future<> generation_service::on_change(gms::inet_address ep, locator::host_id id, const gms::application_state_map& states, gms::permit_id pid) {
    assert_shard_zero(__PRETTY_FUNCTION__);

    if (_raft_topology_change_enabled()) {
        return make_ready_future<>();
    }

    auto handle_gen_id_change = [this] (gms::inet_address ep, locator::host_id id, const gms::versioned_value& v, gms::permit_id) {
        auto gen_id = gms::versioned_value::cdc_generation_id_from_string(v.value());
        cdc_log.debug("Endpoint: {}, CDC generation ID change: {}", ep, gen_id);

        return legacy_handle_cdc_generation(gen_id);
    };

    return on_application_state_change(ep, id, states, gms::application_state::CDC_GENERATION_ID, pid,
            std::move(handle_gen_id_change));
}
|
||||
|
||||
// Checks whether the newest CDC generation known to the cluster is still usable and
// creates a new generation when it is not.
//
// Outline:
//  1. pick the newest generation among our own and those gossiped by non-LEFT nodes
//     (every such node must be NORMAL, otherwise std::runtime_error is thrown);
//  2. decide whether to regenerate: no generation at all, a V1-format generation while the
//     V2 feature is enabled, generation data missing or unreadable, or data that is not
//     optimal for the current token metadata;
//  3. if no regeneration is needed, make sure we use the newest generation and return;
//  4. otherwise create a new generation and gossip it together with our STATUS.
//
// Timeouts and unavailability while reading the generation data are reported to the
// caller as exceptions::request_execution_exception.
future<> generation_service::check_and_repair_cdc_streams() {
    // FIXME: support Raft group 0-based topology changes
    if (!_joined) {
        throw std::runtime_error("check_and_repair_cdc_streams: node not initialized yet");
    }

    std::optional<cdc::generation_id> latest = _gen_id;
    _gossiper.for_each_endpoint_state([&] (const gms::endpoint_state& eps) {
        const auto host = eps.get_host_id();
        if (_gossiper.is_left(host)) {
            cdc_log.info("check_and_repair_cdc_streams ignored node {} because it is in LEFT state", host);
            return;
        }
        if (!_gossiper.is_normal(host)) {
            throw std::runtime_error(fmt::format("All nodes must be in NORMAL or LEFT state while performing check_and_repair_cdc_streams"
                    " ({} is in state {})", host, _gossiper.get_gossip_status(eps)));
        }

        const auto advertised = get_generation_id_for(host, eps);
        const bool is_newer = latest && advertised && get_ts(*advertised) > get_ts(*latest);
        if (!latest || is_newer) {
            latest = advertised;
        }
    });

    auto tmptr = _token_metadata.get();
    auto sys_dist_ks = get_sys_dist_ks();

    bool should_regenerate = false;

    if (!latest) {
        cdc_log.warn("check_and_repair_cdc_streams: no generation observed in gossip");
        should_regenerate = true;
    } else if (std::holds_alternative<cdc::generation_id_v1>(*latest)
            && _feature_service.cdc_generations_v2) {
        cdc_log.info(
            "Cluster still using CDC generation storage format V1 (id: {}), even though it already understands the V2 format."
            " Creating a new generation using V2.", *latest);
        should_regenerate = true;
    } else {
        cdc_log.info("check_and_repair_cdc_streams: last generation observed in gossip: {}", *latest);

        static const auto timeout_msg = "Timeout while fetching CDC topology description";
        static const auto topology_read_error_note = "Note: this is likely caused by"
                " node(s) being down or unreachable. It is recommended to check the network and"
                " restart/remove the failed node(s), then retry checkAndRepairCdcStreams command";
        static const auto exception_translating_msg = "Translating the exception to `request_execution_exception`";

        std::optional<topology_description> description;
        try {
            description = co_await retrieve_generation_data(*latest, _sys_ks.local(), *sys_dist_ks, { tmptr->count_normal_token_owners() });
        } catch (exceptions::request_timeout_exception& e) {
            cdc_log.error("{}: \"{}\". {}.", timeout_msg, e.what(), exception_translating_msg);
            throw exceptions::request_execution_exception(exceptions::exception_code::READ_TIMEOUT,
                    format("{}. {}.", timeout_msg, topology_read_error_note));
        } catch (exceptions::unavailable_exception& e) {
            static const auto unavailable_msg = "Node(s) unavailable while fetching CDC topology description";
            cdc_log.error("{}: \"{}\". {}.", unavailable_msg, e.what(), exception_translating_msg);
            throw exceptions::request_execution_exception(exceptions::exception_code::UNAVAILABLE,
                    format("{}. {}.", unavailable_msg, topology_read_error_note));
        } catch (...) {
            const auto eptr = std::current_exception();
            if (is_timeout_exception(eptr)) {
                cdc_log.error("{}: \"{}\". {}.", timeout_msg, eptr, exception_translating_msg);
                throw exceptions::request_execution_exception(exceptions::exception_code::READ_TIMEOUT,
                        format("{}. {}.", timeout_msg, topology_read_error_note));
            }
            // On exotic errors proceed with regeneration
            cdc_log.error("Exception while reading CDC topology description: \"{}\". Regenerating streams anyway.", eptr);
            should_regenerate = true;
        }

        if (!description) {
            cdc_log.error(
                "Could not find CDC generation with timestamp {} in distributed system tables (current time: {}),"
                " even though some node gossiped about it.",
                latest, db_clock::now());
            should_regenerate = true;
        } else if (!is_cdc_generation_optimal(*description, *tmptr)) {
            should_regenerate = true;
            cdc_log.info("CDC generation {} needs repair, regenerating", latest);
        }
    }

    if (!should_regenerate) {
        // The newest generation is fine; just make sure we are using it ourselves.
        if (latest != _gen_id) {
            co_await legacy_do_handle_cdc_generation(*latest);
        }
        cdc_log.info("CDC generation {} does not need repair", latest);
        co_return;
    }

    const auto new_gen_id = co_await legacy_make_new_generation({}, true);

    // Need to artificially update our STATUS so other nodes handle the generation ID change
    // FIXME: after 0e0282cd nodes do not require a STATUS update to react to CDC generation changes.
    // The artificial STATUS update here should eventually be removed (in a few releases).
    auto status = _gossiper.get_this_endpoint_state_ptr()->get_application_state_ptr(gms::application_state::STATUS);
    if (!status) {
        cdc_log.error("Our STATUS is missing");
        cdc_log.error("Aborting CDC generation repair due to missing STATUS");
        co_return;
    }
    // Update _gen_id first, so that legacy_do_handle_cdc_generation (which will get called due to the status update)
    // won't try to update the gossiper, which would result in a deadlock inside add_local_application_state
    _gen_id = new_gen_id;
    co_await _gossiper.add_local_application_state(
            std::pair(gms::application_state::CDC_GENERATION_ID, gms::versioned_value::cdc_generation_id(new_gen_id)),
            std::pair(gms::application_state::STATUS, *status)
    );
    co_await _sys_ks.local().update_cdc_generation_id(new_gen_id);
}
|
||||
|
||||
future<> generation_service::handle_cdc_generation(cdc::generation_id_v2 gen_id) {
|
||||
auto ts = get_ts(gen_id);
|
||||
if (co_await container().map_reduce(and_reducer(), [ts] (generation_service& svc) {
|
||||
return !svc._cdc_metadata.prepare(ts);
|
||||
@@ -429,8 +1024,171 @@ future<> generation_service::handle_cdc_generation(cdc::generation_id gen_id) {
|
||||
}
|
||||
}
|
||||
|
||||
// Reacts to a CDC generation id learned through gossip; must run on shard 0.
//
// Nothing happens for an empty id, or when prepare() on every shard's CDC metadata
// reports that there is nothing to do for the generation's timestamp. Otherwise the
// generation is handled once inline:
//  - on a non-fatal failure the handling is retried in the background;
//  - on any other failure the error is logged and no retry happens;
//  - on success, if the generation was inserted on some shard, the streams
//    description table is updated as well.
future<> generation_service::legacy_handle_cdc_generation(std::optional<cdc::generation_id> gen_id) {
    assert_shard_zero(__PRETTY_FUNCTION__);

    if (!gen_id) {
        co_return;
    }

    const bool dist_ks_usable = _sys_dist_ks.local_is_initialized() && _sys_dist_ks.local().started();
    if (!dist_ks_usable) {
        on_internal_error(cdc_log, "Legacy handle CDC generation with sys.dist.ks. down");
    }

    // The service should not be listening for generation changes until after the node
    // is bootstrapped and since the node leaves the ring on decommission
    // NOTE(review): the sentence above is incomplete in the original; it appears to
    // justify treating a missing system distributed keyspace as an internal error.

    if (co_await container().map_reduce(and_reducer(), [ts = get_ts(*gen_id)] (generation_service& svc) {
        return !svc._cdc_metadata.prepare(ts);
    })) {
        co_return;
    }

    bool inserted_somewhere = false;
    try {
        inserted_somewhere = co_await legacy_do_handle_cdc_generation_intercept_nonfatal_errors(*gen_id);
    } catch (generation_handling_nonfatal_exception& e) {
        cdc_log.warn(could_not_retrieve_msg_template, gen_id, e.what(), "retrying in the background");
        legacy_async_handle_cdc_generation(*gen_id);
        co_return;
    } catch (...) {
        cdc_log.error(could_not_retrieve_msg_template, gen_id, std::current_exception(), "not retrying");
        co_return; // Exotic ("fatal") exception => do not retry
    }

    if (!inserted_somewhere) {
        co_return;
    }

    cdc_log.info("Starting to use generation {}", *gen_id);
    co_await update_streams_description(*gen_id, _sys_ks.local(), get_sys_dist_ks(),
            [&tm = _token_metadata] { return tm.get()->count_normal_token_owners(); },
            _abort_src);
}
|
||||
|
||||
// Starts a detached background task that keeps trying to handle generation `gen_id`;
// must be called on shard 0.
//
// Every 5 seconds the task re-runs the handling. It ends when:
//  - the handling succeeds (the streams description is then updated if the generation
//    was inserted on some shard),
//  - the handling fails with anything other than generation_handling_nonfatal_exception,
//  - every shard reports the generation's timestamp as known or obsolete, or
//  - the sleep is aborted through the service's abort source.
//
// The task holds a shared pointer to the service, keeping it alive while it runs.
void generation_service::legacy_async_handle_cdc_generation(cdc::generation_id gen_id) {
    assert_shard_zero(__PRETTY_FUNCTION__);

    (void)(([] (cdc::generation_id gen_id, shared_ptr<generation_service> svc) -> future<> {
        while (true) {
            try {
                co_await sleep_abortable(std::chrono::seconds(5), svc->_abort_src);
            } catch (seastar::sleep_aborted&) {
                // The future of this task is discarded, so an escaping sleep_aborted would
                // only surface as an ignored exceptional future. End the task quietly instead,
                // like the background retry in update_streams_description() does.
                cdc_log.warn("Aborted handling of CDC generation {}", gen_id);
                co_return;
            }

            try {
                bool using_this_gen = co_await svc->legacy_do_handle_cdc_generation_intercept_nonfatal_errors(gen_id);
                if (using_this_gen) {
                    cdc_log.info("Starting to use generation {}", gen_id);
                    co_await update_streams_description(gen_id, svc->_sys_ks.local(), svc->get_sys_dist_ks(),
                            [&tm = svc->_token_metadata] { return tm.get()->count_normal_token_owners(); },
                            svc->_abort_src);
                }
                co_return;
            } catch (generation_handling_nonfatal_exception& e) {
                cdc_log.warn(could_not_retrieve_msg_template, gen_id, e.what(), "continuing to retry in the background");
            } catch (...) {
                cdc_log.error(could_not_retrieve_msg_template, gen_id, std::current_exception(), "not retrying anymore");
                co_return; // Exotic ("fatal") exception => do not retry
            }

            // Stop retrying once no shard needs this generation anymore.
            if (co_await svc->container().map_reduce(and_reducer(), [ts = get_ts(gen_id)] (generation_service& local_svc) {
                return local_svc._cdc_metadata.known_or_obsolete(ts);
            })) {
                co_return;
            }
        }
    })(gen_id, shared_from_this()));
}
|
||||
|
||||
// Looks through all endpoint states currently known to the gossiper, picks the CDC
// generation id with the greatest timestamp and handles it. Must run on shard 0.
future<> generation_service::legacy_scan_cdc_generations() {
    assert_shard_zero(__PRETTY_FUNCTION__);

    std::optional<cdc::generation_id> newest;
    _gossiper.for_each_endpoint_state([&] (const gms::endpoint_state& eps) {
        auto advertised = get_generation_id_for(eps.get_host_id(), eps);
        const bool is_newer = newest && advertised && get_ts(*advertised) > get_ts(*newest);
        if (!newest || is_newer) {
            newest = advertised;
        }
    });

    if (!newest) {
        cdc_log.info("No generation seen during startup.");
        co_return;
    }

    cdc_log.info("Latest generation seen during startup: {}", *newest);
    co_await legacy_handle_cdc_generation(newest);
}
|
||||
|
||||
// Runs legacy_do_handle_cdc_generation() and classifies its failures; must run on shard 0.
//
// Request timeouts, unavailability, read failures and anything is_timeout_exception()
// recognizes are converted to generation_handling_nonfatal_exception, which callers
// treat as retryable. Every other exception is passed through unchanged.
future<bool> generation_service::legacy_do_handle_cdc_generation_intercept_nonfatal_errors(cdc::generation_id gen_id) {
    assert_shard_zero(__PRETTY_FUNCTION__);

    auto translate_failure = [] (std::exception_ptr failure) -> future<bool> {
        try {
            std::rethrow_exception(failure);
        } catch (exceptions::request_timeout_exception& e) {
            throw generation_handling_nonfatal_exception(e.what());
        } catch (exceptions::unavailable_exception& e) {
            throw generation_handling_nonfatal_exception(e.what());
        } catch (exceptions::read_failure_exception& e) {
            throw generation_handling_nonfatal_exception(e.what());
        } catch (...) {
            const auto current = std::current_exception();
            if (is_timeout_exception(current)) {
                throw generation_handling_nonfatal_exception(format("{}", current));
            }
            throw;
        }
    };

    // Use futurize_invoke to catch all exceptions from legacy_do_handle_cdc_generation.
    return futurize_invoke([this, gen_id] {
        return legacy_do_handle_cdc_generation(gen_id);
    }).handle_exception(std::move(translate_failure));
}
|
||||
|
||||
// Fetches the data of generation `gen_id` and installs it on all shards; must run on shard 0.
//
// If the generation is newer than the one we currently remember (or we remember none),
// it also becomes the generation we persist locally and gossip.
//
// Returns true iff the generation was inserted into the CDC metadata of at least one shard.
// Throws generation_handling_nonfatal_exception when the generation data cannot be found.
future<bool> generation_service::legacy_do_handle_cdc_generation(cdc::generation_id gen_id) {
    assert_shard_zero(__PRETTY_FUNCTION__);

    auto dist_ks = get_sys_dist_ks();
    auto gen = co_await retrieve_generation_data(gen_id, _sys_ks.local(), *dist_ks, { _token_metadata.get()->count_normal_token_owners() });
    if (!gen) {
        // This may happen during raft upgrade when a node gossips about a generation that
        // was propagated through raft and we didn't apply it yet.
        throw generation_handling_nonfatal_exception(fmt::format(
            "Could not find CDC generation {} in distributed system tables (current time: {}),"
            " even though some node gossiped about it.",
            gen_id, db_clock::now()));
    }

    // We always gossip about the generation with the greatest timestamp. Specific nodes may remember older generations,
    // but eventually they forget when their clocks move past the latest generation's timestamp.
    // The cluster as a whole is only interested in the last generation so restarting nodes may learn what it is.
    // We assume that generation changes don't happen ``too often'' so every node can learn about a generation
    // before it is superseded by a newer one which causes nodes to start gossiping the about the newer one.
    // The assumption follows from the requirement of bootstrapping nodes sequentially.
    const bool supersedes_ours = !_gen_id || get_ts(*_gen_id) < get_ts(gen_id);
    if (supersedes_ours) {
        _gen_id = gen_id;
        co_await _sys_ks.local().update_cdc_generation_id(gen_id);
        co_await _gossiper.add_local_application_state(
                gms::application_state::CDC_GENERATION_ID, gms::versioned_value::cdc_generation_id(gen_id));
    }

    // Return `true` iff the generation was inserted on any of our shards.
    co_return co_await container().map_reduce(or_reducer(),
            [ts = get_ts(gen_id), &gen] (generation_service& svc) -> future<bool> {
        // We need to copy it here before awaiting anything to avoid destruction of the captures.
        const auto insert_ts = ts;
        topology_description shard_copy = co_await gen->clone_async();
        co_return svc._cdc_metadata.insert(insert_ts, std::move(shard_copy));
    });
}
|
||||
|
||||
// Returns a shared pointer to this shard's system distributed keyspace instance.
// Must run on shard 0. Throws std::runtime_error if the instance is not initialized.
shared_ptr<db::system_distributed_keyspace> generation_service::get_sys_dist_ks() {
    assert_shard_zero(__PRETTY_FUNCTION__);

    if (_sys_dist_ks.local_is_initialized()) {
        return _sys_dist_ks.local_shared();
    }

    throw std::runtime_error("system distributed keyspace not initialized");
}
|
||||
|
||||
// Returns the timestamp of a generation id, whichever format (v1 or v2) it holds.
//
// `generation_id` is a std::variant, so the `ts` member has to be reached through
// std::visit; direct member access on the variant (`gen_id.ts`) is not valid.
db_clock::time_point get_ts(const generation_id& gen_id) {
    return std::visit([] (auto& id) { return id.ts; }, gen_id);
}
|
||||
|
||||
future<mutation> create_table_streams_mutation(table_id table, db_clock::time_point stream_ts, const locator::tablet_map& map, api::timestamp_type ts) {
|
||||
|
||||
@@ -34,6 +34,16 @@ namespace seastar {
|
||||
class abort_source;
|
||||
} // namespace seastar
|
||||
|
||||
namespace db {
|
||||
class config;
|
||||
class system_distributed_keyspace;
|
||||
} // namespace db
|
||||
|
||||
namespace gms {
|
||||
class inet_address;
|
||||
class gossiper;
|
||||
} // namespace gms
|
||||
|
||||
namespace locator {
|
||||
class tablet_map;
|
||||
} // namespace locator
|
||||
@@ -143,6 +153,23 @@ struct cdc_stream_diff {
|
||||
|
||||
using table_streams = std::map<api::timestamp_type, committed_stream_set>;
|
||||
|
||||
class no_generation_data_exception : public std::runtime_error {
|
||||
public:
|
||||
no_generation_data_exception(cdc::generation_id generation_ts)
|
||||
: std::runtime_error(fmt::format("could not find generation data for timestamp {}", generation_ts))
|
||||
{}
|
||||
};
|
||||
|
||||
/* Should be called when we're restarting and we noticed that we didn't save any streams timestamp in our local tables,
|
||||
* which means that we're probably upgrading from a non-CDC/old CDC version (another reason could be
|
||||
* that there's a bug, or the user messed with our local tables).
|
||||
*
|
||||
* It checks whether we should be the node to propose the first generation of CDC streams.
|
||||
* The chosen condition is arbitrary, it only tries to make sure that no two nodes propose a generation of streams
|
||||
* when upgrading, and nothing bad happens if they for some reason do (it's mostly an optimization).
|
||||
*/
|
||||
bool should_propose_first_generation(const locator::host_id& me, const gms::gossiper&);
|
||||
|
||||
/*
|
||||
* Checks if the CDC generation is optimal, which is true if its `topology_description` is consistent
|
||||
* with `token_metadata`.
|
||||
|
||||
@@ -15,22 +15,48 @@
|
||||
|
||||
namespace cdc {
|
||||
|
||||
struct generation_id_v1 {
|
||||
db_clock::time_point ts;
|
||||
bool operator==(const generation_id_v1&) const = default;
|
||||
};
|
||||
|
||||
struct generation_id {
|
||||
struct generation_id_v2 {
|
||||
db_clock::time_point ts;
|
||||
utils::UUID id;
|
||||
bool operator==(const generation_id&) const = default;
|
||||
bool operator==(const generation_id_v2&) const = default;
|
||||
};
|
||||
|
||||
using generation_id = std::variant<generation_id_v1, generation_id_v2>;
|
||||
|
||||
db_clock::time_point get_ts(const generation_id&);
|
||||
|
||||
} // namespace cdc
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<cdc::generation_id_v1> {
|
||||
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
|
||||
template <typename FormatContext>
|
||||
auto format(const cdc::generation_id_v1& gen_id, FormatContext& ctx) const {
|
||||
return fmt::format_to(ctx.out(), "{}", gen_id.ts);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<cdc::generation_id_v2> {
|
||||
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
|
||||
template <typename FormatContext>
|
||||
auto format(const cdc::generation_id_v2& gen_id, FormatContext& ctx) const {
|
||||
return fmt::format_to(ctx.out(), "({}, {})", gen_id.ts, gen_id.id);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<cdc::generation_id> {
|
||||
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
|
||||
template <typename FormatContext>
|
||||
auto format(const cdc::generation_id& gen_id, FormatContext& ctx) const {
|
||||
return fmt::format_to(ctx.out(), "({}, {})", gen_id.ts, gen_id.id);
|
||||
return std::visit([&ctx] (auto& id) {
|
||||
return fmt::format_to(ctx.out(), "{}", id);
|
||||
}, gen_id);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -11,51 +11,140 @@
|
||||
#include <seastar/core/sharded.hh>
|
||||
#include "cdc/metadata.hh"
|
||||
#include "cdc/generation_id.hh"
|
||||
#include "gms/i_endpoint_state_change_subscriber.hh"
|
||||
|
||||
namespace db {
|
||||
class system_distributed_keyspace;
|
||||
class system_keyspace;
|
||||
}
|
||||
|
||||
namespace gms {
|
||||
class gossiper;
|
||||
class feature_service;
|
||||
}
|
||||
|
||||
namespace seastar {
|
||||
class abort_source;
|
||||
}
|
||||
|
||||
namespace locator {
|
||||
class shared_token_metadata;
|
||||
class tablet_map;
|
||||
}
|
||||
|
||||
namespace cdc {
|
||||
|
||||
class generation_service : public peering_sharded_service<generation_service>
|
||||
, public async_sharded_service<generation_service> {
|
||||
, public async_sharded_service<generation_service>
|
||||
, public gms::i_endpoint_state_change_subscriber {
|
||||
public:
|
||||
struct config {
|
||||
unsigned ignore_msb_bits;
|
||||
std::chrono::milliseconds ring_delay;
|
||||
bool dont_rewrite_streams = false;
|
||||
};
|
||||
|
||||
private:
|
||||
bool _stopped = false;
|
||||
|
||||
// The node has joined the token ring. Set to `true` on `after_join` call.
|
||||
bool _joined = false;
|
||||
|
||||
config _cfg;
|
||||
gms::gossiper& _gossiper;
|
||||
sharded<db::system_distributed_keyspace>& _sys_dist_ks;
|
||||
sharded<db::system_keyspace>& _sys_ks;
|
||||
abort_source& _abort_src;
|
||||
const locator::shared_token_metadata& _token_metadata;
|
||||
gms::feature_service& _feature_service;
|
||||
replica::database& _db;
|
||||
|
||||
/* Maintains the set of known CDC generations used to pick streams for log writes (i.e., the partition keys of these log writes). */
|
||||
/* Maintains the set of known CDC generations used to pick streams for log writes (i.e., the partition keys of these log writes).
|
||||
* Updated in response to certain gossip events (see the handle_cdc_generation function).
|
||||
*/
|
||||
cdc::metadata _cdc_metadata;
|
||||
|
||||
/* The latest known generation timestamp and the timestamp that we're currently gossiping
|
||||
* (as CDC_GENERATION_ID application state).
|
||||
*
|
||||
* Only shard 0 manages this, hence it will be std::nullopt on all shards other than 0.
|
||||
* This timestamp is also persisted in the system.cdc_local table.
|
||||
*
|
||||
* On shard 0 this may be nullopt only in one special case: rolling upgrade, when we upgrade
|
||||
* from an old version of Scylla that didn't support CDC. In that case one node in the cluster
|
||||
* will create the first generation and start gossiping it; it may be us, or it may be some
|
||||
* different node. In any case, eventually - after one of the nodes gossips the first timestamp
|
||||
* - we'll catch on and this variable will be updated with that generation.
|
||||
*/
|
||||
std::optional<cdc::generation_id> _gen_id;
|
||||
future<> _cdc_streams_rewrite_complete = make_ready_future<>();
|
||||
|
||||
/* Returns true if raft topology changes are enabled.
|
||||
* Can only be called from shard 0.
|
||||
*/
|
||||
std::function<bool()> _raft_topology_change_enabled;
|
||||
public:
|
||||
generation_service(config cfg,
|
||||
generation_service(config cfg, gms::gossiper&,
|
||||
sharded<db::system_distributed_keyspace>&,
|
||||
sharded<db::system_keyspace>& sys_ks,
|
||||
replica::database& db);
|
||||
abort_source&, const locator::shared_token_metadata&,
|
||||
gms::feature_service&, replica::database& db,
|
||||
std::function<bool()> raft_topology_change_enabled);
|
||||
|
||||
future<> stop();
|
||||
~generation_service();
|
||||
|
||||
/* After the node bootstraps and creates a new CDC generation, or restarts and loads the last
|
||||
* known generation timestamp from persistent storage, this function should be called with
|
||||
* that generation timestamp moved in as the `startup_gen_id` parameter.
|
||||
* This passes the responsibility of managing generations from the node startup code to this service;
|
||||
* until then, the service remains dormant.
|
||||
* The startup code is in `storage_service::join_topology`, hence
|
||||
* `after_join` should be called at the end of that function.
|
||||
* Precondition: the node has completed bootstrapping and system_distributed_keyspace is initialized.
|
||||
* Must be called on shard 0 - that's where the generation management happens.
|
||||
*/
|
||||
future<> after_join(std::optional<cdc::generation_id>&& startup_gen_id);
|
||||
future<> leave_ring();
|
||||
|
||||
cdc::metadata& get_cdc_metadata() {
|
||||
return _cdc_metadata;
|
||||
}
|
||||
|
||||
virtual future<> on_join(gms::inet_address, locator::host_id id, gms::endpoint_state_ptr, gms::permit_id) override;
|
||||
virtual future<> on_change(gms::inet_address, locator::host_id id, const gms::application_state_map&, gms::permit_id) override;
|
||||
|
||||
future<> check_and_repair_cdc_streams();
|
||||
|
||||
/* Generate a new set of CDC streams and insert it into the internal distributed CDC generations table.
|
||||
* Returns the ID of this new generation.
|
||||
*
|
||||
* Should be called when starting the node for the first time (i.e., joining the ring).
|
||||
*
|
||||
* Assumes that the system_distributed_keyspace service is initialized.
|
||||
* `cluster_supports_generations_v2` must be `true` if and only if the `CDC_GENERATIONS_V2` feature is enabled.
|
||||
*
|
||||
* If `CDC_GENERATIONS_V2` is enabled, the new generation will be inserted into
|
||||
* `system_distributed_everywhere.cdc_generation_descriptions_v2` and the returned ID will be in the v2 format.
|
||||
* Otherwise the new generation will be limited in size, causing suboptimal stream distribution, it will be inserted
|
||||
* into `system_distributed.cdc_generation_descriptions` and the returned ID will be in the v1 format.
|
||||
* The second case should happen only when we create new generations in a mixed cluster.
|
||||
*
|
||||
* The caller of this function is expected to insert the ID into the gossiper as fast as possible,
|
||||
* so that other nodes learn about the generation before their clocks cross the generation's timestamp
|
||||
* (not guaranteed in the current implementation, but expected to be the common case;
|
||||
* we assume that `ring_delay` is enough for other nodes to learn about the new generation).
|
||||
*
|
||||
* Legacy: used for gossiper-based topology changes.
|
||||
*/
|
||||
future<cdc::generation_id> legacy_make_new_generation(
|
||||
const std::unordered_set<dht::token>& bootstrap_tokens, bool add_delay);
|
||||
|
||||
/* Retrieve the CDC generation with the given ID from local tables
|
||||
* and start using it for CDC log writes if it's not obsolete.
|
||||
* Precondition: the generation was committed using group 0 and locally applied.
|
||||
*/
|
||||
future<> handle_cdc_generation(cdc::generation_id);
|
||||
future<> handle_cdc_generation(cdc::generation_id_v2);
|
||||
|
||||
future<> load_cdc_tablet_streams(std::optional<std::unordered_set<table_id>> changed_tables);
|
||||
|
||||
@@ -67,6 +156,56 @@ public:
|
||||
future<utils::chunked_vector<mutation>> garbage_collect_cdc_streams_for_table(table_id table, std::optional<std::chrono::seconds> ttl, api::timestamp_type ts);
|
||||
future<> garbage_collect_cdc_streams(utils::chunked_vector<canonical_mutation>& muts, api::timestamp_type ts);
|
||||
|
||||
private:
|
||||
/* Retrieve the CDC generation which starts at the given timestamp (from a distributed table created for this purpose)
|
||||
* and start using it for CDC log writes if it's not obsolete.
|
||||
*
|
||||
* Legacy: used for gossiper-based topology changes.
|
||||
*/
|
||||
future<> legacy_handle_cdc_generation(std::optional<cdc::generation_id>);
|
||||
|
||||
/* If `legacy_handle_cdc_generation` fails, it schedules an asynchronous retry in the background
|
||||
* using `legacy_async_handle_cdc_generation`.
|
||||
*
|
||||
* Legacy: used for gossiper-based topology changes.
|
||||
*/
|
||||
void legacy_async_handle_cdc_generation(cdc::generation_id);
|
||||
|
||||
/* Wrapper around `legacy_do_handle_cdc_generation` which intercepts timeout/unavailability exceptions.
|
||||
* Returns: legacy_do_handle_cdc_generation(ts).
|
||||
*
|
||||
* Legacy: used for gossiper-based topology changes.
|
||||
*/
|
||||
future<bool> legacy_do_handle_cdc_generation_intercept_nonfatal_errors(cdc::generation_id);
|
||||
|
||||
/* Returns `true` iff we started using the generation (it was not obsolete or already known),
|
||||
* which means that this node might write some CDC log entries using streams from this generation.
|
||||
*
|
||||
* Legacy: used for gossiper-based topology changes.
|
||||
*/
|
||||
future<bool> legacy_do_handle_cdc_generation(cdc::generation_id);
|
||||
|
||||
/* Scan CDC generation timestamps gossiped by other nodes and retrieve the latest one.
|
||||
* This function should be called once at the end of the node startup procedure
|
||||
* (after the node is started and running normally, it will retrieve generations on gossip events instead).
|
||||
*
|
||||
* Legacy: used for gossiper-based topology changes.
|
||||
*/
|
||||
future<> legacy_scan_cdc_generations();
|
||||
|
||||
/* generation_service code might be racing with system_distributed_keyspace deinitialization
|
||||
* (the deinitialization order is broken).
|
||||
* Therefore, whenever we want to access sys_dist_ks in a background task,
|
||||
* we need to check if the instance is still there. Storing the shared pointer will keep it alive.
|
||||
*/
|
||||
shared_ptr<db::system_distributed_keyspace> get_sys_dist_ks();
|
||||
|
||||
/* Part of the upgrade procedure. Useful in case where the version of Scylla that we're upgrading from
|
||||
* used the "cdc_streams_descriptions" table. This procedure ensures that the new "cdc_streams_descriptions_v2"
|
||||
* table contains streams of all generations that were present in the old table and may still contain data
|
||||
* (i.e. there exist CDC log tables that may contain rows with partition keys being the stream IDs from
|
||||
* these generations). */
|
||||
future<> maybe_rewrite_streams_descriptions();
|
||||
};
|
||||
|
||||
} // namespace cdc
|
||||
|
||||
@@ -618,7 +618,7 @@ static void set_default_properties_log_table(schema_builder& b, const schema& s,
|
||||
b.set_caching_options(caching_options::get_disabled_caching_options());
|
||||
|
||||
auto rs = generate_replication_strategy(ksm, db.get_token_metadata().get_topology());
|
||||
auto tombstone_gc_ext = seastar::make_shared<tombstone_gc_extension>(get_default_tombstone_gc_mode(*rs, false));
|
||||
auto tombstone_gc_ext = seastar::make_shared<tombstone_gc_extension>(get_default_tombstone_gc_mode(*rs, db.get_token_metadata(), false));
|
||||
b.add_extension(tombstone_gc_extension::NAME, std::move(tombstone_gc_ext));
|
||||
}
|
||||
|
||||
|
||||
219
cdc/split.cc
219
cdc/split.cc
@@ -76,14 +76,14 @@ struct partition_deletion {
|
||||
|
||||
using clustered_column_set = std::map<clustering_key, cdc::one_kind_column_set, clustering_key::less_compare>;
|
||||
|
||||
template <typename Container>
|
||||
template<typename Container>
|
||||
concept EntryContainer = requires(Container& container) {
|
||||
// Parenthesized due to https://bugs.llvm.org/show_bug.cgi?id=45088
|
||||
{ (container.atomic_entries) } -> std::same_as<std::vector<atomic_column_update>&>;
|
||||
{ (container.nonatomic_entries) } -> std::same_as<std::vector<nonatomic_column_update>&>;
|
||||
};
|
||||
|
||||
template <EntryContainer Container>
|
||||
template<EntryContainer Container>
|
||||
static void add_columns_affected_by_entries(cdc::one_kind_column_set& cset, const Container& cont) {
|
||||
for (const auto& entry : cont.atomic_entries) {
|
||||
cset.set(entry.id);
|
||||
@@ -134,7 +134,7 @@ struct batch {
|
||||
ret.emplace(clustering_key::make_empty(), all_columns);
|
||||
}
|
||||
|
||||
auto process_change_type = [&](const auto& changes) {
|
||||
auto process_change_type = [&] (const auto& changes) {
|
||||
for (const auto& change : changes) {
|
||||
auto& cset = ret[change.key];
|
||||
cset.resize(s.regular_columns_count());
|
||||
@@ -211,9 +211,7 @@ private:
|
||||
|
||||
public:
|
||||
extract_collection_visitor(column_id id, std::map<change_key_t, row_update>& updates)
|
||||
: _id(id)
|
||||
, _updates(updates) {
|
||||
}
|
||||
: _id(id), _updates(updates) {}
|
||||
|
||||
void collection_tombstone(const tombstone& t) {
|
||||
auto& entry = get_or_append_entry(t.timestamp + 1, gc_clock::duration(0));
|
||||
@@ -228,9 +226,7 @@ public:
|
||||
cell(key, c);
|
||||
}
|
||||
|
||||
constexpr bool finished() const {
|
||||
return false;
|
||||
}
|
||||
constexpr bool finished() const { return false; }
|
||||
};
|
||||
|
||||
/* Visits all cells and tombstones in a row, putting the encountered changes into buckets
|
||||
@@ -253,46 +249,41 @@ struct extract_row_visitor {
|
||||
|
||||
void collection_column(const column_definition& cdef, auto&& visit_collection) {
|
||||
visit(*cdef.type, make_visitor(
|
||||
[&](const collection_type_impl& ctype) {
|
||||
struct collection_visitor : public extract_collection_visitor<collection_visitor> {
|
||||
data_type _value_type;
|
||||
[&] (const collection_type_impl& ctype) {
|
||||
struct collection_visitor : public extract_collection_visitor<collection_visitor> {
|
||||
data_type _value_type;
|
||||
|
||||
collection_visitor(column_id id, std::map<change_key_t, row_update>& updates, const collection_type_impl& ctype)
|
||||
: extract_collection_visitor<collection_visitor>(id, updates)
|
||||
, _value_type(ctype.value_comparator()) {
|
||||
}
|
||||
collection_visitor(column_id id, std::map<change_key_t, row_update>& updates, const collection_type_impl& ctype)
|
||||
: extract_collection_visitor<collection_visitor>(id, updates), _value_type(ctype.value_comparator()) {}
|
||||
|
||||
data_type get_value_type(bytes_view) {
|
||||
return _value_type;
|
||||
}
|
||||
} v(cdef.id, _updates, ctype);
|
||||
data_type get_value_type(bytes_view) {
|
||||
return _value_type;
|
||||
}
|
||||
} v(cdef.id, _updates, ctype);
|
||||
|
||||
visit_collection(v);
|
||||
},
|
||||
[&](const user_type_impl& utype) {
|
||||
struct udt_visitor : public extract_collection_visitor<udt_visitor> {
|
||||
const user_type_impl& _utype;
|
||||
visit_collection(v);
|
||||
},
|
||||
[&] (const user_type_impl& utype) {
|
||||
struct udt_visitor : public extract_collection_visitor<udt_visitor> {
|
||||
const user_type_impl& _utype;
|
||||
|
||||
udt_visitor(column_id id, std::map<change_key_t, row_update>& updates, const user_type_impl& utype)
|
||||
: extract_collection_visitor<udt_visitor>(id, updates)
|
||||
, _utype(utype) {
|
||||
}
|
||||
udt_visitor(column_id id, std::map<change_key_t, row_update>& updates, const user_type_impl& utype)
|
||||
: extract_collection_visitor<udt_visitor>(id, updates), _utype(utype) {}
|
||||
|
||||
data_type get_value_type(bytes_view key) {
|
||||
return _utype.type(deserialize_field_index(key));
|
||||
}
|
||||
} v(cdef.id, _updates, utype);
|
||||
data_type get_value_type(bytes_view key) {
|
||||
return _utype.type(deserialize_field_index(key));
|
||||
}
|
||||
} v(cdef.id, _updates, utype);
|
||||
|
||||
visit_collection(v);
|
||||
},
|
||||
[&](const abstract_type& o) {
|
||||
throw std::runtime_error(format("extract_changes: unknown collection type:", o.name()));
|
||||
}));
|
||||
visit_collection(v);
|
||||
},
|
||||
[&] (const abstract_type& o) {
|
||||
throw std::runtime_error(format("extract_changes: unknown collection type:", o.name()));
|
||||
}
|
||||
));
|
||||
}
|
||||
|
||||
constexpr bool finished() const {
|
||||
return false;
|
||||
}
|
||||
constexpr bool finished() const { return false; }
|
||||
};
|
||||
|
||||
struct extract_changes_visitor {
|
||||
@@ -302,8 +293,12 @@ struct extract_changes_visitor {
|
||||
extract_row_visitor v;
|
||||
visit_row_cells(v);
|
||||
|
||||
for (auto& [ts_ttl, row_update] : v._updates) {
|
||||
_result[ts_ttl.first].static_updates.push_back({ts_ttl.second, std::move(row_update.atomic_entries), std::move(row_update.nonatomic_entries)});
|
||||
for (auto& [ts_ttl, row_update]: v._updates) {
|
||||
_result[ts_ttl.first].static_updates.push_back({
|
||||
ts_ttl.second,
|
||||
std::move(row_update.atomic_entries),
|
||||
std::move(row_update.nonatomic_entries)
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -324,18 +319,24 @@ struct extract_changes_visitor {
|
||||
} v;
|
||||
visit_row_cells(v);
|
||||
|
||||
for (auto& [ts_ttl, row_update] : v._updates) {
|
||||
for (auto& [ts_ttl, row_update]: v._updates) {
|
||||
// It is important that changes in the resulting `set_of_changes` are listed
|
||||
// in increasing TTL order. The reason is explained in a comment in cdc/log.cc,
|
||||
// search for "#6070".
|
||||
auto [ts, ttl] = ts_ttl;
|
||||
|
||||
if (v._marker && ts == v._marker_ts && ttl == v._marker_ttl) {
|
||||
_result[ts].clustered_inserts.push_back({ttl, ckey, *v._marker, std::move(row_update.atomic_entries), {}});
|
||||
_result[ts].clustered_inserts.push_back({
|
||||
ttl,
|
||||
ckey,
|
||||
*v._marker,
|
||||
std::move(row_update.atomic_entries),
|
||||
{}
|
||||
});
|
||||
|
||||
auto& cr_insert = _result[ts].clustered_inserts.back();
|
||||
bool clustered_update_exists = false;
|
||||
for (auto& nonatomic_up : row_update.nonatomic_entries) {
|
||||
for (auto& nonatomic_up: row_update.nonatomic_entries) {
|
||||
// Updating a collection column with an INSERT statement implies inserting a tombstone.
|
||||
//
|
||||
// For example, suppose that we have:
|
||||
@@ -361,7 +362,12 @@ struct extract_changes_visitor {
|
||||
cr_insert.nonatomic_entries.push_back(std::move(nonatomic_up));
|
||||
} else {
|
||||
if (!clustered_update_exists) {
|
||||
_result[ts].clustered_updates.push_back({ttl, ckey, {}, {}});
|
||||
_result[ts].clustered_updates.push_back({
|
||||
ttl,
|
||||
ckey,
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
// Multiple iterations of this `for` loop (for different collection columns)
|
||||
// might want to put their `nonatomic_up`s into an UPDATE change;
|
||||
@@ -384,7 +390,12 @@ struct extract_changes_visitor {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
_result[ts].clustered_updates.push_back({ttl, ckey, std::move(row_update.atomic_entries), std::move(row_update.nonatomic_entries)});
|
||||
_result[ts].clustered_updates.push_back({
|
||||
ttl,
|
||||
ckey,
|
||||
std::move(row_update.atomic_entries),
|
||||
std::move(row_update.nonatomic_entries)
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -401,9 +412,7 @@ struct extract_changes_visitor {
|
||||
_result[t.timestamp].partition_deletions = partition_deletion{t};
|
||||
}
|
||||
|
||||
constexpr bool finished() const {
|
||||
return false;
|
||||
}
|
||||
constexpr bool finished() const { return false; }
|
||||
};
|
||||
|
||||
set_of_changes extract_changes(const mutation& m) {
|
||||
@@ -417,23 +426,13 @@ namespace cdc {
|
||||
struct find_timestamp_visitor {
|
||||
api::timestamp_type _ts = api::missing_timestamp;
|
||||
|
||||
bool finished() const {
|
||||
return _ts != api::missing_timestamp;
|
||||
}
|
||||
bool finished() const { return _ts != api::missing_timestamp; }
|
||||
|
||||
void visit(api::timestamp_type ts) {
|
||||
_ts = ts;
|
||||
}
|
||||
void visit(const atomic_cell_view& cell) {
|
||||
visit(cell.timestamp());
|
||||
}
|
||||
void visit(api::timestamp_type ts) { _ts = ts; }
|
||||
void visit(const atomic_cell_view& cell) { visit(cell.timestamp()); }
|
||||
|
||||
void live_atomic_cell(const column_definition&, const atomic_cell_view& cell) {
|
||||
visit(cell);
|
||||
}
|
||||
void dead_atomic_cell(const column_definition&, const atomic_cell_view& cell) {
|
||||
visit(cell);
|
||||
}
|
||||
void live_atomic_cell(const column_definition&, const atomic_cell_view& cell) { visit(cell); }
|
||||
void dead_atomic_cell(const column_definition&, const atomic_cell_view& cell) { visit(cell); }
|
||||
void collection_tombstone(const tombstone& t) {
|
||||
// A collection tombstone with timestamp T can be created with:
|
||||
// UPDATE ks.t USING TIMESTAMP T + 1 SET X = null WHERE ...
|
||||
@@ -442,33 +441,15 @@ struct find_timestamp_visitor {
|
||||
// with cdc$time using timestamp T + 1 instead of T.
|
||||
visit(t.timestamp + 1);
|
||||
}
|
||||
void live_collection_cell(bytes_view, const atomic_cell_view& cell) {
|
||||
visit(cell);
|
||||
}
|
||||
void dead_collection_cell(bytes_view, const atomic_cell_view& cell) {
|
||||
visit(cell);
|
||||
}
|
||||
void collection_column(const column_definition&, auto&& visit_collection) {
|
||||
visit_collection(*this);
|
||||
}
|
||||
void marker(const row_marker& rm) {
|
||||
visit(rm.timestamp());
|
||||
}
|
||||
void static_row_cells(auto&& visit_row_cells) {
|
||||
visit_row_cells(*this);
|
||||
}
|
||||
void clustered_row_cells(const clustering_key&, auto&& visit_row_cells) {
|
||||
visit_row_cells(*this);
|
||||
}
|
||||
void clustered_row_delete(const clustering_key&, const tombstone& t) {
|
||||
visit(t.timestamp);
|
||||
}
|
||||
void range_delete(const range_tombstone& t) {
|
||||
visit(t.tomb.timestamp);
|
||||
}
|
||||
void partition_delete(const tombstone& t) {
|
||||
visit(t.timestamp);
|
||||
}
|
||||
void live_collection_cell(bytes_view, const atomic_cell_view& cell) { visit(cell); }
|
||||
void dead_collection_cell(bytes_view, const atomic_cell_view& cell) { visit(cell); }
|
||||
void collection_column(const column_definition&, auto&& visit_collection) { visit_collection(*this); }
|
||||
void marker(const row_marker& rm) { visit(rm.timestamp()); }
|
||||
void static_row_cells(auto&& visit_row_cells) { visit_row_cells(*this); }
|
||||
void clustered_row_cells(const clustering_key&, auto&& visit_row_cells) { visit_row_cells(*this); }
|
||||
void clustered_row_delete(const clustering_key&, const tombstone& t) { visit(t.timestamp); }
|
||||
void range_delete(const range_tombstone& t) { visit(t.tomb.timestamp); }
|
||||
void partition_delete(const tombstone& t) { visit(t.timestamp); }
|
||||
};
|
||||
|
||||
/* Find some timestamp inside the given mutation.
|
||||
@@ -524,12 +505,8 @@ struct should_split_visitor {
|
||||
|
||||
virtual ~should_split_visitor() = default;
|
||||
|
||||
inline bool finished() const {
|
||||
return _result;
|
||||
}
|
||||
inline void stop() {
|
||||
_result = true;
|
||||
}
|
||||
inline bool finished() const { return _result; }
|
||||
inline void stop() { _result = true; }
|
||||
|
||||
void visit(api::timestamp_type ts, gc_clock::duration ttl = gc_clock::duration(0)) {
|
||||
if (_ts != api::missing_timestamp && _ts != ts) {
|
||||
@@ -540,23 +517,15 @@ struct should_split_visitor {
|
||||
if (_ttl && *_ttl != ttl) {
|
||||
return stop();
|
||||
}
|
||||
_ttl = {ttl};
|
||||
_ttl = { ttl };
|
||||
}
|
||||
|
||||
void visit(const atomic_cell_view& cell) {
|
||||
visit(cell.timestamp(), get_ttl(cell));
|
||||
}
|
||||
void visit(const atomic_cell_view& cell) { visit(cell.timestamp(), get_ttl(cell)); }
|
||||
|
||||
void live_atomic_cell(const column_definition&, const atomic_cell_view& cell) {
|
||||
visit(cell);
|
||||
}
|
||||
void dead_atomic_cell(const column_definition&, const atomic_cell_view& cell) {
|
||||
visit(cell);
|
||||
}
|
||||
void live_atomic_cell(const column_definition&, const atomic_cell_view& cell) { visit(cell); }
|
||||
void dead_atomic_cell(const column_definition&, const atomic_cell_view& cell) { visit(cell); }
|
||||
|
||||
void collection_tombstone(const tombstone& t) {
|
||||
visit(t.timestamp + 1);
|
||||
}
|
||||
void collection_tombstone(const tombstone& t) { visit(t.timestamp + 1); }
|
||||
|
||||
virtual void live_collection_cell(bytes_view, const atomic_cell_view& cell) {
|
||||
if (_had_row_marker) {
|
||||
@@ -565,12 +534,8 @@ struct should_split_visitor {
|
||||
}
|
||||
visit(cell);
|
||||
}
|
||||
void dead_collection_cell(bytes_view, const atomic_cell_view& cell) {
|
||||
visit(cell);
|
||||
}
|
||||
void collection_column(const column_definition&, auto&& visit_collection) {
|
||||
visit_collection(*this);
|
||||
}
|
||||
void dead_collection_cell(bytes_view, const atomic_cell_view& cell) { visit(cell); }
|
||||
void collection_column(const column_definition&, auto&& visit_collection) { visit_collection(*this); }
|
||||
|
||||
virtual void marker(const row_marker& rm) {
|
||||
_had_row_marker = true;
|
||||
@@ -641,8 +606,8 @@ bool should_split(const mutation& m, const per_request_options& options) {
|
||||
cdc::inspect_mutation(m, v);
|
||||
|
||||
return v._result
|
||||
// A mutation with no timestamp will be split into 0 mutations:
|
||||
|| v._ts == api::missing_timestamp;
|
||||
// A mutation with no timestamp will be split into 0 mutations:
|
||||
|| v._ts == api::missing_timestamp;
|
||||
}
|
||||
|
||||
// Returns true if the row state and the atomic and nonatomic entries represent
|
||||
@@ -677,7 +642,7 @@ static bool entries_match_row_state(const schema_ptr& base_schema, const cell_ma
|
||||
if (current_values.size() != update.cells.size()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
std::unordered_map<sstring_view, bytes> current_values_map;
|
||||
for (const auto& entry : current_values) {
|
||||
const auto attr_name = std::string_view(value_cast<sstring>(entry.first));
|
||||
@@ -746,8 +711,8 @@ bool should_skip(batch& changes, const mutation& base_mutation, change_processor
|
||||
return true;
|
||||
}
|
||||
|
||||
void process_changes_with_splitting(
|
||||
const mutation& base_mutation, change_processor& processor, bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility) {
|
||||
void process_changes_with_splitting(const mutation& base_mutation, change_processor& processor,
|
||||
bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility) {
|
||||
const auto base_schema = base_mutation.schema();
|
||||
auto changes = extract_changes(base_mutation);
|
||||
auto pk = base_mutation.key();
|
||||
@@ -859,8 +824,8 @@ void process_changes_with_splitting(
|
||||
}
|
||||
}
|
||||
|
||||
void process_changes_without_splitting(
|
||||
const mutation& base_mutation, change_processor& processor, bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility) {
|
||||
void process_changes_without_splitting(const mutation& base_mutation, change_processor& processor,
|
||||
bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility) {
|
||||
if (alternator_strict_compatibility) {
|
||||
auto changes = extract_changes(base_mutation);
|
||||
if (should_skip(changes.begin()->second, base_mutation, processor)) {
|
||||
@@ -877,7 +842,7 @@ void process_changes_without_splitting(
|
||||
|
||||
one_kind_column_set columns{base_schema->static_columns_count()};
|
||||
if (!p.static_row().empty()) {
|
||||
p.static_row().get().for_each_cell([&](column_id id, const atomic_cell_or_collection& cell) {
|
||||
p.static_row().get().for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) {
|
||||
columns.set(id);
|
||||
});
|
||||
processor.produce_preimage(nullptr, columns);
|
||||
@@ -890,7 +855,7 @@ void process_changes_without_splitting(
|
||||
// Row deleted - include all columns in preimage
|
||||
columns.set(0, base_schema->regular_columns_count(), true);
|
||||
} else {
|
||||
cr.row().cells().for_each_cell([&](column_id id, const atomic_cell_or_collection& cell) {
|
||||
cr.row().cells().for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) {
|
||||
columns.set(id);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -48,7 +48,6 @@
|
||||
#include "mutation/mutation_fragment_stream_validator.hh"
|
||||
#include "utils/assert.hh"
|
||||
#include "utils/error_injection.hh"
|
||||
#include "utils/chunked_vector.hh"
|
||||
#include "utils/pretty_printers.hh"
|
||||
#include "readers/multi_range.hh"
|
||||
#include "readers/compacting.hh"
|
||||
@@ -162,7 +161,6 @@ std::string_view to_string(compaction_type type) {
|
||||
case compaction_type::Reshape: return "Reshape";
|
||||
case compaction_type::Split: return "Split";
|
||||
case compaction_type::Major: return "Major";
|
||||
case compaction_type::RewriteComponent: return "RewriteComponent";
|
||||
}
|
||||
on_internal_error_noexcept(clogger, format("Invalid compaction type {}", int(type)));
|
||||
return "(invalid)";
|
||||
@@ -600,7 +598,8 @@ protected:
|
||||
// Garbage collected sstables that were added to SSTable set and should be eventually removed from it.
|
||||
std::vector<sstables::shared_sstable> _used_garbage_collected_sstables;
|
||||
utils::observable<> _stop_request_observable;
|
||||
tombstone_gc_state _tombstone_gc_state;
|
||||
// optional tombstone_gc_state that is used when gc has to check only the compacting sstables to collect tombstones.
|
||||
std::optional<tombstone_gc_state> _tombstone_gc_state_with_commitlog_check_disabled;
|
||||
int64_t _output_repaired_at = 0;
|
||||
private:
|
||||
// Keeps track of monitors for input sstable.
|
||||
@@ -612,23 +611,23 @@ private:
|
||||
}
|
||||
|
||||
// Called in a seastar thread
|
||||
utils::chunked_vector<dht::partition_range>
|
||||
dht::partition_range_vector
|
||||
get_ranges_for_invalidation(const std::vector<sstables::shared_sstable>& sstables) {
|
||||
// If owned ranges is disengaged, it means no cleanup work was done and
|
||||
// so nothing needs to be invalidated.
|
||||
if (!_owned_ranges) {
|
||||
return {};
|
||||
return dht::partition_range_vector{};
|
||||
}
|
||||
auto owned_ranges = dht::to_partition_ranges_chunked(*_owned_ranges).get();
|
||||
auto owned_ranges = dht::to_partition_ranges(*_owned_ranges, utils::can_yield::yes);
|
||||
|
||||
auto non_owned_ranges = sstables
|
||||
| std::views::transform([] (const sstables::shared_sstable& sst) {
|
||||
seastar::thread::maybe_yield();
|
||||
return dht::partition_range::make({sst->get_first_decorated_key(), true},
|
||||
{sst->get_last_decorated_key(), true});
|
||||
}) | std::ranges::to<utils::chunked_vector<dht::partition_range>>();
|
||||
}) | std::ranges::to<dht::partition_range_vector>();
|
||||
|
||||
return dht::subtract_ranges(*_schema, std::move(non_owned_ranges), std::move(owned_ranges)).get();
|
||||
return dht::subtract_ranges(*_schema, non_owned_ranges, std::move(owned_ranges)).get();
|
||||
}
|
||||
protected:
|
||||
compaction(compaction_group_view& table_s, compaction_descriptor descriptor, compaction_data& cdata, compaction_progress_monitor& progress_monitor, use_backlog_tracker use_backlog_tracker)
|
||||
@@ -650,12 +649,9 @@ protected:
|
||||
, _owned_ranges(std::move(descriptor.owned_ranges))
|
||||
, _sharder(descriptor.sharder)
|
||||
, _owned_ranges_checker(_owned_ranges ? std::optional<dht::incremental_owned_ranges_checker>(*_owned_ranges) : std::nullopt)
|
||||
, _tombstone_gc_state(_table_s.get_tombstone_gc_state())
|
||||
, _tombstone_gc_state_with_commitlog_check_disabled(descriptor.gc_check_only_compacting_sstables ? std::make_optional(_table_s.get_tombstone_gc_state().with_commitlog_check_disabled()) : std::nullopt)
|
||||
, _progress_monitor(progress_monitor)
|
||||
{
|
||||
if (descriptor.gc_check_only_compacting_sstables) {
|
||||
_tombstone_gc_state = _tombstone_gc_state.with_commitlog_check_disabled();
|
||||
}
|
||||
std::unordered_set<sstables::run_id> ssts_run_ids;
|
||||
_contains_multi_fragment_runs = std::any_of(_sstables.begin(), _sstables.end(), [&ssts_run_ids] (sstables::shared_sstable& sst) {
|
||||
return !ssts_run_ids.insert(sst->run_identifier()).second;
|
||||
@@ -722,8 +718,8 @@ protected:
|
||||
|
||||
compaction_completion_desc
|
||||
get_compaction_completion_desc(std::vector<sstables::shared_sstable> input_sstables, std::vector<sstables::shared_sstable> output_sstables) {
|
||||
auto ranges = get_ranges_for_invalidation(input_sstables);
|
||||
return compaction_completion_desc{std::move(input_sstables), std::move(output_sstables), std::move(ranges)};
|
||||
auto ranges_for_for_invalidation = get_ranges_for_invalidation(input_sstables);
|
||||
return compaction_completion_desc{std::move(input_sstables), std::move(output_sstables), std::move(ranges_for_for_invalidation)};
|
||||
}
|
||||
|
||||
// Tombstone expiration is enabled based on the presence of sstable set.
|
||||
@@ -853,8 +849,8 @@ private:
|
||||
return _table_s.get_compaction_strategy().make_sstable_set(_table_s);
|
||||
}
|
||||
|
||||
tombstone_gc_state get_tombstone_gc_state() const {
|
||||
return _tombstone_gc_state;
|
||||
const tombstone_gc_state& get_tombstone_gc_state() const {
|
||||
return _tombstone_gc_state_with_commitlog_check_disabled ? _tombstone_gc_state_with_commitlog_check_disabled.value() : _table_s.get_tombstone_gc_state();
|
||||
}
|
||||
|
||||
future<> setup() {
|
||||
@@ -1054,7 +1050,7 @@ private:
|
||||
return can_never_purge;
|
||||
}
|
||||
return [this] (const dht::decorated_key& dk, is_shadowable is_shadowable) {
|
||||
return get_max_purgeable_timestamp(_table_s, *_selector, _compacting_for_max_purgeable_func, dk, _bloom_filter_checks, _compacting_max_timestamp, !_tombstone_gc_state.is_commitlog_check_enabled(), is_shadowable);
|
||||
return get_max_purgeable_timestamp(_table_s, *_selector, _compacting_for_max_purgeable_func, dk, _bloom_filter_checks, _compacting_max_timestamp, _tombstone_gc_state_with_commitlog_check_disabled.has_value(), is_shadowable);
|
||||
};
|
||||
}
|
||||
|
||||
@@ -2052,7 +2048,6 @@ compaction_type compaction_type_options::type() const {
|
||||
compaction_type::Reshape,
|
||||
compaction_type::Split,
|
||||
compaction_type::Major,
|
||||
compaction_type::RewriteComponent,
|
||||
};
|
||||
static_assert(std::variant_size_v<compaction_type_options::options_variant> == std::size(index_to_type));
|
||||
return index_to_type[_options.index()];
|
||||
@@ -2089,9 +2084,6 @@ static std::unique_ptr<compaction> make_compaction(compaction_group_view& table_
|
||||
std::unique_ptr<compaction> operator()(compaction_type_options::split split_options) {
|
||||
return std::make_unique<split_compaction>(table_s, std::move(descriptor), cdata, std::move(split_options), progress_monitor);
|
||||
}
|
||||
std::unique_ptr<compaction> operator()(compaction_type_options::component_rewrite) {
|
||||
throw std::runtime_error("component_rewrite compaction should be handled separately");
|
||||
}
|
||||
} visitor_factory{table_s, std::move(descriptor), cdata, progress_monitor};
|
||||
|
||||
return descriptor.options.visit(visitor_factory);
|
||||
@@ -2109,7 +2101,7 @@ static future<compaction_result> scrub_sstables_validate_mode(compaction_descrip
|
||||
|
||||
validation_errors += co_await sst->validate(permit, cdata.abort, [&schema] (sstring what) {
|
||||
scrub_compaction::report_validation_error(compaction_type::Scrub, *schema, what);
|
||||
}, monitor_generator(sst), true);
|
||||
}, monitor_generator(sst));
|
||||
// Did validation actually finish because aborted?
|
||||
if (cdata.is_stop_requested()) {
|
||||
// Compaction manager will catch this exception and re-schedule the compaction.
|
||||
@@ -2146,34 +2138,6 @@ future<compaction_result> scrub_sstables_validate_mode(compaction_descriptor des
|
||||
co_return res;
|
||||
}
|
||||
|
||||
// Rewrites one component of every sstable listed in `descriptor.sstables`, bypassing the
// regular compaction path. For each input sstable, link_with_rewritten_component() is
// called with the component and modifier taken from the descriptor's component_rewrite
// options; the resulting sstables are collected in input order and finally handed,
// together with the inputs, to `descriptor.replacer`.
//
// The body runs under seastar::async, so futures are resolved with .get().
// `table_s` is captured by reference inside the async body.
//
// Returns a compaction_result whose stats carry the start/end timestamps and whose
// new_sstables holds the rewritten sstables.
future<compaction_result> rewrite_sstables_component(compaction_descriptor descriptor, compaction_group_view& table_s) {
    return seastar::async([descriptor = std::move(descriptor), &table_s] () mutable {
        compaction_result result {
            .stats = {
                .started_at = db_clock::now(),
            },
        };

        const auto& rewrite_opts = descriptor.options.as<compaction_type_options::component_rewrite>();
        bool refresh_id = static_cast<bool>(rewrite_opts.update_id);

        // The standard descriptor creator cannot be used when rewriting a component,
        // because the sstable version must be preserved: build the new sstable with
        // the state and version of the one it is derived from.
        auto make_same_version_sstable = [&table_s] (sstables::shared_sstable source) {
            return table_s.make_sstable(source->state(), source->get_version());
        };

        result.new_sstables.reserve(descriptor.sstables.size());
        for (auto& input : descriptor.sstables) {
            result.new_sstables.push_back(
                    input->link_with_rewritten_component(make_same_version_sstable, rewrite_opts.component_to_rewrite, rewrite_opts.modifier, refresh_id).get());
        }

        // Publish the replacement: the inputs are moved out, the outputs are copied so
        // that they are still available in the returned result.
        descriptor.replacer({std::move(descriptor.sstables), result.new_sstables});

        result.stats.ended_at = db_clock::now();
        return result;
    });
}
|
||||
|
||||
future<compaction_result>
|
||||
compact_sstables(compaction_descriptor descriptor, compaction_data& cdata, compaction_group_view& table_s, compaction_progress_monitor& progress_monitor) {
|
||||
if (descriptor.sstables.empty()) {
|
||||
@@ -2185,9 +2149,6 @@ compact_sstables(compaction_descriptor descriptor, compaction_data& cdata, compa
|
||||
// Bypass the usual compaction machinery for dry-mode scrub
|
||||
return scrub_sstables_validate_mode(std::move(descriptor), cdata, table_s, progress_monitor);
|
||||
}
|
||||
if (descriptor.options.type() == compaction_type::RewriteComponent) {
|
||||
return rewrite_sstables_component(std::move(descriptor), table_s);
|
||||
}
|
||||
return compaction::run(make_compaction(table_s, std::move(descriptor), cdata, progress_monitor));
|
||||
}
|
||||
|
||||
|
||||
@@ -12,12 +12,10 @@
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
#include <variant>
|
||||
#include "sstables/component_type.hh"
|
||||
#include "sstables/types_fwd.hh"
|
||||
#include "sstables/sstable_set.hh"
|
||||
#include "compaction_fwd.hh"
|
||||
#include "mutation_writer/token_group_based_splitting_writer.hh"
|
||||
#include "utils/chunked_vector.hh"
|
||||
|
||||
namespace compaction {
|
||||
|
||||
@@ -32,7 +30,6 @@ enum class compaction_type {
|
||||
Reshape = 7,
|
||||
Split = 8,
|
||||
Major = 9,
|
||||
RewriteComponent = 10,
|
||||
};
|
||||
|
||||
struct compaction_completion_desc {
|
||||
@@ -41,7 +38,7 @@ struct compaction_completion_desc {
|
||||
// New, fresh SSTables that should be added to SSTable set, replacing the old ones.
|
||||
std::vector<sstables::shared_sstable> new_sstables;
|
||||
// Set of compacted partition ranges that should be invalidated in the cache.
|
||||
utils::chunked_vector<dht::partition_range> ranges_for_cache_invalidation;
|
||||
dht::partition_range_vector ranges_for_cache_invalidation;
|
||||
};
|
||||
|
||||
// creates a new SSTable for a given shard
|
||||
@@ -93,15 +90,8 @@ public:
|
||||
struct split {
|
||||
mutation_writer::classify_by_token_group classifier;
|
||||
};
|
||||
struct component_rewrite {
|
||||
sstables::component_type component_to_rewrite;
|
||||
std::function<void(sstables::sstable&)> modifier;
|
||||
|
||||
using update_sstable_id = bool_class<class update_sstable_id_tag>;
|
||||
update_sstable_id update_id = update_sstable_id::yes;
|
||||
};
|
||||
private:
|
||||
using options_variant = std::variant<regular, cleanup, upgrade, scrub, reshard, reshape, split, major, component_rewrite>;
|
||||
using options_variant = std::variant<regular, cleanup, upgrade, scrub, reshard, reshape, split, major>;
|
||||
|
||||
private:
|
||||
options_variant _options;
|
||||
@@ -139,10 +129,6 @@ public:
|
||||
return compaction_type_options(scrub{.operation_mode = mode, .quarantine_sstables = quarantine_sstables, .drop_unfixable = drop_unfixable_sstables});
|
||||
}
|
||||
|
||||
static compaction_type_options make_component_rewrite(component_type component, std::function<void(sstables::sstable&)> modifier, component_rewrite::update_sstable_id update_id = component_rewrite::update_sstable_id::yes) {
|
||||
return compaction_type_options(component_rewrite{.component_to_rewrite = component, .modifier = std::move(modifier), .update_id = update_id});
|
||||
}
|
||||
|
||||
static compaction_type_options make_split(mutation_writer::classify_by_token_group classifier) {
|
||||
return compaction_type_options(split{std::move(classifier)});
|
||||
}
|
||||
|
||||
@@ -46,7 +46,6 @@ public:
|
||||
virtual reader_permit make_compaction_reader_permit() const = 0;
|
||||
virtual sstables::sstables_manager& get_sstables_manager() noexcept = 0;
|
||||
virtual sstables::shared_sstable make_sstable(sstables::sstable_state) const = 0;
|
||||
virtual sstables::shared_sstable make_sstable(sstables::sstable_state, sstables::sstable_version_types) const = 0;
|
||||
virtual sstables::sstable_writer_config configure_writer(sstring origin) const = 0;
|
||||
virtual api::timestamp_type min_memtable_timestamp() const = 0;
|
||||
virtual api::timestamp_type min_memtable_live_timestamp() const = 0;
|
||||
@@ -55,7 +54,7 @@ public:
|
||||
virtual future<> on_compaction_completion(compaction_completion_desc desc, sstables::offstrategy offstrategy) = 0;
|
||||
virtual bool is_auto_compaction_disabled_by_user() const noexcept = 0;
|
||||
virtual bool tombstone_gc_enabled() const noexcept = 0;
|
||||
virtual tombstone_gc_state get_tombstone_gc_state() const noexcept = 0;
|
||||
virtual const tombstone_gc_state& get_tombstone_gc_state() const noexcept = 0;
|
||||
virtual compaction_backlog_tracker& get_backlog_tracker() = 0;
|
||||
virtual const std::string get_group_id() const noexcept = 0;
|
||||
virtual seastar::condition_variable& get_staging_done_condition() noexcept = 0;
|
||||
|
||||
@@ -778,7 +778,6 @@ compaction_manager::get_incremental_repair_read_lock(compaction::compaction_grou
|
||||
cmlog.debug("Get get_incremental_repair_read_lock for {} started", reason);
|
||||
}
|
||||
compaction::compaction_state& cs = get_compaction_state(&t);
|
||||
auto gh = cs.gate.hold();
|
||||
auto ret = co_await cs.incremental_repair_lock.hold_read_lock();
|
||||
if (!reason.empty()) {
|
||||
cmlog.debug("Get get_incremental_repair_read_lock for {} done", reason);
|
||||
@@ -792,7 +791,6 @@ compaction_manager::get_incremental_repair_write_lock(compaction::compaction_gro
|
||||
cmlog.debug("Get get_incremental_repair_write_lock for {} started", reason);
|
||||
}
|
||||
compaction::compaction_state& cs = get_compaction_state(&t);
|
||||
auto gh = cs.gate.hold();
|
||||
auto ret = co_await cs.incremental_repair_lock.hold_write_lock();
|
||||
if (!reason.empty()) {
|
||||
cmlog.debug("Get get_incremental_repair_write_lock for {} done", reason);
|
||||
@@ -946,7 +944,7 @@ sstables::shared_sstable sstables_task_executor::consume_sstable() {
|
||||
auto sst = _sstables.back();
|
||||
_sstables.pop_back();
|
||||
--_cm._stats.pending_tasks; // from this point on, switch_state(pending|active) works the same way as any other task
|
||||
cmlog.debug("consumed {}", sst->get_filename());
|
||||
cmlog.debug("{}", format("consumed {}", sst->get_filename()));
|
||||
return sst;
|
||||
}
|
||||
|
||||
@@ -1042,7 +1040,7 @@ compaction_manager::compaction_manager(config cfg, abort_source& as, tasks::task
|
||||
_compaction_controller.set_max_shares(max_shares);
|
||||
}))
|
||||
, _strategy_control(std::make_unique<strategy_control>(*this))
|
||||
{
|
||||
, _tombstone_gc_state(_shared_tombstone_gc_state) {
|
||||
tm.register_module(_task_manager_module->get_name(), _task_manager_module);
|
||||
register_metrics();
|
||||
// Bandwidth throttling is node-wide, updater is needed on single shard
|
||||
@@ -1066,7 +1064,7 @@ compaction_manager::compaction_manager(tasks::task_manager& tm)
|
||||
, _compaction_static_shares_observer(_cfg.static_shares.observe(_update_compaction_static_shares_action.make_observer()))
|
||||
, _compaction_max_shares_observer(_cfg.max_shares.observe([] (const float& max_shares) {}))
|
||||
, _strategy_control(std::make_unique<strategy_control>(*this))
|
||||
{
|
||||
, _tombstone_gc_state(_shared_tombstone_gc_state) {
|
||||
tm.register_module(_task_manager_module->get_name(), _task_manager_module);
|
||||
// No metric registration because this constructor is supposed to be used only by the testing
|
||||
// infrastructure.
|
||||
@@ -1208,6 +1206,7 @@ future<> compaction_manager::await_tasks(std::vector<shared_ptr<compaction_task_
|
||||
|
||||
std::vector<shared_ptr<compaction_task_executor>>
|
||||
compaction_manager::do_stop_ongoing_compactions(sstring reason, std::function<bool(const compaction_group_view*)> filter, std::optional<compaction_type> type_opt) noexcept {
|
||||
auto ongoing_compactions = get_compactions(filter).size();
|
||||
auto tasks = _tasks
|
||||
| std::views::filter([&filter, type_opt] (const auto& task) {
|
||||
return filter(task.compacting_table()) && (!type_opt || task.compaction_type() == *type_opt);
|
||||
@@ -1216,7 +1215,6 @@ compaction_manager::do_stop_ongoing_compactions(sstring reason, std::function<bo
|
||||
| std::ranges::to<std::vector<shared_ptr<compaction_task_executor>>>();
|
||||
logging::log_level level = tasks.empty() ? log_level::debug : log_level::info;
|
||||
if (cmlog.is_enabled(level)) {
|
||||
auto ongoing_compactions = get_compactions(filter).size();
|
||||
std::string scope = "";
|
||||
if (!tasks.empty()) {
|
||||
const compaction_group_view* t = tasks.front()->compacting_table();
|
||||
@@ -1268,15 +1266,9 @@ future<> compaction_manager::start(const db::config& cfg, utils::disk_space_moni
|
||||
if (dsm && (this_shard_id() == 0)) {
|
||||
_out_of_space_subscription = dsm->subscribe(cfg.critical_disk_utilization_level, [this] (auto threshold_reached) {
|
||||
if (threshold_reached) {
|
||||
return container().invoke_on_all([] (compaction_manager& cm) {
|
||||
cm._in_critical_disk_utilization_mode = true;
|
||||
return cm.drain();
|
||||
});
|
||||
return container().invoke_on_all([] (compaction_manager& cm) { return cm.drain(); });
|
||||
}
|
||||
return container().invoke_on_all([] (compaction_manager& cm) {
|
||||
cm._in_critical_disk_utilization_mode = false;
|
||||
cm.enable();
|
||||
});
|
||||
return container().invoke_on_all([] (compaction_manager& cm) { cm.enable(); });
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1426,17 +1418,11 @@ protected:
|
||||
compaction_strategy cs = t.get_compaction_strategy();
|
||||
compaction_descriptor descriptor = co_await cs.get_sstables_for_compaction(t, _cm.get_strategy_control());
|
||||
int weight = calculate_weight(descriptor);
|
||||
bool debug_enabled = cmlog.is_enabled(log_level::debug);
|
||||
if (debug_enabled) {
|
||||
cmlog.debug("Started minor compaction sstables={} sstables_reapired_at={} range={} uuid={} compaction_uuid={}",
|
||||
descriptor.sstables, compacting_table()->get_sstables_repaired_at(),
|
||||
compacting_table()->token_range(), uuid, _compaction_data.compaction_uuid);
|
||||
}
|
||||
cmlog.debug("Started minor compaction sstables={} sstables_reapired_at={} range={} uuid={} compaction_uuid={}",
|
||||
descriptor.sstables, compacting_table()->get_sstables_repaired_at(),
|
||||
compacting_table()->token_range(), uuid, _compaction_data.compaction_uuid);
|
||||
|
||||
sstring old_sstables;
|
||||
if (debug_enabled) {
|
||||
old_sstables = ::format("{}", descriptor.sstables);
|
||||
}
|
||||
auto old_sstables = ::format("{}", descriptor.sstables);
|
||||
|
||||
if (descriptor.sstables.empty() || !can_proceed() || t.is_auto_compaction_disabled_by_user()) {
|
||||
cmlog.debug("{}: sstables={} can_proceed={} auto_compaction={}", *this, descriptor.sstables.size(), can_proceed(), t.is_auto_compaction_disabled_by_user());
|
||||
@@ -1466,10 +1452,8 @@ protected:
|
||||
try {
|
||||
bool should_update_history = this->should_update_history(descriptor.options.type());
|
||||
compaction_result res = co_await compact_sstables(std::move(descriptor), _compaction_data, on_replace);
|
||||
if (debug_enabled) {
|
||||
cmlog.debug("Finished minor compaction old_sstables={} new_sstables={} sstables_reapired_at={} range={} uuid={} compaction_uuid={}",
|
||||
old_sstables, res.new_sstables, compacting_table()->get_sstables_repaired_at(), compacting_table()->token_range(), uuid, _compaction_data.compaction_uuid);
|
||||
}
|
||||
cmlog.debug("Finished minor compaction old_sstables={} new_sstables={} sstables_reapired_at={} range={} uuid={} compaction_uuid={}",
|
||||
old_sstables, res.new_sstables, compacting_table()->get_sstables_repaired_at(), compacting_table()->token_range(), uuid, _compaction_data.compaction_uuid);
|
||||
finish_compaction();
|
||||
if (should_update_history) {
|
||||
// update_history can take a long time compared to
|
||||
@@ -1535,9 +1519,7 @@ future<> compaction_manager::maybe_wait_for_sstable_count_reduction(compaction_g
|
||||
| std::views::transform(std::mem_fn(&sstables::sstable::run_identifier))
|
||||
| std::ranges::to<std::unordered_set>());
|
||||
};
|
||||
const auto injected_threshold = utils::get_local_injector().inject_parameter<size_t>("set_sstable_count_reduction_threshold");
|
||||
const auto threshold = injected_threshold.value_or(size_t(std::max(schema->max_compaction_threshold(), 32)));
|
||||
|
||||
const auto threshold = size_t(std::max(schema->max_compaction_threshold(), 32));
|
||||
auto count = co_await num_runs_for_compaction();
|
||||
if (count <= threshold) {
|
||||
cmlog.trace("No need to wait for sstable count reduction in {}: {} <= {}",
|
||||
@@ -1552,7 +1534,9 @@ future<> compaction_manager::maybe_wait_for_sstable_count_reduction(compaction_g
|
||||
auto& cstate = get_compaction_state(&t);
|
||||
try {
|
||||
while (can_perform_regular_compaction(t) && co_await num_runs_for_compaction() > threshold) {
|
||||
co_await cstate.compaction_done.when();
|
||||
co_await cstate.compaction_done.wait([this, &t] {
|
||||
return !can_perform_regular_compaction(t);
|
||||
});
|
||||
}
|
||||
} catch (const broken_condition_variable&) {
|
||||
co_return;
|
||||
@@ -1802,41 +1786,6 @@ protected:
|
||||
}
|
||||
};
|
||||
|
||||
class rewrite_sstables_component_compaction_task_executor final : public rewrite_sstables_compaction_task_executor {
|
||||
std::unordered_map<sstables::shared_sstable, sstables::shared_sstable>& _rewritten_sstables;
|
||||
public:
|
||||
rewrite_sstables_component_compaction_task_executor(compaction_manager& mgr,
|
||||
throw_if_stopping do_throw_if_stopping,
|
||||
compaction_group_view* t,
|
||||
tasks::task_id parent_id,
|
||||
compaction_type_options options,
|
||||
std::vector<sstables::shared_sstable> sstables,
|
||||
compacting_sstable_registration compacting,
|
||||
std::unordered_map<sstables::shared_sstable, sstables::shared_sstable>& rewritten_sstables)
|
||||
: rewrite_sstables_compaction_task_executor(mgr, do_throw_if_stopping, t, parent_id, options, {},
|
||||
std::move(sstables), std::move(compacting), compaction_manager::can_purge_tombstones::no, "component_rewrite"),
|
||||
_rewritten_sstables(rewritten_sstables)
|
||||
{}
|
||||
protected:
|
||||
virtual future<compaction_manager::compaction_stats_opt> do_run() override {
|
||||
compaction_stats stats{};
|
||||
|
||||
switch_state(state::pending);
|
||||
auto maintenance_permit = co_await acquire_semaphore(_cm._maintenance_ops_sem);
|
||||
|
||||
while (!_sstables.empty()) {
|
||||
auto sst = consume_sstable();
|
||||
auto it = _rewritten_sstables.emplace(sst, sstables::shared_sstable{}).first;
|
||||
auto res = co_await rewrite_sstable(std::move(sst));
|
||||
_cm._validation_errors += res.stats.validation_errors;
|
||||
stats += res.stats;
|
||||
it->second = std::move(res.new_sstables.front());
|
||||
}
|
||||
|
||||
co_return stats;
|
||||
}
|
||||
};
|
||||
|
||||
class split_compaction_task_executor final : public rewrite_sstables_compaction_task_executor {
|
||||
compaction_type_options::split _opt;
|
||||
public:
|
||||
@@ -1950,28 +1899,6 @@ compaction_manager::rewrite_sstables(compaction_group_view& t, compaction_type_o
|
||||
return perform_task_on_all_files<rewrite_sstables_compaction_task_executor>("rewrite", info, t, std::move(options), std::move(owned_ranges_ptr), std::move(get_func), throw_if_stopping::no, can_purge, std::move(options_desc));
|
||||
}
|
||||
|
||||
// Runs a component-rewrite task over `sstables` for the compaction group `t`.
//
// Returns std::nullopt without doing any work when start_compaction() yields a false
// value or when there are no sstables. Otherwise the sstables are registered as
// compacting and a rewrite_sstables_component_compaction_task_executor is run;
// `rewritten_sstables` is passed through to that executor.
//
// Note: `sstables` is taken by reference and moved from when the task is created.
future<compaction_manager::compaction_stats_opt>
compaction_manager::rewrite_sstables_component(compaction_group_view& t,
        std::vector<sstables::shared_sstable>& sstables,
        compaction_type_options options,
        std::unordered_map<sstables::shared_sstable, sstables::shared_sstable>& rewritten_sstables,
        tasks::task_info info) {
    auto compaction_guard = start_compaction(t);
    if (!compaction_guard || sstables.empty()) {
        co_return std::nullopt;
    }

    compacting_sstable_registration registration(*this, get_compaction_state(&t));
    registration.register_compacting(sstables);

    co_return co_await perform_compaction<rewrite_sstables_component_compaction_task_executor>(throw_if_stopping::no, info, &t, info.id,
            std::move(options), std::move(sstables), std::move(registration), rewritten_sstables);
}
|
||||
|
||||
class validate_sstables_compaction_task_executor : public sstables_task_executor {
|
||||
compaction_manager::quarantine_invalid_sstables _quarantine_sstables;
|
||||
public:
|
||||
@@ -2362,16 +2289,6 @@ future<compaction_manager::compaction_stats_opt> compaction_manager::perform_spl
|
||||
return perform_task_on_all_files<split_compaction_task_executor>("split", info, t, std::move(options), std::move(owned_ranges_ptr), std::move(get_sstables), throw_if_stopping::no);
|
||||
}
|
||||
|
||||
// Builds the exception describing why compaction cannot run right now:
//  - a std::runtime_error("critical disk utilization") while
//    _in_critical_disk_utilization_mode is set;
//  - otherwise a compaction_stopped_exception for the keyspace/table of `cg`
//    with the reason "compaction disabled".
std::exception_ptr compaction_manager::make_disabled_exception(compaction::compaction_group_view& cg) {
    if (_in_critical_disk_utilization_mode) {
        return std::make_exception_ptr(std::runtime_error("critical disk utilization"));
    }
    const auto& table_schema = cg.schema();
    return std::make_exception_ptr(compaction_stopped_exception(table_schema->ks_name(), table_schema->cf_name(), "compaction disabled"));
}
|
||||
|
||||
future<std::vector<sstables::shared_sstable>>
|
||||
compaction_manager::maybe_split_new_sstable(sstables::shared_sstable sst, compaction_group_view& t, compaction_type_options::split opt) {
|
||||
if (!split_compaction_task_executor::sstable_needs_split(sst, opt)) {
|
||||
@@ -2381,7 +2298,8 @@ compaction_manager::maybe_split_new_sstable(sstables::shared_sstable sst, compac
|
||||
// We don't want to prevent split because compaction is temporarily disabled on a view only for synchronization,
|
||||
// which is unneeded against new sstables that aren't part of any set yet, so never use can_proceed(&t) here.
|
||||
if (is_disabled()) {
|
||||
co_return coroutine::exception(make_disabled_exception(t));
|
||||
co_return coroutine::exception(std::make_exception_ptr(std::runtime_error(format("Cannot split {} because manager has compaction disabled, " \
|
||||
"reason might be out of space prevention", sst->get_filename()))));
|
||||
}
|
||||
std::vector<sstables::shared_sstable> ret;
|
||||
|
||||
@@ -2405,18 +2323,6 @@ compaction_manager::maybe_split_new_sstable(sstables::shared_sstable sst, compac
|
||||
co_return ret;
|
||||
}
|
||||
|
||||
// Rewrites `component` of every sstable in `sstables`, applying `modifier` as configured
// through compaction_type_options::make_component_rewrite(), and returns the map that
// rewrite_sstables_component() filled in (input sstable -> rewritten sstable).
// The statistics returned by rewrite_sstables_component() are discarded.
future<std::unordered_map<sstables::shared_sstable, sstables::shared_sstable>> compaction_manager::perform_component_rewrite(compaction::compaction_group_view& t,
        tasks::task_info info,
        std::vector<sstables::shared_sstable> sstables,
        sstables::component_type component,
        std::function<void(sstables::sstable&)> modifier,
        compaction_type_options::component_rewrite::update_sstable_id update_id) {
    std::unordered_map<sstables::shared_sstable, sstables::shared_sstable> old_to_new;
    // One entry per input sstable is expected, so size the table up front.
    old_to_new.reserve(sstables.size());

    auto rewrite_options = compaction_type_options::make_component_rewrite(component, std::move(modifier), update_id);
    co_await rewrite_sstables_component(t, sstables, std::move(rewrite_options), old_to_new, info);

    co_return old_to_new;
}
|
||||
|
||||
// Submit a table to be scrubbed and wait for its termination.
|
||||
future<compaction_manager::compaction_stats_opt> compaction_manager::perform_sstable_scrub(compaction_group_view& t, compaction_type_options::scrub opts, tasks::task_info info) {
|
||||
auto scrub_mode = opts.operation_mode;
|
||||
@@ -2481,8 +2387,6 @@ future<> compaction_manager::remove(compaction_group_view& t, sstring reason) no
|
||||
if (!c_state.gate.is_closed()) {
|
||||
auto close_gate = c_state.gate.close();
|
||||
co_await stop_ongoing_compactions(reason, &t);
|
||||
// Wait for users of incremental repair lock (can be either repair itself or maintenance compactions).
|
||||
co_await c_state.incremental_repair_lock.write_lock();
|
||||
co_await std::move(close_gate);
|
||||
}
|
||||
|
||||
|
||||
@@ -55,7 +55,6 @@ class custom_compaction_task_executor;
|
||||
class regular_compaction_task_executor;
|
||||
class offstrategy_compaction_task_executor;
|
||||
class rewrite_sstables_compaction_task_executor;
|
||||
class rewrite_sstables_component_compaction_task_executor;
|
||||
class split_compaction_task_executor;
|
||||
class cleanup_sstables_compaction_task_executor;
|
||||
class validate_sstables_compaction_task_executor;
|
||||
@@ -115,8 +114,6 @@ private:
|
||||
uint32_t _disabled_state_count = 0;
|
||||
|
||||
bool is_disabled() const { return _state != state::running || _disabled_state_count > 0; }
|
||||
// precondition: is_disabled() is true.
|
||||
std::exception_ptr make_disabled_exception(compaction::compaction_group_view& cg);
|
||||
|
||||
std::optional<future<>> _stop_future;
|
||||
|
||||
@@ -170,9 +167,12 @@ private:
|
||||
std::unique_ptr<strategy_control> _strategy_control;
|
||||
|
||||
shared_tombstone_gc_state _shared_tombstone_gc_state;
|
||||
// TODO: tombstone_gc_state should now have value semantics, but the code
|
||||
// still uses it with reference semantics (inconsistently though).
|
||||
// Drop this member once the code has been converted to use value semantics.
|
||||
tombstone_gc_state _tombstone_gc_state;
|
||||
|
||||
utils::disk_space_monitor::subscription _out_of_space_subscription;
|
||||
bool _in_critical_disk_utilization_mode = false;
|
||||
private:
|
||||
// Requires task->_compaction_state.gate to be held and task to be registered in _tasks.
|
||||
future<compaction_stats_opt> perform_task(shared_ptr<compaction::compaction_task_executor> task, throw_if_stopping do_throw_if_stopping);
|
||||
@@ -256,12 +256,6 @@ private:
|
||||
future<compaction_stats_opt> rewrite_sstables(compaction::compaction_group_view& t, compaction_type_options options, owned_ranges_ptr, get_candidates_func, tasks::task_info info,
|
||||
can_purge_tombstones can_purge = can_purge_tombstones::yes, sstring options_desc = "");
|
||||
|
||||
future<compaction_stats_opt> rewrite_sstables_component(compaction_group_view& t,
|
||||
std::vector<sstables::shared_sstable>& sstables,
|
||||
compaction_type_options options,
|
||||
std::unordered_map<sstables::shared_sstable, sstables::shared_sstable>& rewritten_sstables,
|
||||
tasks::task_info info);
|
||||
|
||||
// Stop all fibers, without waiting. Safe to be called multiple times.
|
||||
void do_stop() noexcept;
|
||||
future<> really_do_stop() noexcept;
|
||||
@@ -370,13 +364,6 @@ public:
|
||||
// Submit a table to be scrubbed and wait for its termination.
|
||||
future<compaction_stats_opt> perform_sstable_scrub(compaction::compaction_group_view& t, compaction_type_options::scrub opts, tasks::task_info info);
|
||||
|
||||
future<std::unordered_map<sstables::shared_sstable, sstables::shared_sstable>> perform_component_rewrite(compaction::compaction_group_view& t,
|
||||
tasks::task_info info,
|
||||
std::vector<sstables::shared_sstable> sstables,
|
||||
sstables::component_type component,
|
||||
std::function<void(sstables::sstable&)> modifier,
|
||||
compaction_type_options::component_rewrite::update_sstable_id update_id = compaction_type_options::component_rewrite::update_sstable_id::yes);
|
||||
|
||||
// Submit a table for major compaction.
|
||||
future<> perform_major_compaction(compaction::compaction_group_view& t, tasks::task_info info, bool consider_only_existing_data = false);
|
||||
|
||||
@@ -469,6 +456,10 @@ public:
|
||||
|
||||
compaction::strategy_control& get_strategy_control() const noexcept;
|
||||
|
||||
// Read-only access to the manager's `_tombstone_gc_state` member.
const tombstone_gc_state& get_tombstone_gc_state() const noexcept {
    return _tombstone_gc_state;
}
|
||||
|
||||
// Mutable access to the manager's `_shared_tombstone_gc_state` member.
shared_tombstone_gc_state& get_shared_tombstone_gc_state() noexcept {
    return _shared_tombstone_gc_state;
}
|
||||
@@ -498,7 +489,6 @@ public:
|
||||
friend class compaction::regular_compaction_task_executor;
|
||||
friend class compaction::offstrategy_compaction_task_executor;
|
||||
friend class compaction::rewrite_sstables_compaction_task_executor;
|
||||
friend class compaction::rewrite_sstables_component_compaction_task_executor;
|
||||
friend class compaction::cleanup_sstables_compaction_task_executor;
|
||||
friend class compaction::validate_sstables_compaction_task_executor;
|
||||
friend compaction_reenabler;
|
||||
|
||||
@@ -33,10 +33,8 @@ future<compaction_descriptor> leveled_compaction_strategy::get_sstables_for_comp
|
||||
auto candidate = manifest.get_compaction_candidates(*state->last_compacted_keys, state->compaction_counter);
|
||||
|
||||
if (!candidate.sstables.empty()) {
|
||||
if (leveled_manifest::logger.is_enabled(logging::log_level::debug)) {
|
||||
auto main_set = co_await table_s.main_sstable_set();
|
||||
leveled_manifest::logger.debug("leveled: Compacting {} out of {} sstables", candidate.sstables.size(), main_set->size());
|
||||
}
|
||||
auto main_set = co_await table_s.main_sstable_set();
|
||||
leveled_manifest::logger.debug("leveled: Compacting {} out of {} sstables", candidate.sstables.size(), main_set->size());
|
||||
co_return candidate;
|
||||
}
|
||||
|
||||
|
||||
@@ -15,7 +15,6 @@
|
||||
#include "compaction_strategy_state.hh"
|
||||
#include "utils/error_injection.hh"
|
||||
|
||||
#include <seastar/util/lazy.hh>
|
||||
#include <ranges>
|
||||
|
||||
namespace compaction {
|
||||
@@ -29,12 +28,12 @@ time_window_compaction_strategy_state_ptr time_window_compaction_strategy::get_s
|
||||
}
|
||||
|
||||
const std::unordered_map<sstring, std::chrono::seconds> time_window_compaction_strategy_options::valid_window_units = {
|
||||
{"MINUTES", 60s}, {"HOURS", 3600s}, {"DAYS", 86400s}};
|
||||
{ "MINUTES", 60s }, { "HOURS", 3600s }, { "DAYS", 86400s }
|
||||
};
|
||||
|
||||
const std::unordered_map<sstring, time_window_compaction_strategy_options::timestamp_resolutions>
|
||||
time_window_compaction_strategy_options::valid_timestamp_resolutions = {
|
||||
{"MICROSECONDS", timestamp_resolutions::microsecond},
|
||||
{"MILLISECONDS", timestamp_resolutions::millisecond},
|
||||
const std::unordered_map<sstring, time_window_compaction_strategy_options::timestamp_resolutions> time_window_compaction_strategy_options::valid_timestamp_resolutions = {
|
||||
{ "MICROSECONDS", timestamp_resolutions::microsecond },
|
||||
{ "MILLISECONDS", timestamp_resolutions::millisecond },
|
||||
};
|
||||
|
||||
static std::chrono::seconds validate_compaction_window_unit(const std::map<sstring, sstring>& options) {
|
||||
@@ -44,8 +43,7 @@ static std::chrono::seconds validate_compaction_window_unit(const std::map<sstri
|
||||
if (tmp_value) {
|
||||
auto valid_window_units_it = time_window_compaction_strategy_options::valid_window_units.find(tmp_value.value());
|
||||
if (valid_window_units_it == time_window_compaction_strategy_options::valid_window_units.end()) {
|
||||
throw exceptions::configuration_exception(
|
||||
fmt::format("Invalid window unit {} for {}", tmp_value.value(), time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY));
|
||||
throw exceptions::configuration_exception(fmt::format("Invalid window unit {} for {}", tmp_value.value(), time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY));
|
||||
}
|
||||
window_unit = valid_window_units_it->second;
|
||||
}
|
||||
@@ -61,12 +59,10 @@ static std::chrono::seconds validate_compaction_window_unit(const std::map<sstri
|
||||
|
||||
static int validate_compaction_window_size(const std::map<sstring, sstring>& options) {
|
||||
auto tmp_value = compaction_strategy_impl::get_value(options, time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY);
|
||||
int window_size = cql3::statements::property_definitions::to_long(time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, tmp_value,
|
||||
time_window_compaction_strategy_options::DEFAULT_COMPACTION_WINDOW_SIZE);
|
||||
int window_size = cql3::statements::property_definitions::to_long(time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, tmp_value, time_window_compaction_strategy_options::DEFAULT_COMPACTION_WINDOW_SIZE);
|
||||
|
||||
if (window_size <= 0) {
|
||||
throw exceptions::configuration_exception(
|
||||
fmt::format("{} value ({}) must be greater than 1", time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, window_size));
|
||||
throw exceptions::configuration_exception(fmt::format("{} value ({}) must be greater than 1", time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, window_size));
|
||||
}
|
||||
|
||||
return window_size;
|
||||
@@ -86,30 +82,26 @@ static db_clock::duration validate_expired_sstable_check_frequency_seconds(const
|
||||
try {
|
||||
expired_sstable_check_frequency = std::chrono::seconds(std::stol(tmp_value.value()));
|
||||
} catch (const std::exception& e) {
|
||||
throw exceptions::syntax_exception(fmt::format(
|
||||
"Invalid long value {} for {}", tmp_value.value(), time_window_compaction_strategy_options::EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS_KEY));
|
||||
throw exceptions::syntax_exception(fmt::format("Invalid long value {} for {}", tmp_value.value(), time_window_compaction_strategy_options::EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS_KEY));
|
||||
}
|
||||
}
|
||||
|
||||
return expired_sstable_check_frequency;
|
||||
}
|
||||
|
||||
static db_clock::duration validate_expired_sstable_check_frequency_seconds(
|
||||
const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
static db_clock::duration validate_expired_sstable_check_frequency_seconds(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
db_clock::duration expired_sstable_check_frequency = validate_expired_sstable_check_frequency_seconds(options);
|
||||
unchecked_options.erase(time_window_compaction_strategy_options::EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS_KEY);
|
||||
return expired_sstable_check_frequency;
|
||||
}
|
||||
|
||||
static time_window_compaction_strategy_options::timestamp_resolutions validate_timestamp_resolution(const std::map<sstring, sstring>& options) {
|
||||
time_window_compaction_strategy_options::timestamp_resolutions timestamp_resolution =
|
||||
time_window_compaction_strategy_options::timestamp_resolutions::microsecond;
|
||||
time_window_compaction_strategy_options::timestamp_resolutions timestamp_resolution = time_window_compaction_strategy_options::timestamp_resolutions::microsecond;
|
||||
|
||||
auto tmp_value = compaction_strategy_impl::get_value(options, time_window_compaction_strategy_options::TIMESTAMP_RESOLUTION_KEY);
|
||||
if (tmp_value) {
|
||||
if (!time_window_compaction_strategy_options::valid_timestamp_resolutions.contains(tmp_value.value())) {
|
||||
throw exceptions::configuration_exception(fmt::format(
|
||||
"Invalid timestamp resolution {} for {}", tmp_value.value(), time_window_compaction_strategy_options::TIMESTAMP_RESOLUTION_KEY));
|
||||
throw exceptions::configuration_exception(fmt::format("Invalid timestamp resolution {} for {}", tmp_value.value(), time_window_compaction_strategy_options::TIMESTAMP_RESOLUTION_KEY));
|
||||
} else {
|
||||
timestamp_resolution = time_window_compaction_strategy_options::valid_timestamp_resolutions.at(tmp_value.value());
|
||||
}
|
||||
@@ -118,8 +110,7 @@ static time_window_compaction_strategy_options::timestamp_resolutions validate_t
|
||||
return timestamp_resolution;
|
||||
}
|
||||
|
||||
static time_window_compaction_strategy_options::timestamp_resolutions validate_timestamp_resolution(
|
||||
const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
static time_window_compaction_strategy_options::timestamp_resolutions validate_timestamp_resolution(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
time_window_compaction_strategy_options::timestamp_resolutions timestamp_resolution = validate_timestamp_resolution(options);
|
||||
unchecked_options.erase(time_window_compaction_strategy_options::TIMESTAMP_RESOLUTION_KEY);
|
||||
return timestamp_resolution;
|
||||
@@ -154,7 +145,7 @@ void time_window_compaction_strategy_options::validate(const std::map<sstring, s
|
||||
compaction_strategy_impl::validate_min_max_threshold(options, unchecked_options);
|
||||
|
||||
auto it = options.find("enable_optimized_twcs_queries");
|
||||
if (it != options.end() && it->second != "true" && it->second != "false") {
|
||||
if (it != options.end() && it->second != "true" && it->second != "false") {
|
||||
throw exceptions::configuration_exception(fmt::format("enable_optimized_twcs_queries value ({}) must be \"true\" or \"false\"", it->second));
|
||||
}
|
||||
unchecked_options.erase("enable_optimized_twcs_queries");
|
||||
@@ -171,9 +162,7 @@ class classify_by_timestamp {
|
||||
std::vector<int64_t> _known_windows;
|
||||
|
||||
public:
|
||||
explicit classify_by_timestamp(time_window_compaction_strategy_options options)
|
||||
: _options(std::move(options)) {
|
||||
}
|
||||
explicit classify_by_timestamp(time_window_compaction_strategy_options options) : _options(std::move(options)) { }
|
||||
int64_t operator()(api::timestamp_type ts) {
|
||||
const auto window = time_window_compaction_strategy::get_window_for(_options, ts);
|
||||
if (const auto it = std::ranges::find(_known_windows, window); it != _known_windows.end()) {
|
||||
@@ -201,7 +190,7 @@ uint64_t time_window_compaction_strategy::adjust_partition_estimate(const mutati
|
||||
auto estimated_window_count = max_data_segregation_window_count;
|
||||
auto default_ttl = std::chrono::duration_cast<std::chrono::microseconds>(s->default_time_to_live());
|
||||
bool min_and_max_ts_available = ms_meta.min_timestamp && ms_meta.max_timestamp;
|
||||
auto estimate_window_count = [this](timestamp_type min_window, timestamp_type max_window) {
|
||||
auto estimate_window_count = [this] (timestamp_type min_window, timestamp_type max_window) {
|
||||
const auto window_size = get_window_size(_options);
|
||||
return (max_window + (window_size - 1) - min_window) / window_size;
|
||||
};
|
||||
@@ -221,19 +210,21 @@ uint64_t time_window_compaction_strategy::adjust_partition_estimate(const mutati
|
||||
return partition_estimate / std::max(1UL, uint64_t(estimated_window_count));
|
||||
}
|
||||
|
||||
mutation_reader_consumer time_window_compaction_strategy::make_interposer_consumer(
|
||||
const mutation_source_metadata& ms_meta, mutation_reader_consumer end_consumer) const {
|
||||
if (ms_meta.min_timestamp && ms_meta.max_timestamp &&
|
||||
get_window_for(_options, *ms_meta.min_timestamp) == get_window_for(_options, *ms_meta.max_timestamp)) {
|
||||
mutation_reader_consumer time_window_compaction_strategy::make_interposer_consumer(const mutation_source_metadata& ms_meta, mutation_reader_consumer end_consumer) const {
|
||||
if (ms_meta.min_timestamp && ms_meta.max_timestamp
|
||||
&& get_window_for(_options, *ms_meta.min_timestamp) == get_window_for(_options, *ms_meta.max_timestamp)) {
|
||||
return end_consumer;
|
||||
}
|
||||
return [options = _options, end_consumer = std::move(end_consumer)](mutation_reader rd) mutable -> future<> {
|
||||
return mutation_writer::segregate_by_timestamp(std::move(rd), classify_by_timestamp(std::move(options)), end_consumer);
|
||||
return [options = _options, end_consumer = std::move(end_consumer)] (mutation_reader rd) mutable -> future<> {
|
||||
return mutation_writer::segregate_by_timestamp(
|
||||
std::move(rd),
|
||||
classify_by_timestamp(std::move(options)),
|
||||
end_consumer);
|
||||
};
|
||||
}
|
||||
|
||||
compaction_descriptor time_window_compaction_strategy::get_reshaping_job(
|
||||
std::vector<sstables::shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
|
||||
compaction_descriptor
|
||||
time_window_compaction_strategy::get_reshaping_job(std::vector<sstables::shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
|
||||
auto mode = cfg.mode;
|
||||
std::vector<sstables::shared_sstable> single_window;
|
||||
std::vector<sstables::shared_sstable> multi_window;
|
||||
@@ -248,7 +239,7 @@ compaction_descriptor time_window_compaction_strategy::get_reshaping_job(
|
||||
|
||||
// Sort input sstables by first_key order
|
||||
// to allow efficient reshaping of disjoint sstables.
|
||||
std::sort(input.begin(), input.end(), [&schema](const sstables::shared_sstable& a, const sstables::shared_sstable& b) {
|
||||
std::sort(input.begin(), input.end(), [&schema] (const sstables::shared_sstable& a, const sstables::shared_sstable& b) {
|
||||
return dht::ring_position(a->get_first_decorated_key()).less_compare(*schema, dht::ring_position(b->get_first_decorated_key()));
|
||||
});
|
||||
|
||||
@@ -262,34 +253,31 @@ compaction_descriptor time_window_compaction_strategy::get_reshaping_job(
|
||||
}
|
||||
}
|
||||
|
||||
auto is_disjoint = [&schema, mode, max_sstables](const std::vector<sstables::shared_sstable>& ssts) {
|
||||
auto is_disjoint = [&schema, mode, max_sstables] (const std::vector<sstables::shared_sstable>& ssts) {
|
||||
size_t tolerance = (mode == reshape_mode::relaxed) ? max_sstables : 0;
|
||||
return sstable_set_overlapping_count(schema, ssts) <= tolerance;
|
||||
};
|
||||
|
||||
clogger.debug("time_window_compaction_strategy::get_reshaping_job: offstrategy_threshold={} max_sstables={} multi_window={} disjoint={} "
|
||||
"single_window={} disjoint={}",
|
||||
offstrategy_threshold, max_sstables, multi_window.size(), seastar::value_of([&] {
|
||||
return !multi_window.empty() && sstable_set_overlapping_count(schema, multi_window) == 0;
|
||||
}),
|
||||
single_window.size(), seastar::value_of([&] {
|
||||
return !single_window.empty() && sstable_set_overlapping_count(schema, single_window) == 0;
|
||||
}));
|
||||
clogger.debug("time_window_compaction_strategy::get_reshaping_job: offstrategy_threshold={} max_sstables={} multi_window={} disjoint={} single_window={} disjoint={}",
|
||||
offstrategy_threshold, max_sstables,
|
||||
multi_window.size(), !multi_window.empty() && sstable_set_overlapping_count(schema, multi_window) == 0,
|
||||
single_window.size(), !single_window.empty() && sstable_set_overlapping_count(schema, single_window) == 0);
|
||||
|
||||
auto get_job_size = [](const std::vector<sstables::shared_sstable>& ssts) {
|
||||
auto get_job_size = [] (const std::vector<sstables::shared_sstable>& ssts) {
|
||||
return std::ranges::fold_left(ssts | std::views::transform(std::mem_fn(&sstables::sstable::bytes_on_disk)), uint64_t(0), std::plus{});
|
||||
};
|
||||
|
||||
// Targets a space overhead of 10%. All disjoint sstables can be compacted together as long as they won't
|
||||
// cause an overhead above target. Otherwise, the job targets a maximum of #max_threshold sstables.
|
||||
auto need_trimming = [&](const std::vector<sstables::shared_sstable>& ssts, const uint64_t job_size, bool is_disjoint) {
|
||||
auto need_trimming = [&] (const std::vector<sstables::shared_sstable>& ssts, const uint64_t job_size, bool is_disjoint) {
|
||||
const size_t min_sstables = 2;
|
||||
auto is_above_target_size = job_size > target_job_size;
|
||||
|
||||
return (ssts.size() > max_sstables && !is_disjoint) || (ssts.size() > min_sstables && is_above_target_size);
|
||||
return (ssts.size() > max_sstables && !is_disjoint) ||
|
||||
(ssts.size() > min_sstables && is_above_target_size);
|
||||
};
|
||||
|
||||
auto maybe_trim_job = [&need_trimming](std::vector<sstables::shared_sstable>& ssts, uint64_t job_size, bool is_disjoint) {
|
||||
auto maybe_trim_job = [&need_trimming] (std::vector<sstables::shared_sstable>& ssts, uint64_t job_size, bool is_disjoint) {
|
||||
while (need_trimming(ssts, job_size, is_disjoint)) {
|
||||
auto sst = ssts.back();
|
||||
ssts.pop_back();
|
||||
@@ -306,7 +294,7 @@ compaction_descriptor time_window_compaction_strategy::get_reshaping_job(
|
||||
// For example, if there are N sstables spanning window W, where N <= 32, then we can produce all data for W
|
||||
// in a single compaction round, removing the need to later compact W to reduce its number of files.
|
||||
auto sort_size = std::min(max_sstables, multi_window.size());
|
||||
std::ranges::partial_sort(multi_window, multi_window.begin() + sort_size, std::ranges::less(), [](const sstables::shared_sstable& a) {
|
||||
std::ranges::partial_sort(multi_window, multi_window.begin() + sort_size, std::ranges::less(), [] (const sstables::shared_sstable &a) {
|
||||
return a->get_stats_metadata().max_timestamp;
|
||||
});
|
||||
maybe_trim_job(multi_window, job_size, disjoint);
|
||||
@@ -346,7 +334,8 @@ compaction_descriptor time_window_compaction_strategy::get_reshaping_job(
|
||||
return compaction_descriptor();
|
||||
}
|
||||
|
||||
future<compaction_descriptor> time_window_compaction_strategy::get_sstables_for_compaction(compaction_group_view& table_s, strategy_control& control) {
|
||||
future<compaction_descriptor>
|
||||
time_window_compaction_strategy::get_sstables_for_compaction(compaction_group_view& table_s, strategy_control& control) {
|
||||
auto state = get_state(table_s);
|
||||
auto compaction_time = gc_clock::now();
|
||||
auto candidates = co_await control.candidates(table_s);
|
||||
@@ -380,8 +369,10 @@ future<compaction_descriptor> time_window_compaction_strategy::get_sstables_for_
|
||||
co_return compaction_descriptor(std::move(compaction_candidates));
|
||||
}
|
||||
|
||||
time_window_compaction_strategy::bucket_compaction_mode time_window_compaction_strategy::compaction_mode(
|
||||
const time_window_compaction_strategy_state& state, const bucket_t& bucket, timestamp_type bucket_key, timestamp_type now, size_t min_threshold) const {
|
||||
time_window_compaction_strategy::bucket_compaction_mode
|
||||
time_window_compaction_strategy::compaction_mode(const time_window_compaction_strategy_state& state,
|
||||
const bucket_t& bucket, timestamp_type bucket_key,
|
||||
timestamp_type now, size_t min_threshold) const {
|
||||
// STCS will also be performed on older window buckets, to avoid a bad write and
|
||||
// space amplification when something like read repair cause small updates to
|
||||
// those past windows.
|
||||
@@ -394,7 +385,8 @@ time_window_compaction_strategy::bucket_compaction_mode time_window_compaction_s
|
||||
return bucket_compaction_mode::none;
|
||||
}
|
||||
|
||||
std::vector<sstables::shared_sstable> time_window_compaction_strategy::get_next_non_expired_sstables(compaction_group_view& table_s, strategy_control& control,
|
||||
std::vector<sstables::shared_sstable>
|
||||
time_window_compaction_strategy::get_next_non_expired_sstables(compaction_group_view& table_s, strategy_control& control,
|
||||
std::vector<sstables::shared_sstable> non_expiring_sstables, gc_clock::time_point compaction_time, time_window_compaction_strategy_state& state) {
|
||||
auto most_interesting = get_compaction_candidates(table_s, control, non_expiring_sstables, state);
|
||||
|
||||
@@ -408,29 +400,31 @@ std::vector<sstables::shared_sstable> time_window_compaction_strategy::get_next_
|
||||
|
||||
// if there is no sstable to compact in standard way, try compacting single sstable whose droppable tombstone
|
||||
// ratio is greater than threshold.
|
||||
std::erase_if(non_expiring_sstables, [this, compaction_time, &table_s](const sstables::shared_sstable& sst) -> bool {
|
||||
std::erase_if(non_expiring_sstables, [this, compaction_time, &table_s] (const sstables::shared_sstable& sst) -> bool {
|
||||
return !worth_dropping_tombstones(sst, compaction_time, table_s);
|
||||
});
|
||||
if (non_expiring_sstables.empty()) {
|
||||
return {};
|
||||
}
|
||||
auto it = std::ranges::min_element(non_expiring_sstables, [](auto& i, auto& j) {
|
||||
auto it = std::ranges::min_element(non_expiring_sstables, [] (auto& i, auto& j) {
|
||||
return i->get_stats_metadata().min_timestamp < j->get_stats_metadata().min_timestamp;
|
||||
});
|
||||
return {*it};
|
||||
return { *it };
|
||||
}
|
||||
|
||||
std::vector<sstables::shared_sstable> time_window_compaction_strategy::get_compaction_candidates(compaction_group_view& table_s, strategy_control& control,
|
||||
std::vector<sstables::shared_sstable> candidate_sstables, time_window_compaction_strategy_state& state) {
|
||||
std::vector<sstables::shared_sstable>
|
||||
time_window_compaction_strategy::get_compaction_candidates(compaction_group_view& table_s, strategy_control& control,
|
||||
std::vector<sstables::shared_sstable> candidate_sstables, time_window_compaction_strategy_state& state) {
|
||||
auto [buckets, max_timestamp] = get_buckets(std::move(candidate_sstables), _options);
|
||||
// Update the highest window seen, if necessary
|
||||
state.highest_window_seen = std::max(state.highest_window_seen, max_timestamp);
|
||||
|
||||
return newest_bucket(table_s, control, std::move(buckets), table_s.min_compaction_threshold(), table_s.schema()->max_compaction_threshold(),
|
||||
state.highest_window_seen, state);
|
||||
state.highest_window_seen, state);
|
||||
}
|
||||
|
||||
timestamp_type time_window_compaction_strategy::get_window_lower_bound(std::chrono::seconds sstable_window_size, timestamp_type timestamp) {
|
||||
timestamp_type
|
||||
time_window_compaction_strategy::get_window_lower_bound(std::chrono::seconds sstable_window_size, timestamp_type timestamp) {
|
||||
using namespace std::chrono;
|
||||
// mask out window size from timestamp to get lower bound of its window
|
||||
auto num_windows = microseconds(timestamp) / sstable_window_size;
|
||||
@@ -438,8 +432,8 @@ timestamp_type time_window_compaction_strategy::get_window_lower_bound(std::chro
|
||||
return duration_cast<microseconds>(num_windows * sstable_window_size).count();
|
||||
}
|
||||
|
||||
std::pair<std::map<timestamp_type, std::vector<sstables::shared_sstable>>, timestamp_type> time_window_compaction_strategy::get_buckets(
|
||||
std::vector<sstables::shared_sstable> files, const time_window_compaction_strategy_options& options) {
|
||||
std::pair<std::map<timestamp_type, std::vector<sstables::shared_sstable>>, timestamp_type>
|
||||
time_window_compaction_strategy::get_buckets(std::vector<sstables::shared_sstable> files, const time_window_compaction_strategy_options& options) {
|
||||
std::map<timestamp_type, std::vector<sstables::shared_sstable>> buckets;
|
||||
|
||||
timestamp_type max_timestamp = 0;
|
||||
@@ -456,13 +450,11 @@ std::pair<std::map<timestamp_type, std::vector<sstables::shared_sstable>>, times
|
||||
return std::make_pair(std::move(buckets), max_timestamp);
|
||||
}
|
||||
|
||||
} // namespace compaction
|
||||
}
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<std::map<compaction::timestamp_type, std::vector<sstables::shared_sstable>>> {
|
||||
constexpr auto parse(format_parse_context& ctx) {
|
||||
return ctx.begin();
|
||||
}
|
||||
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
|
||||
auto format(const std::map<compaction::timestamp_type, std::vector<sstables::shared_sstable>>& buckets, fmt::format_context& ctx) const {
|
||||
auto out = fmt::format_to(ctx.out(), " buckets = {{\n");
|
||||
for (auto& [timestamp, sstables] : buckets | std::views::reverse) {
|
||||
@@ -474,9 +466,9 @@ struct fmt::formatter<std::map<compaction::timestamp_type, std::vector<sstables:
|
||||
|
||||
namespace compaction {
|
||||
|
||||
std::vector<sstables::shared_sstable> time_window_compaction_strategy::newest_bucket(compaction_group_view& table_s, strategy_control& control,
|
||||
std::map<timestamp_type, std::vector<sstables::shared_sstable>> buckets, int min_threshold, int max_threshold, timestamp_type now,
|
||||
time_window_compaction_strategy_state& state) {
|
||||
std::vector<sstables::shared_sstable>
|
||||
time_window_compaction_strategy::newest_bucket(compaction_group_view& table_s, strategy_control& control, std::map<timestamp_type, std::vector<sstables::shared_sstable>> buckets,
|
||||
int min_threshold, int max_threshold, timestamp_type now, time_window_compaction_strategy_state& state) {
|
||||
clogger.debug("time_window_compaction_strategy::newest_bucket:\n now {}\n{}", now, buckets);
|
||||
|
||||
for (auto&& [key, bucket] : buckets | std::views::reverse) {
|
||||
@@ -517,7 +509,8 @@ std::vector<sstables::shared_sstable> time_window_compaction_strategy::newest_bu
|
||||
return {};
|
||||
}
|
||||
|
||||
std::vector<sstables::shared_sstable> time_window_compaction_strategy::trim_to_threshold(std::vector<sstables::shared_sstable> bucket, int max_threshold) {
|
||||
std::vector<sstables::shared_sstable>
|
||||
time_window_compaction_strategy::trim_to_threshold(std::vector<sstables::shared_sstable> bucket, int max_threshold) {
|
||||
auto n = std::min(bucket.size(), size_t(max_threshold));
|
||||
// Trim the largest sstables off the end to meet the maxThreshold
|
||||
std::ranges::partial_sort(bucket, bucket.begin() + n, std::ranges::less(), std::mem_fn(&sstables::sstable::ondisk_data_size));
|
||||
@@ -549,8 +542,8 @@ future<int64_t> time_window_compaction_strategy::estimated_pending_compactions(c
|
||||
co_return n;
|
||||
}
|
||||
|
||||
std::vector<compaction_descriptor> time_window_compaction_strategy::get_cleanup_compaction_jobs(
|
||||
compaction_group_view& table_s, std::vector<sstables::shared_sstable> candidates) const {
|
||||
std::vector<compaction_descriptor>
|
||||
time_window_compaction_strategy::get_cleanup_compaction_jobs(compaction_group_view& table_s, std::vector<sstables::shared_sstable> candidates) const {
|
||||
std::vector<compaction_descriptor> ret;
|
||||
for (auto&& [_, sstables] : get_buckets(std::move(candidates), _options).first) {
|
||||
auto per_window_jobs = size_tiered_compaction_strategy(_stcs_options).get_cleanup_compaction_jobs(table_s, std::move(sstables));
|
||||
@@ -563,4 +556,4 @@ std::unique_ptr<sstables::sstable_set_impl> time_window_compaction_strategy::mak
|
||||
return std::make_unique<sstables::time_series_sstable_set>(ts.schema(), _options.enable_optimized_twcs_queries);
|
||||
}
|
||||
|
||||
} // namespace compaction
|
||||
}
|
||||
|
||||
@@ -299,11 +299,13 @@ batch_size_fail_threshold_in_kb: 1024
|
||||
# max_hint_window_in_ms: 10800000 # 3 hours
|
||||
|
||||
|
||||
# Validity period for authorized statements cache. Defaults to 10000, set to 0 to disable.
|
||||
# Validity period for permissions cache (fetching permissions can be an
|
||||
# expensive operation depending on the authorizer, CassandraAuthorizer is
|
||||
# one example). Defaults to 10000, set to 0 to disable.
|
||||
# Will be disabled automatically for AllowAllAuthorizer.
|
||||
# permissions_validity_in_ms: 10000
|
||||
|
||||
# Refresh interval for authorized statements cache.
|
||||
# Refresh interval for permissions cache (if enabled).
|
||||
# After this interval, cache entries become eligible for refresh. Upon next
|
||||
# access, an async reload is scheduled and the old value returned until it
|
||||
# completes. If permissions_validity_in_ms is non-zero, then this also must have
|
||||
@@ -397,17 +399,6 @@ commitlog_total_space_in_mb: -1
|
||||
# you can cache more hot rows
|
||||
# column_index_size_in_kb: 64
|
||||
|
||||
# sstable format version for newly written sstables.
|
||||
# Currently allowed values are `me` and `ms`.
|
||||
# If not specified in the config, this defaults to `me`.
|
||||
#
|
||||
# The difference between `me` and `ms` are the data structures used
|
||||
# in the primary index.
|
||||
# In short, `ms` needs more CPU during sstable writes,
|
||||
# but should behave better during reads,
|
||||
# although it might behave worse for very long clustering keys.
|
||||
sstable_format: ms
|
||||
|
||||
# Auto-scaling of the promoted index prevents running out of memory
|
||||
# when the promoted index grows too large (due to partitions with many rows
|
||||
# vs. too small column_index_size_in_kb). When the serialized representation
|
||||
@@ -575,16 +566,15 @@ sstable_format: ms
|
||||
# prometheus_address: 1.2.3.4
|
||||
|
||||
# audit settings
|
||||
# Table audit is enabled by default.
|
||||
# By default, Scylla does not audit anything.
|
||||
# 'audit' config option controls if and where to output audited events:
|
||||
# - "none": auditing is disabled
|
||||
# - "table": save audited events in audit.audit_log column family (default)
|
||||
# - "none": auditing is disabled (default)
|
||||
# - "table": save audited events in audit.audit_log column family
|
||||
# - "syslog": send audited events via syslog (depends on OS, but usually to /dev/log)
|
||||
audit: "table"
|
||||
#
|
||||
# List of statement categories that should be audited.
|
||||
# Possible categories are: QUERY, DML, DCL, DDL, AUTH, ADMIN
|
||||
audit_categories: "DCL,AUTH,ADMIN"
|
||||
audit_categories: "DCL,DDL,AUTH,ADMIN"
|
||||
#
|
||||
# List of tables that should be audited.
|
||||
# audit_tables: "<keyspace_name>.<table_name>,<keyspace_name>.<table_name>"
|
||||
@@ -650,7 +640,7 @@ strict_is_not_null_in_views: true
|
||||
# * workdir: the node will open the maintenance socket on the path <scylla's workdir>/cql.m,
|
||||
# where <scylla's workdir> is a path defined by the workdir configuration option,
|
||||
# * <socket path>: the node will open the maintenance socket on the path <socket path>.
|
||||
maintenance_socket: workdir
|
||||
maintenance_socket: ignore
|
||||
|
||||
# If set to true, configuration parameters defined with LiveUpdate option can be updated in runtime with CQL
|
||||
# by updating system.config virtual table. If we don't want any configuration parameter to be changed in runtime
|
||||
@@ -659,9 +649,10 @@ maintenance_socket: workdir
|
||||
# e.g. for cloud users, for whom scylla's configuration should be changed only by support engineers.
|
||||
# live_updatable_config_params_changeable_via_cql: true
|
||||
|
||||
#
|
||||
# Guardrails options
|
||||
#
|
||||
# ****************
|
||||
# * GUARDRAILS *
|
||||
# ****************
|
||||
|
||||
# Guardrails to warn or fail when Replication Factor is smaller/greater than the threshold.
|
||||
# Please note that the value of 0 is always allowed,
|
||||
# which means that having no replication at all, i.e. RF = 0, is always valid.
|
||||
@@ -671,27 +662,6 @@ maintenance_socket: workdir
|
||||
# minimum_replication_factor_warn_threshold: 3
|
||||
# maximum_replication_factor_warn_threshold: -1
|
||||
# maximum_replication_factor_fail_threshold: -1
|
||||
#
|
||||
# Guardrails to warn about or disallow creating a keyspace with specific replication strategy.
|
||||
# Each of these 2 settings is a list storing replication strategies considered harmful.
|
||||
# The replication strategies to choose from are:
|
||||
# 1) SimpleStrategy,
|
||||
# 2) NetworkTopologyStrategy,
|
||||
# 3) LocalStrategy,
|
||||
# 4) EverywhereStrategy
|
||||
#
|
||||
# replication_strategy_warn_list:
|
||||
# - SimpleStrategy
|
||||
# replication_strategy_fail_list:
|
||||
#
|
||||
# Guardrail to enable the deprecated feature of CREATE TABLE WITH COMPACT STORAGE.
|
||||
# enable_create_table_with_compact_storage: false
|
||||
#
|
||||
# Guardrails to limit usage of selected consistency levels for writes.
|
||||
# Adding a warning to a CQL query response can significantly increase network
|
||||
# traffic and decrease overall throughput.
|
||||
# write_consistency_levels_warned: []
|
||||
# write_consistency_levels_disallowed: []
|
||||
|
||||
#
|
||||
# System information encryption settings
|
||||
@@ -869,6 +839,21 @@ maintenance_socket: workdir
|
||||
# key_namespace: <kmip key namespace> (optional)
|
||||
#
|
||||
|
||||
# Guardrails to warn about or disallow creating a keyspace with specific replication strategy.
|
||||
# Each of these 2 settings is a list storing replication strategies considered harmful.
|
||||
# The replication strategies to choose from are:
|
||||
# 1) SimpleStrategy,
|
||||
# 2) NetworkTopologyStrategy,
|
||||
# 3) LocalStrategy,
|
||||
# 4) EverywhereStrategy
|
||||
#
|
||||
# replication_strategy_warn_list:
|
||||
# - SimpleStrategy
|
||||
# replication_strategy_fail_list:
|
||||
|
||||
# Guardrail to enable the deprecated feature of CREATE TABLE WITH COMPACT STORAGE.
|
||||
# enable_create_table_with_compact_storage: false
|
||||
|
||||
# Control tablets for new keyspaces.
|
||||
# Can be set to: disabled|enabled|enforced
|
||||
#
|
||||
@@ -890,16 +875,7 @@ maintenance_socket: workdir
|
||||
# The `tablets` option cannot be changed using `ALTER KEYSPACE`.
|
||||
tablets_mode_for_new_keyspaces: enabled
|
||||
|
||||
# Require every tablet-enabled keyspace to be RF-rack-valid.
|
||||
#
|
||||
# A tablet-enabled keyspace is RF-rack-valid when, for each data center,
|
||||
# its replication factor (RF) is 0, 1, or exactly equal to the number of
|
||||
# racks in that data center. Setting the RF to the number of racks ensures
|
||||
# that a single rack failure never results in data unavailability.
|
||||
#
|
||||
# When set to true, CREATE KEYSPACE and ALTER KEYSPACE statements that
|
||||
# would produce an RF-rack-invalid keyspace are rejected.
|
||||
# When set to false, such statements are allowed but emit a warning.
|
||||
# Enforce RF-rack-valid keyspaces.
|
||||
rf_rack_valid_keyspaces: false
|
||||
|
||||
#
|
||||
|
||||
70
configure.py
70
configure.py
@@ -544,6 +544,7 @@ scylla_tests = set([
|
||||
'test/boost/caching_options_test',
|
||||
'test/boost/canonical_mutation_test',
|
||||
'test/boost/cartesian_product_test',
|
||||
'test/boost/cdc_generation_test',
|
||||
'test/boost/cell_locker_test',
|
||||
'test/boost/checksum_utils_test',
|
||||
'test/boost/chunked_managed_vector_test',
|
||||
@@ -618,7 +619,6 @@ scylla_tests = set([
|
||||
'test/boost/reservoir_sampling_test',
|
||||
'test/boost/result_utils_test',
|
||||
'test/boost/rest_client_test',
|
||||
'test/boost/rolling_max_tracker_test',
|
||||
'test/boost/reusable_buffer_test',
|
||||
'test/boost/rust_test',
|
||||
'test/boost/s3_test',
|
||||
@@ -730,6 +730,28 @@ vector_search_tests = set([
|
||||
'test/vector_search/rescoring_test'
|
||||
])
|
||||
|
||||
vector_search_validator_bin = 'vector-search-validator/bin/vector-search-validator'
|
||||
vector_search_validator_deps = set([
|
||||
'test/vector_search_validator/build-validator',
|
||||
'test/vector_search_validator/Cargo.toml',
|
||||
'test/vector_search_validator/crates/validator/Cargo.toml',
|
||||
'test/vector_search_validator/crates/validator/src/main.rs',
|
||||
'test/vector_search_validator/crates/validator-scylla/Cargo.toml',
|
||||
'test/vector_search_validator/crates/validator-scylla/src/lib.rs',
|
||||
'test/vector_search_validator/crates/validator-scylla/src/cql.rs',
|
||||
])
|
||||
|
||||
vector_store_bin = 'vector-search-validator/bin/vector-store'
|
||||
vector_store_deps = set([
|
||||
'test/vector_search_validator/build-env',
|
||||
'test/vector_search_validator/build-vector-store',
|
||||
])
|
||||
|
||||
vector_search_validator_bins = set([
|
||||
vector_search_validator_bin,
|
||||
vector_store_bin,
|
||||
])
|
||||
|
||||
wasms = set([
|
||||
'wasm/return_input.wat',
|
||||
'wasm/test_complex_null_values.wat',
|
||||
@@ -763,7 +785,7 @@ other = set([
|
||||
'iotune',
|
||||
])
|
||||
|
||||
all_artifacts = apps | cpp_apps | tests | other | wasms
|
||||
all_artifacts = apps | cpp_apps | tests | other | wasms | vector_search_validator_bins
|
||||
|
||||
arg_parser = argparse.ArgumentParser('Configure scylla', add_help=False, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||
arg_parser.add_argument('--out', dest='buildfile', action='store', default='build.ninja',
|
||||
@@ -896,9 +918,6 @@ scylla_core = (['message/messaging_service.cc',
|
||||
'replica/multishard_query.cc',
|
||||
'replica/mutation_dump.cc',
|
||||
'replica/querier.cc',
|
||||
'replica/logstor/segment_manager.cc',
|
||||
'replica/logstor/logstor.cc',
|
||||
'replica/logstor/write_buffer.cc',
|
||||
'mutation/atomic_cell.cc',
|
||||
'mutation/canonical_mutation.cc',
|
||||
'mutation/frozen_mutation.cc',
|
||||
@@ -1177,7 +1196,6 @@ scylla_core = (['message/messaging_service.cc',
|
||||
'utils/gz/crc_combine.cc',
|
||||
'utils/gz/crc_combine_table.cc',
|
||||
'utils/http.cc',
|
||||
'utils/http_client_error_processing.cc',
|
||||
'utils/rest/client.cc',
|
||||
'utils/s3/aws_error.cc',
|
||||
'utils/s3/client.cc',
|
||||
@@ -1195,7 +1213,6 @@ scylla_core = (['message/messaging_service.cc',
|
||||
'utils/azure/identity/default_credentials.cc',
|
||||
'utils/gcp/gcp_credentials.cc',
|
||||
'utils/gcp/object_storage.cc',
|
||||
'utils/gcp/object_storage_retry_strategy.cc',
|
||||
'gms/version_generator.cc',
|
||||
'gms/versioned_value.cc',
|
||||
'gms/gossiper.cc',
|
||||
@@ -1207,7 +1224,6 @@ scylla_core = (['message/messaging_service.cc',
|
||||
'gms/application_state.cc',
|
||||
'gms/inet_address.cc',
|
||||
'dht/i_partitioner.cc',
|
||||
'dht/fixed_shard.cc',
|
||||
'dht/token.cc',
|
||||
'dht/murmur3_partitioner.cc',
|
||||
'dht/boot_strapper.cc',
|
||||
@@ -1243,6 +1259,7 @@ scylla_core = (['message/messaging_service.cc',
|
||||
'service/pager/query_pagers.cc',
|
||||
'service/qos/qos_common.cc',
|
||||
'service/qos/service_level_controller.cc',
|
||||
'service/qos/standard_service_level_distributed_data_accessor.cc',
|
||||
'service/qos/raft_service_level_distributed_data_accessor.cc',
|
||||
'streaming/stream_task.cc',
|
||||
'streaming/stream_session.cc',
|
||||
@@ -1276,10 +1293,11 @@ scylla_core = (['message/messaging_service.cc',
|
||||
'auth/common.cc',
|
||||
'auth/default_authorizer.cc',
|
||||
'auth/resource.cc',
|
||||
'auth/roles-metadata.cc',
|
||||
'auth/passwords.cc',
|
||||
'auth/maintenance_socket_authenticator.cc',
|
||||
'auth/password_authenticator.cc',
|
||||
'auth/permission.cc',
|
||||
'auth/permissions_cache.cc',
|
||||
'auth/service.cc',
|
||||
'auth/standard_role_manager.cc',
|
||||
'auth/ldap_role_manager.cc',
|
||||
@@ -1343,7 +1361,6 @@ scylla_core = (['message/messaging_service.cc',
|
||||
'service/strong_consistency/groups_manager.cc',
|
||||
'service/strong_consistency/coordinator.cc',
|
||||
'service/strong_consistency/state_machine.cc',
|
||||
'service/strong_consistency/raft_groups_storage.cc',
|
||||
'service/raft/group0_state_id_handler.cc',
|
||||
'service/raft/group0_state_machine.cc',
|
||||
'service/raft/group0_state_machine_merger.cc',
|
||||
@@ -1365,6 +1382,7 @@ scylla_core = (['message/messaging_service.cc',
|
||||
'service/topology_state_machine.cc',
|
||||
'service/topology_mutation.cc',
|
||||
'service/topology_coordinator.cc',
|
||||
'node_ops/node_ops_ctl.cc',
|
||||
'node_ops/task_manager_module.cc',
|
||||
'reader_concurrency_semaphore_group.cc',
|
||||
'utils/disk_space_monitor.cc',
|
||||
@@ -1470,7 +1488,6 @@ idls = ['idl/gossip_digest.idl.hh',
|
||||
'idl/query.idl.hh',
|
||||
'idl/idl_test.idl.hh',
|
||||
'idl/commitlog.idl.hh',
|
||||
'idl/logstor.idl.hh',
|
||||
'idl/tracing.idl.hh',
|
||||
'idl/consistency_level.idl.hh',
|
||||
'idl/cache_temperature.idl.hh',
|
||||
@@ -1478,7 +1495,6 @@ idls = ['idl/gossip_digest.idl.hh',
|
||||
'idl/messaging_service.idl.hh',
|
||||
'idl/paxos.idl.hh',
|
||||
'idl/raft.idl.hh',
|
||||
'idl/raft_util.idl.hh',
|
||||
'idl/raft_storage.idl.hh',
|
||||
'idl/group0.idl.hh',
|
||||
'idl/hinted_handoff.idl.hh',
|
||||
@@ -1498,9 +1514,7 @@ idls = ['idl/gossip_digest.idl.hh',
|
||||
'idl/gossip.idl.hh',
|
||||
'idl/migration_manager.idl.hh',
|
||||
"idl/node_ops.idl.hh",
|
||||
"idl/tasks.idl.hh",
|
||||
"idl/client_state.idl.hh",
|
||||
"idl/forward_cql.idl.hh",
|
||||
"idl/tasks.idl.hh"
|
||||
]
|
||||
|
||||
scylla_tests_generic_dependencies = [
|
||||
@@ -1593,7 +1607,6 @@ pure_boost_tests = set([
|
||||
'test/boost/wrapping_interval_test',
|
||||
'test/boost/range_tombstone_list_test',
|
||||
'test/boost/reservoir_sampling_test',
|
||||
'test/boost/rolling_max_tracker_test',
|
||||
'test/boost/serialization_test',
|
||||
'test/boost/small_vector_test',
|
||||
'test/boost/top_k_test',
|
||||
@@ -1654,7 +1667,6 @@ for t in sorted(perf_tests):
|
||||
|
||||
deps['test/boost/combined_tests'] += [
|
||||
'test/boost/aggregate_fcts_test.cc',
|
||||
'test/boost/auth_cache_test.cc',
|
||||
'test/boost/auth_test.cc',
|
||||
'test/boost/batchlog_manager_test.cc',
|
||||
'test/boost/cache_algorithm_test.cc',
|
||||
@@ -1742,7 +1754,6 @@ deps['test/boost/url_parse_test'] = ['utils/http.cc', 'test/boost/url_parse_test
|
||||
deps['test/boost/murmur_hash_test'] = ['bytes.cc', 'utils/murmur_hash.cc', 'test/boost/murmur_hash_test.cc']
|
||||
deps['test/boost/allocation_strategy_test'] = ['test/boost/allocation_strategy_test.cc', 'utils/logalloc.cc', 'utils/dynamic_bitset.cc', 'utils/labels.cc']
|
||||
deps['test/boost/log_heap_test'] = ['test/boost/log_heap_test.cc']
|
||||
deps['test/boost/rolling_max_tracker_test'] = ['test/boost/rolling_max_tracker_test.cc']
|
||||
deps['test/boost/estimated_histogram_test'] = ['test/boost/estimated_histogram_test.cc']
|
||||
deps['test/boost/summary_test'] = ['test/boost/summary_test.cc']
|
||||
deps['test/boost/anchorless_list_test'] = ['test/boost/anchorless_list_test.cc']
|
||||
@@ -2574,10 +2585,11 @@ def write_build_file(f,
|
||||
description = RUST_LIB $out
|
||||
''').format(mode=mode, antlr3_exec=args.antlr3_exec, fmt_lib=fmt_lib, test_repeat=args.test_repeat, test_timeout=args.test_timeout, rustc_wrapper=rustc_wrapper, **modeval))
|
||||
f.write(
|
||||
'build {mode}-build: phony {artifacts} {wasms}\n'.format(
|
||||
'build {mode}-build: phony {artifacts} {wasms} {vector_search_validator_bins}\n'.format(
|
||||
mode=mode,
|
||||
artifacts=str.join(' ', ['$builddir/' + mode + '/' + x for x in sorted(build_artifacts - wasms)]),
|
||||
artifacts=str.join(' ', ['$builddir/' + mode + '/' + x for x in sorted(build_artifacts - wasms - vector_search_validator_bins)]),
|
||||
wasms = str.join(' ', ['$builddir/' + x for x in sorted(build_artifacts & wasms)]),
|
||||
vector_search_validator_bins=str.join(' ', ['$builddir/' + x for x in sorted(build_artifacts & vector_search_validator_bins)]),
|
||||
)
|
||||
)
|
||||
if profile_recipe := modes[mode].get('profile_recipe'):
|
||||
@@ -2607,7 +2619,7 @@ def write_build_file(f,
|
||||
continue
|
||||
profile_dep = modes[mode].get('profile_target', "")
|
||||
|
||||
if binary in other or binary in wasms:
|
||||
if binary in other or binary in wasms or binary in vector_search_validator_bins:
|
||||
continue
|
||||
srcs = deps[binary]
|
||||
# 'scylla'
|
||||
@@ -2718,10 +2730,11 @@ def write_build_file(f,
|
||||
)
|
||||
|
||||
f.write(
|
||||
'build {mode}-test: test.{mode} {test_executables} $builddir/{mode}/scylla {wasms}\n'.format(
|
||||
'build {mode}-test: test.{mode} {test_executables} $builddir/{mode}/scylla {wasms} {vector_search_validator_bins} \n'.format(
|
||||
mode=mode,
|
||||
test_executables=' '.join(['$builddir/{}/{}'.format(mode, binary) for binary in sorted(tests)]),
|
||||
wasms=' '.join([f'$builddir/{binary}' for binary in sorted(wasms)]),
|
||||
vector_search_validator_bins=' '.join([f'$builddir/{binary}' for binary in sorted(vector_search_validator_bins)]),
|
||||
)
|
||||
)
|
||||
f.write(
|
||||
@@ -2889,6 +2902,19 @@ def write_build_file(f,
|
||||
'build compiler-training: phony {}\n'.format(' '.join(['{mode}-compiler-training'.format(mode=mode) for mode in default_modes]))
|
||||
)
|
||||
|
||||
f.write(textwrap.dedent(f'''\
|
||||
rule build-vector-search-validator
|
||||
command = test/vector_search_validator/build-validator $builddir
|
||||
rule build-vector-store
|
||||
command = test/vector_search_validator/build-vector-store $builddir
|
||||
'''))
|
||||
f.write(
|
||||
'build $builddir/{vector_search_validator_bin}: build-vector-search-validator {}\n'.format(' '.join([dep for dep in sorted(vector_search_validator_deps)]), vector_search_validator_bin=vector_search_validator_bin)
|
||||
)
|
||||
f.write(
|
||||
'build $builddir/{vector_store_bin}: build-vector-store {}\n'.format(' '.join([dep for dep in sorted(vector_store_deps)]), vector_store_bin=vector_store_bin)
|
||||
)
|
||||
|
||||
f.write(textwrap.dedent(f'''\
|
||||
build dist-unified-tar: phony {' '.join([f'$builddir/{mode}/dist/tar/{scylla_product}-unified-{scylla_version}-{scylla_release}.{arch}.tar.gz' for mode in default_modes])}
|
||||
build dist-unified: phony dist-unified-tar
|
||||
|
||||
62
cql3/Cql.g
62
cql3/Cql.g
@@ -389,10 +389,8 @@ selectStatement returns [std::unique_ptr<raw::select_statement> expr]
|
||||
bool is_ann_ordering = false;
|
||||
}
|
||||
: K_SELECT (
|
||||
( (K_JSON K_DISTINCT)=> K_JSON { statement_subtype = raw::select_statement::parameters::statement_subtype::JSON; }
|
||||
| (K_JSON selectClause K_FROM)=> K_JSON { statement_subtype = raw::select_statement::parameters::statement_subtype::JSON; }
|
||||
)?
|
||||
( (K_DISTINCT selectClause K_FROM)=> K_DISTINCT { is_distinct = true; } )?
|
||||
( K_JSON { statement_subtype = raw::select_statement::parameters::statement_subtype::JSON; } )?
|
||||
( K_DISTINCT { is_distinct = true; } )?
|
||||
sclause=selectClause
|
||||
)
|
||||
K_FROM (
|
||||
@@ -427,13 +425,13 @@ selector returns [shared_ptr<raw_selector> s]
|
||||
|
||||
unaliasedSelector returns [uexpression tmp]
|
||||
: ( c=cident { tmp = unresolved_identifier{std::move(c)}; }
|
||||
| v=value { tmp = std::move(v); }
|
||||
| K_COUNT '(' countArgument ')' { tmp = make_count_rows_function_expression(); }
|
||||
| K_WRITETIME '(' c=cident ')' { tmp = column_mutation_attribute{column_mutation_attribute::attribute_kind::writetime,
|
||||
unresolved_identifier{std::move(c)}}; }
|
||||
| K_TTL '(' c=cident ')' { tmp = column_mutation_attribute{column_mutation_attribute::attribute_kind::ttl,
|
||||
unresolved_identifier{std::move(c)}}; }
|
||||
| f=functionName args=selectionFunctionArgs { tmp = function_call{std::move(f), std::move(args)}; }
|
||||
| f=similarityFunctionName args=vectorSimilarityArgs { tmp = function_call{std::move(f), std::move(args)}; }
|
||||
| K_CAST '(' arg=unaliasedSelector K_AS t=native_type ')' { tmp = cast{.style = cast::cast_style::sql, .arg = std::move(arg), .type = std::move(t)}; }
|
||||
)
|
||||
( '.' fi=cident { tmp = field_selection{std::move(tmp), std::move(fi)}; }
|
||||
@@ -448,9 +446,23 @@ selectionFunctionArgs returns [std::vector<expression> a]
|
||||
')'
|
||||
;
|
||||
|
||||
vectorSimilarityArgs returns [std::vector<expression> a]
|
||||
: '(' ')'
|
||||
| '(' v1=vectorSimilarityArg { a.push_back(std::move(v1)); }
|
||||
( ',' vn=vectorSimilarityArg { a.push_back(std::move(vn)); } )*
|
||||
')'
|
||||
;
|
||||
|
||||
vectorSimilarityArg returns [uexpression a]
|
||||
: s=unaliasedSelector { a = std::move(s); }
|
||||
| v=value { a = std::move(v); }
|
||||
;
|
||||
|
||||
countArgument
|
||||
: '*'
|
||||
/* COUNT(1) is also allowed, it is recognized via the general function(args) path */
|
||||
| i=INTEGER { if (i->getText() != "1") {
|
||||
add_recognition_error("Only COUNT(1) is supported, got COUNT(" + i->getText() + ")");
|
||||
} }
|
||||
;
|
||||
|
||||
whereClause returns [uexpression clause]
|
||||
@@ -874,8 +886,8 @@ cfamDefinition[cql3::statements::create_table_statement::raw_statement& expr]
|
||||
;
|
||||
|
||||
cfamColumns[cql3::statements::create_table_statement::raw_statement& expr]
|
||||
@init { bool is_static=false, is_ttl=false; }
|
||||
: k=ident v=comparatorType (K_TTL {is_ttl = true;})? (K_STATIC {is_static = true;})? { $expr.add_definition(k, v, is_static, is_ttl); }
|
||||
@init { bool is_static=false; }
|
||||
: k=ident v=comparatorType (K_STATIC {is_static = true;})? { $expr.add_definition(k, v, is_static); }
|
||||
(K_PRIMARY K_KEY { $expr.add_key_aliases(std::vector<shared_ptr<cql3::column_identifier>>{k}); })?
|
||||
| K_PRIMARY K_KEY '(' pkDef[expr] (',' c=ident { $expr.add_column_alias(c); } )* ')'
|
||||
;
|
||||
@@ -1042,7 +1054,6 @@ alterTableStatement returns [std::unique_ptr<alter_table_statement::raw_statemen
|
||||
std::vector<alter_table_statement::column_change> column_changes;
|
||||
std::vector<std::pair<shared_ptr<cql3::column_identifier::raw>, shared_ptr<cql3::column_identifier::raw>>> renames;
|
||||
auto attrs = std::make_unique<cql3::attributes::raw>();
|
||||
shared_ptr<cql3::column_identifier::raw> ttl_change;
|
||||
}
|
||||
: K_ALTER K_COLUMNFAMILY cf=columnFamilyName
|
||||
( K_ALTER id=cident K_TYPE v=comparatorType { type = alter_table_statement::type::alter; column_changes.emplace_back(alter_table_statement::column_change{id, v}); }
|
||||
@@ -1061,11 +1072,9 @@ alterTableStatement returns [std::unique_ptr<alter_table_statement::raw_statemen
|
||||
| K_RENAME { type = alter_table_statement::type::rename; }
|
||||
id1=cident K_TO toId1=cident { renames.emplace_back(id1, toId1); }
|
||||
( K_AND idn=cident K_TO toIdn=cident { renames.emplace_back(idn, toIdn); } )*
|
||||
| K_TTL { type = alter_table_statement::type::ttl; }
|
||||
( id=cident { ttl_change = id; } | K_NULL )
|
||||
)
|
||||
{
|
||||
$expr = std::make_unique<alter_table_statement::raw_statement>(std::move(cf), type, std::move(column_changes), std::move(props), std::move(renames), std::move(attrs), std::move(ttl_change));
|
||||
$expr = std::make_unique<alter_table_statement::raw_statement>(std::move(cf), type, std::move(column_changes), std::move(props), std::move(renames), std::move(attrs));
|
||||
}
|
||||
;
|
||||
|
||||
@@ -1697,6 +1706,10 @@ functionName returns [cql3::functions::function_name s]
|
||||
: (ks=keyspaceName '.')? f=allowedFunctionName { $s.keyspace = std::move(ks); $s.name = std::move(f); }
|
||||
;
|
||||
|
||||
similarityFunctionName returns [cql3::functions::function_name s]
|
||||
: f=allowedSimilarityFunctionName { $s = cql3::functions::function_name::native_function(std::move(f)); }
|
||||
;
|
||||
|
||||
allowedFunctionName returns [sstring s]
|
||||
: f=IDENT { $s = $f.text; std::transform(s.begin(), s.end(), s.begin(), ::tolower); }
|
||||
| f=QUOTED_NAME { $s = $f.text; }
|
||||
@@ -1705,6 +1718,11 @@ allowedFunctionName returns [sstring s]
|
||||
| K_COUNT { $s = "count"; }
|
||||
;
|
||||
|
||||
allowedSimilarityFunctionName returns [sstring s]
|
||||
: f=(K_SIMILARITY_COSINE | K_SIMILARITY_EUCLIDEAN | K_SIMILARITY_DOT_PRODUCT)
|
||||
{ $s = $f.text; std::transform(s.begin(), s.end(), s.begin(), ::tolower); }
|
||||
;
|
||||
|
||||
functionArgs returns [std::vector<expression> a]
|
||||
: '(' ')'
|
||||
| '(' t1=term { a.push_back(std::move(t1)); }
|
||||
@@ -2074,21 +2092,7 @@ vector_type returns [shared_ptr<cql3::cql3_type::raw> pt]
|
||||
{
|
||||
if ($d.text[0] == '-')
|
||||
throw exceptions::invalid_request_exception("Vectors must have a dimension greater than 0");
|
||||
unsigned long parsed_dimension;
|
||||
try {
|
||||
parsed_dimension = std::stoul($d.text);
|
||||
} catch (const std::exception& e) {
|
||||
throw exceptions::invalid_request_exception(format("Invalid vector dimension: {}", $d.text));
|
||||
}
|
||||
static_assert(sizeof(unsigned long) >= sizeof(vector_dimension_t));
|
||||
if (parsed_dimension == 0) {
|
||||
throw exceptions::invalid_request_exception("Vectors must have a dimension greater than 0");
|
||||
}
|
||||
if (parsed_dimension > cql3::cql3_type::MAX_VECTOR_DIMENSION) {
|
||||
throw exceptions::invalid_request_exception(
|
||||
format("Vectors must have a dimension less than or equal to {}", cql3::cql3_type::MAX_VECTOR_DIMENSION));
|
||||
}
|
||||
$pt = cql3::cql3_type::raw::vector(t, static_cast<vector_dimension_t>(parsed_dimension));
|
||||
$pt = cql3::cql3_type::raw::vector(t, std::stoul($d.text));
|
||||
}
|
||||
;
|
||||
|
||||
@@ -2415,6 +2419,10 @@ K_MUTATION_FRAGMENTS: M U T A T I O N '_' F R A G M E N T S;
|
||||
|
||||
K_VECTOR_SEARCH_INDEXING: V E C T O R '_' S E A R C H '_' I N D E X I N G;
|
||||
|
||||
K_SIMILARITY_EUCLIDEAN: S I M I L A R I T Y '_' E U C L I D E A N;
|
||||
K_SIMILARITY_COSINE: S I M I L A R I T Y '_' C O S I N E;
|
||||
K_SIMILARITY_DOT_PRODUCT: S I M I L A R I T Y '_' D O T '_' P R O D U C T;
|
||||
|
||||
// Case-insensitive alpha characters
|
||||
fragment A: ('a'|'A');
|
||||
fragment B: ('b'|'B');
|
||||
|
||||
@@ -27,7 +27,7 @@ public:
|
||||
|
||||
struct vector_test_result {
|
||||
test_result result;
|
||||
std::optional<vector_dimension_t> dimension_opt;
|
||||
std::optional<size_t> dimension_opt;
|
||||
};
|
||||
|
||||
static bool is_assignable(test_result tr) {
|
||||
|
||||
@@ -23,7 +23,7 @@ column_specification::column_specification(std::string_view ks_name_, std::strin
|
||||
|
||||
bool column_specification::all_in_same_table(const std::vector<lw_shared_ptr<column_specification>>& names)
|
||||
{
|
||||
throwing_assert(!names.empty());
|
||||
SCYLLA_ASSERT(!names.empty());
|
||||
|
||||
auto first = names.front();
|
||||
return std::all_of(std::next(names.begin()), names.end(), [first] (auto&& spec) {
|
||||
|
||||
@@ -49,9 +49,9 @@ static cql3_type::kind get_cql3_kind(const abstract_type& t) {
|
||||
cql3_type::kind operator()(const uuid_type_impl&) { return cql3_type::kind::UUID; }
|
||||
cql3_type::kind operator()(const varint_type_impl&) { return cql3_type::kind::VARINT; }
|
||||
cql3_type::kind operator()(const reversed_type_impl& r) { return get_cql3_kind(*r.underlying_type()); }
|
||||
cql3_type::kind operator()(const tuple_type_impl&) { throwing_assert(0 && "no kind for this type"); }
|
||||
cql3_type::kind operator()(const vector_type_impl&) { throwing_assert(0 && "no kind for this type"); }
|
||||
cql3_type::kind operator()(const collection_type_impl&) { throwing_assert(0 && "no kind for this type"); }
|
||||
cql3_type::kind operator()(const tuple_type_impl&) { SCYLLA_ASSERT(0 && "no kind for this type"); }
|
||||
cql3_type::kind operator()(const vector_type_impl&) { SCYLLA_ASSERT(0 && "no kind for this type"); }
|
||||
cql3_type::kind operator()(const collection_type_impl&) { SCYLLA_ASSERT(0 && "no kind for this type"); }
|
||||
};
|
||||
return visit(t, visitor{});
|
||||
}
|
||||
@@ -124,7 +124,7 @@ class cql3_type::raw_collection : public raw {
|
||||
} else if (_kind == abstract_type::kind::map) {
|
||||
return format("{}map<{}, {}>{}", start, _keys, _values, end);
|
||||
}
|
||||
throwing_assert(0 && "invalid raw_collection kind");
|
||||
abort();
|
||||
}
|
||||
public:
|
||||
raw_collection(const abstract_type::kind kind, shared_ptr<raw> keys, shared_ptr<raw> values)
|
||||
@@ -150,7 +150,7 @@ public:
|
||||
}
|
||||
|
||||
virtual cql3_type prepare_internal(const sstring& keyspace, const data_dictionary::user_types_metadata& user_types) override {
|
||||
throwing_assert(_values); // "Got null values type for a collection";
|
||||
SCYLLA_ASSERT(_values); // "Got null values type for a collection";
|
||||
|
||||
if (_values->is_counter()) {
|
||||
throw exceptions::invalid_request_exception(format("Counters are not allowed inside collections: {}", *this));
|
||||
@@ -190,7 +190,7 @@ private:
|
||||
}
|
||||
return cql3_type(set_type_impl::get_instance(_values->prepare_internal(keyspace, user_types).get_type(), !is_frozen()));
|
||||
} else if (_kind == abstract_type::kind::map) {
|
||||
throwing_assert(_keys); // "Got null keys type for a collection";
|
||||
SCYLLA_ASSERT(_keys); // "Got null keys type for a collection";
|
||||
if (_keys->is_duration()) {
|
||||
throw exceptions::invalid_request_exception(format("Durations are not allowed as map keys: {}", *this));
|
||||
}
|
||||
@@ -198,7 +198,7 @@ private:
|
||||
_values->prepare_internal(keyspace, user_types).get_type(),
|
||||
!is_frozen()));
|
||||
}
|
||||
throwing_assert(0 && "do_prepare invalid kind");
|
||||
abort();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -307,14 +307,17 @@ public:
|
||||
|
||||
class cql3_type::raw_vector : public raw {
|
||||
shared_ptr<raw> _type;
|
||||
vector_dimension_t _dimension;
|
||||
size_t _dimension;
|
||||
|
||||
// This limitation is acquired from the maximum number of dimensions in OpenSearch.
|
||||
static constexpr size_t MAX_VECTOR_DIMENSION = 16000;
|
||||
|
||||
virtual sstring to_string() const override {
|
||||
return seastar::format("vector<{}, {}>", _type, _dimension);
|
||||
}
|
||||
|
||||
public:
|
||||
raw_vector(shared_ptr<raw> type, vector_dimension_t dimension)
|
||||
raw_vector(shared_ptr<raw> type, size_t dimension)
|
||||
: _type(std::move(type)), _dimension(dimension) {
|
||||
}
|
||||
|
||||
@@ -414,7 +417,7 @@ cql3_type::raw::tuple(std::vector<shared_ptr<raw>> ts) {
|
||||
}
|
||||
|
||||
shared_ptr<cql3_type::raw>
|
||||
cql3_type::raw::vector(shared_ptr<raw> t, vector_dimension_t dimension) {
|
||||
cql3_type::raw::vector(shared_ptr<raw> t, size_t dimension) {
|
||||
return ::make_shared<raw_vector>(std::move(t), dimension);
|
||||
}
|
||||
|
||||
|
||||
@@ -39,9 +39,6 @@ public:
|
||||
data_type get_type() const { return _type; }
|
||||
const sstring& to_string() const { return _type->cql3_type_name(); }
|
||||
|
||||
// This limitation is acquired from the maximum number of dimensions in OpenSearch.
|
||||
static constexpr vector_dimension_t MAX_VECTOR_DIMENSION = 16000;
|
||||
|
||||
// For UserTypes, we need to know the current keyspace to resolve the
|
||||
// actual type used, so Raw is a "not yet prepared" CQL3Type.
|
||||
class raw {
|
||||
@@ -67,7 +64,7 @@ public:
|
||||
static shared_ptr<raw> list(shared_ptr<raw> t);
|
||||
static shared_ptr<raw> set(shared_ptr<raw> t);
|
||||
static shared_ptr<raw> tuple(std::vector<shared_ptr<raw>> ts);
|
||||
static shared_ptr<raw> vector(shared_ptr<raw> t, vector_dimension_t dimension);
|
||||
static shared_ptr<raw> vector(shared_ptr<raw> t, size_t dimension);
|
||||
static shared_ptr<raw> frozen(shared_ptr<raw> t);
|
||||
friend sstring format_as(const raw& r) {
|
||||
return r.to_string();
|
||||
|
||||
@@ -1603,7 +1603,7 @@ static cql3::raw_value do_evaluate(const collection_constructor& collection, con
|
||||
case collection_constructor::style_type::vector:
|
||||
return evaluate_vector(collection, inputs);
|
||||
}
|
||||
throwing_assert(0 && "do_evaluate invalid style");
|
||||
std::abort();
|
||||
}
|
||||
|
||||
static cql3::raw_value do_evaluate(const usertype_constructor& user_val, const evaluation_inputs& inputs) {
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
#include "expr-utils.hh"
|
||||
#include "evaluate.hh"
|
||||
#include "cql3/functions/functions.hh"
|
||||
#include "cql3/functions/aggregate_fcts.hh"
|
||||
#include "cql3/functions/castas_fcts.hh"
|
||||
#include "cql3/functions/scalar_function.hh"
|
||||
#include "cql3/column_identifier.hh"
|
||||
@@ -502,8 +501,8 @@ vector_validate_assignable_to(const collection_constructor& c, data_dictionary::
|
||||
throw exceptions::invalid_request_exception(format("Invalid vector type literal for {} of type {}", *receiver.name, receiver.type->as_cql3_type()));
|
||||
}
|
||||
|
||||
vector_dimension_t expected_size = vt->get_dimension();
|
||||
if (expected_size == 0) {
|
||||
size_t expected_size = vt->get_dimension();
|
||||
if (!expected_size) {
|
||||
throw exceptions::invalid_request_exception(format("Invalid vector type literal for {}: type {} expects at least one element",
|
||||
*receiver.name, receiver.type->as_cql3_type()));
|
||||
}
|
||||
@@ -876,7 +875,7 @@ cast_test_assignment(const cast& c, data_dictionary::database db, const sstring&
|
||||
return assignment_testable::test_result::NOT_ASSIGNABLE;
|
||||
}
|
||||
} catch (exceptions::invalid_request_exception& e) {
|
||||
throwing_assert(0 && "cast_test_assignment exception");
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1048,47 +1047,8 @@ prepare_function_args_for_type_inference(std::span<const expression> args, data_
|
||||
return partially_prepared_args;
|
||||
}
|
||||
|
||||
// Special case for count(1) - recognize it as the countRows() function. Note it is quite
|
||||
// artificial and we might relax it to the more general count(expression) later.
|
||||
static
|
||||
std::optional<expression>
|
||||
try_prepare_count_rows(const expr::function_call& fc, data_dictionary::database db, const sstring& keyspace, const schema* schema_opt, lw_shared_ptr<column_specification> receiver) {
|
||||
return std::visit(overloaded_functor{
|
||||
[&] (const functions::function_name& name) -> std::optional<expression> {
|
||||
auto native_name = name;
|
||||
if (!native_name.has_keyspace()) {
|
||||
native_name = name.as_native_function();
|
||||
}
|
||||
// Collapse count(1) into countRows()
|
||||
if (native_name == functions::function_name::native_function("count")) {
|
||||
if (fc.args.size() == 1) {
|
||||
if (auto uc_arg = expr::as_if<expr::untyped_constant>(&fc.args[0])) {
|
||||
if (uc_arg->partial_type == expr::untyped_constant::type_class::integer
|
||||
&& uc_arg->raw_text == "1") {
|
||||
return expr::function_call{
|
||||
.func = functions::aggregate_fcts::make_count_rows_function(),
|
||||
.args = {},
|
||||
};
|
||||
} else {
|
||||
throw exceptions::invalid_request_exception(format("count() expects a column or the literal 1 as an argument", fc.args[0]));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
},
|
||||
[] (const shared_ptr<functions::function>&) -> std::optional<expression> {
|
||||
// Already prepared, nothing to do
|
||||
return std::nullopt;
|
||||
},
|
||||
}, fc.func);
|
||||
}
|
||||
|
||||
std::optional<expression>
|
||||
prepare_function_call(const expr::function_call& fc, data_dictionary::database db, const sstring& keyspace, const schema* schema_opt, lw_shared_ptr<column_specification> receiver) {
|
||||
if (auto prepared = try_prepare_count_rows(fc, db, keyspace, schema_opt, receiver)) {
|
||||
return prepared;
|
||||
}
|
||||
// Try to extract a column family name from the available information.
|
||||
// Most functions can be prepared without information about the column family, usually just the keyspace is enough.
|
||||
// One exception is the token() function - in order to prepare system.token() we have to know the partition key of the table,
|
||||
|
||||
@@ -544,7 +544,7 @@ functions::get_user_aggregates(const sstring& keyspace) const {
|
||||
|
||||
std::ranges::subrange<functions::declared_t::const_iterator>
|
||||
functions::find(const function_name& name) const {
|
||||
throwing_assert(name.has_keyspace()); // : "function name not fully qualified";
|
||||
SCYLLA_ASSERT(name.has_keyspace()); // : "function name not fully qualified";
|
||||
auto pair = _declared.equal_range(name);
|
||||
return std::ranges::subrange(pair.first, pair.second);
|
||||
}
|
||||
|
||||
@@ -10,38 +10,9 @@
|
||||
#include "types/types.hh"
|
||||
#include "types/vector.hh"
|
||||
#include "exceptions/exceptions.hh"
|
||||
#include <bit>
|
||||
#include <span>
|
||||
#include <seastar/core/byteorder.hh>
|
||||
|
||||
namespace cql3 {
|
||||
namespace functions {
|
||||
|
||||
namespace detail {
|
||||
|
||||
std::vector<float> extract_float_vector(const bytes_opt& param, vector_dimension_t dimension) {
|
||||
if (!param) {
|
||||
throw exceptions::invalid_request_exception("Cannot extract float vector from null parameter");
|
||||
}
|
||||
|
||||
const size_t expected_size = dimension * sizeof(float);
|
||||
if (param->size() != expected_size) {
|
||||
throw exceptions::invalid_request_exception(
|
||||
fmt::format("Invalid vector size: expected {} bytes for {} floats, got {} bytes",
|
||||
expected_size, dimension, param->size()));
|
||||
}
|
||||
|
||||
std::vector<float> result(dimension);
|
||||
const char* p = reinterpret_cast<const char*>(param->data());
|
||||
for (size_t i = 0; i < dimension; ++i) {
|
||||
result[i] = std::bit_cast<float>(consume_be<uint32_t>(p));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
namespace {
|
||||
|
||||
// The computations of similarity scores match the exact formulas of Cassandra's (jVector's) implementation to ensure compatibility.
|
||||
@@ -51,15 +22,14 @@ namespace {
|
||||
|
||||
// You should only use this function if you need to preserve the original vectors and cannot normalize
|
||||
// them in advance.
|
||||
float compute_cosine_similarity(std::span<const float> v1, std::span<const float> v2) {
|
||||
#pragma clang fp contract(fast) reassociate(on) // Allow the compiler to optimize the loop.
|
||||
float dot_product = 0.0;
|
||||
float squared_norm_a = 0.0;
|
||||
float squared_norm_b = 0.0;
|
||||
float compute_cosine_similarity(const std::vector<data_value>& v1, const std::vector<data_value>& v2) {
|
||||
double dot_product = 0.0;
|
||||
double squared_norm_a = 0.0;
|
||||
double squared_norm_b = 0.0;
|
||||
|
||||
for (size_t i = 0; i < v1.size(); ++i) {
|
||||
float a = v1[i];
|
||||
float b = v2[i];
|
||||
double a = value_cast<float>(v1[i]);
|
||||
double b = value_cast<float>(v2[i]);
|
||||
|
||||
dot_product += a * b;
|
||||
squared_norm_a += a * a;
|
||||
@@ -67,7 +37,7 @@ float compute_cosine_similarity(std::span<const float> v1, std::span<const float
|
||||
}
|
||||
|
||||
if (squared_norm_a == 0 || squared_norm_b == 0) {
|
||||
return std::numeric_limits<float>::quiet_NaN();
|
||||
throw exceptions::invalid_request_exception("Function system.similarity_cosine doesn't support all-zero vectors");
|
||||
}
|
||||
|
||||
// The cosine similarity is in the range [-1, 1].
|
||||
@@ -76,15 +46,14 @@ float compute_cosine_similarity(std::span<const float> v1, std::span<const float
|
||||
return (1 + (dot_product / (std::sqrt(squared_norm_a * squared_norm_b)))) / 2;
|
||||
}
|
||||
|
||||
float compute_euclidean_similarity(std::span<const float> v1, std::span<const float> v2) {
|
||||
#pragma clang fp contract(fast) reassociate(on) // Allow the compiler to optimize the loop.
|
||||
float sum = 0.0;
|
||||
float compute_euclidean_similarity(const std::vector<data_value>& v1, const std::vector<data_value>& v2) {
|
||||
double sum = 0.0;
|
||||
|
||||
for (size_t i = 0; i < v1.size(); ++i) {
|
||||
float a = v1[i];
|
||||
float b = v2[i];
|
||||
double a = value_cast<float>(v1[i]);
|
||||
double b = value_cast<float>(v2[i]);
|
||||
|
||||
float diff = a - b;
|
||||
double diff = a - b;
|
||||
sum += diff * diff;
|
||||
}
|
||||
|
||||
@@ -96,13 +65,12 @@ float compute_euclidean_similarity(std::span<const float> v1, std::span<const fl
|
||||
|
||||
// Assumes that both vectors are L2-normalized.
|
||||
// This similarity is intended as an optimized way to perform cosine similarity calculation.
|
||||
float compute_dot_product_similarity(std::span<const float> v1, std::span<const float> v2) {
|
||||
#pragma clang fp contract(fast) reassociate(on) // Allow the compiler to optimize the loop.
|
||||
float dot_product = 0.0;
|
||||
float compute_dot_product_similarity(const std::vector<data_value>& v1, const std::vector<data_value>& v2) {
|
||||
double dot_product = 0.0;
|
||||
|
||||
for (size_t i = 0; i < v1.size(); ++i) {
|
||||
float a = v1[i];
|
||||
float b = v2[i];
|
||||
double a = value_cast<float>(v1[i]);
|
||||
double b = value_cast<float>(v2[i]);
|
||||
dot_product += a * b;
|
||||
}
|
||||
|
||||
@@ -156,7 +124,7 @@ std::vector<data_type> retrieve_vector_arg_types(const function_name& name, cons
|
||||
}
|
||||
}
|
||||
|
||||
vector_dimension_t dimension = first_dim_opt ? *first_dim_opt : *second_dim_opt;
|
||||
size_t dimension = first_dim_opt ? *first_dim_opt : *second_dim_opt;
|
||||
auto type = vector_type_impl::get_instance(float_type, dimension);
|
||||
return {type, type};
|
||||
}
|
||||
@@ -168,15 +136,13 @@ bytes_opt vector_similarity_fct::execute(std::span<const bytes_opt> parameters)
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
// Extract dimension from the vector type
|
||||
const auto& type = static_cast<const vector_type_impl&>(*arg_types()[0]);
|
||||
vector_dimension_t dimension = type.get_dimension();
|
||||
const auto& type = arg_types()[0];
|
||||
data_value v1 = type->deserialize(*parameters[0]);
|
||||
data_value v2 = type->deserialize(*parameters[1]);
|
||||
const auto& v1_elements = value_cast<std::vector<data_value>>(v1);
|
||||
const auto& v2_elements = value_cast<std::vector<data_value>>(v2);
|
||||
|
||||
// Optimized path: extract floats directly from bytes, bypassing data_value overhead
|
||||
std::vector<float> v1 = detail::extract_float_vector(parameters[0], dimension);
|
||||
std::vector<float> v2 = detail::extract_float_vector(parameters[1], dimension);
|
||||
|
||||
float result = SIMILARITY_FUNCTIONS.at(_name)(v1, v2);
|
||||
float result = SIMILARITY_FUNCTIONS.at(_name)(v1_elements, v2_elements);
|
||||
return float_type->decompose(result);
|
||||
}
|
||||
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
#include "native_scalar_function.hh"
|
||||
#include "cql3/assignment_testable.hh"
|
||||
#include "cql3/functions/function_name.hh"
|
||||
#include <span>
|
||||
|
||||
namespace cql3 {
|
||||
namespace functions {
|
||||
@@ -20,7 +19,7 @@ static const function_name SIMILARITY_COSINE_FUNCTION_NAME = function_name::nati
|
||||
static const function_name SIMILARITY_EUCLIDEAN_FUNCTION_NAME = function_name::native_function("similarity_euclidean");
|
||||
static const function_name SIMILARITY_DOT_PRODUCT_FUNCTION_NAME = function_name::native_function("similarity_dot_product");
|
||||
|
||||
using similarity_function_t = float (*)(std::span<const float>, std::span<const float>);
|
||||
using similarity_function_t = float (*)(const std::vector<data_value>&, const std::vector<data_value>&);
|
||||
extern thread_local const std::unordered_map<function_name, similarity_function_t> SIMILARITY_FUNCTIONS;
|
||||
|
||||
std::vector<data_type> retrieve_vector_arg_types(const function_name& name, const std::vector<shared_ptr<assignment_testable>>& provided_args);
|
||||
@@ -34,14 +33,5 @@ public:
|
||||
virtual bytes_opt execute(std::span<const bytes_opt> parameters) override;
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Extract float vector directly from serialized bytes, bypassing data_value overhead.
|
||||
// This is an internal API exposed for testing purposes.
|
||||
// Vector<float, N> wire format: N floats as big-endian uint32_t values, 4 bytes each.
|
||||
// `param` holds the serialized vector; `dimension` is presumably N, the number of floats
// to decode (so 4 * dimension bytes would be expected) -- TODO confirm against the definition.
// NOTE(review): how a disengaged (nullopt) `param` or a size mismatch is handled is not
// visible from this declaration.
std::vector<float> extract_float_vector(const bytes_opt& param, vector_dimension_t dimension);
|
||||
|
||||
} // namespace detail
|
||||
|
||||
} // namespace functions
|
||||
} // namespace cql3
|
||||
|
||||
@@ -25,7 +25,7 @@ bool keyspace_element_name::has_keyspace() const
|
||||
|
||||
// Returns a reference to the keyspace name obtained by dereferencing _ks_name.
// The assertion checks that _ks_name tests true before it is dereferenced.
// NOTE(review): this is a rendered diff hunk (-25,7 +25,7), so the two assertion lines
// below are the two sides of a single-line change rather than two consecutive checks;
// presumably throwing_assert is the removed side and SCYLLA_ASSERT the added side.
const sstring& keyspace_element_name::get_keyspace() const
|
||||
{
|
||||
throwing_assert(_ks_name);
|
||||
SCYLLA_ASSERT(_ks_name);
|
||||
return *_ks_name;
|
||||
}
|
||||
|
||||
|
||||
@@ -62,7 +62,7 @@ lists::setter_by_index::fill_prepare_context(prepare_context& ctx) {
|
||||
void
|
||||
lists::setter_by_index::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
|
||||
// we should not get here for frozen lists
|
||||
throwing_assert(column.type->is_multi_cell()); // "Attempted to set an individual element on a frozen list";
|
||||
SCYLLA_ASSERT(column.type->is_multi_cell()); // "Attempted to set an individual element on a frozen list";
|
||||
|
||||
auto index = expr::evaluate(_idx, params._options);
|
||||
if (index.is_null()) {
|
||||
@@ -105,7 +105,7 @@ lists::setter_by_uuid::requires_read() const {
|
||||
void
|
||||
lists::setter_by_uuid::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
|
||||
// we should not get here for frozen lists
|
||||
throwing_assert(column.type->is_multi_cell()); // "Attempted to set an individual element on a frozen list";
|
||||
SCYLLA_ASSERT(column.type->is_multi_cell()); // "Attempted to set an individual element on a frozen list";
|
||||
|
||||
auto index = expr::evaluate(_idx, params._options);
|
||||
auto value = expr::evaluate(*_e, params._options);
|
||||
@@ -133,7 +133,7 @@ lists::setter_by_uuid::execute(mutation& m, const clustering_key_prefix& prefix,
|
||||
// Evaluates the expression *_e with params._options, asserts that the column type is
// multi-cell (the quoted message describes the failure as appending to a frozen list),
// and forwards the evaluated value together with m, prefix, column and params to do_append().
// NOTE(review): this is a rendered diff hunk (-133,7 +133,7); the throwing_assert and
// SCYLLA_ASSERT lines are the two sides of a single-line change, not consecutive checks.
void
|
||||
lists::appender::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
|
||||
const cql3::raw_value value = expr::evaluate(*_e, params._options);
|
||||
throwing_assert(column.type->is_multi_cell()); // "Attempted to append to a frozen list";
|
||||
SCYLLA_ASSERT(column.type->is_multi_cell()); // "Attempted to append to a frozen list";
|
||||
do_append(value, m, prefix, column, params);
|
||||
}
|
||||
|
||||
@@ -189,7 +189,7 @@ lists::do_append(const cql3::raw_value& list_value,
|
||||
|
||||
void
|
||||
lists::prepender::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
|
||||
throwing_assert(column.type->is_multi_cell()); // "Attempted to prepend to a frozen list";
|
||||
SCYLLA_ASSERT(column.type->is_multi_cell()); // "Attempted to prepend to a frozen list";
|
||||
cql3::raw_value lvalue = expr::evaluate(*_e, params._options);
|
||||
if (lvalue.is_null()) {
|
||||
return;
|
||||
@@ -244,7 +244,7 @@ lists::discarder::requires_read() const {
|
||||
|
||||
void
|
||||
lists::discarder::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
|
||||
throwing_assert(column.type->is_multi_cell()); // "Attempted to delete from a frozen list";
|
||||
SCYLLA_ASSERT(column.type->is_multi_cell()); // "Attempted to delete from a frozen list";
|
||||
|
||||
auto&& existing_list = params.get_prefetched_list(m.key(), prefix, column);
|
||||
// We want to call bind before possibly returning to reject queries where the value provided is not a list.
|
||||
@@ -300,7 +300,7 @@ lists::discarder_by_index::requires_read() const {
|
||||
|
||||
void
|
||||
lists::discarder_by_index::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
|
||||
throwing_assert(column.type->is_multi_cell()); // "Attempted to delete an item by index from a frozen list";
|
||||
SCYLLA_ASSERT(column.type->is_multi_cell()); // "Attempted to delete an item by index from a frozen list";
|
||||
cql3::raw_value index = expr::evaluate(*_e, params._options);
|
||||
if (index.is_null()) {
|
||||
throw exceptions::invalid_request_exception("Invalid null value for list index");
|
||||
|
||||
@@ -45,7 +45,7 @@ maps::setter_by_key::fill_prepare_context(prepare_context& ctx) {
|
||||
void
|
||||
maps::setter_by_key::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
|
||||
using exceptions::invalid_request_exception;
|
||||
throwing_assert(column.type->is_multi_cell()); // "Attempted to set a value for a single key on a frozen map";
|
||||
SCYLLA_ASSERT(column.type->is_multi_cell()); // "Attempted to set a value for a single key on a frozen map";
|
||||
auto key = expr::evaluate(_k, params._options);
|
||||
auto value = expr::evaluate(*_e, params._options);
|
||||
if (key.is_null()) {
|
||||
@@ -63,7 +63,7 @@ maps::setter_by_key::execute(mutation& m, const clustering_key_prefix& prefix, c
|
||||
|
||||
// Asserts that the column type is multi-cell (the quoted message describes the failure as
// adding items to a frozen map), evaluates the expression *_e with params._options, and
// forwards the evaluated value together with m, prefix, params and column to do_put().
// NOTE(review): this is a rendered diff hunk (-63,7 +63,7); the throwing_assert and
// SCYLLA_ASSERT lines are the two sides of a single-line change, not consecutive checks.
void
|
||||
maps::putter::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
|
||||
throwing_assert(column.type->is_multi_cell()); // "Attempted to add items to a frozen map";
|
||||
SCYLLA_ASSERT(column.type->is_multi_cell()); // "Attempted to add items to a frozen map";
|
||||
cql3::raw_value value = expr::evaluate(*_e, params._options);
|
||||
do_put(m, prefix, params, value, column);
|
||||
}
|
||||
@@ -96,7 +96,7 @@ maps::do_put(mutation& m, const clustering_key_prefix& prefix, const update_para
|
||||
|
||||
void
|
||||
maps::discarder_by_key::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
|
||||
throwing_assert(column.type->is_multi_cell()); // "Attempted to delete a single key in a frozen map";
|
||||
SCYLLA_ASSERT(column.type->is_multi_cell()); // "Attempted to delete a single key in a frozen map";
|
||||
cql3::raw_value key = expr::evaluate(*_e, params._options);
|
||||
if (key.is_null()) {
|
||||
throw exceptions::invalid_request_exception("Invalid null map key");
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user