mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-02 06:05:53 +00:00
Compare commits
2 Commits
copilot/do
...
copilot/co
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9e806cb3f7 | ||
|
|
f267af38bd |
53
.github/workflows/call_backport_with_jira.yaml
vendored
53
.github/workflows/call_backport_with_jira.yaml
vendored
@@ -1,53 +0,0 @@
|
|||||||
name: Backport with Jira Integration
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
branches:
|
|
||||||
- master
|
|
||||||
- next-*.*
|
|
||||||
- branch-*.*
|
|
||||||
pull_request_target:
|
|
||||||
types: [labeled, closed]
|
|
||||||
branches:
|
|
||||||
- master
|
|
||||||
- next
|
|
||||||
- next-*.*
|
|
||||||
- branch-*.*
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
backport-on-push:
|
|
||||||
if: github.event_name == 'push'
|
|
||||||
uses: scylladb/github-automation/.github/workflows/backport-with-jira.yaml@main
|
|
||||||
with:
|
|
||||||
event_type: 'push'
|
|
||||||
base_branch: ${{ github.ref }}
|
|
||||||
commits: ${{ github.event.before }}..${{ github.sha }}
|
|
||||||
secrets:
|
|
||||||
gh_token: ${{ secrets.AUTO_BACKPORT_TOKEN }}
|
|
||||||
jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
|
||||||
|
|
||||||
backport-on-label:
|
|
||||||
if: github.event_name == 'pull_request_target' && github.event.action == 'labeled'
|
|
||||||
uses: scylladb/github-automation/.github/workflows/backport-with-jira.yaml@main
|
|
||||||
with:
|
|
||||||
event_type: 'labeled'
|
|
||||||
base_branch: refs/heads/${{ github.event.pull_request.base.ref }}
|
|
||||||
pull_request_number: ${{ github.event.pull_request.number }}
|
|
||||||
head_commit: ${{ github.event.pull_request.base.sha }}
|
|
||||||
label_name: ${{ github.event.label.name }}
|
|
||||||
pr_state: ${{ github.event.pull_request.state }}
|
|
||||||
secrets:
|
|
||||||
gh_token: ${{ secrets.AUTO_BACKPORT_TOKEN }}
|
|
||||||
jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
|
||||||
|
|
||||||
backport-chain:
|
|
||||||
if: github.event_name == 'pull_request_target' && github.event.action == 'closed' && github.event.pull_request.merged == true
|
|
||||||
uses: scylladb/github-automation/.github/workflows/backport-with-jira.yaml@main
|
|
||||||
with:
|
|
||||||
event_type: 'chain'
|
|
||||||
base_branch: refs/heads/${{ github.event.pull_request.base.ref }}
|
|
||||||
pull_request_number: ${{ github.event.pull_request.number }}
|
|
||||||
pr_body: ${{ github.event.pull_request.body }}
|
|
||||||
secrets:
|
|
||||||
gh_token: ${{ secrets.AUTO_BACKPORT_TOKEN }}
|
|
||||||
jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
name: Sync Jira Based on PR Milestone Events
|
|
||||||
|
|
||||||
on:
|
|
||||||
pull_request_target:
|
|
||||||
types: [milestoned, demilestoned]
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
pull-requests: read
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
jira-sync-milestone-set:
|
|
||||||
if: github.event.action == 'milestoned'
|
|
||||||
uses: scylladb/github-automation/.github/workflows/main_jira_sync_pr_milestone_set.yml@main
|
|
||||||
secrets:
|
|
||||||
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
|
||||||
|
|
||||||
jira-sync-milestone-removed:
|
|
||||||
if: github.event.action == 'demilestoned'
|
|
||||||
uses: scylladb/github-automation/.github/workflows/main_jira_sync_pr_milestone_removed.yml@main
|
|
||||||
secrets:
|
|
||||||
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
name: Call Jira release creation for new milestone
|
name: Call Jira release creation for new milestone
|
||||||
|
|
||||||
on:
|
on:
|
||||||
milestone:
|
milestone:
|
||||||
@@ -9,6 +9,6 @@ jobs:
|
|||||||
uses: scylladb/github-automation/.github/workflows/main_sync_milestone_to_jira_release.yml@main
|
uses: scylladb/github-automation/.github/workflows/main_sync_milestone_to_jira_release.yml@main
|
||||||
with:
|
with:
|
||||||
# Comma-separated list of Jira project keys
|
# Comma-separated list of Jira project keys
|
||||||
jira_project_keys: "SCYLLADB,CUSTOMER,SMI"
|
jira_project_keys: "SCYLLADB,CUSTOMER"
|
||||||
secrets:
|
secrets:
|
||||||
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
||||||
|
|||||||
@@ -1,62 +0,0 @@
|
|||||||
name: Close issues created by Scylla associates
|
|
||||||
|
|
||||||
on:
|
|
||||||
issues:
|
|
||||||
types: [opened, reopened]
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
issues: write
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
comment-and-close:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Comment and close if author email is scylladb.com
|
|
||||||
uses: actions/github-script@v7
|
|
||||||
with:
|
|
||||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
script: |
|
|
||||||
const issue = context.payload.issue;
|
|
||||||
const actor = context.actor;
|
|
||||||
|
|
||||||
// Get user data (only public email is available)
|
|
||||||
const { data: user } = await github.rest.users.getByUsername({
|
|
||||||
username: actor,
|
|
||||||
});
|
|
||||||
|
|
||||||
const email = user.email || "";
|
|
||||||
console.log(`Actor: ${actor}, public email: ${email || "<none>"}`);
|
|
||||||
|
|
||||||
// Only continue if email exists and ends with @scylladb.com
|
|
||||||
if (!email || !email.toLowerCase().endsWith("@scylladb.com")) {
|
|
||||||
console.log("User is not a scylladb.com email (or email not public); skipping.");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const owner = context.repo.owner;
|
|
||||||
const repo = context.repo.repo;
|
|
||||||
const issue_number = issue.number;
|
|
||||||
|
|
||||||
const body = "Issues in this repository are closed automatically. Scylla associates should use Jira to manage issues.\nPlease move this issue to Jira https://scylladb.atlassian.net/jira/software/c/projects/SCYLLADB/list";
|
|
||||||
|
|
||||||
// Add the comment
|
|
||||||
await github.rest.issues.createComment({
|
|
||||||
owner,
|
|
||||||
repo,
|
|
||||||
issue_number,
|
|
||||||
body,
|
|
||||||
});
|
|
||||||
|
|
||||||
console.log(`Comment added to #${issue_number}`);
|
|
||||||
|
|
||||||
// Close the issue
|
|
||||||
await github.rest.issues.update({
|
|
||||||
owner,
|
|
||||||
repo,
|
|
||||||
issue_number,
|
|
||||||
state: "closed",
|
|
||||||
state_reason: "not_planned"
|
|
||||||
});
|
|
||||||
|
|
||||||
console.log(`Issue #${issue_number} closed.`);
|
|
||||||
44
.github/workflows/trigger-scylla-ci.yaml
vendored
44
.github/workflows/trigger-scylla-ci.yaml
vendored
@@ -9,52 +9,16 @@ on:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
trigger-jenkins:
|
trigger-jenkins:
|
||||||
if: (github.event_name == 'issue_comment' && github.event.comment.user.login != 'scylladbbot') || github.event.label.name == 'conflicts'
|
if: (github.event.comment.user.login != 'scylladbbot' && contains(github.event.comment.body, '@scylladbbot') && contains(github.event.comment.body, 'trigger-ci')) || github.event.label.name == 'conflicts'
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Verify Org Membership
|
|
||||||
id: verify_author
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
if [[ "${{ github.event_name }}" == "pull_request_target" ]]; then
|
|
||||||
AUTHOR="${{ github.event.pull_request.user.login }}"
|
|
||||||
ASSOCIATION="${{ github.event.pull_request.author_association }}"
|
|
||||||
else
|
|
||||||
AUTHOR="${{ github.event.comment.user.login }}"
|
|
||||||
ASSOCIATION="${{ github.event.comment.author_association }}"
|
|
||||||
fi
|
|
||||||
if [[ "$ASSOCIATION" == "MEMBER" || "$ASSOCIATION" == "OWNER" ]]; then
|
|
||||||
echo "member=true" >> $GITHUB_OUTPUT
|
|
||||||
else
|
|
||||||
echo "::warning::${AUTHOR} is not a member of scylladb (association: ${ASSOCIATION}); skipping CI trigger."
|
|
||||||
echo "member=false" >> $GITHUB_OUTPUT
|
|
||||||
fi
|
|
||||||
|
|
||||||
- name: Validate Comment Trigger
|
|
||||||
if: github.event_name == 'issue_comment'
|
|
||||||
id: verify_comment
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
BODY=$(cat << 'EOF'
|
|
||||||
${{ github.event.comment.body }}
|
|
||||||
EOF
|
|
||||||
)
|
|
||||||
CLEAN_BODY=$(echo "$BODY" | grep -v '^[[:space:]]*>')
|
|
||||||
|
|
||||||
if echo "$CLEAN_BODY" | grep -qi '@scylladbbot' && echo "$CLEAN_BODY" | grep -qi 'trigger-ci'; then
|
|
||||||
echo "trigger=true" >> $GITHUB_OUTPUT
|
|
||||||
else
|
|
||||||
echo "trigger=false" >> $GITHUB_OUTPUT
|
|
||||||
fi
|
|
||||||
|
|
||||||
- name: Trigger Scylla-CI-Route Jenkins Job
|
- name: Trigger Scylla-CI-Route Jenkins Job
|
||||||
if: steps.verify_author.outputs.member == 'true' && (github.event_name == 'pull_request_target' || steps.verify_comment.outputs.trigger == 'true')
|
|
||||||
env:
|
env:
|
||||||
JENKINS_USER: ${{ secrets.JENKINS_USERNAME }}
|
JENKINS_USER: ${{ secrets.JENKINS_USERNAME }}
|
||||||
JENKINS_API_TOKEN: ${{ secrets.JENKINS_TOKEN }}
|
JENKINS_API_TOKEN: ${{ secrets.JENKINS_TOKEN }}
|
||||||
JENKINS_URL: "https://jenkins.scylladb.com"
|
JENKINS_URL: "https://jenkins.scylladb.com"
|
||||||
PR_NUMBER: "${{ github.event.issue.number || github.event.pull_request.number }}"
|
|
||||||
PR_REPO_NAME: "${{ github.event.repository.full_name }}"
|
|
||||||
run: |
|
run: |
|
||||||
|
PR_NUMBER=${{ github.event.issue.number }}
|
||||||
|
PR_REPO_NAME=${{ github.event.repository.full_name }}
|
||||||
curl -X POST "$JENKINS_URL/job/releng/job/Scylla-CI-Route/buildWithParameters?PR_NUMBER=$PR_NUMBER&PR_REPO_NAME=$PR_REPO_NAME" \
|
curl -X POST "$JENKINS_URL/job/releng/job/Scylla-CI-Route/buildWithParameters?PR_NUMBER=$PR_NUMBER&PR_REPO_NAME=$PR_REPO_NAME" \
|
||||||
--user "$JENKINS_USER:$JENKINS_API_TOKEN" --fail
|
--user "$JENKINS_USER:$JENKINS_API_TOKEN" --fail -i -v
|
||||||
|
|||||||
@@ -300,6 +300,7 @@ add_subdirectory(locator)
|
|||||||
add_subdirectory(message)
|
add_subdirectory(message)
|
||||||
add_subdirectory(mutation)
|
add_subdirectory(mutation)
|
||||||
add_subdirectory(mutation_writer)
|
add_subdirectory(mutation_writer)
|
||||||
|
add_subdirectory(node_ops)
|
||||||
add_subdirectory(readers)
|
add_subdirectory(readers)
|
||||||
add_subdirectory(replica)
|
add_subdirectory(replica)
|
||||||
add_subdirectory(raft)
|
add_subdirectory(raft)
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ For further information, please see:
|
|||||||
|
|
||||||
[developer documentation]: HACKING.md
|
[developer documentation]: HACKING.md
|
||||||
[build documentation]: docs/dev/building.md
|
[build documentation]: docs/dev/building.md
|
||||||
[docker image build documentation]: dist/docker/redhat/README.md
|
[docker image build documentation]: dist/docker/debian/README.md
|
||||||
|
|
||||||
## Running Scylla
|
## Running Scylla
|
||||||
|
|
||||||
|
|||||||
@@ -244,7 +244,10 @@ static bool is_set_of(const rjson::value& type1, const rjson::value& type2) {
|
|||||||
|
|
||||||
// Check if two JSON-encoded values match with the CONTAINS relation
|
// Check if two JSON-encoded values match with the CONTAINS relation
|
||||||
bool check_CONTAINS(const rjson::value* v1, const rjson::value& v2, bool v1_from_query, bool v2_from_query) {
|
bool check_CONTAINS(const rjson::value* v1, const rjson::value& v2, bool v1_from_query, bool v2_from_query) {
|
||||||
if (!v1) {
|
if (!v1 || !v1->IsObject() || v1->MemberCount() == 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!v2.IsObject() || v2.MemberCount() == 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const auto& kv1 = *v1->MemberBegin();
|
const auto& kv1 = *v1->MemberBegin();
|
||||||
@@ -618,7 +621,7 @@ conditional_operator_type get_conditional_operator(const rjson::value& req) {
|
|||||||
// Check if the existing values of the item (previous_item) match the
|
// Check if the existing values of the item (previous_item) match the
|
||||||
// conditions given by the Expected and ConditionalOperator parameters
|
// conditions given by the Expected and ConditionalOperator parameters
|
||||||
// (if they exist) in the request (an UpdateItem, PutItem or DeleteItem).
|
// (if they exist) in the request (an UpdateItem, PutItem or DeleteItem).
|
||||||
// This function can throw a ValidationException API error if there
|
// This function can throw an ValidationException API error if there
|
||||||
// are errors in the format of the condition itself.
|
// are errors in the format of the condition itself.
|
||||||
bool verify_expected(const rjson::value& req, const rjson::value* previous_item) {
|
bool verify_expected(const rjson::value& req, const rjson::value* previous_item) {
|
||||||
const rjson::value* expected = rjson::find(req, "Expected");
|
const rjson::value* expected = rjson::find(req, "Expected");
|
||||||
|
|||||||
@@ -53,7 +53,9 @@ void consumed_capacity_counter::add_consumed_capacity_to_response_if_needed(rjso
|
|||||||
}
|
}
|
||||||
|
|
||||||
static uint64_t calculate_half_units(uint64_t unit_block_size, uint64_t total_bytes, bool is_quorum) {
|
static uint64_t calculate_half_units(uint64_t unit_block_size, uint64_t total_bytes, bool is_quorum) {
|
||||||
uint64_t half_units = (total_bytes + unit_block_size -1) / unit_block_size; //divide by unit_block_size and round up
|
// Avoid potential integer overflow when total_bytes is close to UINT64_MAX
|
||||||
|
// by using division with modulo instead of addition before division
|
||||||
|
uint64_t half_units = total_bytes / unit_block_size + (total_bytes % unit_block_size != 0 ? 1 : 0);
|
||||||
|
|
||||||
if (is_quorum) {
|
if (is_quorum) {
|
||||||
half_units *= 2;
|
half_units *= 2;
|
||||||
|
|||||||
@@ -63,7 +63,6 @@
|
|||||||
#include "types/types.hh"
|
#include "types/types.hh"
|
||||||
#include "db/system_keyspace.hh"
|
#include "db/system_keyspace.hh"
|
||||||
#include "cql3/statements/ks_prop_defs.hh"
|
#include "cql3/statements/ks_prop_defs.hh"
|
||||||
#include "alternator/ttl_tag.hh"
|
|
||||||
|
|
||||||
using namespace std::chrono_literals;
|
using namespace std::chrono_literals;
|
||||||
|
|
||||||
@@ -165,7 +164,7 @@ static map_type attrs_type() {
|
|||||||
|
|
||||||
static const column_definition& attrs_column(const schema& schema) {
|
static const column_definition& attrs_column(const schema& schema) {
|
||||||
const column_definition* cdef = schema.get_column_definition(bytes(executor::ATTRS_COLUMN_NAME));
|
const column_definition* cdef = schema.get_column_definition(bytes(executor::ATTRS_COLUMN_NAME));
|
||||||
throwing_assert(cdef);
|
SCYLLA_ASSERT(cdef);
|
||||||
return *cdef;
|
return *cdef;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -238,7 +237,7 @@ static void validate_is_object(const rjson::value& value, const char* caller) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// This function assumes the given value is an object and returns requested member value.
|
// This function assumes the given value is an object and returns requested member value.
|
||||||
// If it is not possible, an api_error::validation is thrown.
|
// If it is not possible an api_error::validation is thrown.
|
||||||
static const rjson::value& get_member(const rjson::value& obj, const char* member_name, const char* caller) {
|
static const rjson::value& get_member(const rjson::value& obj, const char* member_name, const char* caller) {
|
||||||
validate_is_object(obj, caller);
|
validate_is_object(obj, caller);
|
||||||
const rjson::value* ret = rjson::find(obj, member_name);
|
const rjson::value* ret = rjson::find(obj, member_name);
|
||||||
@@ -250,7 +249,7 @@ static const rjson::value& get_member(const rjson::value& obj, const char* membe
|
|||||||
|
|
||||||
|
|
||||||
// This function assumes the given value is an object with a single member, and returns this member.
|
// This function assumes the given value is an object with a single member, and returns this member.
|
||||||
// In case the requirements are not met, an api_error::validation is thrown.
|
// In case the requirements are not met an api_error::validation is thrown.
|
||||||
static const rjson::value::Member& get_single_member(const rjson::value& v, const char* caller) {
|
static const rjson::value::Member& get_single_member(const rjson::value& v, const char* caller) {
|
||||||
if (!v.IsObject() || v.MemberCount() != 1) {
|
if (!v.IsObject() || v.MemberCount() != 1) {
|
||||||
throw api_error::validation(format("{}: expected an object with a single member.", caller));
|
throw api_error::validation(format("{}: expected an object with a single member.", caller));
|
||||||
@@ -683,7 +682,7 @@ static std::optional<int> get_int_attribute(const rjson::value& value, std::stri
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Sets a KeySchema object inside the given JSON parent describing the key
|
// Sets a KeySchema object inside the given JSON parent describing the key
|
||||||
// attributes of the given schema as being either HASH or RANGE keys.
|
// attributes of the the given schema as being either HASH or RANGE keys.
|
||||||
// Additionally, adds to a given map mappings between the key attribute
|
// Additionally, adds to a given map mappings between the key attribute
|
||||||
// names and their type (as a DynamoDB type string).
|
// names and their type (as a DynamoDB type string).
|
||||||
void executor::describe_key_schema(rjson::value& parent, const schema& schema, std::unordered_map<std::string,std::string>* attribute_types, const std::map<sstring, sstring> *tags) {
|
void executor::describe_key_schema(rjson::value& parent, const schema& schema, std::unordered_map<std::string,std::string>* attribute_types, const std::map<sstring, sstring> *tags) {
|
||||||
@@ -835,11 +834,13 @@ future<> executor::fill_table_size(rjson::value &table_description, schema_ptr s
|
|||||||
total_size = co_await _ss.estimate_total_sstable_volume(schema->id(), service::storage_service::ignore_errors::yes);
|
total_size = co_await _ss.estimate_total_sstable_volume(schema->id(), service::storage_service::ignore_errors::yes);
|
||||||
const auto expiry = std::chrono::seconds{ _proxy.data_dictionary().get_config().alternator_describe_table_info_cache_validity_in_seconds() };
|
const auto expiry = std::chrono::seconds{ _proxy.data_dictionary().get_config().alternator_describe_table_info_cache_validity_in_seconds() };
|
||||||
// Note: we don't care when the notification of other shards will finish, as long as it will be done
|
// Note: we don't care when the notification of other shards will finish, as long as it will be done
|
||||||
// it's possible to get into race condition (next DescribeTable comes to other shard, that new shard doesn't have
|
// A race condition is possible: if a DescribeTable request arrives on a different shard before
|
||||||
// the size yet, so it will calculate it again) - this is not a problem, because it will call cache_newly_calculated_size_on_all_shards
|
// that shard receives the cached size, it will recalculate independently. This is acceptable because:
|
||||||
// with expiry, which is extremely unlikely to be exactly the same as the previous one, all shards will keep the size coming with expiry that is further into the future.
|
// 1. Both calculations will cache their results with an expiry time
|
||||||
// In case of the same expiry, some shards will have different size, which means DescribeTable will return different values depending on the shard
|
// 2. Expiry times are unlikely to be identical, so eventually all shards converge to the most recent value
|
||||||
// which is also fine, as the specification doesn't give precision guarantees of any kind.
|
// 3. Even if expiry times match, different shards may briefly return different table sizes
|
||||||
|
// 4. This temporary inconsistency is acceptable per DynamoDB specification, which doesn't guarantee
|
||||||
|
// exact precision for DescribeTable size information
|
||||||
co_await cache_newly_calculated_size_on_all_shards(schema, total_size, expiry);
|
co_await cache_newly_calculated_size_on_all_shards(schema, total_size, expiry);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -917,7 +918,7 @@ future<rjson::value> executor::fill_table_description(schema_ptr schema, table_s
|
|||||||
sstring index_name = cf_name.substr(delim_it + 1);
|
sstring index_name = cf_name.substr(delim_it + 1);
|
||||||
rjson::add(view_entry, "IndexName", rjson::from_string(index_name));
|
rjson::add(view_entry, "IndexName", rjson::from_string(index_name));
|
||||||
rjson::add(view_entry, "IndexArn", generate_arn_for_index(*schema, index_name));
|
rjson::add(view_entry, "IndexArn", generate_arn_for_index(*schema, index_name));
|
||||||
// Add index's KeySchema and collect types for AttributeDefinitions:
|
// Add indexes's KeySchema and collect types for AttributeDefinitions:
|
||||||
executor::describe_key_schema(view_entry, *vptr, key_attribute_types, db::get_tags_of_table(vptr));
|
executor::describe_key_schema(view_entry, *vptr, key_attribute_types, db::get_tags_of_table(vptr));
|
||||||
// Add projection type
|
// Add projection type
|
||||||
rjson::value projection = rjson::empty_object();
|
rjson::value projection = rjson::empty_object();
|
||||||
@@ -1650,7 +1651,7 @@ static future<> mark_view_schemas_as_built(utils::chunked_vector<mutation>& out,
|
|||||||
}
|
}
|
||||||
|
|
||||||
future<executor::request_return_type> executor::create_table_on_shard0(service::client_state&& client_state, tracing::trace_state_ptr trace_state, rjson::value request, bool enforce_authorization, bool warn_authorization, const db::tablets_mode_t::mode tablets_mode) {
|
future<executor::request_return_type> executor::create_table_on_shard0(service::client_state&& client_state, tracing::trace_state_ptr trace_state, rjson::value request, bool enforce_authorization, bool warn_authorization, const db::tablets_mode_t::mode tablets_mode) {
|
||||||
throwing_assert(this_shard_id() == 0);
|
SCYLLA_ASSERT(this_shard_id() == 0);
|
||||||
|
|
||||||
// We begin by parsing and validating the content of the CreateTable
|
// We begin by parsing and validating the content of the CreateTable
|
||||||
// command. We can't inspect the current database schema at this point
|
// command. We can't inspect the current database schema at this point
|
||||||
@@ -2436,7 +2437,7 @@ std::unordered_map<bytes, std::string> si_key_attributes(data_dictionary::table
|
|||||||
// case, this function simply won't be called for this attribute.)
|
// case, this function simply won't be called for this attribute.)
|
||||||
//
|
//
|
||||||
// This function checks if the given attribute update is an update to some
|
// This function checks if the given attribute update is an update to some
|
||||||
// GSI's key, and if the value is unsuitable, an api_error::validation is
|
// GSI's key, and if the value is unsuitable, a api_error::validation is
|
||||||
// thrown. The checking here is similar to the checking done in
|
// thrown. The checking here is similar to the checking done in
|
||||||
// get_key_from_typed_value() for the base table's key columns.
|
// get_key_from_typed_value() for the base table's key columns.
|
||||||
//
|
//
|
||||||
@@ -2838,12 +2839,14 @@ future<executor::request_return_type> rmw_operation::execute(service::storage_pr
|
|||||||
}
|
}
|
||||||
} else if (_write_isolation != write_isolation::LWT_ALWAYS) {
|
} else if (_write_isolation != write_isolation::LWT_ALWAYS) {
|
||||||
std::optional<mutation> m = apply(nullptr, api::new_timestamp(), cdc_opts);
|
std::optional<mutation> m = apply(nullptr, api::new_timestamp(), cdc_opts);
|
||||||
throwing_assert(m); // !needs_read_before_write, so apply() did not check a condition
|
SCYLLA_ASSERT(m); // !needs_read_before_write, so apply() did not check a condition
|
||||||
return proxy.mutate(utils::chunked_vector<mutation>{std::move(*m)}, db::consistency_level::LOCAL_QUORUM, executor::default_timeout(), trace_state, std::move(permit), db::allow_per_partition_rate_limit::yes, false, std::move(cdc_opts)).then([this, &wcu_total] () mutable {
|
return proxy.mutate(utils::chunked_vector<mutation>{std::move(*m)}, db::consistency_level::LOCAL_QUORUM, executor::default_timeout(), trace_state, std::move(permit), db::allow_per_partition_rate_limit::yes, false, std::move(cdc_opts)).then([this, &wcu_total] () mutable {
|
||||||
return rmw_operation_return(std::move(_return_attributes), _consumed_capacity, wcu_total);
|
return rmw_operation_return(std::move(_return_attributes), _consumed_capacity, wcu_total);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
throwing_assert(cas_shard);
|
if (!cas_shard) {
|
||||||
|
on_internal_error(elogger, "cas_shard is not set");
|
||||||
|
}
|
||||||
// If we're still here, we need to do this write using LWT:
|
// If we're still here, we need to do this write using LWT:
|
||||||
global_stats.write_using_lwt++;
|
global_stats.write_using_lwt++;
|
||||||
per_table_stats.write_using_lwt++;
|
per_table_stats.write_using_lwt++;
|
||||||
@@ -3547,7 +3550,7 @@ static bool hierarchy_filter(rjson::value& val, const attribute_path_map_node<T>
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add a path to an attribute_path_map. Throws a validation error if the path
|
// Add a path to a attribute_path_map. Throws a validation error if the path
|
||||||
// "overlaps" with one already in the filter (one is a sub-path of the other)
|
// "overlaps" with one already in the filter (one is a sub-path of the other)
|
||||||
// or "conflicts" with it (both a member and index is requested).
|
// or "conflicts" with it (both a member and index is requested).
|
||||||
template<typename T>
|
template<typename T>
|
||||||
@@ -5412,7 +5415,7 @@ static future<executor::request_return_type> do_query(service::storage_proxy& pr
|
|||||||
}
|
}
|
||||||
|
|
||||||
static dht::token token_for_segment(int segment, int total_segments) {
|
static dht::token token_for_segment(int segment, int total_segments) {
|
||||||
throwing_assert(total_segments > 1 && segment >= 0 && segment < total_segments);
|
SCYLLA_ASSERT(total_segments > 1 && segment >= 0 && segment < total_segments);
|
||||||
uint64_t delta = std::numeric_limits<uint64_t>::max() / total_segments;
|
uint64_t delta = std::numeric_limits<uint64_t>::max() / total_segments;
|
||||||
return dht::token::from_int64(std::numeric_limits<int64_t>::min() + delta * segment);
|
return dht::token::from_int64(std::numeric_limits<int64_t>::min() + delta * segment);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -50,7 +50,7 @@ public:
|
|||||||
_operators.emplace_back(i);
|
_operators.emplace_back(i);
|
||||||
check_depth_limit();
|
check_depth_limit();
|
||||||
}
|
}
|
||||||
void add_dot(std::string name) {
|
void add_dot(std::string(name)) {
|
||||||
_operators.emplace_back(std::move(name));
|
_operators.emplace_back(std::move(name));
|
||||||
check_depth_limit();
|
check_depth_limit();
|
||||||
}
|
}
|
||||||
@@ -85,7 +85,7 @@ struct constant {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// "value" is a value used in the right hand side of an assignment
|
// "value" is is a value used in the right hand side of an assignment
|
||||||
// expression, "SET a = ...". It can be a constant (a reference to a value
|
// expression, "SET a = ...". It can be a constant (a reference to a value
|
||||||
// included in the request, e.g., ":val"), a path to an attribute from the
|
// included in the request, e.g., ":val"), a path to an attribute from the
|
||||||
// existing item (e.g., "a.b[3].c"), or a function of other such values.
|
// existing item (e.g., "a.b[3].c"), or a function of other such values.
|
||||||
@@ -205,7 +205,7 @@ public:
|
|||||||
// The supported primitive conditions are:
|
// The supported primitive conditions are:
|
||||||
// 1. Binary operators - v1 OP v2, where OP is =, <>, <, <=, >, or >= and
|
// 1. Binary operators - v1 OP v2, where OP is =, <>, <, <=, >, or >= and
|
||||||
// v1 and v2 are values - from the item (an attribute path), the query
|
// v1 and v2 are values - from the item (an attribute path), the query
|
||||||
// (a ":val" reference), or a function of the above (only the size()
|
// (a ":val" reference), or a function of the the above (only the size()
|
||||||
// function is supported).
|
// function is supported).
|
||||||
// 2. Ternary operator - v1 BETWEEN v2 and v3 (means v1 >= v2 AND v1 <= v3).
|
// 2. Ternary operator - v1 BETWEEN v2 and v3 (means v1 >= v2 AND v1 <= v3).
|
||||||
// 3. N-ary operator - v1 IN ( v2, v3, ... )
|
// 3. N-ary operator - v1 IN ( v2, v3, ... )
|
||||||
|
|||||||
@@ -55,7 +55,7 @@ partition_key pk_from_json(const rjson::value& item, schema_ptr schema);
|
|||||||
clustering_key ck_from_json(const rjson::value& item, schema_ptr schema);
|
clustering_key ck_from_json(const rjson::value& item, schema_ptr schema);
|
||||||
position_in_partition pos_from_json(const rjson::value& item, schema_ptr schema);
|
position_in_partition pos_from_json(const rjson::value& item, schema_ptr schema);
|
||||||
|
|
||||||
// If v encodes a number (i.e., it is a {"N": [...]}), returns an object representing it. Otherwise,
|
// If v encodes a number (i.e., it is a {"N": [...]}, returns an object representing it. Otherwise,
|
||||||
// raises ValidationException with diagnostic.
|
// raises ValidationException with diagnostic.
|
||||||
big_decimal unwrap_number(const rjson::value& v, std::string_view diagnostic);
|
big_decimal unwrap_number(const rjson::value& v, std::string_view diagnostic);
|
||||||
|
|
||||||
|
|||||||
@@ -710,7 +710,7 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
|
|||||||
++_executor._stats.requests_blocked_memory;
|
++_executor._stats.requests_blocked_memory;
|
||||||
}
|
}
|
||||||
auto units = co_await std::move(units_fut);
|
auto units = co_await std::move(units_fut);
|
||||||
throwing_assert(req->content_stream);
|
SCYLLA_ASSERT(req->content_stream);
|
||||||
chunked_content content = co_await read_entire_stream(*req->content_stream, request_content_length_limit);
|
chunked_content content = co_await read_entire_stream(*req->content_stream, request_content_length_limit);
|
||||||
// If the request had no Content-Length, we reserved too many units
|
// If the request had no Content-Length, we reserved too many units
|
||||||
// so need to return some
|
// so need to return some
|
||||||
|
|||||||
@@ -46,7 +46,6 @@
|
|||||||
#include "alternator/executor.hh"
|
#include "alternator/executor.hh"
|
||||||
#include "alternator/controller.hh"
|
#include "alternator/controller.hh"
|
||||||
#include "alternator/serialization.hh"
|
#include "alternator/serialization.hh"
|
||||||
#include "alternator/ttl_tag.hh"
|
|
||||||
#include "dht/sharder.hh"
|
#include "dht/sharder.hh"
|
||||||
#include "db/config.hh"
|
#include "db/config.hh"
|
||||||
#include "db/tags/utils.hh"
|
#include "db/tags/utils.hh"
|
||||||
@@ -58,10 +57,19 @@ static logging::logger tlogger("alternator_ttl");
|
|||||||
|
|
||||||
namespace alternator {
|
namespace alternator {
|
||||||
|
|
||||||
|
// We write the expiration-time attribute enabled on a table in a
|
||||||
|
// tag TTL_TAG_KEY.
|
||||||
|
// Currently, the *value* of this tag is simply the name of the attribute,
|
||||||
|
// and the expiration scanner interprets it as an Alternator attribute name -
|
||||||
|
// It can refer to a real column or if that doesn't exist, to a member of
|
||||||
|
// the ":attrs" map column. Although this is designed for Alternator, it may
|
||||||
|
// be good enough for CQL as well (there, the ":attrs" column won't exist).
|
||||||
|
extern const sstring TTL_TAG_KEY;
|
||||||
|
|
||||||
future<executor::request_return_type> executor::update_time_to_live(client_state& client_state, service_permit permit, rjson::value request) {
|
future<executor::request_return_type> executor::update_time_to_live(client_state& client_state, service_permit permit, rjson::value request) {
|
||||||
_stats.api_operations.update_time_to_live++;
|
_stats.api_operations.update_time_to_live++;
|
||||||
if (!_proxy.features().alternator_ttl) {
|
if (!_proxy.features().alternator_ttl) {
|
||||||
co_return api_error::unknown_operation("UpdateTimeToLive not yet supported. Upgrade all nodes to a version that supports it.");
|
co_return api_error::unknown_operation("UpdateTimeToLive not yet supported. Experimental support is available if the 'alternator-ttl' experimental feature is enabled on all nodes.");
|
||||||
}
|
}
|
||||||
|
|
||||||
schema_ptr schema = get_table(_proxy, request);
|
schema_ptr schema = get_table(_proxy, request);
|
||||||
@@ -133,7 +141,7 @@ future<executor::request_return_type> executor::describe_time_to_live(client_sta
|
|||||||
|
|
||||||
// expiration_service is a sharded service responsible for cleaning up expired
|
// expiration_service is a sharded service responsible for cleaning up expired
|
||||||
// items in all tables with per-item expiration enabled. Currently, this means
|
// items in all tables with per-item expiration enabled. Currently, this means
|
||||||
// Alternator tables with TTL configured via an UpdateTimeToLive request.
|
// Alternator tables with TTL configured via a UpdateTimeToLive request.
|
||||||
//
|
//
|
||||||
// Here is a brief overview of how the expiration service works:
|
// Here is a brief overview of how the expiration service works:
|
||||||
//
|
//
|
||||||
@@ -316,7 +324,9 @@ static future<std::vector<std::pair<dht::token_range, locator::host_id>>> get_se
|
|||||||
const auto& tm = *erm->get_token_metadata_ptr();
|
const auto& tm = *erm->get_token_metadata_ptr();
|
||||||
const auto& sorted_tokens = tm.sorted_tokens();
|
const auto& sorted_tokens = tm.sorted_tokens();
|
||||||
std::vector<std::pair<dht::token_range, locator::host_id>> ret;
|
std::vector<std::pair<dht::token_range, locator::host_id>> ret;
|
||||||
throwing_assert(!sorted_tokens.empty());
|
if (sorted_tokens.empty()) {
|
||||||
|
on_internal_error(tlogger, "Token metadata is empty");
|
||||||
|
}
|
||||||
auto prev_tok = sorted_tokens.back();
|
auto prev_tok = sorted_tokens.back();
|
||||||
for (const auto& tok : sorted_tokens) {
|
for (const auto& tok : sorted_tokens) {
|
||||||
co_await coroutine::maybe_yield();
|
co_await coroutine::maybe_yield();
|
||||||
@@ -553,7 +563,7 @@ static future<> scan_table_ranges(
|
|||||||
expiration_service::stats& expiration_stats)
|
expiration_service::stats& expiration_stats)
|
||||||
{
|
{
|
||||||
const schema_ptr& s = scan_ctx.s;
|
const schema_ptr& s = scan_ctx.s;
|
||||||
throwing_assert(partition_ranges.size() == 1); // otherwise issue #9167 will cause incorrect results.
|
SCYLLA_ASSERT (partition_ranges.size() == 1); // otherwise issue #9167 will cause incorrect results.
|
||||||
auto p = service::pager::query_pagers::pager(proxy, s, scan_ctx.selection, *scan_ctx.query_state_ptr,
|
auto p = service::pager::query_pagers::pager(proxy, s, scan_ctx.selection, *scan_ctx.query_state_ptr,
|
||||||
*scan_ctx.query_options, scan_ctx.command, std::move(partition_ranges), nullptr);
|
*scan_ctx.query_options, scan_ctx.command, std::move(partition_ranges), nullptr);
|
||||||
while (!p->is_exhausted()) {
|
while (!p->is_exhausted()) {
|
||||||
@@ -583,7 +593,7 @@ static future<> scan_table_ranges(
|
|||||||
if (retries >= 10) {
|
if (retries >= 10) {
|
||||||
// Don't get stuck forever asking the same page, maybe there's
|
// Don't get stuck forever asking the same page, maybe there's
|
||||||
// a bug or a real problem in several replicas. Give up on
|
// a bug or a real problem in several replicas. Give up on
|
||||||
// this scan and retry the scan from a random position later,
|
// this scan an retry the scan from a random position later,
|
||||||
// in the next scan period.
|
// in the next scan period.
|
||||||
throw runtime_exception("scanner thread failed after too many timeouts for the same page");
|
throw runtime_exception("scanner thread failed after too many timeouts for the same page");
|
||||||
}
|
}
|
||||||
@@ -630,38 +640,13 @@ static future<> scan_table_ranges(
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// For a real column to contain an expiration time, it
|
// For a real column to contain an expiration time, it
|
||||||
// must be a numeric type. We currently support decimal
|
// must be a numeric type.
|
||||||
// (used by Alternator TTL) as well as bigint, int and
|
// FIXME: Currently we only support decimal_type (which is
|
||||||
// timestamp (used by CQL per-row TTL).
|
// what Alternator uses), but other numeric types can be
|
||||||
switch (meta[*expiration_column]->type->get_kind()) {
|
// supported as well to make this feature more useful in CQL.
|
||||||
case abstract_type::kind::decimal:
|
// Note that kind::decimal is also checked above.
|
||||||
// Used by Alternator TTL for key columns not stored
|
big_decimal n = value_cast<big_decimal>(v);
|
||||||
// in the map. The value is in seconds, fractional
|
expired = is_expired(n, now);
|
||||||
// part is ignored.
|
|
||||||
expired = is_expired(value_cast<big_decimal>(v), now);
|
|
||||||
break;
|
|
||||||
case abstract_type::kind::long_kind:
|
|
||||||
// Used by CQL per-row TTL. The value is in seconds.
|
|
||||||
expired = is_expired(gc_clock::time_point(std::chrono::seconds(value_cast<int64_t>(v))), now);
|
|
||||||
break;
|
|
||||||
case abstract_type::kind::int32:
|
|
||||||
// Used by CQL per-row TTL. The value is in seconds.
|
|
||||||
// Using int type is not recommended because it will
|
|
||||||
// overflow in 2038, but we support it to allow users
|
|
||||||
// to use existing int columns for expiration.
|
|
||||||
expired = is_expired(gc_clock::time_point(std::chrono::seconds(value_cast<int32_t>(v))), now);
|
|
||||||
break;
|
|
||||||
case abstract_type::kind::timestamp:
|
|
||||||
// Used by CQL per-row TTL. The value is in milliseconds
|
|
||||||
// but we truncate it to gc_clock's precision (whole seconds).
|
|
||||||
expired = is_expired(gc_clock::time_point(std::chrono::duration_cast<gc_clock::duration>(value_cast<db_clock::time_point>(v).time_since_epoch())), now);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
// Should never happen - we verified the column's type
|
|
||||||
// before starting the scan.
|
|
||||||
[[unlikely]]
|
|
||||||
on_internal_error(tlogger, format("expiration scanner value of unsupported type {} in column {}", meta[*expiration_column]->type->cql3_type_name(), scan_ctx.column_name) );
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (expired) {
|
if (expired) {
|
||||||
expiration_stats.items_deleted++;
|
expiration_stats.items_deleted++;
|
||||||
@@ -723,12 +708,16 @@ static future<bool> scan_table(
|
|||||||
co_return false;
|
co_return false;
|
||||||
}
|
}
|
||||||
// attribute_name may be one of the schema's columns (in Alternator, this
|
// attribute_name may be one of the schema's columns (in Alternator, this
|
||||||
// means a key column, in CQL it's a regular column), or an element in
|
// means it's a key column), or an element in Alternator's attrs map
|
||||||
// Alternator's attrs map encoded in Alternator's JSON encoding (which we
|
// encoded in Alternator's JSON encoding.
|
||||||
// decode). If attribute_name is a real column, in Alternator it will have
|
// FIXME: To make this less Alternators-specific, we should encode in the
|
||||||
// the type decimal, counting seconds since the UNIX epoch, while in CQL
|
// single key's value three things:
|
||||||
// it will one of the types bigint or int (counting seconds) or timestamp
|
// 1. The name of a column
|
||||||
// (counting milliseconds).
|
// 2. Optionally if column is a map, a member in the map
|
||||||
|
// 3. The deserializer for the value: CQL or Alternator (JSON).
|
||||||
|
// The deserializer can be guessed: If the given column or map item is
|
||||||
|
// numeric, it can be used directly. If it is a "bytes" type, it needs to
|
||||||
|
// be deserialized using Alternator's deserializer.
|
||||||
bytes column_name = to_bytes(*attribute_name);
|
bytes column_name = to_bytes(*attribute_name);
|
||||||
const column_definition *cd = s->get_column_definition(column_name);
|
const column_definition *cd = s->get_column_definition(column_name);
|
||||||
std::optional<std::string> member;
|
std::optional<std::string> member;
|
||||||
@@ -747,14 +736,11 @@ static future<bool> scan_table(
|
|||||||
data_type column_type = cd->type;
|
data_type column_type = cd->type;
|
||||||
// Verify that the column has the right type: If "member" exists
|
// Verify that the column has the right type: If "member" exists
|
||||||
// the column must be a map, and if it doesn't, the column must
|
// the column must be a map, and if it doesn't, the column must
|
||||||
// be decimal_type (Alternator), bigint, int or timestamp (CQL).
|
// (currently) be a decimal_type. If the column has the wrong type
|
||||||
// If the column has the wrong type nothing can get expired in
|
// nothing can get expired in this table, and it's pointless to
|
||||||
// this table, and it's pointless to scan it.
|
// scan it.
|
||||||
if ((member && column_type->get_kind() != abstract_type::kind::map) ||
|
if ((member && column_type->get_kind() != abstract_type::kind::map) ||
|
||||||
(!member && column_type->get_kind() != abstract_type::kind::decimal &&
|
(!member && column_type->get_kind() != abstract_type::kind::decimal)) {
|
||||||
column_type->get_kind() != abstract_type::kind::long_kind &&
|
|
||||||
column_type->get_kind() != abstract_type::kind::int32 &&
|
|
||||||
column_type->get_kind() != abstract_type::kind::timestamp)) {
|
|
||||||
tlogger.info("table {} TTL column has unsupported type, not scanning", s->cf_name());
|
tlogger.info("table {} TTL column has unsupported type, not scanning", s->cf_name());
|
||||||
co_return false;
|
co_return false;
|
||||||
}
|
}
|
||||||
@@ -781,7 +767,7 @@ static future<bool> scan_table(
|
|||||||
// by tasking another node to take over scanning of the dead node's primary
|
// by tasking another node to take over scanning of the dead node's primary
|
||||||
// ranges. What we do here is that this node will also check expiration
|
// ranges. What we do here is that this node will also check expiration
|
||||||
// on its *secondary* ranges - but only those whose primary owner is down.
|
// on its *secondary* ranges - but only those whose primary owner is down.
|
||||||
auto tablet_secondary_replica = tablet_map.get_secondary_replica(*tablet, erm->get_topology()); // throws if no secondary replica
|
auto tablet_secondary_replica = tablet_map.get_secondary_replica(*tablet); // throws if no secondary replica
|
||||||
if (tablet_secondary_replica.host == my_host_id && tablet_secondary_replica.shard == this_shard_id()) {
|
if (tablet_secondary_replica.host == my_host_id && tablet_secondary_replica.shard == this_shard_id()) {
|
||||||
if (!gossiper.is_alive(tablet_primary_replica.host)) {
|
if (!gossiper.is_alive(tablet_primary_replica.host)) {
|
||||||
co_await scan_tablet(*tablet, proxy, abort_source, page_sem, expiration_stats, scan_ctx, tablet_map);
|
co_await scan_tablet(*tablet, proxy, abort_source, page_sem, expiration_stats, scan_ctx, tablet_map);
|
||||||
@@ -892,11 +878,13 @@ future<> expiration_service::run() {
|
|||||||
future<> expiration_service::start() {
|
future<> expiration_service::start() {
|
||||||
// Called by main() on each shard to start the expiration-service
|
// Called by main() on each shard to start the expiration-service
|
||||||
// thread. Just runs run() in the background and allows stop().
|
// thread. Just runs run() in the background and allows stop().
|
||||||
|
if (_db.features().alternator_ttl) {
|
||||||
if (!shutting_down()) {
|
if (!shutting_down()) {
|
||||||
_end = run().handle_exception([] (std::exception_ptr ep) {
|
_end = run().handle_exception([] (std::exception_ptr ep) {
|
||||||
tlogger.error("expiration_service failed: {}", ep);
|
tlogger.error("expiration_service failed: {}", ep);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return make_ready_future<>();
|
return make_ready_future<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ namespace alternator {
|
|||||||
|
|
||||||
// expiration_service is a sharded service responsible for cleaning up expired
|
// expiration_service is a sharded service responsible for cleaning up expired
|
||||||
// items in all tables with per-item expiration enabled. Currently, this means
|
// items in all tables with per-item expiration enabled. Currently, this means
|
||||||
// Alternator tables with TTL configured via an UpdateTimeToLive request.
|
// Alternator tables with TTL configured via a UpdateTimeToLeave request.
|
||||||
class expiration_service final : public seastar::peering_sharded_service<expiration_service> {
|
class expiration_service final : public seastar::peering_sharded_service<expiration_service> {
|
||||||
public:
|
public:
|
||||||
// Object holding per-shard statistics related to the expiration service.
|
// Object holding per-shard statistics related to the expiration service.
|
||||||
@@ -52,7 +52,7 @@ private:
|
|||||||
data_dictionary::database _db;
|
data_dictionary::database _db;
|
||||||
service::storage_proxy& _proxy;
|
service::storage_proxy& _proxy;
|
||||||
gms::gossiper& _gossiper;
|
gms::gossiper& _gossiper;
|
||||||
// _end is set by start(), and resolves when the background service
|
// _end is set by start(), and resolves when the the background service
|
||||||
// started by it ends. To ask the background service to end, _abort_source
|
// started by it ends. To ask the background service to end, _abort_source
|
||||||
// should be triggered. stop() below uses both _abort_source and _end.
|
// should be triggered. stop() below uses both _abort_source and _end.
|
||||||
std::optional<future<>> _end;
|
std::optional<future<>> _end;
|
||||||
|
|||||||
@@ -1,26 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright 2026-present ScyllaDB
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
|
||||||
*/
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include "seastarx.hh"
|
|
||||||
#include <seastar/core/sstring.hh>
|
|
||||||
|
|
||||||
namespace alternator {
|
|
||||||
// We use the table tag TTL_TAG_KEY ("system:ttl_attribute") to remember
|
|
||||||
// which attribute was chosen as the expiration-time attribute for
|
|
||||||
// Alternator's TTL and CQL's per-row TTL features.
|
|
||||||
// Currently, the *value* of this tag is simply the name of the attribute:
|
|
||||||
// It can refer to a real column or if that doesn't exist, to a member of
|
|
||||||
// the ":attrs" map column (which Alternator uses).
|
|
||||||
extern const sstring TTL_TAG_KEY;
|
|
||||||
} // namespace alternator
|
|
||||||
|
|
||||||
// let users use TTL_TAG_KEY without the "alternator::" prefix,
|
|
||||||
// to make it easier to move it to a different namespace later.
|
|
||||||
using alternator::TTL_TAG_KEY;
|
|
||||||
@@ -12,7 +12,7 @@
|
|||||||
"operations":[
|
"operations":[
|
||||||
{
|
{
|
||||||
"method":"POST",
|
"method":"POST",
|
||||||
"summary":"Resets authorized prepared statements cache",
|
"summary":"Reset cache",
|
||||||
"type":"void",
|
"type":"void",
|
||||||
"nickname":"authorization_cache_reset",
|
"nickname":"authorization_cache_reset",
|
||||||
"produces":[
|
"produces":[
|
||||||
|
|||||||
@@ -3085,48 +3085,6 @@
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
|
||||||
"path":"/storage_service/tablets/snapshots",
|
|
||||||
"operations":[
|
|
||||||
{
|
|
||||||
"method":"POST",
|
|
||||||
"summary":"Takes the snapshot for the given keyspaces/tables. A snapshot name must be specified.",
|
|
||||||
"type":"void",
|
|
||||||
"nickname":"take_cluster_snapshot",
|
|
||||||
"produces":[
|
|
||||||
"application/json"
|
|
||||||
],
|
|
||||||
"parameters":[
|
|
||||||
{
|
|
||||||
"name":"tag",
|
|
||||||
"description":"the tag given to the snapshot",
|
|
||||||
"required":true,
|
|
||||||
"allowMultiple":false,
|
|
||||||
"type":"string",
|
|
||||||
"paramType":"query"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name":"keyspace",
|
|
||||||
"description":"Keyspace(s) to snapshot. Multiple keyspaces can be provided using a comma-separated list. If omitted, snapshot all keyspaces.",
|
|
||||||
"required":false,
|
|
||||||
"allowMultiple":false,
|
|
||||||
"type":"string",
|
|
||||||
"paramType":"query"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name":"table",
|
|
||||||
"description":"Table(s) to snapshot. Multiple tables (in a single keyspace) can be provided using a comma-separated list. If omitted, snapshot all tables in the given keyspace(s).",
|
|
||||||
"required":false,
|
|
||||||
"allowMultiple":false,
|
|
||||||
"type":"string",
|
|
||||||
"paramType":"query"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
|
|
||||||
{
|
{
|
||||||
"path":"/storage_service/quiesce_topology",
|
"path":"/storage_service/quiesce_topology",
|
||||||
"operations":[
|
"operations":[
|
||||||
|
|||||||
25
api/api.hh
25
api/api.hh
@@ -23,6 +23,31 @@
|
|||||||
|
|
||||||
namespace api {
|
namespace api {
|
||||||
|
|
||||||
|
template<class T>
|
||||||
|
std::vector<T> map_to_key_value(const std::map<sstring, sstring>& map) {
|
||||||
|
std::vector<T> res;
|
||||||
|
res.reserve(map.size());
|
||||||
|
|
||||||
|
for (const auto& [key, value] : map) {
|
||||||
|
res.push_back(T());
|
||||||
|
res.back().key = key;
|
||||||
|
res.back().value = value;
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class T, class MAP>
|
||||||
|
std::vector<T>& map_to_key_value(const MAP& map, std::vector<T>& res) {
|
||||||
|
res.reserve(res.size() + std::size(map));
|
||||||
|
|
||||||
|
for (const auto& [key, value] : map) {
|
||||||
|
T val;
|
||||||
|
val.key = fmt::to_string(key);
|
||||||
|
val.value = fmt::to_string(value);
|
||||||
|
res.push_back(val);
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
template <typename T, typename S = T>
|
template <typename T, typename S = T>
|
||||||
T map_sum(T&& dest, const S& src) {
|
T map_sum(T&& dest, const S& src) {
|
||||||
for (const auto& i : src) {
|
for (const auto& i : src) {
|
||||||
|
|||||||
@@ -515,15 +515,6 @@ void set_sstables_loader(http_context& ctx, routes& r, sharded<sstables_loader>&
|
|||||||
auto sstables = parsed.GetArray() |
|
auto sstables = parsed.GetArray() |
|
||||||
std::views::transform([] (const auto& s) { return sstring(rjson::to_string_view(s)); }) |
|
std::views::transform([] (const auto& s) { return sstring(rjson::to_string_view(s)); }) |
|
||||||
std::ranges::to<std::vector>();
|
std::ranges::to<std::vector>();
|
||||||
apilog.info("Restore invoked with following parameters: keyspace={}, table={}, endpoint={}, bucket={}, prefix={}, sstables_count={}, scope={}, primary_replica_only={}",
|
|
||||||
keyspace,
|
|
||||||
table,
|
|
||||||
endpoint,
|
|
||||||
bucket,
|
|
||||||
prefix,
|
|
||||||
sstables.size(),
|
|
||||||
scope,
|
|
||||||
primary_replica_only);
|
|
||||||
auto task_id = co_await sst_loader.local().download_new_sstables(keyspace, table, prefix, std::move(sstables), endpoint, bucket, scope, primary_replica_only);
|
auto task_id = co_await sst_loader.local().download_new_sstables(keyspace, table, prefix, std::move(sstables), endpoint, bucket, scope, primary_replica_only);
|
||||||
co_return json::json_return_type(fmt::to_string(task_id));
|
co_return json::json_return_type(fmt::to_string(task_id));
|
||||||
});
|
});
|
||||||
@@ -536,15 +527,13 @@ void unset_sstables_loader(http_context& ctx, routes& r) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void set_view_builder(http_context& ctx, routes& r, sharded<db::view::view_builder>& vb, sharded<gms::gossiper>& g) {
|
void set_view_builder(http_context& ctx, routes& r, sharded<db::view::view_builder>& vb, sharded<gms::gossiper>& g) {
|
||||||
ss::view_build_statuses.set(r, [&ctx, &vb, &g] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
ss::view_build_statuses.set(r, [&ctx, &vb, &g] (std::unique_ptr<http::request> req) {
|
||||||
auto keyspace = validate_keyspace(ctx, req);
|
auto keyspace = validate_keyspace(ctx, req);
|
||||||
auto view = req->get_path_param("view");
|
auto view = req->get_path_param("view");
|
||||||
co_return json::json_return_type(stream_range_as_array(co_await vb.local().view_build_statuses(std::move(keyspace), std::move(view), g.local()), [] (const auto& i) {
|
return vb.local().view_build_statuses(std::move(keyspace), std::move(view), g.local()).then([] (std::unordered_map<sstring, sstring> status) {
|
||||||
storage_service_json::mapper res;
|
std::vector<storage_service_json::mapper> res;
|
||||||
res.key = i.first;
|
return make_ready_future<json::json_return_type>(map_to_key_value(std::move(status), res));
|
||||||
res.value = i.second;
|
});
|
||||||
return res;
|
|
||||||
}));
|
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_built_indexes.set(r, [&vb](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
cf::get_built_indexes.set(r, [&vb](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||||
@@ -582,16 +571,6 @@ static future<json::json_return_type> describe_ring_as_json_for_table(const shar
|
|||||||
co_return json::json_return_type(stream_range_as_array(co_await ss.local().describe_ring_for_table(keyspace, table), token_range_endpoints_to_json));
|
co_return json::json_return_type(stream_range_as_array(co_await ss.local().describe_ring_for_table(keyspace, table), token_range_endpoints_to_json));
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace {
|
|
||||||
template <typename Key, typename Value>
|
|
||||||
storage_service_json::mapper map_to_json(const std::pair<Key, Value>& i) {
|
|
||||||
storage_service_json::mapper val;
|
|
||||||
val.key = fmt::to_string(i.first);
|
|
||||||
val.value = fmt::to_string(i.second);
|
|
||||||
return val;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
static
|
||||||
future<json::json_return_type>
|
future<json::json_return_type>
|
||||||
rest_get_token_endpoint(http_context& ctx, sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
|
rest_get_token_endpoint(http_context& ctx, sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
|
||||||
@@ -609,7 +588,12 @@ rest_get_token_endpoint(http_context& ctx, sharded<service::storage_service>& ss
|
|||||||
throw bad_param_exception("Either provide both keyspace and table (for tablet table) or neither (for vnodes)");
|
throw bad_param_exception("Either provide both keyspace and table (for tablet table) or neither (for vnodes)");
|
||||||
}
|
}
|
||||||
|
|
||||||
co_return json::json_return_type(stream_range_as_array(token_endpoints, &map_to_json<dht::token, gms::inet_address>));
|
co_return json::json_return_type(stream_range_as_array(token_endpoints, [](const auto& i) {
|
||||||
|
storage_service_json::mapper val;
|
||||||
|
val.key = fmt::to_string(i.first);
|
||||||
|
val.value = fmt::to_string(i.second);
|
||||||
|
return val;
|
||||||
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
@@ -693,6 +677,7 @@ rest_get_range_to_endpoint_map(http_context& ctx, sharded<service::storage_servi
|
|||||||
table_id = validate_table(ctx.db.local(), keyspace, table);
|
table_id = validate_table(ctx.db.local(), keyspace, table);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<ss::maplist_mapper> res;
|
||||||
co_return stream_range_as_array(co_await ss.local().get_range_to_address_map(keyspace, table_id),
|
co_return stream_range_as_array(co_await ss.local().get_range_to_address_map(keyspace, table_id),
|
||||||
[](const std::pair<dht::token_range, inet_address_vector_replica_set>& entry){
|
[](const std::pair<dht::token_range, inet_address_vector_replica_set>& entry){
|
||||||
ss::maplist_mapper m;
|
ss::maplist_mapper m;
|
||||||
@@ -783,13 +768,17 @@ rest_cleanup_all(http_context& ctx, sharded<service::storage_service>& ss, std::
|
|||||||
|
|
||||||
apilog.info("cleanup_all global={}", global);
|
apilog.info("cleanup_all global={}", global);
|
||||||
|
|
||||||
if (global) {
|
auto done = !global ? false : co_await ss.invoke_on(0, [] (service::storage_service& ss) -> future<bool> {
|
||||||
co_await ss.invoke_on(0, [] (service::storage_service& ss) -> future<> {
|
if (!ss.is_topology_coordinator_enabled()) {
|
||||||
co_return co_await ss.do_clusterwide_vnodes_cleanup();
|
co_return false;
|
||||||
|
}
|
||||||
|
co_await ss.do_clusterwide_vnodes_cleanup();
|
||||||
|
co_return true;
|
||||||
});
|
});
|
||||||
|
if (done) {
|
||||||
co_return json::json_return_type(0);
|
co_return json::json_return_type(0);
|
||||||
}
|
}
|
||||||
// fall back to the local cleanup if local cleanup is requested
|
// fall back to the local cleanup if topology coordinator is not enabled or local cleanup is requested
|
||||||
auto& db = ctx.db;
|
auto& db = ctx.db;
|
||||||
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
|
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
|
||||||
auto task = co_await compaction_module.make_and_start_task<compaction::global_cleanup_compaction_task_impl>({}, db);
|
auto task = co_await compaction_module.make_and_start_task<compaction::global_cleanup_compaction_task_impl>({}, db);
|
||||||
@@ -797,7 +786,9 @@ rest_cleanup_all(http_context& ctx, sharded<service::storage_service>& ss, std::
|
|||||||
|
|
||||||
// Mark this node as clean
|
// Mark this node as clean
|
||||||
co_await ss.invoke_on(0, [] (service::storage_service& ss) -> future<> {
|
co_await ss.invoke_on(0, [] (service::storage_service& ss) -> future<> {
|
||||||
|
if (ss.is_topology_coordinator_enabled()) {
|
||||||
co_await ss.reset_cleanup_needed();
|
co_await ss.reset_cleanup_needed();
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
co_return json::json_return_type(0);
|
co_return json::json_return_type(0);
|
||||||
@@ -808,6 +799,9 @@ future<json::json_return_type>
|
|||||||
rest_reset_cleanup_needed(http_context& ctx, sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
|
rest_reset_cleanup_needed(http_context& ctx, sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
|
||||||
apilog.info("reset_cleanup_needed");
|
apilog.info("reset_cleanup_needed");
|
||||||
co_await ss.invoke_on(0, [] (service::storage_service& ss) {
|
co_await ss.invoke_on(0, [] (service::storage_service& ss) {
|
||||||
|
if (!ss.is_topology_coordinator_enabled()) {
|
||||||
|
throw std::runtime_error("mark_node_as_clean is only supported when topology over raft is enabled");
|
||||||
|
}
|
||||||
return ss.reset_cleanup_needed();
|
return ss.reset_cleanup_needed();
|
||||||
});
|
});
|
||||||
co_return json_void();
|
co_return json_void();
|
||||||
@@ -1314,7 +1308,10 @@ rest_get_ownership(http_context& ctx, sharded<service::storage_service>& ss, std
|
|||||||
throw httpd::bad_param_exception("storage_service/ownership cannot be used when a keyspace uses tablets");
|
throw httpd::bad_param_exception("storage_service/ownership cannot be used when a keyspace uses tablets");
|
||||||
}
|
}
|
||||||
|
|
||||||
co_return json::json_return_type(stream_range_as_array(co_await ss.local().get_ownership(), &map_to_json<gms::inet_address, float>));
|
return ss.local().get_ownership().then([] (auto&& ownership) {
|
||||||
|
std::vector<storage_service_json::mapper> res;
|
||||||
|
return make_ready_future<json::json_return_type>(map_to_key_value(ownership, res));
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
@@ -1331,7 +1328,10 @@ rest_get_effective_ownership(http_context& ctx, sharded<service::storage_service
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
co_return json::json_return_type(stream_range_as_array(co_await ss.local().effective_ownership(keyspace_name, table_name), &map_to_json<gms::inet_address, float>));
|
return ss.local().effective_ownership(keyspace_name, table_name).then([] (auto&& ownership) {
|
||||||
|
std::vector<storage_service_json::mapper> res;
|
||||||
|
return make_ready_future<json::json_return_type>(map_to_key_value(ownership, res));
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
@@ -1341,7 +1341,7 @@ rest_estimate_compression_ratios(http_context& ctx, sharded<service::storage_ser
|
|||||||
apilog.warn("estimate_compression_ratios: called before the cluster feature was enabled");
|
apilog.warn("estimate_compression_ratios: called before the cluster feature was enabled");
|
||||||
throw std::runtime_error("estimate_compression_ratios requires all nodes to support the SSTABLE_COMPRESSION_DICTS cluster feature");
|
throw std::runtime_error("estimate_compression_ratios requires all nodes to support the SSTABLE_COMPRESSION_DICTS cluster feature");
|
||||||
}
|
}
|
||||||
auto ticket = co_await get_units(ss.local().get_do_sample_sstables_concurrency_limiter(), 1);
|
auto ticket = get_units(ss.local().get_do_sample_sstables_concurrency_limiter(), 1);
|
||||||
auto ks = api::req_param<sstring>(*req, "keyspace", {}).value;
|
auto ks = api::req_param<sstring>(*req, "keyspace", {}).value;
|
||||||
auto cf = api::req_param<sstring>(*req, "cf", {}).value;
|
auto cf = api::req_param<sstring>(*req, "cf", {}).value;
|
||||||
apilog.debug("estimate_compression_ratios: called with ks={} cf={}", ks, cf);
|
apilog.debug("estimate_compression_ratios: called with ks={} cf={}", ks, cf);
|
||||||
@@ -1407,7 +1407,7 @@ rest_retrain_dict(http_context& ctx, sharded<service::storage_service>& ss, serv
|
|||||||
apilog.warn("retrain_dict: called before the cluster feature was enabled");
|
apilog.warn("retrain_dict: called before the cluster feature was enabled");
|
||||||
throw std::runtime_error("retrain_dict requires all nodes to support the SSTABLE_COMPRESSION_DICTS cluster feature");
|
throw std::runtime_error("retrain_dict requires all nodes to support the SSTABLE_COMPRESSION_DICTS cluster feature");
|
||||||
}
|
}
|
||||||
auto ticket = co_await get_units(ss.local().get_do_sample_sstables_concurrency_limiter(), 1);
|
auto ticket = get_units(ss.local().get_do_sample_sstables_concurrency_limiter(), 1);
|
||||||
auto ks = api::req_param<sstring>(*req, "keyspace", {}).value;
|
auto ks = api::req_param<sstring>(*req, "keyspace", {}).value;
|
||||||
auto cf = api::req_param<sstring>(*req, "cf", {}).value;
|
auto cf = api::req_param<sstring>(*req, "cf", {}).value;
|
||||||
apilog.debug("retrain_dict: called with ks={} cf={}", ks, cf);
|
apilog.debug("retrain_dict: called with ks={} cf={}", ks, cf);
|
||||||
@@ -1565,7 +1565,16 @@ rest_reload_raft_topology_state(sharded<service::storage_service>& ss, service::
|
|||||||
static
|
static
|
||||||
future<json::json_return_type>
|
future<json::json_return_type>
|
||||||
rest_upgrade_to_raft_topology(sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
|
rest_upgrade_to_raft_topology(sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
|
||||||
apilog.info("Requested to schedule upgrade to raft topology, but this version does not need it since it uses raft topology by default.");
|
apilog.info("Requested to schedule upgrade to raft topology");
|
||||||
|
try {
|
||||||
|
co_await ss.invoke_on(0, [] (auto& ss) {
|
||||||
|
return ss.start_upgrade_to_raft_topology();
|
||||||
|
});
|
||||||
|
} catch (...) {
|
||||||
|
auto ex = std::current_exception();
|
||||||
|
apilog.error("Failed to schedule upgrade to raft topology: {}", ex);
|
||||||
|
std::rethrow_exception(std::move(ex));
|
||||||
|
}
|
||||||
co_return json_void();
|
co_return json_void();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2007,8 +2016,6 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
|
|||||||
auto tag = req->get_query_param("tag");
|
auto tag = req->get_query_param("tag");
|
||||||
auto column_families = split(req->get_query_param("cf"), ",");
|
auto column_families = split(req->get_query_param("cf"), ",");
|
||||||
auto sfopt = req->get_query_param("sf");
|
auto sfopt = req->get_query_param("sf");
|
||||||
auto tcopt = req->get_query_param("tc");
|
|
||||||
|
|
||||||
db::snapshot_options opts = {
|
db::snapshot_options opts = {
|
||||||
.skip_flush = strcasecmp(sfopt.c_str(), "true") == 0,
|
.skip_flush = strcasecmp(sfopt.c_str(), "true") == 0,
|
||||||
};
|
};
|
||||||
@@ -2033,27 +2040,6 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
ss::take_cluster_snapshot.set(r, [&snap_ctl](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
|
||||||
apilog.info("take_cluster_snapshot: {}", req->get_query_params());
|
|
||||||
auto tag = req->get_query_param("tag");
|
|
||||||
auto column_families = split(req->get_query_param("table"), ",");
|
|
||||||
// Note: not published/active. Retain as internal option, but...
|
|
||||||
auto sfopt = req->get_query_param("skip_flush");
|
|
||||||
|
|
||||||
db::snapshot_options opts = {
|
|
||||||
.skip_flush = strcasecmp(sfopt.c_str(), "true") == 0,
|
|
||||||
};
|
|
||||||
|
|
||||||
std::vector<sstring> keynames = split(req->get_query_param("keyspace"), ",");
|
|
||||||
try {
|
|
||||||
co_await snap_ctl.local().take_cluster_column_family_snapshot(keynames, column_families, tag, opts);
|
|
||||||
co_return json_void();
|
|
||||||
} catch (...) {
|
|
||||||
apilog.error("take_cluster_snapshot failed: {}", std::current_exception());
|
|
||||||
throw;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
ss::del_snapshot.set(r, [&snap_ctl](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
ss::del_snapshot.set(r, [&snap_ctl](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||||
apilog.info("del_snapshot: {}", req->get_query_params());
|
apilog.info("del_snapshot: {}", req->get_query_params());
|
||||||
auto tag = req->get_query_param("tag");
|
auto tag = req->get_query_param("tag");
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ target_sources(scylla_auth
|
|||||||
password_authenticator.cc
|
password_authenticator.cc
|
||||||
passwords.cc
|
passwords.cc
|
||||||
permission.cc
|
permission.cc
|
||||||
|
permissions_cache.cc
|
||||||
resource.cc
|
resource.cc
|
||||||
role_or_anonymous.cc
|
role_or_anonymous.cc
|
||||||
roles-metadata.cc
|
roles-metadata.cc
|
||||||
|
|||||||
183
auth/cache.cc
183
auth/cache.cc
@@ -8,7 +8,6 @@
|
|||||||
|
|
||||||
#include "auth/cache.hh"
|
#include "auth/cache.hh"
|
||||||
#include "auth/common.hh"
|
#include "auth/common.hh"
|
||||||
#include "auth/role_or_anonymous.hh"
|
|
||||||
#include "auth/roles-metadata.hh"
|
#include "auth/roles-metadata.hh"
|
||||||
#include "cql3/query_processor.hh"
|
#include "cql3/query_processor.hh"
|
||||||
#include "cql3/untyped_result_set.hh"
|
#include "cql3/untyped_result_set.hh"
|
||||||
@@ -19,8 +18,6 @@
|
|||||||
#include <seastar/core/abort_source.hh>
|
#include <seastar/core/abort_source.hh>
|
||||||
#include <seastar/coroutine/maybe_yield.hh>
|
#include <seastar/coroutine/maybe_yield.hh>
|
||||||
#include <seastar/core/format.hh>
|
#include <seastar/core/format.hh>
|
||||||
#include <seastar/core/metrics.hh>
|
|
||||||
#include <seastar/core/do_with.hh>
|
|
||||||
|
|
||||||
namespace auth {
|
namespace auth {
|
||||||
|
|
||||||
@@ -30,21 +27,7 @@ cache::cache(cql3::query_processor& qp, abort_source& as) noexcept
|
|||||||
: _current_version(0)
|
: _current_version(0)
|
||||||
, _qp(qp)
|
, _qp(qp)
|
||||||
, _loading_sem(1)
|
, _loading_sem(1)
|
||||||
, _as(as)
|
, _as(as) {
|
||||||
, _permission_loader(nullptr)
|
|
||||||
, _permission_loader_sem(8) {
|
|
||||||
namespace sm = seastar::metrics;
|
|
||||||
_metrics.add_group("auth_cache", {
|
|
||||||
sm::make_gauge("roles", [this] { return _roles.size(); },
|
|
||||||
sm::description("Number of roles currently cached")),
|
|
||||||
sm::make_gauge("permissions", [this] {
|
|
||||||
return _cached_permissions_count;
|
|
||||||
}, sm::description("Total number of permission sets currently cached across all roles"))
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
void cache::set_permission_loader(permission_loader_func loader) {
|
|
||||||
_permission_loader = std::move(loader);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
lw_shared_ptr<const cache::role_record> cache::get(const role_name_t& role) const noexcept {
|
lw_shared_ptr<const cache::role_record> cache::get(const role_name_t& role) const noexcept {
|
||||||
@@ -55,83 +38,6 @@ lw_shared_ptr<const cache::role_record> cache::get(const role_name_t& role) cons
|
|||||||
return it->second;
|
return it->second;
|
||||||
}
|
}
|
||||||
|
|
||||||
future<permission_set> cache::get_permissions(const role_or_anonymous& role, const resource& r) {
|
|
||||||
std::unordered_map<resource, permission_set>* perms_cache;
|
|
||||||
lw_shared_ptr<role_record> role_ptr;
|
|
||||||
|
|
||||||
if (is_anonymous(role)) {
|
|
||||||
perms_cache = &_anonymous_permissions;
|
|
||||||
} else {
|
|
||||||
const auto& role_name = *role.name;
|
|
||||||
auto role_it = _roles.find(role_name);
|
|
||||||
if (role_it == _roles.end()) {
|
|
||||||
// Role might have been deleted but there are some connections
|
|
||||||
// left which reference it. They should no longer have access to anything.
|
|
||||||
return make_ready_future<permission_set>(permissions::NONE);
|
|
||||||
}
|
|
||||||
role_ptr = role_it->second;
|
|
||||||
perms_cache = &role_ptr->cached_permissions;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (auto it = perms_cache->find(r); it != perms_cache->end()) {
|
|
||||||
return make_ready_future<permission_set>(it->second);
|
|
||||||
}
|
|
||||||
// keep alive role_ptr as it holds perms_cache (except anonymous)
|
|
||||||
return do_with(std::move(role_ptr), [this, &role, &r, perms_cache] (auto& role_ptr) {
|
|
||||||
return load_permissions(role, r, perms_cache);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
future<permission_set> cache::load_permissions(const role_or_anonymous& role, const resource& r, std::unordered_map<resource, permission_set>* perms_cache) {
|
|
||||||
SCYLLA_ASSERT(_permission_loader);
|
|
||||||
auto units = co_await get_units(_permission_loader_sem, 1, _as);
|
|
||||||
|
|
||||||
// Check again, perhaps we were blocked and other call loaded
|
|
||||||
// the permissions already. This is a protection against misses storm.
|
|
||||||
if (auto it = perms_cache->find(r); it != perms_cache->end()) {
|
|
||||||
co_return it->second;
|
|
||||||
}
|
|
||||||
auto perms = co_await _permission_loader(role, r);
|
|
||||||
add_permissions(*perms_cache, r, perms);
|
|
||||||
co_return perms;
|
|
||||||
}
|
|
||||||
|
|
||||||
future<> cache::prune(const resource& r) {
|
|
||||||
auto units = co_await get_units(_loading_sem, 1, _as);
|
|
||||||
_anonymous_permissions.erase(r);
|
|
||||||
for (auto& it : _roles) {
|
|
||||||
// Prunning can run concurrently with other functions but it
|
|
||||||
// can only cause cached_permissions extra reload via get_permissions.
|
|
||||||
remove_permissions(it.second->cached_permissions, r);
|
|
||||||
co_await coroutine::maybe_yield();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
future<> cache::reload_all_permissions() noexcept {
|
|
||||||
SCYLLA_ASSERT(_permission_loader);
|
|
||||||
auto units = co_await get_units(_loading_sem, 1, _as);
|
|
||||||
auto copy_keys = [] (const std::unordered_map<resource, permission_set>& m) {
|
|
||||||
std::vector<resource> keys;
|
|
||||||
keys.reserve(m.size());
|
|
||||||
for (const auto& [res, _] : m) {
|
|
||||||
keys.push_back(res);
|
|
||||||
}
|
|
||||||
return keys;
|
|
||||||
};
|
|
||||||
const role_or_anonymous anon;
|
|
||||||
for (const auto& res : copy_keys(_anonymous_permissions)) {
|
|
||||||
_anonymous_permissions[res] = co_await _permission_loader(anon, res);
|
|
||||||
}
|
|
||||||
for (auto& [role, entry] : _roles) {
|
|
||||||
auto& perms_cache = entry->cached_permissions;
|
|
||||||
auto r = role_or_anonymous(role);
|
|
||||||
for (const auto& res : copy_keys(perms_cache)) {
|
|
||||||
perms_cache[res] = co_await _permission_loader(r, res);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
logger.debug("Reloaded auth cache with {} entries", _roles.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
future<lw_shared_ptr<cache::role_record>> cache::fetch_role(const role_name_t& role) const {
|
future<lw_shared_ptr<cache::role_record>> cache::fetch_role(const role_name_t& role) const {
|
||||||
auto rec = make_lw_shared<role_record>();
|
auto rec = make_lw_shared<role_record>();
|
||||||
rec->version = _current_version;
|
rec->version = _current_version;
|
||||||
@@ -199,7 +105,7 @@ future<lw_shared_ptr<cache::role_record>> cache::fetch_role(const role_name_t& r
|
|||||||
future<> cache::prune_all() noexcept {
|
future<> cache::prune_all() noexcept {
|
||||||
for (auto it = _roles.begin(); it != _roles.end(); ) {
|
for (auto it = _roles.begin(); it != _roles.end(); ) {
|
||||||
if (it->second->version != _current_version) {
|
if (it->second->version != _current_version) {
|
||||||
remove_role(it++);
|
_roles.erase(it++);
|
||||||
co_await coroutine::maybe_yield();
|
co_await coroutine::maybe_yield();
|
||||||
} else {
|
} else {
|
||||||
++it;
|
++it;
|
||||||
@@ -223,7 +129,7 @@ future<> cache::load_all() {
|
|||||||
const auto name = r.get_as<sstring>("role");
|
const auto name = r.get_as<sstring>("role");
|
||||||
auto role = co_await fetch_role(name);
|
auto role = co_await fetch_role(name);
|
||||||
if (role) {
|
if (role) {
|
||||||
add_role(name, role);
|
_roles[name] = role;
|
||||||
}
|
}
|
||||||
co_return stop_iteration::no;
|
co_return stop_iteration::no;
|
||||||
};
|
};
|
||||||
@@ -236,32 +142,11 @@ future<> cache::load_all() {
|
|||||||
co_await distribute_role(name, role);
|
co_await distribute_role(name, role);
|
||||||
}
|
}
|
||||||
co_await container().invoke_on_others([this](cache& c) -> future<> {
|
co_await container().invoke_on_others([this](cache& c) -> future<> {
|
||||||
auto units = co_await get_units(c._loading_sem, 1, c._as);
|
|
||||||
c._current_version = _current_version;
|
c._current_version = _current_version;
|
||||||
co_await c.prune_all();
|
co_await c.prune_all();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> cache::gather_inheriting_roles(std::unordered_set<role_name_t>& roles, lw_shared_ptr<cache::role_record> role, const role_name_t& name) {
|
|
||||||
if (!role) {
|
|
||||||
// Role might have been removed or not yet added, either way
|
|
||||||
// their members will be handled by another top call to this function.
|
|
||||||
co_return;
|
|
||||||
}
|
|
||||||
for (const auto& member_name : role->members) {
|
|
||||||
bool is_new = roles.insert(member_name).second;
|
|
||||||
if (!is_new) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
lw_shared_ptr<cache::role_record> member_role;
|
|
||||||
auto r = _roles.find(member_name);
|
|
||||||
if (r != _roles.end()) {
|
|
||||||
member_role = r->second;
|
|
||||||
}
|
|
||||||
co_await gather_inheriting_roles(roles, member_role, member_name);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
future<> cache::load_roles(std::unordered_set<role_name_t> roles) {
|
future<> cache::load_roles(std::unordered_set<role_name_t> roles) {
|
||||||
if (legacy_mode(_qp)) {
|
if (legacy_mode(_qp)) {
|
||||||
co_return;
|
co_return;
|
||||||
@@ -269,41 +154,27 @@ future<> cache::load_roles(std::unordered_set<role_name_t> roles) {
|
|||||||
SCYLLA_ASSERT(this_shard_id() == 0);
|
SCYLLA_ASSERT(this_shard_id() == 0);
|
||||||
auto units = co_await get_units(_loading_sem, 1, _as);
|
auto units = co_await get_units(_loading_sem, 1, _as);
|
||||||
|
|
||||||
std::unordered_set<role_name_t> roles_to_clear_perms;
|
|
||||||
for (const auto& name : roles) {
|
for (const auto& name : roles) {
|
||||||
logger.info("Loading role {}", name);
|
logger.info("Loading role {}", name);
|
||||||
auto role = co_await fetch_role(name);
|
auto role = co_await fetch_role(name);
|
||||||
if (role) {
|
if (role) {
|
||||||
add_role(name, role);
|
_roles[name] = role;
|
||||||
co_await gather_inheriting_roles(roles_to_clear_perms, role, name);
|
|
||||||
} else {
|
} else {
|
||||||
if (auto it = _roles.find(name); it != _roles.end()) {
|
_roles.erase(name);
|
||||||
auto old_role = it->second;
|
|
||||||
remove_role(it);
|
|
||||||
co_await gather_inheriting_roles(roles_to_clear_perms, old_role, name);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
co_await distribute_role(name, role);
|
co_await distribute_role(name, role);
|
||||||
}
|
}
|
||||||
|
|
||||||
co_await container().invoke_on_all([&roles_to_clear_perms] (cache& c) -> future<> {
|
|
||||||
for (const auto& name : roles_to_clear_perms) {
|
|
||||||
c.clear_role_permissions(name);
|
|
||||||
co_await coroutine::maybe_yield();
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> cache::distribute_role(const role_name_t& name, lw_shared_ptr<role_record> role) {
|
future<> cache::distribute_role(const role_name_t& name, lw_shared_ptr<role_record> role) {
|
||||||
auto role_ptr = role.get();
|
auto role_ptr = role.get();
|
||||||
co_await container().invoke_on_others([&name, role_ptr](cache& c) -> future<> {
|
co_await container().invoke_on_others([&name, role_ptr](cache& c) {
|
||||||
auto units = co_await get_units(c._loading_sem, 1, c._as);
|
|
||||||
if (!role_ptr) {
|
if (!role_ptr) {
|
||||||
c.remove_role(name);
|
c._roles.erase(name);
|
||||||
co_return;
|
return;
|
||||||
}
|
}
|
||||||
auto role_copy = make_lw_shared<role_record>(*role_ptr);
|
auto role_copy = make_lw_shared<role_record>(*role_ptr);
|
||||||
c.add_role(name, std::move(role_copy));
|
c._roles[name] = std::move(role_copy);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -314,40 +185,4 @@ bool cache::includes_table(const table_id& id) noexcept {
|
|||||||
|| id == db::system_keyspace::role_permissions()->id();
|
|| id == db::system_keyspace::role_permissions()->id();
|
||||||
}
|
}
|
||||||
|
|
||||||
void cache::add_role(const role_name_t& name, lw_shared_ptr<role_record> role) {
|
|
||||||
if (auto it = _roles.find(name); it != _roles.end()) {
|
|
||||||
_cached_permissions_count -= it->second->cached_permissions.size();
|
|
||||||
}
|
|
||||||
_cached_permissions_count += role->cached_permissions.size();
|
|
||||||
_roles[name] = std::move(role);
|
|
||||||
}
|
|
||||||
|
|
||||||
void cache::remove_role(const role_name_t& name) {
|
|
||||||
if (auto it = _roles.find(name); it != _roles.end()) {
|
|
||||||
remove_role(it);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void cache::remove_role(roles_map::iterator it) {
|
|
||||||
_cached_permissions_count -= it->second->cached_permissions.size();
|
|
||||||
_roles.erase(it);
|
|
||||||
}
|
|
||||||
|
|
||||||
void cache::clear_role_permissions(const role_name_t& name) {
|
|
||||||
if (auto it = _roles.find(name); it != _roles.end()) {
|
|
||||||
_cached_permissions_count -= it->second->cached_permissions.size();
|
|
||||||
it->second->cached_permissions.clear();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void cache::add_permissions(std::unordered_map<resource, permission_set>& cache, const resource& r, permission_set perms) {
|
|
||||||
if (cache.emplace(r, perms).second) {
|
|
||||||
++_cached_permissions_count;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void cache::remove_permissions(std::unordered_map<resource, permission_set>& cache, const resource& r) {
|
|
||||||
_cached_permissions_count -= cache.erase(r);
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace auth
|
} // namespace auth
|
||||||
|
|||||||
@@ -17,14 +17,11 @@
|
|||||||
#include <seastar/core/sharded.hh>
|
#include <seastar/core/sharded.hh>
|
||||||
#include <seastar/core/shared_ptr.hh>
|
#include <seastar/core/shared_ptr.hh>
|
||||||
#include <seastar/core/semaphore.hh>
|
#include <seastar/core/semaphore.hh>
|
||||||
#include <seastar/core/metrics_registration.hh>
|
|
||||||
|
|
||||||
#include <absl/container/flat_hash_map.h>
|
#include <absl/container/flat_hash_map.h>
|
||||||
|
|
||||||
#include "auth/permission.hh"
|
#include "auth/permission.hh"
|
||||||
#include "auth/common.hh"
|
#include "auth/common.hh"
|
||||||
#include "auth/resource.hh"
|
|
||||||
#include "auth/role_or_anonymous.hh"
|
|
||||||
|
|
||||||
namespace cql3 { class query_processor; }
|
namespace cql3 { class query_processor; }
|
||||||
|
|
||||||
@@ -34,7 +31,6 @@ class cache : public peering_sharded_service<cache> {
|
|||||||
public:
|
public:
|
||||||
using role_name_t = sstring;
|
using role_name_t = sstring;
|
||||||
using version_tag_t = char;
|
using version_tag_t = char;
|
||||||
using permission_loader_func = std::function<future<permission_set>(const role_or_anonymous&, const resource&)>;
|
|
||||||
|
|
||||||
struct role_record {
|
struct role_record {
|
||||||
bool can_login = false;
|
bool can_login = false;
|
||||||
@@ -44,19 +40,11 @@ public:
|
|||||||
sstring salted_hash;
|
sstring salted_hash;
|
||||||
std::unordered_map<sstring, sstring> attributes;
|
std::unordered_map<sstring, sstring> attributes;
|
||||||
std::unordered_map<sstring, permission_set> permissions;
|
std::unordered_map<sstring, permission_set> permissions;
|
||||||
private:
|
|
||||||
friend cache;
|
|
||||||
// cached permissions include effects of role's inheritance
|
|
||||||
std::unordered_map<resource, permission_set> cached_permissions;
|
|
||||||
version_tag_t version; // used for seamless cache reloads
|
version_tag_t version; // used for seamless cache reloads
|
||||||
};
|
};
|
||||||
|
|
||||||
explicit cache(cql3::query_processor& qp, abort_source& as) noexcept;
|
explicit cache(cql3::query_processor& qp, abort_source& as) noexcept;
|
||||||
lw_shared_ptr<const role_record> get(const role_name_t& role) const noexcept;
|
lw_shared_ptr<const role_record> get(const role_name_t& role) const noexcept;
|
||||||
void set_permission_loader(permission_loader_func loader);
|
|
||||||
future<permission_set> get_permissions(const role_or_anonymous& role, const resource& r);
|
|
||||||
future<> prune(const resource& r);
|
|
||||||
future<> reload_all_permissions() noexcept;
|
|
||||||
future<> load_all();
|
future<> load_all();
|
||||||
future<> load_roles(std::unordered_set<role_name_t> roles);
|
future<> load_roles(std::unordered_set<role_name_t> roles);
|
||||||
static bool includes_table(const table_id&) noexcept;
|
static bool includes_table(const table_id&) noexcept;
|
||||||
@@ -64,31 +52,14 @@ public:
|
|||||||
private:
|
private:
|
||||||
using roles_map = absl::flat_hash_map<role_name_t, lw_shared_ptr<role_record>>;
|
using roles_map = absl::flat_hash_map<role_name_t, lw_shared_ptr<role_record>>;
|
||||||
roles_map _roles;
|
roles_map _roles;
|
||||||
// anonymous permissions map exists mainly due to compatibility with
|
|
||||||
// higher layers which use role_or_anonymous to get permissions.
|
|
||||||
std::unordered_map<resource, permission_set> _anonymous_permissions;
|
|
||||||
version_tag_t _current_version;
|
version_tag_t _current_version;
|
||||||
cql3::query_processor& _qp;
|
cql3::query_processor& _qp;
|
||||||
semaphore _loading_sem; // protects iteration of _roles map
|
semaphore _loading_sem;
|
||||||
abort_source& _as;
|
abort_source& _as;
|
||||||
permission_loader_func _permission_loader;
|
|
||||||
semaphore _permission_loader_sem; // protects against reload storms on a single role change
|
|
||||||
metrics::metric_groups _metrics;
|
|
||||||
size_t _cached_permissions_count = 0;
|
|
||||||
|
|
||||||
future<lw_shared_ptr<role_record>> fetch_role(const role_name_t& role) const;
|
future<lw_shared_ptr<role_record>> fetch_role(const role_name_t& role) const;
|
||||||
future<> prune_all() noexcept;
|
future<> prune_all() noexcept;
|
||||||
future<> distribute_role(const role_name_t& name, const lw_shared_ptr<role_record> role);
|
future<> distribute_role(const role_name_t& name, const lw_shared_ptr<role_record> role);
|
||||||
future<> gather_inheriting_roles(std::unordered_set<role_name_t>& roles, lw_shared_ptr<cache::role_record> role, const role_name_t& name);
|
|
||||||
|
|
||||||
void add_role(const role_name_t& name, lw_shared_ptr<role_record> role);
|
|
||||||
void remove_role(const role_name_t& name);
|
|
||||||
void remove_role(roles_map::iterator it);
|
|
||||||
void clear_role_permissions(const role_name_t& name);
|
|
||||||
void add_permissions(std::unordered_map<resource, permission_set>& cache, const resource& r, permission_set perms);
|
|
||||||
void remove_permissions(std::unordered_map<resource, permission_set>& cache, const resource& r);
|
|
||||||
|
|
||||||
future<permission_set> load_permissions(const role_or_anonymous& role, const resource& r, std::unordered_map<resource, permission_set>* perms_cache);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace auth
|
} // namespace auth
|
||||||
|
|||||||
@@ -88,16 +88,10 @@ static const class_registrator<
|
|||||||
|
|
||||||
ldap_role_manager::ldap_role_manager(
|
ldap_role_manager::ldap_role_manager(
|
||||||
std::string_view query_template, std::string_view target_attr, std::string_view bind_name, std::string_view bind_password,
|
std::string_view query_template, std::string_view target_attr, std::string_view bind_name, std::string_view bind_password,
|
||||||
uint32_t permissions_update_interval_in_ms,
|
|
||||||
utils::observer<uint32_t> permissions_update_interval_in_ms_observer,
|
|
||||||
cql3::query_processor& qp, ::service::raft_group0_client& rg0c, ::service::migration_manager& mm, cache& cache)
|
cql3::query_processor& qp, ::service::raft_group0_client& rg0c, ::service::migration_manager& mm, cache& cache)
|
||||||
: _std_mgr(qp, rg0c, mm, cache), _group0_client(rg0c), _query_template(query_template), _target_attr(target_attr), _bind_name(bind_name)
|
: _std_mgr(qp, rg0c, mm, cache), _group0_client(rg0c), _query_template(query_template), _target_attr(target_attr), _bind_name(bind_name)
|
||||||
, _bind_password(bind_password)
|
, _bind_password(bind_password)
|
||||||
, _permissions_update_interval_in_ms(permissions_update_interval_in_ms)
|
, _connection_factory(bind(std::mem_fn(&ldap_role_manager::reconnect), std::ref(*this))) {
|
||||||
, _permissions_update_interval_in_ms_observer(std::move(permissions_update_interval_in_ms_observer))
|
|
||||||
, _connection_factory(bind(std::mem_fn(&ldap_role_manager::reconnect), std::ref(*this)))
|
|
||||||
, _cache(cache)
|
|
||||||
, _cache_pruner(make_ready_future<>()) {
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ldap_role_manager::ldap_role_manager(cql3::query_processor& qp, ::service::raft_group0_client& rg0c, ::service::migration_manager& mm, cache& cache)
|
ldap_role_manager::ldap_role_manager(cql3::query_processor& qp, ::service::raft_group0_client& rg0c, ::service::migration_manager& mm, cache& cache)
|
||||||
@@ -106,8 +100,6 @@ ldap_role_manager::ldap_role_manager(cql3::query_processor& qp, ::service::raft_
|
|||||||
qp.db().get_config().ldap_attr_role(),
|
qp.db().get_config().ldap_attr_role(),
|
||||||
qp.db().get_config().ldap_bind_dn(),
|
qp.db().get_config().ldap_bind_dn(),
|
||||||
qp.db().get_config().ldap_bind_passwd(),
|
qp.db().get_config().ldap_bind_passwd(),
|
||||||
qp.db().get_config().permissions_update_interval_in_ms(),
|
|
||||||
qp.db().get_config().permissions_update_interval_in_ms.observe([this] (const uint32_t& v) { _permissions_update_interval_in_ms = v; }),
|
|
||||||
qp,
|
qp,
|
||||||
rg0c,
|
rg0c,
|
||||||
mm,
|
mm,
|
||||||
@@ -127,22 +119,6 @@ future<> ldap_role_manager::start() {
|
|||||||
return make_exception_future(
|
return make_exception_future(
|
||||||
std::runtime_error(fmt::format("error getting LDAP server address from template {}", _query_template)));
|
std::runtime_error(fmt::format("error getting LDAP server address from template {}", _query_template)));
|
||||||
}
|
}
|
||||||
_cache_pruner = futurize_invoke([this] () -> future<> {
|
|
||||||
while (true) {
|
|
||||||
try {
|
|
||||||
co_await seastar::sleep_abortable(std::chrono::milliseconds(_permissions_update_interval_in_ms), _as);
|
|
||||||
} catch (const seastar::sleep_aborted&) {
|
|
||||||
co_return; // ignore
|
|
||||||
}
|
|
||||||
co_await _cache.container().invoke_on_all([] (cache& c) -> future<> {
|
|
||||||
try {
|
|
||||||
co_await c.reload_all_permissions();
|
|
||||||
} catch (...) {
|
|
||||||
mylog.warn("Cache reload all permissions failed: {}", std::current_exception());
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
});
|
|
||||||
return _std_mgr.start();
|
return _std_mgr.start();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -199,11 +175,7 @@ future<conn_ptr> ldap_role_manager::reconnect() {
|
|||||||
|
|
||||||
future<> ldap_role_manager::stop() {
|
future<> ldap_role_manager::stop() {
|
||||||
_as.request_abort();
|
_as.request_abort();
|
||||||
return std::move(_cache_pruner).then([this] {
|
return _std_mgr.stop().then([this] { return _connection_factory.stop(); });
|
||||||
return _std_mgr.stop();
|
|
||||||
}).then([this] {
|
|
||||||
return _connection_factory.stop();
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> ldap_role_manager::create(std::string_view name, const role_config& config, ::service::group0_batch& mc) {
|
future<> ldap_role_manager::create(std::string_view name, const role_config& config, ::service::group0_batch& mc) {
|
||||||
|
|||||||
@@ -10,7 +10,6 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <seastar/core/abort_source.hh>
|
#include <seastar/core/abort_source.hh>
|
||||||
#include <seastar/core/future.hh>
|
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
|
||||||
#include "ent/ldap/ldap_connection.hh"
|
#include "ent/ldap/ldap_connection.hh"
|
||||||
@@ -35,22 +34,14 @@ class ldap_role_manager : public role_manager {
|
|||||||
seastar::sstring _target_attr; ///< LDAP entry attribute containing the Scylla role name.
|
seastar::sstring _target_attr; ///< LDAP entry attribute containing the Scylla role name.
|
||||||
seastar::sstring _bind_name; ///< Username for LDAP simple bind.
|
seastar::sstring _bind_name; ///< Username for LDAP simple bind.
|
||||||
seastar::sstring _bind_password; ///< Password for LDAP simple bind.
|
seastar::sstring _bind_password; ///< Password for LDAP simple bind.
|
||||||
|
|
||||||
uint32_t _permissions_update_interval_in_ms;
|
|
||||||
utils::observer<uint32_t> _permissions_update_interval_in_ms_observer;
|
|
||||||
|
|
||||||
mutable ldap_reuser _connection_factory; // Potentially modified by query_granted().
|
mutable ldap_reuser _connection_factory; // Potentially modified by query_granted().
|
||||||
seastar::abort_source _as;
|
seastar::abort_source _as;
|
||||||
cache& _cache;
|
|
||||||
seastar::future<> _cache_pruner;
|
|
||||||
public:
|
public:
|
||||||
ldap_role_manager(
|
ldap_role_manager(
|
||||||
std::string_view query_template, ///< LDAP query template as described in Scylla documentation.
|
std::string_view query_template, ///< LDAP query template as described in Scylla documentation.
|
||||||
std::string_view target_attr, ///< LDAP entry attribute containing the Scylla role name.
|
std::string_view target_attr, ///< LDAP entry attribute containing the Scylla role name.
|
||||||
std::string_view bind_name, ///< LDAP bind credentials.
|
std::string_view bind_name, ///< LDAP bind credentials.
|
||||||
std::string_view bind_password, ///< LDAP bind credentials.
|
std::string_view bind_password, ///< LDAP bind credentials.
|
||||||
uint32_t permissions_update_interval_in_ms,
|
|
||||||
utils::observer<uint32_t> permissions_update_interval_in_ms_observer,
|
|
||||||
cql3::query_processor& qp, ///< Passed to standard_role_manager.
|
cql3::query_processor& qp, ///< Passed to standard_role_manager.
|
||||||
::service::raft_group0_client& rg0c, ///< Passed to standard_role_manager.
|
::service::raft_group0_client& rg0c, ///< Passed to standard_role_manager.
|
||||||
::service::migration_manager& mm, ///< Passed to standard_role_manager.
|
::service::migration_manager& mm, ///< Passed to standard_role_manager.
|
||||||
|
|||||||
38
auth/permissions_cache.cc
Normal file
38
auth/permissions_cache.cc
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2017-present ScyllaDB
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "auth/permissions_cache.hh"
|
||||||
|
|
||||||
|
#include <fmt/ranges.h>
|
||||||
|
#include "auth/authorizer.hh"
|
||||||
|
#include "auth/service.hh"
|
||||||
|
|
||||||
|
namespace auth {
|
||||||
|
|
||||||
|
permissions_cache::permissions_cache(const utils::loading_cache_config& c, service& ser, logging::logger& log)
|
||||||
|
: _cache(c, log, [&ser, &log](const key_type& k) {
|
||||||
|
log.debug("Refreshing permissions for {}", k.first);
|
||||||
|
return ser.get_uncached_permissions(k.first, k.second);
|
||||||
|
}) {
|
||||||
|
}
|
||||||
|
|
||||||
|
bool permissions_cache::update_config(utils::loading_cache_config c) {
|
||||||
|
return _cache.update_config(std::move(c));
|
||||||
|
}
|
||||||
|
|
||||||
|
void permissions_cache::reset() {
|
||||||
|
_cache.reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
future<permission_set> permissions_cache::get(const role_or_anonymous& maybe_role, const resource& r) {
|
||||||
|
return do_with(key_type(maybe_role, r), [this](const auto& k) {
|
||||||
|
return _cache.get(k);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
66
auth/permissions_cache.hh
Normal file
66
auth/permissions_cache.hh
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2017-present ScyllaDB
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
#include <fmt/core.h>
|
||||||
|
#include <seastar/core/future.hh>
|
||||||
|
|
||||||
|
#include "auth/permission.hh"
|
||||||
|
#include "auth/resource.hh"
|
||||||
|
#include "auth/role_or_anonymous.hh"
|
||||||
|
#include "utils/log.hh"
|
||||||
|
#include "utils/hash.hh"
|
||||||
|
#include "utils/loading_cache.hh"
|
||||||
|
|
||||||
|
namespace std {
|
||||||
|
|
||||||
|
inline std::ostream& operator<<(std::ostream& os, const pair<auth::role_or_anonymous, auth::resource>& p) {
|
||||||
|
fmt::print(os, "{{role: {}, resource: {}}}", p.first, p.second);
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace db {
|
||||||
|
class config;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace auth {
|
||||||
|
|
||||||
|
class service;
|
||||||
|
|
||||||
|
class permissions_cache final {
|
||||||
|
using cache_type = utils::loading_cache<
|
||||||
|
std::pair<role_or_anonymous, resource>,
|
||||||
|
permission_set,
|
||||||
|
1,
|
||||||
|
utils::loading_cache_reload_enabled::yes,
|
||||||
|
utils::simple_entry_size<permission_set>,
|
||||||
|
utils::tuple_hash>;
|
||||||
|
|
||||||
|
using key_type = typename cache_type::key_type;
|
||||||
|
|
||||||
|
cache_type _cache;
|
||||||
|
|
||||||
|
public:
|
||||||
|
explicit permissions_cache(const utils::loading_cache_config&, service&, logging::logger&);
|
||||||
|
|
||||||
|
future <> stop() {
|
||||||
|
return _cache.stop();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool update_config(utils::loading_cache_config);
|
||||||
|
void reset();
|
||||||
|
future<permission_set> get(const role_or_anonymous&, const resource&);
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
@@ -64,11 +64,11 @@ static const sstring superuser_col_name("super");
|
|||||||
static logging::logger log("auth_service");
|
static logging::logger log("auth_service");
|
||||||
|
|
||||||
class auth_migration_listener final : public ::service::migration_listener {
|
class auth_migration_listener final : public ::service::migration_listener {
|
||||||
service& _service;
|
authorizer& _authorizer;
|
||||||
cql3::query_processor& _qp;
|
cql3::query_processor& _qp;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit auth_migration_listener(service& s, cql3::query_processor& qp) : _service(s), _qp(qp) {
|
explicit auth_migration_listener(authorizer& a, cql3::query_processor& qp) : _authorizer(a), _qp(qp) {
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@@ -92,14 +92,14 @@ private:
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Do it in the background.
|
// Do it in the background.
|
||||||
(void)do_with(auth::make_data_resource(ks_name), ::service::group0_batch::unused(), [this] (auto& r, auto& mc) mutable {
|
(void)do_with(::service::group0_batch::unused(), [this, &ks_name] (auto& mc) mutable {
|
||||||
return _service.revoke_all(r, mc);
|
return _authorizer.revoke_all(auth::make_data_resource(ks_name), mc);
|
||||||
}).handle_exception([] (std::exception_ptr e) {
|
}).handle_exception([] (std::exception_ptr e) {
|
||||||
log.error("Unexpected exception while revoking all permissions on dropped keyspace: {}", e);
|
log.error("Unexpected exception while revoking all permissions on dropped keyspace: {}", e);
|
||||||
});
|
});
|
||||||
|
|
||||||
(void)do_with(auth::make_functions_resource(ks_name), ::service::group0_batch::unused(), [this] (auto& r, auto& mc) mutable {
|
(void)do_with(::service::group0_batch::unused(), [this, &ks_name] (auto& mc) mutable {
|
||||||
return _service.revoke_all(r, mc);
|
return _authorizer.revoke_all(auth::make_functions_resource(ks_name), mc);
|
||||||
}).handle_exception([] (std::exception_ptr e) {
|
}).handle_exception([] (std::exception_ptr e) {
|
||||||
log.error("Unexpected exception while revoking all permissions on functions in dropped keyspace: {}", e);
|
log.error("Unexpected exception while revoking all permissions on functions in dropped keyspace: {}", e);
|
||||||
});
|
});
|
||||||
@@ -111,8 +111,9 @@ private:
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Do it in the background.
|
// Do it in the background.
|
||||||
(void)do_with(auth::make_data_resource(ks_name, cf_name), ::service::group0_batch::unused(), [this] (auto& r, auto& mc) mutable {
|
(void)do_with(::service::group0_batch::unused(), [this, &ks_name, &cf_name] (auto& mc) mutable {
|
||||||
return _service.revoke_all(r, mc);
|
return _authorizer.revoke_all(
|
||||||
|
auth::make_data_resource(ks_name, cf_name), mc);
|
||||||
}).handle_exception([] (std::exception_ptr e) {
|
}).handle_exception([] (std::exception_ptr e) {
|
||||||
log.error("Unexpected exception while revoking all permissions on dropped table: {}", e);
|
log.error("Unexpected exception while revoking all permissions on dropped table: {}", e);
|
||||||
});
|
});
|
||||||
@@ -125,8 +126,9 @@ private:
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Do it in the background.
|
// Do it in the background.
|
||||||
(void)do_with(auth::make_functions_resource(ks_name, function_name), ::service::group0_batch::unused(), [this] (auto& r, auto& mc) mutable {
|
(void)do_with(::service::group0_batch::unused(), [this, &ks_name, &function_name] (auto& mc) mutable {
|
||||||
return _service.revoke_all(r, mc);
|
return _authorizer.revoke_all(
|
||||||
|
auth::make_functions_resource(ks_name, function_name), mc);
|
||||||
}).handle_exception([] (std::exception_ptr e) {
|
}).handle_exception([] (std::exception_ptr e) {
|
||||||
log.error("Unexpected exception while revoking all permissions on dropped function: {}", e);
|
log.error("Unexpected exception while revoking all permissions on dropped function: {}", e);
|
||||||
});
|
});
|
||||||
@@ -136,8 +138,9 @@ private:
|
|||||||
// in non legacy path revoke is part of schema change statement execution
|
// in non legacy path revoke is part of schema change statement execution
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
(void)do_with(auth::make_functions_resource(ks_name, aggregate_name), ::service::group0_batch::unused(), [this] (auto& r, auto& mc) mutable {
|
(void)do_with(::service::group0_batch::unused(), [this, &ks_name, &aggregate_name] (auto& mc) mutable {
|
||||||
return _service.revoke_all(r, mc);
|
return _authorizer.revoke_all(
|
||||||
|
auth::make_functions_resource(ks_name, aggregate_name), mc);
|
||||||
}).handle_exception([] (std::exception_ptr e) {
|
}).handle_exception([] (std::exception_ptr e) {
|
||||||
log.error("Unexpected exception while revoking all permissions on dropped aggregate: {}", e);
|
log.error("Unexpected exception while revoking all permissions on dropped aggregate: {}", e);
|
||||||
});
|
});
|
||||||
@@ -154,6 +157,7 @@ static future<> validate_role_exists(const service& ser, std::string_view role_n
|
|||||||
}
|
}
|
||||||
|
|
||||||
service::service(
|
service::service(
|
||||||
|
utils::loading_cache_config c,
|
||||||
cache& cache,
|
cache& cache,
|
||||||
cql3::query_processor& qp,
|
cql3::query_processor& qp,
|
||||||
::service::raft_group0_client& g0,
|
::service::raft_group0_client& g0,
|
||||||
@@ -162,17 +166,25 @@ service::service(
|
|||||||
std::unique_ptr<authenticator> a,
|
std::unique_ptr<authenticator> a,
|
||||||
std::unique_ptr<role_manager> r,
|
std::unique_ptr<role_manager> r,
|
||||||
maintenance_socket_enabled used_by_maintenance_socket)
|
maintenance_socket_enabled used_by_maintenance_socket)
|
||||||
: _cache(cache)
|
: _loading_cache_config(std::move(c))
|
||||||
|
, _permissions_cache(nullptr)
|
||||||
|
, _cache(cache)
|
||||||
, _qp(qp)
|
, _qp(qp)
|
||||||
, _group0_client(g0)
|
, _group0_client(g0)
|
||||||
, _mnotifier(mn)
|
, _mnotifier(mn)
|
||||||
, _authorizer(std::move(z))
|
, _authorizer(std::move(z))
|
||||||
, _authenticator(std::move(a))
|
, _authenticator(std::move(a))
|
||||||
, _role_manager(std::move(r))
|
, _role_manager(std::move(r))
|
||||||
, _migration_listener(std::make_unique<auth_migration_listener>(*this, qp))
|
, _migration_listener(std::make_unique<auth_migration_listener>(*_authorizer, qp))
|
||||||
|
, _permissions_cache_cfg_cb([this] (uint32_t) { (void) _permissions_cache_config_action.trigger_later(); })
|
||||||
|
, _permissions_cache_config_action([this] { update_cache_config(); return make_ready_future<>(); })
|
||||||
|
, _permissions_cache_max_entries_observer(_qp.db().get_config().permissions_cache_max_entries.observe(_permissions_cache_cfg_cb))
|
||||||
|
, _permissions_cache_update_interval_in_ms_observer(_qp.db().get_config().permissions_update_interval_in_ms.observe(_permissions_cache_cfg_cb))
|
||||||
|
, _permissions_cache_validity_in_ms_observer(_qp.db().get_config().permissions_validity_in_ms.observe(_permissions_cache_cfg_cb))
|
||||||
, _used_by_maintenance_socket(used_by_maintenance_socket) {}
|
, _used_by_maintenance_socket(used_by_maintenance_socket) {}
|
||||||
|
|
||||||
service::service(
|
service::service(
|
||||||
|
utils::loading_cache_config c,
|
||||||
cql3::query_processor& qp,
|
cql3::query_processor& qp,
|
||||||
::service::raft_group0_client& g0,
|
::service::raft_group0_client& g0,
|
||||||
::service::migration_notifier& mn,
|
::service::migration_notifier& mn,
|
||||||
@@ -181,6 +193,7 @@ service::service(
|
|||||||
maintenance_socket_enabled used_by_maintenance_socket,
|
maintenance_socket_enabled used_by_maintenance_socket,
|
||||||
cache& cache)
|
cache& cache)
|
||||||
: service(
|
: service(
|
||||||
|
std::move(c),
|
||||||
cache,
|
cache,
|
||||||
qp,
|
qp,
|
||||||
g0,
|
g0,
|
||||||
@@ -244,14 +257,7 @@ future<> service::start(::service::migration_manager& mm, db::system_keyspace& s
|
|||||||
co_await _role_manager->ensure_superuser_is_created();
|
co_await _role_manager->ensure_superuser_is_created();
|
||||||
}
|
}
|
||||||
co_await when_all_succeed(_authorizer->start(), _authenticator->start()).discard_result();
|
co_await when_all_succeed(_authorizer->start(), _authenticator->start()).discard_result();
|
||||||
if (!_used_by_maintenance_socket) {
|
_permissions_cache = std::make_unique<permissions_cache>(_loading_cache_config, *this, log);
|
||||||
// Maintenance socket mode can't cache permissions because it has
|
|
||||||
// different authorizer. We can't mix cached permissions, they could be
|
|
||||||
// different in normal mode.
|
|
||||||
_cache.set_permission_loader(std::bind(
|
|
||||||
&service::get_uncached_permissions,
|
|
||||||
this, std::placeholders::_1, std::placeholders::_2));
|
|
||||||
}
|
|
||||||
co_await once_among_shards([this] {
|
co_await once_among_shards([this] {
|
||||||
_mnotifier.register_listener(_migration_listener.get());
|
_mnotifier.register_listener(_migration_listener.get());
|
||||||
return make_ready_future<>();
|
return make_ready_future<>();
|
||||||
@@ -263,7 +269,9 @@ future<> service::stop() {
|
|||||||
// Only one of the shards has the listener registered, but let's try to
|
// Only one of the shards has the listener registered, but let's try to
|
||||||
// unregister on each one just to make sure.
|
// unregister on each one just to make sure.
|
||||||
return _mnotifier.unregister_listener(_migration_listener.get()).then([this] {
|
return _mnotifier.unregister_listener(_migration_listener.get()).then([this] {
|
||||||
_cache.set_permission_loader(nullptr);
|
if (_permissions_cache) {
|
||||||
|
return _permissions_cache->stop();
|
||||||
|
}
|
||||||
return make_ready_future<>();
|
return make_ready_future<>();
|
||||||
}).then([this] {
|
}).then([this] {
|
||||||
return when_all_succeed(_role_manager->stop(), _authorizer->stop(), _authenticator->stop()).discard_result();
|
return when_all_succeed(_role_manager->stop(), _authorizer->stop(), _authenticator->stop()).discard_result();
|
||||||
@@ -275,8 +283,21 @@ future<> service::ensure_superuser_is_created() {
|
|||||||
co_await _authenticator->ensure_superuser_is_created();
|
co_await _authenticator->ensure_superuser_is_created();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void service::update_cache_config() {
|
||||||
|
auto db = _qp.db();
|
||||||
|
|
||||||
|
utils::loading_cache_config perm_cache_config;
|
||||||
|
perm_cache_config.max_size = db.get_config().permissions_cache_max_entries();
|
||||||
|
perm_cache_config.expiry = std::chrono::milliseconds(db.get_config().permissions_validity_in_ms());
|
||||||
|
perm_cache_config.refresh = std::chrono::milliseconds(db.get_config().permissions_update_interval_in_ms());
|
||||||
|
|
||||||
|
if (!_permissions_cache->update_config(std::move(perm_cache_config))) {
|
||||||
|
log.error("Failed to apply permissions cache changes. Please read the documentation of these parameters");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void service::reset_authorization_cache() {
|
void service::reset_authorization_cache() {
|
||||||
|
_permissions_cache->reset();
|
||||||
_qp.reset_cache();
|
_qp.reset_cache();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -301,10 +322,7 @@ service::get_uncached_permissions(const role_or_anonymous& maybe_role, const res
|
|||||||
}
|
}
|
||||||
|
|
||||||
future<permission_set> service::get_permissions(const role_or_anonymous& maybe_role, const resource& r) const {
|
future<permission_set> service::get_permissions(const role_or_anonymous& maybe_role, const resource& r) const {
|
||||||
if (legacy_mode(_qp) || _used_by_maintenance_socket) {
|
return _permissions_cache->get(maybe_role, r);
|
||||||
return get_uncached_permissions(maybe_role, r);
|
|
||||||
}
|
|
||||||
return _cache.get_permissions(maybe_role, r);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
future<bool> service::has_superuser(std::string_view role_name, const role_set& roles) const {
|
future<bool> service::has_superuser(std::string_view role_name, const role_set& roles) const {
|
||||||
@@ -429,11 +447,6 @@ future<bool> service::exists(const resource& r) const {
|
|||||||
return make_ready_future<bool>(false);
|
return make_ready_future<bool>(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> service::revoke_all(const resource& r, ::service::group0_batch& mc) const {
|
|
||||||
co_await _authorizer->revoke_all(r, mc);
|
|
||||||
co_await _cache.prune(r);
|
|
||||||
}
|
|
||||||
|
|
||||||
future<std::vector<cql3::description>> service::describe_roles(bool with_hashed_passwords) {
|
future<std::vector<cql3::description>> service::describe_roles(bool with_hashed_passwords) {
|
||||||
std::vector<cql3::description> result{};
|
std::vector<cql3::description> result{};
|
||||||
|
|
||||||
@@ -788,7 +801,7 @@ future<> revoke_permissions(
|
|||||||
}
|
}
|
||||||
|
|
||||||
future<> revoke_all(const service& ser, const resource& r, ::service::group0_batch& mc) {
|
future<> revoke_all(const service& ser, const resource& r, ::service::group0_batch& mc) {
|
||||||
return ser.revoke_all(r, mc);
|
return ser.underlying_authorizer().revoke_all(r, mc);
|
||||||
}
|
}
|
||||||
|
|
||||||
future<std::vector<permission_details>> list_filtered_permissions(
|
future<std::vector<permission_details>> list_filtered_permissions(
|
||||||
|
|||||||
@@ -20,6 +20,7 @@
|
|||||||
#include "auth/authenticator.hh"
|
#include "auth/authenticator.hh"
|
||||||
#include "auth/authorizer.hh"
|
#include "auth/authorizer.hh"
|
||||||
#include "auth/permission.hh"
|
#include "auth/permission.hh"
|
||||||
|
#include "auth/permissions_cache.hh"
|
||||||
#include "auth/cache.hh"
|
#include "auth/cache.hh"
|
||||||
#include "auth/role_manager.hh"
|
#include "auth/role_manager.hh"
|
||||||
#include "auth/common.hh"
|
#include "auth/common.hh"
|
||||||
@@ -74,6 +75,8 @@ public:
|
|||||||
/// peering_sharded_service inheritance is needed to be able to access shard local authentication service
|
/// peering_sharded_service inheritance is needed to be able to access shard local authentication service
|
||||||
/// given an object from another shard. Used for bouncing lwt requests to correct shard.
|
/// given an object from another shard. Used for bouncing lwt requests to correct shard.
|
||||||
class service final : public seastar::peering_sharded_service<service> {
|
class service final : public seastar::peering_sharded_service<service> {
|
||||||
|
utils::loading_cache_config _loading_cache_config;
|
||||||
|
std::unique_ptr<permissions_cache> _permissions_cache;
|
||||||
cache& _cache;
|
cache& _cache;
|
||||||
|
|
||||||
cql3::query_processor& _qp;
|
cql3::query_processor& _qp;
|
||||||
@@ -91,12 +94,20 @@ class service final : public seastar::peering_sharded_service<service> {
|
|||||||
// Only one of these should be registered, so we end up with some unused instances. Not the end of the world.
|
// Only one of these should be registered, so we end up with some unused instances. Not the end of the world.
|
||||||
std::unique_ptr<::service::migration_listener> _migration_listener;
|
std::unique_ptr<::service::migration_listener> _migration_listener;
|
||||||
|
|
||||||
|
std::function<void(uint32_t)> _permissions_cache_cfg_cb;
|
||||||
|
serialized_action _permissions_cache_config_action;
|
||||||
|
|
||||||
|
utils::observer<uint32_t> _permissions_cache_max_entries_observer;
|
||||||
|
utils::observer<uint32_t> _permissions_cache_update_interval_in_ms_observer;
|
||||||
|
utils::observer<uint32_t> _permissions_cache_validity_in_ms_observer;
|
||||||
|
|
||||||
maintenance_socket_enabled _used_by_maintenance_socket;
|
maintenance_socket_enabled _used_by_maintenance_socket;
|
||||||
|
|
||||||
abort_source _as;
|
abort_source _as;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
service(
|
service(
|
||||||
|
utils::loading_cache_config,
|
||||||
cache& cache,
|
cache& cache,
|
||||||
cql3::query_processor&,
|
cql3::query_processor&,
|
||||||
::service::raft_group0_client&,
|
::service::raft_group0_client&,
|
||||||
@@ -112,6 +123,7 @@ public:
|
|||||||
/// of the instances themselves.
|
/// of the instances themselves.
|
||||||
///
|
///
|
||||||
service(
|
service(
|
||||||
|
utils::loading_cache_config,
|
||||||
cql3::query_processor&,
|
cql3::query_processor&,
|
||||||
::service::raft_group0_client&,
|
::service::raft_group0_client&,
|
||||||
::service::migration_notifier&,
|
::service::migration_notifier&,
|
||||||
@@ -126,6 +138,8 @@ public:
|
|||||||
|
|
||||||
future<> ensure_superuser_is_created();
|
future<> ensure_superuser_is_created();
|
||||||
|
|
||||||
|
void update_cache_config();
|
||||||
|
|
||||||
void reset_authorization_cache();
|
void reset_authorization_cache();
|
||||||
|
|
||||||
///
|
///
|
||||||
@@ -167,13 +181,6 @@ public:
|
|||||||
|
|
||||||
future<bool> exists(const resource&) const;
|
future<bool> exists(const resource&) const;
|
||||||
|
|
||||||
///
|
|
||||||
/// Revoke all permissions granted to any role for a particular resource.
|
|
||||||
///
|
|
||||||
/// \throws \ref unsupported_authorization_operation if revoking permissions is not supported.
|
|
||||||
///
|
|
||||||
future<> revoke_all(const resource&, ::service::group0_batch&) const;
|
|
||||||
|
|
||||||
///
|
///
|
||||||
/// Produces descriptions that can be used to restore the state of auth. That encompasses
|
/// Produces descriptions that can be used to restore the state of auth. That encompasses
|
||||||
/// roles, role grants, and permission grants.
|
/// roles, role grants, and permission grants.
|
||||||
|
|||||||
@@ -814,7 +814,8 @@ generation_service::generation_service(
|
|||||||
config cfg, gms::gossiper& g, sharded<db::system_distributed_keyspace>& sys_dist_ks,
|
config cfg, gms::gossiper& g, sharded<db::system_distributed_keyspace>& sys_dist_ks,
|
||||||
sharded<db::system_keyspace>& sys_ks,
|
sharded<db::system_keyspace>& sys_ks,
|
||||||
abort_source& abort_src, const locator::shared_token_metadata& stm, gms::feature_service& f,
|
abort_source& abort_src, const locator::shared_token_metadata& stm, gms::feature_service& f,
|
||||||
replica::database& db)
|
replica::database& db,
|
||||||
|
std::function<bool()> raft_topology_change_enabled)
|
||||||
: _cfg(std::move(cfg))
|
: _cfg(std::move(cfg))
|
||||||
, _gossiper(g)
|
, _gossiper(g)
|
||||||
, _sys_dist_ks(sys_dist_ks)
|
, _sys_dist_ks(sys_dist_ks)
|
||||||
@@ -823,6 +824,7 @@ generation_service::generation_service(
|
|||||||
, _token_metadata(stm)
|
, _token_metadata(stm)
|
||||||
, _feature_service(f)
|
, _feature_service(f)
|
||||||
, _db(db)
|
, _db(db)
|
||||||
|
, _raft_topology_change_enabled(std::move(raft_topology_change_enabled))
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -876,7 +878,16 @@ future<> generation_service::on_join(gms::inet_address ep, locator::host_id id,
|
|||||||
future<> generation_service::on_change(gms::inet_address ep, locator::host_id id, const gms::application_state_map& states, gms::permit_id pid) {
|
future<> generation_service::on_change(gms::inet_address ep, locator::host_id id, const gms::application_state_map& states, gms::permit_id pid) {
|
||||||
assert_shard_zero(__PRETTY_FUNCTION__);
|
assert_shard_zero(__PRETTY_FUNCTION__);
|
||||||
|
|
||||||
|
if (_raft_topology_change_enabled()) {
|
||||||
return make_ready_future<>();
|
return make_ready_future<>();
|
||||||
|
}
|
||||||
|
|
||||||
|
return on_application_state_change(ep, id, states, gms::application_state::CDC_GENERATION_ID, pid, [this] (gms::inet_address ep, locator::host_id id, const gms::versioned_value& v, gms::permit_id) {
|
||||||
|
auto gen_id = gms::versioned_value::cdc_generation_id_from_string(v.value());
|
||||||
|
cdc_log.debug("Endpoint: {}, CDC generation ID change: {}", ep, gen_id);
|
||||||
|
|
||||||
|
return legacy_handle_cdc_generation(gen_id);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> generation_service::check_and_repair_cdc_streams() {
|
future<> generation_service::check_and_repair_cdc_streams() {
|
||||||
|
|||||||
@@ -79,12 +79,17 @@ private:
|
|||||||
std::optional<cdc::generation_id> _gen_id;
|
std::optional<cdc::generation_id> _gen_id;
|
||||||
future<> _cdc_streams_rewrite_complete = make_ready_future<>();
|
future<> _cdc_streams_rewrite_complete = make_ready_future<>();
|
||||||
|
|
||||||
|
/* Returns true if raft topology changes are enabled.
|
||||||
|
* Can only be called from shard 0.
|
||||||
|
*/
|
||||||
|
std::function<bool()> _raft_topology_change_enabled;
|
||||||
public:
|
public:
|
||||||
generation_service(config cfg, gms::gossiper&,
|
generation_service(config cfg, gms::gossiper&,
|
||||||
sharded<db::system_distributed_keyspace>&,
|
sharded<db::system_distributed_keyspace>&,
|
||||||
sharded<db::system_keyspace>& sys_ks,
|
sharded<db::system_keyspace>& sys_ks,
|
||||||
abort_source&, const locator::shared_token_metadata&,
|
abort_source&, const locator::shared_token_metadata&,
|
||||||
gms::feature_service&, replica::database& db);
|
gms::feature_service&, replica::database& db,
|
||||||
|
std::function<bool()> raft_topology_change_enabled);
|
||||||
|
|
||||||
future<> stop();
|
future<> stop();
|
||||||
~generation_service();
|
~generation_service();
|
||||||
|
|||||||
@@ -1519,9 +1519,7 @@ future<> compaction_manager::maybe_wait_for_sstable_count_reduction(compaction_g
|
|||||||
| std::views::transform(std::mem_fn(&sstables::sstable::run_identifier))
|
| std::views::transform(std::mem_fn(&sstables::sstable::run_identifier))
|
||||||
| std::ranges::to<std::unordered_set>());
|
| std::ranges::to<std::unordered_set>());
|
||||||
};
|
};
|
||||||
const auto injected_threshold = utils::get_local_injector().inject_parameter<size_t>("set_sstable_count_reduction_threshold");
|
const auto threshold = size_t(std::max(schema->max_compaction_threshold(), 32));
|
||||||
const auto threshold = injected_threshold.value_or(size_t(std::max(schema->max_compaction_threshold(), 32)));
|
|
||||||
|
|
||||||
auto count = co_await num_runs_for_compaction();
|
auto count = co_await num_runs_for_compaction();
|
||||||
if (count <= threshold) {
|
if (count <= threshold) {
|
||||||
cmlog.trace("No need to wait for sstable count reduction in {}: {} <= {}",
|
cmlog.trace("No need to wait for sstable count reduction in {}: {} <= {}",
|
||||||
@@ -1536,7 +1534,9 @@ future<> compaction_manager::maybe_wait_for_sstable_count_reduction(compaction_g
|
|||||||
auto& cstate = get_compaction_state(&t);
|
auto& cstate = get_compaction_state(&t);
|
||||||
try {
|
try {
|
||||||
while (can_perform_regular_compaction(t) && co_await num_runs_for_compaction() > threshold) {
|
while (can_perform_regular_compaction(t) && co_await num_runs_for_compaction() > threshold) {
|
||||||
co_await cstate.compaction_done.when();
|
co_await cstate.compaction_done.wait([this, &t] {
|
||||||
|
return !can_perform_regular_compaction(t);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
} catch (const broken_condition_variable&) {
|
} catch (const broken_condition_variable&) {
|
||||||
co_return;
|
co_return;
|
||||||
|
|||||||
@@ -299,11 +299,13 @@ batch_size_fail_threshold_in_kb: 1024
|
|||||||
# max_hint_window_in_ms: 10800000 # 3 hours
|
# max_hint_window_in_ms: 10800000 # 3 hours
|
||||||
|
|
||||||
|
|
||||||
# Validity period for authorized statements cache. Defaults to 10000, set to 0 to disable.
|
# Validity period for permissions cache (fetching permissions can be an
|
||||||
|
# expensive operation depending on the authorizer, CassandraAuthorizer is
|
||||||
|
# one example). Defaults to 10000, set to 0 to disable.
|
||||||
# Will be disabled automatically for AllowAllAuthorizer.
|
# Will be disabled automatically for AllowAllAuthorizer.
|
||||||
# permissions_validity_in_ms: 10000
|
# permissions_validity_in_ms: 10000
|
||||||
|
|
||||||
# Refresh interval for authorized statements cache.
|
# Refresh interval for permissions cache (if enabled).
|
||||||
# After this interval, cache entries become eligible for refresh. Upon next
|
# After this interval, cache entries become eligible for refresh. Upon next
|
||||||
# access, an async reload is scheduled and the old value returned until it
|
# access, an async reload is scheduled and the old value returned until it
|
||||||
# completes. If permissions_validity_in_ms is non-zero, then this also must have
|
# completes. If permissions_validity_in_ms is non-zero, then this also must have
|
||||||
@@ -564,16 +566,15 @@ commitlog_total_space_in_mb: -1
|
|||||||
# prometheus_address: 1.2.3.4
|
# prometheus_address: 1.2.3.4
|
||||||
|
|
||||||
# audit settings
|
# audit settings
|
||||||
# Table audit is enabled by default.
|
# By default, Scylla does not audit anything.
|
||||||
# 'audit' config option controls if and where to output audited events:
|
# 'audit' config option controls if and where to output audited events:
|
||||||
# - "none": auditing is disabled
|
# - "none": auditing is disabled (default)
|
||||||
# - "table": save audited events in audit.audit_log column family (default)
|
# - "table": save audited events in audit.audit_log column family
|
||||||
# - "syslog": send audited events via syslog (depends on OS, but usually to /dev/log)
|
# - "syslog": send audited events via syslog (depends on OS, but usually to /dev/log)
|
||||||
audit: "table"
|
audit: "table"
|
||||||
#
|
#
|
||||||
# List of statement categories that should be audited.
|
# List of statement categories that should be audited.
|
||||||
# Possible categories are: QUERY, DML, DCL, DDL, AUTH, ADMIN
|
audit_categories: "DCL,DDL,AUTH,ADMIN"
|
||||||
audit_categories: "DCL,AUTH,ADMIN"
|
|
||||||
#
|
#
|
||||||
# List of tables that should be audited.
|
# List of tables that should be audited.
|
||||||
# audit_tables: "<keyspace_name>.<table_name>,<keyspace_name>.<table_name>"
|
# audit_tables: "<keyspace_name>.<table_name>,<keyspace_name>.<table_name>"
|
||||||
@@ -874,16 +875,7 @@ maintenance_socket: ignore
|
|||||||
# The `tablets` option cannot be changed using `ALTER KEYSPACE`.
|
# The `tablets` option cannot be changed using `ALTER KEYSPACE`.
|
||||||
tablets_mode_for_new_keyspaces: enabled
|
tablets_mode_for_new_keyspaces: enabled
|
||||||
|
|
||||||
# Require every tablet-enabled keyspace to be RF-rack-valid.
|
# Enforce RF-rack-valid keyspaces.
|
||||||
#
|
|
||||||
# A tablet-enabled keyspace is RF-rack-valid when, for each data center,
|
|
||||||
# its replication factor (RF) is 0, 1, or exactly equal to the number of
|
|
||||||
# racks in that data center. Setting the RF to the number of racks ensures
|
|
||||||
# that a single rack failure never results in data unavailability.
|
|
||||||
#
|
|
||||||
# When set to true, CREATE KEYSPACE and ALTER KEYSPACE statements that
|
|
||||||
# would produce an RF-rack-invalid keyspace are rejected.
|
|
||||||
# When set to false, such statements are allowed but emit a warning.
|
|
||||||
rf_rack_valid_keyspaces: false
|
rf_rack_valid_keyspaces: false
|
||||||
|
|
||||||
#
|
#
|
||||||
|
|||||||
52
configure.py
52
configure.py
@@ -730,6 +730,28 @@ vector_search_tests = set([
|
|||||||
'test/vector_search/rescoring_test'
|
'test/vector_search/rescoring_test'
|
||||||
])
|
])
|
||||||
|
|
||||||
|
vector_search_validator_bin = 'vector-search-validator/bin/vector-search-validator'
|
||||||
|
vector_search_validator_deps = set([
|
||||||
|
'test/vector_search_validator/build-validator',
|
||||||
|
'test/vector_search_validator/Cargo.toml',
|
||||||
|
'test/vector_search_validator/crates/validator/Cargo.toml',
|
||||||
|
'test/vector_search_validator/crates/validator/src/main.rs',
|
||||||
|
'test/vector_search_validator/crates/validator-scylla/Cargo.toml',
|
||||||
|
'test/vector_search_validator/crates/validator-scylla/src/lib.rs',
|
||||||
|
'test/vector_search_validator/crates/validator-scylla/src/cql.rs',
|
||||||
|
])
|
||||||
|
|
||||||
|
vector_store_bin = 'vector-search-validator/bin/vector-store'
|
||||||
|
vector_store_deps = set([
|
||||||
|
'test/vector_search_validator/build-env',
|
||||||
|
'test/vector_search_validator/build-vector-store',
|
||||||
|
])
|
||||||
|
|
||||||
|
vector_search_validator_bins = set([
|
||||||
|
vector_search_validator_bin,
|
||||||
|
vector_store_bin,
|
||||||
|
])
|
||||||
|
|
||||||
wasms = set([
|
wasms = set([
|
||||||
'wasm/return_input.wat',
|
'wasm/return_input.wat',
|
||||||
'wasm/test_complex_null_values.wat',
|
'wasm/test_complex_null_values.wat',
|
||||||
@@ -763,7 +785,7 @@ other = set([
|
|||||||
'iotune',
|
'iotune',
|
||||||
])
|
])
|
||||||
|
|
||||||
all_artifacts = apps | cpp_apps | tests | other | wasms
|
all_artifacts = apps | cpp_apps | tests | other | wasms | vector_search_validator_bins
|
||||||
|
|
||||||
arg_parser = argparse.ArgumentParser('Configure scylla', add_help=False, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
arg_parser = argparse.ArgumentParser('Configure scylla', add_help=False, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||||
arg_parser.add_argument('--out', dest='buildfile', action='store', default='build.ninja',
|
arg_parser.add_argument('--out', dest='buildfile', action='store', default='build.ninja',
|
||||||
@@ -1174,7 +1196,6 @@ scylla_core = (['message/messaging_service.cc',
|
|||||||
'utils/gz/crc_combine.cc',
|
'utils/gz/crc_combine.cc',
|
||||||
'utils/gz/crc_combine_table.cc',
|
'utils/gz/crc_combine_table.cc',
|
||||||
'utils/http.cc',
|
'utils/http.cc',
|
||||||
'utils/http_client_error_processing.cc',
|
|
||||||
'utils/rest/client.cc',
|
'utils/rest/client.cc',
|
||||||
'utils/s3/aws_error.cc',
|
'utils/s3/aws_error.cc',
|
||||||
'utils/s3/client.cc',
|
'utils/s3/client.cc',
|
||||||
@@ -1192,7 +1213,6 @@ scylla_core = (['message/messaging_service.cc',
|
|||||||
'utils/azure/identity/default_credentials.cc',
|
'utils/azure/identity/default_credentials.cc',
|
||||||
'utils/gcp/gcp_credentials.cc',
|
'utils/gcp/gcp_credentials.cc',
|
||||||
'utils/gcp/object_storage.cc',
|
'utils/gcp/object_storage.cc',
|
||||||
'utils/gcp/object_storage_retry_strategy.cc',
|
|
||||||
'gms/version_generator.cc',
|
'gms/version_generator.cc',
|
||||||
'gms/versioned_value.cc',
|
'gms/versioned_value.cc',
|
||||||
'gms/gossiper.cc',
|
'gms/gossiper.cc',
|
||||||
@@ -1277,6 +1297,7 @@ scylla_core = (['message/messaging_service.cc',
|
|||||||
'auth/passwords.cc',
|
'auth/passwords.cc',
|
||||||
'auth/password_authenticator.cc',
|
'auth/password_authenticator.cc',
|
||||||
'auth/permission.cc',
|
'auth/permission.cc',
|
||||||
|
'auth/permissions_cache.cc',
|
||||||
'auth/service.cc',
|
'auth/service.cc',
|
||||||
'auth/standard_role_manager.cc',
|
'auth/standard_role_manager.cc',
|
||||||
'auth/ldap_role_manager.cc',
|
'auth/ldap_role_manager.cc',
|
||||||
@@ -1361,6 +1382,7 @@ scylla_core = (['message/messaging_service.cc',
|
|||||||
'service/topology_state_machine.cc',
|
'service/topology_state_machine.cc',
|
||||||
'service/topology_mutation.cc',
|
'service/topology_mutation.cc',
|
||||||
'service/topology_coordinator.cc',
|
'service/topology_coordinator.cc',
|
||||||
|
'node_ops/node_ops_ctl.cc',
|
||||||
'node_ops/task_manager_module.cc',
|
'node_ops/task_manager_module.cc',
|
||||||
'reader_concurrency_semaphore_group.cc',
|
'reader_concurrency_semaphore_group.cc',
|
||||||
'utils/disk_space_monitor.cc',
|
'utils/disk_space_monitor.cc',
|
||||||
@@ -1645,7 +1667,6 @@ for t in sorted(perf_tests):
|
|||||||
|
|
||||||
deps['test/boost/combined_tests'] += [
|
deps['test/boost/combined_tests'] += [
|
||||||
'test/boost/aggregate_fcts_test.cc',
|
'test/boost/aggregate_fcts_test.cc',
|
||||||
'test/boost/auth_cache_test.cc',
|
|
||||||
'test/boost/auth_test.cc',
|
'test/boost/auth_test.cc',
|
||||||
'test/boost/batchlog_manager_test.cc',
|
'test/boost/batchlog_manager_test.cc',
|
||||||
'test/boost/cache_algorithm_test.cc',
|
'test/boost/cache_algorithm_test.cc',
|
||||||
@@ -2564,10 +2585,11 @@ def write_build_file(f,
|
|||||||
description = RUST_LIB $out
|
description = RUST_LIB $out
|
||||||
''').format(mode=mode, antlr3_exec=args.antlr3_exec, fmt_lib=fmt_lib, test_repeat=args.test_repeat, test_timeout=args.test_timeout, rustc_wrapper=rustc_wrapper, **modeval))
|
''').format(mode=mode, antlr3_exec=args.antlr3_exec, fmt_lib=fmt_lib, test_repeat=args.test_repeat, test_timeout=args.test_timeout, rustc_wrapper=rustc_wrapper, **modeval))
|
||||||
f.write(
|
f.write(
|
||||||
'build {mode}-build: phony {artifacts} {wasms}\n'.format(
|
'build {mode}-build: phony {artifacts} {wasms} {vector_search_validator_bins}\n'.format(
|
||||||
mode=mode,
|
mode=mode,
|
||||||
artifacts=str.join(' ', ['$builddir/' + mode + '/' + x for x in sorted(build_artifacts - wasms)]),
|
artifacts=str.join(' ', ['$builddir/' + mode + '/' + x for x in sorted(build_artifacts - wasms - vector_search_validator_bins)]),
|
||||||
wasms = str.join(' ', ['$builddir/' + x for x in sorted(build_artifacts & wasms)]),
|
wasms = str.join(' ', ['$builddir/' + x for x in sorted(build_artifacts & wasms)]),
|
||||||
|
vector_search_validator_bins=str.join(' ', ['$builddir/' + x for x in sorted(build_artifacts & vector_search_validator_bins)]),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
if profile_recipe := modes[mode].get('profile_recipe'):
|
if profile_recipe := modes[mode].get('profile_recipe'):
|
||||||
@@ -2597,7 +2619,7 @@ def write_build_file(f,
|
|||||||
continue
|
continue
|
||||||
profile_dep = modes[mode].get('profile_target', "")
|
profile_dep = modes[mode].get('profile_target', "")
|
||||||
|
|
||||||
if binary in other or binary in wasms:
|
if binary in other or binary in wasms or binary in vector_search_validator_bins:
|
||||||
continue
|
continue
|
||||||
srcs = deps[binary]
|
srcs = deps[binary]
|
||||||
# 'scylla'
|
# 'scylla'
|
||||||
@@ -2708,10 +2730,11 @@ def write_build_file(f,
|
|||||||
)
|
)
|
||||||
|
|
||||||
f.write(
|
f.write(
|
||||||
'build {mode}-test: test.{mode} {test_executables} $builddir/{mode}/scylla {wasms}\n'.format(
|
'build {mode}-test: test.{mode} {test_executables} $builddir/{mode}/scylla {wasms} {vector_search_validator_bins} \n'.format(
|
||||||
mode=mode,
|
mode=mode,
|
||||||
test_executables=' '.join(['$builddir/{}/{}'.format(mode, binary) for binary in sorted(tests)]),
|
test_executables=' '.join(['$builddir/{}/{}'.format(mode, binary) for binary in sorted(tests)]),
|
||||||
wasms=' '.join([f'$builddir/{binary}' for binary in sorted(wasms)]),
|
wasms=' '.join([f'$builddir/{binary}' for binary in sorted(wasms)]),
|
||||||
|
vector_search_validator_bins=' '.join([f'$builddir/{binary}' for binary in sorted(vector_search_validator_bins)]),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
f.write(
|
f.write(
|
||||||
@@ -2879,6 +2902,19 @@ def write_build_file(f,
|
|||||||
'build compiler-training: phony {}\n'.format(' '.join(['{mode}-compiler-training'.format(mode=mode) for mode in default_modes]))
|
'build compiler-training: phony {}\n'.format(' '.join(['{mode}-compiler-training'.format(mode=mode) for mode in default_modes]))
|
||||||
)
|
)
|
||||||
|
|
||||||
|
f.write(textwrap.dedent(f'''\
|
||||||
|
rule build-vector-search-validator
|
||||||
|
command = test/vector_search_validator/build-validator $builddir
|
||||||
|
rule build-vector-store
|
||||||
|
command = test/vector_search_validator/build-vector-store $builddir
|
||||||
|
'''))
|
||||||
|
f.write(
|
||||||
|
'build $builddir/{vector_search_validator_bin}: build-vector-search-validator {}\n'.format(' '.join([dep for dep in sorted(vector_search_validator_deps)]), vector_search_validator_bin=vector_search_validator_bin)
|
||||||
|
)
|
||||||
|
f.write(
|
||||||
|
'build $builddir/{vector_store_bin}: build-vector-store {}\n'.format(' '.join([dep for dep in sorted(vector_store_deps)]), vector_store_bin=vector_store_bin)
|
||||||
|
)
|
||||||
|
|
||||||
f.write(textwrap.dedent(f'''\
|
f.write(textwrap.dedent(f'''\
|
||||||
build dist-unified-tar: phony {' '.join([f'$builddir/{mode}/dist/tar/{scylla_product}-unified-{scylla_version}-{scylla_release}.{arch}.tar.gz' for mode in default_modes])}
|
build dist-unified-tar: phony {' '.join([f'$builddir/{mode}/dist/tar/{scylla_product}-unified-{scylla_version}-{scylla_release}.{arch}.tar.gz' for mode in default_modes])}
|
||||||
build dist-unified: phony dist-unified-tar
|
build dist-unified: phony dist-unified-tar
|
||||||
|
|||||||
62
cql3/Cql.g
62
cql3/Cql.g
@@ -389,10 +389,8 @@ selectStatement returns [std::unique_ptr<raw::select_statement> expr]
|
|||||||
bool is_ann_ordering = false;
|
bool is_ann_ordering = false;
|
||||||
}
|
}
|
||||||
: K_SELECT (
|
: K_SELECT (
|
||||||
( (K_JSON K_DISTINCT)=> K_JSON { statement_subtype = raw::select_statement::parameters::statement_subtype::JSON; }
|
( K_JSON { statement_subtype = raw::select_statement::parameters::statement_subtype::JSON; } )?
|
||||||
| (K_JSON selectClause K_FROM)=> K_JSON { statement_subtype = raw::select_statement::parameters::statement_subtype::JSON; }
|
( K_DISTINCT { is_distinct = true; } )?
|
||||||
)?
|
|
||||||
( (K_DISTINCT selectClause K_FROM)=> K_DISTINCT { is_distinct = true; } )?
|
|
||||||
sclause=selectClause
|
sclause=selectClause
|
||||||
)
|
)
|
||||||
K_FROM (
|
K_FROM (
|
||||||
@@ -427,13 +425,13 @@ selector returns [shared_ptr<raw_selector> s]
|
|||||||
|
|
||||||
unaliasedSelector returns [uexpression tmp]
|
unaliasedSelector returns [uexpression tmp]
|
||||||
: ( c=cident { tmp = unresolved_identifier{std::move(c)}; }
|
: ( c=cident { tmp = unresolved_identifier{std::move(c)}; }
|
||||||
| v=value { tmp = std::move(v); }
|
|
||||||
| K_COUNT '(' countArgument ')' { tmp = make_count_rows_function_expression(); }
|
| K_COUNT '(' countArgument ')' { tmp = make_count_rows_function_expression(); }
|
||||||
| K_WRITETIME '(' c=cident ')' { tmp = column_mutation_attribute{column_mutation_attribute::attribute_kind::writetime,
|
| K_WRITETIME '(' c=cident ')' { tmp = column_mutation_attribute{column_mutation_attribute::attribute_kind::writetime,
|
||||||
unresolved_identifier{std::move(c)}}; }
|
unresolved_identifier{std::move(c)}}; }
|
||||||
| K_TTL '(' c=cident ')' { tmp = column_mutation_attribute{column_mutation_attribute::attribute_kind::ttl,
|
| K_TTL '(' c=cident ')' { tmp = column_mutation_attribute{column_mutation_attribute::attribute_kind::ttl,
|
||||||
unresolved_identifier{std::move(c)}}; }
|
unresolved_identifier{std::move(c)}}; }
|
||||||
| f=functionName args=selectionFunctionArgs { tmp = function_call{std::move(f), std::move(args)}; }
|
| f=functionName args=selectionFunctionArgs { tmp = function_call{std::move(f), std::move(args)}; }
|
||||||
|
| f=similarityFunctionName args=vectorSimilarityArgs { tmp = function_call{std::move(f), std::move(args)}; }
|
||||||
| K_CAST '(' arg=unaliasedSelector K_AS t=native_type ')' { tmp = cast{.style = cast::cast_style::sql, .arg = std::move(arg), .type = std::move(t)}; }
|
| K_CAST '(' arg=unaliasedSelector K_AS t=native_type ')' { tmp = cast{.style = cast::cast_style::sql, .arg = std::move(arg), .type = std::move(t)}; }
|
||||||
)
|
)
|
||||||
( '.' fi=cident { tmp = field_selection{std::move(tmp), std::move(fi)}; }
|
( '.' fi=cident { tmp = field_selection{std::move(tmp), std::move(fi)}; }
|
||||||
@@ -448,9 +446,23 @@ selectionFunctionArgs returns [std::vector<expression> a]
|
|||||||
')'
|
')'
|
||||||
;
|
;
|
||||||
|
|
||||||
|
vectorSimilarityArgs returns [std::vector<expression> a]
|
||||||
|
: '(' ')'
|
||||||
|
| '(' v1=vectorSimilarityArg { a.push_back(std::move(v1)); }
|
||||||
|
( ',' vn=vectorSimilarityArg { a.push_back(std::move(vn)); } )*
|
||||||
|
')'
|
||||||
|
;
|
||||||
|
|
||||||
|
vectorSimilarityArg returns [uexpression a]
|
||||||
|
: s=unaliasedSelector { a = std::move(s); }
|
||||||
|
| v=value { a = std::move(v); }
|
||||||
|
;
|
||||||
|
|
||||||
countArgument
|
countArgument
|
||||||
: '*'
|
: '*'
|
||||||
/* COUNT(1) is also allowed, it is recognized via the general function(args) path */
|
| i=INTEGER { if (i->getText() != "1") {
|
||||||
|
add_recognition_error("Only COUNT(1) is supported, got COUNT(" + i->getText() + ")");
|
||||||
|
} }
|
||||||
;
|
;
|
||||||
|
|
||||||
whereClause returns [uexpression clause]
|
whereClause returns [uexpression clause]
|
||||||
@@ -874,8 +886,8 @@ cfamDefinition[cql3::statements::create_table_statement::raw_statement& expr]
|
|||||||
;
|
;
|
||||||
|
|
||||||
cfamColumns[cql3::statements::create_table_statement::raw_statement& expr]
|
cfamColumns[cql3::statements::create_table_statement::raw_statement& expr]
|
||||||
@init { bool is_static=false, is_ttl=false; }
|
@init { bool is_static=false; }
|
||||||
: k=ident v=comparatorType (K_TTL {is_ttl = true;})? (K_STATIC {is_static = true;})? { $expr.add_definition(k, v, is_static, is_ttl); }
|
: k=ident v=comparatorType (K_STATIC {is_static = true;})? { $expr.add_definition(k, v, is_static); }
|
||||||
(K_PRIMARY K_KEY { $expr.add_key_aliases(std::vector<shared_ptr<cql3::column_identifier>>{k}); })?
|
(K_PRIMARY K_KEY { $expr.add_key_aliases(std::vector<shared_ptr<cql3::column_identifier>>{k}); })?
|
||||||
| K_PRIMARY K_KEY '(' pkDef[expr] (',' c=ident { $expr.add_column_alias(c); } )* ')'
|
| K_PRIMARY K_KEY '(' pkDef[expr] (',' c=ident { $expr.add_column_alias(c); } )* ')'
|
||||||
;
|
;
|
||||||
@@ -1042,7 +1054,6 @@ alterTableStatement returns [std::unique_ptr<alter_table_statement::raw_statemen
|
|||||||
std::vector<alter_table_statement::column_change> column_changes;
|
std::vector<alter_table_statement::column_change> column_changes;
|
||||||
std::vector<std::pair<shared_ptr<cql3::column_identifier::raw>, shared_ptr<cql3::column_identifier::raw>>> renames;
|
std::vector<std::pair<shared_ptr<cql3::column_identifier::raw>, shared_ptr<cql3::column_identifier::raw>>> renames;
|
||||||
auto attrs = std::make_unique<cql3::attributes::raw>();
|
auto attrs = std::make_unique<cql3::attributes::raw>();
|
||||||
shared_ptr<cql3::column_identifier::raw> ttl_change;
|
|
||||||
}
|
}
|
||||||
: K_ALTER K_COLUMNFAMILY cf=columnFamilyName
|
: K_ALTER K_COLUMNFAMILY cf=columnFamilyName
|
||||||
( K_ALTER id=cident K_TYPE v=comparatorType { type = alter_table_statement::type::alter; column_changes.emplace_back(alter_table_statement::column_change{id, v}); }
|
( K_ALTER id=cident K_TYPE v=comparatorType { type = alter_table_statement::type::alter; column_changes.emplace_back(alter_table_statement::column_change{id, v}); }
|
||||||
@@ -1061,11 +1072,9 @@ alterTableStatement returns [std::unique_ptr<alter_table_statement::raw_statemen
|
|||||||
| K_RENAME { type = alter_table_statement::type::rename; }
|
| K_RENAME { type = alter_table_statement::type::rename; }
|
||||||
id1=cident K_TO toId1=cident { renames.emplace_back(id1, toId1); }
|
id1=cident K_TO toId1=cident { renames.emplace_back(id1, toId1); }
|
||||||
( K_AND idn=cident K_TO toIdn=cident { renames.emplace_back(idn, toIdn); } )*
|
( K_AND idn=cident K_TO toIdn=cident { renames.emplace_back(idn, toIdn); } )*
|
||||||
| K_TTL { type = alter_table_statement::type::ttl; }
|
|
||||||
( id=cident { ttl_change = id; } | K_NULL )
|
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
$expr = std::make_unique<alter_table_statement::raw_statement>(std::move(cf), type, std::move(column_changes), std::move(props), std::move(renames), std::move(attrs), std::move(ttl_change));
|
$expr = std::make_unique<alter_table_statement::raw_statement>(std::move(cf), type, std::move(column_changes), std::move(props), std::move(renames), std::move(attrs));
|
||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
@@ -1697,6 +1706,10 @@ functionName returns [cql3::functions::function_name s]
|
|||||||
: (ks=keyspaceName '.')? f=allowedFunctionName { $s.keyspace = std::move(ks); $s.name = std::move(f); }
|
: (ks=keyspaceName '.')? f=allowedFunctionName { $s.keyspace = std::move(ks); $s.name = std::move(f); }
|
||||||
;
|
;
|
||||||
|
|
||||||
|
similarityFunctionName returns [cql3::functions::function_name s]
|
||||||
|
: f=allowedSimilarityFunctionName { $s = cql3::functions::function_name::native_function(std::move(f)); }
|
||||||
|
;
|
||||||
|
|
||||||
allowedFunctionName returns [sstring s]
|
allowedFunctionName returns [sstring s]
|
||||||
: f=IDENT { $s = $f.text; std::transform(s.begin(), s.end(), s.begin(), ::tolower); }
|
: f=IDENT { $s = $f.text; std::transform(s.begin(), s.end(), s.begin(), ::tolower); }
|
||||||
| f=QUOTED_NAME { $s = $f.text; }
|
| f=QUOTED_NAME { $s = $f.text; }
|
||||||
@@ -1705,6 +1718,11 @@ allowedFunctionName returns [sstring s]
|
|||||||
| K_COUNT { $s = "count"; }
|
| K_COUNT { $s = "count"; }
|
||||||
;
|
;
|
||||||
|
|
||||||
|
allowedSimilarityFunctionName returns [sstring s]
|
||||||
|
: f=(K_SIMILARITY_COSINE | K_SIMILARITY_EUCLIDEAN | K_SIMILARITY_DOT_PRODUCT)
|
||||||
|
{ $s = $f.text; std::transform(s.begin(), s.end(), s.begin(), ::tolower); }
|
||||||
|
;
|
||||||
|
|
||||||
functionArgs returns [std::vector<expression> a]
|
functionArgs returns [std::vector<expression> a]
|
||||||
: '(' ')'
|
: '(' ')'
|
||||||
| '(' t1=term { a.push_back(std::move(t1)); }
|
| '(' t1=term { a.push_back(std::move(t1)); }
|
||||||
@@ -2074,21 +2092,7 @@ vector_type returns [shared_ptr<cql3::cql3_type::raw> pt]
|
|||||||
{
|
{
|
||||||
if ($d.text[0] == '-')
|
if ($d.text[0] == '-')
|
||||||
throw exceptions::invalid_request_exception("Vectors must have a dimension greater than 0");
|
throw exceptions::invalid_request_exception("Vectors must have a dimension greater than 0");
|
||||||
unsigned long parsed_dimension;
|
$pt = cql3::cql3_type::raw::vector(t, std::stoul($d.text));
|
||||||
try {
|
|
||||||
parsed_dimension = std::stoul($d.text);
|
|
||||||
} catch (const std::exception& e) {
|
|
||||||
throw exceptions::invalid_request_exception(format("Invalid vector dimension: {}", $d.text));
|
|
||||||
}
|
|
||||||
static_assert(sizeof(unsigned long) >= sizeof(vector_dimension_t));
|
|
||||||
if (parsed_dimension == 0) {
|
|
||||||
throw exceptions::invalid_request_exception("Vectors must have a dimension greater than 0");
|
|
||||||
}
|
|
||||||
if (parsed_dimension > cql3::cql3_type::MAX_VECTOR_DIMENSION) {
|
|
||||||
throw exceptions::invalid_request_exception(
|
|
||||||
format("Vectors must have a dimension less than or equal to {}", cql3::cql3_type::MAX_VECTOR_DIMENSION));
|
|
||||||
}
|
|
||||||
$pt = cql3::cql3_type::raw::vector(t, static_cast<vector_dimension_t>(parsed_dimension));
|
|
||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
@@ -2415,6 +2419,10 @@ K_MUTATION_FRAGMENTS: M U T A T I O N '_' F R A G M E N T S;
|
|||||||
|
|
||||||
K_VECTOR_SEARCH_INDEXING: V E C T O R '_' S E A R C H '_' I N D E X I N G;
|
K_VECTOR_SEARCH_INDEXING: V E C T O R '_' S E A R C H '_' I N D E X I N G;
|
||||||
|
|
||||||
|
K_SIMILARITY_EUCLIDEAN: S I M I L A R I T Y '_' E U C L I D E A N;
|
||||||
|
K_SIMILARITY_COSINE: S I M I L A R I T Y '_' C O S I N E;
|
||||||
|
K_SIMILARITY_DOT_PRODUCT: S I M I L A R I T Y '_' D O T '_' P R O D U C T;
|
||||||
|
|
||||||
// Case-insensitive alpha characters
|
// Case-insensitive alpha characters
|
||||||
fragment A: ('a'|'A');
|
fragment A: ('a'|'A');
|
||||||
fragment B: ('b'|'B');
|
fragment B: ('b'|'B');
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ public:
|
|||||||
|
|
||||||
struct vector_test_result {
|
struct vector_test_result {
|
||||||
test_result result;
|
test_result result;
|
||||||
std::optional<vector_dimension_t> dimension_opt;
|
std::optional<size_t> dimension_opt;
|
||||||
};
|
};
|
||||||
|
|
||||||
static bool is_assignable(test_result tr) {
|
static bool is_assignable(test_result tr) {
|
||||||
|
|||||||
@@ -307,14 +307,17 @@ public:
|
|||||||
|
|
||||||
class cql3_type::raw_vector : public raw {
|
class cql3_type::raw_vector : public raw {
|
||||||
shared_ptr<raw> _type;
|
shared_ptr<raw> _type;
|
||||||
vector_dimension_t _dimension;
|
size_t _dimension;
|
||||||
|
|
||||||
|
// This limitation is acquired from the maximum number of dimensions in OpenSearch.
|
||||||
|
static constexpr size_t MAX_VECTOR_DIMENSION = 16000;
|
||||||
|
|
||||||
virtual sstring to_string() const override {
|
virtual sstring to_string() const override {
|
||||||
return seastar::format("vector<{}, {}>", _type, _dimension);
|
return seastar::format("vector<{}, {}>", _type, _dimension);
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
raw_vector(shared_ptr<raw> type, vector_dimension_t dimension)
|
raw_vector(shared_ptr<raw> type, size_t dimension)
|
||||||
: _type(std::move(type)), _dimension(dimension) {
|
: _type(std::move(type)), _dimension(dimension) {
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -414,7 +417,7 @@ cql3_type::raw::tuple(std::vector<shared_ptr<raw>> ts) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
shared_ptr<cql3_type::raw>
|
shared_ptr<cql3_type::raw>
|
||||||
cql3_type::raw::vector(shared_ptr<raw> t, vector_dimension_t dimension) {
|
cql3_type::raw::vector(shared_ptr<raw> t, size_t dimension) {
|
||||||
return ::make_shared<raw_vector>(std::move(t), dimension);
|
return ::make_shared<raw_vector>(std::move(t), dimension);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -39,9 +39,6 @@ public:
|
|||||||
data_type get_type() const { return _type; }
|
data_type get_type() const { return _type; }
|
||||||
const sstring& to_string() const { return _type->cql3_type_name(); }
|
const sstring& to_string() const { return _type->cql3_type_name(); }
|
||||||
|
|
||||||
// This limitation is acquired from the maximum number of dimensions in OpenSearch.
|
|
||||||
static constexpr vector_dimension_t MAX_VECTOR_DIMENSION = 16000;
|
|
||||||
|
|
||||||
// For UserTypes, we need to know the current keyspace to resolve the
|
// For UserTypes, we need to know the current keyspace to resolve the
|
||||||
// actual type used, so Raw is a "not yet prepared" CQL3Type.
|
// actual type used, so Raw is a "not yet prepared" CQL3Type.
|
||||||
class raw {
|
class raw {
|
||||||
@@ -67,7 +64,7 @@ public:
|
|||||||
static shared_ptr<raw> list(shared_ptr<raw> t);
|
static shared_ptr<raw> list(shared_ptr<raw> t);
|
||||||
static shared_ptr<raw> set(shared_ptr<raw> t);
|
static shared_ptr<raw> set(shared_ptr<raw> t);
|
||||||
static shared_ptr<raw> tuple(std::vector<shared_ptr<raw>> ts);
|
static shared_ptr<raw> tuple(std::vector<shared_ptr<raw>> ts);
|
||||||
static shared_ptr<raw> vector(shared_ptr<raw> t, vector_dimension_t dimension);
|
static shared_ptr<raw> vector(shared_ptr<raw> t, size_t dimension);
|
||||||
static shared_ptr<raw> frozen(shared_ptr<raw> t);
|
static shared_ptr<raw> frozen(shared_ptr<raw> t);
|
||||||
friend sstring format_as(const raw& r) {
|
friend sstring format_as(const raw& r) {
|
||||||
return r.to_string();
|
return r.to_string();
|
||||||
|
|||||||
@@ -10,7 +10,6 @@
|
|||||||
#include "expr-utils.hh"
|
#include "expr-utils.hh"
|
||||||
#include "evaluate.hh"
|
#include "evaluate.hh"
|
||||||
#include "cql3/functions/functions.hh"
|
#include "cql3/functions/functions.hh"
|
||||||
#include "cql3/functions/aggregate_fcts.hh"
|
|
||||||
#include "cql3/functions/castas_fcts.hh"
|
#include "cql3/functions/castas_fcts.hh"
|
||||||
#include "cql3/functions/scalar_function.hh"
|
#include "cql3/functions/scalar_function.hh"
|
||||||
#include "cql3/column_identifier.hh"
|
#include "cql3/column_identifier.hh"
|
||||||
@@ -502,8 +501,8 @@ vector_validate_assignable_to(const collection_constructor& c, data_dictionary::
|
|||||||
throw exceptions::invalid_request_exception(format("Invalid vector type literal for {} of type {}", *receiver.name, receiver.type->as_cql3_type()));
|
throw exceptions::invalid_request_exception(format("Invalid vector type literal for {} of type {}", *receiver.name, receiver.type->as_cql3_type()));
|
||||||
}
|
}
|
||||||
|
|
||||||
vector_dimension_t expected_size = vt->get_dimension();
|
size_t expected_size = vt->get_dimension();
|
||||||
if (expected_size == 0) {
|
if (!expected_size) {
|
||||||
throw exceptions::invalid_request_exception(format("Invalid vector type literal for {}: type {} expects at least one element",
|
throw exceptions::invalid_request_exception(format("Invalid vector type literal for {}: type {} expects at least one element",
|
||||||
*receiver.name, receiver.type->as_cql3_type()));
|
*receiver.name, receiver.type->as_cql3_type()));
|
||||||
}
|
}
|
||||||
@@ -1048,47 +1047,8 @@ prepare_function_args_for_type_inference(std::span<const expression> args, data_
|
|||||||
return partially_prepared_args;
|
return partially_prepared_args;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Special case for count(1) - recognize it as the countRows() function. Note it is quite
|
|
||||||
// artificial and we might relax it to the more general count(expression) later.
|
|
||||||
static
|
|
||||||
std::optional<expression>
|
|
||||||
try_prepare_count_rows(const expr::function_call& fc, data_dictionary::database db, const sstring& keyspace, const schema* schema_opt, lw_shared_ptr<column_specification> receiver) {
|
|
||||||
return std::visit(overloaded_functor{
|
|
||||||
[&] (const functions::function_name& name) -> std::optional<expression> {
|
|
||||||
auto native_name = name;
|
|
||||||
if (!native_name.has_keyspace()) {
|
|
||||||
native_name = name.as_native_function();
|
|
||||||
}
|
|
||||||
// Collapse count(1) into countRows()
|
|
||||||
if (native_name == functions::function_name::native_function("count")) {
|
|
||||||
if (fc.args.size() == 1) {
|
|
||||||
if (auto uc_arg = expr::as_if<expr::untyped_constant>(&fc.args[0])) {
|
|
||||||
if (uc_arg->partial_type == expr::untyped_constant::type_class::integer
|
|
||||||
&& uc_arg->raw_text == "1") {
|
|
||||||
return expr::function_call{
|
|
||||||
.func = functions::aggregate_fcts::make_count_rows_function(),
|
|
||||||
.args = {},
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
throw exceptions::invalid_request_exception(format("count() expects a column or the literal 1 as an argument", fc.args[0]));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return std::nullopt;
|
|
||||||
},
|
|
||||||
[] (const shared_ptr<functions::function>&) -> std::optional<expression> {
|
|
||||||
// Already prepared, nothing to do
|
|
||||||
return std::nullopt;
|
|
||||||
},
|
|
||||||
}, fc.func);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::optional<expression>
|
std::optional<expression>
|
||||||
prepare_function_call(const expr::function_call& fc, data_dictionary::database db, const sstring& keyspace, const schema* schema_opt, lw_shared_ptr<column_specification> receiver) {
|
prepare_function_call(const expr::function_call& fc, data_dictionary::database db, const sstring& keyspace, const schema* schema_opt, lw_shared_ptr<column_specification> receiver) {
|
||||||
if (auto prepared = try_prepare_count_rows(fc, db, keyspace, schema_opt, receiver)) {
|
|
||||||
return prepared;
|
|
||||||
}
|
|
||||||
// Try to extract a column family name from the available information.
|
// Try to extract a column family name from the available information.
|
||||||
// Most functions can be prepared without information about the column family, usually just the keyspace is enough.
|
// Most functions can be prepared without information about the column family, usually just the keyspace is enough.
|
||||||
// One exception is the token() function - in order to prepare system.token() we have to know the partition key of the table,
|
// One exception is the token() function - in order to prepare system.token() we have to know the partition key of the table,
|
||||||
|
|||||||
@@ -10,41 +10,9 @@
|
|||||||
#include "types/types.hh"
|
#include "types/types.hh"
|
||||||
#include "types/vector.hh"
|
#include "types/vector.hh"
|
||||||
#include "exceptions/exceptions.hh"
|
#include "exceptions/exceptions.hh"
|
||||||
#include <span>
|
|
||||||
#include <bit>
|
|
||||||
|
|
||||||
namespace cql3 {
|
namespace cql3 {
|
||||||
namespace functions {
|
namespace functions {
|
||||||
|
|
||||||
namespace detail {
|
|
||||||
|
|
||||||
std::vector<float> extract_float_vector(const bytes_opt& param, vector_dimension_t dimension) {
|
|
||||||
if (!param) {
|
|
||||||
throw exceptions::invalid_request_exception("Cannot extract float vector from null parameter");
|
|
||||||
}
|
|
||||||
|
|
||||||
const size_t expected_size = dimension * sizeof(float);
|
|
||||||
if (param->size() != expected_size) {
|
|
||||||
throw exceptions::invalid_request_exception(
|
|
||||||
fmt::format("Invalid vector size: expected {} bytes for {} floats, got {} bytes",
|
|
||||||
expected_size, dimension, param->size()));
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<float> result;
|
|
||||||
result.reserve(dimension);
|
|
||||||
|
|
||||||
bytes_view view(*param);
|
|
||||||
for (size_t i = 0; i < dimension; ++i) {
|
|
||||||
// read_simple handles network byte order (big-endian) conversion
|
|
||||||
uint32_t raw = read_simple<uint32_t>(view);
|
|
||||||
result.push_back(std::bit_cast<float>(raw));
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace detail
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
// The computations of similarity scores match the exact formulas of Cassandra's (jVector's) implementation to ensure compatibility.
|
// The computations of similarity scores match the exact formulas of Cassandra's (jVector's) implementation to ensure compatibility.
|
||||||
@@ -54,14 +22,14 @@ namespace {
|
|||||||
|
|
||||||
// You should only use this function if you need to preserve the original vectors and cannot normalize
|
// You should only use this function if you need to preserve the original vectors and cannot normalize
|
||||||
// them in advance.
|
// them in advance.
|
||||||
float compute_cosine_similarity(std::span<const float> v1, std::span<const float> v2) {
|
float compute_cosine_similarity(const std::vector<data_value>& v1, const std::vector<data_value>& v2) {
|
||||||
double dot_product = 0.0;
|
double dot_product = 0.0;
|
||||||
double squared_norm_a = 0.0;
|
double squared_norm_a = 0.0;
|
||||||
double squared_norm_b = 0.0;
|
double squared_norm_b = 0.0;
|
||||||
|
|
||||||
for (size_t i = 0; i < v1.size(); ++i) {
|
for (size_t i = 0; i < v1.size(); ++i) {
|
||||||
double a = v1[i];
|
double a = value_cast<float>(v1[i]);
|
||||||
double b = v2[i];
|
double b = value_cast<float>(v2[i]);
|
||||||
|
|
||||||
dot_product += a * b;
|
dot_product += a * b;
|
||||||
squared_norm_a += a * a;
|
squared_norm_a += a * a;
|
||||||
@@ -69,7 +37,7 @@ float compute_cosine_similarity(std::span<const float> v1, std::span<const float
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (squared_norm_a == 0 || squared_norm_b == 0) {
|
if (squared_norm_a == 0 || squared_norm_b == 0) {
|
||||||
return std::numeric_limits<float>::quiet_NaN();
|
throw exceptions::invalid_request_exception("Function system.similarity_cosine doesn't support all-zero vectors");
|
||||||
}
|
}
|
||||||
|
|
||||||
// The cosine similarity is in the range [-1, 1].
|
// The cosine similarity is in the range [-1, 1].
|
||||||
@@ -78,12 +46,12 @@ float compute_cosine_similarity(std::span<const float> v1, std::span<const float
|
|||||||
return (1 + (dot_product / (std::sqrt(squared_norm_a * squared_norm_b)))) / 2;
|
return (1 + (dot_product / (std::sqrt(squared_norm_a * squared_norm_b)))) / 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
float compute_euclidean_similarity(std::span<const float> v1, std::span<const float> v2) {
|
float compute_euclidean_similarity(const std::vector<data_value>& v1, const std::vector<data_value>& v2) {
|
||||||
double sum = 0.0;
|
double sum = 0.0;
|
||||||
|
|
||||||
for (size_t i = 0; i < v1.size(); ++i) {
|
for (size_t i = 0; i < v1.size(); ++i) {
|
||||||
double a = v1[i];
|
double a = value_cast<float>(v1[i]);
|
||||||
double b = v2[i];
|
double b = value_cast<float>(v2[i]);
|
||||||
|
|
||||||
double diff = a - b;
|
double diff = a - b;
|
||||||
sum += diff * diff;
|
sum += diff * diff;
|
||||||
@@ -97,12 +65,12 @@ float compute_euclidean_similarity(std::span<const float> v1, std::span<const fl
|
|||||||
|
|
||||||
// Assumes that both vectors are L2-normalized.
|
// Assumes that both vectors are L2-normalized.
|
||||||
// This similarity is intended as an optimized way to perform cosine similarity calculation.
|
// This similarity is intended as an optimized way to perform cosine similarity calculation.
|
||||||
float compute_dot_product_similarity(std::span<const float> v1, std::span<const float> v2) {
|
float compute_dot_product_similarity(const std::vector<data_value>& v1, const std::vector<data_value>& v2) {
|
||||||
double dot_product = 0.0;
|
double dot_product = 0.0;
|
||||||
|
|
||||||
for (size_t i = 0; i < v1.size(); ++i) {
|
for (size_t i = 0; i < v1.size(); ++i) {
|
||||||
double a = v1[i];
|
double a = value_cast<float>(v1[i]);
|
||||||
double b = v2[i];
|
double b = value_cast<float>(v2[i]);
|
||||||
dot_product += a * b;
|
dot_product += a * b;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -156,7 +124,7 @@ std::vector<data_type> retrieve_vector_arg_types(const function_name& name, cons
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
vector_dimension_t dimension = first_dim_opt ? *first_dim_opt : *second_dim_opt;
|
size_t dimension = first_dim_opt ? *first_dim_opt : *second_dim_opt;
|
||||||
auto type = vector_type_impl::get_instance(float_type, dimension);
|
auto type = vector_type_impl::get_instance(float_type, dimension);
|
||||||
return {type, type};
|
return {type, type};
|
||||||
}
|
}
|
||||||
@@ -168,15 +136,13 @@ bytes_opt vector_similarity_fct::execute(std::span<const bytes_opt> parameters)
|
|||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extract dimension from the vector type
|
const auto& type = arg_types()[0];
|
||||||
const auto& type = static_cast<const vector_type_impl&>(*arg_types()[0]);
|
data_value v1 = type->deserialize(*parameters[0]);
|
||||||
vector_dimension_t dimension = type.get_dimension();
|
data_value v2 = type->deserialize(*parameters[1]);
|
||||||
|
const auto& v1_elements = value_cast<std::vector<data_value>>(v1);
|
||||||
|
const auto& v2_elements = value_cast<std::vector<data_value>>(v2);
|
||||||
|
|
||||||
// Optimized path: extract floats directly from bytes, bypassing data_value overhead
|
float result = SIMILARITY_FUNCTIONS.at(_name)(v1_elements, v2_elements);
|
||||||
std::vector<float> v1 = detail::extract_float_vector(parameters[0], dimension);
|
|
||||||
std::vector<float> v2 = detail::extract_float_vector(parameters[1], dimension);
|
|
||||||
|
|
||||||
float result = SIMILARITY_FUNCTIONS.at(_name)(v1, v2);
|
|
||||||
return float_type->decompose(result);
|
return float_type->decompose(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,6 @@
|
|||||||
#include "native_scalar_function.hh"
|
#include "native_scalar_function.hh"
|
||||||
#include "cql3/assignment_testable.hh"
|
#include "cql3/assignment_testable.hh"
|
||||||
#include "cql3/functions/function_name.hh"
|
#include "cql3/functions/function_name.hh"
|
||||||
#include <span>
|
|
||||||
|
|
||||||
namespace cql3 {
|
namespace cql3 {
|
||||||
namespace functions {
|
namespace functions {
|
||||||
@@ -20,7 +19,7 @@ static const function_name SIMILARITY_COSINE_FUNCTION_NAME = function_name::nati
|
|||||||
static const function_name SIMILARITY_EUCLIDEAN_FUNCTION_NAME = function_name::native_function("similarity_euclidean");
|
static const function_name SIMILARITY_EUCLIDEAN_FUNCTION_NAME = function_name::native_function("similarity_euclidean");
|
||||||
static const function_name SIMILARITY_DOT_PRODUCT_FUNCTION_NAME = function_name::native_function("similarity_dot_product");
|
static const function_name SIMILARITY_DOT_PRODUCT_FUNCTION_NAME = function_name::native_function("similarity_dot_product");
|
||||||
|
|
||||||
using similarity_function_t = float (*)(std::span<const float>, std::span<const float>);
|
using similarity_function_t = float (*)(const std::vector<data_value>&, const std::vector<data_value>&);
|
||||||
extern thread_local const std::unordered_map<function_name, similarity_function_t> SIMILARITY_FUNCTIONS;
|
extern thread_local const std::unordered_map<function_name, similarity_function_t> SIMILARITY_FUNCTIONS;
|
||||||
|
|
||||||
std::vector<data_type> retrieve_vector_arg_types(const function_name& name, const std::vector<shared_ptr<assignment_testable>>& provided_args);
|
std::vector<data_type> retrieve_vector_arg_types(const function_name& name, const std::vector<shared_ptr<assignment_testable>>& provided_args);
|
||||||
@@ -34,14 +33,5 @@ public:
|
|||||||
virtual bytes_opt execute(std::span<const bytes_opt> parameters) override;
|
virtual bytes_opt execute(std::span<const bytes_opt> parameters) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
namespace detail {
|
|
||||||
|
|
||||||
// Extract float vector directly from serialized bytes, bypassing data_value overhead.
|
|
||||||
// This is an internal API exposed for testing purposes.
|
|
||||||
// Vector<float, N> wire format: N floats as big-endian uint32_t values, 4 bytes each.
|
|
||||||
std::vector<float> extract_float_vector(const bytes_opt& param, vector_dimension_t dimension);
|
|
||||||
|
|
||||||
} // namespace detail
|
|
||||||
|
|
||||||
} // namespace functions
|
} // namespace functions
|
||||||
} // namespace cql3
|
} // namespace cql3
|
||||||
|
|||||||
@@ -1,20 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright 2025-present ScyllaDB
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
|
||||||
*/
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include <ostream>
|
|
||||||
|
|
||||||
namespace cql3 {
|
|
||||||
|
|
||||||
class result;
|
|
||||||
|
|
||||||
void print_query_results_text(std::ostream& os, const result& result);
|
|
||||||
void print_query_results_json(std::ostream& os, const result& result);
|
|
||||||
|
|
||||||
} // namespace cql3
|
|
||||||
@@ -9,10 +9,8 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include "types/json_utils.hh"
|
|
||||||
#include "utils/assert.hh"
|
#include "utils/assert.hh"
|
||||||
#include "utils/hashers.hh"
|
#include "utils/hashers.hh"
|
||||||
#include "utils/rjson.hh"
|
|
||||||
#include "cql3/result_set.hh"
|
#include "cql3/result_set.hh"
|
||||||
|
|
||||||
namespace cql3 {
|
namespace cql3 {
|
||||||
@@ -197,85 +195,4 @@ make_empty_metadata() {
|
|||||||
return empty_metadata_cache;
|
return empty_metadata_cache;
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_query_results_text(std::ostream& os, const cql3::result& result) {
|
|
||||||
const auto& metadata = result.get_metadata();
|
|
||||||
const auto& column_metadata = metadata.get_names();
|
|
||||||
|
|
||||||
struct column_values {
|
|
||||||
size_t max_size{0};
|
|
||||||
sstring header_format;
|
|
||||||
sstring row_format;
|
|
||||||
std::vector<sstring> values;
|
|
||||||
|
|
||||||
void add(sstring value) {
|
|
||||||
max_size = std::max(max_size, value.size());
|
|
||||||
values.push_back(std::move(value));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
std::vector<column_values> columns;
|
|
||||||
columns.resize(column_metadata.size());
|
|
||||||
|
|
||||||
for (size_t i = 0; i < column_metadata.size(); ++i) {
|
|
||||||
columns[i].add(column_metadata[i]->name->text());
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const auto& row : result.result_set().rows()) {
|
|
||||||
for (size_t i = 0; i < row.size(); ++i) {
|
|
||||||
if (row[i]) {
|
|
||||||
columns[i].add(column_metadata[i]->type->to_string(linearized(managed_bytes_view(*row[i]))));
|
|
||||||
} else {
|
|
||||||
columns[i].add("");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<sstring> separators(columns.size(), sstring());
|
|
||||||
for (size_t i = 0; i < columns.size(); ++i) {
|
|
||||||
auto& col_values = columns[i];
|
|
||||||
col_values.header_format = seastar::format(" {{:<{}}} ", col_values.max_size);
|
|
||||||
col_values.row_format = seastar::format(" {{:>{}}} ", col_values.max_size);
|
|
||||||
for (size_t c = 0; c < col_values.max_size; ++c) {
|
|
||||||
separators[i] += "-";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (size_t r = 0; r < result.result_set().rows().size() + 1; ++r) {
|
|
||||||
std::vector<sstring> row;
|
|
||||||
row.reserve(columns.size());
|
|
||||||
for (size_t i = 0; i < columns.size(); ++i) {
|
|
||||||
const auto& format = r == 0 ? columns[i].header_format : columns[i].row_format;
|
|
||||||
row.push_back(fmt::format(fmt::runtime(std::string_view(format)), columns[i].values[r]));
|
|
||||||
}
|
|
||||||
fmt::print(os, "{}\n", fmt::join(row, "|"));
|
|
||||||
if (!r) {
|
|
||||||
fmt::print(os, "-{}-\n", fmt::join(separators, "-+-"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void print_query_results_json(std::ostream& os, const cql3::result& result) {
|
|
||||||
const auto& metadata = result.get_metadata();
|
|
||||||
const auto& column_metadata = metadata.get_names();
|
|
||||||
|
|
||||||
rjson::streaming_writer writer(os);
|
|
||||||
|
|
||||||
writer.StartArray();
|
|
||||||
for (const auto& row : result.result_set().rows()) {
|
|
||||||
writer.StartObject();
|
|
||||||
for (size_t i = 0; i < row.size(); ++i) {
|
|
||||||
writer.Key(column_metadata[i]->name->text());
|
|
||||||
if (!row[i] || row[i]->empty()) {
|
|
||||||
writer.Null();
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const auto value = to_json_string(*column_metadata[i]->type, *row[i]);
|
|
||||||
const auto type = to_json_type(*column_metadata[i]->type, *row[i]);
|
|
||||||
writer.RawValue(value, type);
|
|
||||||
}
|
|
||||||
writer.EndObject();
|
|
||||||
}
|
|
||||||
writer.EndArray();
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,7 +10,6 @@
|
|||||||
|
|
||||||
#include "cdc/log.hh"
|
#include "cdc/log.hh"
|
||||||
#include "index/vector_index.hh"
|
#include "index/vector_index.hh"
|
||||||
#include "types/types.hh"
|
|
||||||
#include "utils/assert.hh"
|
#include "utils/assert.hh"
|
||||||
#include <seastar/core/coroutine.hh>
|
#include <seastar/core/coroutine.hh>
|
||||||
#include "cql3/query_options.hh"
|
#include "cql3/query_options.hh"
|
||||||
@@ -31,9 +30,6 @@
|
|||||||
#include "cql3/query_processor.hh"
|
#include "cql3/query_processor.hh"
|
||||||
#include "cdc/cdc_extension.hh"
|
#include "cdc/cdc_extension.hh"
|
||||||
#include "cdc/cdc_partitioner.hh"
|
#include "cdc/cdc_partitioner.hh"
|
||||||
#include "db/tags/extension.hh"
|
|
||||||
#include "db/tags/utils.hh"
|
|
||||||
#include "alternator/ttl_tag.hh"
|
|
||||||
|
|
||||||
namespace cql3 {
|
namespace cql3 {
|
||||||
|
|
||||||
@@ -47,8 +43,7 @@ alter_table_statement::alter_table_statement(uint32_t bound_terms,
|
|||||||
std::vector<column_change> column_changes,
|
std::vector<column_change> column_changes,
|
||||||
std::optional<cf_prop_defs> properties,
|
std::optional<cf_prop_defs> properties,
|
||||||
renames_type renames,
|
renames_type renames,
|
||||||
std::unique_ptr<attributes> attrs,
|
std::unique_ptr<attributes> attrs)
|
||||||
shared_ptr<column_identifier::raw> ttl_change)
|
|
||||||
: schema_altering_statement(std::move(name))
|
: schema_altering_statement(std::move(name))
|
||||||
, _bound_terms(bound_terms)
|
, _bound_terms(bound_terms)
|
||||||
, _type(t)
|
, _type(t)
|
||||||
@@ -56,7 +51,6 @@ alter_table_statement::alter_table_statement(uint32_t bound_terms,
|
|||||||
, _properties(std::move(properties))
|
, _properties(std::move(properties))
|
||||||
, _renames(std::move(renames))
|
, _renames(std::move(renames))
|
||||||
, _attrs(std::move(attrs))
|
, _attrs(std::move(attrs))
|
||||||
, _ttl_change(std::move(ttl_change))
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -386,21 +380,6 @@ std::pair<schema_ptr, std::vector<view_ptr>> alter_table_statement::prepare_sche
|
|||||||
throw exceptions::invalid_request_exception("Cannot drop columns from a non-CQL3 table");
|
throw exceptions::invalid_request_exception("Cannot drop columns from a non-CQL3 table");
|
||||||
}
|
}
|
||||||
invoke_column_change_fn(std::mem_fn(&alter_table_statement::drop_column));
|
invoke_column_change_fn(std::mem_fn(&alter_table_statement::drop_column));
|
||||||
|
|
||||||
// If we dropped the column used for per-row TTL, we need to remove the tag.
|
|
||||||
if (std::optional<std::string> ttl_column = db::find_tag(*s, TTL_TAG_KEY)) {
|
|
||||||
for (auto& [raw_name, raw_validator, is_static] : _column_changes) {
|
|
||||||
if (*ttl_column == raw_name->text()) {
|
|
||||||
const std::map<sstring, sstring>* tags_ptr = db::get_tags_of_table(s);
|
|
||||||
if (tags_ptr) {
|
|
||||||
std::map<sstring, sstring> tags_map = *tags_ptr;
|
|
||||||
tags_map.erase(TTL_TAG_KEY);
|
|
||||||
cfm.add_extension(db::tags_extension::NAME, ::make_shared<db::tags_extension>(std::move(tags_map)));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case alter_table_statement::type::opts:
|
case alter_table_statement::type::opts:
|
||||||
@@ -455,7 +434,6 @@ std::pair<schema_ptr, std::vector<view_ptr>> alter_table_statement::prepare_sche
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case alter_table_statement::type::rename:
|
case alter_table_statement::type::rename:
|
||||||
{
|
|
||||||
for (auto&& entry : _renames) {
|
for (auto&& entry : _renames) {
|
||||||
auto from = entry.first->prepare_column_identifier(*s);
|
auto from = entry.first->prepare_column_identifier(*s);
|
||||||
auto to = entry.second->prepare_column_identifier(*s);
|
auto to = entry.second->prepare_column_identifier(*s);
|
||||||
@@ -492,53 +470,6 @@ std::pair<schema_ptr, std::vector<view_ptr>> alter_table_statement::prepare_sche
|
|||||||
}
|
}
|
||||||
return make_pair(std::move(new_base_schema), std::move(view_updates));
|
return make_pair(std::move(new_base_schema), std::move(view_updates));
|
||||||
}
|
}
|
||||||
case alter_table_statement::type::ttl:
|
|
||||||
if (!db.features().cql_row_ttl) {
|
|
||||||
throw exceptions::invalid_request_exception("The CQL per-row TTL feature is not yet supported by this cluster. Upgrade all nodes to use it.");
|
|
||||||
}
|
|
||||||
if (_ttl_change) {
|
|
||||||
// Enable per-row TTL with chosen column for expiration time
|
|
||||||
const column_definition *cdef =
|
|
||||||
s->get_column_definition(to_bytes(_ttl_change->text()));
|
|
||||||
if (!cdef) {
|
|
||||||
throw exceptions::invalid_request_exception(fmt::format("Column '{}' does not exist in table {}.{}", _ttl_change->text(), keyspace(), column_family()));
|
|
||||||
}
|
|
||||||
if (cdef->type != timestamp_type && cdef->type != long_type && cdef->type != int32_type) {
|
|
||||||
throw exceptions::invalid_request_exception(fmt::format("TTL column {} must be of type timestamp, bigint or int, can't be {}", _ttl_change->text(), cdef->type->as_cql3_type().to_string()));
|
|
||||||
}
|
|
||||||
if (cdef->is_primary_key()) {
|
|
||||||
throw exceptions::invalid_request_exception(fmt::format("Cannot use a primary key column {} as a TTL column", _ttl_change->text()));
|
|
||||||
}
|
|
||||||
if (cdef->is_static()) {
|
|
||||||
throw exceptions::invalid_request_exception(fmt::format("Cannot use a static column {} as a TTL column", _ttl_change->text()));
|
|
||||||
}
|
|
||||||
std::optional<std::string> old_ttl_column = db::find_tag(*s, TTL_TAG_KEY);
|
|
||||||
if (old_ttl_column) {
|
|
||||||
throw exceptions::invalid_request_exception(fmt::format("Cannot set TTL column, table {}.{} already has a TTL column defined: {}", keyspace(), column_family(), *old_ttl_column));
|
|
||||||
}
|
|
||||||
const std::map<sstring, sstring>* old_tags_ptr = db::get_tags_of_table(s);
|
|
||||||
std::map<sstring, sstring> tags_map;
|
|
||||||
if (old_tags_ptr) {
|
|
||||||
// tags_ptr is a constant pointer to schema data. To modify
|
|
||||||
// it, we must make a copy.
|
|
||||||
tags_map = *old_tags_ptr;
|
|
||||||
}
|
|
||||||
tags_map[TTL_TAG_KEY] = _ttl_change->text();
|
|
||||||
cfm.add_extension(db::tags_extension::NAME, ::make_shared<db::tags_extension>(std::move(tags_map)));
|
|
||||||
} else {
|
|
||||||
// Disable per-row TTL
|
|
||||||
const std::map<sstring, sstring>* tags_ptr = db::get_tags_of_table(s);
|
|
||||||
if (!tags_ptr || tags_ptr->find(TTL_TAG_KEY) == tags_ptr->end()) {
|
|
||||||
throw exceptions::invalid_request_exception(fmt::format("Cannot unset TTL column, table {}.{} does not have a TTL column set", keyspace(), column_family()));
|
|
||||||
}
|
|
||||||
// tags_ptr is a constant pointer to schema data. To modify it, we
|
|
||||||
// must make a copy.
|
|
||||||
std::map<sstring, sstring> tags_map = *tags_ptr;
|
|
||||||
tags_map.erase(TTL_TAG_KEY);
|
|
||||||
cfm.add_extension(db::tags_extension::NAME, ::make_shared<db::tags_extension>(std::move(tags_map)));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
return make_pair(cfm.build(), std::move(view_updates));
|
return make_pair(cfm.build(), std::move(view_updates));
|
||||||
}
|
}
|
||||||
@@ -577,15 +508,13 @@ alter_table_statement::raw_statement::raw_statement(cf_name name,
|
|||||||
std::vector<column_change> column_changes,
|
std::vector<column_change> column_changes,
|
||||||
std::optional<cf_prop_defs> properties,
|
std::optional<cf_prop_defs> properties,
|
||||||
renames_type renames,
|
renames_type renames,
|
||||||
std::unique_ptr<attributes::raw> attrs,
|
std::unique_ptr<attributes::raw> attrs)
|
||||||
shared_ptr<column_identifier::raw> ttl_change)
|
|
||||||
: cf_statement(std::move(name))
|
: cf_statement(std::move(name))
|
||||||
, _type(t)
|
, _type(t)
|
||||||
, _column_changes(std::move(column_changes))
|
, _column_changes(std::move(column_changes))
|
||||||
, _properties(std::move(properties))
|
, _properties(std::move(properties))
|
||||||
, _renames(std::move(renames))
|
, _renames(std::move(renames))
|
||||||
, _attrs(std::move(attrs))
|
, _attrs(std::move(attrs))
|
||||||
, _ttl_change(std::move(ttl_change))
|
|
||||||
{}
|
{}
|
||||||
|
|
||||||
std::unique_ptr<cql3::statements::prepared_statement>
|
std::unique_ptr<cql3::statements::prepared_statement>
|
||||||
@@ -610,8 +539,7 @@ alter_table_statement::raw_statement::prepare(data_dictionary::database db, cql_
|
|||||||
_column_changes,
|
_column_changes,
|
||||||
_properties,
|
_properties,
|
||||||
_renames,
|
_renames,
|
||||||
std::move(prepared_attrs),
|
std::move(prepared_attrs)
|
||||||
_ttl_change
|
|
||||||
),
|
),
|
||||||
ctx,
|
ctx,
|
||||||
// since alter table is `cql_statement_no_metadata` (it doesn't return any metadata when preparing)
|
// since alter table is `cql_statement_no_metadata` (it doesn't return any metadata when preparing)
|
||||||
|
|||||||
@@ -32,7 +32,6 @@ public:
|
|||||||
drop,
|
drop,
|
||||||
opts,
|
opts,
|
||||||
rename,
|
rename,
|
||||||
ttl,
|
|
||||||
};
|
};
|
||||||
using renames_type = std::vector<std::pair<shared_ptr<column_identifier::raw>,
|
using renames_type = std::vector<std::pair<shared_ptr<column_identifier::raw>,
|
||||||
shared_ptr<column_identifier::raw>>>;
|
shared_ptr<column_identifier::raw>>>;
|
||||||
@@ -51,7 +50,6 @@ private:
|
|||||||
const std::optional<cf_prop_defs> _properties;
|
const std::optional<cf_prop_defs> _properties;
|
||||||
const renames_type _renames;
|
const renames_type _renames;
|
||||||
const std::unique_ptr<attributes> _attrs;
|
const std::unique_ptr<attributes> _attrs;
|
||||||
shared_ptr<column_identifier::raw> _ttl_change;
|
|
||||||
public:
|
public:
|
||||||
alter_table_statement(uint32_t bound_terms,
|
alter_table_statement(uint32_t bound_terms,
|
||||||
cf_name name,
|
cf_name name,
|
||||||
@@ -59,8 +57,7 @@ public:
|
|||||||
std::vector<column_change> column_changes,
|
std::vector<column_change> column_changes,
|
||||||
std::optional<cf_prop_defs> properties,
|
std::optional<cf_prop_defs> properties,
|
||||||
renames_type renames,
|
renames_type renames,
|
||||||
std::unique_ptr<attributes> attrs,
|
std::unique_ptr<attributes> attrs);
|
||||||
shared_ptr<column_identifier::raw> ttl_change);
|
|
||||||
|
|
||||||
virtual uint32_t get_bound_terms() const override;
|
virtual uint32_t get_bound_terms() const override;
|
||||||
virtual future<> check_access(query_processor& qp, const service::client_state& state) const override;
|
virtual future<> check_access(query_processor& qp, const service::client_state& state) const override;
|
||||||
@@ -81,7 +78,6 @@ class alter_table_statement::raw_statement : public raw::cf_statement {
|
|||||||
const std::optional<cf_prop_defs> _properties;
|
const std::optional<cf_prop_defs> _properties;
|
||||||
const alter_table_statement::renames_type _renames;
|
const alter_table_statement::renames_type _renames;
|
||||||
const std::unique_ptr<attributes::raw> _attrs;
|
const std::unique_ptr<attributes::raw> _attrs;
|
||||||
shared_ptr<column_identifier::raw> _ttl_change;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
raw_statement(cf_name name,
|
raw_statement(cf_name name,
|
||||||
@@ -89,8 +85,7 @@ public:
|
|||||||
std::vector<column_change> column_changes,
|
std::vector<column_change> column_changes,
|
||||||
std::optional<cf_prop_defs> properties,
|
std::optional<cf_prop_defs> properties,
|
||||||
renames_type renames,
|
renames_type renames,
|
||||||
std::unique_ptr<attributes::raw> attrs,
|
std::unique_ptr<attributes::raw> attrs);
|
||||||
shared_ptr<column_identifier::raw> ttl_change);
|
|
||||||
|
|
||||||
virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;
|
virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;
|
||||||
|
|
||||||
|
|||||||
@@ -30,9 +30,6 @@
|
|||||||
#include "service/storage_proxy.hh"
|
#include "service/storage_proxy.hh"
|
||||||
#include "db/config.hh"
|
#include "db/config.hh"
|
||||||
#include "compaction/time_window_compaction_strategy.hh"
|
#include "compaction/time_window_compaction_strategy.hh"
|
||||||
#include "db/tags/extension.hh"
|
|
||||||
#include "db/tags/utils.hh"
|
|
||||||
#include "alternator/ttl_tag.hh"
|
|
||||||
|
|
||||||
namespace cql3 {
|
namespace cql3 {
|
||||||
|
|
||||||
@@ -44,12 +41,10 @@ create_table_statement::create_table_statement(cf_name name,
|
|||||||
::shared_ptr<cf_prop_defs> properties,
|
::shared_ptr<cf_prop_defs> properties,
|
||||||
bool if_not_exists,
|
bool if_not_exists,
|
||||||
column_set_type static_columns,
|
column_set_type static_columns,
|
||||||
::shared_ptr<column_identifier> ttl_column,
|
|
||||||
const std::optional<table_id>& id)
|
const std::optional<table_id>& id)
|
||||||
: schema_altering_statement{name}
|
: schema_altering_statement{name}
|
||||||
, _use_compact_storage(false)
|
, _use_compact_storage(false)
|
||||||
, _static_columns{static_columns}
|
, _static_columns{static_columns}
|
||||||
, _ttl_column{ttl_column}
|
|
||||||
, _properties{properties}
|
, _properties{properties}
|
||||||
, _if_not_exists{if_not_exists}
|
, _if_not_exists{if_not_exists}
|
||||||
, _id(id)
|
, _id(id)
|
||||||
@@ -128,13 +123,6 @@ void create_table_statement::apply_properties_to(schema_builder& builder, const
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
_properties->apply_to_builder(builder, _properties->make_schema_extensions(db.extensions()), db, keyspace(), true);
|
_properties->apply_to_builder(builder, _properties->make_schema_extensions(db.extensions()), db, keyspace(), true);
|
||||||
// Remembering which column was designated as the TTL column for row-based
|
|
||||||
// TTL column is done using a "tag" extension. If there is no TTL column,
|
|
||||||
// we don't need this extension at all.
|
|
||||||
if (_ttl_column) {
|
|
||||||
std::map<sstring, sstring> tags_map = {{TTL_TAG_KEY, _ttl_column->text()}};
|
|
||||||
builder.add_extension(db::tags_extension::NAME, ::make_shared<db::tags_extension>(std::move(tags_map)));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void create_table_statement::add_column_metadata_from_aliases(schema_builder& builder, std::vector<bytes> aliases, const std::vector<data_type>& types, column_kind kind) const
|
void create_table_statement::add_column_metadata_from_aliases(schema_builder& builder, std::vector<bytes> aliases, const std::vector<data_type>& types, column_kind kind) const
|
||||||
@@ -210,7 +198,7 @@ std::unique_ptr<prepared_statement> create_table_statement::raw_statement::prepa
|
|||||||
}
|
}
|
||||||
const bool has_default_ttl = _properties.properties()->get_default_time_to_live() > 0;
|
const bool has_default_ttl = _properties.properties()->get_default_time_to_live() > 0;
|
||||||
|
|
||||||
auto stmt = ::make_shared<create_table_statement>(*_cf_name, _properties.properties(), _if_not_exists, _static_columns, _ttl_column, _properties.properties()->get_id());
|
auto stmt = ::make_shared<create_table_statement>(*_cf_name, _properties.properties(), _if_not_exists, _static_columns, _properties.properties()->get_id());
|
||||||
|
|
||||||
bool ks_uses_tablets;
|
bool ks_uses_tablets;
|
||||||
try {
|
try {
|
||||||
@@ -415,27 +403,6 @@ std::unique_ptr<prepared_statement> create_table_statement::raw_statement::prepa
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If a TTL column is defined, it must be a regular column - not a static
|
|
||||||
// column or part of the primary key.
|
|
||||||
if (_ttl_column) {
|
|
||||||
if (!db.features().cql_row_ttl) {
|
|
||||||
throw exceptions::invalid_request_exception("The CQL per-row TTL feature is not yet supported by this cluster. Upgrade all nodes to use it.");
|
|
||||||
}
|
|
||||||
for (const auto& alias : key_aliases) {
|
|
||||||
if (alias->text() == _ttl_column->text()) {
|
|
||||||
throw exceptions::invalid_request_exception(format("TTL column {} cannot be part of the PRIMARY KEY", alias->text()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (const auto& alias : _column_aliases) {
|
|
||||||
if (alias->text() == _ttl_column->text()) {
|
|
||||||
throw exceptions::invalid_request_exception(format("TTL column {} cannot be part of the PRIMARY KEY", alias->text()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (_static_columns.contains(_ttl_column)) {
|
|
||||||
throw exceptions::invalid_request_exception(format("TTL column {} cannot be a static column", _ttl_column->text()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return std::make_unique<prepared_statement>(audit_info(), stmt, std::move(stmt_warnings));
|
return std::make_unique<prepared_statement>(audit_info(), stmt, std::move(stmt_warnings));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -458,23 +425,12 @@ data_type create_table_statement::raw_statement::get_type_and_remove(column_map_
|
|||||||
return _properties.get_reversable_type(*t, type);
|
return _properties.get_reversable_type(*t, type);
|
||||||
}
|
}
|
||||||
|
|
||||||
void create_table_statement::raw_statement::add_definition(::shared_ptr<column_identifier> def, ::shared_ptr<cql3_type::raw> type, bool is_static, bool is_ttl) {
|
void create_table_statement::raw_statement::add_definition(::shared_ptr<column_identifier> def, ::shared_ptr<cql3_type::raw> type, bool is_static) {
|
||||||
_defined_names.emplace(def);
|
_defined_names.emplace(def);
|
||||||
_definitions.emplace(def, type);
|
_definitions.emplace(def, type);
|
||||||
if (is_static) {
|
if (is_static) {
|
||||||
_static_columns.emplace(def);
|
_static_columns.emplace(def);
|
||||||
}
|
}
|
||||||
if (is_ttl) {
|
|
||||||
if (_ttl_column) {
|
|
||||||
throw exceptions::invalid_request_exception(fmt::format("Cannot have more than one TTL column in a table. Saw {} and {}", _ttl_column->text(), def->text()));
|
|
||||||
}
|
|
||||||
// FIXME: find a way to check cql3_type::raw without fmt::format
|
|
||||||
auto type_name = fmt::format("{}", type);
|
|
||||||
if (type_name != "timestamp" && type_name != "bigint" && type_name != "int") {
|
|
||||||
throw exceptions::invalid_request_exception(fmt::format("TTL column '{}' must be of type timestamp, bigint or int, can't be {}", def->text(), type_name));
|
|
||||||
}
|
|
||||||
_ttl_column = def;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void create_table_statement::raw_statement::add_key_aliases(const std::vector<::shared_ptr<column_identifier>> aliases) {
|
void create_table_statement::raw_statement::add_key_aliases(const std::vector<::shared_ptr<column_identifier>> aliases) {
|
||||||
|
|||||||
@@ -57,7 +57,6 @@ class create_table_statement : public schema_altering_statement {
|
|||||||
shared_ptr_equal_by_value<column_identifier>>;
|
shared_ptr_equal_by_value<column_identifier>>;
|
||||||
column_map_type _columns;
|
column_map_type _columns;
|
||||||
column_set_type _static_columns;
|
column_set_type _static_columns;
|
||||||
::shared_ptr<column_identifier> _ttl_column; // for row-based TTL
|
|
||||||
const ::shared_ptr<cf_prop_defs> _properties;
|
const ::shared_ptr<cf_prop_defs> _properties;
|
||||||
const bool _if_not_exists;
|
const bool _if_not_exists;
|
||||||
std::optional<table_id> _id;
|
std::optional<table_id> _id;
|
||||||
@@ -66,7 +65,6 @@ public:
|
|||||||
::shared_ptr<cf_prop_defs> properties,
|
::shared_ptr<cf_prop_defs> properties,
|
||||||
bool if_not_exists,
|
bool if_not_exists,
|
||||||
column_set_type static_columns,
|
column_set_type static_columns,
|
||||||
::shared_ptr<column_identifier> ttl_column,
|
|
||||||
const std::optional<table_id>& id);
|
const std::optional<table_id>& id);
|
||||||
|
|
||||||
virtual future<> check_access(query_processor& qp, const service::client_state& state) const override;
|
virtual future<> check_access(query_processor& qp, const service::client_state& state) const override;
|
||||||
@@ -102,7 +100,6 @@ private:
|
|||||||
std::vector<std::vector<::shared_ptr<column_identifier>>> _key_aliases;
|
std::vector<std::vector<::shared_ptr<column_identifier>>> _key_aliases;
|
||||||
std::vector<::shared_ptr<column_identifier>> _column_aliases;
|
std::vector<::shared_ptr<column_identifier>> _column_aliases;
|
||||||
create_table_statement::column_set_type _static_columns;
|
create_table_statement::column_set_type _static_columns;
|
||||||
::shared_ptr<column_identifier> _ttl_column; // for row-based TTL
|
|
||||||
|
|
||||||
std::multiset<::shared_ptr<column_identifier>,
|
std::multiset<::shared_ptr<column_identifier>,
|
||||||
indirect_less<::shared_ptr<column_identifier>, column_identifier::text_comparator>> _defined_names;
|
indirect_less<::shared_ptr<column_identifier>, column_identifier::text_comparator>> _defined_names;
|
||||||
@@ -119,7 +116,7 @@ public:
|
|||||||
|
|
||||||
data_type get_type_and_remove(column_map_type& columns, ::shared_ptr<column_identifier> t);
|
data_type get_type_and_remove(column_map_type& columns, ::shared_ptr<column_identifier> t);
|
||||||
|
|
||||||
void add_definition(::shared_ptr<column_identifier> def, ::shared_ptr<cql3_type::raw> type, bool is_static, bool is_ttl);
|
void add_definition(::shared_ptr<column_identifier> def, ::shared_ptr<cql3_type::raw> type, bool is_static);
|
||||||
|
|
||||||
void add_key_aliases(const std::vector<::shared_ptr<column_identifier>> aliases);
|
void add_key_aliases(const std::vector<::shared_ptr<column_identifier>> aliases);
|
||||||
|
|
||||||
|
|||||||
@@ -23,7 +23,6 @@
|
|||||||
#include "index/vector_index.hh"
|
#include "index/vector_index.hh"
|
||||||
#include "schema/schema.hh"
|
#include "schema/schema.hh"
|
||||||
#include "service/client_state.hh"
|
#include "service/client_state.hh"
|
||||||
#include "service/paxos/paxos_state.hh"
|
|
||||||
#include "types/types.hh"
|
#include "types/types.hh"
|
||||||
#include "cql3/query_processor.hh"
|
#include "cql3/query_processor.hh"
|
||||||
#include "cql3/cql_statement.hh"
|
#include "cql3/cql_statement.hh"
|
||||||
@@ -330,19 +329,6 @@ future<std::vector<description>> table(const data_dictionary::database& db, cons
|
|||||||
"*/",
|
"*/",
|
||||||
*table_desc.create_statement);
|
*table_desc.create_statement);
|
||||||
|
|
||||||
table_desc.create_statement = std::move(os).to_managed_string();
|
|
||||||
} else if (service::paxos::paxos_store::try_get_base_table(name)) {
|
|
||||||
// Paxos state table is internally managed by Scylla and it shouldn't be exposed to the user.
|
|
||||||
// The table is allowed to be described as a comment to ease administrative work but it's hidden from all listings.
|
|
||||||
fragmented_ostringstream os{};
|
|
||||||
|
|
||||||
fmt::format_to(os.to_iter(),
|
|
||||||
"/* Do NOT execute this statement! It's only for informational purposes.\n"
|
|
||||||
" A paxos state table is created automatically when enabling LWT on a base table.\n"
|
|
||||||
"\n{}\n"
|
|
||||||
"*/",
|
|
||||||
*table_desc.create_statement);
|
|
||||||
|
|
||||||
table_desc.create_statement = std::move(os).to_managed_string();
|
table_desc.create_statement = std::move(os).to_managed_string();
|
||||||
}
|
}
|
||||||
result.push_back(std::move(table_desc));
|
result.push_back(std::move(table_desc));
|
||||||
@@ -378,7 +364,7 @@ future<std::vector<description>> table(const data_dictionary::database& db, cons
|
|||||||
future<std::vector<description>> tables(const data_dictionary::database& db, const lw_shared_ptr<keyspace_metadata>& ks, std::optional<bool> with_internals = std::nullopt) {
|
future<std::vector<description>> tables(const data_dictionary::database& db, const lw_shared_ptr<keyspace_metadata>& ks, std::optional<bool> with_internals = std::nullopt) {
|
||||||
auto& replica_db = db.real_database();
|
auto& replica_db = db.real_database();
|
||||||
auto tables = ks->tables() | std::views::filter([&replica_db] (const schema_ptr& s) {
|
auto tables = ks->tables() | std::views::filter([&replica_db] (const schema_ptr& s) {
|
||||||
return !cdc::is_log_for_some_table(replica_db, s->ks_name(), s->cf_name()) && !service::paxos::paxos_store::try_get_base_table(s->cf_name());
|
return !cdc::is_log_for_some_table(replica_db, s->ks_name(), s->cf_name());
|
||||||
}) | std::ranges::to<std::vector<schema_ptr>>();
|
}) | std::ranges::to<std::vector<schema_ptr>>();
|
||||||
std::ranges::sort(tables, std::ranges::less(), std::mem_fn(&schema::cf_name));
|
std::ranges::sort(tables, std::ranges::less(), std::mem_fn(&schema::cf_name));
|
||||||
|
|
||||||
|
|||||||
@@ -259,9 +259,11 @@ uint32_t select_statement::get_bound_terms() const {
|
|||||||
|
|
||||||
future<> select_statement::check_access(query_processor& qp, const service::client_state& state) const {
|
future<> select_statement::check_access(query_processor& qp, const service::client_state& state) const {
|
||||||
try {
|
try {
|
||||||
auto cdc = qp.db().get_cdc_base_table(*_schema);
|
const data_dictionary::database db = qp.db();
|
||||||
auto& cf_name = _schema->is_view()
|
auto&& s = db.find_schema(keyspace(), column_family());
|
||||||
? _schema->view_info()->base_name()
|
auto cdc = db.get_cdc_base_table(*s);
|
||||||
|
auto& cf_name = s->is_view()
|
||||||
|
? s->view_info()->base_name()
|
||||||
: (cdc ? cdc->cf_name() : column_family());
|
: (cdc ? cdc->cf_name() : column_family());
|
||||||
const schema_ptr& base_schema = cdc ? cdc : _schema;
|
const schema_ptr& base_schema = cdc ? cdc : _schema;
|
||||||
bool is_vector_indexed = secondary_index::vector_index::has_vector_index(*base_schema);
|
bool is_vector_indexed = secondary_index::vector_index::has_vector_index(*base_schema);
|
||||||
@@ -2004,7 +2006,9 @@ static std::optional<ann_ordering_info> get_ann_ordering_info(
|
|||||||
|
|
||||||
auto indexes = sim.list_indexes();
|
auto indexes = sim.list_indexes();
|
||||||
auto it = std::find_if(indexes.begin(), indexes.end(), [&prepared_ann_ordering](const auto& ind) {
|
auto it = std::find_if(indexes.begin(), indexes.end(), [&prepared_ann_ordering](const auto& ind) {
|
||||||
return secondary_index::vector_index::is_vector_index_on_column(ind.metadata(), prepared_ann_ordering.first->name_as_text());
|
return (ind.metadata().options().contains(db::index::secondary_index::custom_class_option_name) &&
|
||||||
|
ind.metadata().options().at(db::index::secondary_index::custom_class_option_name) == ANN_CUSTOM_INDEX_OPTION) &&
|
||||||
|
(ind.target_column() == prepared_ann_ordering.first->name_as_text());
|
||||||
});
|
});
|
||||||
|
|
||||||
if (it == indexes.end()) {
|
if (it == indexes.end()) {
|
||||||
|
|||||||
@@ -55,21 +55,8 @@ int32_t batchlog_shard_of(db_clock::time_point written_at) {
|
|||||||
return hash & ((1ULL << batchlog_shard_bits) - 1);
|
return hash & ((1ULL << batchlog_shard_bits) - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_batchlog_v1(const schema& schema) {
|
|
||||||
return schema.cf_name() == system_keyspace::BATCHLOG;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::pair<partition_key, clustering_key>
|
std::pair<partition_key, clustering_key>
|
||||||
get_batchlog_key(const schema& schema, int32_t version, db::batchlog_stage stage, int32_t batchlog_shard, db_clock::time_point written_at, std::optional<utils::UUID> id) {
|
get_batchlog_key(const schema& schema, int32_t version, db::batchlog_stage stage, int32_t batchlog_shard, db_clock::time_point written_at, std::optional<utils::UUID> id) {
|
||||||
if (is_batchlog_v1(schema)) {
|
|
||||||
if (!id) {
|
|
||||||
on_internal_error(blogger, "get_batchlog_key(): key for batchlog v1 requires batchlog id");
|
|
||||||
}
|
|
||||||
auto pkey = partition_key::from_single_value(schema, {serialized(*id)});
|
|
||||||
auto ckey = clustering_key::make_empty();
|
|
||||||
return std::pair(std::move(pkey), std::move(ckey));
|
|
||||||
}
|
|
||||||
|
|
||||||
auto pkey = partition_key::from_exploded(schema, {serialized(version), serialized(int8_t(stage)), serialized(batchlog_shard)});
|
auto pkey = partition_key::from_exploded(schema, {serialized(version), serialized(int8_t(stage)), serialized(batchlog_shard)});
|
||||||
|
|
||||||
std::vector<bytes> ckey_components;
|
std::vector<bytes> ckey_components;
|
||||||
@@ -98,14 +85,6 @@ mutation get_batchlog_mutation_for(schema_ptr schema, managed_bytes data, int32_
|
|||||||
auto cdef_data = schema->get_column_definition(to_bytes("data"));
|
auto cdef_data = schema->get_column_definition(to_bytes("data"));
|
||||||
m.set_cell(ckey, *cdef_data, atomic_cell::make_live(*cdef_data->type, timestamp, std::move(data)));
|
m.set_cell(ckey, *cdef_data, atomic_cell::make_live(*cdef_data->type, timestamp, std::move(data)));
|
||||||
|
|
||||||
if (is_batchlog_v1(*schema)) {
|
|
||||||
auto cdef_version = schema->get_column_definition(to_bytes("version"));
|
|
||||||
m.set_cell(ckey, *cdef_version, atomic_cell::make_live(*cdef_version->type, timestamp, serialized(version)));
|
|
||||||
|
|
||||||
auto cdef_written_at = schema->get_column_definition(to_bytes("written_at"));
|
|
||||||
m.set_cell(ckey, *cdef_written_at, atomic_cell::make_live(*cdef_written_at->type, timestamp, serialized(now)));
|
|
||||||
}
|
|
||||||
|
|
||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -143,10 +122,9 @@ mutation get_batchlog_delete_mutation(schema_ptr schema, int32_t version, db_clo
|
|||||||
const std::chrono::seconds db::batchlog_manager::replay_interval;
|
const std::chrono::seconds db::batchlog_manager::replay_interval;
|
||||||
const uint32_t db::batchlog_manager::page_size;
|
const uint32_t db::batchlog_manager::page_size;
|
||||||
|
|
||||||
db::batchlog_manager::batchlog_manager(cql3::query_processor& qp, db::system_keyspace& sys_ks, gms::feature_service& fs, batchlog_manager_config config)
|
db::batchlog_manager::batchlog_manager(cql3::query_processor& qp, db::system_keyspace& sys_ks, batchlog_manager_config config)
|
||||||
: _qp(qp)
|
: _qp(qp)
|
||||||
, _sys_ks(sys_ks)
|
, _sys_ks(sys_ks)
|
||||||
, _fs(fs)
|
|
||||||
, _replay_timeout(config.replay_timeout)
|
, _replay_timeout(config.replay_timeout)
|
||||||
, _replay_rate(config.replay_rate)
|
, _replay_rate(config.replay_rate)
|
||||||
, _delay(config.delay)
|
, _delay(config.delay)
|
||||||
@@ -322,39 +300,42 @@ future<> db::batchlog_manager::maybe_migrate_v1_to_v2() {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace {
|
future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches(post_replay_cleanup cleanup) {
|
||||||
|
co_await maybe_migrate_v1_to_v2();
|
||||||
|
|
||||||
using clock_type = db_clock::rep;
|
typedef db_clock::rep clock_type;
|
||||||
|
|
||||||
struct replay_stats {
|
db::all_batches_replayed all_replayed = all_batches_replayed::yes;
|
||||||
|
// rate limit is in bytes per second. Uses Double.MAX_VALUE if disabled (set to 0 in cassandra.yaml).
|
||||||
|
// max rate is scaled by the number of nodes in the cluster (same as for HHOM - see CASSANDRA-5272).
|
||||||
|
auto throttle = _replay_rate / _qp.proxy().get_token_metadata_ptr()->count_normal_token_owners();
|
||||||
|
auto limiter = make_lw_shared<utils::rate_limiter>(throttle);
|
||||||
|
|
||||||
|
auto schema = _qp.db().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG_V2);
|
||||||
|
|
||||||
|
struct replay_stats {
|
||||||
std::optional<db_clock::time_point> min_too_fresh;
|
std::optional<db_clock::time_point> min_too_fresh;
|
||||||
bool need_cleanup = false;
|
bool need_cleanup = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // anonymous namespace
|
std::unordered_map<int32_t, replay_stats> replay_stats_per_shard;
|
||||||
|
|
||||||
static future<db::all_batches_replayed> process_batch(
|
// Use a stable `now` across all batches, so skip/replay decisions are the
|
||||||
cql3::query_processor& qp,
|
// same across a while prefix of written_at (across all ids).
|
||||||
db::batchlog_manager::stats& stats,
|
const auto now = db_clock::now();
|
||||||
db::batchlog_manager::post_replay_cleanup cleanup,
|
|
||||||
utils::rate_limiter& limiter,
|
auto batch = [this, cleanup, limiter, schema, &all_replayed, &replay_stats_per_shard, now] (const cql3::untyped_result_set::row& row) -> future<stop_iteration> {
|
||||||
schema_ptr schema,
|
const auto stage = static_cast<batchlog_stage>(row.get_as<int8_t>("stage"));
|
||||||
std::unordered_map<int32_t, replay_stats>& replay_stats_per_shard,
|
const auto batch_shard = row.get_as<int32_t>("shard");
|
||||||
const db_clock::time_point now,
|
|
||||||
db_clock::duration replay_timeout,
|
|
||||||
std::chrono::seconds write_timeout,
|
|
||||||
const cql3::untyped_result_set::row& row) {
|
|
||||||
const bool is_v1 = db::is_batchlog_v1(*schema);
|
|
||||||
const auto stage = is_v1 ? db::batchlog_stage::initial : static_cast<db::batchlog_stage>(row.get_as<int8_t>("stage"));
|
|
||||||
const auto batch_shard = is_v1 ? 0 : row.get_as<int32_t>("shard");
|
|
||||||
auto written_at = row.get_as<db_clock::time_point>("written_at");
|
auto written_at = row.get_as<db_clock::time_point>("written_at");
|
||||||
auto id = row.get_as<utils::UUID>("id");
|
auto id = row.get_as<utils::UUID>("id");
|
||||||
// enough time for the actual write + batchlog entry mutation delivery (two separate requests).
|
// enough time for the actual write + batchlog entry mutation delivery (two separate requests).
|
||||||
auto timeout = replay_timeout;
|
auto timeout = _replay_timeout;
|
||||||
|
|
||||||
if (utils::get_local_injector().is_enabled("skip_batch_replay")) {
|
if (utils::get_local_injector().is_enabled("skip_batch_replay")) {
|
||||||
blogger.debug("Skipping batch replay due to skip_batch_replay injection");
|
blogger.debug("Skipping batch replay due to skip_batch_replay injection");
|
||||||
co_return db::all_batches_replayed::no;
|
all_replayed = all_batches_replayed::no;
|
||||||
|
co_return stop_iteration::no;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto data = row.get_blob_unfragmented("data");
|
auto data = row.get_blob_unfragmented("data");
|
||||||
@@ -371,7 +352,7 @@ static future<db::all_batches_replayed> process_batch(
|
|||||||
auto in = ser::as_input_stream(data);
|
auto in = ser::as_input_stream(data);
|
||||||
while (in.size()) {
|
while (in.size()) {
|
||||||
auto fm = ser::deserialize(in, std::type_identity<canonical_mutation>());
|
auto fm = ser::deserialize(in, std::type_identity<canonical_mutation>());
|
||||||
const auto tbl = qp.db().try_find_table(fm.column_family_id());
|
const auto tbl = _qp.db().try_find_table(fm.column_family_id());
|
||||||
if (!tbl) {
|
if (!tbl) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -390,7 +371,7 @@ static future<db::all_batches_replayed> process_batch(
|
|||||||
|
|
||||||
shard_written_at.min_too_fresh = std::min(shard_written_at.min_too_fresh.value_or(written_at), written_at);
|
shard_written_at.min_too_fresh = std::min(shard_written_at.min_too_fresh.value_or(written_at), written_at);
|
||||||
|
|
||||||
co_return db::all_batches_replayed::no;
|
co_return stop_iteration::no;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto size = data.size();
|
auto size = data.size();
|
||||||
@@ -421,13 +402,13 @@ static future<db::all_batches_replayed> process_batch(
|
|||||||
// Our normal write path does not add much redundancy to the dispatch, and rate is handled after send
|
// Our normal write path does not add much redundancy to the dispatch, and rate is handled after send
|
||||||
// in both cases.
|
// in both cases.
|
||||||
// FIXME: verify that the above is reasonably true.
|
// FIXME: verify that the above is reasonably true.
|
||||||
co_await limiter.reserve(size);
|
co_await limiter->reserve(size);
|
||||||
stats.write_attempts += mutations.size();
|
_stats.write_attempts += mutations.size();
|
||||||
auto timeout = db::timeout_clock::now() + write_timeout;
|
auto timeout = db::timeout_clock::now() + write_timeout;
|
||||||
if (cleanup) {
|
if (cleanup) {
|
||||||
co_await qp.proxy().send_batchlog_replay_to_all_replicas(mutations, timeout);
|
co_await _qp.proxy().send_batchlog_replay_to_all_replicas(mutations, timeout);
|
||||||
} else {
|
} else {
|
||||||
co_await qp.proxy().send_batchlog_replay_to_all_replicas(std::move(mutations), timeout);
|
co_await _qp.proxy().send_batchlog_replay_to_all_replicas(std::move(mutations), timeout);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -437,91 +418,31 @@ static future<db::all_batches_replayed> process_batch(
|
|||||||
// As above -- we should drop the batch if the table doesn't exist anymore.
|
// As above -- we should drop the batch if the table doesn't exist anymore.
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
blogger.warn("Replay failed (will retry): {}", std::current_exception());
|
blogger.warn("Replay failed (will retry): {}", std::current_exception());
|
||||||
|
all_replayed = all_batches_replayed::no;
|
||||||
// timeout, overload etc.
|
// timeout, overload etc.
|
||||||
// Do _not_ remove the batch, assuning we got a node write error.
|
// Do _not_ remove the batch, assuning we got a node write error.
|
||||||
// Since we don't have hints (which origin is satisfied with),
|
// Since we don't have hints (which origin is satisfied with),
|
||||||
// we have to resort to keeping this batch to next lap.
|
// we have to resort to keeping this batch to next lap.
|
||||||
if (is_v1 || !cleanup || stage == db::batchlog_stage::failed_replay) {
|
if (!cleanup || stage == batchlog_stage::failed_replay) {
|
||||||
co_return db::all_batches_replayed::no;
|
co_return stop_iteration::no;
|
||||||
}
|
}
|
||||||
send_failed = true;
|
send_failed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto& sp = qp.proxy();
|
auto& sp = _qp.proxy();
|
||||||
|
|
||||||
if (send_failed) {
|
if (send_failed) {
|
||||||
blogger.debug("Moving batch {} to stage failed_replay", id);
|
blogger.debug("Moving batch {} to stage failed_replay", id);
|
||||||
auto m = get_batchlog_mutation_for(schema, mutations, netw::messaging_service::current_version, db::batchlog_stage::failed_replay, written_at, id);
|
auto m = get_batchlog_mutation_for(schema, mutations, netw::messaging_service::current_version, batchlog_stage::failed_replay, written_at, id);
|
||||||
co_await sp.mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
|
co_await sp.mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
|
||||||
}
|
}
|
||||||
|
|
||||||
// delete batch
|
// delete batch
|
||||||
auto m = get_batchlog_delete_mutation(schema, netw::messaging_service::current_version, stage, written_at, id);
|
auto m = get_batchlog_delete_mutation(schema, netw::messaging_service::current_version, stage, written_at, id);
|
||||||
co_await qp.proxy().mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
|
co_await _qp.proxy().mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
|
||||||
|
|
||||||
shard_written_at.need_cleanup = true;
|
shard_written_at.need_cleanup = true;
|
||||||
|
|
||||||
co_return db::all_batches_replayed(!send_failed);
|
|
||||||
}
|
|
||||||
|
|
||||||
future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches_v1(post_replay_cleanup) {
|
|
||||||
db::all_batches_replayed all_replayed = all_batches_replayed::yes;
|
|
||||||
// rate limit is in bytes per second. Uses Double.MAX_VALUE if disabled (set to 0 in cassandra.yaml).
|
|
||||||
// max rate is scaled by the number of nodes in the cluster (same as for HHOM - see CASSANDRA-5272).
|
|
||||||
auto throttle = _replay_rate / _qp.proxy().get_token_metadata_ptr()->count_normal_token_owners();
|
|
||||||
utils::rate_limiter limiter(throttle);
|
|
||||||
|
|
||||||
auto schema = _qp.db().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG);
|
|
||||||
|
|
||||||
std::unordered_map<int32_t, replay_stats> replay_stats_per_shard;
|
|
||||||
|
|
||||||
// Use a stable `now` across all batches, so skip/replay decisions are the
|
|
||||||
// same across a while prefix of written_at (across all ids).
|
|
||||||
const auto now = db_clock::now();
|
|
||||||
|
|
||||||
auto batch = [this, &limiter, schema, &all_replayed, &replay_stats_per_shard, now] (const cql3::untyped_result_set::row& row) mutable -> future<stop_iteration> {
|
|
||||||
all_replayed = all_replayed && co_await process_batch(_qp, _stats, post_replay_cleanup::no, limiter, schema, replay_stats_per_shard, now, _replay_timeout, write_timeout, row);
|
|
||||||
co_return stop_iteration::no;
|
|
||||||
};
|
|
||||||
|
|
||||||
co_await with_gate(_gate, [this, &all_replayed, batch = std::move(batch)] () mutable -> future<> {
|
|
||||||
blogger.debug("Started replayAllFailedBatches");
|
|
||||||
co_await utils::get_local_injector().inject("add_delay_to_batch_replay", std::chrono::milliseconds(1000));
|
|
||||||
|
|
||||||
auto schema = _qp.db().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG);
|
|
||||||
|
|
||||||
co_await _qp.query_internal(
|
|
||||||
format("SELECT * FROM {}.{} BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG),
|
|
||||||
db::consistency_level::ONE,
|
|
||||||
{},
|
|
||||||
page_size,
|
|
||||||
batch);
|
|
||||||
|
|
||||||
blogger.debug("Finished replayAllFailedBatches with all_replayed: {}", all_replayed);
|
|
||||||
});
|
|
||||||
|
|
||||||
co_return all_replayed;
|
|
||||||
}
|
|
||||||
|
|
||||||
future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches_v2(post_replay_cleanup cleanup) {
|
|
||||||
co_await maybe_migrate_v1_to_v2();
|
|
||||||
|
|
||||||
db::all_batches_replayed all_replayed = all_batches_replayed::yes;
|
|
||||||
// rate limit is in bytes per second. Uses Double.MAX_VALUE if disabled (set to 0 in cassandra.yaml).
|
|
||||||
// max rate is scaled by the number of nodes in the cluster (same as for HHOM - see CASSANDRA-5272).
|
|
||||||
auto throttle = _replay_rate / _qp.proxy().get_token_metadata_ptr()->count_normal_token_owners();
|
|
||||||
utils::rate_limiter limiter(throttle);
|
|
||||||
|
|
||||||
auto schema = _qp.db().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG_V2);
|
|
||||||
|
|
||||||
std::unordered_map<int32_t, replay_stats> replay_stats_per_shard;
|
|
||||||
|
|
||||||
// Use a stable `now` across all batches, so skip/replay decisions are the
|
|
||||||
// same across a while prefix of written_at (across all ids).
|
|
||||||
const auto now = db_clock::now();
|
|
||||||
|
|
||||||
auto batch = [this, cleanup, &limiter, schema, &all_replayed, &replay_stats_per_shard, now] (const cql3::untyped_result_set::row& row) mutable -> future<stop_iteration> {
|
|
||||||
all_replayed = all_replayed && co_await process_batch(_qp, _stats, cleanup, limiter, schema, replay_stats_per_shard, now, _replay_timeout, write_timeout, row);
|
|
||||||
co_return stop_iteration::no;
|
co_return stop_iteration::no;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -580,10 +501,3 @@ future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches
|
|||||||
|
|
||||||
co_return all_replayed;
|
co_return all_replayed;
|
||||||
}
|
}
|
||||||
|
|
||||||
future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches(post_replay_cleanup cleanup) {
|
|
||||||
if (_fs.batchlog_v2) {
|
|
||||||
return replay_all_failed_batches_v2(cleanup);
|
|
||||||
}
|
|
||||||
return replay_all_failed_batches_v1(cleanup);
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -27,12 +27,6 @@ class query_processor;
|
|||||||
|
|
||||||
} // namespace cql3
|
} // namespace cql3
|
||||||
|
|
||||||
namespace gms {
|
|
||||||
|
|
||||||
class feature_service;
|
|
||||||
|
|
||||||
} // namespace gms
|
|
||||||
|
|
||||||
namespace db {
|
namespace db {
|
||||||
|
|
||||||
class system_keyspace;
|
class system_keyspace;
|
||||||
@@ -55,11 +49,6 @@ class batchlog_manager : public peering_sharded_service<batchlog_manager> {
|
|||||||
public:
|
public:
|
||||||
using post_replay_cleanup = bool_class<class post_replay_cleanup_tag>;
|
using post_replay_cleanup = bool_class<class post_replay_cleanup_tag>;
|
||||||
|
|
||||||
struct stats {
|
|
||||||
uint64_t write_attempts = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static constexpr std::chrono::seconds replay_interval = std::chrono::seconds(60);
|
static constexpr std::chrono::seconds replay_interval = std::chrono::seconds(60);
|
||||||
static constexpr uint32_t page_size = 128; // same as HHOM, for now, w/out using any heuristics. TODO: set based on avg batch size.
|
static constexpr uint32_t page_size = 128; // same as HHOM, for now, w/out using any heuristics. TODO: set based on avg batch size.
|
||||||
@@ -67,13 +56,14 @@ private:
|
|||||||
|
|
||||||
using clock_type = lowres_clock;
|
using clock_type = lowres_clock;
|
||||||
|
|
||||||
stats _stats;
|
struct stats {
|
||||||
|
uint64_t write_attempts = 0;
|
||||||
|
} _stats;
|
||||||
|
|
||||||
seastar::metrics::metric_groups _metrics;
|
seastar::metrics::metric_groups _metrics;
|
||||||
|
|
||||||
cql3::query_processor& _qp;
|
cql3::query_processor& _qp;
|
||||||
db::system_keyspace& _sys_ks;
|
db::system_keyspace& _sys_ks;
|
||||||
gms::feature_service& _fs;
|
|
||||||
db_clock::duration _replay_timeout;
|
db_clock::duration _replay_timeout;
|
||||||
uint64_t _replay_rate;
|
uint64_t _replay_rate;
|
||||||
std::chrono::milliseconds _delay;
|
std::chrono::milliseconds _delay;
|
||||||
@@ -94,14 +84,12 @@ private:
|
|||||||
|
|
||||||
future<> maybe_migrate_v1_to_v2();
|
future<> maybe_migrate_v1_to_v2();
|
||||||
|
|
||||||
future<all_batches_replayed> replay_all_failed_batches_v1(post_replay_cleanup cleanup);
|
|
||||||
future<all_batches_replayed> replay_all_failed_batches_v2(post_replay_cleanup cleanup);
|
|
||||||
future<all_batches_replayed> replay_all_failed_batches(post_replay_cleanup cleanup);
|
future<all_batches_replayed> replay_all_failed_batches(post_replay_cleanup cleanup);
|
||||||
public:
|
public:
|
||||||
// Takes a QP, not a distributes. Because this object is supposed
|
// Takes a QP, not a distributes. Because this object is supposed
|
||||||
// to be per shard and does no dispatching beyond delegating the the
|
// to be per shard and does no dispatching beyond delegating the the
|
||||||
// shard qp (which is what you feed here).
|
// shard qp (which is what you feed here).
|
||||||
batchlog_manager(cql3::query_processor&, db::system_keyspace& sys_ks, gms::feature_service& fs, batchlog_manager_config config);
|
batchlog_manager(cql3::query_processor&, db::system_keyspace& sys_ks, batchlog_manager_config config);
|
||||||
|
|
||||||
// abort the replay loop and return its future.
|
// abort the replay loop and return its future.
|
||||||
future<> drain();
|
future<> drain();
|
||||||
@@ -114,7 +102,7 @@ public:
|
|||||||
return _last_replay;
|
return _last_replay;
|
||||||
}
|
}
|
||||||
|
|
||||||
const stats& get_stats() const {
|
const stats& stats() const {
|
||||||
return _stats;
|
return _stats;
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
|
|||||||
@@ -1986,13 +1986,13 @@ future<> db::commitlog::segment_manager::replenish_reserve() {
|
|||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
} catch (shutdown_marker&) {
|
} catch (shutdown_marker&) {
|
||||||
|
_reserve_segments.abort(std::current_exception());
|
||||||
break;
|
break;
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
clogger.warn("Exception in segment reservation: {}", std::current_exception());
|
clogger.warn("Exception in segment reservation: {}", std::current_exception());
|
||||||
}
|
}
|
||||||
co_await sleep(100ms);
|
co_await sleep(100ms);
|
||||||
}
|
}
|
||||||
_reserve_segments.abort(std::make_exception_ptr(shutdown_marker()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
future<std::vector<db::commitlog::descriptor>>
|
future<std::vector<db::commitlog::descriptor>>
|
||||||
|
|||||||
20
db/config.cc
20
db/config.cc
@@ -1201,13 +1201,13 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
|||||||
"* org.apache.cassandra.auth.CassandraRoleManager: Stores role data in the system_auth keyspace;\n"
|
"* org.apache.cassandra.auth.CassandraRoleManager: Stores role data in the system_auth keyspace;\n"
|
||||||
"* com.scylladb.auth.LDAPRoleManager: Fetches role data from an LDAP server.")
|
"* com.scylladb.auth.LDAPRoleManager: Fetches role data from an LDAP server.")
|
||||||
, permissions_validity_in_ms(this, "permissions_validity_in_ms", liveness::LiveUpdate, value_status::Used, 10000,
|
, permissions_validity_in_ms(this, "permissions_validity_in_ms", liveness::LiveUpdate, value_status::Used, 10000,
|
||||||
"How long authorized statements cache entries remain valid. The cached value is considered valid as long as both its value is not older than the permissions_validity_in_ms "
|
"How long permissions in cache remain valid. Depending on the authorizer, such as CassandraAuthorizer, fetching permissions can be resource intensive. Permissions caching is disabled when this property is set to 0 or when AllowAllAuthorizer is used. The cached value is considered valid as long as both its value is not older than the permissions_validity_in_ms "
|
||||||
"and the cached value has been read at least once during the permissions_validity_in_ms time frame. If any of these two conditions doesn't hold the cached value is going to be evicted from the cache.\n"
|
"and the cached value has been read at least once during the permissions_validity_in_ms time frame. If any of these two conditions doesn't hold the cached value is going to be evicted from the cache.\n"
|
||||||
"\n"
|
"\n"
|
||||||
"Related information: Object permissions")
|
"Related information: Object permissions")
|
||||||
, permissions_update_interval_in_ms(this, "permissions_update_interval_in_ms", liveness::LiveUpdate, value_status::Used, 2000,
|
, permissions_update_interval_in_ms(this, "permissions_update_interval_in_ms", liveness::LiveUpdate, value_status::Used, 2000,
|
||||||
"Refresh interval for authorized statements cache. After this interval, cache entries become eligible for refresh. An async reload is scheduled every permissions_update_interval_in_ms time period and the old value is returned until it completes. If permissions_validity_in_ms has a non-zero value, then this property must also have a non-zero value. It's recommended to set this value to be at least 3 times smaller than the permissions_validity_in_ms. This option additionally controls the permissions refresh interval for LDAP.")
|
"Refresh interval for permissions cache (if enabled). After this interval, cache entries become eligible for refresh. An async reload is scheduled every permissions_update_interval_in_ms time period and the old value is returned until it completes. If permissions_validity_in_ms has a non-zero value, then this property must also have a non-zero value. It's recommended to set this value to be at least 3 times smaller than the permissions_validity_in_ms.")
|
||||||
, permissions_cache_max_entries(this, "permissions_cache_max_entries", liveness::LiveUpdate, value_status::Unused, 1000,
|
, permissions_cache_max_entries(this, "permissions_cache_max_entries", liveness::LiveUpdate, value_status::Used, 1000,
|
||||||
"Maximum cached permission entries. Must have a non-zero value if permissions caching is enabled (see a permissions_validity_in_ms description).")
|
"Maximum cached permission entries. Must have a non-zero value if permissions caching is enabled (see a permissions_validity_in_ms description).")
|
||||||
, server_encryption_options(this, "server_encryption_options", value_status::Used, {/*none*/},
|
, server_encryption_options(this, "server_encryption_options", value_status::Used, {/*none*/},
|
||||||
"Enable or disable inter-node encryption. You must also generate keys and provide the appropriate key and trust store locations and passwords. The available options are:\n"
|
"Enable or disable inter-node encryption. You must also generate keys and provide the appropriate key and trust store locations and passwords. The available options are:\n"
|
||||||
@@ -1272,7 +1272,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
|||||||
, ignore_dead_nodes_for_replace(this, "ignore_dead_nodes_for_replace", value_status::Used, "", "List dead nodes to ignore for replace operation using a comma-separated list of host IDs. E.g., scylla --ignore-dead-nodes-for-replace 8d5ed9f4-7764-4dbd-bad8-43fddce94b7c,125ed9f4-7777-1dbn-mac8-43fddce9123e")
|
, ignore_dead_nodes_for_replace(this, "ignore_dead_nodes_for_replace", value_status::Used, "", "List dead nodes to ignore for replace operation using a comma-separated list of host IDs. E.g., scylla --ignore-dead-nodes-for-replace 8d5ed9f4-7764-4dbd-bad8-43fddce94b7c,125ed9f4-7777-1dbn-mac8-43fddce9123e")
|
||||||
, override_decommission(this, "override_decommission", value_status::Deprecated, false, "Set true to force a decommissioned node to join the cluster (cannot be set if consistent-cluster-management is enabled).")
|
, override_decommission(this, "override_decommission", value_status::Deprecated, false, "Set true to force a decommissioned node to join the cluster (cannot be set if consistent-cluster-management is enabled).")
|
||||||
, enable_repair_based_node_ops(this, "enable_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, true, "Set true to use enable repair based node operations instead of streaming based.")
|
, enable_repair_based_node_ops(this, "enable_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, true, "Set true to use enable repair based node operations instead of streaming based.")
|
||||||
, allowed_repair_based_node_ops(this, "allowed_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, "replace,removenode,rebuild", "A comma separated list of node operations which are allowed to enable repair based node operations. The operations can be bootstrap, replace, removenode, decommission and rebuild.")
|
, allowed_repair_based_node_ops(this, "allowed_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, "replace,removenode,rebuild,bootstrap,decommission", "A comma separated list of node operations which are allowed to enable repair based node operations. The operations can be bootstrap, replace, removenode, decommission and rebuild.")
|
||||||
, enable_compacting_data_for_streaming_and_repair(this, "enable_compacting_data_for_streaming_and_repair", liveness::LiveUpdate, value_status::Used, true, "Enable the compacting reader, which compacts the data for streaming and repair (load'n'stream included) before sending it to, or synchronizing it with peers. Can reduce the amount of data to be processed by removing dead data, but adds CPU overhead.")
|
, enable_compacting_data_for_streaming_and_repair(this, "enable_compacting_data_for_streaming_and_repair", liveness::LiveUpdate, value_status::Used, true, "Enable the compacting reader, which compacts the data for streaming and repair (load'n'stream included) before sending it to, or synchronizing it with peers. Can reduce the amount of data to be processed by removing dead data, but adds CPU overhead.")
|
||||||
, enable_tombstone_gc_for_streaming_and_repair(this, "enable_tombstone_gc_for_streaming_and_repair", liveness::LiveUpdate, value_status::Used, false,
|
, enable_tombstone_gc_for_streaming_and_repair(this, "enable_tombstone_gc_for_streaming_and_repair", liveness::LiveUpdate, value_status::Used, false,
|
||||||
"If the compacting reader is enabled for streaming and repair (see enable_compacting_data_for_streaming_and_repair), allow it to garbage-collect tombstones."
|
"If the compacting reader is enabled for streaming and repair (see enable_compacting_data_for_streaming_and_repair), allow it to garbage-collect tombstones."
|
||||||
@@ -1292,7 +1292,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
|||||||
, fd_initial_value_ms(this, "fd_initial_value_ms", value_status::Used, 2 * 1000, "The initial failure_detector interval time in milliseconds.")
|
, fd_initial_value_ms(this, "fd_initial_value_ms", value_status::Used, 2 * 1000, "The initial failure_detector interval time in milliseconds.")
|
||||||
, shutdown_announce_in_ms(this, "shutdown_announce_in_ms", value_status::Used, 2 * 1000, "Time a node waits after sending gossip shutdown message in milliseconds. Same as -Dcassandra.shutdown_announce_in_ms in cassandra.")
|
, shutdown_announce_in_ms(this, "shutdown_announce_in_ms", value_status::Used, 2 * 1000, "Time a node waits after sending gossip shutdown message in milliseconds. Same as -Dcassandra.shutdown_announce_in_ms in cassandra.")
|
||||||
, developer_mode(this, "developer_mode", value_status::Used, DEVELOPER_MODE_DEFAULT, "Relax environment checks. Setting to true can reduce performance and reliability significantly.")
|
, developer_mode(this, "developer_mode", value_status::Used, DEVELOPER_MODE_DEFAULT, "Relax environment checks. Setting to true can reduce performance and reliability significantly.")
|
||||||
, skip_wait_for_gossip_to_settle(this, "skip_wait_for_gossip_to_settle", value_status::Deprecated, -1, "An integer to configure the wait for gossip to settle. -1: wait normally, 0: do not wait at all, n: wait for at most n polls. Same as -Dcassandra.skip_wait_for_gossip_to_settle in cassandra.")
|
, skip_wait_for_gossip_to_settle(this, "skip_wait_for_gossip_to_settle", value_status::Used, -1, "An integer to configure the wait for gossip to settle. -1: wait normally, 0: do not wait at all, n: wait for at most n polls. Same as -Dcassandra.skip_wait_for_gossip_to_settle in cassandra.")
|
||||||
, force_gossip_generation(this, "force_gossip_generation", liveness::LiveUpdate, value_status::Used, -1 , "Force gossip to use the generation number provided by user.")
|
, force_gossip_generation(this, "force_gossip_generation", liveness::LiveUpdate, value_status::Used, -1 , "Force gossip to use the generation number provided by user.")
|
||||||
, experimental_features(this, "experimental_features", value_status::Used, {}, experimental_features_help_string())
|
, experimental_features(this, "experimental_features", value_status::Used, {}, experimental_features_help_string())
|
||||||
, lsa_reclamation_step(this, "lsa_reclamation_step", value_status::Used, 1, "Minimum number of segments to reclaim in a single step.")
|
, lsa_reclamation_step(this, "lsa_reclamation_step", value_status::Used, 1, "Minimum number of segments to reclaim in a single step.")
|
||||||
@@ -1498,7 +1498,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
|||||||
, index_cache_fraction(this, "index_cache_fraction", liveness::LiveUpdate, value_status::Used, 0.2,
|
, index_cache_fraction(this, "index_cache_fraction", liveness::LiveUpdate, value_status::Used, 0.2,
|
||||||
"The maximum fraction of cache memory permitted for use by index cache. Clamped to the [0.0; 1.0] range. Must be small enough to not deprive the row cache of memory, but should be big enough to fit a large fraction of the index. The default value 0.2 means that at least 80\% of cache memory is reserved for the row cache, while at most 20\% is usable by the index cache.")
|
"The maximum fraction of cache memory permitted for use by index cache. Clamped to the [0.0; 1.0] range. Must be small enough to not deprive the row cache of memory, but should be big enough to fit a large fraction of the index. The default value 0.2 means that at least 80\% of cache memory is reserved for the row cache, while at most 20\% is usable by the index cache.")
|
||||||
, consistent_cluster_management(this, "consistent_cluster_management", value_status::Deprecated, true, "Use RAFT for cluster management and DDL.")
|
, consistent_cluster_management(this, "consistent_cluster_management", value_status::Deprecated, true, "Use RAFT for cluster management and DDL.")
|
||||||
, force_gossip_topology_changes(this, "force_gossip_topology_changes", value_status::Deprecated, false, "Force gossip-based topology operations in a fresh cluster. Only the first node in the cluster must use it. The rest will fall back to gossip-based operations anyway. This option should be used only for testing. Note: gossip topology changes are incompatible with tablets.")
|
, force_gossip_topology_changes(this, "force_gossip_topology_changes", value_status::Used, false, "Force gossip-based topology operations in a fresh cluster. Only the first node in the cluster must use it. The rest will fall back to gossip-based operations anyway. This option should be used only for testing. Note: gossip topology changes are incompatible with tablets.")
|
||||||
, recovery_leader(this, "recovery_leader", liveness::LiveUpdate, value_status::Used, utils::null_uuid(), "Host ID of the node restarted first while performing the Manual Raft-based Recovery Procedure. Warning: this option disables some guardrails for the needs of the Manual Raft-based Recovery Procedure. Make sure you unset it at the end of the procedure.")
|
, recovery_leader(this, "recovery_leader", liveness::LiveUpdate, value_status::Used, utils::null_uuid(), "Host ID of the node restarted first while performing the Manual Raft-based Recovery Procedure. Warning: this option disables some guardrails for the needs of the Manual Raft-based Recovery Procedure. Make sure you unset it at the end of the procedure.")
|
||||||
, wasm_cache_memory_fraction(this, "wasm_cache_memory_fraction", value_status::Used, 0.01, "Maximum total size of all WASM instances stored in the cache as fraction of total shard memory.")
|
, wasm_cache_memory_fraction(this, "wasm_cache_memory_fraction", value_status::Used, 0.01, "Maximum total size of all WASM instances stored in the cache as fraction of total shard memory.")
|
||||||
, wasm_cache_timeout_in_ms(this, "wasm_cache_timeout_in_ms", value_status::Used, 5000, "Time after which an instance is evicted from the cache.")
|
, wasm_cache_timeout_in_ms(this, "wasm_cache_timeout_in_ms", value_status::Used, 5000, "Time after which an instance is evicted from the cache.")
|
||||||
@@ -1527,21 +1527,17 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
|||||||
"Allows target tablet size to be configured. Defaults to 5G (in bytes). Maintaining tablets at reasonable sizes is important to be able to " \
|
"Allows target tablet size to be configured. Defaults to 5G (in bytes). Maintaining tablets at reasonable sizes is important to be able to " \
|
||||||
"redistribute load. A higher value means tablet migration throughput can be reduced. A lower value may cause number of tablets to increase significantly, " \
|
"redistribute load. A higher value means tablet migration throughput can be reduced. A lower value may cause number of tablets to increase significantly, " \
|
||||||
"potentially resulting in performance drawbacks.")
|
"potentially resulting in performance drawbacks.")
|
||||||
, tablet_streaming_read_concurrency_per_shard(this, "tablet_streaming_read_concurrency_per_shard", liveness::LiveUpdate, value_status::Used, 2,
|
|
||||||
"Maximum number of tablets which may be leaving a shard at the same time. Effecting only on topology coordinator. Set to the same value on all nodes.")
|
|
||||||
, tablet_streaming_write_concurrency_per_shard(this, "tablet_streaming_write_concurrency_per_shard", liveness::LiveUpdate, value_status::Used, 2,
|
|
||||||
"Maximum number of tablets which may be pending on a shard at the same time. Effecting only on topology coordinator. Set to the same value on all nodes.")
|
|
||||||
, replication_strategy_warn_list(this, "replication_strategy_warn_list", liveness::LiveUpdate, value_status::Used, {locator::replication_strategy_type::simple}, "Controls which replication strategies to warn about when creating/altering a keyspace. Doesn't affect the pre-existing keyspaces.")
|
, replication_strategy_warn_list(this, "replication_strategy_warn_list", liveness::LiveUpdate, value_status::Used, {locator::replication_strategy_type::simple}, "Controls which replication strategies to warn about when creating/altering a keyspace. Doesn't affect the pre-existing keyspaces.")
|
||||||
, replication_strategy_fail_list(this, "replication_strategy_fail_list", liveness::LiveUpdate, value_status::Used, {}, "Controls which replication strategies are disallowed to be used when creating/altering a keyspace. Doesn't affect the pre-existing keyspaces.")
|
, replication_strategy_fail_list(this, "replication_strategy_fail_list", liveness::LiveUpdate, value_status::Used, {}, "Controls which replication strategies are disallowed to be used when creating/altering a keyspace. Doesn't affect the pre-existing keyspaces.")
|
||||||
, service_levels_interval(this, "service_levels_interval_ms", liveness::LiveUpdate, value_status::Used, 10000, "Controls how often service levels module polls configuration table")
|
, service_levels_interval(this, "service_levels_interval_ms", liveness::LiveUpdate, value_status::Used, 10000, "Controls how often service levels module polls configuration table")
|
||||||
|
|
||||||
, audit(this, "audit", value_status::Used, "table",
|
, audit(this, "audit", value_status::Used, "none",
|
||||||
"Controls the audit feature:\n"
|
"Controls the audit feature:\n"
|
||||||
"\n"
|
"\n"
|
||||||
"\tnone : No auditing enabled.\n"
|
"\tnone : No auditing enabled.\n"
|
||||||
"\tsyslog : Audit messages sent to Syslog.\n"
|
"\tsyslog : Audit messages sent to Syslog.\n"
|
||||||
"\ttable : Audit messages written to column family named audit.audit_log.\n")
|
"\ttable : Audit messages written to column family named audit.audit_log.\n")
|
||||||
, audit_categories(this, "audit_categories", liveness::LiveUpdate, value_status::Used, "DCL,AUTH,ADMIN", "Comma separated list of operation categories that should be audited.")
|
, audit_categories(this, "audit_categories", liveness::LiveUpdate, value_status::Used, "DCL,DDL,AUTH", "Comma separated list of operation categories that should be audited.")
|
||||||
, audit_tables(this, "audit_tables", liveness::LiveUpdate, value_status::Used, "", "Comma separated list of table names (<keyspace>.<table>) that will be audited.")
|
, audit_tables(this, "audit_tables", liveness::LiveUpdate, value_status::Used, "", "Comma separated list of table names (<keyspace>.<table>) that will be audited.")
|
||||||
, audit_keyspaces(this, "audit_keyspaces", liveness::LiveUpdate, value_status::Used, "", "Comma separated list of keyspaces that will be audited. All tables in those keyspaces will be audited")
|
, audit_keyspaces(this, "audit_keyspaces", liveness::LiveUpdate, value_status::Used, "", "Comma separated list of keyspaces that will be audited. All tables in those keyspaces will be audited")
|
||||||
, audit_unix_socket_path(this, "audit_unix_socket_path", value_status::Used, "/dev/log", "The path to the unix socket used for writing to syslog. Only applicable when audit is set to syslog.")
|
, audit_unix_socket_path(this, "audit_unix_socket_path", value_status::Used, "/dev/log", "The path to the unix socket used for writing to syslog. Only applicable when audit is set to syslog.")
|
||||||
|
|||||||
@@ -542,8 +542,6 @@ public:
|
|||||||
named_value<double> tablets_initial_scale_factor;
|
named_value<double> tablets_initial_scale_factor;
|
||||||
named_value<unsigned> tablets_per_shard_goal;
|
named_value<unsigned> tablets_per_shard_goal;
|
||||||
named_value<uint64_t> target_tablet_size_in_bytes;
|
named_value<uint64_t> target_tablet_size_in_bytes;
|
||||||
named_value<unsigned> tablet_streaming_read_concurrency_per_shard;
|
|
||||||
named_value<unsigned> tablet_streaming_write_concurrency_per_shard;
|
|
||||||
|
|
||||||
named_value<std::vector<enum_option<replication_strategy_restriction_t>>> replication_strategy_warn_list;
|
named_value<std::vector<enum_option<replication_strategy_restriction_t>>> replication_strategy_warn_list;
|
||||||
named_value<std::vector<enum_option<replication_strategy_restriction_t>>> replication_strategy_fail_list;
|
named_value<std::vector<enum_option<replication_strategy_restriction_t>>> replication_strategy_fail_list;
|
||||||
|
|||||||
@@ -16,7 +16,6 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
|
|
||||||
#include "cql3/cql3_type.hh"
|
|
||||||
#include "types/user.hh"
|
#include "types/user.hh"
|
||||||
#include "types/map.hh"
|
#include "types/map.hh"
|
||||||
#include "types/list.hh"
|
#include "types/list.hh"
|
||||||
@@ -114,7 +113,7 @@ std::vector<data_type> type_parser::get_type_parameters(bool multicell)
|
|||||||
throw parse_exception(_str, _idx, "unexpected end of string");
|
throw parse_exception(_str, _idx, "unexpected end of string");
|
||||||
}
|
}
|
||||||
|
|
||||||
std::tuple<data_type, vector_dimension_t> type_parser::get_vector_parameters()
|
std::tuple<data_type, size_t> type_parser::get_vector_parameters()
|
||||||
{
|
{
|
||||||
if (is_eos() || _str[_idx] != '(') {
|
if (is_eos() || _str[_idx] != '(') {
|
||||||
throw std::logic_error("internal error");
|
throw std::logic_error("internal error");
|
||||||
@@ -129,7 +128,7 @@ std::tuple<data_type, vector_dimension_t> type_parser::get_vector_parameters()
|
|||||||
}
|
}
|
||||||
|
|
||||||
data_type type = do_parse(true);
|
data_type type = do_parse(true);
|
||||||
vector_dimension_t size = 0;
|
size_t size = 0;
|
||||||
if (_str[_idx] == ',') {
|
if (_str[_idx] == ',') {
|
||||||
++_idx;
|
++_idx;
|
||||||
skip_blank();
|
skip_blank();
|
||||||
@@ -143,20 +142,7 @@ std::tuple<data_type, vector_dimension_t> type_parser::get_vector_parameters()
|
|||||||
throw parse_exception(_str, _idx, "expected digit or ')'");
|
throw parse_exception(_str, _idx, "expected digit or ')'");
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned long parsed_size;
|
size = std::stoul(_str.substr(i, _idx - i));
|
||||||
try {
|
|
||||||
parsed_size = std::stoul(_str.substr(i, _idx - i));
|
|
||||||
} catch (const std::exception& e) {
|
|
||||||
throw parse_exception(_str, i, format("Invalid vector dimension: {}", e.what()));
|
|
||||||
}
|
|
||||||
static_assert(sizeof(unsigned long) >= sizeof(vector_dimension_t));
|
|
||||||
if (parsed_size == 0) {
|
|
||||||
throw parse_exception(_str, _idx, "Vectors must have a dimension greater than 0");
|
|
||||||
}
|
|
||||||
if (parsed_size > cql3::cql3_type::MAX_VECTOR_DIMENSION) {
|
|
||||||
throw parse_exception(_str, _idx, format("Vectors must have a dimension less than or equal to {}", cql3::cql3_type::MAX_VECTOR_DIMENSION));
|
|
||||||
}
|
|
||||||
size = static_cast<vector_dimension_t>(parsed_size);
|
|
||||||
|
|
||||||
++_idx; // skipping ')'
|
++_idx; // skipping ')'
|
||||||
return std::make_tuple(type, size);
|
return std::make_tuple(type, size);
|
||||||
|
|||||||
@@ -97,7 +97,7 @@ public:
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
std::vector<data_type> get_type_parameters(bool multicell=true);
|
std::vector<data_type> get_type_parameters(bool multicell=true);
|
||||||
std::tuple<data_type, vector_dimension_t> get_vector_parameters();
|
std::tuple<data_type, size_t> get_vector_parameters();
|
||||||
std::tuple<sstring, bytes, std::vector<bytes>, std::vector<data_type>> get_user_type_parameters();
|
std::tuple<sstring, bytes, std::vector<bytes>, std::vector<data_type>> get_user_type_parameters();
|
||||||
data_type do_parse(bool multicell = true);
|
data_type do_parse(bool multicell = true);
|
||||||
|
|
||||||
|
|||||||
@@ -21,16 +21,14 @@
|
|||||||
#include "replica/database.hh"
|
#include "replica/database.hh"
|
||||||
#include "replica/global_table_ptr.hh"
|
#include "replica/global_table_ptr.hh"
|
||||||
#include "sstables/sstables_manager.hh"
|
#include "sstables/sstables_manager.hh"
|
||||||
#include "service/storage_proxy.hh"
|
|
||||||
|
|
||||||
logging::logger snap_log("snapshots");
|
logging::logger snap_log("snapshots");
|
||||||
|
|
||||||
namespace db {
|
namespace db {
|
||||||
|
|
||||||
snapshot_ctl::snapshot_ctl(sharded<replica::database>& db, sharded<service::storage_proxy>& sp, tasks::task_manager& tm, sstables::storage_manager& sstm, config cfg)
|
snapshot_ctl::snapshot_ctl(sharded<replica::database>& db, tasks::task_manager& tm, sstables::storage_manager& sstm, config cfg)
|
||||||
: _config(std::move(cfg))
|
: _config(std::move(cfg))
|
||||||
, _db(db)
|
, _db(db)
|
||||||
, _sp(sp)
|
|
||||||
, _ops("snapshot_ctl")
|
, _ops("snapshot_ctl")
|
||||||
, _task_manager_module(make_shared<snapshot::task_manager_module>(tm))
|
, _task_manager_module(make_shared<snapshot::task_manager_module>(tm))
|
||||||
, _storage_manager(sstm)
|
, _storage_manager(sstm)
|
||||||
@@ -106,45 +104,6 @@ future<> snapshot_ctl::take_column_family_snapshot(sstring ks_name, std::vector<
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> snapshot_ctl::take_cluster_column_family_snapshot(std::vector<sstring> ks_names, std::vector<sstring> tables, sstring tag, snapshot_options opts) {
|
|
||||||
if (tag.empty()) {
|
|
||||||
throw std::invalid_argument("You must supply a snapshot name.");
|
|
||||||
}
|
|
||||||
if (ks_names.size() != 1 && !tables.empty()) {
|
|
||||||
throw std::invalid_argument("Cannot name tables when doing multiple keyspaces snapshot");
|
|
||||||
}
|
|
||||||
if (ks_names.empty()) {
|
|
||||||
std::ranges::copy(_db.local().get_keyspaces() | std::views::keys, std::back_inserter(ks_names));
|
|
||||||
}
|
|
||||||
|
|
||||||
return run_snapshot_modify_operation([this, ks_names = std::move(ks_names), tables = std::move(tables), tag = std::move(tag), opts] () mutable {
|
|
||||||
return do_take_cluster_column_family_snapshot(std::move(ks_names), std::move(tables), std::move(tag), opts);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
future<> snapshot_ctl::do_take_cluster_column_family_snapshot(std::vector<sstring> ks_names, std::vector<sstring> tables, sstring tag, snapshot_options opts) {
|
|
||||||
if (tables.empty()) {
|
|
||||||
co_await coroutine::parallel_for_each(ks_names, [tag, this] (const auto& ks_name) {
|
|
||||||
return check_snapshot_not_exist(ks_name, tag);
|
|
||||||
});
|
|
||||||
co_await _sp.local().snapshot_keyspace(
|
|
||||||
ks_names | std::views::transform([&](auto& ks) { return std::make_pair(ks, sstring{}); })
|
|
||||||
| std::ranges::to<std::unordered_multimap>(),
|
|
||||||
tag, opts
|
|
||||||
);
|
|
||||||
co_return;
|
|
||||||
};
|
|
||||||
|
|
||||||
auto ks = ks_names[0];
|
|
||||||
co_await check_snapshot_not_exist(ks, tag, tables);
|
|
||||||
|
|
||||||
co_await _sp.local().snapshot_keyspace(
|
|
||||||
tables | std::views::transform([&](auto& cf) { return std::make_pair(ks, cf); })
|
|
||||||
| std::ranges::to<std::unordered_multimap>(),
|
|
||||||
tag, opts
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
future<> snapshot_ctl::do_take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, snapshot_options opts) {
|
future<> snapshot_ctl::do_take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, snapshot_options opts) {
|
||||||
co_await check_snapshot_not_exist(ks_name, tag, tables);
|
co_await check_snapshot_not_exist(ks_name, tag, tables);
|
||||||
co_await replica::database::snapshot_tables_on_all_shards(_db, ks_name, std::move(tables), std::move(tag), opts);
|
co_await replica::database::snapshot_tables_on_all_shards(_db, ks_name, std::move(tables), std::move(tag), opts);
|
||||||
|
|||||||
@@ -24,7 +24,6 @@
|
|||||||
using namespace seastar;
|
using namespace seastar;
|
||||||
|
|
||||||
namespace sstables { class storage_manager; }
|
namespace sstables { class storage_manager; }
|
||||||
namespace service { class storage_proxy; }
|
|
||||||
|
|
||||||
namespace db {
|
namespace db {
|
||||||
|
|
||||||
@@ -64,7 +63,7 @@ public:
|
|||||||
|
|
||||||
using db_snapshot_details = std::vector<table_snapshot_details_ext>;
|
using db_snapshot_details = std::vector<table_snapshot_details_ext>;
|
||||||
|
|
||||||
snapshot_ctl(sharded<replica::database>& db, sharded<service::storage_proxy>&, tasks::task_manager& tm, sstables::storage_manager& sstm, config cfg);
|
snapshot_ctl(sharded<replica::database>& db, tasks::task_manager& tm, sstables::storage_manager& sstm, config cfg);
|
||||||
|
|
||||||
future<> stop();
|
future<> stop();
|
||||||
|
|
||||||
@@ -96,17 +95,6 @@ public:
|
|||||||
*/
|
*/
|
||||||
future<> take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, snapshot_options opts = {});
|
future<> take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, snapshot_options opts = {});
|
||||||
|
|
||||||
/**
|
|
||||||
* Takes the snapshot of multiple tables or a whole keyspace, or all keyspaces,
|
|
||||||
* using global, clusterwide topology coordinated op.
|
|
||||||
* A snapshot name must be specified.
|
|
||||||
*
|
|
||||||
* @param ks_names the keyspaces to snapshot
|
|
||||||
* @param tables optional - a vector of tables names to snapshot
|
|
||||||
* @param tag the tag given to the snapshot; may not be null or empty
|
|
||||||
*/
|
|
||||||
future<> take_cluster_column_family_snapshot(std::vector<sstring> ks_names, std::vector<sstring> tables, sstring tag, snapshot_options opts = {});
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Remove the snapshot with the given name from the given keyspaces.
|
* Remove the snapshot with the given name from the given keyspaces.
|
||||||
* If no tag is specified we will remove all snapshots.
|
* If no tag is specified we will remove all snapshots.
|
||||||
@@ -123,7 +111,6 @@ public:
|
|||||||
private:
|
private:
|
||||||
config _config;
|
config _config;
|
||||||
sharded<replica::database>& _db;
|
sharded<replica::database>& _db;
|
||||||
sharded<service::storage_proxy>& _sp;
|
|
||||||
seastar::rwlock _lock;
|
seastar::rwlock _lock;
|
||||||
seastar::named_gate _ops;
|
seastar::named_gate _ops;
|
||||||
shared_ptr<snapshot::task_manager_module> _task_manager_module;
|
shared_ptr<snapshot::task_manager_module> _task_manager_module;
|
||||||
@@ -146,7 +133,6 @@ private:
|
|||||||
|
|
||||||
future<> do_take_snapshot(sstring tag, std::vector<sstring> keyspace_names, snapshot_options opts = {} );
|
future<> do_take_snapshot(sstring tag, std::vector<sstring> keyspace_names, snapshot_options opts = {} );
|
||||||
future<> do_take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, snapshot_options opts = {});
|
future<> do_take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, snapshot_options opts = {});
|
||||||
future<> do_take_cluster_column_family_snapshot(std::vector<sstring> ks_names, std::vector<sstring> tables, sstring tag, snapshot_options opts = {});
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -770,6 +770,13 @@ system_distributed_keyspace::get_cdc_desc_v1_timestamps(context ctx) {
|
|||||||
co_return res;
|
co_return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool system_distributed_keyspace::workload_prioritization_tables_exists() {
|
||||||
|
auto wp_table = get_updated_service_levels(_qp.db(), true);
|
||||||
|
auto table = _qp.db().try_find_table(NAME, wp_table->cf_name());
|
||||||
|
|
||||||
|
return table && table->schema()->equal_columns(*wp_table);
|
||||||
|
}
|
||||||
|
|
||||||
future<qos::service_levels_info> system_distributed_keyspace::get_service_levels(qos::query_context ctx) const {
|
future<qos::service_levels_info> system_distributed_keyspace::get_service_levels(qos::query_context ctx) const {
|
||||||
return qos::get_service_levels(_qp, NAME, SERVICE_LEVELS, db::consistency_level::ONE, ctx);
|
return qos::get_service_levels(_qp, NAME, SERVICE_LEVELS, db::consistency_level::ONE, ctx);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -117,6 +117,7 @@ public:
|
|||||||
future<qos::service_levels_info> get_service_level(sstring service_level_name) const;
|
future<qos::service_levels_info> get_service_level(sstring service_level_name) const;
|
||||||
future<> set_service_level(sstring service_level_name, qos::service_level_options slo) const;
|
future<> set_service_level(sstring service_level_name, qos::service_level_options slo) const;
|
||||||
future<> drop_service_level(sstring service_level_name) const;
|
future<> drop_service_level(sstring service_level_name) const;
|
||||||
|
bool workload_prioritization_tables_exists();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
future<> create_tables(std::vector<schema_ptr> tables);
|
future<> create_tables(std::vector<schema_ptr> tables);
|
||||||
|
|||||||
@@ -335,10 +335,6 @@ schema_ptr system_keyspace::topology_requests() {
|
|||||||
.with_column("truncate_table_id", uuid_type)
|
.with_column("truncate_table_id", uuid_type)
|
||||||
.with_column("new_keyspace_rf_change_ks_name", utf8_type)
|
.with_column("new_keyspace_rf_change_ks_name", utf8_type)
|
||||||
.with_column("new_keyspace_rf_change_data", map_type_impl::get_instance(utf8_type, utf8_type, false))
|
.with_column("new_keyspace_rf_change_data", map_type_impl::get_instance(utf8_type, utf8_type, false))
|
||||||
.with_column("snapshot_table_ids", set_type_impl::get_instance(uuid_type, false))
|
|
||||||
.with_column("snapshot_tag", utf8_type)
|
|
||||||
.with_column("snapshot_expiry", timestamp_type)
|
|
||||||
.with_column("snapshot_skip_flush", boolean_type)
|
|
||||||
.set_comment("Topology request tracking")
|
.set_comment("Topology request tracking")
|
||||||
.with_hash_version()
|
.with_hash_version()
|
||||||
.build();
|
.build();
|
||||||
@@ -1718,9 +1714,7 @@ std::unordered_set<dht::token> decode_tokens(const set_type_impl::native_type& t
|
|||||||
std::unordered_set<dht::token> tset;
|
std::unordered_set<dht::token> tset;
|
||||||
for (auto& t: tokens) {
|
for (auto& t: tokens) {
|
||||||
auto str = value_cast<sstring>(t);
|
auto str = value_cast<sstring>(t);
|
||||||
if (str != dht::token::from_sstring(str).to_sstring()) {
|
SCYLLA_ASSERT(str == dht::token::from_sstring(str).to_sstring());
|
||||||
on_internal_error(slogger, format("decode_tokens: invalid token string '{}'", str));
|
|
||||||
}
|
|
||||||
tset.insert(dht::token::from_sstring(str));
|
tset.insert(dht::token::from_sstring(str));
|
||||||
}
|
}
|
||||||
return tset;
|
return tset;
|
||||||
@@ -3197,7 +3191,7 @@ future<service::topology> system_keyspace::load_topology_state(const std::unorde
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
} else if (must_have_tokens(nstate)) {
|
} else if (must_have_tokens(nstate)) {
|
||||||
on_internal_error(slogger, format(
|
on_fatal_internal_error(slogger, format(
|
||||||
"load_topology_state: node {} in {} state but missing ring slice", host_id, nstate));
|
"load_topology_state: node {} in {} state but missing ring slice", host_id, nstate));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -3279,7 +3273,7 @@ future<service::topology> system_keyspace::load_topology_state(const std::unorde
|
|||||||
// Currently, at most one node at a time can be in transitioning state.
|
// Currently, at most one node at a time can be in transitioning state.
|
||||||
if (!map->empty()) {
|
if (!map->empty()) {
|
||||||
const auto& [other_id, other_rs] = *map->begin();
|
const auto& [other_id, other_rs] = *map->begin();
|
||||||
on_internal_error(slogger, format(
|
on_fatal_internal_error(slogger, format(
|
||||||
"load_topology_state: found two nodes in transitioning state: {} in {} state and {} in {} state",
|
"load_topology_state: found two nodes in transitioning state: {} in {} state and {} in {} state",
|
||||||
other_id, other_rs.state, host_id, nstate));
|
other_id, other_rs.state, host_id, nstate));
|
||||||
}
|
}
|
||||||
@@ -3337,7 +3331,8 @@ future<service::topology> system_keyspace::load_topology_state(const std::unorde
|
|||||||
format("SELECT count(range_end) as cnt FROM {}.{} WHERE key = '{}' AND id = ?",
|
format("SELECT count(range_end) as cnt FROM {}.{} WHERE key = '{}' AND id = ?",
|
||||||
NAME, CDC_GENERATIONS_V3, cdc::CDC_GENERATIONS_V3_KEY),
|
NAME, CDC_GENERATIONS_V3, cdc::CDC_GENERATIONS_V3_KEY),
|
||||||
gen_id.id);
|
gen_id.id);
|
||||||
if (!gen_rows || gen_rows->empty()) {
|
SCYLLA_ASSERT(gen_rows);
|
||||||
|
if (gen_rows->empty()) {
|
||||||
on_internal_error(slogger, format(
|
on_internal_error(slogger, format(
|
||||||
"load_topology_state: last committed CDC generation time UUID ({}) present, but data missing", gen_id.id));
|
"load_topology_state: last committed CDC generation time UUID ({}) present, but data missing", gen_id.id));
|
||||||
}
|
}
|
||||||
@@ -3585,18 +3580,6 @@ system_keyspace::topology_requests_entry system_keyspace::topology_request_row_t
|
|||||||
entry.new_keyspace_rf_change_ks_name = row.get_as<sstring>("new_keyspace_rf_change_ks_name");
|
entry.new_keyspace_rf_change_ks_name = row.get_as<sstring>("new_keyspace_rf_change_ks_name");
|
||||||
entry.new_keyspace_rf_change_data = row.get_map<sstring,sstring>("new_keyspace_rf_change_data");
|
entry.new_keyspace_rf_change_data = row.get_map<sstring,sstring>("new_keyspace_rf_change_data");
|
||||||
}
|
}
|
||||||
if (row.has("snapshot_table_ids")) {
|
|
||||||
entry.snapshot_tag = row.get_as<sstring>("snapshot_tag");
|
|
||||||
entry.snapshot_skip_flush = row.get_as<bool>("snapshot_skip_flush");
|
|
||||||
entry.snapshot_table_ids = row.get_set<utils::UUID>("snapshot_table_ids")
|
|
||||||
| std::views::transform([](auto& uuid) { return table_id(uuid); })
|
|
||||||
| std::ranges::to<std::unordered_set>()
|
|
||||||
;
|
|
||||||
;
|
|
||||||
if (row.has("snapshot_expiry")) {
|
|
||||||
entry.snapshot_expiry = row.get_as<db_clock::time_point>("snapshot_expiry");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return entry;
|
return entry;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -215,8 +215,6 @@ public:
|
|||||||
static constexpr auto BUILT_VIEWS = "built_views";
|
static constexpr auto BUILT_VIEWS = "built_views";
|
||||||
static constexpr auto SCYLLA_VIEWS_BUILDS_IN_PROGRESS = "scylla_views_builds_in_progress";
|
static constexpr auto SCYLLA_VIEWS_BUILDS_IN_PROGRESS = "scylla_views_builds_in_progress";
|
||||||
static constexpr auto CDC_LOCAL = "cdc_local";
|
static constexpr auto CDC_LOCAL = "cdc_local";
|
||||||
static constexpr auto CDC_TIMESTAMPS = "cdc_timestamps";
|
|
||||||
static constexpr auto CDC_STREAMS = "cdc_streams";
|
|
||||||
|
|
||||||
// auth
|
// auth
|
||||||
static constexpr auto ROLES = "roles";
|
static constexpr auto ROLES = "roles";
|
||||||
@@ -417,10 +415,6 @@ public:
|
|||||||
std::optional<sstring> new_keyspace_rf_change_ks_name;
|
std::optional<sstring> new_keyspace_rf_change_ks_name;
|
||||||
// The KS options to be used when executing the scheduled ALTER KS statement
|
// The KS options to be used when executing the scheduled ALTER KS statement
|
||||||
std::optional<std::unordered_map<sstring, sstring>> new_keyspace_rf_change_data;
|
std::optional<std::unordered_map<sstring, sstring>> new_keyspace_rf_change_data;
|
||||||
std::optional<std::unordered_set<table_id>> snapshot_table_ids;
|
|
||||||
std::optional<sstring> snapshot_tag;
|
|
||||||
std::optional<db_clock::time_point> snapshot_expiry;
|
|
||||||
bool snapshot_skip_flush;
|
|
||||||
};
|
};
|
||||||
using topology_requests_entries = std::unordered_map<utils::UUID, system_keyspace::topology_requests_entry>;
|
using topology_requests_entries = std::unordered_map<utils::UUID, system_keyspace::topology_requests_entry>;
|
||||||
|
|
||||||
|
|||||||
@@ -2308,7 +2308,6 @@ future<> view_builder::drain() {
|
|||||||
vlogger.info("Draining view builder");
|
vlogger.info("Draining view builder");
|
||||||
_as.request_abort();
|
_as.request_abort();
|
||||||
co_await _mnotifier.unregister_listener(this);
|
co_await _mnotifier.unregister_listener(this);
|
||||||
co_await _ops_gate.close();
|
|
||||||
co_await _vug.drain();
|
co_await _vug.drain();
|
||||||
co_await _sem.wait();
|
co_await _sem.wait();
|
||||||
_sem.broken();
|
_sem.broken();
|
||||||
@@ -2743,24 +2742,22 @@ void view_builder::on_create_view(const sstring& ks_name, const sstring& view_na
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Do it in the background, serialized and broadcast from shard 0.
|
// Do it in the background, serialized and broadcast from shard 0.
|
||||||
static_cast<void>(with_gate(_ops_gate, [this, ks_name = ks_name, view_name = view_name] () mutable {
|
static_cast<void>(dispatch_create_view(ks_name, view_name).handle_exception([ks_name, view_name] (std::exception_ptr ep) {
|
||||||
return dispatch_create_view(std::move(ks_name), std::move(view_name));
|
|
||||||
}).handle_exception([ks_name, view_name] (std::exception_ptr ep) {
|
|
||||||
vlogger.warn("Failed to dispatch view creation {}.{}: {}", ks_name, view_name, ep);
|
vlogger.warn("Failed to dispatch view creation {}.{}: {}", ks_name, view_name, ep);
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> view_builder::dispatch_update_view(sstring ks_name, sstring view_name) {
|
void view_builder::on_update_view(const sstring& ks_name, const sstring& view_name, bool) {
|
||||||
if (should_ignore_tablet_keyspace(_db, ks_name)) {
|
if (should_ignore_tablet_keyspace(_db, ks_name)) {
|
||||||
co_return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[maybe_unused]] auto sem_units = co_await get_or_adopt_view_builder_lock(std::nullopt);
|
// Do it in the background, serialized.
|
||||||
|
(void)with_semaphore(_sem, view_builder_semaphore_units, [ks_name, view_name, this] {
|
||||||
auto view = view_ptr(_db.find_schema(ks_name, view_name));
|
auto view = view_ptr(_db.find_schema(ks_name, view_name));
|
||||||
auto step_it = _base_to_build_step.find(view->view_info()->base_id());
|
auto step_it = _base_to_build_step.find(view->view_info()->base_id());
|
||||||
if (step_it == _base_to_build_step.end()) {
|
if (step_it == _base_to_build_step.end()) {
|
||||||
co_return; // In case all the views for this CF have finished building already.
|
return;// In case all the views for this CF have finished building already.
|
||||||
}
|
}
|
||||||
auto status_it = std::ranges::find_if(step_it->second.build_status, [view] (const view_build_status& bs) {
|
auto status_it = std::ranges::find_if(step_it->second.build_status, [view] (const view_build_status& bs) {
|
||||||
return bs.view->id() == view->id();
|
return bs.view->id() == view->id();
|
||||||
@@ -2768,23 +2765,7 @@ future<> view_builder::dispatch_update_view(sstring ks_name, sstring view_name)
|
|||||||
if (status_it != step_it->second.build_status.end()) {
|
if (status_it != step_it->second.build_status.end()) {
|
||||||
status_it->view = std::move(view);
|
status_it->view = std::move(view);
|
||||||
}
|
}
|
||||||
}
|
}).handle_exception_type([] (replica::no_such_column_family&) { });
|
||||||
|
|
||||||
void view_builder::on_update_view(const sstring& ks_name, const sstring& view_name, bool) {
|
|
||||||
// Do it in the background, serialized.
|
|
||||||
static_cast<void>(with_gate(_ops_gate, [this, ks_name = ks_name, view_name = view_name] () mutable {
|
|
||||||
return dispatch_update_view(std::move(ks_name), std::move(view_name));
|
|
||||||
}).handle_exception([ks_name, view_name] (std::exception_ptr ep) {
|
|
||||||
try {
|
|
||||||
std::rethrow_exception(ep);
|
|
||||||
} catch (const seastar::gate_closed_exception&) {
|
|
||||||
vlogger.warn("Ignoring gate_closed_exception during view update {}.{}", ks_name, view_name);
|
|
||||||
} catch (const seastar::broken_named_semaphore&) {
|
|
||||||
vlogger.warn("Ignoring broken_named_semaphore during view update {}.{}", ks_name, view_name);
|
|
||||||
} catch (const replica::no_such_column_family&) {
|
|
||||||
vlogger.warn("Ignoring no_such_column_family during view update {}.{}", ks_name, view_name);
|
|
||||||
}
|
|
||||||
}));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> view_builder::dispatch_drop_view(sstring ks_name, sstring view_name) {
|
future<> view_builder::dispatch_drop_view(sstring ks_name, sstring view_name) {
|
||||||
@@ -2846,9 +2827,7 @@ void view_builder::on_drop_view(const sstring& ks_name, const sstring& view_name
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Do it in the background, serialized and broadcast from shard 0.
|
// Do it in the background, serialized and broadcast from shard 0.
|
||||||
static_cast<void>(with_gate(_ops_gate, [this, ks_name = ks_name, view_name = view_name] () mutable {
|
static_cast<void>(dispatch_drop_view(ks_name, view_name).handle_exception([ks_name, view_name] (std::exception_ptr ep) {
|
||||||
return dispatch_drop_view(std::move(ks_name), std::move(view_name));
|
|
||||||
}).handle_exception([ks_name, view_name] (std::exception_ptr ep) {
|
|
||||||
vlogger.warn("Failed to dispatch view drop {}.{}: {}", ks_name, view_name, ep);
|
vlogger.warn("Failed to dispatch view drop {}.{}: {}", ks_name, view_name, ep);
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,7 +16,6 @@
|
|||||||
|
|
||||||
#include <seastar/core/abort_source.hh>
|
#include <seastar/core/abort_source.hh>
|
||||||
#include <seastar/core/future.hh>
|
#include <seastar/core/future.hh>
|
||||||
#include <seastar/core/gate.hh>
|
|
||||||
#include <seastar/core/semaphore.hh>
|
#include <seastar/core/semaphore.hh>
|
||||||
#include <seastar/core/condition-variable.hh>
|
#include <seastar/core/condition-variable.hh>
|
||||||
#include <seastar/core/sharded.hh>
|
#include <seastar/core/sharded.hh>
|
||||||
@@ -191,7 +190,6 @@ class view_builder final : public service::migration_listener::only_view_notific
|
|||||||
// Guard the whole startup routine with a semaphore so that it's not intercepted by
|
// Guard the whole startup routine with a semaphore so that it's not intercepted by
|
||||||
// `on_drop_view`, `on_create_view`, or `on_update_view` events.
|
// `on_drop_view`, `on_create_view`, or `on_update_view` events.
|
||||||
seastar::named_semaphore _sem{view_builder_semaphore_units, named_semaphore_exception_factory{"view builder"}};
|
seastar::named_semaphore _sem{view_builder_semaphore_units, named_semaphore_exception_factory{"view builder"}};
|
||||||
seastar::gate _ops_gate;
|
|
||||||
seastar::abort_source _as;
|
seastar::abort_source _as;
|
||||||
future<> _step_fiber = make_ready_future<>();
|
future<> _step_fiber = make_ready_future<>();
|
||||||
// Used to coordinate between shards the conclusion of the build process for a particular view.
|
// Used to coordinate between shards the conclusion of the build process for a particular view.
|
||||||
@@ -286,7 +284,6 @@ private:
|
|||||||
future<> mark_as_built(view_ptr);
|
future<> mark_as_built(view_ptr);
|
||||||
void setup_metrics();
|
void setup_metrics();
|
||||||
future<> dispatch_create_view(sstring ks_name, sstring view_name);
|
future<> dispatch_create_view(sstring ks_name, sstring view_name);
|
||||||
future<> dispatch_update_view(sstring ks_name, sstring view_name);
|
|
||||||
future<> dispatch_drop_view(sstring ks_name, sstring view_name);
|
future<> dispatch_drop_view(sstring ks_name, sstring view_name);
|
||||||
future<> handle_seed_view_build_progress(const sstring& ks_name, const sstring& view_name);
|
future<> handle_seed_view_build_progress(const sstring& ks_name, const sstring& view_name);
|
||||||
future<> handle_create_view_local(const sstring& ks_name, const sstring& view_name, view_builder_units_opt units);
|
future<> handle_create_view_local(const sstring& ks_name, const sstring& view_name, view_builder_units_opt units);
|
||||||
|
|||||||
@@ -588,7 +588,11 @@ future<> view_building_worker::do_build_range(table_id base_id, std::vector<tabl
|
|||||||
utils::get_local_injector().inject("do_build_range_fail",
|
utils::get_local_injector().inject("do_build_range_fail",
|
||||||
[] { throw std::runtime_error("do_build_range failed due to error injection"); });
|
[] { throw std::runtime_error("do_build_range failed due to error injection"); });
|
||||||
|
|
||||||
return seastar::async([this, base_id, views_ids = std::move(views_ids), last_token, &as] {
|
// Run the view building in the streaming scheduling group
|
||||||
|
// so that it doesn't impact other tasks with higher priority.
|
||||||
|
seastar::thread_attributes attr;
|
||||||
|
attr.sched_group = _db.get_streaming_scheduling_group();
|
||||||
|
return seastar::async(std::move(attr), [this, base_id, views_ids = std::move(views_ids), last_token, &as] {
|
||||||
gc_clock::time_point now = gc_clock::now();
|
gc_clock::time_point now = gc_clock::now();
|
||||||
auto base_cf = _db.find_column_family(base_id).shared_from_this();
|
auto base_cf = _db.find_column_family(base_id).shared_from_this();
|
||||||
reader_permit permit = _db.get_reader_concurrency_semaphore().make_tracking_only_permit(nullptr, "build_views_range", db::no_timeout, {});
|
reader_permit permit = _db.get_reader_concurrency_semaphore().make_tracking_only_permit(nullptr, "build_views_range", db::no_timeout, {});
|
||||||
|
|||||||
@@ -67,7 +67,6 @@ public:
|
|||||||
return schema_builder(system_keyspace::NAME, "cluster_status", std::make_optional(id))
|
return schema_builder(system_keyspace::NAME, "cluster_status", std::make_optional(id))
|
||||||
.with_column("peer", inet_addr_type, column_kind::partition_key)
|
.with_column("peer", inet_addr_type, column_kind::partition_key)
|
||||||
.with_column("dc", utf8_type)
|
.with_column("dc", utf8_type)
|
||||||
.with_column("rack", utf8_type)
|
|
||||||
.with_column("up", boolean_type)
|
.with_column("up", boolean_type)
|
||||||
.with_column("draining", boolean_type)
|
.with_column("draining", boolean_type)
|
||||||
.with_column("excluded", boolean_type)
|
.with_column("excluded", boolean_type)
|
||||||
@@ -98,22 +97,21 @@ public:
|
|||||||
auto hostid = eps.get_host_id();
|
auto hostid = eps.get_host_id();
|
||||||
|
|
||||||
set_cell(cr, "up", gossiper.is_alive(hostid));
|
set_cell(cr, "up", gossiper.is_alive(hostid));
|
||||||
if (gossiper.is_shutdown(endpoint)) {
|
if (!ss.raft_topology_change_enabled() || gossiper.is_shutdown(endpoint)) {
|
||||||
set_cell(cr, "status", gossiper.get_gossip_status(endpoint));
|
set_cell(cr, "status", gossiper.get_gossip_status(endpoint));
|
||||||
} else {
|
|
||||||
set_cell(cr, "status", boost::to_upper_copy<std::string>(fmt::format("{}", ss.get_node_state(hostid))));
|
|
||||||
}
|
}
|
||||||
set_cell(cr, "load", gossiper.get_application_state_value(endpoint, gms::application_state::LOAD));
|
set_cell(cr, "load", gossiper.get_application_state_value(endpoint, gms::application_state::LOAD));
|
||||||
|
|
||||||
|
if (ss.raft_topology_change_enabled() && !gossiper.is_shutdown(endpoint)) {
|
||||||
|
set_cell(cr, "status", boost::to_upper_copy<std::string>(fmt::format("{}", ss.get_node_state(hostid))));
|
||||||
|
}
|
||||||
set_cell(cr, "host_id", hostid.uuid());
|
set_cell(cr, "host_id", hostid.uuid());
|
||||||
|
|
||||||
if (tm.get_topology().has_node(hostid)) {
|
if (tm.get_topology().has_node(hostid)) {
|
||||||
// Not all entries in gossiper are present in the topology
|
// Not all entries in gossiper are present in the topology
|
||||||
auto& node = tm.get_topology().get_node(hostid);
|
auto& node = tm.get_topology().get_node(hostid);
|
||||||
sstring dc = node.dc_rack().dc;
|
sstring dc = node.dc_rack().dc;
|
||||||
sstring rack = node.dc_rack().rack;
|
|
||||||
set_cell(cr, "dc", dc);
|
set_cell(cr, "dc", dc);
|
||||||
set_cell(cr, "rack", rack);
|
|
||||||
set_cell(cr, "draining", node.is_draining());
|
set_cell(cr, "draining", node.is_draining());
|
||||||
set_cell(cr, "excluded", node.is_excluded());
|
set_cell(cr, "excluded", node.is_excluded());
|
||||||
}
|
}
|
||||||
@@ -1347,8 +1345,8 @@ public:
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
static schema_ptr build_schema() {
|
static schema_ptr build_schema() {
|
||||||
auto id = generate_legacy_id(system_keyspace::NAME, system_keyspace::CDC_TIMESTAMPS);
|
auto id = generate_legacy_id(system_keyspace::NAME, "cdc_timestamps");
|
||||||
return schema_builder(system_keyspace::NAME, system_keyspace::CDC_TIMESTAMPS, std::make_optional(id))
|
return schema_builder(system_keyspace::NAME, "cdc_timestamps", std::make_optional(id))
|
||||||
.with_column("keyspace_name", utf8_type, column_kind::partition_key)
|
.with_column("keyspace_name", utf8_type, column_kind::partition_key)
|
||||||
.with_column("table_name", utf8_type, column_kind::partition_key)
|
.with_column("table_name", utf8_type, column_kind::partition_key)
|
||||||
.with_column("timestamp", reversed_type_impl::get_instance(timestamp_type), column_kind::clustering_key)
|
.with_column("timestamp", reversed_type_impl::get_instance(timestamp_type), column_kind::clustering_key)
|
||||||
@@ -1430,8 +1428,8 @@ public:
|
|||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
static schema_ptr build_schema() {
|
static schema_ptr build_schema() {
|
||||||
auto id = generate_legacy_id(system_keyspace::NAME, system_keyspace::CDC_STREAMS);
|
auto id = generate_legacy_id(system_keyspace::NAME, "cdc_streams");
|
||||||
return schema_builder(system_keyspace::NAME, system_keyspace::CDC_STREAMS, std::make_optional(id))
|
return schema_builder(system_keyspace::NAME, "cdc_streams", std::make_optional(id))
|
||||||
.with_column("keyspace_name", utf8_type, column_kind::partition_key)
|
.with_column("keyspace_name", utf8_type, column_kind::partition_key)
|
||||||
.with_column("table_name", utf8_type, column_kind::partition_key)
|
.with_column("table_name", utf8_type, column_kind::partition_key)
|
||||||
.with_column("timestamp", timestamp_type, column_kind::clustering_key)
|
.with_column("timestamp", timestamp_type, column_kind::clustering_key)
|
||||||
|
|||||||
1
debug.cc
1
debug.cc
@@ -12,6 +12,5 @@ namespace debug {
|
|||||||
|
|
||||||
seastar::sharded<replica::database>* volatile the_database = nullptr;
|
seastar::sharded<replica::database>* volatile the_database = nullptr;
|
||||||
seastar::scheduling_group streaming_scheduling_group;
|
seastar::scheduling_group streaming_scheduling_group;
|
||||||
seastar::scheduling_group gossip_scheduling_group;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
1
debug.hh
1
debug.hh
@@ -18,7 +18,6 @@ namespace debug {
|
|||||||
|
|
||||||
extern seastar::sharded<replica::database>* volatile the_database;
|
extern seastar::sharded<replica::database>* volatile the_database;
|
||||||
extern seastar::scheduling_group streaming_scheduling_group;
|
extern seastar::scheduling_group streaming_scheduling_group;
|
||||||
extern seastar::scheduling_group gossip_scheduling_group;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
2
dist/docker/redhat/README.md
vendored
2
dist/docker/redhat/README.md
vendored
@@ -12,7 +12,7 @@ Do the following in the top-level Scylla source directory:
|
|||||||
2. Run `ninja dist-dev` (with the same mode name as above) to prepare
|
2. Run `ninja dist-dev` (with the same mode name as above) to prepare
|
||||||
the distribution artifacts.
|
the distribution artifacts.
|
||||||
|
|
||||||
3. Run `./dist/docker/redhat/build_docker.sh --mode dev`
|
3. Run `./dist/docker/debian/build_docker.sh --mode dev`
|
||||||
|
|
||||||
This creates a docker image as a **file**, in the OCI format, and prints
|
This creates a docker image as a **file**, in the OCI format, and prints
|
||||||
its name, looking something like:
|
its name, looking something like:
|
||||||
|
|||||||
8
dist/docker/redhat/build_docker.sh
vendored
8
dist/docker/redhat/build_docker.sh
vendored
@@ -70,7 +70,7 @@ bcp() { buildah copy "$container" "$@"; }
|
|||||||
run() { buildah run "$container" "$@"; }
|
run() { buildah run "$container" "$@"; }
|
||||||
bconfig() { buildah config "$@" "$container"; }
|
bconfig() { buildah config "$@" "$container"; }
|
||||||
|
|
||||||
container="$(buildah from --pull=always docker.io/redhat/ubi9-minimal:latest)"
|
container="$(buildah from docker.io/redhat/ubi9-minimal:latest)"
|
||||||
|
|
||||||
packages=(
|
packages=(
|
||||||
"build/dist/$config/redhat/RPMS/$arch/$product-$version-$release.$arch.rpm"
|
"build/dist/$config/redhat/RPMS/$arch/$product-$version-$release.$arch.rpm"
|
||||||
@@ -97,9 +97,7 @@ bcp LICENSE-ScyllaDB-Source-Available.md /licenses/
|
|||||||
|
|
||||||
run microdnf clean all
|
run microdnf clean all
|
||||||
run microdnf --setopt=tsflags=nodocs -y update
|
run microdnf --setopt=tsflags=nodocs -y update
|
||||||
run microdnf --setopt=tsflags=nodocs -y install hostname kmod procps-ng python3 python3-pip cpio
|
run microdnf --setopt=tsflags=nodocs -y install hostname kmod procps-ng python3 python3-pip
|
||||||
# Extract only systemctl binary from systemd package to avoid installing the whole systemd in the container.
|
|
||||||
run bash -rc "microdnf download systemd && rpm2cpio systemd-*.rpm | cpio -idmv ./usr/bin/systemctl && rm -rf systemd-*.rpm"
|
|
||||||
run curl -L --output /etc/yum.repos.d/scylla.repo ${repo_file_url}
|
run curl -L --output /etc/yum.repos.d/scylla.repo ${repo_file_url}
|
||||||
run pip3 install --no-cache-dir --prefix /usr supervisor
|
run pip3 install --no-cache-dir --prefix /usr supervisor
|
||||||
run bash -ec "echo LANG=C.UTF-8 > /etc/locale.conf"
|
run bash -ec "echo LANG=C.UTF-8 > /etc/locale.conf"
|
||||||
@@ -108,8 +106,6 @@ run bash -ec "cat /scylla_bashrc >> /etc/bash.bashrc"
|
|||||||
run mkdir -p /var/log/scylla
|
run mkdir -p /var/log/scylla
|
||||||
run chown -R scylla:scylla /var/lib/scylla
|
run chown -R scylla:scylla /var/lib/scylla
|
||||||
run sed -i -e 's/^SCYLLA_ARGS=".*"$/SCYLLA_ARGS="--log-to-syslog 0 --log-to-stdout 1 --network-stack posix"/' /etc/sysconfig/scylla-server
|
run sed -i -e 's/^SCYLLA_ARGS=".*"$/SCYLLA_ARGS="--log-to-syslog 0 --log-to-stdout 1 --network-stack posix"/' /etc/sysconfig/scylla-server
|
||||||
# Cleanup packages not needed in the final image and clean package manager cache to reduce image size.
|
|
||||||
run bash -rc "microdnf remove -y cpio && microdnf clean all"
|
|
||||||
|
|
||||||
run mkdir -p /opt/scylladb/supervisor
|
run mkdir -p /opt/scylladb/supervisor
|
||||||
run touch /opt/scylladb/SCYLLA-CONTAINER-FILE
|
run touch /opt/scylladb/SCYLLA-CONTAINER-FILE
|
||||||
|
|||||||
@@ -1,10 +1,6 @@
|
|||||||
### a dictionary of redirections
|
### a dictionary of redirections
|
||||||
#old path: new path
|
#old path: new path
|
||||||
|
|
||||||
# Move the OS Support page
|
|
||||||
|
|
||||||
/stable/getting-started/os-support.html: https://docs.scylladb.com/stable/versioning/os-support-per-version.html
|
|
||||||
|
|
||||||
# Remove an outdated KB
|
# Remove an outdated KB
|
||||||
|
|
||||||
/stable/kb/perftune-modes-sync.html: /stable/kb/index.html
|
/stable/kb/perftune-modes-sync.html: /stable/kb/index.html
|
||||||
|
|||||||
@@ -142,6 +142,10 @@ want modify a non-top-level attribute directly (e.g., a.b[3].c) need RMW:
|
|||||||
Alternator implements such requests by reading the entire top-level
|
Alternator implements such requests by reading the entire top-level
|
||||||
attribute a, modifying only a.b[3].c, and then writing back a.
|
attribute a, modifying only a.b[3].c, and then writing back a.
|
||||||
|
|
||||||
|
Currently, Alternator doesn't use Tablets. That's because Alternator relies
|
||||||
|
on LWT (lightweight transactions), and LWT is not supported in keyspaces
|
||||||
|
with Tablets enabled.
|
||||||
|
|
||||||
```{eval-rst}
|
```{eval-rst}
|
||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 2
|
:maxdepth: 2
|
||||||
|
|||||||
@@ -187,23 +187,6 @@ You can create a keyspace with tablets enabled with the ``tablets = {'enabled':
|
|||||||
the keyspace schema with ``tablets = { 'enabled': false }`` or
|
the keyspace schema with ``tablets = { 'enabled': false }`` or
|
||||||
``tablets = { 'enabled': true }``.
|
``tablets = { 'enabled': true }``.
|
||||||
|
|
||||||
.. _keyspace-rf-rack-valid-to-enforce-rack-list:
|
|
||||||
|
|
||||||
Enforcing Rack-List Replication for Tablet Keyspaces
|
|
||||||
------------------------------------------------------------------
|
|
||||||
|
|
||||||
The ``rf_rack_valid_keyspaces`` is a legacy option that ensures that all keyspaces with tablets enabled are
|
|
||||||
:term:`RF-rack-valid <RF-rack-valid keyspace>`.
|
|
||||||
|
|
||||||
Requiring every tablet keyspace to use the rack list replication factor exclusively is enough to guarantee the keyspace is
|
|
||||||
:term:`RF-rack-valid <RF-rack-valid keyspace>`. It reduces restrictions and provides stronger guarantees compared
|
|
||||||
to ``rf_rack_valid_keyspaces`` option.
|
|
||||||
|
|
||||||
To enforce rack list in tablet keyspaces, use ``enforce_rack_list`` option. It can be set only if all tablet keyspaces use
|
|
||||||
rack list. To ensure that, follow a procedure of :ref:`conversion to rack list replication factor <conversion-to-rack-list-rf>`.
|
|
||||||
After that restart all nodes in the cluster, with ``enforce_rack_list`` enabled and ``rf_rack_valid_keyspaces`` disabled. Make
|
|
||||||
sure to avoid setting or updating replication factor (with CREATE KEYSPACE or ALTER KEYSPACE) while nodes are being restarted.
|
|
||||||
|
|
||||||
.. _tablets-limitations:
|
.. _tablets-limitations:
|
||||||
|
|
||||||
Limitations and Unsupported Features
|
Limitations and Unsupported Features
|
||||||
|
|||||||
@@ -402,82 +402,3 @@ it also describes authentication/authorization and service levels. Additionally,
|
|||||||
statement: `DESCRIBE SCHEMA WITH INTERNALS AND PASSWORDS`, which also includes the information about hashed passwords of the roles.
|
statement: `DESCRIBE SCHEMA WITH INTERNALS AND PASSWORDS`, which also includes the information about hashed passwords of the roles.
|
||||||
|
|
||||||
For more details, see [the article on DESCRIBE SCHEMA](./describe-schema.rst).
|
For more details, see [the article on DESCRIBE SCHEMA](./describe-schema.rst).
|
||||||
|
|
||||||
## Per-row TTL
|
|
||||||
|
|
||||||
CQL's traditional time-to-live (TTL) feature attaches an expiration time to
|
|
||||||
each cell - i.e., each value in each column. For example, the statement:
|
|
||||||
```
|
|
||||||
UPDATE tbl USING TTL 60 SET x = 1 WHERE p = 2
|
|
||||||
```
|
|
||||||
Sets a new value for the column `x` in row `p = 2`, and asks for this value to
|
|
||||||
expire in 60 seconds. When a row is updated incrementally, with different
|
|
||||||
columns set at different times, this can result in different pieces of the
|
|
||||||
row expiring at different times. Applications rarely want partially-expired
|
|
||||||
rows, so they often need to re-write an entire row each time the row needs
|
|
||||||
updating. In particular, it is not possible to change the expiration time of
|
|
||||||
an existing row without re-writing it.
|
|
||||||
|
|
||||||
Per-row time-to-live (TTL) is a new CQL feature that is an alternative to
|
|
||||||
the traditional per-cell TTL. One column is designated as the "expiration
|
|
||||||
time" column, and the value of this column determines when the entire row
|
|
||||||
will expire. It becomes possible to update pieces of a row without changing
|
|
||||||
its expiration time, and vice versa - to change a row's expiration time
|
|
||||||
without rewriting its data.
|
|
||||||
|
|
||||||
The expiration-time column of a table can be chosen when it is created by
|
|
||||||
adding the keyword "TTL" to one of the columns:
|
|
||||||
```cql
|
|
||||||
CREATE TABLE tab (
|
|
||||||
id int PRIMARY KEY,
|
|
||||||
t text,
|
|
||||||
expiration timestamp TTL
|
|
||||||
);
|
|
||||||
```
|
|
||||||
The TTL column's name, in this example `expiration`, can be anything.
|
|
||||||
|
|
||||||
Per-row TTL can also be enabled on an existing table by adding the "TTL"
|
|
||||||
designation to one of the existing columns, with:
|
|
||||||
```cql
|
|
||||||
ALTER TABLE tab TTL colname
|
|
||||||
```
|
|
||||||
Or per-row TTL can be disabled (rows will never expire), with:
|
|
||||||
```cql
|
|
||||||
ALTER TABLE tab TTL NULL
|
|
||||||
```
|
|
||||||
|
|
||||||
It is not possible to enable per-row TTL if it's already enabled, or disable
|
|
||||||
it when already disabled. If you have TTL enabled on one column and want to
|
|
||||||
enable it instead on a second column, you must do it in two steps: First
|
|
||||||
disable TTL and then re-enable it on the second column.
|
|
||||||
|
|
||||||
The designated TTL column must have the type `timestamp` or `bigint`,
|
|
||||||
and specifies the absolute time when the row should expire (the `bigint`
|
|
||||||
type is interpreted as seconds since the UNIX epoch). It must be a regular
|
|
||||||
column (not a primary key column or a static column), and there can only be
|
|
||||||
one such column.
|
|
||||||
|
|
||||||
The 32-bit type `int` (specifying number of seconds since the UNIX epoch)
|
|
||||||
is also supported, but not recommended because it will wrap around in 2038.
|
|
||||||
Unless you must use the `int` type because of pre-existing expiration data
|
|
||||||
with that type, please prefer `timestamp` or `bigint`.
|
|
||||||
|
|
||||||
Another important feature of per-row TTL is that if CDC is enabled, when a
|
|
||||||
row expires a deletion event appears in the CDC log - something that doesn't
|
|
||||||
happen in per-cell TTL. This deletion event can be distinguished from user-
|
|
||||||
initiated deletes: Whereas user-initiated deletes have `cdc_operation` set to
|
|
||||||
3 (`row_delete`) or 4 (`partition_delete`), those generated by expiration have
|
|
||||||
`cdc_operation` -3 (`service_row_delete`) or -4 (`service_partition_delete`).
|
|
||||||
|
|
||||||
Unlike per-cell TTL where a value becomes unreadable at the precise specified
|
|
||||||
second, the per-row TTL's expiration is _eventual_ - the row will expire
|
|
||||||
some time _after_ its requested expiration time, where this "some time" can
|
|
||||||
be controlled by the configuration `alternator_ttl_period_in_seconds`. Until
|
|
||||||
the row is actually deleted, it can still be read, and even written.
|
|
||||||
Importantly, the CDC event will appear immediately after the row is finally
|
|
||||||
deleted.
|
|
||||||
|
|
||||||
It's important to re-iterate that the per-cell TTL and per-row TTL features
|
|
||||||
are separate and distinct, use a different CQL syntax, have a different
|
|
||||||
implementation and provide different guarantees. It is possible to use
|
|
||||||
both features in the same table, or even the same row.
|
|
||||||
|
|||||||
@@ -200,6 +200,8 @@ for two cases. One is setting replication factor to 0, in which case the number
|
|||||||
The other is when the numeric replication factor is equal to the current number of replicas
|
The other is when the numeric replication factor is equal to the current number of replicas
|
||||||
for a given datacanter, in which case the current rack list is preserved.
|
for a given datacanter, in which case the current rack list is preserved.
|
||||||
|
|
||||||
|
Altering from a numeric replication factor to a rack list is not supported yet.
|
||||||
|
|
||||||
Note that when ``ALTER`` ing keyspaces and supplying ``replication_factor``,
|
Note that when ``ALTER`` ing keyspaces and supplying ``replication_factor``,
|
||||||
auto-expansion will only *add* new datacenters for safety, it will not alter
|
auto-expansion will only *add* new datacenters for safety, it will not alter
|
||||||
existing datacenters or remove any even if they are no longer in the cluster.
|
existing datacenters or remove any even if they are no longer in the cluster.
|
||||||
@@ -422,21 +424,6 @@ Altering from a rack list to a numeric replication factor is not supported.
|
|||||||
|
|
||||||
Keyspaces which use rack lists are :term:`RF-rack-valid <RF-rack-valid keyspace>` if each rack in the rack list contains at least one node (excluding :doc:`zero-token nodes </architecture/zero-token-nodes>`).
|
Keyspaces which use rack lists are :term:`RF-rack-valid <RF-rack-valid keyspace>` if each rack in the rack list contains at least one node (excluding :doc:`zero-token nodes </architecture/zero-token-nodes>`).
|
||||||
|
|
||||||
.. _conversion-to-rack-list-rf:
|
|
||||||
|
|
||||||
Conversion to rack-list replication factor
|
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
||||||
|
|
||||||
To migrate a keyspace from a numeric replication factor to a rack-list replication factor, provide the rack-list replication factor explicitly in ALTER KEYSPACE statement. The number of racks in the list must be equal to the numeric replication factor. The replication factor can be converted in any number of DCs at once. In a statement that converts replication factor, no replication factor updates (increase or decrease) are allowed in any DC.
|
|
||||||
|
|
||||||
.. code-block:: cql
|
|
||||||
|
|
||||||
CREATE KEYSPACE Excelsior
|
|
||||||
WITH replication = { 'class' : 'NetworkTopologyStrategy', 'dc1' : 3, 'dc2' : 1} AND tablets = { 'enabled': true };
|
|
||||||
|
|
||||||
ALTER KEYSPACE Excelsior
|
|
||||||
WITH replication = { 'class' : 'NetworkTopologyStrategy', 'dc1' : ['RAC1', 'RAC2', 'RAC3'], 'dc2' : ['RAC4']} AND tablets = { 'enabled': true };
|
|
||||||
|
|
||||||
.. _drop-keyspace-statement:
|
.. _drop-keyspace-statement:
|
||||||
|
|
||||||
DROP KEYSPACE
|
DROP KEYSPACE
|
||||||
@@ -476,7 +463,7 @@ Creating a new table uses the ``CREATE TABLE`` statement:
|
|||||||
: [ ',' PRIMARY KEY '(' `primary_key` ')' ]
|
: [ ',' PRIMARY KEY '(' `primary_key` ')' ]
|
||||||
: ')' [ WITH `table_options` ]
|
: ')' [ WITH `table_options` ]
|
||||||
|
|
||||||
column_definition: `column_name` `cql_type` [ TTL ] [ STATIC ] [ PRIMARY KEY]
|
column_definition: `column_name` `cql_type` [ STATIC ] [ PRIMARY KEY]
|
||||||
|
|
||||||
primary_key: `partition_key` [ ',' `clustering_columns` ]
|
primary_key: `partition_key` [ ',' `clustering_columns` ]
|
||||||
|
|
||||||
@@ -572,11 +559,6 @@ A :token:`column_definition` is primarily comprised of the name of the column de
|
|||||||
which restricts which values are accepted for that column. Additionally, a column definition can have the following
|
which restricts which values are accepted for that column. Additionally, a column definition can have the following
|
||||||
modifiers:
|
modifiers:
|
||||||
|
|
||||||
``TTL``
|
|
||||||
declares the column as being the expiration-time column for the
|
|
||||||
`per-row TTL <https://docs.scylladb.com/stable/cql/cql-extensions.html#per-row-ttl>`_
|
|
||||||
feature.
|
|
||||||
|
|
||||||
``STATIC``
|
``STATIC``
|
||||||
declares the column as being a :ref:`static column <static-columns>`.
|
declares the column as being a :ref:`static column <static-columns>`.
|
||||||
|
|
||||||
@@ -1177,7 +1159,6 @@ Altering an existing table uses the ``ALTER TABLE`` statement:
|
|||||||
: | DROP '(' `column_name` ( ',' `column_name` )* ')' [ USING TIMESTAMP `timestamp` ]
|
: | DROP '(' `column_name` ( ',' `column_name` )* ')' [ USING TIMESTAMP `timestamp` ]
|
||||||
: | ALTER `column_name` TYPE `cql_type`
|
: | ALTER `column_name` TYPE `cql_type`
|
||||||
: | WITH `options`
|
: | WITH `options`
|
||||||
: | TTL (`column_name` | NULL)
|
|
||||||
: | scylla_encryption_options: '=' '{'[`cipher_algorithm` : <hash>]','[`secret_key_strength` : <len>]','[`key_provider`: <provider>]'}'
|
: | scylla_encryption_options: '=' '{'[`cipher_algorithm` : <hash>]','[`secret_key_strength` : <len>]','[`key_provider`: <provider>]'}'
|
||||||
|
|
||||||
For instance:
|
For instance:
|
||||||
@@ -1221,11 +1202,6 @@ The ``ALTER TABLE`` statement can:
|
|||||||
- Change or add any of the ``Encryption options`` above.
|
- Change or add any of the ``Encryption options`` above.
|
||||||
- Change or add any of the :ref:`CDC options <cdc-options>` above.
|
- Change or add any of the :ref:`CDC options <cdc-options>` above.
|
||||||
- Change or add per-partition rate limits. See :ref:`Limiting the rate of requests per partition <ddl-per-parition-rate-limit>`.
|
- Change or add per-partition rate limits. See :ref:`Limiting the rate of requests per partition <ddl-per-parition-rate-limit>`.
|
||||||
- Enable `per-row TTL <https://docs.scylladb.com/stable/cql/cql-extensions.html#per-row-ttl>`_
|
|
||||||
using the given column as the expiration-time column, or disable per-row
|
|
||||||
TTL on this table. If per-row TTL is already enabled, to change the choice
|
|
||||||
of expiration-time column you must first disable per-row TTL and then
|
|
||||||
re-enable it using the chosen column.
|
|
||||||
|
|
||||||
.. warning:: Dropping a column assumes that the timestamps used for the value of this column are "real" timestamp in
|
.. warning:: Dropping a column assumes that the timestamps used for the value of this column are "real" timestamp in
|
||||||
microseconds. Using "real" timestamps in microseconds is the default is and is **strongly** recommended, but as
|
microseconds. Using "real" timestamps in microseconds is the default is and is **strongly** recommended, but as
|
||||||
|
|||||||
@@ -25,8 +25,6 @@ Querying data from data is done using a ``SELECT`` statement:
|
|||||||
: | CAST '(' `selector` AS `cql_type` ')'
|
: | CAST '(' `selector` AS `cql_type` ')'
|
||||||
: | `function_name` '(' [ `selector` ( ',' `selector` )* ] ')'
|
: | `function_name` '(' [ `selector` ( ',' `selector` )* ] ')'
|
||||||
: | COUNT '(' '*' ')'
|
: | COUNT '(' '*' ')'
|
||||||
: | literal
|
|
||||||
: | bind_marker
|
|
||||||
: )
|
: )
|
||||||
: ( '.' `field_name` | '[' `term` ']' )*
|
: ( '.' `field_name` | '[' `term` ']' )*
|
||||||
where_clause: `relation` ( AND `relation` )*
|
where_clause: `relation` ( AND `relation` )*
|
||||||
@@ -37,8 +35,6 @@ Querying data from data is done using a ``SELECT`` statement:
|
|||||||
operator: '=' | '<' | '>' | '<=' | '>=' | IN | NOT IN | CONTAINS | CONTAINS KEY
|
operator: '=' | '<' | '>' | '<=' | '>=' | IN | NOT IN | CONTAINS | CONTAINS KEY
|
||||||
ordering_clause: `column_name` [ ASC | DESC ] ( ',' `column_name` [ ASC | DESC ] )*
|
ordering_clause: `column_name` [ ASC | DESC ] ( ',' `column_name` [ ASC | DESC ] )*
|
||||||
timeout: `duration`
|
timeout: `duration`
|
||||||
literal: number | 'string' | boolean | NULL | tuple_literal | list_literal | map_literal
|
|
||||||
bind_marker: '?' | ':' `identifier`
|
|
||||||
|
|
||||||
For instance::
|
For instance::
|
||||||
|
|
||||||
@@ -85,13 +81,6 @@ A :token:`selector` can be one of the following:
|
|||||||
- A casting, which allows you to convert a nested selector to a (compatible) type.
|
- A casting, which allows you to convert a nested selector to a (compatible) type.
|
||||||
- A function call, where the arguments are selector themselves.
|
- A function call, where the arguments are selector themselves.
|
||||||
- A call to the :ref:`COUNT function <count-function>`, which counts all non-null results.
|
- A call to the :ref:`COUNT function <count-function>`, which counts all non-null results.
|
||||||
- A literal value (constant).
|
|
||||||
- A bind variable (`?` or `:name`).
|
|
||||||
|
|
||||||
Note that due to a quirk of the type system, literals and bind markers cannot be
|
|
||||||
used as top-level selectors, as the parser cannot infer their type. However, they can be used
|
|
||||||
when nested inside functions, as the function formal parameter types provide the
|
|
||||||
necessary context.
|
|
||||||
|
|
||||||
Aliases
|
Aliases
|
||||||
```````
|
```````
|
||||||
@@ -292,8 +281,7 @@ For example::
|
|||||||
ORDER BY embedding ANN OF [0.1, 0.2, 0.3, 0.4] LIMIT 5;
|
ORDER BY embedding ANN OF [0.1, 0.2, 0.3, 0.4] LIMIT 5;
|
||||||
|
|
||||||
|
|
||||||
Vector queries also support filtering with ``WHERE`` clauses on columns that are part of the primary key
|
Vector queries also support filtering with ``WHERE`` clauses on columns that are part of the primary key.
|
||||||
or columns provided in a definition of the index.
|
|
||||||
|
|
||||||
For example::
|
For example::
|
||||||
|
|
||||||
|
|||||||
@@ -140,18 +140,10 @@ Vector Index :label-note:`ScyllaDB Cloud`
|
|||||||
`ScyllaDB Cloud documentation <https://cloud.docs.scylladb.com/stable/vector-search/>`_.
|
`ScyllaDB Cloud documentation <https://cloud.docs.scylladb.com/stable/vector-search/>`_.
|
||||||
|
|
||||||
ScyllaDB supports creating vector indexes on tables, allowing queries on the table to use those indexes for efficient
|
ScyllaDB supports creating vector indexes on tables, allowing queries on the table to use those indexes for efficient
|
||||||
similarity search on vector data. Vector indexes can be a global index for indexing vectors per table or a local
|
similarity search on vector data.
|
||||||
index for indexing vectors per partition.
|
|
||||||
|
|
||||||
The vector index is the only custom type index supported in ScyllaDB. It is created using
|
The vector index is the only custom type index supported in ScyllaDB. It is created using
|
||||||
the ``CUSTOM`` keyword and specifying the index type as ``vector_index``. It is also possible to
|
the ``CUSTOM`` keyword and specifying the index type as ``vector_index``. Example:
|
||||||
add additional columns to the index for filtering the search results. The partition column
|
|
||||||
specified in the global vector index definition must be the vector column, and any subsequent
|
|
||||||
columns are treated as filtering columns. The local vector index requires that the partition key
|
|
||||||
of the base table is also the partition key of the index and the vector column is the first one
|
|
||||||
from the following columns.
|
|
||||||
|
|
||||||
Example of a simple index:
|
|
||||||
|
|
||||||
.. code-block:: cql
|
.. code-block:: cql
|
||||||
|
|
||||||
@@ -159,64 +151,6 @@ Example of a simple index:
|
|||||||
USING 'vector_index'
|
USING 'vector_index'
|
||||||
WITH OPTIONS = {'similarity_function': 'COSINE', 'maximum_node_connections': '16'};
|
WITH OPTIONS = {'similarity_function': 'COSINE', 'maximum_node_connections': '16'};
|
||||||
|
|
||||||
The vector column (``embedding``) is indexed to enable similarity search using
|
|
||||||
a global vector index. Additional filtering can be performed on the primary key
|
|
||||||
columns of the base table.
|
|
||||||
|
|
||||||
Example of a global vector index with additional filtering:
|
|
||||||
|
|
||||||
.. code-block:: cql
|
|
||||||
|
|
||||||
CREATE CUSTOM INDEX vectorIndex ON ImageEmbeddings (embedding, category, info)
|
|
||||||
USING 'vector_index'
|
|
||||||
WITH OPTIONS = {'similarity_function': 'COSINE', 'maximum_node_connections': '16'};
|
|
||||||
|
|
||||||
The vector column (``embedding``) is indexed to enable similarity search using
|
|
||||||
a global index. Additional columns are added for filtering the search results.
|
|
||||||
The filtering is possible on ``category``, ``info`` and all primary key columns
|
|
||||||
of the base table.
|
|
||||||
|
|
||||||
Example of a local vector index:
|
|
||||||
|
|
||||||
.. code-block:: cql
|
|
||||||
|
|
||||||
CREATE CUSTOM INDEX vectorIndex ON ImageEmbeddings ((id, created_at), embedding, category, info)
|
|
||||||
USING 'vector_index'
|
|
||||||
WITH OPTIONS = {'similarity_function': 'COSINE', 'maximum_node_connections': '16'};
|
|
||||||
|
|
||||||
The vector column (``embedding``) is indexed for similarity search (a local
|
|
||||||
index) and additional columns are added for filtering the search results. The
|
|
||||||
filtering is possible on ``category``, ``info`` and all primary key columns of
|
|
||||||
the base table. The columns ``id`` and ``created_at`` must be the partition key
|
|
||||||
of the base table.
|
|
||||||
|
|
||||||
Vector indexes support additional filtering columns of native data types
|
|
||||||
(excluding counter and duration). The indexed column itself must be a vector
|
|
||||||
column, while the extra columns can be used to filter search results.
|
|
||||||
|
|
||||||
The supported types are:
|
|
||||||
|
|
||||||
* ``ascii``
|
|
||||||
* ``bigint``
|
|
||||||
* ``blob``
|
|
||||||
* ``boolean``
|
|
||||||
* ``date``
|
|
||||||
* ``decimal``
|
|
||||||
* ``double``
|
|
||||||
* ``float``
|
|
||||||
* ``inet``
|
|
||||||
* ``int``
|
|
||||||
* ``smallint``
|
|
||||||
* ``text``
|
|
||||||
* ``varchar``
|
|
||||||
* ``time``
|
|
||||||
* ``timestamp``
|
|
||||||
* ``timeuuid``
|
|
||||||
* ``tinyint``
|
|
||||||
* ``uuid``
|
|
||||||
* ``varint``
|
|
||||||
|
|
||||||
|
|
||||||
The following options are supported for vector indexes. All of them are optional.
|
The following options are supported for vector indexes. All of them are optional.
|
||||||
|
|
||||||
+------------------------------+----------------------------------------------------------------------------------------------------------+---------------+
|
+------------------------------+----------------------------------------------------------------------------------------------------------+---------------+
|
||||||
@@ -277,9 +211,6 @@ Dropping a secondary index uses the ``DROP INDEX`` statement:
|
|||||||
The ``DROP INDEX`` statement is used to drop an existing secondary index. The argument of the statement is the index
|
The ``DROP INDEX`` statement is used to drop an existing secondary index. The argument of the statement is the index
|
||||||
name, which may optionally specify the keyspace of the index.
|
name, which may optionally specify the keyspace of the index.
|
||||||
|
|
||||||
If the index is currently being built, the ``DROP INDEX`` can still be executed. Once the ``DROP INDEX`` command is issued,
|
|
||||||
the system stops the build process and cleans up any partially built data associated with the index.
|
|
||||||
|
|
||||||
.. If the index does not exists, the statement will return an error, unless ``IF EXISTS`` is used in which case the
|
.. If the index does not exists, the statement will return an error, unless ``IF EXISTS`` is used in which case the
|
||||||
.. operation is a no-op.
|
.. operation is a no-op.
|
||||||
|
|
||||||
|
|||||||
@@ -13,19 +13,6 @@ The TTL can be set when defining a Table (CREATE), or when using the INSERT and
|
|||||||
The expiration works at the individual column level, which provides a lot of flexibility.
|
The expiration works at the individual column level, which provides a lot of flexibility.
|
||||||
By default, the TTL value is null, which means that the data will not expire.
|
By default, the TTL value is null, which means that the data will not expire.
|
||||||
|
|
||||||
This document is about CQL's classic per-write TTL feature, where individual
|
|
||||||
columns from the same row can expire at separate times if written at
|
|
||||||
different times. ScyllaDB also supports an alternative TTL feature,
|
|
||||||
`Per-row TTL <https://docs.scylladb.com/stable/cql/cql-extensions.html#per-row-ttl>`_.
|
|
||||||
In *per-row TTL* each row has an expiration time for the entire row,
|
|
||||||
defined by the value of the expiration-time column. In per-row TTL, the
|
|
||||||
entire row expires together regardless of how its indivial columns were
|
|
||||||
written, and the expiration time of an entire row can be modified by modifying
|
|
||||||
the expiration-time column. Another benefit of per-row TTL is that it
|
|
||||||
generates a CDC event when a row expires - in contrast in per-write TTL
|
|
||||||
(the feature described in this document) where expiration events do not
|
|
||||||
show up in CDC.
|
|
||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
|
|
||||||
The expiration time is always calculated as *now() on the Coordinator + TTL* where, *now()* is the wall clock during the corresponding write operation.
|
The expiration time is always calculated as *now() on the Coordinator + TTL* where, *now()* is the wall clock during the corresponding write operation.
|
||||||
|
|||||||
@@ -108,4 +108,6 @@ check the statement and throw if it is disallowed, similar to what
|
|||||||
|
|
||||||
Obviously, an audit definition must survive a server restart and stay
|
Obviously, an audit definition must survive a server restart and stay
|
||||||
consistent among all nodes in a cluster. We'll accomplish both by
|
consistent among all nodes in a cluster. We'll accomplish both by
|
||||||
storing audits in a system table.
|
storing audits in a system table. They will be cached in memory the
|
||||||
|
same way `permissions_cache` caches table contents in `permission_set`
|
||||||
|
objects resident in memory.
|
||||||
|
|||||||
@@ -39,17 +39,6 @@ Both client and server use the same string identifiers for the keys to determine
|
|||||||
negotiated extension set, judging by the presence of a particular key in the
|
negotiated extension set, judging by the presence of a particular key in the
|
||||||
SUPPORTED/STARTUP messages.
|
SUPPORTED/STARTUP messages.
|
||||||
|
|
||||||
## Client options
|
|
||||||
|
|
||||||
`client_options` column in `system.clients` table stores all data sent by the
|
|
||||||
client in STARTUP request, as a `map<text, text>`. This column may be useful
|
|
||||||
for debugging and monitoring purposes.
|
|
||||||
|
|
||||||
Drivers can send additional data in STARTUP, e.g. load balancing policy, retry
|
|
||||||
policy, timeouts, and other configuration.
|
|
||||||
Such data should be sent in `CLIENT_OPTIONS` key, as JSON. The recommended
|
|
||||||
structure of this JSON will be decided in the future.
|
|
||||||
|
|
||||||
## Intranode sharding
|
## Intranode sharding
|
||||||
|
|
||||||
This extension allows the driver to discover how Scylla internally
|
This extension allows the driver to discover how Scylla internally
|
||||||
@@ -85,6 +74,8 @@ The keys and values are:
|
|||||||
as an indicator to which shard client wants to connect. The desired shard number
|
as an indicator to which shard client wants to connect. The desired shard number
|
||||||
is calculated as: `desired_shard_no = client_port % SCYLLA_NR_SHARDS`.
|
is calculated as: `desired_shard_no = client_port % SCYLLA_NR_SHARDS`.
|
||||||
Its value is a decimal representation of type `uint16_t`, by default `19142`.
|
Its value is a decimal representation of type `uint16_t`, by default `19142`.
|
||||||
|
- `CLIENT_OPTIONS` is a string containing a JSON object representation that
|
||||||
|
contains CQL Driver configuration, e.g. load balancing policy, retry policy, timeouts, etc.
|
||||||
|
|
||||||
Currently, one `SCYLLA_SHARDING_ALGORITHM` is defined,
|
Currently, one `SCYLLA_SHARDING_ALGORITHM` is defined,
|
||||||
`biased-token-round-robin`. To apply the algorithm,
|
`biased-token-round-robin`. To apply the algorithm,
|
||||||
|
|||||||
@@ -563,18 +563,17 @@ CREATE TABLE system.clients (
|
|||||||
address inet,
|
address inet,
|
||||||
port int,
|
port int,
|
||||||
client_type text,
|
client_type text,
|
||||||
client_options frozen<map<text, text>>,
|
|
||||||
connection_stage text,
|
connection_stage text,
|
||||||
driver_name text,
|
driver_name text,
|
||||||
driver_version text,
|
driver_version text,
|
||||||
hostname text,
|
hostname text,
|
||||||
protocol_version int,
|
protocol_version int,
|
||||||
scheduling_group text,
|
|
||||||
shard_id int,
|
shard_id int,
|
||||||
ssl_cipher_suite text,
|
ssl_cipher_suite text,
|
||||||
ssl_enabled boolean,
|
ssl_enabled boolean,
|
||||||
ssl_protocol text,
|
ssl_protocol text,
|
||||||
username text,
|
username text,
|
||||||
|
scheduling_group text,
|
||||||
PRIMARY KEY (address, port, client_type)
|
PRIMARY KEY (address, port, client_type)
|
||||||
) WITH CLUSTERING ORDER BY (port ASC, client_type ASC)
|
) WITH CLUSTERING ORDER BY (port ASC, client_type ASC)
|
||||||
~~~
|
~~~
|
||||||
@@ -582,7 +581,4 @@ CREATE TABLE system.clients (
|
|||||||
Currently only CQL clients are tracked. The table used to be present on disk (in data
|
Currently only CQL clients are tracked. The table used to be present on disk (in data
|
||||||
directory) before and including version 4.5.
|
directory) before and including version 4.5.
|
||||||
|
|
||||||
`client_options` column stores all data sent by the client in the STARTUP request.
|
|
||||||
This column is useful for debugging and monitoring purposes.
|
|
||||||
|
|
||||||
## TODO: the rest
|
## TODO: the rest
|
||||||
|
|||||||
@@ -156,7 +156,7 @@ How do I check the current version of ScyllaDB that I am running?
|
|||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
* On a regular system or VM (running Ubuntu, CentOS, or RedHat Enterprise): :code:`$ scylla --version`
|
* On a regular system or VM (running Ubuntu, CentOS, or RedHat Enterprise): :code:`$ scylla --version`
|
||||||
|
|
||||||
Check the `Operating System Support Guide <https://docs.scylladb.com/stable/versioning/os-support-per-version.html>`_ for a list of supported operating systems and versions.
|
Check the :doc:`Operating System Support Guide </getting-started/os-support>` for a list of supported operating systems and versions.
|
||||||
|
|
||||||
* On a docker node: :code:`$ docker exec -it Node_Z scylla --version`
|
* On a docker node: :code:`$ docker exec -it Node_Z scylla --version`
|
||||||
|
|
||||||
|
|||||||
@@ -3,9 +3,9 @@
|
|||||||
Automatic Repair
|
Automatic Repair
|
||||||
================
|
================
|
||||||
|
|
||||||
Traditionally, launching :doc:`repairs </operating-scylla/procedures/maintenance/repair>` in a ScyllaDB cluster is left to an external process, typically done via `Scylla Manager <https://manager.docs.scylladb.com/stable/repair/index.html>`_.
|
Traditionally, launching `repairs </operating-scylla/procedures/maintenance/repair>`_ in a ScyllaDB cluster is left to an external process, typically done via `Scylla Manager <https://manager.docs.scylladb.com/stable/repair/index.html>`_.
|
||||||
|
|
||||||
Automatic repair offers built-in scheduling in ScyllaDB itself. If the time since the last repair is greater than the configured repair interval, ScyllaDB will start a repair for the :doc:`tablet table </architecture/tablets>` automatically.
|
Automatic repair offers built-in scheduling in ScyllaDB itself. If the time since the last repair is greater than the configured repair interval, ScyllaDB will start a repair for the tablet `tablet </architecture/tablets>`_ automatically.
|
||||||
Repairs are spread over time and among nodes and shards, to avoid load spikes or any adverse effects on user workloads.
|
Repairs are spread over time and among nodes and shards, to avoid load spikes or any adverse effects on user workloads.
|
||||||
|
|
||||||
To enable automatic repair, add this to the configuration (``scylla.yaml``):
|
To enable automatic repair, add this to the configuration (``scylla.yaml``):
|
||||||
@@ -20,4 +20,4 @@ More featureful configuration methods will be implemented in the future.
|
|||||||
|
|
||||||
To disable, set ``auto_repair_enabled_default: false``.
|
To disable, set ``auto_repair_enabled_default: false``.
|
||||||
|
|
||||||
Automatic repair relies on :doc:`Incremental Repair </features/incremental-repair>` and as such it only works with :doc:`tablet </architecture/tablets>` tables.
|
Automatic repair relies on `Incremental Repair </features/incremental-repair>`_ and as such it only works with `tablet </architecture/tablets>`_ tables.
|
||||||
|
|||||||
@@ -10,11 +10,6 @@ The CDC log table reflects operations that are performed on the base table. Diff
|
|||||||
* row range deletions,
|
* row range deletions,
|
||||||
* partition deletions.
|
* partition deletions.
|
||||||
|
|
||||||
Note that TTL expirations are not operations, and not reflected in the CDC
|
|
||||||
log tables. If you do need CDC events when entire rows expire, consider
|
|
||||||
using `per-row TTL <https://docs.scylladb.com/stable/cql/cql-extensions.html#per-row-ttl>`_
|
|
||||||
which does generate special CDC events when rows expire.
|
|
||||||
|
|
||||||
The following sections describe how each of these operations are handled by the CDC log.
|
The following sections describe how each of these operations are handled by the CDC log.
|
||||||
|
|
||||||
.. include:: /features/cdc/_common/cdc-updates.rst
|
.. include:: /features/cdc/_common/cdc-updates.rst
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
Incremental Repair
|
Incremental Repair
|
||||||
==================
|
==================
|
||||||
|
|
||||||
ScyllaDB's standard :doc:`repair </operating-scylla/procedures/maintenance/repair>` process scans and processes all the data on a node, regardless of whether it has changed since the last repair. This operation can be resource-intensive and time-consuming. The Incremental Repair feature provides a much more efficient and lightweight alternative for maintaining data consistency.
|
ScyllaDB's standard `repair </operating-scylla/procedures/maintenance/repair>`_ process scans and processes all the data on a node, regardless of whether it has changed since the last repair. This operation can be resource-intensive and time-consuming. The Incremental Repair feature provides a much more efficient and lightweight alternative for maintaining data consistency.
|
||||||
|
|
||||||
The core idea of incremental repair is to repair only the data that has been written or changed since the last repair was run. It intelligently skips data that has already been verified, dramatically reducing the time, I/O, and CPU resources required for the repair operation.
|
The core idea of incremental repair is to repair only the data that has been written or changed since the last repair was run. It intelligently skips data that has already been verified, dramatically reducing the time, I/O, and CPU resources required for the repair operation.
|
||||||
|
|
||||||
@@ -51,7 +51,7 @@ Benefits of Incremental Repair
|
|||||||
* **Reduced Resource Usage:** Consumes significantly less CPU, I/O, and network bandwidth compared to a full repair.
|
* **Reduced Resource Usage:** Consumes significantly less CPU, I/O, and network bandwidth compared to a full repair.
|
||||||
* **More Frequent Repairs:** The efficiency of incremental repair allows you to run it more frequently, ensuring a higher level of data consistency across your cluster at all times.
|
* **More Frequent Repairs:** The efficiency of incremental repair allows you to run it more frequently, ensuring a higher level of data consistency across your cluster at all times.
|
||||||
|
|
||||||
Tables using Incremental Repair can schedule repairs in ScyllaDB itself, with :doc:`Automatic Repair </features/automatic-repair>`.
|
Tables using Incremental Repair can schedule repairs in ScyllaDB itself, with `Automatic Repair </features/automatic-repair>`_.
|
||||||
|
|
||||||
Notes
|
Notes
|
||||||
-----
|
-----
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ Getting Started
|
|||||||
:class: my-panel
|
:class: my-panel
|
||||||
|
|
||||||
* :doc:`ScyllaDB System Requirements Guide</getting-started/system-requirements/>`
|
* :doc:`ScyllaDB System Requirements Guide</getting-started/system-requirements/>`
|
||||||
* `OS Support by Platform and Version <https://docs.scylladb.com/stable/versioning/os-support-per-version.html>`_
|
* :doc:`OS Support by Platform and Version</getting-started/os-support/>`
|
||||||
|
|
||||||
.. panel-box::
|
.. panel-box::
|
||||||
:title: Install and Configure ScyllaDB
|
:title: Install and Configure ScyllaDB
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ Install ScyllaDB |CURRENT_VERSION|
|
|||||||
/getting-started/install-scylla/launch-on-azure
|
/getting-started/install-scylla/launch-on-azure
|
||||||
/getting-started/installation-common/scylla-web-installer
|
/getting-started/installation-common/scylla-web-installer
|
||||||
/getting-started/install-scylla/install-on-linux
|
/getting-started/install-scylla/install-on-linux
|
||||||
|
/getting-started/installation-common/install-jmx
|
||||||
/getting-started/install-scylla/run-in-docker
|
/getting-started/install-scylla/run-in-docker
|
||||||
/getting-started/installation-common/unified-installer
|
/getting-started/installation-common/unified-installer
|
||||||
/getting-started/installation-common/air-gapped-install
|
/getting-started/installation-common/air-gapped-install
|
||||||
@@ -23,9 +24,9 @@ Keep your versions up-to-date. The two latest versions are supported. Also, alwa
|
|||||||
:id: "getting-started"
|
:id: "getting-started"
|
||||||
:class: my-panel
|
:class: my-panel
|
||||||
|
|
||||||
* :doc:`Launch ScyllaDB on AWS </getting-started/install-scylla/launch-on-aws>`
|
* :doc:`Launch ScyllaDB |CURRENT_VERSION| on AWS </getting-started/install-scylla/launch-on-aws>`
|
||||||
* :doc:`Launch ScyllaDB on GCP </getting-started/install-scylla/launch-on-gcp>`
|
* :doc:`Launch ScyllaDB |CURRENT_VERSION| on GCP </getting-started/install-scylla/launch-on-gcp>`
|
||||||
* :doc:`Launch ScyllaDB on Azure </getting-started/install-scylla/launch-on-azure>`
|
* :doc:`Launch ScyllaDB |CURRENT_VERSION| on Azure </getting-started/install-scylla/launch-on-azure>`
|
||||||
|
|
||||||
|
|
||||||
.. panel-box::
|
.. panel-box::
|
||||||
@@ -34,7 +35,8 @@ Keep your versions up-to-date. The two latest versions are supported. Also, alwa
|
|||||||
:class: my-panel
|
:class: my-panel
|
||||||
|
|
||||||
* :doc:`Install ScyllaDB with Web Installer (recommended) </getting-started/installation-common/scylla-web-installer>`
|
* :doc:`Install ScyllaDB with Web Installer (recommended) </getting-started/installation-common/scylla-web-installer>`
|
||||||
* :doc:`Install ScyllaDB Linux Packages </getting-started/install-scylla/install-on-linux>`
|
* :doc:`Install ScyllaDB |CURRENT_VERSION| Linux Packages </getting-started/install-scylla/install-on-linux>`
|
||||||
|
* :doc:`Install scylla-jmx Package </getting-started/installation-common/install-jmx>`
|
||||||
* :doc:`Install ScyllaDB Without root Privileges </getting-started/installation-common/unified-installer>`
|
* :doc:`Install ScyllaDB Without root Privileges </getting-started/installation-common/unified-installer>`
|
||||||
* :doc:`Air-gapped Server Installation </getting-started/installation-common/air-gapped-install>`
|
* :doc:`Air-gapped Server Installation </getting-started/installation-common/air-gapped-install>`
|
||||||
* :doc:`ScyllaDB Developer Mode </getting-started/installation-common/dev-mod>`
|
* :doc:`ScyllaDB Developer Mode </getting-started/installation-common/dev-mod>`
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ This article will help you install ScyllaDB on Linux using platform-specific pac
|
|||||||
Prerequisites
|
Prerequisites
|
||||||
----------------
|
----------------
|
||||||
|
|
||||||
* Ubuntu, Debian, CentOS, or RHEL (see `OS Support by Platform and Version <https://docs.scylladb.com/stable/versioning/os-support-per-version.html>`_
|
* Ubuntu, Debian, CentOS, or RHEL (see :doc:`OS Support by Platform and Version </getting-started/os-support>`
|
||||||
for details about supported versions and architecture)
|
for details about supported versions and architecture)
|
||||||
* Root or ``sudo`` access to the system
|
* Root or ``sudo`` access to the system
|
||||||
* Open :ref:`ports used by ScyllaDB <networking-ports>`
|
* Open :ref:`ports used by ScyllaDB <networking-ports>`
|
||||||
@@ -94,6 +94,16 @@ Install ScyllaDB
|
|||||||
|
|
||||||
apt-get install scylla{,-server,-kernel-conf,-node-exporter,-conf,-python3,-cqlsh}=2025.3.1-0.20250907.2bbf3cf669bb-1
|
apt-get install scylla{,-server,-kernel-conf,-node-exporter,-conf,-python3,-cqlsh}=2025.3.1-0.20250907.2bbf3cf669bb-1
|
||||||
|
|
||||||
|
|
||||||
|
#. (Ubuntu only) Set Java 11.
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y openjdk-11-jre-headless
|
||||||
|
sudo update-java-alternatives --jre-headless -s java-1.11.0-openjdk-amd64
|
||||||
|
|
||||||
|
|
||||||
.. group-tab:: Centos/RHEL
|
.. group-tab:: Centos/RHEL
|
||||||
|
|
||||||
#. Install the EPEL repository.
|
#. Install the EPEL repository.
|
||||||
@@ -147,6 +157,14 @@ Install ScyllaDB
|
|||||||
|
|
||||||
sudo yum install scylla-5.2.3
|
sudo yum install scylla-5.2.3
|
||||||
|
|
||||||
|
(Optional) Install scylla-jmx
|
||||||
|
-------------------------------
|
||||||
|
|
||||||
|
scylla-jmx is an optional package and is not installed by default.
|
||||||
|
If you need JMX server, see :doc:`Install scylla-jmx Package </getting-started/installation-common/install-jmx>`.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
.. include:: /getting-started/_common/setup-after-install.rst
|
.. include:: /getting-started/_common/setup-after-install.rst
|
||||||
|
|
||||||
Next Steps
|
Next Steps
|
||||||
|
|||||||
78
docs/getting-started/installation-common/install-jmx.rst
Normal file
78
docs/getting-started/installation-common/install-jmx.rst
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
|
||||||
|
======================================
|
||||||
|
Install scylla-jmx Package
|
||||||
|
======================================
|
||||||
|
|
||||||
|
scylla-jmx is an optional package and is not installed by default.
|
||||||
|
If you need JMX server, you can still install it from scylla-jmx GitHub page.
|
||||||
|
|
||||||
|
.. tabs::
|
||||||
|
|
||||||
|
.. group-tab:: Debian/Ubuntu
|
||||||
|
#. Download .deb package from scylla-jmx page.
|
||||||
|
|
||||||
|
Access to https://github.com/scylladb/scylla-jmx, select latest
|
||||||
|
release from "releases", download a file end with ".deb".
|
||||||
|
|
||||||
|
#. (Optional) Transfer the downloaded package to the install node.
|
||||||
|
|
||||||
|
If the pc from which you downloaded the package is different from
|
||||||
|
the node where you install scylladb, you will need to transfer
|
||||||
|
the files to the node.
|
||||||
|
|
||||||
|
#. Install scylla-jmx package.
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
|
||||||
|
sudo apt install -y ./scylla-jmx_<version>_all.deb
|
||||||
|
|
||||||
|
|
||||||
|
.. group-tab:: Centos/RHEL
|
||||||
|
|
||||||
|
#. Download .rpm package from scylla-jmx page.
|
||||||
|
|
||||||
|
Access to https://github.com/scylladb/scylla-jmx, select latest
|
||||||
|
release from "releases", download a file end with ".rpm".
|
||||||
|
|
||||||
|
#. (Optional) Transfer the downloaded package to the install node.
|
||||||
|
|
||||||
|
If the pc from which you downloaded the package is different from
|
||||||
|
the node where you install scylladb, you will need to transfer
|
||||||
|
the files to the node.
|
||||||
|
|
||||||
|
#. Install scylla-jmx package.
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
|
||||||
|
sudo yum install -y ./scylla-jmx-<version>.noarch.rpm
|
||||||
|
|
||||||
|
|
||||||
|
.. group-tab:: Install without root privileges
|
||||||
|
|
||||||
|
#. Download .tar.gz package from scylla-jmx page.
|
||||||
|
|
||||||
|
Access to https://github.com/scylladb/scylla-jmx, select latest
|
||||||
|
release from "releases", download a file end with ".tar.gz".
|
||||||
|
|
||||||
|
#. (Optional) Transfer the downloaded package to the install node.
|
||||||
|
|
||||||
|
If the pc from which you downloaded the package is different from
|
||||||
|
the node where you install scylladb, you will need to transfer
|
||||||
|
the files to the node.
|
||||||
|
|
||||||
|
#. Install scylla-jmx package.
|
||||||
|
|
||||||
|
.. code:: console
|
||||||
|
|
||||||
|
tar xpf scylla-jmx-<version>.noarch.tar.gz
|
||||||
|
cd scylla-jmx
|
||||||
|
./install.sh --nonroot
|
||||||
|
|
||||||
|
Next Steps
|
||||||
|
-----------
|
||||||
|
|
||||||
|
* :doc:`Configure ScyllaDB </getting-started/system-configuration>`
|
||||||
|
* Manage your clusters with `ScyllaDB Manager <https://manager.docs.scylladb.com/>`_
|
||||||
|
* Monitor your cluster and data with `ScyllaDB Monitoring <https://monitoring.docs.scylladb.com/>`_
|
||||||
|
* Get familiar with ScyllaDB’s :doc:`command line reference guide </operating-scylla/nodetool>`.
|
||||||
|
* Learn about ScyllaDB at `ScyllaDB University <https://university.scylladb.com/>`_
|
||||||
@@ -10,7 +10,7 @@ Prerequisites
|
|||||||
--------------
|
--------------
|
||||||
|
|
||||||
Ensure that your platform is supported by the ScyllaDB version you want to install.
|
Ensure that your platform is supported by the ScyllaDB version you want to install.
|
||||||
See `OS Support by Platform and Version <https://docs.scylladb.com/stable/versioning/os-support-per-version.html>`_.
|
See :doc:`OS Support by Platform and Version </getting-started/os-support/>`.
|
||||||
|
|
||||||
Install ScyllaDB with Web Installer
|
Install ScyllaDB with Web Installer
|
||||||
---------------------------------------
|
---------------------------------------
|
||||||
|
|||||||
@@ -12,8 +12,7 @@ the package manager (dnf and apt).
|
|||||||
Prerequisites
|
Prerequisites
|
||||||
---------------
|
---------------
|
||||||
Ensure your platform is supported by the ScyllaDB version you want to install.
|
Ensure your platform is supported by the ScyllaDB version you want to install.
|
||||||
See `OS Support <https://docs.scylladb.com/stable/versioning/os-support-per-version.html>`_
|
See :doc:`OS Support </getting-started/os-support>` for information about supported Linux distributions and versions.
|
||||||
for information about supported Linux distributions and versions.
|
|
||||||
|
|
||||||
Note that if you're on CentOS 7, only root offline installation is supported.
|
Note that if you're on CentOS 7, only root offline installation is supported.
|
||||||
|
|
||||||
@@ -49,6 +48,11 @@ Download and Install
|
|||||||
|
|
||||||
./install.sh --nonroot --python3 ~/scylladb/python3/bin/python3
|
./install.sh --nonroot --python3 ~/scylladb/python3/bin/python3
|
||||||
|
|
||||||
|
#. (Optional) Install scylla-jmx
|
||||||
|
|
||||||
|
scylla-jmx is an optional package and is not installed by default.
|
||||||
|
If you need JMX server, see :doc:`Install scylla-jmx Package </getting-started/installation-common/install-jmx>`.
|
||||||
|
|
||||||
Configure and Run ScyllaDB
|
Configure and Run ScyllaDB
|
||||||
----------------------------
|
----------------------------
|
||||||
|
|
||||||
|
|||||||
26
docs/getting-started/os-support.rst
Normal file
26
docs/getting-started/os-support.rst
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
OS Support by Linux Distributions and Versions
|
||||||
|
==============================================
|
||||||
|
|
||||||
|
The following matrix shows which Linux distributions, containers, and images
|
||||||
|
are :ref:`supported <os-support-definition>` with which versions of ScyllaDB.
|
||||||
|
|
||||||
|
.. datatemplate:json:: /_static/data/os-support.json
|
||||||
|
:template: platforms.tmpl
|
||||||
|
|
||||||
|
``*`` 2024.1.9 and later
|
||||||
|
|
||||||
|
All releases are available as a Docker container, EC2 AMI, GCP, and Azure images.
|
||||||
|
|
||||||
|
.. _os-support-definition:
|
||||||
|
|
||||||
|
By *supported*, we mean that:
|
||||||
|
|
||||||
|
- A binary installation package is available.
|
||||||
|
- The download and install procedures are tested as part of the ScyllaDB release process for each version.
|
||||||
|
- An automated install is included from :doc:`ScyllaDB Web Installer for Linux tool </getting-started/installation-common/scylla-web-installer>` (for the latest versions).
|
||||||
|
|
||||||
|
You can `build ScyllaDB from source <https://github.com/scylladb/scylladb#build-prerequisites>`_
|
||||||
|
on other x86_64 or aarch64 platforms, without any guarantees.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -8,12 +8,12 @@ ScyllaDB Requirements
|
|||||||
:hidden:
|
:hidden:
|
||||||
|
|
||||||
system-requirements
|
system-requirements
|
||||||
OS Support <https://docs.scylladb.com/stable/versioning/os-support-per-version.html>
|
OS Support <os-support>
|
||||||
Cloud Instance Recommendations <cloud-instance-recommendations>
|
Cloud Instance Recommendations <cloud-instance-recommendations>
|
||||||
scylla-in-a-shared-environment
|
scylla-in-a-shared-environment
|
||||||
|
|
||||||
* :doc:`System Requirements</getting-started/system-requirements/>`
|
* :doc:`System Requirements</getting-started/system-requirements/>`
|
||||||
* `OS Support by Platform and Version <https://docs.scylladb.com/stable/versioning/os-support-per-version.html>`_
|
* :doc:`OS Support by Platform and Version</getting-started/os-support/>`
|
||||||
* :doc:`Cloud Instance Recommendations AWS, GCP, and Azure </getting-started/cloud-instance-recommendations>`
|
* :doc:`Cloud Instance Recommendations AWS, GCP, and Azure </getting-started/cloud-instance-recommendations>`
|
||||||
* :doc:`Running ScyllaDB in a Shared Environment </getting-started/scylla-in-a-shared-environment>`
|
* :doc:`Running ScyllaDB in a Shared Environment </getting-started/scylla-in-a-shared-environment>`
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ Supported Platforms
|
|||||||
===================
|
===================
|
||||||
ScyllaDB runs on 64-bit Linux. The x86_64 and AArch64 architectures are supported (AArch64 support includes AWS EC2 Graviton).
|
ScyllaDB runs on 64-bit Linux. The x86_64 and AArch64 architectures are supported (AArch64 support includes AWS EC2 Graviton).
|
||||||
|
|
||||||
See `OS Support by Platform and Version <https://docs.scylladb.com/stable/versioning/os-support-per-version.html>`_ for information about
|
See :doc:`OS Support by Platform and Version </getting-started/os-support>` for information about
|
||||||
supported operating systems, distros, and versions.
|
supported operating systems, distros, and versions.
|
||||||
|
|
||||||
See :doc:`Cloud Instance Recommendations for AWS, GCP, and Azure </getting-started/cloud-instance-recommendations>` for information
|
See :doc:`Cloud Instance Recommendations for AWS, GCP, and Azure </getting-started/cloud-instance-recommendations>` for information
|
||||||
|
|||||||
43
docs/kb/increase-permission-cache.rst
Normal file
43
docs/kb/increase-permission-cache.rst
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
====================================================
|
||||||
|
Increase Permission Cache to Avoid Non-paged Queries
|
||||||
|
====================================================
|
||||||
|
|
||||||
|
**Topic: Mitigate non-paged queries coming from connection authentications**
|
||||||
|
|
||||||
|
**Audience: ScyllaDB administrators**
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Issue
|
||||||
|
-----
|
||||||
|
|
||||||
|
If you create many roles and give them many permissions, your nodes might experience spikes of non-paged queries.
|
||||||
|
|
||||||
|
Root Cause
|
||||||
|
----------
|
||||||
|
|
||||||
|
``permissions_cache_max_entries`` is set to 1000 by default. This setting may not be high enough for bigger deployments with lots of tables, users, and roles with permissions.
|
||||||
|
|
||||||
|
|
||||||
|
Solution
|
||||||
|
--------
|
||||||
|
|
||||||
|
Open the scylla.yaml configuration for editing and adjust the following parameters:
|
||||||
|
``permissions_cache_max_entries`` - increase this value to suit your needs. See the example below.
|
||||||
|
``permissions_update_interval_in_ms``
|
||||||
|
``permissions_validity_in_ms``
|
||||||
|
|
||||||
|
.. note:: ``permissions_update_interval_in_ms`` and ``permissions_validity_in_ms`` can also be set so that authentication records are served from the cache instead of lookups, which generate non-paged queries.
|
||||||
|
|
||||||
|
|
||||||
|
Example
|
||||||
|
-------
|
||||||
|
|
||||||
|
Note that ``permissions_cache_max_entries`` has no maximum value; it is limited only by your available memory.
|
||||||
|
The cache consumes memory as it caches all records from the list of users and their associated roles (similar to a cartesian product).
|
||||||
|
|
||||||
|
Every combination of user, role, and permission (7 types) is cached on a per-table basis.
|
||||||
|
|
||||||
|
If, for example, you have 1 user with 1 role and 1 table, the table will have 7 permission types, resulting in 7 entries: 1 * 1 * 1 * 7 = 7.
|
||||||
|
When expanded to 5 users, 5 roles, and 10 tables this will be 5 * 5 * 10 * 7 = 1750 entries, which is above the default cache value of 1000. The entries that go over the max value (750 entries) will be non-paged queries for every new connection from the client (and clients tend to reconnect often).
|
||||||
|
In cases like this, you may want to consider trading your memory for not stressing the entire cluster with ``auth`` queries.
|
||||||
@@ -38,6 +38,7 @@ Knowledge Base
|
|||||||
* :doc:`If a query does not reveal enough results </kb/cqlsh-results>`
|
* :doc:`If a query does not reveal enough results </kb/cqlsh-results>`
|
||||||
* :doc:`How to Change gc_grace_seconds for a Table </kb/gc-grace-seconds>` - How to change the ``gc_grace_seconds`` parameter and prevent data resurrection.
|
* :doc:`How to Change gc_grace_seconds for a Table </kb/gc-grace-seconds>` - How to change the ``gc_grace_seconds`` parameter and prevent data resurrection.
|
||||||
* :doc:`How to flush old tombstones from a table </kb/tombstones-flush>` - How to remove old tombstones from SSTables.
|
* :doc:`How to flush old tombstones from a table </kb/tombstones-flush>` - How to remove old tombstones from SSTables.
|
||||||
|
* :doc:`Increase Cache to Avoid Non-paged Queries </kb/increase-permission-cache>` - How to increase the ``permissions_cache_max_entries`` setting.
|
||||||
* :doc:`How to Safely Increase the Replication Factor </kb/rf-increase>`
|
* :doc:`How to Safely Increase the Replication Factor </kb/rf-increase>`
|
||||||
* :doc:`Facts about TTL, Compaction, and gc_grace_seconds <ttl-facts>`
|
* :doc:`Facts about TTL, Compaction, and gc_grace_seconds <ttl-facts>`
|
||||||
* :doc:`Efficient Tombstone Garbage Collection in ICS <garbage-collection-ics>`
|
* :doc:`Efficient Tombstone Garbage Collection in ICS <garbage-collection-ics>`
|
||||||
|
|||||||
@@ -15,10 +15,6 @@ It is not always clear under which circumstances data is deleted when using Time
|
|||||||
This article clarifies what may not be apparent.
|
This article clarifies what may not be apparent.
|
||||||
It corrects some assumptions you may have that are not exactly true.
|
It corrects some assumptions you may have that are not exactly true.
|
||||||
|
|
||||||
This document is about CQL's :doc:`per-write TTL feature </cql/time-to-live>`,
|
|
||||||
the `per-row TTL <https://docs.scylladb.com/stable/cql/cql-extensions.html#per-row-ttl>`_
|
|
||||||
feature behaves differently.
|
|
||||||
|
|
||||||
|
|
||||||
Facts About Expiring Data
|
Facts About Expiring Data
|
||||||
-------------------------
|
-------------------------
|
||||||
|
|||||||
@@ -898,63 +898,6 @@ By default, each input sstable is filtered individually. Use ``--merge`` to filt
|
|||||||
|
|
||||||
Output sstables use the latest supported sstable format (can be changed with ``--sstable-version``).
|
Output sstables use the latest supported sstable format (can be changed with ``--sstable-version``).
|
||||||
|
|
||||||
split
|
|
||||||
^^^^^
|
|
||||||
|
|
||||||
Split SSTable(s) into multiple output SSTables based on token boundaries.
|
|
||||||
|
|
||||||
This operation divides SSTable(s) according to the specified split tokens, creating one output SSTable per token range.
|
|
||||||
This is useful for redistributing data across different token ranges, such as when preparing data for different nodes or shards.
|
|
||||||
|
|
||||||
Tokens should be provided via the ``--split-token`` (or ``-t``) option. Multiple tokens can be specified by repeating the option.
|
|
||||||
The tokens will be sorted automatically to ensure proper ordering.
|
|
||||||
|
|
||||||
For N split tokens, N+1 output SSTables will be generated:
|
|
||||||
|
|
||||||
* First SSTable: from minimum token to first split token
|
|
||||||
* Middle SSTables: between consecutive split tokens
|
|
||||||
* Last SSTable: from last split token to maximum token
|
|
||||||
|
|
||||||
By default, each input SSTable is split individually. Use ``--merge`` to split the combined content of all input SSTables, producing a single set of output SSTables.
|
|
||||||
|
|
||||||
Output SSTables use the latest supported sstable format (can be changed with ``--sstable-version``) and are written to the directory specified by ``--output-dir``.
|
|
||||||
|
|
||||||
**Examples:**
|
|
||||||
|
|
||||||
Split a single SSTable at token boundaries 100 and 500:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
scylla sstable split --split-token 100 --split-token 500 /path/to/md-123456-big-Data.db
|
|
||||||
|
|
||||||
Or using the short-hand form:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
scylla sstable split -t 100 -t 500 /path/to/md-123456-big-Data.db
|
|
||||||
|
|
||||||
This will create 3 output SSTables:
|
|
||||||
|
|
||||||
* One containing partitions with tokens < 100
|
|
||||||
* One containing partitions with tokens >= 100 and < 500
|
|
||||||
* One containing partitions with tokens >= 500
|
|
||||||
|
|
||||||
Split multiple SSTables individually:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
scylla sstable split -t 100 -t 500 /path/to/md-123456-big-Data.db /path/to/md-123457-big-Data.db
|
|
||||||
|
|
||||||
This will split each input SSTable separately, creating 6 output SSTables total (3 per input).
|
|
||||||
|
|
||||||
Split multiple SSTables as a combined stream:
|
|
||||||
|
|
||||||
.. code-block:: console
|
|
||||||
|
|
||||||
scylla sstable split --merge -t 100 -t 500 /path/to/md-123456-big-Data.db /path/to/md-123457-big-Data.db
|
|
||||||
|
|
||||||
This will merge both input SSTables first, then split the combined data, creating 3 output SSTables.
|
|
||||||
|
|
||||||
Examples
|
Examples
|
||||||
--------
|
--------
|
||||||
Dumping the content of the SStable:
|
Dumping the content of the SStable:
|
||||||
|
|||||||
@@ -25,8 +25,7 @@ Before you run ``nodetool decommission``:
|
|||||||
starting the removal procedure.
|
starting the removal procedure.
|
||||||
* Make sure that the number of nodes remaining in the DC after you decommission a node
|
* Make sure that the number of nodes remaining in the DC after you decommission a node
|
||||||
will be the same or higher than the Replication Factor configured for the keyspace
|
will be the same or higher than the Replication Factor configured for the keyspace
|
||||||
in this DC. Please mind that e.g. audit feature, which is enabled by default, may require
|
in this DC. If the number of remaining nodes is lower than the RF, the decommission
|
||||||
adjusting ``audit`` keyspace. If the number of remaining nodes is lower than the RF, the decommission
|
|
||||||
request may fail.
|
request may fail.
|
||||||
In such a case, ALTER the keyspace to reduce the RF before running ``nodetool decommission``.
|
In such a case, ALTER the keyspace to reduce the RF before running ``nodetool decommission``.
|
||||||
|
|
||||||
|
|||||||
@@ -25,8 +25,4 @@ For Example:
|
|||||||
|
|
||||||
nodetool rebuild <source-dc-name>
|
nodetool rebuild <source-dc-name>
|
||||||
|
|
||||||
``nodetool rebuild`` command works only for vnode keyspaces. For tablet keyspaces, use ``nodetool cluster repair`` instead.
|
|
||||||
|
|
||||||
See :doc:`Data Distribution with Tablets </architecture/tablets/>`.
|
|
||||||
|
|
||||||
.. include:: nodetool-index.rst
|
.. include:: nodetool-index.rst
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user