Compare commits

..

1 Commits

Author SHA1 Message Date
Dani Tweig
42629175b5 Update urgent_issue_reminder.yml - run daily
The action will run daily, alerting about urgent issues not touched in the last 7 days.
2025-08-20 14:49:33 +03:00
1178 changed files with 17378 additions and 56859 deletions

View File

@@ -47,29 +47,13 @@ def create_pull_request(repo, new_branch_name, base_branch_name, pr, backport_pr
draft=is_draft
)
logging.info(f"Pull request created: {backport_pr.html_url}")
labels_to_add = []
priority_labels = {"P0", "P1"}
parent_pr_labels = [label.name for label in pr.labels]
for label in priority_labels:
if label in parent_pr_labels:
labels_to_add.append(label)
labels_to_add.append("force_on_cloud")
logging.info(f"Adding {label} and force_on_cloud labels from parent PR to backport PR")
break # Only apply the highest priority label
if is_collaborator:
backport_pr.add_to_assignees(pr.user)
if is_draft:
labels_to_add.append("conflicts")
backport_pr.add_to_labels("conflicts")
pr_comment = f"@{pr.user.login} - This PR was marked as draft because it has conflicts\n"
pr_comment += "Please resolve them and remove the 'conflicts' label. The PR will be made ready for review automatically."
pr_comment += "Please resolve them and mark this PR as ready for review"
backport_pr.create_issue_comment(pr_comment)
# Apply all labels at once if we have any
if labels_to_add:
backport_pr.add_to_labels(*labels_to_add)
logging.info(f"Added labels to backport PR: {labels_to_add}")
logging.info(f"Assigned PR to original author: {pr.user}")
return backport_pr
except GithubException as e:
@@ -142,31 +126,20 @@ def backport(repo, pr, version, commits, backport_base_branch, is_collaborator):
def with_github_keyword_prefix(repo, pr):
# GitHub issue pattern: #123, scylladb/scylladb#123, or full GitHub URLs
github_pattern = rf"(?:fix(?:|es|ed))\s*:?\s*(?:(?:(?:{repo.full_name})?#)|https://github\.com/{repo.full_name}/issues/)(\d+)"
# JIRA issue pattern: PKG-92 or https://scylladb.atlassian.net/browse/PKG-92
jira_pattern = r"(?:fix(?:|es|ed))\s*:?\s*(?:(?:https://scylladb\.atlassian\.net/browse/)?([A-Z]+-\d+))"
# Check PR body for GitHub issues
github_match = re.findall(github_pattern, pr.body, re.IGNORECASE)
# Check PR body for JIRA issues
jira_match = re.findall(jira_pattern, pr.body, re.IGNORECASE)
match = github_match or jira_match
if match:
pattern = rf"(?:fix(?:|es|ed))\s*:?\s*(?:(?:(?:{repo.full_name})?#)|https://github\.com/{repo.full_name}/issues/)(\d+)"
match = re.findall(pattern, pr.body, re.IGNORECASE)
if not match:
for commit in pr.get_commits():
match = re.findall(pattern, commit.commit.message, re.IGNORECASE)
if match:
print(f'{pr.number} has a valid close reference in commit message {commit.sha}')
break
if not match:
print(f'No valid close reference for {pr.number}')
return False
else:
return True
for commit in pr.get_commits():
github_match = re.findall(github_pattern, commit.commit.message, re.IGNORECASE)
jira_match = re.findall(jira_pattern, commit.commit.message, re.IGNORECASE)
if github_match or jira_match:
print(f'{pr.number} has a valid close reference in commit message {commit.sha}')
return True
print(f'No valid close reference for {pr.number}')
return False
def main():
args = parse_args()

View File

@@ -30,13 +30,8 @@ def copy_labels_from_linked_issues(repo, pr_number):
try:
issue = repo.get_issue(int(issue_number))
for label in issue.labels:
# Copy ALL labels from issues to PR when PR is opened
pr.add_to_labels(label.name)
print(f"Copied label '{label.name}' from issue #{issue_number} to PR #{pr_number}")
if label.name in ['P0', 'P1']:
pr.add_to_labels('force_on_cloud')
print(f"Added force_on_cloud label to PR #{pr_number} due to {label.name} label")
print(f"All labels from issue #{issue_number} copied to PR #{pr_number}")
print(f"Labels from issue #{issue_number} copied to PR #{pr_number}")
except Exception as e:
print(f"Error processing issue #{issue_number}: {e}")
@@ -79,22 +74,9 @@ def sync_labels(repo, number, label, action, is_issue=False):
target = repo.get_issue(int(pr_or_issue_number))
if action == 'labeled':
target.add_to_labels(label)
if label in ['P0', 'P1'] and is_issue:
# Only add force_on_cloud to PRs when P0/P1 is added to an issue
target.add_to_labels('force_on_cloud')
print(f"Added 'force_on_cloud' label to PR #{pr_or_issue_number} due to {label} label")
print(f"Label '{label}' successfully added.")
elif action == 'unlabeled':
target.remove_from_labels(label)
if label in ['P0', 'P1'] and is_issue:
# Check if any other P0/P1 labels remain before removing force_on_cloud
remaining_priority_labels = [l.name for l in target.labels if l.name in ['P0', 'P1']]
if not remaining_priority_labels:
try:
target.remove_from_labels('force_on_cloud')
print(f"Removed 'force_on_cloud' label from PR #{pr_or_issue_number} as no P0/P1 labels remain")
except Exception as e:
print(f"Warning: Could not remove force_on_cloud label: {e}")
print(f"Label '{label}' successfully removed.")
elif action == 'opened':
copy_labels_from_linked_issues(repo, number)

View File

@@ -54,13 +54,10 @@ jobs:
GITHUB_TOKEN: ${{ secrets.AUTO_BACKPORT_TOKEN }}
run: python .github/scripts/auto-backport.py --repo ${{ github.repository }} --base-branch ${{ github.ref }} --commits ${{ github.event.before }}..${{ github.sha }}
- name: Check if a valid backport label exists and no backport_error
env:
LABELS_JSON: ${{ toJson(github.event.pull_request.labels) }}
id: check_label
run: |
labels_json="$LABELS_JSON"
echo "Checking labels:"
echo "$labels_json" | jq -r '.[].name'
labels_json='${{ toJson(github.event.pull_request.labels) }}'
echo "Checking labels: $(echo "$labels_json" | jq -r '.[].name')"
# Check if a valid backport label exists
if echo "$labels_json" | jq -e 'any(.[] | .name; test("backport/[0-9]+\\.[0-9]+$"))' > /dev/null; then

View File

@@ -18,7 +18,7 @@ jobs:
// Regular expression pattern to check for "Fixes" prefix
// Adjusted to dynamically insert the repository full name
const pattern = `Fixes:? ((?:#|${repo.replace('/', '\\/')}#|https://github\\.com/${repo.replace('/', '\\/')}/issues/)(\\d+)|(?:https://scylladb\\.atlassian\\.net/browse/)?([A-Z]+-\\d+))`;
const pattern = `Fixes:? (?:#|${repo.replace('/', '\\/')}#|https://github\\.com/${repo.replace('/', '\\/')}/issues/)(\\d+)`;
const regex = new RegExp(pattern);
if (!regex.test(body)) {

View File

@@ -1,53 +0,0 @@
name: Backport with Jira Integration
on:
push:
branches:
- master
- next-*.*
- branch-*.*
pull_request_target:
types: [labeled, closed]
branches:
- master
- next
- next-*.*
- branch-*.*
jobs:
backport-on-push:
if: github.event_name == 'push'
uses: scylladb/github-automation/.github/workflows/backport-with-jira.yaml@main
with:
event_type: 'push'
base_branch: ${{ github.ref }}
commits: ${{ github.event.before }}..${{ github.sha }}
secrets:
gh_token: ${{ secrets.AUTO_BACKPORT_TOKEN }}
jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
backport-on-label:
if: github.event_name == 'pull_request_target' && github.event.action == 'labeled'
uses: scylladb/github-automation/.github/workflows/backport-with-jira.yaml@main
with:
event_type: 'labeled'
base_branch: refs/heads/${{ github.event.pull_request.base.ref }}
pull_request_number: ${{ github.event.pull_request.number }}
head_commit: ${{ github.event.pull_request.base.sha }}
label_name: ${{ github.event.label.name }}
pr_state: ${{ github.event.pull_request.state }}
secrets:
gh_token: ${{ secrets.AUTO_BACKPORT_TOKEN }}
jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
backport-chain:
if: github.event_name == 'pull_request_target' && github.event.action == 'closed' && github.event.pull_request.merged == true
uses: scylladb/github-automation/.github/workflows/backport-with-jira.yaml@main
with:
event_type: 'chain'
base_branch: refs/heads/${{ github.event.pull_request.base.ref }}
pull_request_number: ${{ github.event.pull_request.number }}
pr_body: ${{ github.event.pull_request.body }}
secrets:
gh_token: ${{ secrets.AUTO_BACKPORT_TOKEN }}
jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}

View File

@@ -1,12 +1,11 @@
name: Call Jira Status In Progress
on:
pull_request_target:
pull_request:
types: [opened]
jobs:
call-jira-status-in-progress:
uses: scylladb/github-automation/.github/workflows/main_update_jira_status_to_in_progress.yml@main
secrets:
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}

View File

@@ -1,12 +1,11 @@
name: Call Jira Status In Review
on:
pull_request_target:
pull_request:
types: [ready_for_review, review_requested]
jobs:
call-jira-status-in-review:
uses: scylladb/github-automation/.github/workflows/main_update_jira_status_to_in_review.yml@main
secrets:
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}

View File

@@ -1,12 +1,13 @@
name: Call Jira Status Ready For Merge
on:
pull_request_target:
pull_request:
types: [labeled]
jobs:
call-jira-status-update:
uses: scylladb/github-automation/.github/workflows/main_update_jira_status_to_ready_for_merge.yml@main
with:
label_name: 'status/merge_candidate'
secrets:
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}

View File

@@ -37,13 +37,13 @@ jobs:
run: python .github/scripts/sync_labels.py --repo ${{ github.repository }} --number ${{ github.event.number }} --action ${{ github.event.action }}
- name: Pull request labeled or unlabeled event
if: github.event_name == 'pull_request_target' && (startsWith(github.event.label.name, 'backport/') || github.event.label.name == 'P0' || github.event.label.name == 'P1')
if: github.event_name == 'pull_request_target' && startsWith(github.event.label.name, 'backport/')
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: python .github/scripts/sync_labels.py --repo ${{ github.repository }} --number ${{ github.event.number }} --action ${{ github.event.action }} --label ${{ github.event.label.name }}
- name: Issue labeled or unlabeled event
if: github.event_name == 'issues' && (startsWith(github.event.label.name, 'backport/') || github.event.label.name == 'P0' || github.event.label.name == 'P1')
if: github.event_name == 'issues' && startsWith(github.event.label.name, 'backport/')
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: python .github/scripts/sync_labels.py --repo ${{ github.repository }} --number ${{ github.event.issue.number }} --action ${{ github.event.action }} --is_issue --label ${{ github.event.label.name }}

View File

@@ -1,65 +0,0 @@
name: Trigger Scylla CI Route
on:
issue_comment:
types: [created]
pull_request_target:
types:
- unlabeled
jobs:
trigger-jenkins:
if: (github.event_name == 'issue_comment' && github.event.comment.user.login != 'scylladbbot') || github.event.label.name == 'conflicts'
runs-on: ubuntu-latest
steps:
- name: Verify Org Membership
id: verify_author
env:
EVENT_NAME: ${{ github.event_name }}
PR_AUTHOR: ${{ github.event.pull_request.user.login }}
PR_ASSOCIATION: ${{ github.event.pull_request.author_association }}
COMMENT_AUTHOR: ${{ github.event.comment.user.login }}
COMMENT_ASSOCIATION: ${{ github.event.comment.author_association }}
shell: bash
run: |
if [[ "$EVENT_NAME" == "pull_request_target" ]]; then
AUTHOR="$PR_AUTHOR"
ASSOCIATION="$PR_ASSOCIATION"
else
AUTHOR="$COMMENT_AUTHOR"
ASSOCIATION="$COMMENT_ASSOCIATION"
fi
ORG="scylladb"
if gh api "/orgs/${ORG}/members/${AUTHOR}" --silent 2>/dev/null; then
echo "member=true" >> $GITHUB_OUTPUT
else
echo "::warning::${AUTHOR} is not a member of ${ORG}; skipping CI trigger."
echo "member=false" >> $GITHUB_OUTPUT
fi
- name: Validate Comment Trigger
if: github.event_name == 'issue_comment'
id: verify_comment
env:
COMMENT_BODY: ${{ github.event.comment.body }}
shell: bash
run: |
CLEAN_BODY=$(echo "$COMMENT_BODY" | grep -v '^[[:space:]]*>')
if echo "$CLEAN_BODY" | grep -qi '@scylladbbot' && echo "$CLEAN_BODY" | grep -qi 'trigger-ci'; then
echo "trigger=true" >> $GITHUB_OUTPUT
else
echo "trigger=false" >> $GITHUB_OUTPUT
fi
- name: Trigger Scylla-CI-Route Jenkins Job
if: steps.verify_author.outputs.member == 'true' && (github.event_name == 'pull_request_target' || steps.verify_comment.outputs.trigger == 'true')
env:
JENKINS_USER: ${{ secrets.JENKINS_USERNAME }}
JENKINS_API_TOKEN: ${{ secrets.JENKINS_TOKEN }}
JENKINS_URL: "https://jenkins.scylladb.com"
PR_NUMBER: "${{ github.event.issue.number || github.event.pull_request.number }}"
PR_REPO_NAME: "${{ github.event.repository.full_name }}"
run: |
curl -X POST "$JENKINS_URL/job/releng/job/Scylla-CI-Route/buildWithParameters?PR_NUMBER=$PR_NUMBER&PR_REPO_NAME=$PR_REPO_NAME" \
--user "$JENKINS_USER:$JENKINS_API_TOKEN" --fail

2
.gitmodules vendored
View File

@@ -1,6 +1,6 @@
[submodule "seastar"]
path = seastar
url = ../scylla-seastar
url = ../seastar
ignore = dirty
[submodule "swagger-ui"]
path = swagger-ui

View File

@@ -90,13 +90,13 @@ if(is_multi_config)
add_dependencies(Seastar::seastar_testing Seastar)
else()
set(Seastar_TESTING ON CACHE BOOL "" FORCE)
set(Seastar_API_LEVEL 8 CACHE STRING "" FORCE)
set(Seastar_API_LEVEL 7 CACHE STRING "" FORCE)
set(Seastar_DEPRECATED_OSTREAM_FORMATTERS OFF CACHE BOOL "" FORCE)
set(Seastar_APPS ON CACHE BOOL "" FORCE)
set(Seastar_EXCLUDE_APPS_FROM_ALL ON CACHE BOOL "" FORCE)
set(Seastar_EXCLUDE_TESTS_FROM_ALL ON CACHE BOOL "" FORCE)
set(Seastar_IO_URING ON CACHE BOOL "" FORCE)
set(Seastar_SCHEDULING_GROUPS_COUNT 20 CACHE STRING "" FORCE)
set(Seastar_SCHEDULING_GROUPS_COUNT 19 CACHE STRING "" FORCE)
set(Seastar_UNUSED_RESULT_ERROR ON CACHE BOOL "" FORCE)
add_subdirectory(seastar)
target_compile_definitions (seastar
@@ -170,22 +170,30 @@ target_sources(scylla-main
bytes.cc
client_data.cc
clocks-impl.cc
collection_mutation.cc
converting_mutation_partition_applier.cc
counters.cc
sstable_dict_autotrainer.cc
duration.cc
exceptions/exceptions.cc
frozen_schema.cc
generic_server.cc
debug.cc
init.cc
keys/keys.cc
multishard_mutation_query.cc
mutation_query.cc
node_ops/task_manager_module.cc
partition_slice_builder.cc
querier.cc
query/query.cc
query.cc
query_ranges_to_vnodes.cc
query/query-result-set.cc
query-result-set.cc
tombstone_gc_options.cc
tombstone_gc.cc
reader_concurrency_semaphore.cc
reader_concurrency_semaphore_group.cc
schema_mutations.cc
serializer.cc
service/direct_failure_detector/failure_detector.cc
sstables_loader.cc
@@ -263,6 +271,7 @@ add_subdirectory(mutation)
add_subdirectory(mutation_writer)
add_subdirectory(node_ops)
add_subdirectory(readers)
add_subdirectory(redis)
add_subdirectory(replica)
add_subdirectory(raft)
add_subdirectory(repair)
@@ -277,7 +286,6 @@ add_subdirectory(tracing)
add_subdirectory(transport)
add_subdirectory(types)
add_subdirectory(utils)
add_subdirectory(vector_search)
add_version_library(scylla_version
release.cc)
@@ -307,6 +315,7 @@ set(scylla_libs
mutation_writer
raft
readers
redis
repair
replica
schema
@@ -319,8 +328,7 @@ set(scylla_libs
tracing
transport
types
utils
vector_search)
utils)
target_link_libraries(scylla PRIVATE
${scylla_libs})

View File

@@ -78,7 +78,7 @@ fi
# Default scylla product/version tags
PRODUCT=scylla
VERSION=2025.4.8
VERSION=2025.4.0-dev
if test -f version
then

View File

@@ -17,7 +17,6 @@ target_sources(alternator
streams.cc
consumed_capacity.cc
ttl.cc
parsed_expression_cache.cc
${cql_grammar_srcs})
target_include_directories(alternator
PUBLIC

View File

@@ -136,7 +136,6 @@ future<> controller::start_server() {
[this, addr, alternator_port, alternator_https_port, creds = std::move(creds)] (server& server) mutable {
return server.init(addr, alternator_port, alternator_https_port, creds,
_config.alternator_enforce_authorization,
_config.alternator_warn_authorization,
&_memory_limiter.local().get_semaphore(),
_config.max_concurrent_requests_per_shard);
}).handle_exception([this, addr, alternator_port, alternator_https_port] (std::exception_ptr ep) {

View File

@@ -11,7 +11,7 @@
#include <seastar/core/sharded.hh>
#include <seastar/core/smp.hh>
#include "transport/protocol_server.hh"
#include "protocol_server.hh"
namespace service {
class storage_proxy;

File diff suppressed because it is too large Load Diff

View File

@@ -40,7 +40,6 @@ namespace cql3::selection {
namespace service {
class storage_proxy;
class cas_shard;
}
namespace cdc {
@@ -58,7 +57,10 @@ class schema_builder;
namespace alternator {
class rmw_operation;
class put_or_delete_item;
namespace parsed {
class path;
};
schema_ptr get_table(service::storage_proxy& proxy, const rjson::value& request);
bool is_alternator_keyspace(const sstring& ks_name);
@@ -130,9 +132,6 @@ using attrs_to_get_node = attribute_path_map_node<std::monostate>;
// optional means we should get all attributes, not specific ones.
using attrs_to_get = attribute_path_map<std::monostate>;
namespace parsed {
class expression_cache;
}
class executor : public peering_sharded_service<executor> {
gms::gossiper& _gossiper;
@@ -141,13 +140,10 @@ class executor : public peering_sharded_service<executor> {
db::system_distributed_keyspace& _sdks;
cdc::metadata& _cdc_metadata;
utils::updateable_value<bool> _enforce_authorization;
utils::updateable_value<bool> _warn_authorization;
// An smp_service_group to be used for limiting the concurrency when
// forwarding Alternator request between shards - if necessary for LWT.
smp_service_group _ssg;
std::unique_ptr<parsed::expression_cache> _parsed_expression_cache;
public:
using client_state = service::client_state;
// request_return_type is the return type of the executor methods, which
@@ -178,7 +174,6 @@ public:
cdc::metadata& cdc_metadata,
smp_service_group ssg,
utils::updateable_value<uint32_t> default_timeout_in_ms);
~executor();
future<request_return_type> create_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
future<request_return_type> describe_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
@@ -206,7 +201,11 @@ public:
future<request_return_type> describe_continuous_backups(client_state& client_state, service_permit permit, rjson::value request);
future<> start();
future<> stop();
future<> stop() {
// disconnect from the value source, but keep the value unchanged.
s_default_timeout_in_ms = utils::updateable_value<uint32_t>{s_default_timeout_in_ms()};
return make_ready_future<>();
}
static sstring table_name(const schema&);
static db::timeout_clock::time_point default_timeout();
@@ -219,20 +218,10 @@ public:
private:
friend class rmw_operation;
static void describe_key_schema(rjson::value& parent, const schema&, std::unordered_map<std::string,std::string> * = nullptr, const std::map<sstring, sstring> *tags = nullptr);
future<> do_batch_write(
std::vector<std::pair<schema_ptr, put_or_delete_item>> mutation_builders,
service::client_state& client_state,
tracing::trace_state_ptr trace_state,
service_permit permit);
future<> cas_write(schema_ptr schema, service::cas_shard cas_shard, const dht::decorated_key& dk,
const std::vector<put_or_delete_item>& mutation_builders, service::client_state& client_state,
tracing::trace_state_ptr trace_state, service_permit permit);
static void describe_key_schema(rjson::value& parent, const schema&, std::unordered_map<std::string,std::string> * = nullptr);
public:
static void describe_key_schema(rjson::value& parent, const schema& schema, std::unordered_map<std::string,std::string>&, const std::map<sstring, sstring> *tags = nullptr);
static void describe_key_schema(rjson::value& parent, const schema& schema, std::unordered_map<std::string,std::string>&);
static std::optional<rjson::value> describe_single_item(schema_ptr,
const query::partition_slice&,
@@ -241,15 +230,12 @@ public:
const std::optional<attrs_to_get>&,
uint64_t* = nullptr);
// Converts a multi-row selection result to JSON compatible with DynamoDB.
// For each row, this method calls item_callback, which takes the size of
// the item as the parameter.
static future<std::vector<rjson::value>> describe_multi_item(schema_ptr schema,
const query::partition_slice&& slice,
shared_ptr<cql3::selection::selection> selection,
foreign_ptr<lw_shared_ptr<query::result>> query_result,
shared_ptr<const std::optional<attrs_to_get>> attrs_to_get,
noncopyable_function<void(uint64_t)> item_callback = {});
uint64_t& rcu_half_units);
static void describe_single_item(const cql3::selection::selection&,
const std::vector<managed_bytes_opt>&,
@@ -277,7 +263,7 @@ bool is_big(const rjson::value& val, int big_size = 100'000);
// Check CQL's Role-Based Access Control (RBAC) permission (MODIFY,
// SELECT, DROP, etc.) on the given table. When permission is denied an
// appropriate user-readable api_error::access_denied is thrown.
future<> verify_permission(bool enforce_authorization, bool warn_authorization, const service::client_state&, const schema_ptr&, auth::permission, alternator::stats& stats);
future<> verify_permission(bool enforce_authorization, const service::client_state&, const schema_ptr&, auth::permission);
/**
* Make return type for serializing the object "streamed",

View File

@@ -91,18 +91,6 @@ options {
throw expressions_syntax_error(format("{} at char {}", err,
ex->get_charPositionInLine()));
}
// ANTLR3 tries to recover missing tokens - it tries to finish parsing
// and create valid objects, as if the missing token was there.
// But it has a bug and leaks these tokens.
// We override offending method and handle abandoned pointers.
std::vector<std::unique_ptr<TokenType>> _missing_tokens;
TokenType* getMissingSymbol(IntStreamType* istream, ExceptionBaseType* e,
ANTLR_UINT32 expectedTokenType, BitsetListType* follow) {
auto token = BaseType::getMissingSymbol(istream, e, expectedTokenType, follow);
_missing_tokens.emplace_back(token);
return token;
}
}
@lexer::context {
void displayRecognitionError(ANTLR_UINT8** token_names, ExceptionBaseType* ex) {
@@ -196,13 +184,7 @@ path_component: NAME | NAMEREF;
path returns [parsed::path p]:
root=path_component { $p.set_root($root.text); }
( '.' name=path_component { $p.add_dot($name.text); }
| '[' INTEGER ']' {
try {
$p.add_index(std::stoi($INTEGER.text));
} catch(std::out_of_range&) {
throw expressions_syntax_error("list index out of integer range");
}
}
| '[' INTEGER ']' { $p.add_index(std::stoi($INTEGER.text)); }
)*;
/* See comment above why the "depth" counter was needed here */
@@ -248,7 +230,7 @@ update_expression_clause returns [parsed::update_expression e]:
// Note the "EOF" token at the end of the update expression. We want to the
// parser to match the entire string given to it - not just its beginning!
update_expression returns [parsed::update_expression e]:
(update_expression_clause { e.append($update_expression_clause.e); })+ EOF;
(update_expression_clause { e.append($update_expression_clause.e); })* EOF;
projection_expression returns [std::vector<parsed::path> v]:
p=path { $v.push_back(std::move($p.p)); }
@@ -275,13 +257,6 @@ primitive_condition returns [parsed::primitive_condition c]:
(',' v=value[0] { $c.add_value(std::move($v.v)); })*
')'
)?
{
// Post-parse check to reject non-function single values
if ($c._op == parsed::primitive_condition::type::VALUE &&
!$c._values.front().is_func()) {
throw expressions_syntax_error("Single value must be a function");
}
}
;
// The following rules for parsing boolean expressions are verbose and

View File

@@ -18,8 +18,6 @@
#include "expressions_types.hh"
#include "utils/rjson.hh"
#include "utils/updateable_value.hh"
#include "stats.hh"
namespace alternator {
@@ -28,26 +26,6 @@ public:
using runtime_error::runtime_error;
};
namespace parsed {
class expression_cache_impl;
class expression_cache {
std::unique_ptr<expression_cache_impl> _impl;
public:
struct config {
utils::updateable_value<uint32_t> max_cache_entries;
};
expression_cache(config cfg, stats& stats);
~expression_cache();
// stop background tasks, if any
future<> stop();
update_expression parse_update_expression(std::string_view query);
std::vector<path> parse_projection_expression(std::string_view query);
condition_expression parse_condition_expression(std::string_view query, const char* caller);
};
} // namespace parsed
// Preferably use parsed::expression_cache instance instead of this free functions.
parsed::update_expression parse_update_expression(std::string_view query);
std::vector<parsed::path> parse_projection_expression(std::string_view query);
parsed::condition_expression parse_condition_expression(std::string_view query, const char* caller);

View File

@@ -209,7 +209,9 @@ public:
// function is supported).
// 2. Ternary operator - v1 BETWEEN v2 and v3 (means v1 >= v2 AND v1 <= v3).
// 3. N-ary operator - v1 IN ( v2, v3, ... )
// 4. A single function call (attribute_exists etc.).
// 4. A single function call (attribute_exists etc.). The parser actually
// accepts a more general "value" here but later stages reject a value
// which is not a function call (because DynamoDB does it too).
class primitive_condition {
public:
enum class type {

View File

@@ -13,7 +13,7 @@
#include "utils/rjson.hh"
#include "serialization.hh"
#include "schema/column_computation.hh"
#include "column_computation.hh"
#include "db/view/regular_column_transformation.hh"
namespace alternator {

View File

@@ -1,109 +0,0 @@
/*
* Copyright 2025-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#include "expressions.hh"
#include "utils/log.hh"
#include "utils/lru_string_map.hh"
#include <variant>
static logging::logger logger_("parsed-expression-cache");
namespace alternator::parsed {
struct expression_cache_impl {
stats& _stats;
using cached_expressions_types = std::variant<
update_expression,
condition_expression,
std::vector<path>
>;
sized_lru_string_map<cached_expressions_types> _cached_entries;
utils::observable<uint32_t>::observer _max_cache_entries_observer;
expression_cache_impl(expression_cache::config cfg, stats& stats);
// to define the specialized return type of `get_or_create()`
template <typename Func, typename... Args>
using ParseResult = std::invoke_result_t<Func, std::string_view, Args...>;
// Caching layer for parsed expressions
// The expression type is determined by the type of the parsing function passed as a parameter,
// and the return type is exactly the same as the return type of this parsing function.
// StatsType is used only to update appropriate statistics - currently it is aligned with the expression type,
// but it could be extended in the future if needed, e.g. split per operation.
template <stats::expression_types StatsType, typename Func, typename... Args>
ParseResult<Func, Args...> get_or_create(std::string_view query, Func&& parse_func, Args&&... other_args) {
if (_cached_entries.disabled()) {
return parse_func(query, std::forward<Args>(other_args)...);
}
if (!_cached_entries.sanity_check()) {
_stats.expression_cache.requests[StatsType].misses++;
return parse_func(query, std::forward<Args>(other_args)...);
}
auto value = _cached_entries.find(query);
if (value) {
logger_.trace("Cache hit for query: {}", query);
_stats.expression_cache.requests[StatsType].hits++;
try {
return std::get<ParseResult<Func, Args...>>(value->get());
} catch (const std::bad_variant_access&) {
// User can reach this code, by sending the same query string as a different expression type.
// In practice valid queries are different enough to not collide.
// Entries in cache are only valid queries.
// This request will fail at parsing below.
// If, by any chance this is a valid query, it will be updated below with the new value.
logger_.trace("Cache hit for '{}', but type mismatch.", query);
_stats.expression_cache.requests[StatsType].hits--;
}
} else {
logger_.trace("Cache miss for query: {}", query);
}
ParseResult<Func, Args...> expr = parse_func(query, std::forward<Args>(other_args)...);
// Invalid query will throw here ^
_stats.expression_cache.requests[StatsType].misses++;
if (value) [[unlikely]] {
value->get() = cached_expressions_types{expr};
} else {
_cached_entries.insert(query, cached_expressions_types{expr});
}
return expr;
}
};
expression_cache_impl::expression_cache_impl(expression_cache::config cfg, stats& stats) :
_stats(stats), _cached_entries(logger_, _stats.expression_cache.evictions),
_max_cache_entries_observer(cfg.max_cache_entries.observe([this] (uint32_t max_value) {
_cached_entries.set_max_size(max_value);
})) {
_cached_entries.set_max_size(cfg.max_cache_entries());
}
expression_cache::expression_cache(expression_cache::config cfg, stats& stats) :
_impl(std::make_unique<expression_cache_impl>(std::move(cfg), stats)) {
}
expression_cache::~expression_cache() = default;
future<> expression_cache::stop() {
return _impl->_cached_entries.stop();
}
update_expression expression_cache::parse_update_expression(std::string_view query) {
return _impl->get_or_create<stats::expression_types::UPDATE_EXPRESSION>(query, alternator::parse_update_expression);
}
std::vector<path> expression_cache::parse_projection_expression(std::string_view query) {
return _impl->get_or_create<stats::expression_types::PROJECTION_EXPRESSION>(query, alternator::parse_projection_expression);
}
condition_expression expression_cache::parse_condition_expression(std::string_view query, const char* caller) {
return _impl->get_or_create<stats::expression_types::CONDITION_EXPRESSION>(query, alternator::parse_condition_expression, caller);
}
} // namespace alternator::parsed

View File

@@ -11,7 +11,7 @@
#include "utils/log.hh"
#include "serialization.hh"
#include "error.hh"
#include "types/concrete_types.hh"
#include "concrete_types.hh"
#include "cql3/type_json.hh"
#include "mutation/position_in_partition.hh"
@@ -282,23 +282,15 @@ std::string type_to_string(data_type type) {
return it->second;
}
std::optional<bytes> try_get_key_column_value(const rjson::value& item, const column_definition& column) {
bytes get_key_column_value(const rjson::value& item, const column_definition& column) {
std::string column_name = column.name_as_text();
const rjson::value* key_typed_value = rjson::find(item, column_name);
if (!key_typed_value) {
return std::nullopt;
throw api_error::validation(fmt::format("Key column {} not found", column_name));
}
return get_key_from_typed_value(*key_typed_value, column);
}
bytes get_key_column_value(const rjson::value& item, const column_definition& column) {
auto value = try_get_key_column_value(item, column);
if (!value) {
throw api_error::validation(fmt::format("Key column {} not found", column.name_as_text()));
}
return std::move(*value);
}
// Parses the JSON encoding for a key value, which is a map with a single
// entry whose key is the type and the value is the encoded value.
// If this type does not match the desired "type_str", an api_error::validation
@@ -388,38 +380,20 @@ clustering_key ck_from_json(const rjson::value& item, schema_ptr schema) {
return clustering_key::make_empty();
}
std::vector<bytes> raw_ck;
// Note: it's possible to get more than one clustering column here, as
// Alternator can be used to read scylla internal tables.
// FIXME: this is a loop, but we really allow only one clustering key column.
for (const column_definition& cdef : schema->clustering_key_columns()) {
auto raw_value = get_key_column_value(item, cdef);
bytes raw_value = get_key_column_value(item, cdef);
raw_ck.push_back(std::move(raw_value));
}
return clustering_key::from_exploded(raw_ck);
}
clustering_key_prefix ck_prefix_from_json(const rjson::value& item, schema_ptr schema) {
if (schema->clustering_key_size() == 0) {
return clustering_key_prefix::make_empty();
}
std::vector<bytes> raw_ck;
for (const column_definition& cdef : schema->clustering_key_columns()) {
auto raw_value = try_get_key_column_value(item, cdef);
if (!raw_value) {
break;
}
raw_ck.push_back(std::move(*raw_value));
}
return clustering_key_prefix::from_exploded(raw_ck);
}
position_in_partition pos_from_json(const rjson::value& item, schema_ptr schema) {
const bool is_alternator_ks = is_alternator_keyspace(schema->ks_name());
if (is_alternator_ks) {
return position_in_partition::for_key(ck_from_json(item, schema));
auto ck = ck_from_json(item, schema);
if (is_alternator_keyspace(schema->ks_name())) {
return position_in_partition::for_key(std::move(ck));
}
const auto region_item = rjson::find(item, scylla_paging_region);
const auto weight_item = rjson::find(item, scylla_paging_weight);
if (bool(region_item) != bool(weight_item)) {
@@ -439,9 +413,8 @@ position_in_partition pos_from_json(const rjson::value& item, schema_ptr schema)
} else {
throw std::runtime_error(fmt::format("Invalid value for weight: {}", weight_view));
}
return position_in_partition(region, weight, region == partition_region::clustered ? std::optional(ck_prefix_from_json(item, schema)) : std::nullopt);
return position_in_partition(region, weight, region == partition_region::clustered ? std::optional(std::move(ck)) : std::nullopt);
}
auto ck = ck_from_json(item, schema);
if (ck.is_empty()) {
return position_in_partition::for_partition_start();
}

View File

@@ -31,7 +31,6 @@
#include "utils/overloaded_functor.hh"
#include "utils/aws_sigv4.hh"
#include "client_data.hh"
#include "utils/updateable_value.hh"
static logging::logger slogger("alternator-server");
@@ -101,13 +100,6 @@ static void handle_CORS(const request& req, reply& rep, bool preflight) {
// the user directly. Other exceptions are unexpected, and reported as
// Internal Server Error.
class api_handler : public handler_base {
// Although the the DynamoDB API responses are JSON, additional
// conventions apply to these responses. For this reason, DynamoDB uses
// the content type "application/x-amz-json-1.0" instead of the standard
// "application/json". Some other AWS services use later versions instead
// of "1.0", but DynamoDB currently uses "1.0". Note that this content
// type applies to all replies, both success and error.
static constexpr const char* REPLY_CONTENT_TYPE = "application/x-amz-json-1.0";
public:
api_handler(const std::function<future<executor::request_return_type>(std::unique_ptr<request> req)>& _handle) : _f_handle(
[this, _handle](std::unique_ptr<request> req, std::unique_ptr<reply> rep) {
@@ -136,10 +128,13 @@ public:
// Note that despite the move, there is a copy here -
// as str is std::string and rep->_content is sstring.
rep->_content = std::move(str);
rep->set_content_type(REPLY_CONTENT_TYPE);
},
[&] (executor::body_writer&& body_writer) {
rep->write_body(REPLY_CONTENT_TYPE, std::move(body_writer));
// Unfortunately, write_body() forces us to choose
// from a fixed and irrelevant list of "mime-types"
// at this point. But we'll override it with the
// correct one (application/x-amz-json-1.0) below.
rep->write_body("json", std::move(body_writer));
},
[&] (const api_error& err) {
generate_error_reply(*rep, err);
@@ -156,6 +151,7 @@ public:
handle_CORS(*req, *rep, false);
return _f_handle(std::move(req), std::move(rep)).then(
[](std::unique_ptr<reply> rep) {
rep->set_mime_type("application/x-amz-json-1.0");
rep->done();
return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
});
@@ -171,7 +167,6 @@ protected:
rjson::add(results, "message", err._msg);
rep._content = rjson::print(std::move(results));
rep._status = err._http_code;
rep.set_content_type(REPLY_CONTENT_TYPE);
slogger.trace("api_handler error case: {}", rep._content);
}
@@ -271,57 +266,24 @@ protected:
}
};
// This function increments the authentication_failures counter, and may also
// log a warn-level message and/or throw an exception, depending on what
// enforce_authorization and warn_authorization are set to.
// The username and client address are only used for logging purposes -
// they are not included in the error message returned to the client, since
// the client knows who it is.
// Note that if enforce_authorization is false, this function will return
// without throwing. So a caller that doesn't want to continue after an
// authentication_error must explicitly return after calling this function.
template<typename Exception>
static void authentication_error(alternator::stats& stats, bool enforce_authorization, bool warn_authorization, Exception&& e, std::string_view user, gms::inet_address client_address) {
stats.authentication_failures++;
if (enforce_authorization) {
if (warn_authorization) {
slogger.warn("alternator_warn_authorization=true: {} for user {}, client address {}", e.what(), user, client_address);
}
throw std::move(e);
} else {
if (warn_authorization) {
slogger.warn("If you set alternator_enforce_authorization=true the following will be enforced: {} for user {}, client address {}", e.what(), user, client_address);
}
}
}
future<std::string> server::verify_signature(const request& req, const chunked_content& content) {
if (!_enforce_authorization.get() && !_warn_authorization.get()) {
if (!_enforce_authorization) {
slogger.debug("Skipping authorization");
return make_ready_future<std::string>();
}
auto host_it = req._headers.find("Host");
if (host_it == req._headers.end()) {
authentication_error(_executor._stats, _enforce_authorization.get(), _warn_authorization.get(),
api_error::invalid_signature("Host header is mandatory for signature verification"),
"", req.get_client_address());
return make_ready_future<std::string>();
throw api_error::invalid_signature("Host header is mandatory for signature verification");
}
auto authorization_it = req._headers.find("Authorization");
if (authorization_it == req._headers.end()) {
authentication_error(_executor._stats, _enforce_authorization.get(), _warn_authorization.get(),
api_error::missing_authentication_token("Authorization header is mandatory for signature verification"),
"", req.get_client_address());
return make_ready_future<std::string>();
throw api_error::missing_authentication_token("Authorization header is mandatory for signature verification");
}
std::string host = host_it->second;
std::string_view authorization_header = authorization_it->second;
auto pos = authorization_header.find_first_of(' ');
if (pos == std::string_view::npos || authorization_header.substr(0, pos) != "AWS4-HMAC-SHA256") {
authentication_error(_executor._stats, _enforce_authorization.get(), _warn_authorization.get(),
api_error::invalid_signature(fmt::format("Authorization header must use AWS4-HMAC-SHA256 algorithm: {}", authorization_header)),
"", req.get_client_address());
return make_ready_future<std::string>();
throw api_error::invalid_signature(fmt::format("Authorization header must use AWS4-HMAC-SHA256 algorithm: {}", authorization_header));
}
authorization_header.remove_prefix(pos+1);
std::string credential;
@@ -356,9 +318,7 @@ future<std::string> server::verify_signature(const request& req, const chunked_c
std::vector<std::string_view> credential_split = split(credential, '/');
if (credential_split.size() != 5) {
authentication_error(_executor._stats, _enforce_authorization.get(), _warn_authorization.get(),
api_error::validation(fmt::format("Incorrect credential information format: {}", credential)), "", req.get_client_address());
return make_ready_future<std::string>();
throw api_error::validation(fmt::format("Incorrect credential information format: {}", credential));
}
std::string user(credential_split[0]);
std::string datestamp(credential_split[1]);
@@ -382,7 +342,7 @@ future<std::string> server::verify_signature(const request& req, const chunked_c
auto cache_getter = [&proxy = _proxy, &as = _auth_service] (std::string username) {
return get_key_from_roles(proxy, as, std::move(username));
};
return _key_cache.get_ptr(user, cache_getter).then_wrapped([this, &req, &content,
return _key_cache.get_ptr(user, cache_getter).then([this, &req, &content,
user = std::move(user),
host = std::move(host),
datestamp = std::move(datestamp),
@@ -390,32 +350,18 @@ future<std::string> server::verify_signature(const request& req, const chunked_c
signed_headers_map = std::move(signed_headers_map),
region = std::move(region),
service = std::move(service),
user_signature = std::move(user_signature)] (future<key_cache::value_ptr> key_ptr_fut) {
key_cache::value_ptr key_ptr(nullptr);
try {
key_ptr = key_ptr_fut.get();
} catch (const api_error& e) {
authentication_error(_executor._stats, _enforce_authorization.get(), _warn_authorization.get(),
e, user, req.get_client_address());
return std::string();
}
user_signature = std::move(user_signature)] (key_cache::value_ptr key_ptr) {
std::string signature;
try {
signature = utils::aws::get_signature(user, *key_ptr, std::string_view(host), "/", req._method,
datestamp, signed_headers_str, signed_headers_map, &content, region, service, "");
} catch (const std::exception& e) {
authentication_error(_executor._stats, _enforce_authorization.get(), _warn_authorization.get(),
api_error::invalid_signature(fmt::format("invalid signature: {}", e.what())),
user, req.get_client_address());
return std::string();
throw api_error::invalid_signature(e.what());
}
if (signature != std::string_view(user_signature)) {
_key_cache.remove(user);
authentication_error(_executor._stats, _enforce_authorization.get(), _warn_authorization.get(),
api_error::unrecognized_client("wrong signature"),
user, req.get_client_address());
return std::string();
throw api_error::unrecognized_client("The security token included in the request is invalid.");
}
return user;
});
@@ -647,11 +593,9 @@ server::server(executor& exec, service::storage_proxy& proxy, gms::gossiper& gos
}
future<> server::init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port, std::optional<tls::credentials_builder> creds,
utils::updateable_value<bool> enforce_authorization, utils::updateable_value<bool> warn_authorization,
semaphore* memory_limiter, utils::updateable_value<uint32_t> max_concurrent_requests) {
utils::updateable_value<bool> enforce_authorization, semaphore* memory_limiter, utils::updateable_value<uint32_t> max_concurrent_requests) {
_memory_limiter = memory_limiter;
_enforce_authorization = std::move(enforce_authorization);
_warn_authorization = std::move(warn_authorization);
_max_concurrent_requests = std::move(max_concurrent_requests);
if (!port && !https_port) {
return make_exception_future<>(std::runtime_error("Either regular port or TLS port"

View File

@@ -43,7 +43,6 @@ class server : public peering_sharded_service<server> {
key_cache _key_cache;
utils::updateable_value<bool> _enforce_authorization;
utils::updateable_value<bool> _warn_authorization;
utils::small_vector<std::reference_wrapper<seastar::httpd::http_server>, 2> _enabled_servers;
named_gate _pending_requests;
// In some places we will need a CQL updateable_timeout_config object even
@@ -95,8 +94,7 @@ public:
server(executor& executor, service::storage_proxy& proxy, gms::gossiper& gossiper, auth::service& service, qos::service_level_controller& sl_controller);
future<> init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port, std::optional<tls::credentials_builder> creds,
utils::updateable_value<bool> enforce_authorization, utils::updateable_value<bool> warn_authorization,
semaphore* memory_limiter, utils::updateable_value<uint32_t> max_concurrent_requests);
utils::updateable_value<bool> enforce_authorization, semaphore* memory_limiter, utils::updateable_value<uint32_t> max_concurrent_requests);
future<> stop();
// get_client_data() is called (on each shard separately) when the virtual
// table "system.clients" is read. It is expected to generate a list of

View File

@@ -155,37 +155,6 @@ static void register_metrics_with_optional_table(seastar::metrics::metric_groups
seastar::metrics::make_histogram("batch_item_count_histogram", seastar::metrics::description("Histogram of the number of items in a batch request"), labels,
[&stats]{ return estimated_histogram_to_metrics(stats.api_operations.batch_write_item_histogram);})(op("BatchWriteItem")).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
});
seastar::metrics::label expression_label("expression");
metrics.add_group(group_name, {
seastar::metrics::make_total_operations("expression_cache_evictions", stats.expression_cache.evictions,
seastar::metrics::description("Counts number of entries evicted from expressions cache"), labels).aggregate(aggregate_labels).set_skip_when_empty(),
seastar::metrics::make_total_operations("expression_cache_hits", stats.expression_cache.requests[stats::expression_types::UPDATE_EXPRESSION].hits,
seastar::metrics::description("Counts number of hits of cached expressions"), labels)(expression_label("UpdateExpression")).aggregate(aggregate_labels).set_skip_when_empty(),
seastar::metrics::make_total_operations("expression_cache_misses", stats.expression_cache.requests[stats::expression_types::UPDATE_EXPRESSION].misses,
seastar::metrics::description("Counts number of misses of cached expressions"), labels)(expression_label("UpdateExpression")).aggregate(aggregate_labels).set_skip_when_empty(),
seastar::metrics::make_total_operations("expression_cache_hits", stats.expression_cache.requests[stats::expression_types::CONDITION_EXPRESSION].hits,
seastar::metrics::description("Counts number of hits of cached expressions"), labels)(expression_label("ConditionExpression")).aggregate(aggregate_labels).set_skip_when_empty(),
seastar::metrics::make_total_operations("expression_cache_misses", stats.expression_cache.requests[stats::expression_types::CONDITION_EXPRESSION].misses,
seastar::metrics::description("Counts number of misses of cached expressions"), labels)(expression_label("ConditionExpression")).aggregate(aggregate_labels).set_skip_when_empty(),
seastar::metrics::make_total_operations("expression_cache_hits", stats.expression_cache.requests[stats::expression_types::PROJECTION_EXPRESSION].hits,
seastar::metrics::description("Counts number of hits of cached expressions"), labels)(expression_label("ProjectionExpression")).aggregate(aggregate_labels).set_skip_when_empty(),
seastar::metrics::make_total_operations("expression_cache_misses", stats.expression_cache.requests[stats::expression_types::PROJECTION_EXPRESSION].misses,
seastar::metrics::description("Counts number of misses of cached expressions"), labels)(expression_label("ProjectionExpression")).aggregate(aggregate_labels).set_skip_when_empty()
});
// Only register the following metrics for the global metrics, not per-table
if (!has_table) {
metrics.add_group("alternator", {
seastar::metrics::make_counter("authentication_failures", stats.authentication_failures,
seastar::metrics::description("total number of authentication failures"), labels).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
seastar::metrics::make_counter("authorization_failures", stats.authorization_failures,
seastar::metrics::description("total number of authorization failures"), labels).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
});
}
}
void register_metrics(seastar::metrics::metric_groups& metrics, const stats& stats) {

View File

@@ -79,17 +79,6 @@ public:
utils::estimated_histogram batch_get_item_histogram{22}; // a histogram that covers the range 1 - 100
utils::estimated_histogram batch_write_item_histogram{22}; // a histogram that covers the range 1 - 100
} api_operations;
// Count of authentication and authorization failures, counted if either
// alternator_enforce_authorization or alternator_warn_authorization are
// set to true. If both are false, no authentication or authorization
// checks are performed, so failures are not recognized or counted.
// "authentication" failure means the request was not signed with a valid
// user and key combination. "authorization" failure means the request was
// authenticated to a valid user - but this user did not have permissions
// to perform the operation (considering RBAC settings and the user's
// superuser status).
uint64_t authentication_failures = 0;
uint64_t authorization_failures = 0;
// Miscellaneous event counters
uint64_t total_operations = 0;
uint64_t unsupported_operations = 0;
@@ -112,22 +101,6 @@ public:
uint64_t wcu_total[NUM_TYPES] = {0};
// CQL-derived stats
cql3::cql_stats cql_stats;
// Enumeration of expression types only for stats
// if needed it can be extended e.g. per operation
enum expression_types {
UPDATE_EXPRESSION,
CONDITION_EXPRESSION,
PROJECTION_EXPRESSION,
NUM_EXPRESSION_TYPES
};
struct {
struct {
uint64_t hits = 0;
uint64_t misses = 0;
} requests[NUM_EXPRESSION_TYPES];
uint64_t evictions = 0;
} expression_cache;
};
struct table_stats {

View File

@@ -32,7 +32,6 @@
#include "executor.hh"
#include "data_dictionary/data_dictionary.hh"
#include "utils/rjson.hh"
/**
* Base template type to implement rapidjson::internal::TypeHelper<...>:s
@@ -828,7 +827,7 @@ future<executor::request_return_type> executor::get_records(client_state& client
tracing::add_table_name(trace_state, schema->ks_name(), schema->cf_name());
co_await verify_permission(_enforce_authorization, _warn_authorization, client_state, schema, auth::permission::SELECT, _stats);
co_await verify_permission(_enforce_authorization, client_state, schema, auth::permission::SELECT);
db::consistency_level cl = db::consistency_level::LOCAL_QUORUM;
partition_key pk = iter.shard.id.to_partition_key(*schema);
@@ -872,12 +871,10 @@ future<executor::request_return_type> executor::get_records(client_state& client
std::transform(pks.begin(), pks.end(), std::back_inserter(columns), [](auto& c) { return &c; });
std::transform(cks.begin(), cks.end(), std::back_inserter(columns), [](auto& c) { return &c; });
auto regular_column_start_idx = columns.size();
auto regular_column_filter = std::views::filter([](const column_definition& cdef) { return cdef.name() == op_column_name || cdef.name() == eor_column_name || !cdc::is_cdc_metacolumn_name(cdef.name_as_text()); });
std::ranges::transform(schema->regular_columns() | regular_column_filter, std::back_inserter(columns), [](auto& c) { return &c; });
auto regular_columns = std::ranges::subrange(columns.begin() + regular_column_start_idx, columns.end())
| std::views::transform(&column_definition::id)
auto regular_columns = schema->regular_columns()
| std::views::filter([](const column_definition& cdef) { return cdef.name() == op_column_name || cdef.name() == eor_column_name || !cdc::is_cdc_metacolumn_name(cdef.name_as_text()); })
| std::views::transform([&] (const column_definition& cdef) { columns.emplace_back(&cdef); return cdef.id; })
| std::ranges::to<query::column_id_vector>()
;
@@ -928,7 +925,6 @@ future<executor::request_return_type> executor::get_records(client_state& client
std::optional<utils::UUID> timestamp;
auto dynamodb = rjson::empty_object();
auto record = rjson::empty_object();
const auto dc_name = _proxy.get_token_metadata_ptr()->get_topology().get_datacenter();
using op_utype = std::underlying_type_t<cdc::operation>;
@@ -938,10 +934,9 @@ future<executor::request_return_type> executor::get_records(client_state& client
dynamodb = rjson::empty_object();
}
if (!record.ObjectEmpty()) {
rjson::add(record, "awsRegion", rjson::from_string(dc_name));
// TODO: awsRegion?
rjson::add(record, "eventID", event_id(iter.shard.id, *timestamp));
rjson::add(record, "eventSource", "scylladb:alternator");
rjson::add(record, "eventVersion", "1.0");
rjson::push_back(records, std::move(record));
record = rjson::empty_object();
--limit;
@@ -960,7 +955,7 @@ future<executor::request_return_type> executor::get_records(client_state& client
rjson::add(dynamodb, "ApproximateCreationDateTime", utils::UUID_gen::unix_timestamp_in_sec(ts).count());
rjson::add(dynamodb, "SequenceNumber", sequence_number(ts));
rjson::add(dynamodb, "StreamViewType", type);
// TODO: SizeBytes
//TODO: SizeInBytes
}
/**

View File

@@ -27,7 +27,7 @@
#include "replica/database.hh"
#include "service/client_state.hh"
#include "service_permit.hh"
#include "mutation/timestamp.hh"
#include "timestamp.hh"
#include "service/storage_proxy.hh"
#include "service/pager/paging_state.hh"
#include "service/pager/query_pagers.hh"
@@ -56,14 +56,14 @@ static logging::logger tlogger("alternator_ttl");
namespace alternator {
// We write the expiration-time attribute enabled on a table in a
// We write the expiration-time attribute enabled on a table using a
// tag TTL_TAG_KEY.
// Currently, the *value* of this tag is simply the name of the attribute,
// and the expiration scanner interprets it as an Alternator attribute name -
// It can refer to a real column or if that doesn't exist, to a member of
// the ":attrs" map column. Although this is designed for Alternator, it may
// be good enough for CQL as well (there, the ":attrs" column won't exist).
extern const sstring TTL_TAG_KEY;
static const sstring TTL_TAG_KEY("system:ttl_attribute");
future<executor::request_return_type> executor::update_time_to_live(client_state& client_state, service_permit permit, rjson::value request) {
_stats.api_operations.update_time_to_live++;
@@ -94,7 +94,7 @@ future<executor::request_return_type> executor::update_time_to_live(client_state
}
sstring attribute_name(v->GetString(), v->GetStringLength());
co_await verify_permission(_enforce_authorization, _warn_authorization, client_state, schema, auth::permission::ALTER, _stats);
co_await verify_permission(_enforce_authorization, client_state, schema, auth::permission::ALTER);
co_await db::modify_tags(_mm, schema->ks_name(), schema->cf_name(), [&](std::map<sstring, sstring>& tags_map) {
if (enabled) {
if (tags_map.contains(TTL_TAG_KEY)) {
@@ -747,7 +747,7 @@ static future<bool> scan_table(
auto my_host_id = erm->get_topology().my_host_id();
const auto &tablet_map = erm->get_token_metadata().tablets().get_tablet_map(s->id());
for (std::optional tablet = tablet_map.first_tablet(); tablet; tablet = tablet_map.next_tablet(*tablet)) {
auto tablet_primary_replica = tablet_map.get_primary_replica(*tablet, erm->get_topology());
auto tablet_primary_replica = tablet_map.get_primary_replica(*tablet);
// check if this is the primary replica for the current tablet
if (tablet_primary_replica.host == my_host_id && tablet_primary_replica.shard == this_shard_id()) {
co_await scan_tablet(*tablet, proxy, abort_source, page_sem, expiration_stats, scan_ctx, tablet_map);

View File

@@ -898,14 +898,6 @@
"type":"string",
"paramType":"query",
"enum": ["all", "dc", "rack", "node"]
},
{
"name":"primary_replica_only",
"description":"Load the sstables and stream to the primary replica node within the scope, if one is specified. If not, stream to the global primary replica.",
"required":false,
"allowMultiple":false,
"type":"boolean",
"paramType":"query"
}
]
}
@@ -992,7 +984,7 @@
]
},
{
"path":"/storage_service/cleanup_all/",
"path":"/storage_service/cleanup_all",
"operations":[
{
"method":"POST",
@@ -1002,30 +994,6 @@
"produces":[
"application/json"
],
"parameters":[
{
"name":"global",
"description":"true if cleanup of entire cluster is requested",
"required":false,
"allowMultiple":false,
"type":"boolean",
"paramType":"query"
}
]
}
]
},
{
"path":"/storage_service/mark_node_as_clean",
"operations":[
{
"method":"POST",
"summary":"Mark the node as clean. After that the node will not be considered as needing cleanup during automatic cleanup which is triggered by some topology operations",
"type":"void",
"nickname":"reset_cleanup_needed",
"produces":[
"application/json"
],
"parameters":[]
}
]
@@ -2953,14 +2921,6 @@
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"incremental_mode",
"description":"Set the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to 'disabled' mode.",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query"
}
]
}
@@ -3385,11 +3345,11 @@
"properties":{
"start_token":{
"type":"string",
"description":"The range start token (exclusive)"
"description":"The range start token"
},
"end_token":{
"type":"string",
"description":"The range end token (inclusive)"
"description":"The range start token"
},
"endpoints":{
"type":"array",
@@ -3462,7 +3422,7 @@
"version":{
"type":"string",
"enum":[
"ka", "la", "mc", "md", "me", "ms"
"ka", "la", "mc", "md", "me"
],
"description":"SSTable version"
},

View File

@@ -447,7 +447,7 @@
"description":"The number of units completed so far"
},
"children_ids":{
"type":"chunked_array",
"type":"array",
"items":{
"type":"task_identity"
},

View File

@@ -42,14 +42,6 @@
"allowMultiple":false,
"type":"boolean",
"paramType":"query"
},
{
"name":"consider_only_existing_data",
"description":"Set to \"true\" to flush all memtables and force tombstone garbage collection to check only the sstables being compacted (false by default). The memtable, commitlog and other uncompacted sstables will not be checked during tombstone garbage collection.",
"required":false,
"allowMultiple":false,
"type":"boolean",
"paramType":"query"
}
]
}

View File

@@ -137,6 +137,14 @@ future<> unset_load_meter(http_context& ctx) {
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_load_meter(ctx, r); });
}
future<> set_format_selector(http_context& ctx, db::sstables_format_selector& sel) {
return ctx.http_server.set_routes([&ctx, &sel] (routes& r) { set_format_selector(ctx, r, sel); });
}
future<> unset_format_selector(http_context& ctx) {
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_format_selector(ctx, r); });
}
future<> set_server_sstables_loader(http_context& ctx, sharded<sstables_loader>& sst_loader) {
return ctx.http_server.set_routes([&ctx, &sst_loader] (routes& r) { set_sstables_loader(ctx, r, sst_loader); });
}
@@ -216,22 +224,15 @@ future<> unset_server_gossip(http_context& ctx) {
});
}
future<> set_server_column_family(http_context& ctx, sharded<replica::database>& db, sharded<db::system_keyspace>& sys_ks) {
co_await register_api(ctx, "column_family",
"The column family API", [&db, &sys_ks] (http_context& ctx, routes& r) {
set_column_family(ctx, r, db, sys_ks);
});
co_await register_api(ctx, "cache_service",
"The cache service API", [&db] (http_context& ctx, routes& r) {
set_cache_service(ctx, db, r);
future<> set_server_column_family(http_context& ctx, sharded<db::system_keyspace>& sys_ks) {
return register_api(ctx, "column_family",
"The column family API", [&sys_ks] (http_context& ctx, routes& r) {
set_column_family(ctx, r, sys_ks);
});
}
future<> unset_server_column_family(http_context& ctx) {
return ctx.http_server.set_routes([&ctx] (routes& r) {
unset_column_family(ctx, r);
unset_cache_service(ctx, r);
});
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_column_family(ctx, r); });
}
future<> set_server_messaging_service(http_context& ctx, sharded<netw::messaging_service>& ms) {
@@ -263,6 +264,15 @@ future<> unset_server_stream_manager(http_context& ctx) {
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_stream_manager(ctx, r); });
}
future<> set_server_cache(http_context& ctx) {
return register_api(ctx, "cache_service",
"The cache service API", set_cache_service);
}
future<> unset_server_cache(http_context& ctx) {
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_cache_service(ctx, r); });
}
future<> set_hinted_handoff(http_context& ctx, sharded<service::storage_proxy>& proxy, sharded<gms::gossiper>& g) {
return register_api(ctx, "hinted_handoff",
"The hinted handoff API", [&proxy, &g] (http_context& ctx, routes& r) {
@@ -274,7 +284,7 @@ future<> unset_hinted_handoff(http_context& ctx) {
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_hinted_handoff(ctx, r); });
}
future<> set_server_compaction_manager(http_context& ctx, sharded<compaction::compaction_manager>& cm) {
future<> set_server_compaction_manager(http_context& ctx, sharded<compaction_manager>& cm) {
return register_api(ctx, "compaction_manager", "The Compaction manager API", [&cm] (http_context& ctx, routes& r) {
set_compaction_manager(ctx, r, cm);
});

View File

@@ -73,7 +73,7 @@ inline std::vector<sstring> split(const sstring& text, const char* separator) {
*
*/
template<class T, class F, class V>
future<json::json_return_type> sum_stats(sharded<T>& d, V F::*f) {
future<json::json_return_type> sum_stats(distributed<T>& d, V F::*f) {
return d.map_reduce0([f](const T& p) {return p.get_stats().*f;}, 0,
std::plus<V>()).then([](V val) {
return make_ready_future<json::json_return_type>(val);
@@ -106,7 +106,7 @@ httpd::utils_json::rate_moving_average_and_histogram timer_to_json(const utils::
}
template<class T, class F>
future<json::json_return_type> sum_histogram_stats(sharded<T>& d, utils::timed_rate_moving_average_and_histogram F::*f) {
future<json::json_return_type> sum_histogram_stats(distributed<T>& d, utils::timed_rate_moving_average_and_histogram F::*f) {
return d.map_reduce0([f](const T& p) {return (p.get_stats().*f).hist;}, utils::ihistogram(),
std::plus<utils::ihistogram>()).then([](const utils::ihistogram& val) {
@@ -115,7 +115,7 @@ future<json::json_return_type> sum_histogram_stats(sharded<T>& d, utils::timed_
}
template<class T, class F>
future<json::json_return_type> sum_timer_stats(sharded<T>& d, utils::timed_rate_moving_average_and_histogram F::*f) {
future<json::json_return_type> sum_timer_stats(distributed<T>& d, utils::timed_rate_moving_average_and_histogram F::*f) {
return d.map_reduce0([f](const T& p) {return (p.get_stats().*f).rate();}, utils::rate_moving_average_and_histogram(),
std::plus<utils::rate_moving_average_and_histogram>()).then([](const utils::rate_moving_average_and_histogram& val) {
@@ -124,7 +124,7 @@ future<json::json_return_type> sum_timer_stats(sharded<T>& d, utils::timed_rate
}
template<class T, class F>
future<json::json_return_type> sum_timer_stats(sharded<T>& d, utils::timed_rate_moving_average_summary_and_histogram F::*f) {
future<json::json_return_type> sum_timer_stats(distributed<T>& d, utils::timed_rate_moving_average_summary_and_histogram F::*f) {
return d.map_reduce0([f](const T& p) {return (p.get_stats().*f).rate();}, utils::rate_moving_average_and_histogram(),
std::plus<utils::rate_moving_average_and_histogram>()).then([](const utils::rate_moving_average_and_histogram& val) {
return make_ready_future<json::json_return_type>(timer_to_json(val));

View File

@@ -18,9 +18,7 @@
using request = http::request;
using reply = http::reply;
namespace compaction {
class compaction_manager;
}
namespace service {
@@ -85,9 +83,9 @@ struct http_context {
sstring api_dir;
sstring api_doc;
httpd::http_server_control http_server;
sharded<replica::database>& db;
distributed<replica::database>& db;
http_context(sharded<replica::database>& _db)
http_context(distributed<replica::database>& _db)
: db(_db)
{
}
@@ -118,7 +116,7 @@ future<> set_server_token_metadata(http_context& ctx, sharded<locator::shared_to
future<> unset_server_token_metadata(http_context& ctx);
future<> set_server_gossip(http_context& ctx, sharded<gms::gossiper>& g);
future<> unset_server_gossip(http_context& ctx);
future<> set_server_column_family(http_context& ctx, sharded<replica::database>& db, sharded<db::system_keyspace>& sys_ks);
future<> set_server_column_family(http_context& ctx, sharded<db::system_keyspace>& sys_ks);
future<> unset_server_column_family(http_context& ctx);
future<> set_server_messaging_service(http_context& ctx, sharded<netw::messaging_service>& ms);
future<> unset_server_messaging_service(http_context& ctx);
@@ -128,7 +126,9 @@ future<> set_server_stream_manager(http_context& ctx, sharded<streaming::stream_
future<> unset_server_stream_manager(http_context& ctx);
future<> set_hinted_handoff(http_context& ctx, sharded<service::storage_proxy>& p, sharded<gms::gossiper>& g);
future<> unset_hinted_handoff(http_context& ctx);
future<> set_server_compaction_manager(http_context& ctx, sharded<compaction::compaction_manager>& cm);
future<> set_server_cache(http_context& ctx);
future<> unset_server_cache(http_context& ctx);
future<> set_server_compaction_manager(http_context& ctx, sharded<compaction_manager>& cm);
future<> unset_server_compaction_manager(http_context& ctx);
future<> set_server_done(http_context& ctx);
future<> set_server_task_manager(http_context& ctx, sharded<tasks::task_manager>& tm, lw_shared_ptr<db::config> cfg, sharded<gms::gossiper>& gossiper);
@@ -141,6 +141,8 @@ future<> set_server_raft(http_context&, sharded<service::raft_group_registry>&);
future<> unset_server_raft(http_context&);
future<> set_load_meter(http_context& ctx, service::load_meter& lm);
future<> unset_load_meter(http_context& ctx);
future<> set_format_selector(http_context& ctx, db::sstables_format_selector& sel);
future<> unset_format_selector(http_context& ctx);
future<> set_server_cql_server_test(http_context& ctx, cql_transport::controller& ctl);
future<> unset_server_cql_server_test(http_context& ctx);
future<> set_server_service_levels(http_context& ctx, cql_transport::controller& ctl, sharded<cql3::query_processor>& qp);

View File

@@ -16,7 +16,7 @@ using namespace json;
using namespace seastar::httpd;
namespace cs = httpd::cache_service_json;
void set_cache_service(http_context& ctx, sharded<replica::database>& db, routes& r) {
void set_cache_service(http_context& ctx, routes& r) {
cs::get_row_cache_save_period_in_seconds.set(r, [](std::unique_ptr<http::request> req) {
// We never save the cache
// Origin uses 0 for never
@@ -204,53 +204,53 @@ void set_cache_service(http_context& ctx, sharded<replica::database>& db, routes
});
});
cs::get_row_hits.set(r, [&db] (std::unique_ptr<http::request> req) {
return map_reduce_cf(db, uint64_t(0), [](const replica::column_family& cf) {
cs::get_row_hits.set(r, [&ctx] (std::unique_ptr<http::request> req) {
return map_reduce_cf(ctx, uint64_t(0), [](const replica::column_family& cf) {
return cf.get_row_cache().stats().hits.count();
}, std::plus<uint64_t>());
});
cs::get_row_requests.set(r, [&db] (std::unique_ptr<http::request> req) {
return map_reduce_cf(db, uint64_t(0), [](const replica::column_family& cf) {
cs::get_row_requests.set(r, [&ctx] (std::unique_ptr<http::request> req) {
return map_reduce_cf(ctx, uint64_t(0), [](const replica::column_family& cf) {
return cf.get_row_cache().stats().hits.count() + cf.get_row_cache().stats().misses.count();
}, std::plus<uint64_t>());
});
cs::get_row_hit_rate.set(r, [&db] (std::unique_ptr<http::request> req) {
return map_reduce_cf(db, ratio_holder(), [](const replica::column_family& cf) {
cs::get_row_hit_rate.set(r, [&ctx] (std::unique_ptr<http::request> req) {
return map_reduce_cf(ctx, ratio_holder(), [](const replica::column_family& cf) {
return ratio_holder(cf.get_row_cache().stats().hits.count() + cf.get_row_cache().stats().misses.count(),
cf.get_row_cache().stats().hits.count());
}, std::plus<ratio_holder>());
});
cs::get_row_hits_moving_avrage.set(r, [&db] (std::unique_ptr<http::request> req) {
return map_reduce_cf_raw(db, utils::rate_moving_average(), [](const replica::column_family& cf) {
cs::get_row_hits_moving_avrage.set(r, [&ctx] (std::unique_ptr<http::request> req) {
return map_reduce_cf_raw(ctx, utils::rate_moving_average(), [](const replica::column_family& cf) {
return cf.get_row_cache().stats().hits.rate();
}, std::plus<utils::rate_moving_average>()).then([](const utils::rate_moving_average& m) {
return make_ready_future<json::json_return_type>(meter_to_json(m));
});
});
cs::get_row_requests_moving_avrage.set(r, [&db] (std::unique_ptr<http::request> req) {
return map_reduce_cf_raw(db, utils::rate_moving_average(), [](const replica::column_family& cf) {
cs::get_row_requests_moving_avrage.set(r, [&ctx] (std::unique_ptr<http::request> req) {
return map_reduce_cf_raw(ctx, utils::rate_moving_average(), [](const replica::column_family& cf) {
return cf.get_row_cache().stats().hits.rate() + cf.get_row_cache().stats().misses.rate();
}, std::plus<utils::rate_moving_average>()).then([](const utils::rate_moving_average& m) {
return make_ready_future<json::json_return_type>(meter_to_json(m));
});
});
cs::get_row_size.set(r, [&db] (std::unique_ptr<http::request> req) {
cs::get_row_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
// In origin row size is the weighted size.
// We currently do not support weights, so we use raw size in bytes instead
return db.map_reduce0([](replica::database& db) -> uint64_t {
return ctx.db.map_reduce0([](replica::database& db) -> uint64_t {
return db.row_cache_tracker().region().occupancy().used_space();
}, uint64_t(0), std::plus<uint64_t>()).then([](const int64_t& res) {
return make_ready_future<json::json_return_type>(res);
});
});
cs::get_row_entries.set(r, [&db] (std::unique_ptr<http::request> req) {
return db.map_reduce0([](replica::database& db) -> uint64_t {
cs::get_row_entries.set(r, [&ctx] (std::unique_ptr<http::request> req) {
return ctx.db.map_reduce0([](replica::database& db) -> uint64_t {
return db.row_cache_tracker().partitions();
}, uint64_t(0), std::plus<uint64_t>()).then([](const int64_t& res) {
return make_ready_future<json::json_return_type>(res);

View File

@@ -7,20 +7,15 @@
*/
#pragma once
#include <seastar/core/sharded.hh>
namespace seastar::httpd {
class routes;
}
namespace replica {
class database;
}
namespace api {
struct http_context;
void set_cache_service(http_context& ctx, seastar::sharded<replica::database>& db, seastar::httpd::routes& r);
void set_cache_service(http_context& ctx, seastar::httpd::routes& r);
void unset_cache_service(http_context& ctx, seastar::httpd::routes& r);
}

File diff suppressed because it is too large Load Diff

View File

@@ -19,18 +19,18 @@ class system_keyspace;
namespace api {
void set_column_family(http_context& ctx, httpd::routes& r, sharded<replica::database>& db, sharded<db::system_keyspace>& sys_ks);
void set_column_family(http_context& ctx, httpd::routes& r, sharded<db::system_keyspace>& sys_ks);
void unset_column_family(http_context& ctx, httpd::routes& r);
table_info parse_table_info(const sstring& name, const replica::database& db);
template<class Mapper, class I, class Reducer>
future<I> map_reduce_cf_raw(sharded<replica::database>& db, const sstring& name, I init,
future<I> map_reduce_cf_raw(http_context& ctx, const sstring& name, I init,
Mapper mapper, Reducer reducer) {
auto uuid = parse_table_info(name, db.local()).id;
auto uuid = parse_table_info(name, ctx.db.local()).id;
using mapper_type = std::function<future<std::unique_ptr<std::any>>(replica::database&)>;
using reducer_type = std::function<std::unique_ptr<std::any>(std::unique_ptr<std::any>, std::unique_ptr<std::any>)>;
return db.map_reduce0(mapper_type([mapper, uuid](replica::database& db) {
return ctx.db.map_reduce0(mapper_type([mapper, uuid](replica::database& db) {
return futurize_invoke([mapper, &db, uuid] {
return mapper(db.find_column_family(uuid));
}).then([] (auto result) {
@@ -45,22 +45,24 @@ future<I> map_reduce_cf_raw(sharded<replica::database>& db, const sstring& name,
template<class Mapper, class I, class Reducer>
future<json::json_return_type> map_reduce_cf(sharded<replica::database>& db, const sstring& name, I init,
future<json::json_return_type> map_reduce_cf(http_context& ctx, const sstring& name, I init,
Mapper mapper, Reducer reducer) {
return map_reduce_cf_raw(db, name, init, mapper, reducer).then([](const I& res) {
return map_reduce_cf_raw(ctx, name, init, mapper, reducer).then([](const I& res) {
return make_ready_future<json::json_return_type>(res);
});
}
template<class Mapper, class I, class Reducer, class Result>
future<json::json_return_type> map_reduce_cf(sharded<replica::database>& db, const sstring& name, I init,
future<json::json_return_type> map_reduce_cf(http_context& ctx, const sstring& name, I init,
Mapper mapper, Reducer reducer, Result result) {
return map_reduce_cf_raw(db, name, init, mapper, reducer).then([result](const I& res) mutable {
return map_reduce_cf_raw(ctx, name, init, mapper, reducer).then([result](const I& res) mutable {
result = res;
return make_ready_future<json::json_return_type>(result);
});
}
future<json::json_return_type> map_reduce_cf_time_histogram(http_context& ctx, const sstring& name, std::function<utils::time_estimated_histogram(const replica::column_family&)> f);
struct map_reduce_column_families_locally {
std::any init;
std::function<future<std::unique_ptr<std::any>>(replica::column_family&)> mapper;
@@ -76,7 +78,7 @@ struct map_reduce_column_families_locally {
};
template<class Mapper, class I, class Reducer>
future<I> map_reduce_cf_raw(sharded<replica::database>& db, I init,
future<I> map_reduce_cf_raw(http_context& ctx, I init,
Mapper mapper, Reducer reducer) {
using mapper_type = std::function<future<std::unique_ptr<std::any>>(replica::column_family&)>;
using reducer_type = std::function<std::unique_ptr<std::any>(std::unique_ptr<std::any>, std::unique_ptr<std::any>)>;
@@ -90,7 +92,7 @@ future<I> map_reduce_cf_raw(sharded<replica::database>& db, I init,
auto wrapped_reducer = reducer_type([reducer = std::move(reducer)] (std::unique_ptr<std::any> a, std::unique_ptr<std::any> b) mutable {
return std::make_unique<std::any>(I(reducer(std::any_cast<I>(std::move(*a)), std::any_cast<I>(std::move(*b)))));
});
return db.map_reduce0(map_reduce_column_families_locally{init,
return ctx.db.map_reduce0(map_reduce_column_families_locally{init,
std::move(wrapped_mapper), wrapped_reducer}, std::make_unique<std::any>(init), wrapped_reducer).then([] (std::unique_ptr<std::any> res) {
return std::any_cast<I>(std::move(*res));
});
@@ -98,13 +100,20 @@ future<I> map_reduce_cf_raw(sharded<replica::database>& db, I init,
template<class Mapper, class I, class Reducer>
future<json::json_return_type> map_reduce_cf(sharded<replica::database>& db, I init,
future<json::json_return_type> map_reduce_cf(http_context& ctx, I init,
Mapper mapper, Reducer reducer) {
return map_reduce_cf_raw(db, init, mapper, reducer).then([](const I& res) {
return map_reduce_cf_raw(ctx, init, mapper, reducer).then([](const I& res) {
return make_ready_future<json::json_return_type>(res);
});
}
future<json::json_return_type> get_cf_stats(http_context& ctx, const sstring& name,
int64_t replica::column_family_stats::*f);
future<json::json_return_type> get_cf_stats(http_context& ctx,
int64_t replica::column_family_stats::*f);
std::tuple<sstring, sstring> parse_fully_qualified_cf_name(sstring name);
}

View File

@@ -29,9 +29,9 @@ namespace ss = httpd::storage_service_json;
using namespace json;
using namespace seastar::httpd;
static future<json::json_return_type> get_cm_stats(sharded<compaction::compaction_manager>& cm,
int64_t compaction::compaction_manager::stats::*f) {
return cm.map_reduce0([f](compaction::compaction_manager& cm) {
static future<json::json_return_type> get_cm_stats(sharded<compaction_manager>& cm,
int64_t compaction_manager::stats::*f) {
return cm.map_reduce0([f](compaction_manager& cm) {
return cm.get_stats().*f;
}, int64_t(0), std::plus<int64_t>()).then([](const int64_t& res) {
return make_ready_future<json::json_return_type>(res);
@@ -47,9 +47,9 @@ static std::unordered_map<std::pair<sstring, sstring>, uint64_t, utils::tuple_ha
return std::move(a);
}
void set_compaction_manager(http_context& ctx, routes& r, sharded<compaction::compaction_manager>& cm) {
void set_compaction_manager(http_context& ctx, routes& r, sharded<compaction_manager>& cm) {
cm::get_compactions.set(r, [&cm] (std::unique_ptr<http::request> req) {
return cm.map_reduce0([](compaction::compaction_manager& cm) {
return cm.map_reduce0([](compaction_manager& cm) {
std::vector<cm::summary> summaries;
for (const auto& c : cm.get_compactions()) {
@@ -58,7 +58,7 @@ void set_compaction_manager(http_context& ctx, routes& r, sharded<compaction::co
s.ks = c.ks_name;
s.cf = c.cf_name;
s.unit = "keys";
s.task_type = compaction::compaction_name(c.type);
s.task_type = sstables::compaction_name(c.type);
s.completed = c.total_keys_written;
s.total = c.total_partitions;
summaries.push_back(std::move(s));
@@ -103,20 +103,22 @@ void set_compaction_manager(http_context& ctx, routes& r, sharded<compaction::co
cm::stop_compaction.set(r, [&cm] (std::unique_ptr<http::request> req) {
auto type = req->get_query_param("type");
return cm.invoke_on_all([type] (compaction::compaction_manager& cm) {
return cm.invoke_on_all([type] (compaction_manager& cm) {
return cm.stop_compaction(type);
}).then([] {
return make_ready_future<json::json_return_type>(json_void());
});
});
cm::stop_keyspace_compaction.set(r, [&ctx, &cm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
cm::stop_keyspace_compaction.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
auto [ks_name, tables] = parse_table_infos(ctx, *req, "tables");
auto type = req->get_query_param("type");
co_await cm.invoke_on_all([&] (compaction::compaction_manager& cm) {
co_await ctx.db.invoke_on_all([&] (replica::database& db) {
auto& cm = db.get_compaction_manager();
return parallel_for_each(tables, [&] (const table_info& ti) {
return cm.stop_compaction(type, [id = ti.id] (const compaction::compaction_group_view* x) {
return x->schema()->id() == id;
auto& t = db.find_column_family(ti.id);
return t.parallel_foreach_compaction_group_view([&] (compaction::compaction_group_view& ts) {
return cm.stop_compaction(type, &ts);
});
});
});
@@ -124,13 +126,13 @@ void set_compaction_manager(http_context& ctx, routes& r, sharded<compaction::co
});
cm::get_pending_tasks.set(r, [&ctx] (std::unique_ptr<http::request> req) {
return map_reduce_cf(ctx.db, int64_t(0), [](replica::column_family& cf) {
return map_reduce_cf(ctx, int64_t(0), [](replica::column_family& cf) {
return cf.estimate_pending_compactions();
}, std::plus<int64_t>());
});
cm::get_completed_tasks.set(r, [&cm] (std::unique_ptr<http::request> req) {
return get_cm_stats(cm, &compaction::compaction_manager::stats::completed_tasks);
return get_cm_stats(cm, &compaction_manager::stats::completed_tasks);
});
cm::get_total_compactions_completed.set(r, [] (std::unique_ptr<http::request> req) {
@@ -148,7 +150,7 @@ void set_compaction_manager(http_context& ctx, routes& r, sharded<compaction::co
});
cm::get_compaction_history.set(r, [&cm] (std::unique_ptr<http::request> req) {
noncopyable_function<future<>(output_stream<char>&&)> f = [&cm] (output_stream<char>&& out) -> future<> {
std::function<future<>(output_stream<char>&&)> f = [&cm] (output_stream<char>&& out) -> future<> {
auto s = std::move(out);
bool first = true;
std::exception_ptr ex;

View File

@@ -13,13 +13,11 @@ namespace seastar::httpd {
class routes;
}
namespace compaction {
class compaction_manager;
}
namespace api {
struct http_context;
void set_compaction_manager(http_context& ctx, seastar::httpd::routes& r, seastar::sharded<compaction::compaction_manager>& cm);
void set_compaction_manager(http_context& ctx, seastar::httpd::routes& r, seastar::sharded<compaction_manager>& cm);
void unset_compaction_manager(http_context& ctx, seastar::httpd::routes& r);
}

View File

@@ -71,7 +71,7 @@ void set_raft(http_context&, httpd::routes& r, sharded<service::raft_group_regis
co_return json_void{};
});
r::get_leader_host.set(r, [&raft_gr] (std::unique_ptr<http::request> req) -> future<json_return_type> {
if (req->get_query_param("group_id").empty()) {
if (!req->query_parameters.contains("group_id")) {
const auto leader_id = co_await raft_gr.invoke_on(0, [] (service::raft_group_registry& raft_gr) {
auto& srv = raft_gr.group0();
return srv.current_leader();
@@ -100,7 +100,7 @@ void set_raft(http_context&, httpd::routes& r, sharded<service::raft_group_regis
r::read_barrier.set(r, [&raft_gr] (std::unique_ptr<http::request> req) -> future<json_return_type> {
auto timeout = get_request_timeout(*req);
if (req->get_query_param("group_id").empty()) {
if (!req->query_parameters.contains("group_id")) {
// Read barrier on group 0 by default
co_await raft_gr.invoke_on(0, [timeout] (service::raft_group_registry& raft_gr) -> future<> {
co_await raft_gr.group0_with_timeouts().read_barrier(nullptr, timeout);
@@ -131,7 +131,7 @@ void set_raft(http_context&, httpd::routes& r, sharded<service::raft_group_regis
const auto stepdown_timeout_ticks = dur / service::raft_tick_interval;
auto timeout_dur = raft::logical_clock::duration(stepdown_timeout_ticks);
if (req->get_query_param("group_id").empty()) {
if (!req->query_parameters.contains("group_id")) {
// Stepdown on group 0 by default
co_await raft_gr.invoke_on(0, [timeout_dur] (service::raft_group_registry& raft_gr) {
apilog.info("Triggering stepdown for group0");

View File

@@ -39,7 +39,7 @@ utils::time_estimated_histogram timed_rate_moving_average_summary_merge(utils::t
* @return A future that resolves to the result of the aggregation.
*/
template<typename V, typename Reducer, typename InnerMapper>
future<V> two_dimensional_map_reduce(sharded<service::storage_proxy>& d,
future<V> two_dimensional_map_reduce(distributed<service::storage_proxy>& d,
InnerMapper mapper, Reducer reducer, V initial_value) {
return d.map_reduce0( [mapper, reducer, initial_value] (const service::storage_proxy& sp) {
return map_reduce_scheduling_group_specific<service::storage_proxy_stats::stats>(
@@ -59,7 +59,7 @@ future<V> two_dimensional_map_reduce(sharded<service::storage_proxy>& d,
* @return A future that resolves to the result of the aggregation.
*/
template<typename V, typename Reducer, typename F, typename C>
future<V> two_dimensional_map_reduce(sharded<service::storage_proxy>& d,
future<V> two_dimensional_map_reduce(distributed<service::storage_proxy>& d,
C F::*f, Reducer reducer, V initial_value) {
return two_dimensional_map_reduce(d, [f] (F& stats) -> V {
return stats.*f;
@@ -75,20 +75,20 @@ future<V> two_dimensional_map_reduce(sharded<service::storage_proxy>& d,
*
*/
template<typename V, typename F>
future<json::json_return_type> sum_stats_storage_proxy(sharded<proxy>& d, V F::*f) {
future<json::json_return_type> sum_stats_storage_proxy(distributed<proxy>& d, V F::*f) {
return two_dimensional_map_reduce(d, [f] (F& stats) { return stats.*f; }, std::plus<V>(), V(0)).then([] (V val) {
return make_ready_future<json::json_return_type>(val);
});
}
static future<utils::rate_moving_average> sum_timed_rate(sharded<proxy>& d, utils::timed_rate_moving_average service::storage_proxy_stats::stats::*f) {
static future<utils::rate_moving_average> sum_timed_rate(distributed<proxy>& d, utils::timed_rate_moving_average service::storage_proxy_stats::stats::*f) {
return two_dimensional_map_reduce(d, [f] (service::storage_proxy_stats::stats& stats) {
return (stats.*f).rate();
}, std::plus<utils::rate_moving_average>(), utils::rate_moving_average());
}
static future<json::json_return_type> sum_timed_rate_as_obj(sharded<proxy>& d, utils::timed_rate_moving_average service::storage_proxy_stats::stats::*f) {
static future<json::json_return_type> sum_timed_rate_as_obj(distributed<proxy>& d, utils::timed_rate_moving_average service::storage_proxy_stats::stats::*f) {
return sum_timed_rate(d, f).then([](const utils::rate_moving_average& val) {
httpd::utils_json::rate_moving_average m;
m = val;
@@ -100,7 +100,7 @@ httpd::utils_json::rate_moving_average_and_histogram get_empty_moving_average()
return timer_to_json(utils::rate_moving_average_and_histogram());
}
static future<json::json_return_type> sum_timed_rate_as_long(sharded<proxy>& d, utils::timed_rate_moving_average service::storage_proxy_stats::stats::*f) {
static future<json::json_return_type> sum_timed_rate_as_long(distributed<proxy>& d, utils::timed_rate_moving_average service::storage_proxy_stats::stats::*f) {
return sum_timed_rate(d, f).then([](const utils::rate_moving_average& val) {
return make_ready_future<json::json_return_type>(val.count);
});
@@ -152,7 +152,7 @@ static future<json::json_return_type> total_latency(sharded<service::storage_pr
*/
template<typename F>
future<json::json_return_type>
sum_histogram_stats_storage_proxy(sharded<proxy>& d,
sum_histogram_stats_storage_proxy(distributed<proxy>& d,
utils::timed_rate_moving_average_summary_and_histogram F::*f) {
return two_dimensional_map_reduce(d, [f] (service::storage_proxy_stats::stats& stats) {
return (stats.*f).hist;
@@ -172,7 +172,7 @@ sum_histogram_stats_storage_proxy(sharded<proxy>& d,
*/
template<typename F>
future<json::json_return_type>
sum_timer_stats_storage_proxy(sharded<proxy>& d,
sum_timer_stats_storage_proxy(distributed<proxy>& d,
utils::timed_rate_moving_average_summary_and_histogram F::*f) {
return two_dimensional_map_reduce(d, [f] (service::storage_proxy_stats::stats& stats) {

View File

@@ -20,7 +20,6 @@
#include "utils/hash.hh"
#include <optional>
#include <sstream>
#include <stdexcept>
#include <time.h>
#include <algorithm>
#include <functional>
@@ -175,7 +174,9 @@ std::vector<table_info> parse_table_infos(const sstring& ks_name, const http_con
std::pair<sstring, std::vector<table_info>> parse_table_infos(const http_context& ctx, const http::request& req, sstring cf_param_name) {
auto keyspace = validate_keyspace(ctx, req);
auto tis = parse_table_infos(keyspace, ctx, req.get_query_param(cf_param_name));
const auto& query_params = req.query_parameters;
auto it = query_params.find(cf_param_name);
auto tis = parse_table_infos(keyspace, ctx, it != query_params.end() ? it->second : "");
return std::make_pair(std::move(keyspace), std::move(tis));
}
@@ -197,7 +198,7 @@ static ss::token_range token_range_endpoints_to_json(const dht::token_range_endp
return r;
}
seastar::future<json::json_return_type> run_toppartitions_query(db::toppartitions_query& q, bool legacy_request) {
seastar::future<json::json_return_type> run_toppartitions_query(db::toppartitions_query& q, http_context &ctx, bool legacy_request) {
return q.scatter().then([&q, legacy_request] {
return sleep(q.duration()).then([&q, legacy_request] {
return q.gather(q.capacity()).then([&q, legacy_request] (auto topk_results) {
@@ -227,36 +228,37 @@ seastar::future<json::json_return_type> run_toppartitions_query(db::toppartition
});
}
scrub_info parse_scrub_options(const http_context& ctx, std::unique_ptr<http::request> req) {
future<scrub_info> parse_scrub_options(const http_context& ctx, sharded<db::snapshot_ctl>& snap_ctl, std::unique_ptr<http::request> req) {
scrub_info info;
auto [ keyspace, table_infos ] = parse_table_infos(ctx, *req, "cf");
info.keyspace = std::move(keyspace);
info.column_families = table_infos | std::views::transform(&table_info::name) | std::ranges::to<std::vector>();
auto scrub_mode_str = req->get_query_param("scrub_mode");
auto scrub_mode = compaction::compaction_type_options::scrub::mode::abort;
auto scrub_mode = sstables::compaction_type_options::scrub::mode::abort;
if (scrub_mode_str.empty()) {
const auto skip_corrupted = validate_bool_x(req->get_query_param("skip_corrupted"), false);
if (skip_corrupted) {
scrub_mode = compaction::compaction_type_options::scrub::mode::skip;
scrub_mode = sstables::compaction_type_options::scrub::mode::skip;
}
} else {
if (scrub_mode_str == "ABORT") {
scrub_mode = compaction::compaction_type_options::scrub::mode::abort;
scrub_mode = sstables::compaction_type_options::scrub::mode::abort;
} else if (scrub_mode_str == "SKIP") {
scrub_mode = compaction::compaction_type_options::scrub::mode::skip;
scrub_mode = sstables::compaction_type_options::scrub::mode::skip;
} else if (scrub_mode_str == "SEGREGATE") {
scrub_mode = compaction::compaction_type_options::scrub::mode::segregate;
scrub_mode = sstables::compaction_type_options::scrub::mode::segregate;
} else if (scrub_mode_str == "VALIDATE") {
scrub_mode = compaction::compaction_type_options::scrub::mode::validate;
scrub_mode = sstables::compaction_type_options::scrub::mode::validate;
} else {
throw httpd::bad_param_exception(fmt::format("Unknown argument for 'scrub_mode' parameter: {}", scrub_mode_str));
}
}
if (!req_param<bool>(*req, "disable_snapshot", false) && !info.column_families.empty()) {
info.snapshot_tag = format("pre-scrub-{:d}", db_clock::now().time_since_epoch().count());
auto tag = format("pre-scrub-{:d}", db_clock::now().time_since_epoch().count());
co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, tag, db::snapshot_ctl::skip_flush::no);
}
info.opts = {
@@ -264,16 +266,16 @@ scrub_info parse_scrub_options(const http_context& ctx, std::unique_ptr<http::re
};
const sstring quarantine_mode_str = req_param<sstring>(*req, "quarantine_mode", "INCLUDE");
if (quarantine_mode_str == "INCLUDE") {
info.opts.quarantine_operation_mode = compaction::compaction_type_options::scrub::quarantine_mode::include;
info.opts.quarantine_operation_mode = sstables::compaction_type_options::scrub::quarantine_mode::include;
} else if (quarantine_mode_str == "EXCLUDE") {
info.opts.quarantine_operation_mode = compaction::compaction_type_options::scrub::quarantine_mode::exclude;
info.opts.quarantine_operation_mode = sstables::compaction_type_options::scrub::quarantine_mode::exclude;
} else if (quarantine_mode_str == "ONLY") {
info.opts.quarantine_operation_mode = compaction::compaction_type_options::scrub::quarantine_mode::only;
info.opts.quarantine_operation_mode = sstables::compaction_type_options::scrub::quarantine_mode::only;
} else {
throw httpd::bad_param_exception(fmt::format("Unknown argument for 'quarantine_mode' parameter: {}", quarantine_mode_str));
}
return info;
co_return info;
}
void set_transport_controller(http_context& ctx, routes& r, cql_transport::controller& ctl) {
@@ -330,7 +332,7 @@ void set_repair(http_context& ctx, routes& r, sharded<repair_service>& repair, s
// Nodetool still sends those unsupported options. Ignore them to avoid failing nodetool repair.
static std::unordered_set<sstring> legacy_options_to_ignore = {"pullRepair", "ignoreUnreplicatedKeyspaces"};
for (auto& x : req->get_query_params()) {
for (auto& x : req->query_parameters) {
if (legacy_options_to_ignore.contains(x.first)) {
continue;
}
@@ -497,7 +499,6 @@ void set_sstables_loader(http_context& ctx, routes& r, sharded<sstables_loader>&
auto bucket = req->get_query_param("bucket");
auto prefix = req->get_query_param("prefix");
auto scope = parse_stream_scope(req->get_query_param("scope"));
auto primary_replica_only = validate_bool_x(req->get_query_param("primary_replica_only"), false);
// TODO: the http_server backing the API does not use content streaming
// should use it for better performance
@@ -508,7 +509,7 @@ void set_sstables_loader(http_context& ctx, routes& r, sharded<sstables_loader>&
auto sstables = parsed.GetArray() |
std::views::transform([] (const auto& s) { return sstring(rjson::to_string_view(s)); }) |
std::ranges::to<std::vector>();
auto task_id = co_await sst_loader.local().download_new_sstables(keyspace, table, prefix, std::move(sstables), endpoint, bucket, scope, primary_replica_only);
auto task_id = co_await sst_loader.local().download_new_sstables(keyspace, table, prefix, std::move(sstables), endpoint, bucket, scope);
co_return json::json_return_type(fmt::to_string(task_id));
});
@@ -554,8 +555,10 @@ rest_get_token_endpoint(http_context& ctx, sharded<service::storage_service>& ss
token_endpoints = ss.local().get_token_to_endpoint_map();
} else if (!keyspace_name.empty() && !table_name.empty()) {
auto& db = ctx.db.local();
auto tid = validate_table(db, keyspace_name, table_name);
token_endpoints = co_await ss.local().get_tablet_to_endpoint_map(tid);
if (!db.has_schema(keyspace_name, table_name)) {
throw bad_param_exception(fmt::format("Failed to find table {}.{}", keyspace_name, table_name));
}
token_endpoints = co_await ss.local().get_tablet_to_endpoint_map(db.find_schema(keyspace_name, table_name)->id());
} else {
throw bad_param_exception("Either provide both keyspace and table (for tablet table) or neither (for vnodes)");
}
@@ -574,8 +577,9 @@ rest_toppartitions_generic(http_context& ctx, std::unique_ptr<http::request> req
bool filters_provided = false;
std::unordered_set<std::tuple<sstring, sstring>, utils::tuple_hash> table_filters {};
if (auto filters = req->get_query_param("table_filters"); !filters.empty()) {
if (req->query_parameters.contains("table_filters")) {
filters_provided = true;
auto filters = req->get_query_param("table_filters");
std::stringstream ss { filters };
std::string filter;
while (!filters.empty() && ss.good()) {
@@ -585,8 +589,9 @@ rest_toppartitions_generic(http_context& ctx, std::unique_ptr<http::request> req
}
std::unordered_set<sstring> keyspace_filters {};
if (auto filters = req->get_query_param("keyspace_filters"); !filters.empty()) {
if (req->query_parameters.contains("keyspace_filters")) {
filters_provided = true;
auto filters = req->get_query_param("keyspace_filters");
std::stringstream ss { filters };
std::string filter;
while (!filters.empty() && ss.good()) {
@@ -613,8 +618,8 @@ rest_toppartitions_generic(http_context& ctx, std::unique_ptr<http::request> req
apilog.info("toppartitions query: #table_filters={} #keyspace_filters={} duration={} list_size={} capacity={}",
!table_filters.empty() ? std::to_string(table_filters.size()) : "all", !keyspace_filters.empty() ? std::to_string(keyspace_filters.size()) : "all", duration.value, list_size.value, capacity.value);
return seastar::do_with(db::toppartitions_query(ctx.db, std::move(table_filters), std::move(keyspace_filters), duration.value, list_size, capacity), [] (db::toppartitions_query& q) {
return run_toppartitions_query(q);
return seastar::do_with(db::toppartitions_query(ctx.db, std::move(table_filters), std::move(keyspace_filters), duration.value, list_size, capacity), [&ctx] (db::toppartitions_query& q) {
return run_toppartitions_query(q, ctx);
});
}
@@ -641,16 +646,21 @@ future<json::json_return_type>
rest_get_range_to_endpoint_map(http_context& ctx, sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
auto keyspace = validate_keyspace(ctx, req);
auto table = req->get_query_param("cf");
std::optional<table_id> table_id;
if (table.empty()) {
ensure_tablets_disabled(ctx, keyspace, "storage_service/range_to_endpoint_map");
} else {
table_id = validate_table(ctx.db.local(), keyspace, table);
}
auto erm = std::invoke([&]() -> locator::effective_replication_map_ptr {
auto& ks = ctx.db.local().find_keyspace(keyspace);
if (table.empty()) {
ensure_tablets_disabled(ctx, keyspace, "storage_service/range_to_endpoint_map");
return ks.get_static_effective_replication_map();
} else {
auto table_id = validate_table(ctx.db.local(), keyspace, table);
auto& cf = ctx.db.local().find_column_family(table_id);
return cf.get_effective_replication_map();
}
});
std::vector<ss::maplist_mapper> res;
co_return stream_range_as_array(co_await ss.local().get_range_to_address_map(keyspace, table_id),
co_return stream_range_as_array(co_await ss.local().get_range_to_address_map(erm),
[](const std::pair<dht::token_range, inet_address_vector_replica_set>& entry){
ss::maplist_mapper m;
if (entry.first.start()) {
@@ -695,6 +705,12 @@ rest_describe_ring(http_context& ctx, sharded<service::storage_service>& ss, std
return describe_ring_as_json(ss, validate_keyspace(ctx, req));
}
static
future<json::json_return_type>
rest_get_load(http_context& ctx, std::unique_ptr<http::request> req) {
return get_cf_stats(ctx, &replica::column_family_stats::live_disk_space_used);
}
static
future<json::json_return_type>
rest_get_current_generation_number(sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
@@ -724,15 +740,71 @@ rest_cdc_streams_check_and_repair(sharded<service::storage_service>& ss, std::un
static
future<json::json_return_type>
rest_cleanup_all(http_context& ctx, sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
bool global = true;
if (auto global_param = req->get_query_param("global"); !global_param.empty()) {
global = validate_bool(global_param);
rest_force_compaction(http_context& ctx, std::unique_ptr<http::request> req) {
auto& db = ctx.db;
auto flush = validate_bool_x(req->get_query_param("flush_memtables"), true);
auto consider_only_existing_data = validate_bool_x(req->get_query_param("consider_only_existing_data"), false);
apilog.info("force_compaction: flush={} consider_only_existing_data={}", flush, consider_only_existing_data);
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
std::optional<flush_mode> fmopt;
if (!flush && !consider_only_existing_data) {
fmopt = flush_mode::skip;
}
auto task = co_await compaction_module.make_and_start_task<global_major_compaction_task_impl>({}, db, fmopt, consider_only_existing_data);
co_await task->done();
co_return json_void();
}
static
future<json::json_return_type>
rest_force_keyspace_compaction(http_context& ctx, std::unique_ptr<http::request> req) {
auto& db = ctx.db;
auto [ keyspace, table_infos ] = parse_table_infos(ctx, *req, "cf");
auto flush = validate_bool_x(req->get_query_param("flush_memtables"), true);
auto consider_only_existing_data = validate_bool_x(req->get_query_param("consider_only_existing_data"), false);
apilog.info("force_keyspace_compaction: keyspace={} tables={}, flush={} consider_only_existing_data={}", keyspace, table_infos, flush, consider_only_existing_data);
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
std::optional<flush_mode> fmopt;
if (!flush && !consider_only_existing_data) {
fmopt = flush_mode::skip;
}
auto task = co_await compaction_module.make_and_start_task<major_keyspace_compaction_task_impl>({}, std::move(keyspace), tasks::task_id::create_null_id(), db, table_infos, fmopt, consider_only_existing_data);
co_await task->done();
co_return json_void();
}
static
future<json::json_return_type>
rest_force_keyspace_cleanup(http_context& ctx, sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
auto& db = ctx.db;
auto [keyspace, table_infos] = parse_table_infos(ctx, *req);
const auto& rs = db.local().find_keyspace(keyspace).get_replication_strategy();
if (rs.is_local() || !rs.is_vnode_based()) {
auto reason = rs.is_local() ? "require" : "support";
apilog.info("Keyspace {} does not {} cleanup", keyspace, reason);
co_return json::json_return_type(0);
}
apilog.info("force_keyspace_cleanup: keyspace={} tables={}", keyspace, table_infos);
if (!co_await ss.local().is_cleanup_allowed(keyspace)) {
auto msg = "Can not perform cleanup operation when topology changes";
apilog.warn("force_keyspace_cleanup: keyspace={} tables={}: {}", keyspace, table_infos, msg);
co_await coroutine::return_exception(std::runtime_error(msg));
}
apilog.info("cleanup_all global={}", global);
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
auto task = co_await compaction_module.make_and_start_task<cleanup_keyspace_compaction_task_impl>(
{}, std::move(keyspace), db, table_infos, flush_mode::all_tables, tasks::is_user_task::yes);
co_await task->done();
co_return json::json_return_type(0);
}
auto done = !global ? false : co_await ss.invoke_on(0, [] (service::storage_service& ss) -> future<bool> {
static
future<json::json_return_type>
rest_cleanup_all(http_context& ctx, sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
apilog.info("cleanup_all");
auto done = co_await ss.invoke_on(0, [] (service::storage_service& ss) -> future<bool> {
if (!ss.is_topology_coordinator_enabled()) {
co_return false;
}
@@ -742,33 +814,39 @@ rest_cleanup_all(http_context& ctx, sharded<service::storage_service>& ss, std::
if (done) {
co_return json::json_return_type(0);
}
// fall back to the local cleanup if topology coordinator is not enabled or local cleanup is requested
// fall back to the local global cleanup if topology coordinator is not enabled
auto& db = ctx.db;
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
auto task = co_await compaction_module.make_and_start_task<compaction::global_cleanup_compaction_task_impl>({}, db);
auto task = co_await compaction_module.make_and_start_task<global_cleanup_compaction_task_impl>({}, db);
co_await task->done();
// Mark this node as clean
co_await ss.invoke_on(0, [] (service::storage_service& ss) -> future<> {
if (ss.is_topology_coordinator_enabled()) {
co_await ss.reset_cleanup_needed();
}
});
co_return json::json_return_type(0);
}
static
future<json::json_return_type>
rest_reset_cleanup_needed(http_context& ctx, sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
apilog.info("reset_cleanup_needed");
co_await ss.invoke_on(0, [] (service::storage_service& ss) {
if (!ss.is_topology_coordinator_enabled()) {
throw std::runtime_error("mark_node_as_clean is only supported when topology over raft is enabled");
}
return ss.reset_cleanup_needed();
});
co_return json_void();
rest_perform_keyspace_offstrategy_compaction(http_context& ctx, std::unique_ptr<http::request> req) {
auto [keyspace, table_infos] = parse_table_infos(ctx, *req);
apilog.info("perform_keyspace_offstrategy_compaction: keyspace={} tables={}", keyspace, table_infos);
bool res = false;
auto& compaction_module = ctx.db.local().get_compaction_manager().get_task_manager_module();
auto task = co_await compaction_module.make_and_start_task<offstrategy_keyspace_compaction_task_impl>({}, std::move(keyspace), ctx.db, table_infos, &res);
co_await task->done();
co_return json::json_return_type(res);
}
static
future<json::json_return_type>
rest_upgrade_sstables(http_context& ctx, std::unique_ptr<http::request> req) {
auto& db = ctx.db;
auto [keyspace, table_infos] = parse_table_infos(ctx, *req);
bool exclude_current_version = req_param<bool>(*req, "exclude_current_version", false);
apilog.info("upgrade_sstables: keyspace={} tables={} exclude_current_version={}", keyspace, table_infos, exclude_current_version);
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
auto task = co_await compaction_module.make_and_start_task<upgrade_sstables_compaction_task_impl>({}, std::move(keyspace), db, table_infos, exclude_current_version);
co_await task->done();
co_return json::json_return_type(0);
}
static
@@ -890,9 +968,9 @@ rest_is_starting(sharded<service::storage_service>& ss, std::unique_ptr<http::re
static
future<json::json_return_type>
rest_get_drain_progress(sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
return ss.map_reduce(adder<replica::database::drain_progress>(), [] (auto& ss) {
return ss.get_database().get_drain_progress();
rest_get_drain_progress(http_context& ctx, std::unique_ptr<http::request> req) {
return ctx.db.map_reduce(adder<replica::database::drain_progress>(), [] (auto& db) {
return db.get_drain_progress();
}).then([] (auto&& progress) {
auto progress_str = format("Drained {}/{} ColumnFamilies", progress.remaining_cfs, progress.total_cfs);
return make_ready_future<json::json_return_type>(std::move(progress_str));
@@ -908,6 +986,28 @@ rest_drain(sharded<service::storage_service>& ss, std::unique_ptr<http::request>
});
}
static
json::json_return_type
rest_get_keyspaces(http_context& ctx, const_req req) {
auto type = req.get_query_param("type");
auto replication = req.get_query_param("replication");
std::vector<sstring> keyspaces;
if (type == "user") {
keyspaces = ctx.db.local().get_user_keyspaces();
} else if (type == "non_local_strategy") {
keyspaces = ctx.db.local().get_non_local_strategy_keyspaces();
} else {
keyspaces = ctx.db.local().get_all_keyspaces();
}
if (replication.empty() || replication == "all") {
return keyspaces;
}
const auto want_tablets = replication == "tablets";
return keyspaces | std::views::filter([&ctx, want_tablets] (const sstring& ks) {
return ctx.db.local().find_keyspace(ks).get_replication_strategy().uses_tablets() == want_tablets;
}) | std::ranges::to<std::vector>();
}
static
future<json::json_return_type>
rest_stop_gossiping(sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
@@ -1224,6 +1324,12 @@ rest_set_hinted_handoff_throttle_in_kb(std::unique_ptr<http::request> req) {
return make_ready_future<json::json_return_type>(json_void());
}
static
future<json::json_return_type>
rest_get_metrics_load(http_context& ctx, std::unique_ptr<http::request> req) {
return get_cf_stats(ctx, &replica::column_family_stats::live_disk_space_used);
}
static
json::json_return_type
rest_get_exceptions(sharded<service::storage_service>& ss, const_req req) {
@@ -1624,10 +1730,6 @@ rest_repair_tablet(http_context& ctx, sharded<service::storage_service>& ss, std
if (!await.empty()) {
await_completion = validate_bool(await);
}
// Use regular mode if the incremental_mode option is not provided by user.
auto incremental = req->get_query_param("incremental_mode");
auto incremental_mode = incremental.empty() ? locator::default_tablet_repair_incremental_mode : locator::tablet_repair_incremental_mode_from_string(incremental);
auto table_id = validate_table(ctx.db.local(), ks, table);
std::variant<utils::chunked_vector<dht::token>, service::storage_service::all_tokens_tag> tokens_variant;
if (all_tokens) {
@@ -1650,12 +1752,8 @@ rest_repair_tablet(http_context& ctx, sharded<service::storage_service>& ss, std
}) | std::ranges::to<std::unordered_set>();
}
auto dcs_filter = locator::tablet_task_info::deserialize_repair_dcs_filter(dcs);
try {
auto res = co_await ss.local().add_repair_tablet_request(table_id, tokens_variant, hosts_filter, dcs_filter, await_completion, incremental_mode);
co_return json::json_return_type(res);
} catch (std::invalid_argument& e) {
throw httpd::bad_param_exception(e.what());
}
auto res = co_await ss.local().add_repair_tablet_request(table_id, tokens_variant, hosts_filter, dcs_filter, await_completion);
co_return json::json_return_type(res);
}
static
@@ -1696,7 +1794,8 @@ rest_drop_quarantined_sstables(http_context& ctx, sharded<service::storage_servi
try {
if (!keyspace.empty()) {
keyspace = validate_keyspace(ctx, keyspace);
auto table_infos = parse_table_infos(keyspace, ctx, req->get_query_param("tables"));
auto it = req->query_parameters.find("tables");
auto table_infos = parse_table_infos(keyspace, ctx, it != req->query_parameters.end() ? it->second : "");
co_await ctx.db.invoke_on_all([&table_infos](replica::database& db) -> future<> {
return parallel_for_each(table_infos, [&db](const auto& table) -> future<> {
@@ -1748,11 +1847,16 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
ss::get_range_to_endpoint_map.set(r, rest_bind(rest_get_range_to_endpoint_map, ctx, ss));
ss::get_pending_range_to_endpoint_map.set(r, rest_bind(rest_get_pending_range_to_endpoint_map, ctx));
ss::describe_ring.set(r, rest_bind(rest_describe_ring, ctx, ss));
ss::get_load.set(r, rest_bind(rest_get_load, ctx));
ss::get_current_generation_number.set(r, rest_bind(rest_get_current_generation_number, ss));
ss::get_natural_endpoints.set(r, rest_bind(rest_get_natural_endpoints, ctx, ss));
ss::cdc_streams_check_and_repair.set(r, rest_bind(rest_cdc_streams_check_and_repair, ss));
ss::force_compaction.set(r, rest_bind(rest_force_compaction, ctx));
ss::force_keyspace_compaction.set(r, rest_bind(rest_force_keyspace_compaction, ctx));
ss::force_keyspace_cleanup.set(r, rest_bind(rest_force_keyspace_cleanup, ctx, ss));
ss::cleanup_all.set(r, rest_bind(rest_cleanup_all, ctx, ss));
ss::reset_cleanup_needed.set(r, rest_bind(rest_reset_cleanup_needed, ctx, ss));
ss::perform_keyspace_offstrategy_compaction.set(r, rest_bind(rest_perform_keyspace_offstrategy_compaction, ctx));
ss::upgrade_sstables.set(r, rest_bind(rest_upgrade_sstables, ctx));
ss::force_flush.set(r, rest_bind(rest_force_flush, ctx));
ss::force_keyspace_flush.set(r, rest_bind(rest_force_keyspace_flush, ctx));
ss::decommission.set(r, rest_bind(rest_decommission, ss));
@@ -1764,8 +1868,9 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
ss::get_logging_levels.set(r, rest_bind(rest_get_logging_levels));
ss::get_operation_mode.set(r, rest_bind(rest_get_operation_mode, ss));
ss::is_starting.set(r, rest_bind(rest_is_starting, ss));
ss::get_drain_progress.set(r, rest_bind(rest_get_drain_progress, ss));
ss::get_drain_progress.set(r, rest_bind(rest_get_drain_progress, ctx));
ss::drain.set(r, rest_bind(rest_drain, ss));
ss::get_keyspaces.set(r, rest_bind(rest_get_keyspaces, ctx));
ss::stop_gossiping.set(r, rest_bind(rest_stop_gossiping, ss));
ss::start_gossiping.set(r, rest_bind(rest_start_gossiping, ss));
ss::is_gossip_running.set(r, rest_bind(rest_is_gossip_running, ss));
@@ -1795,6 +1900,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
ss::get_batch_size_failure_threshold.set(r, rest_bind(rest_get_batch_size_failure_threshold));
ss::set_batch_size_failure_threshold.set(r, rest_bind(rest_set_batch_size_failure_threshold));
ss::set_hinted_handoff_throttle_in_kb.set(r, rest_bind(rest_set_hinted_handoff_throttle_in_kb));
ss::get_metrics_load.set(r, rest_bind(rest_get_metrics_load, ctx));
ss::get_exceptions.set(r, rest_bind(rest_get_exceptions, ss));
ss::get_total_hints_in_progress.set(r, rest_bind(rest_get_total_hints_in_progress));
ss::get_total_hints.set(r, rest_bind(rest_get_total_hints));
@@ -1826,11 +1932,16 @@ void unset_storage_service(http_context& ctx, routes& r) {
ss::get_range_to_endpoint_map.unset(r);
ss::get_pending_range_to_endpoint_map.unset(r);
ss::describe_ring.unset(r);
ss::get_load.unset(r);
ss::get_current_generation_number.unset(r);
ss::get_natural_endpoints.unset(r);
ss::cdc_streams_check_and_repair.unset(r);
ss::force_compaction.unset(r);
ss::force_keyspace_compaction.unset(r);
ss::force_keyspace_cleanup.unset(r);
ss::cleanup_all.unset(r);
ss::reset_cleanup_needed.unset(r);
ss::perform_keyspace_offstrategy_compaction.unset(r);
ss::upgrade_sstables.unset(r);
ss::force_flush.unset(r);
ss::force_keyspace_flush.unset(r);
ss::decommission.unset(r);
@@ -1844,6 +1955,7 @@ void unset_storage_service(http_context& ctx, routes& r) {
ss::is_starting.unset(r);
ss::get_drain_progress.unset(r);
ss::drain.unset(r);
ss::get_keyspaces.unset(r);
ss::stop_gossiping.unset(r);
ss::start_gossiping.unset(r);
ss::is_gossip_running.unset(r);
@@ -1873,6 +1985,7 @@ void unset_storage_service(http_context& ctx, routes& r) {
ss::get_batch_size_failure_threshold.unset(r);
ss::set_batch_size_failure_threshold.unset(r);
ss::set_hinted_handoff_throttle_in_kb.unset(r);
ss::get_metrics_load.unset(r);
ss::get_exceptions.unset(r);
ss::get_total_hints_in_progress.unset(r);
ss::get_total_hints.unset(r);
@@ -1914,7 +2027,7 @@ void unset_load_meter(http_context& ctx, routes& r) {
void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_ctl) {
ss::get_snapshot_details.set(r, [&snap_ctl](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
auto result = co_await snap_ctl.local().get_snapshot_details();
co_return noncopyable_function<future<> (output_stream<char>&&)>([res = std::move(result)] (output_stream<char>&& o) -> future<> {
co_return std::function([res = std::move(result)] (output_stream<char>&& o) -> future<> {
std::exception_ptr ex;
output_stream<char> out = std::move(o);
try {
@@ -1954,7 +2067,7 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
});
ss::take_snapshot.set(r, [&snap_ctl](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
apilog.info("take_snapshot: {}", req->get_query_params());
apilog.info("take_snapshot: {}", req->query_parameters);
auto tag = req->get_query_param("tag");
auto column_families = split(req->get_query_param("cf"), ",");
auto sfopt = req->get_query_param("sf");
@@ -1981,7 +2094,7 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
});
ss::del_snapshot.set(r, [&snap_ctl](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
apilog.info("del_snapshot: {}", req->get_query_params());
apilog.info("del_snapshot: {}", req->query_parameters);
auto tag = req->get_query_param("tag");
auto column_family = req->get_query_param("cf");
@@ -2003,21 +2116,17 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
ss::scrub.set(r, [&ctx, &snap_ctl] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
auto& db = ctx.db;
auto info = parse_scrub_options(ctx, std::move(req));
auto info = co_await parse_scrub_options(ctx, snap_ctl, std::move(req));
if (!info.snapshot_tag.empty()) {
co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, info.snapshot_tag, db::snapshot_ctl::skip_flush::no);
}
compaction::compaction_stats stats;
sstables::compaction_stats stats;
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
auto task = co_await compaction_module.make_and_start_task<compaction::scrub_sstables_compaction_task_impl>({}, info.keyspace, db, info.column_families, info.opts, &stats);
auto task = co_await compaction_module.make_and_start_task<scrub_sstables_compaction_task_impl>({}, info.keyspace, db, info.column_families, info.opts, &stats);
try {
co_await task->done();
if (stats.validation_errors) {
co_return json::json_return_type(static_cast<int>(scrub_status::validation_errors));
}
} catch (const compaction::compaction_aborted_exception&) {
} catch (const sstables::compaction_aborted_exception&) {
co_return json::json_return_type(static_cast<int>(scrub_status::aborted));
} catch (...) {
apilog.error("scrub keyspace={} tables={} failed: {}", info.keyspace, info.column_families, std::current_exception());

View File

@@ -58,13 +58,12 @@ std::vector<table_info> parse_table_infos(const sstring& ks_name, const http_con
std::pair<sstring, std::vector<table_info>> parse_table_infos(const http_context& ctx, const http::request& req, sstring cf_param_name = "cf");
struct scrub_info {
compaction::compaction_type_options::scrub opts;
sstables::compaction_type_options::scrub opts;
sstring keyspace;
std::vector<sstring> column_families;
sstring snapshot_tag;
};
scrub_info parse_scrub_options(const http_context& ctx, std::unique_ptr<http::request> req);
future<scrub_info> parse_scrub_options(const http_context& ctx, sharded<db::snapshot_ctl>& snap_ctl, std::unique_ptr<http::request> req);
void set_storage_service(http_context& ctx, httpd::routes& r, sharded<service::storage_service>& ss, service::raft_group0_client&);
void unset_storage_service(http_context& ctx, httpd::routes& r);
@@ -82,7 +81,7 @@ void set_snapshot(http_context& ctx, httpd::routes& r, sharded<db::snapshot_ctl>
void unset_snapshot(http_context& ctx, httpd::routes& r);
void set_load_meter(http_context& ctx, httpd::routes& r, service::load_meter& lm);
void unset_load_meter(http_context& ctx, httpd::routes& r);
seastar::future<json::json_return_type> run_toppartitions_query(db::toppartitions_query& q, bool legacy_request = false);
seastar::future<json::json_return_type> run_toppartitions_query(db::toppartitions_query& q, http_context &ctx, bool legacy_request = false);
// converts string value of boolean parameter into bool
// maps (case insensitively)

View File

@@ -10,7 +10,7 @@
#include "api/api-doc/system.json.hh"
#include "api/api-doc/metrics.json.hh"
#include "replica/database.hh"
#include "sstables/sstables_manager.hh"
#include "db/sstables-format-selector.hh"
#include <rapidjson/document.h>
#include <boost/lexical_cast.hpp>
@@ -184,13 +184,18 @@ void set_system(http_context& ctx, routes& r) {
apilog.info("Profile dumped to {}", profile_dest);
return make_ready_future<json::json_return_type>(json::json_return_type(json::json_void()));
}) ;
}
hs::get_highest_supported_sstable_version.set(r, [&ctx] (std::unique_ptr<request> req) {
return smp::submit_to(0, [&ctx] {
auto format = ctx.db.local().get_user_sstables_manager().get_highest_supported_format();
return make_ready_future<json::json_return_type>(seastar::to_sstring(format));
void set_format_selector(http_context& ctx, routes& r, db::sstables_format_selector& sel) {
hs::get_highest_supported_sstable_version.set(r, [&sel] (std::unique_ptr<request> req) {
return smp::submit_to(0, [&sel] {
return make_ready_future<json::json_return_type>(seastar::to_sstring(sel.selected_format()));
});
});
}
void unset_format_selector(http_context& ctx, routes& r) {
hs::get_highest_supported_sstable_version.unset(r);
}
}

View File

@@ -12,9 +12,14 @@ namespace seastar::httpd {
class routes;
}
namespace db { class sstables_format_selector; }
namespace api {
struct http_context;
void set_system(http_context& ctx, seastar::httpd::routes& r);
void set_format_selector(http_context& ctx, seastar::httpd::routes& r, db::sstables_format_selector& sel);
void unset_format_selector(http_context& ctx, seastar::httpd::routes& r);
}

View File

@@ -6,7 +6,6 @@
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#include <seastar/core/chunked_fifo.hh>
#include <seastar/core/coroutine.hh>
#include <seastar/coroutine/exception.hh>
#include <seastar/http/exception.hh>
@@ -35,9 +34,8 @@ static ::tm get_time(db_clock::time_point tp) {
}
tm::task_status make_status(tasks::task_status status, sharded<gms::gossiper>& gossiper) {
chunked_fifo<tm::task_identity> tis;
tis.reserve(status.children.size());
for (const auto& child : status.children) {
std::vector<tm::task_identity> tis{status.children.size()};
std::ranges::transform(status.children, tis.begin(), [&gossiper] (const auto& child) {
tm::task_identity ident;
gms::inet_address addr{};
if (gossiper.local_is_initialized()) {
@@ -45,8 +43,8 @@ tm::task_status make_status(tasks::task_status status, sharded<gms::gossiper>& g
}
ident.task_id = child.task_id.to_sstring();
ident.node = fmt::format("{}", addr);
tis.push_back(std::move(ident));
}
return ident;
});
tm::task_status res{};
res.id = status.task_id.to_sstring();
@@ -107,11 +105,11 @@ void set_task_manager(http_context& ctx, routes& r, sharded<tasks::task_manager>
throw bad_param_exception(fmt::format("{}", std::current_exception()));
}
if (auto param = req->get_query_param("keyspace"); !param.empty()) {
keyspace = param;
if (auto it = req->query_parameters.find("keyspace"); it != req->query_parameters.end()) {
keyspace = it->second;
}
if (auto param = req->get_query_param("table"); !param.empty()) {
table = param;
if (auto it = req->query_parameters.find("table"); it != req->query_parameters.end()) {
table = it->second;
}
return module->get_stats(internal, [keyspace = std::move(keyspace), table = std::move(table)] (std::string& ks, std::string& t) {
@@ -119,7 +117,7 @@ void set_task_manager(http_context& ctx, routes& r, sharded<tasks::task_manager>
});
});
noncopyable_function<future<>(output_stream<char>&&)> f = [r = std::move(res)] (output_stream<char>&& os) -> future<> {
std::function<future<>(output_stream<char>&&)> f = [r = std::move(res)] (output_stream<char>&& os) -> future<> {
auto s = std::move(os);
std::exception_ptr ex;
try {
@@ -175,8 +173,8 @@ void set_task_manager(http_context& ctx, routes& r, sharded<tasks::task_manager>
auto id = tasks::task_id{utils::UUID{req->get_path_param("task_id")}};
tasks::task_status status;
std::optional<std::chrono::seconds> timeout = std::nullopt;
if (auto param = req->get_query_param("timeout"); !param.empty()) {
timeout = std::chrono::seconds(boost::lexical_cast<uint32_t>(param));
if (auto it = req->query_parameters.find("timeout"); it != req->query_parameters.end()) {
timeout = std::chrono::seconds(boost::lexical_cast<uint32_t>(it->second));
}
try {
auto task = tasks::task_handler{tm.local(), id};
@@ -196,7 +194,7 @@ void set_task_manager(http_context& ctx, routes& r, sharded<tasks::task_manager>
auto task = tasks::task_handler{tm.local(), id};
auto res = co_await task.get_status_recursively(true);
noncopyable_function<future<>(output_stream<char>&&)> f = [r = std::move(res), &gossiper] (output_stream<char>&& os) -> future<> {
std::function<future<>(output_stream<char>&&)> f = [r = std::move(res), &gossiper] (output_stream<char>&& os) -> future<> {
auto s = std::move(os);
auto res = std::move(r);
co_await s.write("[");
@@ -217,7 +215,7 @@ void set_task_manager(http_context& ctx, routes& r, sharded<tasks::task_manager>
tm::get_and_update_ttl.set(r, [&cfg] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
uint32_t ttl = cfg.task_ttl_seconds();
try {
co_await cfg.task_ttl_seconds.set_value_on_all_shards(req->get_query_param("ttl"), utils::config_file::config_source::API);
co_await cfg.task_ttl_seconds.set_value_on_all_shards(req->query_parameters["ttl"], utils::config_file::config_source::API);
} catch (...) {
throw bad_param_exception(fmt::format("{}", std::current_exception()));
}
@@ -232,7 +230,7 @@ void set_task_manager(http_context& ctx, routes& r, sharded<tasks::task_manager>
tm::get_and_update_user_ttl.set(r, [&cfg] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
uint32_t user_ttl = cfg.user_task_ttl_seconds();
try {
co_await cfg.user_task_ttl_seconds.set_value_on_all_shards(req->get_query_param("user_ttl"), utils::config_file::config_source::API);
co_await cfg.user_task_ttl_seconds.set_value_on_all_shards(req->query_parameters["user_ttl"], utils::config_file::config_source::API);
} catch (...) {
throw bad_param_exception(fmt::format("{}", std::current_exception()));
}

View File

@@ -57,16 +57,20 @@ void set_task_manager_test(http_context& ctx, routes& r, sharded<tasks::task_man
tmt::register_test_task.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
sharded<tasks::task_manager>& tms = tm;
const auto id_param = req->get_query_param("task_id");
auto id = !id_param.empty() ? tasks::task_id{utils::UUID{id_param}} : tasks::task_id::create_null_id();
const auto shard_param = req->get_query_param("shard");
unsigned shard = shard_param.empty() ? 0 : boost::lexical_cast<unsigned>(shard_param);
std::string keyspace = req->get_query_param("keyspace");
std::string table = req->get_query_param("table");
std::string entity = req->get_query_param("entity");
auto it = req->query_parameters.find("task_id");
auto id = it != req->query_parameters.end() ? tasks::task_id{utils::UUID{it->second}} : tasks::task_id::create_null_id();
it = req->query_parameters.find("shard");
unsigned shard = it != req->query_parameters.end() ? boost::lexical_cast<unsigned>(it->second) : 0;
it = req->query_parameters.find("keyspace");
std::string keyspace = it != req->query_parameters.end() ? it->second : "";
it = req->query_parameters.find("table");
std::string table = it != req->query_parameters.end() ? it->second : "";
it = req->query_parameters.find("entity");
std::string entity = it != req->query_parameters.end() ? it->second : "";
it = req->query_parameters.find("parent_id");
tasks::task_info data;
if (auto parent_id = req->get_query_param("parent_id"); !parent_id.empty()) {
data.id = tasks::task_id{utils::UUID{parent_id}};
if (it != req->query_parameters.end()) {
data.id = tasks::task_id{utils::UUID{it->second}};
auto parent_ptr = co_await tasks::task_manager::lookup_task_on_all_shards(tm, data.id);
data.shard = parent_ptr->get_status().shard;
}
@@ -84,7 +88,7 @@ void set_task_manager_test(http_context& ctx, routes& r, sharded<tasks::task_man
});
tmt::unregister_test_task.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
auto id = tasks::task_id{utils::UUID{req->get_query_param("task_id")}};
auto id = tasks::task_id{utils::UUID{req->query_parameters["task_id"]}};
try {
co_await tasks::task_manager::invoke_on_task(tm, id, [] (tasks::task_manager::task_variant task_v, tasks::virtual_task_hint) -> future<> {
return std::visit(overloaded_functor{
@@ -105,8 +109,9 @@ void set_task_manager_test(http_context& ctx, routes& r, sharded<tasks::task_man
tmt::finish_test_task.set(r, [&tm] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
auto id = tasks::task_id{utils::UUID{req->get_path_param("task_id")}};
std::string error = req->get_query_param("error");
bool fail = !error.empty();
auto it = req->query_parameters.find("error");
bool fail = it != req->query_parameters.end();
std::string error = fail ? it->second : "";
try {
co_await tasks::task_manager::invoke_on_task(tm, id, [fail, error = std::move(error)] (tasks::task_manager::task_variant task_v, tasks::virtual_task_hint) -> future<> {

View File

@@ -12,7 +12,6 @@
#include "api/api.hh"
#include "api/storage_service.hh"
#include "api/api-doc/tasks.json.hh"
#include "api/api-doc/storage_service.json.hh"
#include "compaction/compaction_manager.hh"
#include "compaction/task_manager_module.hh"
#include "service/storage_service.hh"
@@ -26,7 +25,6 @@ extern logging::logger apilog;
namespace api {
namespace t = httpd::tasks_json;
namespace ss = httpd::storage_service_json;
using namespace json;
using ks_cf_func = std::function<future<json::json_return_type>(http_context&, std::unique_ptr<http::request>, sstring, std::vector<table_info>)>;
@@ -38,151 +36,76 @@ static auto wrap_ks_cf(http_context &ctx, ks_cf_func f) {
};
}
static future<shared_ptr<compaction::major_keyspace_compaction_task_impl>> force_keyspace_compaction(http_context& ctx, std::unique_ptr<http::request> req) {
auto& db = ctx.db;
auto [ keyspace, table_infos ] = parse_table_infos(ctx, *req, "cf");
auto flush = validate_bool_x(req->get_query_param("flush_memtables"), true);
auto consider_only_existing_data = validate_bool_x(req->get_query_param("consider_only_existing_data"), false);
apilog.info("force_keyspace_compaction: keyspace={} tables={}, flush={} consider_only_existing_data={}", keyspace, table_infos, flush, consider_only_existing_data);
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
std::optional<compaction::flush_mode> fmopt;
if (!flush && !consider_only_existing_data) {
fmopt = compaction::flush_mode::skip;
}
return compaction_module.make_and_start_task<compaction::major_keyspace_compaction_task_impl>({}, std::move(keyspace), tasks::task_id::create_null_id(), db, table_infos, fmopt, consider_only_existing_data);
}
static future<shared_ptr<compaction::upgrade_sstables_compaction_task_impl>> upgrade_sstables(http_context& ctx, std::unique_ptr<http::request> req, sstring keyspace, std::vector<table_info> table_infos) {
auto& db = ctx.db;
bool exclude_current_version = req_param<bool>(*req, "exclude_current_version", false);
apilog.info("upgrade_sstables: keyspace={} tables={} exclude_current_version={}", keyspace, table_infos, exclude_current_version);
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
return compaction_module.make_and_start_task<compaction::upgrade_sstables_compaction_task_impl>({}, std::move(keyspace), db, table_infos, exclude_current_version);
}
static future<shared_ptr<compaction::cleanup_keyspace_compaction_task_impl>> force_keyspace_cleanup(http_context& ctx, sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
auto& db = ctx.db;
auto [keyspace, table_infos] = parse_table_infos(ctx, *req);
const auto& rs = db.local().find_keyspace(keyspace).get_replication_strategy();
if (rs.is_local() || !rs.is_vnode_based()) {
auto reason = rs.is_local() ? "require" : "support";
apilog.info("Keyspace {} does not {} cleanup", keyspace, reason);
co_return nullptr;
}
apilog.info("force_keyspace_cleanup: keyspace={} tables={}", keyspace, table_infos);
if (!co_await ss.local().is_cleanup_allowed(keyspace)) {
auto msg = "Can not perform cleanup operation when topology changes";
apilog.warn("force_keyspace_cleanup: keyspace={} tables={}: {}", keyspace, table_infos, msg);
co_await coroutine::return_exception(std::runtime_error(msg));
}
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
co_return co_await compaction_module.make_and_start_task<compaction::cleanup_keyspace_compaction_task_impl>(
{}, std::move(keyspace), db, table_infos, compaction::flush_mode::all_tables, tasks::is_user_task::yes);
}
void set_tasks_compaction_module(http_context& ctx, routes& r, sharded<service::storage_service>& ss, sharded<db::snapshot_ctl>& snap_ctl) {
t::force_keyspace_compaction_async.set(r, [&ctx](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
auto task = co_await force_keyspace_compaction(ctx, std::move(req));
auto& db = ctx.db;
auto [ keyspace, table_infos ] = parse_table_infos(ctx, *req, "cf");
auto flush = validate_bool_x(req->get_query_param("flush_memtables"), true);
apilog.debug("force_keyspace_compaction_async: keyspace={} tables={}, flush={}", keyspace, table_infos, flush);
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
std::optional<flush_mode> fmopt;
if (!flush) {
fmopt = flush_mode::skip;
}
auto task = co_await compaction_module.make_and_start_task<major_keyspace_compaction_task_impl>({}, std::move(keyspace), tasks::task_id::create_null_id(), db, table_infos, fmopt);
co_return json::json_return_type(task->get_status().id.to_sstring());
});
ss::force_keyspace_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
auto task = co_await force_keyspace_compaction(ctx, std::move(req));
co_await task->done();
co_return json_void();
});
t::force_keyspace_cleanup_async.set(r, [&ctx, &ss](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
tasks::task_id id = tasks::task_id::create_null_id();
auto task = co_await force_keyspace_cleanup(ctx, ss, std::move(req));
if (task) {
id = task->get_status().id;
auto& db = ctx.db;
auto [keyspace, table_infos] = parse_table_infos(ctx, *req);
apilog.info("force_keyspace_cleanup_async: keyspace={} tables={}", keyspace, table_infos);
if (!co_await ss.local().is_cleanup_allowed(keyspace)) {
auto msg = "Can not perform cleanup operation when topology changes";
apilog.warn("force_keyspace_cleanup_async: keyspace={} tables={}: {}", keyspace, table_infos, msg);
co_await coroutine::return_exception(std::runtime_error(msg));
}
co_return json::json_return_type(id.to_sstring());
});
ss::force_keyspace_cleanup.set(r, [&ctx, &ss](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
auto task = co_await force_keyspace_cleanup(ctx, ss, std::move(req));
if (task) {
co_await task->done();
}
co_return json::json_return_type(0);
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
auto task = co_await compaction_module.make_and_start_task<cleanup_keyspace_compaction_task_impl>({}, std::move(keyspace), db, table_infos, flush_mode::all_tables, tasks::is_user_task::yes);
co_return json::json_return_type(task->get_status().id.to_sstring());
});
t::perform_keyspace_offstrategy_compaction_async.set(r, wrap_ks_cf(ctx, [] (http_context& ctx, std::unique_ptr<http::request> req, sstring keyspace, std::vector<table_info> table_infos) -> future<json::json_return_type> {
apilog.info("perform_keyspace_offstrategy_compaction: keyspace={} tables={}", keyspace, table_infos);
auto& compaction_module = ctx.db.local().get_compaction_manager().get_task_manager_module();
auto task = co_await compaction_module.make_and_start_task<compaction::offstrategy_keyspace_compaction_task_impl>({}, std::move(keyspace), ctx.db, table_infos, nullptr);
auto task = co_await compaction_module.make_and_start_task<offstrategy_keyspace_compaction_task_impl>({}, std::move(keyspace), ctx.db, table_infos, nullptr);
co_return json::json_return_type(task->get_status().id.to_sstring());
}));
ss::perform_keyspace_offstrategy_compaction.set(r, wrap_ks_cf(ctx, [] (http_context& ctx, std::unique_ptr<http::request> req, sstring keyspace, std::vector<table_info> table_infos) -> future<json::json_return_type> {
apilog.info("perform_keyspace_offstrategy_compaction: keyspace={} tables={}", keyspace, table_infos);
bool res = false;
auto& compaction_module = ctx.db.local().get_compaction_manager().get_task_manager_module();
auto task = co_await compaction_module.make_and_start_task<compaction::offstrategy_keyspace_compaction_task_impl>({}, std::move(keyspace), ctx.db, table_infos, &res);
co_await task->done();
co_return json::json_return_type(res);
}));
t::upgrade_sstables_async.set(r, wrap_ks_cf(ctx, [] (http_context& ctx, std::unique_ptr<http::request> req, sstring keyspace, std::vector<table_info> table_infos) -> future<json::json_return_type> {
auto task = co_await upgrade_sstables(ctx, std::move(req), std::move(keyspace), std::move(table_infos));
co_return json::json_return_type(task->get_status().id.to_sstring());
}));
auto& db = ctx.db;
bool exclude_current_version = req_param<bool>(*req, "exclude_current_version", false);
ss::upgrade_sstables.set(r, wrap_ks_cf(ctx, [] (http_context& ctx, std::unique_ptr<http::request> req, sstring keyspace, std::vector<table_info> table_infos) -> future<json::json_return_type> {
auto task = co_await upgrade_sstables(ctx, std::move(req), std::move(keyspace), std::move(table_infos));
co_await task->done();
co_return json::json_return_type(0);
apilog.info("upgrade_sstables: keyspace={} tables={} exclude_current_version={}", keyspace, table_infos, exclude_current_version);
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
auto task = co_await compaction_module.make_and_start_task<upgrade_sstables_compaction_task_impl>({}, std::move(keyspace), db, table_infos, exclude_current_version);
co_return json::json_return_type(task->get_status().id.to_sstring());
}));
t::scrub_async.set(r, [&ctx, &snap_ctl] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
auto& db = ctx.db;
auto info = parse_scrub_options(ctx, std::move(req));
if (!info.snapshot_tag.empty()) {
co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, info.snapshot_tag, db::snapshot_ctl::skip_flush::no);
}
auto info = co_await parse_scrub_options(ctx, snap_ctl, std::move(req));
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
auto task = co_await compaction_module.make_and_start_task<compaction::scrub_sstables_compaction_task_impl>({}, std::move(info.keyspace), db, std::move(info.column_families), info.opts, nullptr);
auto task = co_await compaction_module.make_and_start_task<scrub_sstables_compaction_task_impl>({}, std::move(info.keyspace), db, std::move(info.column_families), info.opts, nullptr);
co_return json::json_return_type(task->get_status().id.to_sstring());
});
ss::force_compaction.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
auto& db = ctx.db;
auto flush = validate_bool_x(req->get_query_param("flush_memtables"), true);
auto consider_only_existing_data = validate_bool_x(req->get_query_param("consider_only_existing_data"), false);
apilog.info("force_compaction: flush={} consider_only_existing_data={}", flush, consider_only_existing_data);
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
std::optional<compaction::flush_mode> fmopt;
if (!flush && !consider_only_existing_data) {
fmopt = compaction::flush_mode::skip;
}
auto task = co_await compaction_module.make_and_start_task<compaction::global_major_compaction_task_impl>({}, db, fmopt, consider_only_existing_data);
co_await task->done();
co_return json_void();
});
}
void unset_tasks_compaction_module(http_context& ctx, httpd::routes& r) {
t::force_keyspace_compaction_async.unset(r);
ss::force_keyspace_compaction.unset(r);
t::force_keyspace_cleanup_async.unset(r);
ss::force_keyspace_cleanup.unset(r);
t::perform_keyspace_offstrategy_compaction_async.unset(r);
ss::perform_keyspace_offstrategy_compaction.unset(r);
t::upgrade_sstables_async.unset(r);
ss::upgrade_sstables.unset(r);
t::scrub_async.unset(r);
ss::force_compaction.unset(r);
}
}

View File

@@ -15,7 +15,7 @@
#include "mutation/canonical_mutation.hh"
#include "schema/schema_fwd.hh"
#include "mutation/timestamp.hh"
#include "timestamp.hh"
#include "utils/assert.hh"
#include "utils/exponential_backoff_retry.hh"
#include "cql3/query_processor.hh"

View File

@@ -233,9 +233,9 @@ future<role_set> ldap_role_manager::query_granted(std::string_view grantee_name,
}
future<role_to_directly_granted_map>
ldap_role_manager::query_all_directly_granted(::service::query_state& qs) {
ldap_role_manager::query_all_directly_granted() {
role_to_directly_granted_map result;
auto roles = co_await query_all(qs);
auto roles = co_await query_all();
for (auto& role: roles) {
auto granted_set = co_await query_granted(role, recursive_role_query::no);
for (auto& granted: granted_set) {
@@ -247,8 +247,8 @@ ldap_role_manager::query_all_directly_granted(::service::query_state& qs) {
co_return result;
}
future<role_set> ldap_role_manager::query_all(::service::query_state& qs) {
return _std_mgr.query_all(qs);
future<role_set> ldap_role_manager::query_all() {
return _std_mgr.query_all();
}
future<> ldap_role_manager::create_role(std::string_view role_name) {
@@ -311,12 +311,12 @@ future<bool> ldap_role_manager::can_login(std::string_view role_name) {
}
future<std::optional<sstring>> ldap_role_manager::get_attribute(
std::string_view role_name, std::string_view attribute_name, ::service::query_state& qs) {
return _std_mgr.get_attribute(role_name, attribute_name, qs);
std::string_view role_name, std::string_view attribute_name) {
return _std_mgr.get_attribute(role_name, attribute_name);
}
future<role_manager::attribute_vals> ldap_role_manager::query_attribute_for_all(std::string_view attribute_name, ::service::query_state& qs) {
return _std_mgr.query_attribute_for_all(attribute_name, qs);
future<role_manager::attribute_vals> ldap_role_manager::query_attribute_for_all(std::string_view attribute_name) {
return _std_mgr.query_attribute_for_all(attribute_name);
}
future<> ldap_role_manager::set_attribute(

View File

@@ -75,9 +75,9 @@ class ldap_role_manager : public role_manager {
future<role_set> query_granted(std::string_view, recursive_role_query) override;
future<role_to_directly_granted_map> query_all_directly_granted(::service::query_state&) override;
future<role_to_directly_granted_map> query_all_directly_granted() override;
future<role_set> query_all(::service::query_state&) override;
future<role_set> query_all() override;
future<bool> exists(std::string_view) override;
@@ -85,9 +85,9 @@ class ldap_role_manager : public role_manager {
future<bool> can_login(std::string_view) override;
future<std::optional<sstring>> get_attribute(std::string_view, std::string_view, ::service::query_state&) override;
future<std::optional<sstring>> get_attribute(std::string_view, std::string_view) override;
future<role_manager::attribute_vals> query_attribute_for_all(std::string_view, ::service::query_state&) override;
future<role_manager::attribute_vals> query_attribute_for_all(std::string_view) override;
future<> set_attribute(std::string_view, std::string_view, std::string_view, ::service::group0_batch& mc) override;

View File

@@ -78,11 +78,11 @@ future<role_set> maintenance_socket_role_manager::query_granted(std::string_view
return operation_not_supported_exception<role_set>("QUERY GRANTED");
}
future<role_to_directly_granted_map> maintenance_socket_role_manager::query_all_directly_granted(::service::query_state&) {
future<role_to_directly_granted_map> maintenance_socket_role_manager::query_all_directly_granted() {
return operation_not_supported_exception<role_to_directly_granted_map>("QUERY ALL DIRECTLY GRANTED");
}
future<role_set> maintenance_socket_role_manager::query_all(::service::query_state&) {
future<role_set> maintenance_socket_role_manager::query_all() {
return operation_not_supported_exception<role_set>("QUERY ALL");
}
@@ -98,11 +98,11 @@ future<bool> maintenance_socket_role_manager::can_login(std::string_view role_na
return make_ready_future<bool>(true);
}
future<std::optional<sstring>> maintenance_socket_role_manager::get_attribute(std::string_view role_name, std::string_view attribute_name, ::service::query_state&) {
future<std::optional<sstring>> maintenance_socket_role_manager::get_attribute(std::string_view role_name, std::string_view attribute_name) {
return operation_not_supported_exception<std::optional<sstring>>("GET ATTRIBUTE");
}
future<role_manager::attribute_vals> maintenance_socket_role_manager::query_attribute_for_all(std::string_view attribute_name, ::service::query_state&) {
future<role_manager::attribute_vals> maintenance_socket_role_manager::query_attribute_for_all(std::string_view attribute_name) {
return operation_not_supported_exception<role_manager::attribute_vals>("QUERY ATTRIBUTE");
}

View File

@@ -53,9 +53,9 @@ public:
virtual future<role_set> query_granted(std::string_view grantee_name, recursive_role_query) override;
virtual future<role_to_directly_granted_map> query_all_directly_granted(::service::query_state&) override;
virtual future<role_to_directly_granted_map> query_all_directly_granted() override;
virtual future<role_set> query_all(::service::query_state&) override;
virtual future<role_set> query_all() override;
virtual future<bool> exists(std::string_view role_name) override;
@@ -63,9 +63,9 @@ public:
virtual future<bool> can_login(std::string_view role_name) override;
virtual future<std::optional<sstring>> get_attribute(std::string_view role_name, std::string_view attribute_name, ::service::query_state&) override;
virtual future<std::optional<sstring>> get_attribute(std::string_view role_name, std::string_view attribute_name) override;
virtual future<role_manager::attribute_vals> query_attribute_for_all(std::string_view attribute_name, ::service::query_state&) override;
virtual future<role_manager::attribute_vals> query_attribute_for_all(std::string_view attribute_name) override;
virtual future<> set_attribute(std::string_view role_name, std::string_view attribute_name, std::string_view attribute_value, ::service::group0_batch& mc) override;

View File

@@ -102,11 +102,7 @@ future<> password_authenticator::migrate_legacy_metadata() const {
return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
auto username = row.get_as<sstring>("username");
auto salted_hash = row.get_as<sstring>(SALTED_HASH);
static const auto query = seastar::format("UPDATE {}.{} SET {} = ? WHERE {} = ?",
meta::legacy::AUTH_KS,
meta::roles_table::name,
SALTED_HASH,
meta::roles_table::role_col_name);
static const auto query = update_row_query();
return _qp.execute_internal(
query,
consistency_for_user(username),
@@ -123,7 +119,8 @@ future<> password_authenticator::migrate_legacy_metadata() const {
}
future<> password_authenticator::legacy_create_default_if_missing() {
const auto exists = co_await legacy::default_role_row_satisfies(_qp, &has_salted_hash, _superuser);
SCYLLA_ASSERT(legacy_mode(_qp));
const auto exists = co_await default_role_row_satisfies(_qp, &has_salted_hash, _superuser);
if (exists) {
co_return;
}
@@ -131,11 +128,7 @@ future<> password_authenticator::legacy_create_default_if_missing() {
if (salted_pwd.empty()) {
salted_pwd = passwords::hash(DEFAULT_USER_PASSWORD, rng_for_salt, _scheme);
}
const auto query = seastar::format("UPDATE {}.{} SET {} = ? WHERE {} = ?",
meta::legacy::AUTH_KS,
meta::roles_table::name,
SALTED_HASH,
meta::roles_table::role_col_name);
const auto query = update_row_query();
co_await _qp.execute_internal(
query,
db::consistency_level::QUORUM,
@@ -218,15 +211,11 @@ future<> password_authenticator::start() {
return async([this] {
if (legacy_mode(_qp)) {
if (!_superuser_created_promise.available()) {
// Counterintuitively, we mark promise as ready before any startup work
// because wait_for_schema_agreement() below will block indefinitely
// without cluster majority. In that case, blocking node startup
// would lead to a cluster deadlock.
_superuser_created_promise.set_value();
}
_migration_manager.wait_for_schema_agreement(_qp.db().real_database(), db::timeout_clock::time_point::max(), &_as).get();
if (legacy::any_nondefault_role_row_satisfies(_qp, &has_salted_hash, _superuser).get()) {
if (any_nondefault_role_row_satisfies(_qp, &has_salted_hash, _superuser).get()) {
if (legacy_metadata_exists()) {
plogger.warn("Ignoring legacy authentication metadata since nondefault data already exist.");
}
@@ -251,21 +240,10 @@ future<> password_authenticator::start() {
});
if (legacy_mode(_qp)) {
static const sstring create_roles_query = fmt::format(
"CREATE TABLE {}.{} ("
" {} text PRIMARY KEY,"
" can_login boolean,"
" is_superuser boolean,"
" member_of set<text>,"
" salted_hash text"
")",
meta::legacy::AUTH_KS,
meta::roles_table::name,
meta::roles_table::role_col_name);
return create_legacy_metadata_table_if_missing(
meta::roles_table::name,
_qp,
create_roles_query,
meta::roles_table::creation_query(),
_migration_manager);
}
return make_ready_future<>();

View File

@@ -36,8 +36,7 @@ static const std::unordered_map<sstring, auth::permission> permission_names({
{"MODIFY", auth::permission::MODIFY},
{"AUTHORIZE", auth::permission::AUTHORIZE},
{"DESCRIBE", auth::permission::DESCRIBE},
{"EXECUTE", auth::permission::EXECUTE},
{"VECTOR_SEARCH_INDEXING", auth::permission::VECTOR_SEARCH_INDEXING}});
{"EXECUTE", auth::permission::EXECUTE}});
const sstring& auth::permissions::to_string(permission p) {
for (auto& v : permission_names) {

View File

@@ -33,7 +33,6 @@ enum class permission {
// data access
SELECT, // required for SELECT.
MODIFY, // required for INSERT, UPDATE, DELETE, TRUNCATE.
VECTOR_SEARCH_INDEXING, // required for SELECT from tables with vector indexes if SELECT permission is not granted.
// permission management
AUTHORIZE, // required for GRANT and REVOKE.
@@ -55,8 +54,7 @@ typedef enum_set<
permission::MODIFY,
permission::AUTHORIZE,
permission::DESCRIBE,
permission::EXECUTE,
permission::VECTOR_SEARCH_INDEXING>> permission_set;
permission::EXECUTE>> permission_set;
bool operator<(const permission_set&, const permission_set&);

View File

@@ -41,26 +41,22 @@ static const std::unordered_map<resource_kind, std::size_t> max_parts{
{resource_kind::functions, 2}};
static permission_set applicable_permissions(const data_resource_view& dv) {
// We only support VECTOR_SEARCH_INDEXING permission for ALL KEYSPACES.
auto set = permission_set::of<
if (dv.table()) {
return permission_set::of<
permission::ALTER,
permission::DROP,
permission::SELECT,
permission::MODIFY,
permission::AUTHORIZE>();
if (!dv.table()) {
set.add(permission_set::of<permission::CREATE>());
}
if (!dv.table() && !dv.keyspace()) {
set.add(permission_set::of<permission::VECTOR_SEARCH_INDEXING>());
}
return set;
return permission_set::of<
permission::CREATE,
permission::ALTER,
permission::DROP,
permission::SELECT,
permission::MODIFY,
permission::AUTHORIZE>();
}
static permission_set applicable_permissions(const role_resource_view& rv) {

View File

@@ -17,17 +17,12 @@
#include <seastar/core/format.hh>
#include <seastar/core/sstring.hh>
#include "auth/common.hh"
#include "auth/resource.hh"
#include "cql3/description.hh"
#include "seastarx.hh"
#include "exceptions/exceptions.hh"
#include "service/raft/raft_group0_client.hh"
namespace service {
class query_state;
};
namespace auth {
struct role_config final {
@@ -172,9 +167,9 @@ public:
/// (role2, role3)
/// }
///
virtual future<role_to_directly_granted_map> query_all_directly_granted(::service::query_state& = internal_distributed_query_state()) = 0;
virtual future<role_to_directly_granted_map> query_all_directly_granted() = 0;
virtual future<role_set> query_all(::service::query_state& = internal_distributed_query_state()) = 0;
virtual future<role_set> query_all() = 0;
virtual future<bool> exists(std::string_view role_name) = 0;
@@ -191,12 +186,12 @@ public:
///
/// \returns the value of the named attribute, if one is set.
///
virtual future<std::optional<sstring>> get_attribute(std::string_view role_name, std::string_view attribute_name, ::service::query_state& = internal_distributed_query_state()) = 0;
virtual future<std::optional<sstring>> get_attribute(std::string_view role_name, std::string_view attribute_name) = 0;
///
/// \returns a mapping of each role's value for the named attribute, if one is set for the role.
///
virtual future<attribute_vals> query_attribute_for_all(std::string_view attribute_name, ::service::query_state& = internal_distributed_query_state()) = 0;
virtual future<attribute_vals> query_attribute_for_all(std::string_view attribute_name) = 0;
/// Sets `attribute_name` with `attribute_value` for `role_name`.
/// \returns an exceptional future with nonexistant_role if the role does not exist.

View File

@@ -18,21 +18,43 @@
namespace auth {
namespace legacy {
namespace meta {
namespace roles_table {
std::string_view creation_query() {
static const sstring instance = fmt::format(
"CREATE TABLE {}.{} ("
" {} text PRIMARY KEY,"
" can_login boolean,"
" is_superuser boolean,"
" member_of set<text>,"
" salted_hash text"
")",
meta::legacy::AUTH_KS,
name,
role_col_name);
return instance;
}
} // namespace roles_table
} // namespace meta
future<bool> default_role_row_satisfies(
cql3::query_processor& qp,
std::function<bool(const cql3::untyped_result_set_row&)> p,
std::optional<std::string> rolename) {
const sstring query = seastar::format("SELECT * FROM {}.{} WHERE {} = ?",
auth::meta::legacy::AUTH_KS,
get_auth_ks_name(qp),
meta::roles_table::name,
meta::roles_table::role_col_name);
for (auto cl : { db::consistency_level::ONE, db::consistency_level::QUORUM }) {
auto results = co_await qp.execute_internal(query, cl
, internal_distributed_query_state()
, {rolename.value_or(std::string(auth::meta::DEFAULT_SUPERUSER_NAME))}
, {rolename.value_or(std::string(meta::DEFAULT_SUPERUSER_NAME))}
, cql3::query_processor::cache_internal::yes
);
if (!results->empty()) {
@@ -46,7 +68,7 @@ future<bool> any_nondefault_role_row_satisfies(
cql3::query_processor& qp,
std::function<bool(const cql3::untyped_result_set_row&)> p,
std::optional<std::string> rolename) {
const sstring query = seastar::format("SELECT * FROM {}.{}", auth::meta::legacy::AUTH_KS, meta::roles_table::name);
const sstring query = seastar::format("SELECT * FROM {}.{}", get_auth_ks_name(qp), meta::roles_table::name);
auto results = co_await qp.execute_internal(query, db::consistency_level::QUORUM
, internal_distributed_query_state(), cql3::query_processor::cache_internal::no
@@ -63,6 +85,4 @@ future<bool> any_nondefault_role_row_satisfies(
});
}
} // namespace legacy
} // namespace auth
}

View File

@@ -27,15 +27,15 @@ namespace meta {
namespace roles_table {
std::string_view creation_query();
constexpr std::string_view name{"roles", 5};
constexpr std::string_view role_col_name{"role", 4};
} // namespace roles_table
}
} // namespace meta
namespace legacy {
}
///
/// Check that the default role satisfies a predicate, or `false` if the default role does not exist.
@@ -55,6 +55,4 @@ future<bool> any_nondefault_role_row_satisfies(
std::optional<std::string> rolename = {}
);
} // namespace legacy
} // namespace auth
}

View File

@@ -182,7 +182,7 @@ future<> saslauthd_authenticator::alter(std::string_view role_name, const authen
}
future<> saslauthd_authenticator::drop(std::string_view name, ::service::group0_batch& mc) {
return make_ready_future<>();
throw exceptions::authentication_exception("Cannot delete passwords with SaslauthdAuthenticator");
}
future<custom_options> saslauthd_authenticator::query_custom_options(std::string_view role_name) const {

View File

@@ -40,7 +40,7 @@
#include <variant>
#include "service/migration_manager.hh"
#include "service/raft/raft_group0_client.hh"
#include "mutation/timestamp.hh"
#include "timestamp.hh"
#include "utils/assert.hh"
#include "utils/class_registrator.hh"
#include "locator/abstract_replication_strategy.hh"

View File

@@ -231,17 +231,6 @@ struct command_desc {
} type_ = type::OTHER;
};
/// Similar to command_desc, but used in cases where multiple permissions allow the access to the resource.
struct command_desc_with_permission_set {
permission_set permission;
const ::auth::resource& resource;
enum class type {
ALTER_WITH_OPTS,
ALTER_SYSTEM_WITH_ALLOWED_OPTS,
OTHER
} type_ = type::OTHER;
};
///
/// Protected resources cannot be modified even if the performer has permissions to do so.
///

View File

@@ -50,11 +50,22 @@ constexpr std::string_view name{"role_members" , 12};
}
namespace role_attributes_table {
constexpr std::string_view name{"role_attributes", 15};
}
static std::string_view creation_query() noexcept {
static const sstring instance = seastar::format(
"CREATE TABLE {}.{} ("
" role text,"
" name text,"
" value text,"
" PRIMARY KEY(role, name)"
")",
meta::legacy::AUTH_KS,
name);
return instance;
}
}
}
static logging::logger log("standard_role_manager");
@@ -142,17 +153,6 @@ const resource_set& standard_role_manager::protected_resources() const {
}
future<> standard_role_manager::create_legacy_metadata_tables_if_missing() const {
static const sstring create_roles_query = fmt::format(
"CREATE TABLE {}.{} ("
" {} text PRIMARY KEY,"
" can_login boolean,"
" is_superuser boolean,"
" member_of set<text>,"
" salted_hash text"
")",
meta::legacy::AUTH_KS,
meta::roles_table::name,
meta::roles_table::role_col_name);
static const sstring create_role_members_query = fmt::format(
"CREATE TABLE {}.{} ("
" role text,"
@@ -161,20 +161,13 @@ future<> standard_role_manager::create_legacy_metadata_tables_if_missing() const
")",
meta::legacy::AUTH_KS,
meta::role_members_table::name);
static const sstring create_role_attributes_query = seastar::format(
"CREATE TABLE {}.{} ("
" role text,"
" name text,"
" value text,"
" PRIMARY KEY(role, name)"
")",
meta::legacy::AUTH_KS,
meta::role_attributes_table::name);
return when_all_succeed(
create_legacy_metadata_table_if_missing(
meta::roles_table::name,
_qp,
create_roles_query,
meta::roles_table::creation_query(),
_migration_manager),
create_legacy_metadata_table_if_missing(
meta::role_members_table::name,
@@ -184,18 +177,19 @@ future<> standard_role_manager::create_legacy_metadata_tables_if_missing() const
create_legacy_metadata_table_if_missing(
meta::role_attributes_table::name,
_qp,
create_role_attributes_query,
meta::role_attributes_table::creation_query(),
_migration_manager)).discard_result();
}
future<> standard_role_manager::legacy_create_default_role_if_missing() {
SCYLLA_ASSERT(legacy_mode(_qp));
try {
const auto exists = co_await legacy::default_role_row_satisfies(_qp, &has_can_login, _superuser);
const auto exists = co_await default_role_row_satisfies(_qp, &has_can_login, _superuser);
if (exists) {
co_return;
}
const sstring query = seastar::format("INSERT INTO {}.{} ({}, is_superuser, can_login) VALUES (?, true, true)",
meta::legacy::AUTH_KS,
get_auth_ks_name(_qp),
meta::roles_table::name,
meta::roles_table::role_col_name);
co_await _qp.execute_internal(
@@ -290,7 +284,7 @@ future<> standard_role_manager::migrate_legacy_metadata() {
std::move(config),
::service::group0_batch::unused(),
[this](const auto& name, const auto& config, auto& mc) {
return create_or_replace(meta::legacy::AUTH_KS, name, config, mc);
return create_or_replace(name, config, mc);
});
}).finally([results] {});
}).then([] {
@@ -311,15 +305,11 @@ future<> standard_role_manager::start() {
const bool legacy = legacy_mode(_qp);
if (legacy) {
if (!_superuser_created_promise.available()) {
// Counterintuitively, we mark promise as ready before any startup work
// because wait_for_schema_agreement() below will block indefinitely
// without cluster majority. In that case, blocking node startup
// would lead to a cluster deadlock.
_superuser_created_promise.set_value();
}
co_await _migration_manager.wait_for_schema_agreement(_qp.db().real_database(), db::timeout_clock::time_point::max(), &_as);
if (co_await legacy::any_nondefault_role_row_satisfies(_qp, &has_can_login)) {
if (co_await any_nondefault_role_row_satisfies(_qp, &has_can_login)) {
if (legacy_metadata_exists()) {
log.warn("Ignoring legacy user metadata since nondefault roles already exist.");
}
@@ -355,12 +345,12 @@ future<> standard_role_manager::ensure_superuser_is_created() {
return _superuser_created_promise.get_shared_future();
}
future<> standard_role_manager::create_or_replace(std::string_view auth_ks_name, std::string_view role_name, const role_config& c, ::service::group0_batch& mc) {
future<> standard_role_manager::create_or_replace(std::string_view role_name, const role_config& c, ::service::group0_batch& mc) {
const sstring query = seastar::format("INSERT INTO {}.{} ({}, is_superuser, can_login) VALUES (?, ?, ?)",
auth_ks_name,
get_auth_ks_name(_qp),
meta::roles_table::name,
meta::roles_table::role_col_name);
if (auth_ks_name == meta::legacy::AUTH_KS) {
if (legacy_mode(_qp)) {
co_await _qp.execute_internal(
query,
consistency_for_role(role_name),
@@ -379,7 +369,7 @@ standard_role_manager::create(std::string_view role_name, const role_config& c,
throw role_already_exists(role_name);
}
return create_or_replace(get_auth_ks_name(_qp), role_name, c, mc);
return create_or_replace(role_name, c, mc);
});
}
@@ -663,30 +653,21 @@ future<role_set> standard_role_manager::query_granted(std::string_view grantee_n
});
}
future<role_to_directly_granted_map> standard_role_manager::query_all_directly_granted(::service::query_state& qs) {
future<role_to_directly_granted_map> standard_role_manager::query_all_directly_granted() {
const sstring query = seastar::format("SELECT * FROM {}.{}",
get_auth_ks_name(_qp),
meta::role_members_table::name);
const auto results = co_await _qp.execute_internal(
query,
db::consistency_level::ONE,
qs,
cql3::query_processor::cache_internal::yes);
role_to_directly_granted_map roles_map;
std::transform(
results->begin(),
results->end(),
std::inserter(roles_map, roles_map.begin()),
[] (const cql3::untyped_result_set_row& row) {
return std::make_pair(row.get_as<sstring>("member"), row.get_as<sstring>("role")); }
);
co_await _qp.query_internal(query, [&roles_map] (const cql3::untyped_result_set_row& row) -> future<stop_iteration> {
roles_map.insert({row.get_as<sstring>("member"), row.get_as<sstring>("role")});
co_return stop_iteration::no;
});
co_return roles_map;
}
future<role_set> standard_role_manager::query_all(::service::query_state& qs) {
future<role_set> standard_role_manager::query_all() {
const sstring query = seastar::format("SELECT {} FROM {}.{}",
meta::roles_table::role_col_name,
get_auth_ks_name(_qp),
@@ -704,7 +685,7 @@ future<role_set> standard_role_manager::query_all(::service::query_state& qs) {
const auto results = co_await _qp.execute_internal(
query,
db::consistency_level::QUORUM,
qs,
internal_distributed_query_state(),
cql3::query_processor::cache_internal::yes);
role_set roles;
@@ -736,11 +717,11 @@ future<bool> standard_role_manager::can_login(std::string_view role_name) {
});
}
future<std::optional<sstring>> standard_role_manager::get_attribute(std::string_view role_name, std::string_view attribute_name, ::service::query_state& qs) {
future<std::optional<sstring>> standard_role_manager::get_attribute(std::string_view role_name, std::string_view attribute_name) {
const sstring query = seastar::format("SELECT name, value FROM {}.{} WHERE role = ? AND name = ?",
get_auth_ks_name(_qp),
meta::role_attributes_table::name);
const auto result_set = co_await _qp.execute_internal(query, db::consistency_level::ONE, qs, {sstring(role_name), sstring(attribute_name)}, cql3::query_processor::cache_internal::yes);
const auto result_set = co_await _qp.execute_internal(query, {sstring(role_name), sstring(attribute_name)}, cql3::query_processor::cache_internal::yes);
if (!result_set->empty()) {
const cql3::untyped_result_set_row &row = result_set->one();
co_return std::optional<sstring>(row.get_as<sstring>("value"));
@@ -748,11 +729,11 @@ future<std::optional<sstring>> standard_role_manager::get_attribute(std::string_
co_return std::optional<sstring>{};
}
future<role_manager::attribute_vals> standard_role_manager::query_attribute_for_all (std::string_view attribute_name, ::service::query_state& qs) {
return query_all(qs).then([this, attribute_name, &qs] (role_set roles) {
return do_with(attribute_vals{}, [this, attribute_name, roles = std::move(roles), &qs] (attribute_vals &role_to_att_val) {
return parallel_for_each(roles.begin(), roles.end(), [this, &role_to_att_val, attribute_name, &qs] (sstring role) {
return get_attribute(role, attribute_name, qs).then([&role_to_att_val, role] (std::optional<sstring> att_val) {
future<role_manager::attribute_vals> standard_role_manager::query_attribute_for_all (std::string_view attribute_name) {
return query_all().then([this, attribute_name] (role_set roles) {
return do_with(attribute_vals{}, [this, attribute_name, roles = std::move(roles)] (attribute_vals &role_to_att_val) {
return parallel_for_each(roles.begin(), roles.end(), [this, &role_to_att_val, attribute_name] (sstring role) {
return get_attribute(role, attribute_name).then([&role_to_att_val, role] (std::optional<sstring> att_val) {
if (att_val) {
role_to_att_val.emplace(std::move(role), std::move(*att_val));
}
@@ -797,7 +778,7 @@ future<> standard_role_manager::remove_attribute(std::string_view role_name, std
future<std::vector<cql3::description>> standard_role_manager::describe_role_grants() {
std::vector<cql3::description> result{};
const auto grants = co_await query_all_directly_granted(internal_distributed_query_state());
const auto grants = co_await query_all_directly_granted();
result.reserve(grants.size());
for (const auto& [grantee_role, granted_role] : grants) {

View File

@@ -66,9 +66,9 @@ public:
virtual future<role_set> query_granted(std::string_view grantee_name, recursive_role_query) override;
virtual future<role_to_directly_granted_map> query_all_directly_granted(::service::query_state&) override;
virtual future<role_to_directly_granted_map> query_all_directly_granted() override;
virtual future<role_set> query_all(::service::query_state&) override;
virtual future<role_set> query_all() override;
virtual future<bool> exists(std::string_view role_name) override;
@@ -76,9 +76,9 @@ public:
virtual future<bool> can_login(std::string_view role_name) override;
virtual future<std::optional<sstring>> get_attribute(std::string_view role_name, std::string_view attribute_name, ::service::query_state&) override;
virtual future<std::optional<sstring>> get_attribute(std::string_view role_name, std::string_view attribute_name) override;
virtual future<role_manager::attribute_vals> query_attribute_for_all(std::string_view attribute_name, ::service::query_state&) override;
virtual future<role_manager::attribute_vals> query_attribute_for_all(std::string_view attribute_name) override;
virtual future<> set_attribute(std::string_view role_name, std::string_view attribute_name, std::string_view attribute_value, ::service::group0_batch& mc) override;
@@ -100,7 +100,7 @@ private:
future<> maybe_create_default_role();
future<> maybe_create_default_role_with_retries();
future<> create_or_replace(std::string_view auth_ks_name, std::string_view role_name, const role_config&, ::service::group0_batch&);
future<> create_or_replace(std::string_view role_name, const role_config&, ::service::group0_batch&);
future<> legacy_modify_membership(std::string_view role_name, std::string_view grantee_name, membership_change);

View File

@@ -13,7 +13,6 @@
#include <seastar/core/sleep.hh>
#include <seastar/core/coroutine.hh>
#include <seastar/coroutine/maybe_yield.hh>
#include <seastar/util/later.hh>
#include "gms/endpoint_state.hh"
#include "gms/versioned_value.hh"
@@ -31,7 +30,6 @@
#include "utils/assert.hh"
#include "utils/error_injection.hh"
#include "utils/UUID_gen.hh"
#include "utils/stall_free.hh"
#include "utils/to_string.hh"
#include "cdc/generation.hh"
@@ -68,15 +66,6 @@ api::timestamp_clock::duration get_generation_leeway() {
return generation_leeway;
}
stream_state read_stream_state(int8_t val) {
if (val != std::to_underlying(stream_state::current)
&& val != std::to_underlying(stream_state::closed)
&& val != std::to_underlying(stream_state::opened)) {
throw std::runtime_error(format("invalid value {} for stream state", val));
}
return static_cast<stream_state>(val);
}
static void copy_int_to_bytes(int64_t i, size_t offset, bytes& b) {
i = net::hton(i);
std::copy_n(reinterpret_cast<int8_t*>(&i), sizeof(int64_t), b.begin() + offset);
@@ -169,10 +158,6 @@ const bytes& stream_id::to_bytes() const {
return _value;
}
bytes stream_id::to_bytes() && {
return std::move(_value);
}
partition_key stream_id::to_partition_key(const schema& log_schema) const {
return partition_key::from_single_value(log_schema, _value);
}
@@ -197,19 +182,6 @@ utils::chunked_vector<token_range_description>&& topology_description::entries()
return std::move(_entries);
}
future<topology_description> topology_description::clone_async() const {
utils::chunked_vector<token_range_description> vec{};
co_await utils::reserve_gently(vec, _entries.size());
for (const auto& entry : _entries) {
vec.push_back(entry);
co_await seastar::maybe_yield();
}
co_return topology_description{std::move(vec)};
}
static std::vector<stream_id> create_stream_ids(
size_t index, dht::token start, dht::token end, size_t shard_count, uint8_t ignore_msb) {
std::vector<stream_id> result;
@@ -1012,11 +984,8 @@ future<> generation_service::handle_cdc_generation(cdc::generation_id_v2 gen_id)
auto gen_data = co_await _sys_ks.local().read_cdc_generation(gen_id.id);
bool using_this_gen = co_await container().map_reduce(or_reducer(), [ts, &gen_data] (generation_service& svc) -> future<bool> {
// We need to copy it here before awaiting anything to avoid destruction of the captures.
const auto timestamp = ts;
topology_description gen_copy = co_await gen_data.clone_async();
co_return svc._cdc_metadata.insert(timestamp, std::move(gen_copy));
bool using_this_gen = co_await container().map_reduce(or_reducer(), [ts, &gen_data] (generation_service& svc) {
return svc._cdc_metadata.insert(ts, cdc::topology_description{gen_data});
});
if (using_this_gen) {
@@ -1168,12 +1137,9 @@ future<bool> generation_service::legacy_do_handle_cdc_generation(cdc::generation
}
// Return `true` iff the generation was inserted on any of our shards.
co_return co_await container().map_reduce(or_reducer(),
[ts = get_ts(gen_id), &gen] (generation_service& svc) -> future<bool> {
// We need to copy it here before awaiting anything to avoid destruction of the captures.
const auto timestamp = ts;
topology_description gen_copy = co_await gen->clone_async();
co_return svc._cdc_metadata.insert(timestamp, std::move(gen_copy));
co_return co_await container().map_reduce(or_reducer(), [ts = get_ts(gen_id), &gen] (generation_service& svc) {
auto gen_ = *gen;
return svc._cdc_metadata.insert(ts, std::move(gen_));
});
}
@@ -1191,347 +1157,4 @@ db_clock::time_point get_ts(const generation_id& gen_id) {
return std::visit([] (auto& id) { return id.ts; }, gen_id);
}
future<mutation> create_table_streams_mutation(table_id table, db_clock::time_point stream_ts, const locator::tablet_map& map, api::timestamp_type ts) {
auto s = db::system_keyspace::cdc_streams_state();
mutation m(s, partition_key::from_single_value(*s,
data_value(table.uuid()).serialize_nonnull()
));
m.set_static_cell("timestamp", stream_ts, ts);
for (auto tid : map.tablet_ids()) {
auto sid = cdc::stream_id(map.get_last_token(tid), 0);
auto ck = clustering_key::from_singular(*s, dht::token::to_int64(sid.token()));
m.set_cell(ck, "stream_id", data_value(std::move(sid).to_bytes()), ts);
co_await coroutine::maybe_yield();
}
co_return std::move(m);
}
future<mutation> create_table_streams_mutation(table_id table, db_clock::time_point stream_ts, const utils::chunked_vector<cdc::stream_id>& stream_ids, api::timestamp_type ts) {
auto s = db::system_keyspace::cdc_streams_state();
mutation m(s, partition_key::from_single_value(*s,
data_value(table.uuid()).serialize_nonnull()
));
m.set_static_cell("timestamp", stream_ts, ts);
for (const auto& sid : stream_ids) {
auto ck = clustering_key::from_singular(*s, dht::token::to_int64(sid.token()));
m.set_cell(ck, "stream_id", data_value(sid.to_bytes()), ts);
co_await coroutine::maybe_yield();
}
co_return std::move(m);
}
utils::chunked_vector<mutation>
make_drop_table_streams_mutations(table_id table, api::timestamp_type ts) {
utils::chunked_vector<mutation> mutations;
mutations.reserve(2);
for (auto s : {db::system_keyspace::cdc_streams_state(),
db::system_keyspace::cdc_streams_history()}) {
mutation m(s, partition_key::from_single_value(*s,
data_value(table.uuid()).serialize_nonnull()
));
m.partition().apply(tombstone(ts, gc_clock::now()));
mutations.emplace_back(std::move(m));
}
return mutations;
}
future<> generation_service::load_cdc_tablet_streams(std::optional<std::unordered_set<table_id>> changed_tables) {
// track which tables we expect to get data for, and which we actually get.
// when a table is dropped we won't get any streams for it. we will use this to
// know which tables are dropped and remove their stream map from the metadata.
std::unordered_set<table_id> tables_to_process;
if (changed_tables) {
tables_to_process = *changed_tables;
} else {
tables_to_process = _cdc_metadata.get_tables_with_cdc_tablet_streams() | std::ranges::to<std::unordered_set<table_id>>();
}
auto read_streams_state = [this] (const std::optional<std::unordered_set<table_id>>& tables, noncopyable_function<future<>(table_id, db_clock::time_point, utils::chunked_vector<cdc::stream_id>)> f) -> future<> {
if (tables) {
for (auto table : *tables) {
co_await _sys_ks.local().read_cdc_streams_state(table, [&] (table_id table, db_clock::time_point base_ts, utils::chunked_vector<cdc::stream_id> base_stream_set) -> future<> {
return f(table, base_ts, std::move(base_stream_set));
});
}
} else {
co_await _sys_ks.local().read_cdc_streams_state(std::nullopt, [&] (table_id table, db_clock::time_point base_ts, utils::chunked_vector<cdc::stream_id> base_stream_set) -> future<> {
return f(table, base_ts, std::move(base_stream_set));
});
}
};
co_await read_streams_state(changed_tables, [this, &tables_to_process] (table_id table, db_clock::time_point base_ts, utils::chunked_vector<cdc::stream_id> base_stream_set) -> future<> {
table_streams new_table_map;
auto append_stream = [&new_table_map] (db_clock::time_point stream_tp, utils::chunked_vector<cdc::stream_id> stream_set) {
auto ts = std::chrono::duration_cast<api::timestamp_clock::duration>(stream_tp.time_since_epoch()).count();
new_table_map[ts] = committed_stream_set {stream_tp, std::move(stream_set)};
};
// if we already have a loaded streams map, and the base timestamp is unchanged, then read
// the history entries starting from the latest one we have and append it to the existing map.
// we can do it because we only append new rows with higher timestamps to the history table.
std::optional<std::reference_wrapper<const committed_stream_set>> from_streams;
std::optional<db_clock::time_point> from_ts;
const auto& all_streams = _cdc_metadata.get_all_tablet_streams();
if (auto it = all_streams.find(table); it != all_streams.end()) {
const auto& current_map = *it->second;
if (current_map.cbegin()->second.ts == base_ts) {
const auto& latest_entry = current_map.crbegin()->second;
from_streams = std::cref(latest_entry);
from_ts = latest_entry.ts;
}
}
if (!from_ts) {
append_stream(base_ts, std::move(base_stream_set));
}
co_await _sys_ks.local().read_cdc_streams_history(table, from_ts, [&] (table_id tid, db_clock::time_point ts, cdc_stream_diff diff) -> future<> {
const auto& prev_stream_set = new_table_map.empty() ?
from_streams->get().streams : std::crbegin(new_table_map)->second.streams;
append_stream(ts, co_await cdc::metadata::construct_next_stream_set(
prev_stream_set, std::move(diff.opened_streams), diff.closed_streams));
});
co_await container().invoke_on_all(coroutine::lambda([&] (generation_service& svc) -> future<> {
table_streams new_table_map_copy;
for (const auto& [ts, entry] : new_table_map) {
new_table_map_copy[ts] = entry;
co_await coroutine::maybe_yield();
}
if (!from_ts) {
svc._cdc_metadata.load_tablet_streams_map(table, std::move(new_table_map_copy));
} else {
svc._cdc_metadata.append_tablet_streams_map(table, std::move(new_table_map_copy));
}
}));
tables_to_process.erase(table);
});
// the remaining tables have no streams - remove them from the metadata
co_await container().invoke_on_all([&] (generation_service& svc) {
for (auto table : tables_to_process) {
svc._cdc_metadata.remove_tablet_streams_map(table);
}
});
}
future<> generation_service::query_cdc_timestamps(table_id table, bool ascending, noncopyable_function<future<>(db_clock::time_point)> f) {
const auto& all_tables = _cdc_metadata.get_all_tablet_streams();
auto table_it = all_tables.find(table);
if (table_it == all_tables.end()) {
co_return;
}
const auto table_streams_ptr = table_it->second; // keep alive
const auto& table_streams = *table_streams_ptr;
if (ascending) {
for (auto it = table_streams.cbegin(); it != table_streams.cend(); ++it) {
co_await f(it->second.ts);
}
} else {
for (auto it = table_streams.crbegin(); it != table_streams.crend(); ++it) {
co_await f(it->second.ts);
}
}
}
future<> generation_service::query_cdc_streams(table_id table, noncopyable_function<future<>(db_clock::time_point, const utils::chunked_vector<cdc::stream_id>& current, cdc::cdc_stream_diff)> f) {
const auto& all_tables = _cdc_metadata.get_all_tablet_streams();
auto table_it = all_tables.find(table);
if (table_it == all_tables.end()) {
co_return;
}
const auto table_streams_ptr = table_it->second; // keep alive
const auto& table_streams = *table_streams_ptr;
auto it_prev = table_streams.end();
auto it = table_streams.begin();
while (it != table_streams.end()) {
const auto& entry = it->second;
if (it_prev != table_streams.end()) {
const auto& prev_entry = it_prev->second;
auto diff = co_await cdc::metadata::generate_stream_diff(prev_entry.streams, entry.streams);
co_await f(entry.ts, entry.streams, std::move(diff));
} else {
co_await f(entry.ts, entry.streams, cdc::cdc_stream_diff{.closed_streams = {}, .opened_streams = entry.streams});
}
it_prev = it;
++it;
}
}
future<mutation> get_switch_streams_mutation(table_id table, db_clock::time_point stream_ts, cdc_stream_diff diff, api::timestamp_type ts) {
auto history_schema = db::system_keyspace::cdc_streams_history();
auto decomposed_ts = timestamp_type->decompose(stream_ts);
auto closed_kind = byte_type->decompose(std::to_underlying(stream_state::closed));
auto opened_kind = byte_type->decompose(std::to_underlying(stream_state::opened));
mutation m(history_schema, partition_key::from_single_value(*history_schema,
data_value(table.uuid()).serialize_nonnull()
));
for (auto&& sid : diff.closed_streams) {
co_await coroutine::maybe_yield();
auto ck = clustering_key::from_exploded(*history_schema, { decomposed_ts, closed_kind, long_type->decompose(dht::token::to_int64(sid.token())) });
m.set_cell(ck, "stream_id", data_value(std::move(sid).to_bytes()), ts);
}
for (auto&& sid : diff.opened_streams) {
co_await coroutine::maybe_yield();
auto ck = clustering_key::from_exploded(*history_schema, { decomposed_ts, opened_kind, long_type->decompose(dht::token::to_int64(sid.token())) });
m.set_cell(ck, "stream_id", data_value(std::move(sid).to_bytes()), ts);
}
co_return std::move(m);
}
future<> generation_service::generate_tablet_resize_update(utils::chunked_vector<canonical_mutation>& muts, table_id table, const locator::tablet_map& new_tablet_map, api::timestamp_type ts) {
if (!_cdc_metadata.get_all_tablet_streams().contains(table)) {
// not a CDC table
co_return;
}
utils::chunked_vector<cdc::stream_id> new_streams;
co_await utils::reserve_gently(new_streams, new_tablet_map.tablet_count());
for (auto tid : new_tablet_map.tablet_ids()) {
new_streams.emplace_back(new_tablet_map.get_last_token(tid), 0);
co_await coroutine::maybe_yield();
}
const auto& table_streams = *_cdc_metadata.get_all_tablet_streams().at(table);
auto current_streams_it = std::crbegin(table_streams);
if (current_streams_it == std::crend(table_streams)) {
// no streams at all - this should not happen
on_internal_error(cdc_log, format("generate_tablet_resize_update: no streams for table {}", table));
}
const auto& current_streams = current_streams_it->second;
auto new_ts = new_generation_timestamp(true, _cfg.ring_delay);
new_ts = std::max(new_ts, current_streams.ts + std::chrono::milliseconds(1)); // ensure timestamps are increasing
auto diff = co_await _cdc_metadata.generate_stream_diff(current_streams.streams, new_streams);
auto mut = co_await get_switch_streams_mutation(table, new_ts, std::move(diff), ts);
muts.emplace_back(std::move(mut));
}
future<utils::chunked_vector<mutation>> get_cdc_stream_gc_mutations(table_id table, db_clock::time_point base_ts, const utils::chunked_vector<cdc::stream_id>& base_stream_set, api::timestamp_type ts) {
utils::chunked_vector<mutation> muts;
muts.reserve(2);
auto gc_now = gc_clock::now();
auto tombstone_ts = ts - 1;
{
// write the new base stream set to cdc_streams_state
auto s = db::system_keyspace::cdc_streams_state();
mutation m(s, partition_key::from_single_value(*s,
data_value(table.uuid()).serialize_nonnull()
));
m.partition().apply(tombstone(tombstone_ts, gc_now));
m.set_static_cell("timestamp", data_value(base_ts), ts);
for (const auto& sid : base_stream_set) {
co_await coroutine::maybe_yield();
auto ck = clustering_key::from_singular(*s, dht::token::to_int64(sid.token()));
m.set_cell(ck, "stream_id", data_value(sid.to_bytes()), ts);
}
muts.emplace_back(std::move(m));
}
{
// remove all entries from cdc_streams_history up to the new base
auto s = db::system_keyspace::cdc_streams_history();
mutation m(s, partition_key::from_single_value(*s,
data_value(table.uuid()).serialize_nonnull()
));
auto range = query::clustering_range::make_ending_with({
clustering_key_prefix::from_single_value(*s, timestamp_type->decompose(base_ts)), true});
auto bv = bound_view::from_range(range);
m.partition().apply_delete(*s, range_tombstone{bv.first, bv.second, tombstone{ts, gc_now}});
muts.emplace_back(std::move(m));
}
co_return std::move(muts);
}
table_streams::const_iterator get_new_base_for_gc(const table_streams& streams_map, std::chrono::seconds ttl) {
// find the most recent timestamp that is older than ttl_seconds, which will become the new base.
// all streams with older timestamps can be removed because they are closed for more than ttl_seconds
// (they are all replaced by streams with the newer timestamp).
auto ts_upper_bound = db_clock::now() - ttl;
auto it = streams_map.begin();
while (it != streams_map.end()) {
auto next_it = std::next(it);
if (next_it == streams_map.end()) {
break;
}
auto next_tp = next_it->second.ts;
if (next_tp <= ts_upper_bound) {
// the next timestamp is older than ttl_seconds, so the current one is obsolete
it = next_it;
} else {
break;
}
}
return it;
}
future<utils::chunked_vector<mutation>> generation_service::garbage_collect_cdc_streams_for_table(table_id table, std::optional<std::chrono::seconds> ttl, api::timestamp_type ts) {
const auto& table_streams = *_cdc_metadata.get_all_tablet_streams().at(table);
// if TTL is not provided by the caller then use the table's CDC TTL
auto base_schema = cdc::get_base_table(_db, *_db.find_schema(table));
ttl = ttl.or_else([&] -> std::optional<std::chrono::seconds> {
auto ttl_seconds = base_schema->cdc_options().ttl();
if (ttl_seconds > 0) {
return std::chrono::seconds(ttl_seconds);
} else {
// ttl=0 means no ttl
return std::nullopt;
}
});
if (!ttl) {
co_return utils::chunked_vector<mutation>{};
}
auto new_base_it = get_new_base_for_gc(table_streams, *ttl);
if (new_base_it == table_streams.begin() || new_base_it == table_streams.end()) {
// nothing to gc
co_return utils::chunked_vector<mutation>{};
}
for (auto it = table_streams.begin(); it != new_base_it; ++it) {
cdc_log.info("Garbage collecting CDC stream metadata for table {}: removing generation {} because it is older than the CDC TTL of {} seconds",
table, it->second.ts, *ttl);
}
co_return co_await get_cdc_stream_gc_mutations(table, new_base_it->second.ts, new_base_it->second.streams, ts);
}
future<> generation_service::garbage_collect_cdc_streams(utils::chunked_vector<canonical_mutation>& muts, api::timestamp_type ts) {
for (auto table : _cdc_metadata.get_tables_with_cdc_tablet_streams()) {
co_await coroutine::maybe_yield();
auto table_muts = co_await garbage_collect_cdc_streams_for_table(table, std::nullopt, ts);
for (auto&& m : table_muts) {
muts.emplace_back(std::move(m));
}
}
}
} // namespace cdc

View File

@@ -44,10 +44,6 @@ namespace gms {
class gossiper;
} // namespace gms
namespace locator {
class tablet_map;
} // namespace locator
namespace cdc {
api::timestamp_clock::duration get_generation_leeway();
@@ -67,7 +63,6 @@ public:
uint8_t version() const;
size_t index() const;
const bytes& to_bytes() const;
bytes to_bytes() &&;
dht::token token() const;
partition_key to_partition_key(const schema& log_schema) const;
@@ -102,20 +97,10 @@ class topology_description {
utils::chunked_vector<token_range_description> _entries;
public:
topology_description(utils::chunked_vector<token_range_description> entries);
topology_description(const topology_description&) = delete;
topology_description& operator=(const topology_description&) = delete;
topology_description(topology_description&&) = default;
topology_description& operator=(topology_description&&) = default;
bool operator==(const topology_description&) const;
const utils::chunked_vector<token_range_description>& entries() const&;
utils::chunked_vector<token_range_description>&& entries() &&;
/// The object MUST NOT be modified until the returned future resolves.
future<topology_description> clone_async() const;
};
/**
@@ -133,26 +118,6 @@ public:
{}
};
enum class stream_state : int8_t {
current = 0,
closed = 1,
opened = 2,
};
stream_state read_stream_state(int8_t val);
struct committed_stream_set {
db_clock::time_point ts;
utils::chunked_vector<cdc::stream_id> streams;
};
struct cdc_stream_diff {
utils::chunked_vector<stream_id> closed_streams;
utils::chunked_vector<stream_id> opened_streams;
};
using table_streams = std::map<api::timestamp_type, committed_stream_set>;
class no_generation_data_exception : public std::runtime_error {
public:
no_generation_data_exception(cdc::generation_id generation_ts)
@@ -219,12 +184,4 @@ future<utils::chunked_vector<mutation>> get_cdc_generation_mutations_v3(
schema_ptr, utils::UUID gen_uuid, const cdc::topology_description&,
size_t mutation_size_threshold, api::timestamp_type mutation_timestamp);
future<mutation> create_table_streams_mutation(table_id, db_clock::time_point, const locator::tablet_map&, api::timestamp_type);
future<mutation> create_table_streams_mutation(table_id, db_clock::time_point, const utils::chunked_vector<cdc::stream_id>&, api::timestamp_type);
utils::chunked_vector<mutation> make_drop_table_streams_mutations(table_id, api::timestamp_type ts);
future<mutation> get_switch_streams_mutation(table_id table, db_clock::time_point stream_ts, cdc_stream_diff diff, api::timestamp_type ts);
future<utils::chunked_vector<mutation>> get_cdc_stream_gc_mutations(table_id table, db_clock::time_point base_ts, const utils::chunked_vector<cdc::stream_id>& base_stream_set, api::timestamp_type ts);
table_streams::const_iterator get_new_base_for_gc(const table_streams&, std::chrono::seconds ttl);
} // namespace cdc

View File

@@ -29,7 +29,6 @@ class abort_source;
namespace locator {
class shared_token_metadata;
class tablet_map;
}
namespace cdc {
@@ -146,16 +145,6 @@ public:
*/
future<> handle_cdc_generation(cdc::generation_id_v2);
future<> load_cdc_tablet_streams(std::optional<std::unordered_set<table_id>> changed_tables);
future<> query_cdc_timestamps(table_id table, bool ascending, noncopyable_function<future<>(db_clock::time_point)> f);
future<> query_cdc_streams(table_id table, noncopyable_function<future<>(db_clock::time_point, const utils::chunked_vector<cdc::stream_id>& current, cdc::cdc_stream_diff)> f);
future<> generate_tablet_resize_update(utils::chunked_vector<canonical_mutation>& muts, table_id table, const locator::tablet_map& new_tablet_map, api::timestamp_type ts);
future<utils::chunked_vector<mutation>> garbage_collect_cdc_streams_for_table(table_id table, std::optional<std::chrono::seconds> ttl, api::timestamp_type ts);
future<> garbage_collect_cdc_streams(utils::chunked_vector<canonical_mutation>& muts, api::timestamp_type ts);
private:
/* Retrieve the CDC generation which starts at the given timestamp (from a distributed table created for this purpose)
* and start using it for CDC log writes if it's not obsolete.

View File

@@ -21,16 +21,12 @@
#include "cdc/metadata.hh"
#include "cdc/cdc_partitioner.hh"
#include "bytes.hh"
#include "index/vector_index.hh"
#include "locator/abstract_replication_strategy.hh"
#include "replica/database.hh"
#include "db/schema_tables.hh"
#include "gms/feature_service.hh"
#include "schema/schema.hh"
#include "schema/schema_builder.hh"
#include "service/migration_listener.hh"
#include "service/storage_proxy.hh"
#include "tombstone_gc_extension.hh"
#include "types/tuple.hh"
#include "cql3/statements/select_statement.hh"
#include "cql3/untyped_result_set.hh"
@@ -40,7 +36,7 @@
#include "utils/UUID_gen.hh"
#include "utils/managed_bytes.hh"
#include "types/types.hh"
#include "types/concrete_types.hh"
#include "concrete_types.hh"
#include "types/listlike_partial_deserializing_iterator.hh"
#include "tracing/trace_state.hh"
#include "stats.hh"
@@ -60,23 +56,8 @@ using namespace std::chrono_literals;
logging::logger cdc_log("cdc");
namespace {
shared_ptr<locator::abstract_replication_strategy> generate_replication_strategy(const keyspace_metadata& ksm) {
locator::replication_strategy_params params(ksm.strategy_options(), ksm.initial_tablets());
return locator::abstract_replication_strategy::create_replication_strategy(ksm.strategy_name(), params);
}
// When dropping a column from a CDC log table, we set the drop timestamp
// `column_drop_leeway` seconds into the future to ensure that for writes concurrent
// with column drop, the write timestamp is before the column drop timestamp.
constexpr auto column_drop_leeway = std::chrono::seconds(5);
} // anonymous namespace
namespace cdc {
static schema_ptr create_log_schema(const schema&, const replica::database&, const keyspace_metadata&, api::timestamp_type,
std::optional<table_id> = {}, schema_ptr = nullptr);
static schema_ptr create_log_schema(const schema&, std::optional<table_id> = {}, schema_ptr = nullptr);
}
static constexpr auto cdc_group_name = "cdc";
@@ -177,52 +158,26 @@ public:
});
}
virtual void on_before_allocate_tablet_map(const locator::tablet_map& map, const schema& s, utils::chunked_vector<mutation>& muts, api::timestamp_type ts) override {
if (!is_log_schema(s)) {
return;
}
auto stream_ts = db_clock::now() - duration_cast<std::chrono::milliseconds>(get_generation_leeway());
auto mut = create_table_streams_mutation(s.id(), stream_ts, map, ts).get();
muts.emplace_back(std::move(mut));
}
void on_pre_create_column_families(const keyspace_metadata& ksm, std::vector<schema_ptr>& cfms, api::timestamp_type ts) override {
std::vector<schema_ptr> new_cfms;
for (auto sp : cfms) {
const auto& schema = *sp;
if (!schema.cdc_options().enabled()) {
continue;
}
void on_before_create_column_family(const keyspace_metadata& ksm, const schema& schema, utils::chunked_vector<mutation>& mutations, api::timestamp_type timestamp) override {
if (schema.cdc_options().enabled()) {
auto& db = _ctxt._proxy.get_db().local();
auto logname = log_name(schema.cf_name());
check_that_cdc_log_table_does_not_exist(db, schema, logname);
ensure_that_table_has_no_counter_columns(schema);
if (!db.features().cdc_with_tablets) {
ensure_that_table_uses_vnodes(ksm, schema);
}
ensure_that_table_uses_vnodes(ksm, schema);
// in seastar thread
auto log_schema = create_log_schema(schema, db, ksm, ts);
new_cfms.push_back(std::move(log_schema));
}
auto log_schema = create_log_schema(schema);
cfms.insert(cfms.end(), std::make_move_iterator(new_cfms.begin()), std::make_move_iterator(new_cfms.end()));
auto log_mut = db::schema_tables::make_create_table_mutations(log_schema, timestamp);
mutations.insert(mutations.end(), std::make_move_iterator(log_mut.begin()), std::make_move_iterator(log_mut.end()));
}
}
void on_before_update_column_family(const schema& new_schema, const schema& old_schema, utils::chunked_vector<mutation>& mutations, api::timestamp_type timestamp) override {
bool has_vector_index = secondary_index::vector_index::has_vector_index(new_schema);
if (has_vector_index) {
// If we have a vector index, we need to ensure that the CDC log is created
// satisfying the minimal requirements of Vector Search.
secondary_index::vector_index::check_cdc_options(new_schema);
}
bool is_cdc = cdc_enabled(new_schema);
bool was_cdc = cdc_enabled(old_schema);
bool is_cdc = new_schema.cdc_options().enabled();
bool was_cdc = old_schema.cdc_options().enabled();
// if we are turning off cdc we can skip this, since even if columns change etc,
// any writer should see cdc -> off together with any actual schema changes to
@@ -240,7 +195,7 @@ public:
// make sure the apparent log table really is a cdc log (not user table)
// we just check the partitioner - since user tables should _not_ be able
// set/use this.
if (!is_log_schema(*log_schema)) {
if (log_schema->get_partitioner().name() != cdc::cdc_partitioner::classname) {
// will throw
check_that_cdc_log_table_does_not_exist(db, old_schema, logname);
}
@@ -248,23 +203,16 @@ public:
check_for_attempt_to_create_nested_cdc_log(db, new_schema);
ensure_that_table_has_no_counter_columns(new_schema);
if (!db.features().cdc_with_tablets) {
ensure_that_table_uses_vnodes(*keyspace.metadata(), new_schema);
}
ensure_that_table_uses_vnodes(*keyspace.metadata(), new_schema);
std::optional<table_id> maybe_id = log_schema ? std::make_optional(log_schema->id()) : std::nullopt;
auto new_log_schema = create_log_schema(new_schema, db, *keyspace.metadata(), timestamp, std::move(maybe_id), log_schema);
auto new_log_schema = create_log_schema(new_schema, log_schema ? std::make_optional(log_schema->id()) : std::nullopt, log_schema);
auto log_mut = log_schema
? db::schema_tables::make_update_table_mutations(_ctxt._proxy, keyspace.metadata(), log_schema, new_log_schema, timestamp)
? db::schema_tables::make_update_table_mutations(db, keyspace.metadata(), log_schema, new_log_schema, timestamp)
: db::schema_tables::make_create_table_mutations(new_log_schema, timestamp)
;
mutations.insert(mutations.end(), std::make_move_iterator(log_mut.begin()), std::make_move_iterator(log_mut.end()));
if (!log_schema) {
db.get_notifier().before_create_column_family(*keyspace.metadata(), *new_log_schema, mutations, timestamp);
}
}
}
@@ -274,41 +222,12 @@ public:
auto has_cdc_log = db.has_schema(schema.ks_name(), logname);
if (has_cdc_log) {
auto log_schema = db.find_schema(schema.ks_name(), logname);
if (is_log_schema(*log_schema)) {
auto& keyspace = db.find_keyspace(schema.ks_name());
auto log_mut = db::schema_tables::make_drop_table_mutations(keyspace.metadata(), log_schema, timestamp);
mutations.insert(mutations.end(), std::make_move_iterator(log_mut.begin()), std::make_move_iterator(log_mut.end()));
db.get_notifier().before_drop_column_family(*log_schema, mutations, timestamp);
if (log_schema->get_partitioner().name() != cdc::cdc_partitioner::classname) {
return;
}
}
if (is_log_schema(schema)) {
auto& keyspace = db.find_keyspace(schema.ks_name());
if (keyspace.uses_tablets()) {
// drop cdc streams of this table
auto drop_stream_mut = make_drop_table_streams_mutations(schema.id(), timestamp);
mutations.insert(mutations.end(), std::make_move_iterator(drop_stream_mut.begin()), std::make_move_iterator(drop_stream_mut.end()));
}
}
}
void on_before_drop_keyspace(const sstring& keyspace_name, utils::chunked_vector<mutation>& mutations, api::timestamp_type ts) override {
auto& db = _ctxt._proxy.get_db().local();
auto& ks = db.find_keyspace(keyspace_name);
if (ks.uses_tablets()) {
// drop cdc streams for all CDC tables in this keyspace
for (auto&& [name, s] : ks.metadata()->cf_meta_data()) {
seastar::thread::maybe_yield();
if (!is_log_schema(*s)) {
continue;
}
auto drop_stream_mut = make_drop_table_streams_mutations(s->id(), ts);
mutations.insert(mutations.end(), std::make_move_iterator(drop_stream_mut.begin()), std::make_move_iterator(drop_stream_mut.end()));
}
auto log_mut = db::schema_tables::make_drop_table_mutations(keyspace.metadata(), log_schema, timestamp);
mutations.insert(mutations.end(), std::make_move_iterator(log_mut.begin()), std::make_move_iterator(log_mut.end()));
}
}
@@ -351,9 +270,10 @@ private:
// to be attempted - in particular the log table we try to create will not
// have tablets, and will cause a failure.
static void ensure_that_table_uses_vnodes(const keyspace_metadata& ksm, const schema& schema) {
auto rs = generate_replication_strategy(ksm);
locator::replication_strategy_params params(ksm.strategy_options(), ksm.initial_tablets());
auto rs = locator::abstract_replication_strategy::create_replication_strategy(ksm.strategy_name(), params);
if (rs->uses_tablets()) {
throw exceptions::invalid_request_exception(format("Cannot create CDC log for a table {}.{}, because the keyspace uses tablets, and not all nodes support the CDC with tablets feature.",
throw exceptions::invalid_request_exception(format("Cannot create CDC log for a table {}.{}, because keyspace uses tablets. See issue #16317.",
schema.ks_name(), schema.cf_name()));
}
}
@@ -499,18 +419,10 @@ static const sstring cdc_meta_column_prefix = "cdc$";
static const sstring cdc_deleted_column_prefix = cdc_meta_column_prefix + "deleted_";
static const sstring cdc_deleted_elements_column_prefix = cdc_meta_column_prefix + "deleted_elements_";
bool cdc_enabled(const schema& s) {
return s.cdc_options().enabled() || secondary_index::vector_index::has_vector_index(s);
}
bool is_log_name(const std::string_view& table_name) {
return table_name.ends_with(cdc_log_suffix);
}
bool is_log_schema(const schema& s) {
return s.get_partitioner().name() == cdc::cdc_partitioner::classname;
}
bool is_cdc_metacolumn_name(const sstring& name) {
return name.compare(0, cdc_meta_column_prefix.size(), cdc_meta_column_prefix) == 0;
}
@@ -520,7 +432,7 @@ bool is_log_for_some_table(const replica::database& db, const sstring& ks_name,
if (!base_schema) {
return false;
}
return cdc_enabled(*base_schema);
return base_schema->cdc_options().enabled();
}
schema_ptr get_base_table(const replica::database& db, const schema& s) {
@@ -584,12 +496,10 @@ bytes log_data_column_deleted_elements_name_bytes(const bytes& column_name) {
return to_bytes(cdc_deleted_elements_column_prefix) + column_name;
}
static schema_ptr create_log_schema(const schema& s, const replica::database& db,
const keyspace_metadata& ksm, api::timestamp_type timestamp, std::optional<table_id> uuid, schema_ptr old)
{
static schema_ptr create_log_schema(const schema& s, std::optional<table_id> uuid, schema_ptr old) {
schema_builder b(s.ks_name(), log_name(s.cf_name()));
b.with_partitioner(cdc::cdc_partitioner::classname);
b.set_compaction_strategy(compaction::compaction_strategy_type::time_window);
b.set_compaction_strategy(sstables::compaction_strategy_type::time_window);
b.set_comment(fmt::format("CDC log for {}.{}", s.ks_name(), s.cf_name()));
auto ttl_seconds = s.cdc_options().ttl();
if (ttl_seconds > 0) {
@@ -621,28 +531,6 @@ static schema_ptr create_log_schema(const schema& s, const replica::database& db
b.with_column(log_meta_column_name_bytes("ttl"), long_type);
b.with_column(log_meta_column_name_bytes("end_of_batch"), boolean_type);
b.set_caching_options(caching_options::get_disabled_caching_options());
auto validate_new_column = [&] (const sstring& name) {
// When dropping a column from a CDC log table, we set the drop timestamp to be
// `column_drop_leeway` seconds into the future (see `create_log_schema`).
// Therefore, when recreating a column with the same name, we need to validate
// that it's not recreated too soon and that the drop timestamp has passed.
if (old && old->dropped_columns().contains(name)) {
const auto& drop_info = old->dropped_columns().at(name);
auto create_time = api::timestamp_clock::time_point(api::timestamp_clock::duration(timestamp));
auto drop_time = api::timestamp_clock::time_point(api::timestamp_clock::duration(drop_info.timestamp));
if (drop_time > create_time) {
throw exceptions::invalid_request_exception(format("Cannot add column {} because a column with the same name was dropped too recently. Please retry after {} seconds",
name, std::chrono::duration_cast<std::chrono::seconds>(drop_time - create_time).count() + 1));
}
}
};
auto add_column = [&] (sstring name, data_type type) {
validate_new_column(name);
b.with_column(to_bytes(name), type);
};
auto add_columns = [&] (const schema::const_iterator_range_type& columns, bool is_data_col = false) {
for (const auto& column : columns) {
auto type = column.type;
@@ -664,9 +552,9 @@ static schema_ptr create_log_schema(const schema& s, const replica::database& db
}
));
}
add_column(log_data_column_name(column.name_as_text()), type);
b.with_column(log_data_column_name_bytes(column.name()), type);
if (is_data_col) {
add_column(log_data_column_deleted_name(column.name_as_text()), boolean_type);
b.with_column(log_data_column_deleted_name_bytes(column.name()), boolean_type);
}
if (column.type->is_multi_cell()) {
auto dtype = visit(*type, make_visitor(
@@ -682,7 +570,7 @@ static schema_ptr create_log_schema(const schema& s, const replica::database& db
throw std::invalid_argument("Should not reach");
}
));
add_column(log_data_column_deleted_elements_name(column.name_as_text()), dtype);
b.with_column(log_data_column_deleted_elements_name_bytes(column.name()), dtype);
}
}
};
@@ -695,10 +583,6 @@ static schema_ptr create_log_schema(const schema& s, const replica::database& db
b.set_uuid(*uuid);
}
auto rs = generate_replication_strategy(ksm);
auto tombstone_gc_ext = seastar::make_shared<tombstone_gc_extension>(get_default_tombstone_gc_mode(*rs, db.get_token_metadata(), false));
b.add_extension(tombstone_gc_extension::NAME, std::move(tombstone_gc_ext));
/**
* #10473 - if we are redefining the log table, we need to ensure any dropped
* columns are registered in "dropped_columns" table, otherwise clients will not
@@ -708,8 +592,7 @@ static schema_ptr create_log_schema(const schema& s, const replica::database& db
// not super efficient, but we don't do this often.
for (auto& col : old->all_columns()) {
if (!b.has_column({col.name(), col.name_as_text() })) {
auto drop_ts = api::timestamp_clock::now() + column_drop_leeway;
b.without_column(col.name_as_text(), col.type, drop_ts.time_since_epoch().count());
b.without_column(col.name_as_text(), col.type, api::new_timestamp());
}
}
}
@@ -1593,8 +1476,6 @@ private:
row_states_map _clustering_row_states;
cell_map _static_row_state;
const bool _uses_tablets;
utils::chunked_vector<mutation> _result_mutations;
std::optional<log_mutation_builder> _builder;
@@ -1610,13 +1491,12 @@ public:
, _dk(std::move(dk))
, _log_schema(ctx._proxy.get_db().local().find_schema(_schema->ks_name(), log_name(_schema->cf_name())))
, _clustering_row_states(0, clustering_key::hashing(*_schema), clustering_key::equality(*_schema))
, _uses_tablets(ctx._proxy.get_db().local().find_keyspace(_schema->ks_name()).uses_tablets())
{
}
// DON'T move the transformer after this
void begin_timestamp(api::timestamp_type ts, bool is_last) override {
const auto stream_id = _uses_tablets ? _ctx._cdc_metadata.get_tablet_stream(_log_schema->id(), ts, _dk.token()) : _ctx._cdc_metadata.get_vnode_stream(ts, _dk.token());
const auto stream_id = _ctx._cdc_metadata.get_stream(ts, _dk.token());
_result_mutations.emplace_back(_log_schema, stream_id.to_partition_key(*_log_schema));
_builder.emplace(_result_mutations.back(), ts, _dk.key(), *_schema);
_enable_updating_state = _schema->cdc_options().postimage() || (!is_last && _schema->cdc_options().preimage());
@@ -1913,7 +1793,7 @@ cdc::cdc_service::impl::augment_mutation_call(lowres_clock::time_point timeout,
// we do all this because in the case of batches, we can have mixed schemas.
auto e = mutations.end();
auto i = std::find_if(mutations.begin(), e, [](const mutation& m) {
return cdc_enabled(*m.schema());
return m.schema()->cdc_options().enabled();
});
if (i == e) {
@@ -1929,7 +1809,7 @@ cdc::cdc_service::impl::augment_mutation_call(lowres_clock::time_point timeout,
auto& m = mutations[idx];
auto s = m.schema();
if (!cdc_enabled(*s)) {
if (!s->cdc_options().enabled()) {
return make_ready_future<>();
}
@@ -1994,7 +1874,7 @@ cdc::cdc_service::impl::augment_mutation_call(lowres_clock::time_point timeout,
bool cdc::cdc_service::needs_cdc_augmentation(const utils::chunked_vector<mutation>& mutations) const {
return std::any_of(mutations.begin(), mutations.end(), [](const mutation& m) {
return cdc_enabled(*m.schema());
return m.schema()->cdc_options().enabled();
});
}

View File

@@ -21,7 +21,7 @@
#include <seastar/core/shared_ptr.hh>
#include <seastar/core/sstring.hh>
#include "mutation/timestamp.hh"
#include "timestamp.hh"
#include "tracing/trace_state.hh"
#include "utils/UUID.hh"
@@ -56,7 +56,6 @@ class db_context;
class metadata;
bool is_log_name(const std::string_view& table_name);
bool is_log_schema(const schema& s);
/// \brief CDC service, responsible for schema listeners
///
@@ -107,8 +106,6 @@ bool is_log_for_some_table(const replica::database& db, const sstring& ks_name,
schema_ptr get_base_table(const replica::database&, const schema&);
schema_ptr get_base_table(const replica::database&, std::string_view, std::string_view);
bool cdc_enabled(const schema& s);
seastar::sstring base_name(std::string_view log_name);
seastar::sstring log_name(std::string_view table_name);
seastar::sstring log_data_column_name(std::string_view column_name);

View File

@@ -10,11 +10,8 @@
#include "exceptions/exceptions.hh"
#include "cdc/generation.hh"
#include "utils/stall_free.hh"
#include "cdc/metadata.hh"
#include <seastar/coroutine/maybe_yield.hh>
extern logging::logger cdc_log;
static api::timestamp_type to_ts(db_clock::time_point tp) {
@@ -53,21 +50,6 @@ cdc::stream_id get_stream(
return get_stream(*it, tok);
}
static cdc::stream_id get_stream(
const utils::chunked_vector<cdc::stream_id>& streams,
dht::token tok) {
if (streams.empty()) {
on_internal_error(cdc_log, "get_stream: streams empty");
}
auto it = std::lower_bound(streams.begin(), streams.end(), tok,
[] (const cdc::stream_id& sid, dht::token t) { return sid.token() < t; });
if (it == streams.end()) {
on_internal_error(cdc_log, fmt::format("get_stream: no stream for token {}.", tok));
}
return *it;
}
cdc::metadata::container_t::const_iterator cdc::metadata::gen_used_at(api::timestamp_type ts) const {
auto it = _gens.upper_bound(ts);
if (it == _gens.begin()) {
@@ -84,7 +66,7 @@ bool cdc::metadata::streams_available() const {
return it != _gens.end();
}
cdc::stream_id cdc::metadata::get_vnode_stream(api::timestamp_type ts, dht::token tok) {
cdc::stream_id cdc::metadata::get_stream(api::timestamp_type ts, dht::token tok) {
auto now = api::new_timestamp();
if (ts > now + get_generation_leeway().count()) {
throw exceptions::invalid_request_exception(seastar::format(
@@ -159,43 +141,6 @@ cdc::stream_id cdc::metadata::get_vnode_stream(api::timestamp_type ts, dht::toke
return ret;
}
const utils::chunked_vector<cdc::stream_id>& cdc::metadata::get_tablet_stream_set(table_id tid, api::timestamp_type ts) const {
auto now = api::new_timestamp();
if (ts > now + get_generation_leeway().count()) {
throw exceptions::invalid_request_exception(seastar::format(
"cdc: attempted to get a stream \"from the future\" ({}; current server time: {})."
" With CDC you cannot send writes with timestamps arbitrarily into the future, because we don't"
" know what streams will be used at that time.\n"
"We *do* allow sending writes into the near future, but our ability to do that is limited."
" If you really must use your own timestamps, then make sure your clocks are well-synchronized"
" with the database's clocks.", format_timestamp(ts), format_timestamp(now)));
}
auto table_it = _tablet_streams.find(tid);
if (table_it == _tablet_streams.end()) {
throw std::runtime_error(fmt::format(
"cdc::metadata::get_stream: could not find stream metadata for table {}.", tid));
}
// find the most recent entry of stream sets with timestamp <= write timestamp.
// start from the most recent entry, which is the most likely candidate, and go
// back until we find matching entry.
const auto& table_streams = *table_it->second;
auto it = table_streams.crbegin();
for (; it != table_streams.crend() && it->first > ts; ++it);
if (it == table_streams.crend()) {
throw std::runtime_error(fmt::format(
"cdc::metadata::get_stream: could not find any CDC streams for table {} for timestamp {}.", tid, ts));
}
return it->second.streams;
}
cdc::stream_id cdc::metadata::get_tablet_stream(table_id tid, api::timestamp_type ts, dht::token tok) {
return ::get_stream(get_tablet_stream_set(tid, ts), tok);
}
bool cdc::metadata::known_or_obsolete(db_clock::time_point tp) const {
auto ts = to_ts(tp);
auto it = _gens.lower_bound(ts);
@@ -258,102 +203,3 @@ bool cdc::metadata::prepare(db_clock::time_point tp) {
return !it->second;
}
future<utils::chunked_vector<cdc::stream_id>> cdc::metadata::construct_next_stream_set(
const utils::chunked_vector<cdc::stream_id>& prev_stream_set,
utils::chunked_vector<cdc::stream_id> opened,
const utils::chunked_vector<cdc::stream_id>& closed) {
if (closed.size() == prev_stream_set.size()) {
// all previous streams are closed, so the next stream set is just the opened streams.
co_return std::move(opened);
}
// construct the next stream set from the previous one by adding the opened
// streams and removing the closed streams. we assume each stream set is
// sorted by token, and the result is sorted as well.
utils::chunked_vector<cdc::stream_id> next_stream_set;
co_await utils::reserve_gently(next_stream_set, prev_stream_set.size() + opened.size() - closed.size());
auto next_prev = prev_stream_set.begin();
auto next_closed = closed.begin();
auto next_opened = opened.begin();
while (next_prev != prev_stream_set.end() || next_opened != opened.end()) {
co_await coroutine::maybe_yield();
if (next_prev == prev_stream_set.end() || (next_opened != opened.end() && next_opened->token() < next_prev->token())) {
next_stream_set.push_back(std::move(*next_opened));
next_opened++;
continue;
}
if (next_closed != closed.end() && *next_prev == *next_closed) {
next_prev++;
next_closed++;
continue;
}
next_stream_set.push_back(*next_prev);
next_prev++;
}
co_return std::move(next_stream_set);
}
void cdc::metadata::load_tablet_streams_map(table_id tid, table_streams new_table_map) {
_tablet_streams[tid] = make_lw_shared(std::move(new_table_map));
}
void cdc::metadata::append_tablet_streams_map(table_id tid, table_streams new_table_map) {
_tablet_streams[tid]->insert(std::make_move_iterator(new_table_map.begin()), std::make_move_iterator(new_table_map.end()));
}
void cdc::metadata::remove_tablet_streams_map(table_id tid) {
_tablet_streams.erase(tid);
}
std::vector<table_id> cdc::metadata::get_tables_with_cdc_tablet_streams() const {
return _tablet_streams | std::views::keys | std::ranges::to<std::vector<table_id>>();
}
future<cdc::cdc_stream_diff> cdc::metadata::generate_stream_diff(const utils::chunked_vector<stream_id>& before, const utils::chunked_vector<stream_id>& after) {
utils::chunked_vector<stream_id> closed, opened;
auto before_it = before.begin();
auto after_it = after.begin();
while (before_it != before.end()) {
co_await coroutine::maybe_yield();
if (after_it == after.end()) {
while (before_it != before.end()) {
co_await coroutine::maybe_yield();
closed.push_back(*before_it++);
}
break;
}
if (after_it->token() < before_it->token()) {
opened.push_back(*after_it++);
} else if (after_it->token() > before_it->token()) {
closed.push_back(*before_it++);
} else if (*after_it != *before_it) {
opened.push_back(*after_it++);
closed.push_back(*before_it++);
} else {
after_it++;
before_it++;
}
}
while (after_it != after.end()) {
co_await coroutine::maybe_yield();
opened.push_back(*after_it++);
}
co_return cdc_stream_diff {
.closed_streams = std::move(closed),
.opened_streams = std::move(opened)
};
}

View File

@@ -11,7 +11,7 @@
#include <map>
#include "db_clock.hh"
#include "mutation/timestamp.hh"
#include "timestamp.hh"
#include "cdc/generation.hh"
namespace dht {
@@ -37,20 +37,10 @@ class metadata final {
using container_t = std::map<api::timestamp_type, std::optional<topology_description>>;
container_t _gens;
// per-table streams map for tables in tablets-based keyspaces.
// the streams map is shared with the virtual tables reader, hence we can only insert new entries to it, not erase.
using table_streams_ptr = lw_shared_ptr<table_streams>;
using tablet_streams_map = std::unordered_map<table_id, table_streams_ptr>;
tablet_streams_map _tablet_streams;
/* The timestamp used in the last successful `get_vnode_stream` call. */
/* The timestamp used in the last successful `get_stream` call. */
api::timestamp_type _last_stream_timestamp = api::missing_timestamp;
container_t::const_iterator gen_used_at(api::timestamp_type ts) const;
const utils::chunked_vector<stream_id>& get_tablet_stream_set(table_id tid, api::timestamp_type ts) const;
public:
/* Is a generation with the given timestamp already known or obsolete? It is obsolete if and only if
* it is older than the generation operating at `now - get_generation_leeway()`.
@@ -61,65 +51,37 @@ public:
* CDC logs will fail fast.
*/
bool streams_available() const;
/* Return the stream for a vnode-based keyspace for the base partition whose token is `tok` to which a corresponding
* log write should go according to the generation used at time `ts` (i.e, the latest generation whose timestamp is
* less or equal to `ts`).
/* Return the stream for the base partition whose token is `tok` to which a corresponding log write should go
* according to the generation used at time `ts` (i.e, the latest generation whose timestamp is less or equal to `ts`).
*
* If the provided timestamp is too far away "into the future" (where "now" is defined according to our local clock),
* we reject the get_vnode_stream query. This is because the resulting stream might belong to a generation which we don't
* we reject the get_stream query. This is because the resulting stream might belong to a generation which we don't
* yet know about. Similarly, we reject queries to the previous generations if the timestamp is too far away "into
* the past". The amount of leeway (how much "into the future" or "into the past" we allow `ts` to be) is defined by
* `get_generation_leeway()`.
*/
stream_id get_vnode_stream(api::timestamp_type ts, dht::token tok);
stream_id get_stream(api::timestamp_type ts, dht::token tok);
/* Similar to get_vnode_stream but for tablet-based keyspaces.
* In addition to the base partition token and timestamp, the stream also depends on the table id because each table
* has its own set of streams.
*/
stream_id get_tablet_stream(table_id tid, api::timestamp_type ts, dht::token tok);
/* Insert the generation given by `gen` with timestamp `ts` to be used by the `get_vnode_stream` function,
/* Insert the generation given by `gen` with timestamp `ts` to be used by the `get_stream` function,
* if the generation is not already known or older than the currently known ones.
*
* Returns true if the generation was inserted,
* meaning that `get_vnode_stream` might return a stream from this generation (at some time points).
* meaning that `get_stream` might return a stream from this generation (at some time points).
*/
bool insert(db_clock::time_point ts, topology_description&& gen);
/* Prepare for inserting a new generation whose timestamp is `ts`.
* This method is not required to be called before `insert`, but it's here
* to increase safety of `get_vnode_stream` calls in some situations. Use it if you:
* to increase safety of `get_stream` calls in some situations. Use it if you:
* 1. know that there is a new generation, but
* 2. you didn't yet retrieve the generation's topology_description.
*
* After preparing a generation, if `get_vnode_stream` is supposed to return a stream from this generation
* After preparing a generation, if `get_stream` is supposed to return a stream from this generation
* but we don't yet have the generation's data, it will reject the query to maintain consistency of streams.
*
* Returns true iff this generation is not obsolete and wasn't previously prepared nor inserted.
*/
bool prepare(db_clock::time_point ts);
void load_tablet_streams_map(table_id tid, table_streams new_table_map);
void append_tablet_streams_map(table_id tid, table_streams new_table_map);
void remove_tablet_streams_map(table_id tid);
const tablet_streams_map& get_all_tablet_streams() const {
return _tablet_streams;
}
std::vector<table_id> get_tables_with_cdc_tablet_streams() const;
static future<utils::chunked_vector<stream_id>> construct_next_stream_set(
const utils::chunked_vector<cdc::stream_id>& prev_stream_set,
utils::chunked_vector<cdc::stream_id> opened,
const utils::chunked_vector<cdc::stream_id>& closed);
static future<cdc_stream_diff> generate_stream_diff(
const utils::chunked_vector<stream_id>& before,
const utils::chunked_vector<stream_id>& after);
};
} // namespace cdc

View File

@@ -9,7 +9,7 @@
#include "mutation/mutation.hh"
#include "schema/schema.hh"
#include "types/concrete_types.hh"
#include "concrete_types.hh"
#include "types/user.hh"
#include "split.hh"

View File

@@ -10,7 +10,7 @@
#include <boost/dynamic_bitset.hpp> // IWYU pragma: keep
#include "replica/database_fwd.hh"
#include "mutation/timestamp.hh"
#include "timestamp.hh"
class mutation;

View File

@@ -8,7 +8,7 @@
#include <fmt/chrono.h>
#include "mutation/timestamp.hh"
#include "timestamp.hh"
#include "clocks-impl.hh"

View File

@@ -30,6 +30,5 @@ if(_slp_vectorize_supported)
endif()
add_link_options($<$<CONFIG:RelWithDebInfo>:LINKER:--gc-sections>)
add_link_options($<$<CONFIG:RelWithDebInfo>:LINKER:--no-lto-pgo-warn-mismatch>)
maybe_limit_stack_usage_in_KB(13 RelWithDebInfo)

View File

@@ -135,8 +135,6 @@ function(maybe_limit_stack_usage_in_KB stack_usage_threshold_in_KB config)
endif()
endfunction()
option(Scylla_WITH_DEBUG_INFO "Enable debug info" OFF)
macro(update_build_flags config)
cmake_parse_arguments (
parsed_args
@@ -152,7 +150,7 @@ macro(update_build_flags config)
set(linker_flags "CMAKE_EXE_LINKER_FLAGS_${CONFIG}")
string(APPEND ${cxx_flags}
" -O${parsed_args_OPTIMIZATION_LEVEL}")
if(parsed_args_WITH_DEBUG_INFO OR ${Scylla_WITH_DEBUG_INFO})
if(parsed_args_WITH_DEBUG_INFO)
string(APPEND ${cxx_flags} " -g -gz")
else()
# If Scylla is compiled without debug info, strip the debug symbols from

View File

@@ -9,7 +9,7 @@
#include "utils/assert.hh"
#include "types/collection.hh"
#include "types/user.hh"
#include "types/concrete_types.hh"
#include "concrete_types.hh"
#include "mutation/mutation_partition.hh"
#include "compaction/compaction_garbage_collector.hh"
#include "combine.hh"
@@ -261,7 +261,7 @@ static collection_mutation serialize_collection_mutation(
writev(v.serialize());
}
return collection_mutation(type, std::move(ret));
return collection_mutation(type, ret);
}
collection_mutation collection_mutation_description::serialize(const abstract_type& type) const {

View File

@@ -28,7 +28,6 @@
#include <seastar/coroutine/maybe_yield.hh>
#include "compaction/compaction_garbage_collector.hh"
#include "compaction/exceptions.hh"
#include "dht/i_partitioner.hh"
#include "sstables/exceptions.hh"
#include "sstables/sstables.hh"
@@ -53,7 +52,7 @@
#include "readers/compacting.hh"
#include "tombstone_gc.hh"
#include "replica/database.hh"
#include "mutation/timestamp.hh"
#include "timestamp.hh"
can_gc_fn always_gc = [] (tombstone, is_shadowable) { return true; };
@@ -111,9 +110,9 @@ auto fmt::formatter<max_purgeable>::format(max_purgeable mp, fmt::format_context
return format_to(ctx.out(), "max_purgeable{{timestamp={}, expiry_treshold={}, source={}}}", mp.timestamp(), expiry_str, mp.source());
}
namespace compaction {
namespace sstables {
bool is_eligible_for_compaction(const sstables::shared_sstable& sst) noexcept {
bool is_eligible_for_compaction(const shared_sstable& sst) noexcept {
return !sst->requires_view_building() && !sst->is_quarantined();
}
@@ -192,8 +191,8 @@ std::string_view to_string(compaction_type_options::scrub::quarantine_mode quara
return "(invalid)";
}
static max_purgeable get_max_purgeable_timestamp(const compaction_group_view& table_s, sstables::sstable_set::incremental_selector& selector,
const std::unordered_set<sstables::shared_sstable>& compacting_set, const dht::decorated_key& dk, uint64_t& bloom_filter_checks,
static max_purgeable get_max_purgeable_timestamp(const compaction_group_view& table_s, sstable_set::incremental_selector& selector,
const std::unordered_set<shared_sstable>& compacting_set, const dht::decorated_key& dk, uint64_t& bloom_filter_checks,
const api::timestamp_type compacting_max_timestamp, const bool gc_check_only_compacting_sstables, const is_shadowable is_shadowable) {
if (!table_s.tombstone_gc_enabled()) [[unlikely]] {
clogger.trace("get_max_purgeable_timestamp {}.{}: tombstone_gc_enabled=false, returning min_timestamp",
@@ -287,23 +286,23 @@ static max_purgeable get_max_purgeable_timestamp(const compaction_group_view& ta
return max_purgeable(timestamp, source);
}
static std::vector<sstables::shared_sstable> get_uncompacting_sstables(const compaction_group_view& table_s, std::vector<sstables::shared_sstable> sstables) {
static std::vector<shared_sstable> get_uncompacting_sstables(const compaction_group_view& table_s, std::vector<shared_sstable> sstables) {
auto sstable_set = table_s.sstable_set_for_tombstone_gc();
auto all_sstables = *sstable_set->all() | std::ranges::to<std::vector>();
auto& compacted_undeleted = table_s.compacted_undeleted_sstables();
all_sstables.insert(all_sstables.end(), compacted_undeleted.begin(), compacted_undeleted.end());
std::ranges::sort(all_sstables, std::ranges::less(), std::mem_fn(&sstables::sstable::generation));
std::ranges::sort(sstables, std::ranges::less(), std::mem_fn(&sstables::sstable::generation));
std::vector<sstables::shared_sstable> not_compacted_sstables;
std::ranges::sort(all_sstables, std::ranges::less(), std::mem_fn(&sstable::generation));
std::ranges::sort(sstables, std::ranges::less(), std::mem_fn(&sstable::generation));
std::vector<shared_sstable> not_compacted_sstables;
std::ranges::set_difference(all_sstables, sstables, std::back_inserter(not_compacted_sstables),
std::ranges::less(), std::mem_fn(&sstables::sstable::generation), std::mem_fn(&sstables::sstable::generation));
std::ranges::less(), std::mem_fn(&sstable::generation), std::mem_fn(&sstable::generation));
return not_compacted_sstables;
}
static std::vector<sstables::basic_info> extract_basic_info_from_sstables(const std::vector<sstables::shared_sstable>& sstables) {
static std::vector<basic_info> extract_basic_info_from_sstables(const std::vector<shared_sstable>& sstables) {
return sstables | std::views::transform([] (auto&& sst) {
return sstables::basic_info{.generation = sst->generation(), .origin = sst->get_origin(), .size = sst->bytes_on_disk()};
}) | std::ranges::to<std::vector<sstables::basic_info>>();
}) | std::ranges::to<std::vector<basic_info>>();
}
class compaction;
@@ -358,19 +357,19 @@ public:
};
struct compaction_writer {
sstables::shared_sstable sst;
shared_sstable sst;
// We use a ptr for pointer stability and so that it can be null
// when using a noop monitor.
sstables::sstable_writer writer;
sstable_writer writer;
// The order in here is important. A monitor must be destroyed before the writer it is monitoring since it has a
// periodic timer that checks the writer.
// The writer must be destroyed before the shared_sstable since the it may depend on the sstable
// (as in the mx::writer over compressed_file_data_sink_impl case that depends on sstables::compression).
std::unique_ptr<compaction_write_monitor> monitor;
compaction_writer(std::unique_ptr<compaction_write_monitor> monitor, sstables::sstable_writer writer, sstables::shared_sstable sst)
compaction_writer(std::unique_ptr<compaction_write_monitor> monitor, sstable_writer writer, shared_sstable sst)
: sst(std::move(sst)), writer(std::move(writer)), monitor(std::move(monitor)) {}
compaction_writer(sstables::sstable_writer writer, sstables::shared_sstable sst)
compaction_writer(sstable_writer writer, shared_sstable sst)
: compaction_writer(nullptr, std::move(writer), std::move(sst)) {}
};
@@ -449,7 +448,7 @@ public:
using use_backlog_tracker = bool_class<class use_backlog_tracker_tag>;
struct compaction_read_monitor_generator final : public sstables::read_monitor_generator {
struct compaction_read_monitor_generator final : public read_monitor_generator {
class compaction_read_monitor final : public sstables::read_monitor, public backlog_read_progress_manager {
sstables::shared_sstable _sst;
compaction_group_view& _table_s;
@@ -521,13 +520,13 @@ struct compaction_read_monitor_generator final : public sstables::read_monitor_g
}
private:
compaction_group_view& _table_s;
std::unordered_map<sstables::generation_type, compaction_read_monitor> _generated_monitors;
std::unordered_map<generation_type, compaction_read_monitor> _generated_monitors;
use_backlog_tracker _use_backlog_tracker;
friend class compaction_progress_monitor;
};
void compaction_progress_monitor::set_generator(std::unique_ptr<sstables::read_monitor_generator> generator) {
void compaction_progress_monitor::set_generator(std::unique_ptr<read_monitor_generator> generator) {
_generator = std::move(generator);
}
@@ -552,16 +551,16 @@ protected:
const compaction_sstable_creator_fn _sstable_creator;
const schema_ptr _schema;
const reader_permit _permit;
std::vector<sstables::shared_sstable> _sstables;
std::vector<sstables::generation_type> _input_sstable_generations;
std::vector<sstables::basic_info> _input_sstables_basic_info;
std::vector<shared_sstable> _sstables;
std::vector<generation_type> _input_sstable_generations;
std::vector<basic_info> _input_sstables_basic_info;
// Unused sstables are tracked because if compaction is interrupted we can only delete them.
// Deleting used sstables could potentially result in data loss.
std::unordered_set<sstables::shared_sstable> _new_partial_sstables;
std::vector<sstables::shared_sstable> _new_unused_sstables;
std::vector<sstables::shared_sstable> _all_new_sstables;
lw_shared_ptr<sstables::sstable_set> _compacting;
const compaction_type _type;
std::unordered_set<shared_sstable> _new_partial_sstables;
std::vector<shared_sstable> _new_unused_sstables;
std::vector<shared_sstable> _all_new_sstables;
lw_shared_ptr<sstable_set> _compacting;
const sstables::compaction_type _type;
const uint64_t _max_sstable_size;
const uint32_t _sstable_level;
uint64_t _start_size = 0;
@@ -580,21 +579,21 @@ protected:
bool _contains_multi_fragment_runs = false;
mutation_source_metadata _ms_metadata = {};
const compaction_sstable_replacer_fn _replacer;
const sstables::run_id _run_identifier;
const run_id _run_identifier;
// optional clone of sstable set to be used for expiration purposes, so it will be set if expiration is enabled.
std::optional<sstables::sstable_set> _sstable_set;
std::optional<sstable_set> _sstable_set;
// used to incrementally calculate max purgeable timestamp, as we iterate through decorated keys.
std::optional<sstables::sstable_set::incremental_selector> _selector;
std::unordered_set<sstables::shared_sstable> _compacting_for_max_purgeable_func;
std::optional<sstable_set::incremental_selector> _selector;
std::unordered_set<shared_sstable> _compacting_for_max_purgeable_func;
// optional owned_ranges vector for cleanup;
const owned_ranges_ptr _owned_ranges = {};
// required for reshard compaction.
const dht::sharder* _sharder = nullptr;
const std::optional<dht::incremental_owned_ranges_checker> _owned_ranges_checker;
// Garbage collected sstables that are sealed but were not added to SSTable set yet.
std::vector<sstables::shared_sstable> _unused_garbage_collected_sstables;
std::vector<shared_sstable> _unused_garbage_collected_sstables;
// Garbage collected sstables that were added to SSTable set and should be eventually removed from it.
std::vector<sstables::shared_sstable> _used_garbage_collected_sstables;
std::vector<shared_sstable> _used_garbage_collected_sstables;
utils::observable<> _stop_request_observable;
// optional tombstone_gc_state that is used when gc has to check only the compacting sstables to collect tombstones.
std::optional<tombstone_gc_state> _tombstone_gc_state_with_commitlog_check_disabled;
@@ -610,7 +609,7 @@ private:
// Called in a seastar thread
dht::partition_range_vector
get_ranges_for_invalidation(const std::vector<sstables::shared_sstable>& sstables) {
get_ranges_for_invalidation(const std::vector<shared_sstable>& sstables) {
// If owned ranges is disengaged, it means no cleanup work was done and
// so nothing needs to be invalidated.
if (!_owned_ranges) {
@@ -619,7 +618,7 @@ private:
auto owned_ranges = dht::to_partition_ranges(*_owned_ranges, utils::can_yield::yes);
auto non_owned_ranges = sstables
| std::views::transform([] (const sstables::shared_sstable& sst) {
| std::views::transform([] (const shared_sstable& sst) {
seastar::thread::maybe_yield();
return dht::partition_range::make({sst->get_first_decorated_key(), true},
{sst->get_last_decorated_key(), true});
@@ -642,26 +641,26 @@ protected:
, _replacer(std::move(descriptor.replacer))
, _run_identifier(descriptor.run_identifier)
, _sstable_set(std::move(descriptor.all_sstables_snapshot))
, _selector(_sstable_set ? _sstable_set->make_incremental_selector() : std::optional<sstables::sstable_set::incremental_selector>{})
, _compacting_for_max_purgeable_func(std::unordered_set<sstables::shared_sstable>(_sstables.begin(), _sstables.end()))
, _selector(_sstable_set ? _sstable_set->make_incremental_selector() : std::optional<sstable_set::incremental_selector>{})
, _compacting_for_max_purgeable_func(std::unordered_set<shared_sstable>(_sstables.begin(), _sstables.end()))
, _owned_ranges(std::move(descriptor.owned_ranges))
, _sharder(descriptor.sharder)
, _owned_ranges_checker(_owned_ranges ? std::optional<dht::incremental_owned_ranges_checker>(*_owned_ranges) : std::nullopt)
, _tombstone_gc_state_with_commitlog_check_disabled(descriptor.gc_check_only_compacting_sstables ? std::make_optional(_table_s.get_tombstone_gc_state().with_commitlog_check_disabled()) : std::nullopt)
, _progress_monitor(progress_monitor)
{
std::unordered_set<sstables::run_id> ssts_run_ids;
_contains_multi_fragment_runs = std::any_of(_sstables.begin(), _sstables.end(), [&ssts_run_ids] (sstables::shared_sstable& sst) {
std::unordered_set<run_id> ssts_run_ids;
_contains_multi_fragment_runs = std::any_of(_sstables.begin(), _sstables.end(), [&ssts_run_ids] (shared_sstable& sst) {
return !ssts_run_ids.insert(sst->run_identifier()).second;
});
_progress_monitor.set_generator(std::make_unique<compaction_read_monitor_generator>(_table_s, use_backlog_tracker));
}
sstables::read_monitor_generator& unwrap_monitor_generator() const {
read_monitor_generator& unwrap_monitor_generator() const {
if (_progress_monitor._generator) {
return *_progress_monitor._generator;
}
return sstables::default_read_monitor_generator();
return default_read_monitor_generator();
}
virtual uint64_t partitions_per_sstable() const {
@@ -672,7 +671,7 @@ protected:
_table_s.get_compaction_strategy().adjust_partition_estimate(_ms_metadata, _estimated_partitions, _schema));
}
void setup_new_sstable(sstables::shared_sstable& sst) {
void setup_new_sstable(shared_sstable& sst) {
_all_new_sstables.push_back(sst);
_new_partial_sstables.insert(sst);
for (auto ancestor : _input_sstable_generations) {
@@ -689,14 +688,14 @@ protected:
_new_partial_sstables.erase(writer->sst);
}
sstables::sstable_writer_config make_sstable_writer_config(compaction_type type) {
sstable_writer_config make_sstable_writer_config(compaction_type type) {
auto s = compaction_name(type);
std::transform(s.begin(), s.end(), s.begin(), [] (char c) {
return std::tolower(c);
});
sstables::sstable_writer_config cfg = _table_s.configure_writer(std::move(s));
sstable_writer_config cfg = _table_s.configure_writer(std::move(s));
cfg.max_sstable_size = _max_sstable_size;
cfg.monitor = &sstables::default_write_monitor();
cfg.monitor = &default_write_monitor();
cfg.run_identifier = _run_identifier;
cfg.replay_position = _rp;
cfg.sstable_level = _sstable_level;
@@ -704,7 +703,7 @@ protected:
}
api::timestamp_type maximum_timestamp() const {
auto m = std::max_element(_sstables.begin(), _sstables.end(), [] (const sstables::shared_sstable& sst1, const sstables::shared_sstable& sst2) {
auto m = std::max_element(_sstables.begin(), _sstables.end(), [] (const shared_sstable& sst1, const shared_sstable& sst2) {
return sst1->get_stats_metadata().max_timestamp < sst2->get_stats_metadata().max_timestamp;
});
return (*m)->get_stats_metadata().max_timestamp;
@@ -715,7 +714,7 @@ protected:
}
compaction_completion_desc
get_compaction_completion_desc(std::vector<sstables::shared_sstable> input_sstables, std::vector<sstables::shared_sstable> output_sstables) {
get_compaction_completion_desc(std::vector<shared_sstable> input_sstables, std::vector<shared_sstable> output_sstables) {
auto ranges_for_for_invalidation = get_ranges_for_invalidation(input_sstables);
return compaction_completion_desc{std::move(input_sstables), std::move(output_sstables), std::move(ranges_for_for_invalidation)};
}
@@ -726,11 +725,11 @@ protected:
return bool(_sstable_set) && _table_s.tombstone_gc_enabled();
}
compaction_writer create_gc_compaction_writer(sstables::run_id gc_run) const {
compaction_writer create_gc_compaction_writer(run_id gc_run) const {
auto sst = _sstable_creator(this_shard_id());
auto monitor = std::make_unique<compaction_write_monitor>(sst, _table_s, maximum_timestamp(), _sstable_level);
sstables::sstable_writer_config cfg = _table_s.configure_writer("garbage_collection");
sstable_writer_config cfg = _table_s.configure_writer("garbage_collection");
cfg.run_identifier = gc_run;
cfg.monitor = monitor.get();
uint64_t estimated_partitions = std::max(1UL, uint64_t(ceil(partitions_per_sstable() * _estimated_droppable_tombstone_ratio)));
@@ -758,7 +757,7 @@ protected:
// created here as:
// 1. it can be shared across all sstables created by this writer
// 2. it is optional, as gc writer is not always used
auto gc_run = sstables::run_id::create_random_id();
auto gc_run = run_id::create_random_id();
return compacted_fragments_writer(*this,
[this, gc_run] (const dht::decorated_key&) { return create_gc_compaction_writer(gc_run); },
[this] (compaction_writer* cw) { stop_gc_compaction_writer(cw); },
@@ -767,13 +766,13 @@ protected:
// Retrieves all unused garbage collected sstables that will be subsequently added
// to the SSTable set, and mark them as used.
std::vector<sstables::shared_sstable> consume_unused_garbage_collected_sstables() {
std::vector<shared_sstable> consume_unused_garbage_collected_sstables() {
auto unused = std::exchange(_unused_garbage_collected_sstables, {});
_used_garbage_collected_sstables.insert(_used_garbage_collected_sstables.end(), unused.begin(), unused.end());
return unused;
}
const std::vector<sstables::shared_sstable>& used_garbage_collected_sstables() const {
const std::vector<shared_sstable>& used_garbage_collected_sstables() const {
return _used_garbage_collected_sstables;
}
@@ -861,7 +860,6 @@ private:
_input_sstables_basic_info.reserve(_sstables.size());
int64_t repaired_at = 0;
std::vector<int64_t> repaired_at_for_compacted_sstables;
uint64_t compaction_size = 0;
for (auto& sst : _sstables) {
co_await coroutine::maybe_yield();
auto& sst_stats = sst->get_stats_metadata();
@@ -885,7 +883,7 @@ private:
}
_stats_collector.update(sst->get_encoding_stats_for_compaction());
compaction_size += sst->data_size();
_cdata.compaction_size += sst->data_size();
// We also capture the sstable, so we keep it alive while the read isn't done
ssts->insert(sst);
// FIXME: If the sstables have cardinality estimation bitmaps, use that
@@ -899,7 +897,6 @@ private:
_rp = std::max(_rp, sst_stats.position);
}
}
_cdata.compaction_size += compaction_size;
log_debug("{} [{}]", report_start_desc(), fmt::join(_sstables | std::views::transform([] (auto sst) { return to_string(sst, true); }), ","));
if (repaired_at) {
_output_repaired_at = repaired_at;
@@ -1271,9 +1268,9 @@ public:
sm_fwd,
mr_fwd,
unwrap_monitor_generator(),
sstables::default_sstable_predicate(),
default_sstable_predicate(),
&_reader_statistics,
sstables::integrity_check::yes);
integrity_check::yes);
}
std::string_view report_start_desc() const override {
@@ -1289,7 +1286,7 @@ public:
setup_new_sstable(sst);
auto monitor = std::make_unique<compaction_write_monitor>(sst, _table_s, maximum_timestamp(), _sstable_level);
sstables::sstable_writer_config cfg = make_sstable_writer_config(_type);
sstable_writer_config cfg = make_sstable_writer_config(_type);
cfg.monitor = monitor.get();
return compaction_writer{std::move(monitor), sst->get_writer(*_schema, partitions_per_sstable(), cfg, get_encoding_stats()), sst};
}
@@ -1309,7 +1306,7 @@ public:
replace_remaining_exhausted_sstables();
}
private:
void maybe_replace_exhausted_sstables_by_sst(sstables::shared_sstable sst) {
void maybe_replace_exhausted_sstables_by_sst(shared_sstable sst) {
// Skip earlier replacement of exhausted sstables if compaction works with only single-fragment runs,
// meaning incremental compaction is disabled for this compaction.
if (!enable_garbage_collected_sstable_writer()) {
@@ -1317,7 +1314,7 @@ private:
}
auto permit = seastar::get_units(_replacer_lock, 1).get();
// Replace exhausted sstable(s), if any, by new one(s) in the column family.
auto not_exhausted = [s = _schema, &dk = sst->get_last_decorated_key()] (sstables::shared_sstable& sst) {
auto not_exhausted = [s = _schema, &dk = sst->get_last_decorated_key()] (shared_sstable& sst) {
return sst->get_last_decorated_key().tri_compare(*s, dk) > 0;
};
auto exhausted = std::partition(_sstables.begin(), _sstables.end(), not_exhausted);
@@ -1325,7 +1322,7 @@ private:
if (exhausted != _sstables.end()) {
// The goal is that exhausted sstables will be deleted as soon as possible,
// so we need to release reference to them.
std::for_each(exhausted, _sstables.end(), [this] (sstables::shared_sstable& sst) {
std::for_each(exhausted, _sstables.end(), [this] (shared_sstable& sst) {
_compacting_for_max_purgeable_func.erase(sst);
// Fully expired sstable is not actually compacted, therefore it's not present in the compacting set.
_compacting->erase(sst);
@@ -1340,7 +1337,7 @@ private:
auto unused_gc_sstables = consume_unused_garbage_collected_sstables();
_new_unused_sstables.insert(_new_unused_sstables.end(), unused_gc_sstables.begin(), unused_gc_sstables.end());
auto exhausted_ssts = std::vector<sstables::shared_sstable>(exhausted, _sstables.end());
auto exhausted_ssts = std::vector<shared_sstable>(exhausted, _sstables.end());
log_debug("Replacing earlier exhausted sstable(s) [{}] by new sstable(s) [{}]",
fmt::join(exhausted_ssts | std::views::transform([] (auto sst) { return to_string(sst, false); }), ","),
fmt::join(_new_unused_sstables | std::views::transform([] (auto sst) { return to_string(sst, true); }), ","));
@@ -1352,7 +1349,7 @@ private:
void replace_remaining_exhausted_sstables() {
if (!_sstables.empty() || !used_garbage_collected_sstables().empty()) {
std::vector<sstables::shared_sstable> old_sstables;
std::vector<shared_sstable> old_sstables;
std::move(_sstables.begin(), _sstables.end(), std::back_inserter(old_sstables));
// Remove Garbage Collected SSTables from the SSTable set if any was previously added.
@@ -1421,9 +1418,9 @@ public:
sm_fwd,
mr_fwd,
unwrap_monitor_generator(),
sstables::default_sstable_predicate(),
default_sstable_predicate(),
nullptr,
sstables::integrity_check::yes);
integrity_check::yes);
}
std::string_view report_start_desc() const override {
@@ -1438,7 +1435,7 @@ public:
auto sst = _sstable_creator(this_shard_id());
setup_new_sstable(sst);
sstables::sstable_writer_config cfg = make_sstable_writer_config(compaction_type::Reshape);
sstable_writer_config cfg = make_sstable_writer_config(compaction_type::Reshape);
return compaction_writer{sst->get_writer(*_schema, partitions_per_sstable(), cfg, get_encoding_stats()), sst};
}
@@ -1511,10 +1508,10 @@ public:
// Deduce the estimated keys based on the token range that will end up in this new sstable after the split.
auto token_range_of_new_sst = _table_s.get_token_range_after_split(dk.token());
const auto estimated_keys = _sstables[0]->estimated_keys_for_range(token_range_of_new_sst).get();
const auto estimated_keys = _sstables[0]->estimated_keys_for_range(token_range_of_new_sst);
auto monitor = std::make_unique<compaction_write_monitor>(sst, _table_s, maximum_timestamp(), _sstable_level);
sstables::sstable_writer_config cfg = make_sstable_writer_config(_type);
sstable_writer_config cfg = make_sstable_writer_config(_type);
cfg.monitor = monitor.get();
return compaction_writer{std::move(monitor), sst->get_writer(*_schema, estimated_keys, cfg, get_encoding_stats()), sst};
@@ -1634,31 +1631,6 @@ private:
report_fn("Rectifying by adding missing partition-end to the end of the stream");
}
void on_malformed_sstable_exception(std::exception_ptr e) {
if (_scrub_mode != compaction_type_options::scrub::mode::skip) {
throw compaction_aborted_exception(
_schema->ks_name(),
_schema->cf_name(),
format("scrub compaction failed due to unrecoverable error: {}", e));
}
// Closes the active range tombstone if needed, before emitting partition end.
if (auto current_tombstone = _validator.current_tombstone(); current_tombstone) {
const auto& last_pos = _validator.previous_position();
auto after_last_pos = position_in_partition::after_key(*_schema, last_pos.key());
auto rtc = range_tombstone_change(std::move(after_last_pos), tombstone{});
push_mutation_fragment(mutation_fragment_v2(*_schema, _permit, std::move(rtc)));
}
// Emit partition end if needed.
if (_validator.previous_mutation_fragment_kind() != mutation_fragment_v2::kind::partition_end) {
push_mutation_fragment(mutation_fragment_v2(*_schema, _permit, partition_end{}));
}
// Report the end of stream.
_end_of_stream = true;
}
void fill_buffer_from_underlying() {
utils::get_local_injector().inject("rest_api_keyspace_scrub_abort", [] { throw compaction_aborted_exception("", "", "scrub compaction found invalid data"); });
while (!_reader.is_buffer_empty() && !is_buffer_full()) {
@@ -1731,8 +1703,6 @@ private:
} catch (const storage_io_error&) {
// Propagate these unchanged.
throw;
} catch (const sstables::malformed_sstable_exception& e) {
on_malformed_sstable_exception(std::current_exception());
} catch (...) {
// We don't want failed scrubs to be retried.
throw compaction_aborted_exception(
@@ -1792,7 +1762,7 @@ public:
if (!range.is_full()) {
on_internal_error(clogger, fmt::format("Scrub compaction in mode {} expected full partition range, but got {} instead", _options.operation_mode, range));
}
auto full_scan_reader = _compacting->make_full_scan_reader(std::move(s), std::move(permit), nullptr, unwrap_monitor_generator(), sstables::integrity_check::yes);
auto full_scan_reader = _compacting->make_full_scan_reader(std::move(s), std::move(permit), nullptr, unwrap_monitor_generator(), integrity_check::yes);
return make_mutation_reader<reader>(std::move(full_scan_reader), _options.operation_mode, _validation_errors);
}
@@ -1852,7 +1822,7 @@ class resharding_compaction final : public compaction {
uint64_t estimated_partitions = 0;
};
std::vector<estimated_values> _estimation_per_shard;
std::vector<sstables::run_id> _run_identifiers;
std::vector<run_id> _run_identifiers;
private:
// return estimated partitions per sstable for a given shard
uint64_t partitions_per_sstable(shard_id s) const {
@@ -1861,7 +1831,7 @@ private:
_table_s.get_compaction_strategy().adjust_partition_estimate(_ms_metadata, _estimation_per_shard[s].estimated_partitions, _schema));
}
public:
resharding_compaction(compaction_group_view& table_s, compaction_descriptor descriptor, compaction_data& cdata, compaction_progress_monitor& progress_monitor)
resharding_compaction(compaction_group_view& table_s, sstables::compaction_descriptor descriptor, compaction_data& cdata, compaction_progress_monitor& progress_monitor)
: compaction(table_s, std::move(descriptor), cdata, progress_monitor, use_backlog_tracker::no)
, _estimation_per_shard(smp::count)
, _run_identifiers(smp::count)
@@ -1876,7 +1846,7 @@ public:
}
}
for (auto i : std::views::iota(0u, smp::count)) {
_run_identifiers[i] = sstables::run_id::create_random_id();
_run_identifiers[i] = run_id::create_random_id();
}
}
@@ -1898,7 +1868,7 @@ public:
sm_fwd,
mr_fwd,
unwrap_monitor_generator(),
sstables::integrity_check::yes);
integrity_check::yes);
}
@@ -1976,10 +1946,10 @@ compaction_type compaction_type_options::type() const {
return index_to_type[_options.index()];
}
static std::unique_ptr<compaction> make_compaction(compaction_group_view& table_s, compaction_descriptor descriptor, compaction_data& cdata, compaction_progress_monitor& progress_monitor) {
static std::unique_ptr<compaction> make_compaction(compaction_group_view& table_s, sstables::compaction_descriptor descriptor, compaction_data& cdata, compaction_progress_monitor& progress_monitor) {
struct {
compaction_group_view& table_s;
compaction_descriptor&& descriptor;
sstables::compaction_descriptor&& descriptor;
compaction_data& cdata;
compaction_progress_monitor& progress_monitor;
@@ -2009,7 +1979,7 @@ static std::unique_ptr<compaction> make_compaction(compaction_group_view& table_
return descriptor.options.visit(visitor_factory);
}
static future<compaction_result> scrub_sstables_validate_mode(compaction_descriptor descriptor, compaction_data& cdata, compaction_group_view& table_s, sstables::read_monitor_generator& monitor_generator) {
static future<compaction_result> scrub_sstables_validate_mode(sstables::compaction_descriptor descriptor, compaction_data& cdata, compaction_group_view& table_s, read_monitor_generator& monitor_generator) {
auto schema = table_s.schema();
auto permit = table_s.make_compaction_reader_permit();
@@ -2031,14 +2001,10 @@ static future<compaction_result> scrub_sstables_validate_mode(compaction_descrip
clogger.info("Finished scrubbing in validate mode {} - sstable is {}", sst->get_filename(), validation_errors == 0 ? "valid" : "invalid");
}
using scrub = compaction_type_options::scrub;
using scrub = sstables::compaction_type_options::scrub;
if (validation_errors != 0 && descriptor.options.as<scrub>().quarantine_sstables == scrub::quarantine_invalid_sstables::yes) {
for (auto& sst : descriptor.sstables) {
try {
co_await sst->change_state(sstables::sstable_state::quarantine);
} catch (...) {
clogger.error("Moving {} to quarantine failed due to {}, continuing.", sst->get_filename(), std::current_exception());
}
co_await sst->change_state(sstables::sstable_state::quarantine);
}
}
@@ -2051,7 +2017,7 @@ static future<compaction_result> scrub_sstables_validate_mode(compaction_descrip
};
}
future<compaction_result> scrub_sstables_validate_mode(compaction_descriptor descriptor, compaction_data& cdata, compaction_group_view& table_s, compaction_progress_monitor& progress_monitor) {
future<compaction_result> scrub_sstables_validate_mode(sstables::compaction_descriptor descriptor, compaction_data& cdata, compaction_group_view& table_s, compaction_progress_monitor& progress_monitor) {
progress_monitor.set_generator(std::make_unique<compaction_read_monitor_generator>(table_s, use_backlog_tracker::no));
auto d = defer([&] { progress_monitor.reset_generator(); });
auto res = co_await scrub_sstables_validate_mode(descriptor, cdata, table_s, *progress_monitor._generator);
@@ -2059,7 +2025,7 @@ future<compaction_result> scrub_sstables_validate_mode(compaction_descriptor des
}
future<compaction_result>
compact_sstables(compaction_descriptor descriptor, compaction_data& cdata, compaction_group_view& table_s, compaction_progress_monitor& progress_monitor) {
compact_sstables(sstables::compaction_descriptor descriptor, compaction_data& cdata, compaction_group_view& table_s, compaction_progress_monitor& progress_monitor) {
if (descriptor.sstables.empty()) {
return make_exception_future<compaction_result>(std::runtime_error(format("Called {} compaction with empty set on behalf of {}.{}",
compaction_name(descriptor.options.type()), table_s.schema()->ks_name(), table_s.schema()->cf_name())));
@@ -2102,7 +2068,7 @@ get_fully_expired_sstables(const compaction_group_view& table_s, const std::vect
// Get ancestors from sstable which is empty after restart. It works for this purpose because
// we only need to check that a sstable compacted *in this instance* hasn't an ancestor undeleted.
// Not getting it from sstable metadata because mc format hasn't it available.
return std::ranges::any_of(candidate->compaction_ancestors(), [&compacted_undeleted_gens] (const sstables::generation_type& gen) {
return std::ranges::any_of(candidate->compaction_ancestors(), [&compacted_undeleted_gens] (const generation_type& gen) {
return compacted_undeleted_gens.contains(gen);
});
};
@@ -2150,17 +2116,17 @@ uint64_t compaction_descriptor::sstables_size() const {
}
auto fmt::formatter<::compaction::compaction_type>::format(::compaction::compaction_type type, fmt::format_context& ctx) const
auto fmt::formatter<sstables::compaction_type>::format(sstables::compaction_type type, fmt::format_context& ctx) const
-> decltype(ctx.out()) {
return fmt::format_to(ctx.out(), "{}", to_string(type));
}
auto fmt::formatter<::compaction::compaction_type_options::scrub::mode>::format(::compaction::compaction_type_options::scrub::mode mode, fmt::format_context& ctx) const
auto fmt::formatter<sstables::compaction_type_options::scrub::mode>::format(sstables::compaction_type_options::scrub::mode mode, fmt::format_context& ctx) const
-> decltype(ctx.out()) {
return fmt::format_to(ctx.out(), "{}", to_string(mode));
}
auto fmt::formatter<::compaction::compaction_type_options::scrub::quarantine_mode>::format(::compaction::compaction_type_options::scrub::quarantine_mode mode, fmt::format_context& ctx) const
auto fmt::formatter<sstables::compaction_type_options::scrub::quarantine_mode>::format(sstables::compaction_type_options::scrub::quarantine_mode mode, fmt::format_context& ctx) const
-> decltype(ctx.out()) {
return fmt::format_to(ctx.out(), "{}", to_string(mode));
}

View File

@@ -20,7 +20,9 @@
#include <seastar/core/abort_source.hh>
#include "sstables/basic_info.hh"
namespace compaction {
using namespace compaction;
namespace sstables {
bool is_eligible_for_compaction(const sstables::shared_sstable& sst) noexcept;
@@ -55,8 +57,8 @@ struct compaction_data {
utils::UUID compaction_uuid;
unsigned compaction_fan_in = 0;
struct replacement {
const std::vector<sstables::shared_sstable> removed;
const std::vector<sstables::shared_sstable> added;
const std::vector<shared_sstable> removed;
const std::vector<shared_sstable> added;
};
std::vector<replacement> pending_replacements;
@@ -109,17 +111,19 @@ struct compaction_result {
compaction_stats stats;
};
class read_monitor_generator;
class compaction_progress_monitor {
std::unique_ptr<sstables::read_monitor_generator> _generator = nullptr;
std::unique_ptr<read_monitor_generator> _generator = nullptr;
uint64_t _progress = 0;
public:
void set_generator(std::unique_ptr<sstables::read_monitor_generator> generator);
void set_generator(std::unique_ptr<read_monitor_generator> generator);
void reset_generator();
// Returns number of bytes processed with _generator.
uint64_t get_progress() const;
friend class compaction;
friend future<compaction_result> scrub_sstables_validate_mode(compaction_descriptor, compaction_data&, compaction_group_view&, compaction_progress_monitor&);
friend future<compaction_result> scrub_sstables_validate_mode(sstables::compaction_descriptor, compaction_data&, compaction_group_view&, compaction_progress_monitor&);
};
// Compact a list of N sstables into M sstables.
@@ -127,7 +131,7 @@ public:
//
// compaction_descriptor is responsible for specifying the type of compaction, and influencing
// compaction behavior through its available member fields.
future<compaction_result> compact_sstables(compaction_descriptor descriptor, compaction_data& cdata, compaction_group_view& table_s, compaction_progress_monitor& progress_monitor);
future<compaction_result> compact_sstables(sstables::compaction_descriptor descriptor, compaction_data& cdata, compaction_group_view& table_s, compaction_progress_monitor& progress_monitor);
// Return list of expired sstables for column family cf.
// A sstable is fully expired *iff* its max_local_deletion_time precedes gc_before and its

View File

@@ -11,13 +11,10 @@
#include <unordered_set>
#include <memory>
#include "sstables/shared_sstable.hh"
#include "mutation/timestamp.hh"
class compaction_controller;
namespace compaction {
#include "timestamp.hh"
class compaction_backlog_manager;
class compaction_controller;
// Read and write progress are provided by structures present in progress_manager.hh
// However, we don't want to be tied to their lifetimes and for that reason we will not
@@ -122,5 +119,3 @@ public:
double backlog() const;
void register_backlog_tracker(compaction_backlog_tracker& tracker);
};
}

View File

@@ -17,7 +17,7 @@
#include "compaction_fwd.hh"
#include "mutation_writer/token_group_based_splitting_writer.hh"
namespace compaction {
namespace sstables {
enum class compaction_type {
Compaction = 0,
@@ -33,15 +33,15 @@ enum class compaction_type {
struct compaction_completion_desc {
// Old, existing SSTables that should be deleted and removed from the SSTable set.
std::vector<sstables::shared_sstable> old_sstables;
std::vector<shared_sstable> old_sstables;
// New, fresh SSTables that should be added to SSTable set, replacing the old ones.
std::vector<sstables::shared_sstable> new_sstables;
std::vector<shared_sstable> new_sstables;
// Set of compacted partition ranges that should be invalidated in the cache.
dht::partition_range_vector ranges_for_cache_invalidation;
};
// creates a new SSTable for a given shard
using compaction_sstable_creator_fn = std::function<sstables::shared_sstable(shard_id shard)>;
using compaction_sstable_creator_fn = std::function<shared_sstable(shard_id shard)>;
// Replaces old sstable(s) by new one(s) which contain all non-expired data.
using compaction_sstable_replacer_fn = std::function<void(compaction_completion_desc)>;
@@ -169,7 +169,7 @@ struct compaction_descriptor {
compaction_sstable_replacer_fn replacer;
// Denotes if this compaction task is comprised solely of completely expired SSTables
has_only_fully_expired has_only_fully_expired = has_only_fully_expired::no;
sstables::has_only_fully_expired has_only_fully_expired = has_only_fully_expired::no;
// If set to true, gc will check only the compacting sstables to collect tombstones.
// If set to false, gc will check the memtables, commit log and other uncompacting
@@ -189,7 +189,7 @@ struct compaction_descriptor {
explicit compaction_descriptor(std::vector<sstables::shared_sstable> sstables,
int level = default_level,
uint64_t max_sstable_bytes = default_max_sstable_bytes,
sstables::run_id run_identifier = sstables::run_id::create_random_id(),
run_id run_identifier = run_id::create_random_id(),
compaction_type_options options = compaction_type_options::make_regular(),
compaction::owned_ranges_ptr owned_ranges_ = {})
: sstables(std::move(sstables))
@@ -200,12 +200,12 @@ struct compaction_descriptor {
, owned_ranges(std::move(owned_ranges_))
{}
explicit compaction_descriptor(::compaction::has_only_fully_expired has_only_fully_expired,
explicit compaction_descriptor(sstables::has_only_fully_expired has_only_fully_expired,
std::vector<sstables::shared_sstable> sstables)
: sstables(std::move(sstables))
, level(default_level)
, max_sstable_bytes(default_max_sstable_bytes)
, run_identifier(sstables::run_id::create_random_id())
, run_identifier(run_id::create_random_id())
, options(compaction_type_options::make_regular())
, has_only_fully_expired(has_only_fully_expired)
{}
@@ -221,14 +221,14 @@ struct compaction_descriptor {
}
template <>
struct fmt::formatter<compaction::compaction_type> : fmt::formatter<string_view> {
auto format(compaction::compaction_type, fmt::format_context& ctx) const -> decltype(ctx.out());
struct fmt::formatter<sstables::compaction_type> : fmt::formatter<string_view> {
auto format(sstables::compaction_type, fmt::format_context& ctx) const -> decltype(ctx.out());
};
template <>
struct fmt::formatter<compaction::compaction_type_options::scrub::mode> : fmt::formatter<string_view> {
auto format(compaction::compaction_type_options::scrub::mode, fmt::format_context& ctx) const -> decltype(ctx.out());
struct fmt::formatter<sstables::compaction_type_options::scrub::mode> : fmt::formatter<string_view> {
auto format(sstables::compaction_type_options::scrub::mode, fmt::format_context& ctx) const -> decltype(ctx.out());
};
template <>
struct fmt::formatter<compaction::compaction_type_options::scrub::quarantine_mode> : fmt::formatter<string_view> {
auto format(compaction::compaction_type_options::scrub::quarantine_mode, fmt::format_context& ctx) const -> decltype(ctx.out());
struct fmt::formatter<sstables::compaction_type_options::scrub::quarantine_mode> : fmt::formatter<string_view> {
auto format(sstables::compaction_type_options::scrub::quarantine_mode, fmt::format_context& ctx) const -> decltype(ctx.out());
};

View File

@@ -15,17 +15,20 @@
#include "compaction_descriptor.hh"
class reader_permit;
class compaction_backlog_tracker;
namespace sstables {
class sstable_set;
class compaction_strategy;
class sstables_manager;
struct sstable_writer_config;
}
namespace compaction {
class compaction_strategy;
class compaction_strategy_state;
class compaction_backlog_tracker;
}
namespace compaction {
class compaction_group_view {
public:
@@ -40,7 +43,7 @@ public:
virtual lw_shared_ptr<const sstables::sstable_set> sstable_set_for_tombstone_gc() const = 0;
virtual std::unordered_set<sstables::shared_sstable> fully_expired_sstables(const std::vector<sstables::shared_sstable>& sstables, gc_clock::time_point compaction_time) const = 0;
virtual const std::vector<sstables::shared_sstable>& compacted_undeleted_sstables() const noexcept = 0;
virtual compaction_strategy& get_compaction_strategy() const noexcept = 0;
virtual sstables::compaction_strategy& get_compaction_strategy() const noexcept = 0;
virtual compaction_strategy_state& get_compaction_strategy_state() noexcept = 0;
virtual reader_permit make_compaction_reader_permit() const = 0;
virtual sstables::sstables_manager& get_sstables_manager() noexcept = 0;
@@ -50,7 +53,7 @@ public:
virtual api::timestamp_type min_memtable_live_timestamp() const = 0;
virtual api::timestamp_type min_memtable_live_row_marker_timestamp() const = 0;
virtual bool memtable_has_key(const dht::decorated_key& key) const = 0;
virtual future<> on_compaction_completion(compaction_completion_desc desc, sstables::offstrategy offstrategy) = 0;
virtual future<> on_compaction_completion(sstables::compaction_completion_desc desc, sstables::offstrategy offstrategy) = 0;
virtual bool is_auto_compaction_disabled_by_user() const noexcept = 0;
virtual bool tombstone_gc_enabled() const noexcept = 0;
virtual const tombstone_gc_state& get_tombstone_gc_state() const noexcept = 0;

File diff suppressed because it is too large Load Diff

View File

@@ -31,16 +31,14 @@
#include "strategy_control.hh"
#include "backlog_controller.hh"
#include "seastarx.hh"
#include "compaction/exceptions.hh"
#include "sstables/exceptions.hh"
#include "tombstone_gc.hh"
#include "utils/pluggable.hh"
#include "compaction/compaction_reenabler.hh"
#include "utils/disk_space_monitor.hh"
namespace db {
class compaction_history_entry;
class system_keyspace;
class config;
}
namespace sstables { class test_env_compaction_manager; }
@@ -63,11 +61,12 @@ inline owned_ranges_ptr make_owned_ranges_ptr(dht::token_range_vector&& ranges)
return make_lw_shared<const dht::token_range_vector>(std::move(ranges));
}
}
// Compaction manager provides facilities to submit and track compaction jobs on
// behalf of existing tables.
class compaction_manager: public peering_sharded_service<compaction_manager> {
class compaction_manager {
public:
using compaction_stats_opt = std::optional<compaction_stats>;
using compaction_stats_opt = std::optional<sstables::compaction_stats>;
struct stats {
int64_t pending_tasks = 0;
int64_t completed_tasks = 0;
@@ -102,17 +101,16 @@ private:
//
// none: started, but not yet enabled. Once the compaction manager moves out of "none", it can
// never legally move back
// stopped: stop() was called. The compaction_manager will never be running again
// stopped: stop() was called. The compaction_manager will never be enabled or disabled again
// and can no longer be used (although it is possible to still grab metrics, stats,
// etc)
// running: running, started and enabled at least once. Whether new compactions are accepted or not is determined by the counter
enum class state { none, stopped, running };
// enabled: accepting compactions
// disabled: not accepting compactions
//
// Moving the compaction manager to and from enabled and disable states is legal, as many times
// as necessary.
enum class state { none, stopped, disabled, enabled };
state _state = state::none;
// The compaction manager is initiated in the none state. It is moved to the running state when start() is invoked
// and the service is immediately enabled.
uint32_t _disabled_state_count = 0;
bool is_disabled() const { return _state != state::running || _disabled_state_count > 0; }
std::optional<future<>> _stop_future;
@@ -124,7 +122,7 @@ private:
// a sstable from being compacted twice.
std::unordered_set<sstables::shared_sstable> _compacting_sstables;
std::optional<future<>> _waiting_reevaluation;
future<> _waiting_reevalution = make_ready_future<>();
condition_variable _postponed_reevaluation;
// tables that wait for compaction but had its submission postponed due to ongoing compaction.
std::unordered_set<compaction::compaction_group_view*> _postponed;
@@ -169,8 +167,6 @@ private:
// still uses it with reference semantics (inconsistently though).
// Drop this member, once the code is converted into using value semantics.
tombstone_gc_state _tombstone_gc_state;
utils::disk_space_monitor::subscription _out_of_space_subscription;
private:
// Requires task->_compaction_state.gate to be held and task to be registered in _tasks.
future<compaction_stats_opt> perform_task(shared_ptr<compaction::compaction_task_executor> task, throw_if_stopping do_throw_if_stopping);
@@ -232,12 +228,11 @@ private:
future<> postponed_compactions_reevaluation();
void reevaluate_postponed_compactions() noexcept;
future<> stop_postponed_compactions() noexcept;
// Postpone compaction for a table that couldn't be executed due to ongoing
// similar-sized compaction.
void postpone_compaction_for_table(compaction::compaction_group_view* t);
using quarantine_invalid_sstables = compaction_type_options::scrub::quarantine_invalid_sstables;
using quarantine_invalid_sstables = sstables::compaction_type_options::scrub::quarantine_invalid_sstables;
future<compaction_stats_opt> perform_sstable_scrub_validate_mode(compaction::compaction_group_view& t, tasks::task_info info, quarantine_invalid_sstables quarantine_sstables);
future<> update_static_shares(float shares);
@@ -249,10 +244,9 @@ private:
requires std::derived_from<TaskType, compaction_task_executor> &&
std::derived_from<TaskType, compaction_task_impl>
future<compaction_manager::compaction_stats_opt> perform_task_on_all_files(sstring reason, tasks::task_info info, compaction_group_view& t, compaction_type_options options, owned_ranges_ptr owned_ranges_ptr,
get_candidates_func get_func, throw_if_stopping do_throw_if_stopping, Args... args);
future<compaction_manager::compaction_stats_opt> perform_task_on_all_files(sstring reason, tasks::task_info info, compaction_group_view& t, sstables::compaction_type_options options, owned_ranges_ptr owned_ranges_ptr, get_candidates_func get_func, Args... args);
future<compaction_stats_opt> rewrite_sstables(compaction::compaction_group_view& t, compaction_type_options options, owned_ranges_ptr, get_candidates_func, tasks::task_info info,
future<compaction_stats_opt> rewrite_sstables(compaction::compaction_group_view& t, sstables::compaction_type_options options, owned_ranges_ptr, get_candidates_func, tasks::task_info info,
can_purge_tombstones can_purge = can_purge_tombstones::yes, sstring options_desc = "");
// Stop all fibers, without waiting. Safe to be called multiple times.
@@ -308,18 +302,12 @@ public:
// Stop all fibers. Ongoing compactions will be waited. Should only be called
// once, from main teardown path.
future<> stop();
future<> start(const db::config& cfg, utils::disk_space_monitor* dsm);
// cancels all running compactions and moves the compaction manager into disabled state.
// The compaction manager is still alive after drain but it will not accept new compactions
// unless it is moved back to enabled state.
future<> drain();
// Check if compaction manager is running, i.e. it was enabled or drained
bool is_running() const noexcept {
return _state == state::running;
}
using compaction_history_consumer = noncopyable_function<future<>(const db::compaction_history_entry&)>;
future<> get_compaction_history(compaction_history_consumer&& f);
@@ -351,27 +339,27 @@ private:
// Add sst to or remove it from the respective compaction_state.sstables_requiring_cleanup set.
bool update_sstable_cleanup_state(compaction_group_view& t, const sstables::shared_sstable& sst, const dht::token_range_vector& sorted_owned_ranges);
future<> on_compaction_completion(compaction_group_view& t, compaction_completion_desc desc, sstables::offstrategy offstrategy);
future<> on_compaction_completion(compaction_group_view& t, sstables::compaction_completion_desc desc, sstables::offstrategy offstrategy);
public:
// Submit a table to be upgraded and wait for its termination.
future<> perform_sstable_upgrade(owned_ranges_ptr sorted_owned_ranges, compaction::compaction_group_view& t, bool exclude_current_version, tasks::task_info info);
// Submit a table to be scrubbed and wait for its termination.
future<compaction_stats_opt> perform_sstable_scrub(compaction::compaction_group_view& t, compaction_type_options::scrub opts, tasks::task_info info);
future<compaction_stats_opt> perform_sstable_scrub(compaction::compaction_group_view& t, sstables::compaction_type_options::scrub opts, tasks::task_info info);
// Submit a table for major compaction.
future<> perform_major_compaction(compaction::compaction_group_view& t, tasks::task_info info, bool consider_only_existing_data = false);
// Splits a compaction group by segregating all its sstable according to the classifier[1].
// [1]: See compaction_type_options::splitting::classifier.
// [1]: See sstables::compaction_type_options::splitting::classifier.
// Returns when all sstables in the main sstable set are split. The only exception is shutdown
// or user aborted splitting using stop API.
future<compaction_stats_opt> perform_split_compaction(compaction::compaction_group_view& t, compaction_type_options::split opt, tasks::task_info info);
future<compaction_stats_opt> perform_split_compaction(compaction::compaction_group_view& t, sstables::compaction_type_options::split opt, tasks::task_info info);
// Splits a single SSTable by segregating all its data according to the classifier.
// If SSTable doesn't need split, the same input SSTable is returned as output.
// If SSTable needs split, then output SSTables are returned and the input SSTable is deleted.
future<std::vector<sstables::shared_sstable>> maybe_split_sstable(sstables::shared_sstable sst, compaction_group_view& t, compaction_type_options::split opt);
future<std::vector<sstables::shared_sstable>> maybe_split_sstable(sstables::shared_sstable sst, compaction_group_view& t, sstables::compaction_type_options::split opt);
// Run a custom job for a given table, defined by a function
// it completes when future returned by job is ready or returns immediately
@@ -380,7 +368,7 @@ public:
// parameter type is the compaction type the operation can most closely be
// associated with, use compaction_type::Compaction, if none apply.
// parameter job is a function that will carry the operation
future<> run_custom_job(compaction::compaction_group_view& s, compaction_type type, const char *desc, noncopyable_function<future<>(compaction_data&, compaction_progress_monitor&)> job, tasks::task_info info, throw_if_stopping do_throw_if_stopping);
future<> run_custom_job(compaction::compaction_group_view& s, sstables::compaction_type type, const char *desc, noncopyable_function<future<>(sstables::compaction_data&, sstables::compaction_progress_monitor&)> job, tasks::task_info info, throw_if_stopping do_throw_if_stopping);
// Disable compaction temporarily for a table t.
// Caller should call the compaction_reenabler::reenable
@@ -413,7 +401,7 @@ public:
return _stats;
}
const std::vector<compaction_info> get_compactions(std::function<bool(const compaction_group_view*)> filter = [] (auto) { return true; }) const;
const std::vector<sstables::compaction_info> get_compactions(compaction::compaction_group_view* t = nullptr) const;
// Returns true if table has an ongoing compaction, running on its behalf
bool has_table_ongoing_compaction(const compaction::compaction_group_view& t) const;
@@ -421,16 +409,15 @@ public:
bool compaction_disabled(compaction::compaction_group_view& t) const;
// Stops ongoing compaction of a given type.
future<> stop_compaction(sstring type, std::function<bool(const compaction_group_view*)> filter = [] (auto) { return true; });
future<> stop_compaction(sstring type, compaction::compaction_group_view* table = nullptr);
private:
std::vector<shared_ptr<compaction_task_executor>>
do_stop_ongoing_compactions(sstring reason, std::function<bool(const compaction_group_view*)> filter, std::optional<compaction_type> type_opt) noexcept;
future<> stop_ongoing_compactions(sstring reason, std::function<bool(const compaction_group_view*)> filter, std::optional<compaction_type> type_opt = {}) noexcept;
do_stop_ongoing_compactions(sstring reason, compaction_group_view* t, std::optional<sstables::compaction_type> type_opt) noexcept;
public:
// Stops ongoing compaction of a given table and/or compaction_type.
future<> stop_ongoing_compactions(sstring reason, compaction::compaction_group_view* t = nullptr, std::optional<compaction_type> type_opt = {}) noexcept;
future<> stop_ongoing_compactions(sstring reason, compaction::compaction_group_view* t = nullptr, std::optional<sstables::compaction_type> type_opt = {}) noexcept;
future<> await_ongoing_compactions(compaction_group_view* t);
@@ -446,7 +433,7 @@ public:
compaction_backlog_tracker& get_backlog_tracker(compaction::compaction_group_view& t);
static compaction_data create_compaction_data();
static sstables::compaction_data create_compaction_data();
compaction::strategy_control& get_strategy_control() const noexcept;
@@ -474,7 +461,6 @@ public:
friend class compaction_weight_registration;
friend class sstables::test_env_compaction_manager;
friend class compaction::compaction_task_impl;
friend class compaction::compaction_task_executor;
friend class compaction::sstables_task_executor;
friend class compaction::major_compaction_task_executor;
@@ -488,6 +474,8 @@ public:
friend compaction_reenabler;
};
namespace compaction {
class compaction_task_executor
: public enable_shared_from_this<compaction_task_executor>
, public boost::intrusive::list_base_hook<boost::intrusive::link_mode<boost::intrusive::auto_unlink>> {
@@ -510,21 +498,21 @@ protected:
compaction_manager& _cm;
::compaction::compaction_group_view* _compacting_table = nullptr;
compaction::compaction_state& _compaction_state;
compaction_data _compaction_data;
sstables::compaction_data _compaction_data;
state _state = state::none;
throw_if_stopping _do_throw_if_stopping;
compaction_progress_monitor _progress_monitor;
sstables::compaction_progress_monitor _progress_monitor;
private:
shared_future<compaction_manager::compaction_stats_opt> _compaction_done = make_ready_future<compaction_manager::compaction_stats_opt>();
exponential_backoff_retry _compaction_retry = exponential_backoff_retry(std::chrono::seconds(5), std::chrono::seconds(300));
compaction_type _type;
sstables::compaction_type _type;
sstables::run_id _output_run_identifier;
sstring _description;
compaction_manager::compaction_stats_opt _stats = std::nullopt;
public:
explicit compaction_task_executor(compaction_manager& mgr, throw_if_stopping do_throw_if_stopping, ::compaction::compaction_group_view* t, compaction_type type, sstring desc);
explicit compaction_task_executor(compaction_manager& mgr, throw_if_stopping do_throw_if_stopping, ::compaction::compaction_group_view* t, sstables::compaction_type type, sstring desc);
compaction_task_executor(compaction_task_executor&&) = delete;
compaction_task_executor(const compaction_task_executor&) = delete;
@@ -563,14 +551,14 @@ protected:
// otherwise, returns stop_iteration::no after sleep for exponential retry.
future<stop_iteration> maybe_retry(std::exception_ptr err, bool throw_on_abort = false);
future<compaction_result> compact_sstables_and_update_history(compaction_descriptor descriptor, compaction_data& cdata, on_replacement&,
future<sstables::compaction_result> compact_sstables_and_update_history(sstables::compaction_descriptor descriptor, sstables::compaction_data& cdata, on_replacement&,
compaction_manager::can_purge_tombstones can_purge = compaction_manager::can_purge_tombstones::yes);
future<compaction_result> compact_sstables(compaction_descriptor descriptor, compaction_data& cdata, on_replacement&,
future<sstables::compaction_result> compact_sstables(sstables::compaction_descriptor descriptor, sstables::compaction_data& cdata, on_replacement&,
compaction_manager::can_purge_tombstones can_purge = compaction_manager::can_purge_tombstones::yes,
sstables::offstrategy offstrategy = sstables::offstrategy::no);
future<> update_history(::compaction::compaction_group_view& t, compaction_result&& res, const compaction_data& cdata);
bool should_update_history(compaction_type ct) {
return ct == compaction_type::Compaction;
future<> update_history(::compaction::compaction_group_view& t, sstables::compaction_result&& res, const sstables::compaction_data& cdata);
bool should_update_history(sstables::compaction_type ct) {
return ct == sstables::compaction_type::Compaction;
}
public:
compaction_manager::compaction_stats_opt get_stats() const noexcept {
@@ -583,7 +571,7 @@ public:
return _compacting_table;
}
compaction_type compaction_type() const noexcept {
sstables::compaction_type compaction_type() const noexcept {
return _type;
}
@@ -591,11 +579,11 @@ public:
return _state == state::active;
}
const ::compaction::compaction_data& compaction_data() const noexcept {
const sstables::compaction_data& compaction_data() const noexcept {
return _compaction_data;
}
::compaction::compaction_data& compaction_data() noexcept {
sstables::compaction_data& compaction_data() noexcept {
return _compaction_data;
}
@@ -625,7 +613,7 @@ public:
void stop_compaction(sstring reason) noexcept;
compaction_stopped_exception make_compaction_stopped_exception() const;
sstables::compaction_stopped_exception make_compaction_stopped_exception() const;
template<typename TaskExecutor, typename... Args>
requires std::is_base_of_v<compaction_task_executor, TaskExecutor> &&
@@ -655,11 +643,7 @@ struct fmt::formatter<compaction::compaction_task_executor> {
auto format(const compaction::compaction_task_executor& ex, fmt::format_context& ctx) const -> decltype(ctx.out());
};
namespace compaction {
bool needs_cleanup(const sstables::shared_sstable& sst, const dht::token_range_vector& owned_ranges);
// Return all sstables but those that are off-strategy like the ones in maintenance set and staging dir.
future<std::vector<sstables::shared_sstable>> in_strategy_sstables(compaction::compaction_group_view& table_s);
}

View File

@@ -10,10 +10,12 @@
#include <seastar/core/gate.hh>
class compaction_manager;
namespace compaction {
class compaction_manager;
class compaction_group_view;
class compaction_state;
}
class compaction_reenabler {
compaction_manager& _cm;
@@ -36,4 +38,3 @@ public:
}
};
}

View File

@@ -62,4 +62,4 @@ struct compaction_state {
}
};
} // namespace compaction
} // namespace compaction_manager

View File

@@ -32,11 +32,13 @@
#include "incremental_compaction_strategy.hh"
#include "sstables/sstable_set_impl.hh"
namespace compaction {
logging::logger leveled_manifest::logger("LeveledManifest");
logging::logger compaction_strategy_logger("CompactionStrategy");
using namespace sstables;
namespace sstables {
using timestamp_type = api::timestamp_type;
compaction_descriptor compaction_strategy_impl::make_major_compaction_job(std::vector<sstables::shared_sstable> candidates, int level, uint64_t max_sstable_bytes) {
@@ -44,21 +46,16 @@ compaction_descriptor compaction_strategy_impl::make_major_compaction_job(std::v
return compaction_descriptor(std::move(candidates), level, max_sstable_bytes);
}
std::vector<compaction_descriptor> compaction_strategy_impl::get_cleanup_compaction_jobs(compaction_group_view& table_s, std::vector<sstables::shared_sstable> candidates) const {
std::vector<compaction_descriptor> compaction_strategy_impl::get_cleanup_compaction_jobs(compaction_group_view& table_s, std::vector<shared_sstable> candidates) const {
// The default implementation is suboptimal and causes the writeamp problem described issue in #10097.
// The compaction strategy relying on it should strive to implement its own method, to make cleanup bucket aware.
return candidates | std::views::transform([] (const sstables::shared_sstable& sst) {
return candidates | std::views::transform([] (const shared_sstable& sst) {
return compaction_descriptor({ sst },
sst->get_sstable_level(), compaction_descriptor::default_max_sstable_bytes, sst->run_identifier());
sst->get_sstable_level(), sstables::compaction_descriptor::default_max_sstable_bytes, sst->run_identifier());
}) | std::ranges::to<std::vector>();
}
std::unique_ptr<sstables::sstable_set_impl>
compaction_strategy_impl::make_sstable_set(const compaction_group_view& ts) const {
return std::make_unique<sstables::partitioned_sstable_set>(ts.schema(), ts.token_range());
}
bool compaction_strategy_impl::worth_dropping_tombstones(const sstables::shared_sstable& sst, gc_clock::time_point compaction_time, const compaction_group_view& t) {
bool compaction_strategy_impl::worth_dropping_tombstones(const shared_sstable& sst, gc_clock::time_point compaction_time, const compaction_group_view& t) {
if (_disable_tombstone_compaction) {
return false;
}
@@ -85,7 +82,7 @@ mutation_reader_consumer compaction_strategy_impl::make_interposer_consumer(cons
}
compaction_descriptor
compaction_strategy_impl::get_reshaping_job(std::vector<sstables::shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
compaction_strategy_impl::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
return compaction_descriptor();
}
@@ -165,7 +162,7 @@ static bool validate_unchecked_tombstone_compaction(const std::map<sstring, sstr
return unchecked_tombstone_compaction;
}
void compaction_strategy_impl::validate_options_for_strategy_type(const std::map<sstring, sstring>& options, compaction_strategy_type type) {
void compaction_strategy_impl::validate_options_for_strategy_type(const std::map<sstring, sstring>& options, sstables::compaction_strategy_type type) {
auto unchecked_options = options;
compaction_strategy_impl::validate_options(options, unchecked_options);
switch (type) {
@@ -215,6 +212,8 @@ compaction_strategy_impl::compaction_strategy_impl(const std::map<sstring, sstri
_unchecked_tombstone_compaction = validate_unchecked_tombstone_compaction(options);
}
} // namespace sstables
size_tiered_backlog_tracker::inflight_component
size_tiered_backlog_tracker::compacted_backlog(const compaction_backlog_tracker::ongoing_compactions& ongoing_compactions) const {
inflight_component in;
@@ -233,11 +232,12 @@ size_tiered_backlog_tracker::compacted_backlog(const compaction_backlog_tracker:
}
// Provides strong exception safety guarantees.
size_tiered_backlog_tracker::sstables_backlog_contribution size_tiered_backlog_tracker::calculate_sstables_backlog_contribution(const std::vector<sstables::shared_sstable>& all, const size_tiered_compaction_strategy_options& stcs_options) {
size_tiered_backlog_tracker::sstables_backlog_contribution size_tiered_backlog_tracker::calculate_sstables_backlog_contribution(const std::vector<sstables::shared_sstable>& all, const sstables::size_tiered_compaction_strategy_options& stcs_options) {
sstables_backlog_contribution contrib;
if (all.empty()) {
return contrib;
}
using namespace sstables;
// Deduce threshold from the last SSTable added to the set
// Low-efficiency jobs, which fan-in is smaller than min-threshold, will not have backlog accounted.
@@ -245,14 +245,14 @@ size_tiered_backlog_tracker::sstables_backlog_contribution size_tiered_backlog_t
// in efficient jobs acting more aggressive than they really have to.
// TODO: potentially switch to compaction manager's fan-in threshold, so to account for the dynamic
// fan-in threshold behavior.
const auto& newest_sst = std::ranges::max(all, std::less<sstables::generation_type>(), std::mem_fn(&sstables::sstable::generation));
const auto& newest_sst = std::ranges::max(all, std::less<generation_type>(), std::mem_fn(&sstable::generation));
auto threshold = newest_sst->get_schema()->min_compaction_threshold();
for (auto& bucket : size_tiered_compaction_strategy::get_buckets(all, stcs_options)) {
if (!size_tiered_compaction_strategy::is_bucket_interesting(bucket, threshold)) {
continue;
}
contrib.value += std::ranges::fold_left(bucket | std::views::transform([] (const sstables::shared_sstable& sst) -> double {
contrib.value += std::ranges::fold_left(bucket | std::views::transform([] (const shared_sstable& sst) -> double {
return sst->data_size() * log4(sst->data_size());
}), double(0.0f), std::plus{});
// Controller is disabled if exception is caught during add / remove calls, so not making any effort to make this exception safe
@@ -319,6 +319,8 @@ void size_tiered_backlog_tracker::replace_sstables(const std::vector<sstables::s
});
}
namespace sstables {
extern logging::logger clogger;
// The backlog for TWCS is just the sum of the individual backlogs in each time window.
@@ -580,7 +582,7 @@ struct null_backlog_tracker final : public compaction_backlog_tracker::impl {
class null_compaction_strategy : public compaction_strategy_impl {
public:
virtual future<compaction_descriptor> get_sstables_for_compaction(compaction_group_view& table_s, strategy_control& control) override {
return make_ready_future<compaction_descriptor>();
return make_ready_future<sstables::compaction_descriptor>();
}
virtual future<int64_t> estimated_pending_compactions(compaction_group_view& table_s) const override {
@@ -662,6 +664,10 @@ std::unique_ptr<compaction_backlog_tracker::impl> time_window_compaction_strateg
return std::make_unique<time_window_backlog_tracker>(_options, _stcs_options);
}
} // namespace sstables
namespace sstables {
size_tiered_compaction_strategy::size_tiered_compaction_strategy(const std::map<sstring, sstring>& options)
: compaction_strategy_impl(options)
, _options(options)
@@ -702,11 +708,11 @@ compaction_descriptor compaction_strategy::get_major_compaction_job(compaction_g
return _compaction_strategy_impl->get_major_compaction_job(table_s, std::move(candidates));
}
std::vector<compaction_descriptor> compaction_strategy::get_cleanup_compaction_jobs(compaction_group_view& table_s, std::vector<sstables::shared_sstable> candidates) const {
std::vector<compaction_descriptor> compaction_strategy::get_cleanup_compaction_jobs(compaction_group_view& table_s, std::vector<shared_sstable> candidates) const {
return _compaction_strategy_impl->get_cleanup_compaction_jobs(table_s, std::move(candidates));
}
void compaction_strategy::notify_completion(compaction_group_view& table_s, const std::vector<sstables::shared_sstable>& removed, const std::vector<sstables::shared_sstable>& added) {
void compaction_strategy::notify_completion(compaction_group_view& table_s, const std::vector<shared_sstable>& removed, const std::vector<shared_sstable>& added) {
_compaction_strategy_impl->notify_completion(table_s, removed, added);
}
@@ -726,8 +732,8 @@ compaction_backlog_tracker compaction_strategy::make_backlog_tracker() const {
return compaction_backlog_tracker(_compaction_strategy_impl->make_backlog_tracker());
}
compaction_descriptor
compaction_strategy::get_reshaping_job(std::vector<sstables::shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
sstables::compaction_descriptor
compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
return _compaction_strategy_impl->get_reshaping_job(std::move(input), schema, cfg);
}
@@ -743,12 +749,6 @@ bool compaction_strategy::use_interposer_consumer() const {
return _compaction_strategy_impl->use_interposer_consumer();
}
sstables::sstable_set
compaction_strategy::make_sstable_set(const compaction::compaction_group_view& ts) const {
return sstables::sstable_set(
_compaction_strategy_impl->make_sstable_set(ts));
}
compaction_strategy make_compaction_strategy(compaction_strategy_type strategy, const std::map<sstring, sstring>& options) {
::shared_ptr<compaction_strategy_impl> impl;
@@ -783,14 +783,14 @@ compaction_strategy make_compaction_strategy(compaction_strategy_type strategy,
}
future<reshape_config> make_reshape_config(const sstables::storage& storage, reshape_mode mode) {
co_return reshape_config{
co_return sstables::reshape_config{
.mode = mode,
.free_storage_space = co_await storage.free_space() / smp::count,
};
}
std::unique_ptr<sstables::sstable_set_impl> incremental_compaction_strategy::make_sstable_set(const compaction_group_view& ts) const {
return std::make_unique<sstables::partitioned_sstable_set>(ts.schema(), ts.token_range());
std::unique_ptr<sstable_set_impl> incremental_compaction_strategy::make_sstable_set(const compaction_group_view& ts) const {
return std::make_unique<partitioned_sstable_set>(ts.schema(), ts.token_range());
}
}
@@ -804,9 +804,9 @@ compaction_strategy_state compaction_strategy_state::make(const compaction_strat
case compaction_strategy_type::incremental:
return compaction_strategy_state(default_empty_state{});
case compaction_strategy_type::leveled:
return compaction_strategy_state(seastar::make_shared<leveled_compaction_strategy_state>());
return compaction_strategy_state(leveled_compaction_strategy_state{});
case compaction_strategy_type::time_window:
return compaction_strategy_state(seastar::make_shared<time_window_compaction_strategy_state>());
return compaction_strategy_state(time_window_compaction_strategy_state{});
default:
throw std::runtime_error("strategy not supported");
}

View File

@@ -16,19 +16,18 @@
#include "strategy_control.hh"
struct mutation_source_metadata;
class compaction_backlog_tracker;
extern logging::logger compaction_strategy_logger;
using namespace compaction;
namespace sstables {
class compaction_strategy_impl;
class sstable;
class sstable_set;
class storage;
}
namespace compaction {
class compaction_backlog_tracker;
class compaction_strategy_impl;
struct compaction_descriptor;
class storage;
class compaction_strategy {
::shared_ptr<compaction_strategy_impl> _compaction_strategy_impl;
@@ -44,13 +43,13 @@ public:
// Return a list of sstables to be compacted after applying the strategy.
future<compaction_descriptor> get_sstables_for_compaction(compaction_group_view& table_s, strategy_control& control);
compaction_descriptor get_major_compaction_job(compaction_group_view& table_s, std::vector<sstables::shared_sstable> candidates);
compaction_descriptor get_major_compaction_job(compaction_group_view& table_s, std::vector<shared_sstable> candidates);
std::vector<compaction_descriptor> get_cleanup_compaction_jobs(compaction_group_view& table_s, std::vector<sstables::shared_sstable> candidates) const;
std::vector<compaction_descriptor> get_cleanup_compaction_jobs(compaction_group_view& table_s, std::vector<shared_sstable> candidates) const;
// Some strategies may look at the compacted and resulting sstables to
// get some useful information for subsequent compactions.
void notify_completion(compaction_group_view& table_s, const std::vector<sstables::shared_sstable>& removed, const std::vector<sstables::shared_sstable>& added);
void notify_completion(compaction_group_view& table_s, const std::vector<shared_sstable>& removed, const std::vector<shared_sstable>& added);
// Return if parallel compaction is allowed by strategy.
bool parallel_compaction() const;
@@ -106,7 +105,7 @@ public:
return name(type());
}
sstables::sstable_set make_sstable_set(const compaction_group_view& ts) const;
sstable_set make_sstable_set(const compaction_group_view& ts) const;
compaction_backlog_tracker make_backlog_tracker() const;
@@ -128,7 +127,7 @@ public:
//
// The caller should also pass a maximum number of SSTables which is the maximum amount of
// SSTables that can be added into a single job.
compaction_descriptor get_reshaping_job(std::vector<sstables::shared_sstable> input, schema_ptr schema, reshape_config cfg) const;
compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const;
};

View File

@@ -14,10 +14,8 @@
#include "compaction_descriptor.hh"
namespace sstables {
class sstable_set_impl;
}
namespace compaction {
class sstable_set_impl;
class compaction_strategy_impl {
public:
@@ -37,7 +35,7 @@ protected:
public:
static std::optional<sstring> get_value(const std::map<sstring, sstring>& options, const sstring& name);
static void validate_min_max_threshold(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options);
static void validate_options_for_strategy_type(const std::map<sstring, sstring>& options, compaction_strategy_type type);
static void validate_options_for_strategy_type(const std::map<sstring, sstring>& options, sstables::compaction_strategy_type type);
protected:
static void validate_options(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options);
compaction_strategy_impl() = default;
@@ -51,14 +49,14 @@ public:
virtual compaction_descriptor get_major_compaction_job(compaction_group_view& table_s, std::vector<sstables::shared_sstable> candidates) {
return make_major_compaction_job(std::move(candidates));
}
virtual std::vector<compaction_descriptor> get_cleanup_compaction_jobs(compaction_group_view& table_s, std::vector<sstables::shared_sstable> candidates) const;
virtual void notify_completion(compaction_group_view& table_s, const std::vector<sstables::shared_sstable>& removed, const std::vector<sstables::shared_sstable>& added) { }
virtual std::vector<compaction_descriptor> get_cleanup_compaction_jobs(compaction_group_view& table_s, std::vector<shared_sstable> candidates) const;
virtual void notify_completion(compaction_group_view& table_s, const std::vector<shared_sstable>& removed, const std::vector<shared_sstable>& added) { }
virtual compaction_strategy_type type() const = 0;
virtual bool parallel_compaction() const {
return true;
}
virtual future<int64_t> estimated_pending_compactions(compaction_group_view& table_s) const = 0;
virtual std::unique_ptr<sstables::sstable_set_impl> make_sstable_set(const compaction_group_view& ts) const;
virtual std::unique_ptr<sstable_set_impl> make_sstable_set(const compaction_group_view& ts) const;
bool use_clustering_key_filter() const {
return _use_clustering_key_filter;
@@ -66,7 +64,7 @@ public:
// Check if a given sstable is entitled for tombstone compaction based on its
// droppable tombstone histogram and gc_before.
bool worth_dropping_tombstones(const sstables::shared_sstable& sst, gc_clock::time_point compaction_time, const compaction_group_view& t);
bool worth_dropping_tombstones(const shared_sstable& sst, gc_clock::time_point compaction_time, const compaction_group_view& t);
virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() const = 0;
@@ -90,6 +88,6 @@ public:
return false;
}
virtual compaction_descriptor get_reshaping_job(std::vector<sstables::shared_sstable> input, schema_ptr schema, reshape_config cfg) const;
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const;
};
}

View File

@@ -18,7 +18,7 @@ namespace compaction {
class compaction_strategy_state {
public:
struct default_empty_state {};
using states_variant = std::variant<default_empty_state, leveled_compaction_strategy_state_ptr, time_window_compaction_strategy_state_ptr>;
using states_variant = std::variant<default_empty_state, sstables::leveled_compaction_strategy_state, sstables::time_window_compaction_strategy_state>;
private:
states_variant _state;
public:
@@ -33,7 +33,7 @@ public:
return std::get<StateType>(_state);
}
static compaction_strategy_state make(const compaction_strategy& cs);
static compaction_strategy_state make(const sstables::compaction_strategy& cs);
};
}

View File

@@ -10,7 +10,7 @@
#include <cstdint>
namespace compaction {
namespace sstables {
enum class compaction_strategy_type {
null,

Some files were not shown because too many files have changed in this diff Show More