Compare commits

..

4 Commits

Author SHA1 Message Date
copilot-swe-agent[bot]
0afb6b8fb6 Fix missing include for schema.hh in system0_virtual_tables.cc
Co-authored-by: tgrabiec <283695+tgrabiec@users.noreply.github.com>
2025-12-05 16:19:36 +00:00
copilot-swe-agent[bot]
03733ffa1b Simplify system0_virtual_tables implementation
Co-authored-by: tgrabiec <283695+tgrabiec@users.noreply.github.com>
2025-12-05 16:17:19 +00:00
copilot-swe-agent[bot]
780243790e Add system0 virtual tables for group0 updates (initial implementation)
Co-authored-by: tgrabiec <283695+tgrabiec@users.noreply.github.com>
2025-12-05 16:14:31 +00:00
copilot-swe-agent[bot]
1125175b51 Initial plan 2025-12-05 16:01:22 +00:00
970 changed files with 10944 additions and 40445 deletions

9
.github/CODEOWNERS vendored
View File

@@ -1,5 +1,5 @@
# AUTH
auth/* @nuivall
auth/* @nuivall @ptrsmrn
# CACHE
row_cache* @tgrabiec
@@ -25,11 +25,11 @@ compaction/* @raphaelsc
transport/*
# CQL QUERY LANGUAGE
cql3/* @tgrabiec @nuivall
cql3/* @tgrabiec @nuivall @ptrsmrn
# COUNTERS
counters* @nuivall
tests/counter_test* @nuivall
counters* @nuivall @ptrsmrn
tests/counter_test* @nuivall @ptrsmrn
# DOCS
docs/* @annastuchlik @tzach
@@ -57,6 +57,7 @@ repair/* @tgrabiec @asias
# SCHEMA MANAGEMENT
db/schema_tables* @tgrabiec
db/legacy_schema_migrator* @tgrabiec
service/migration* @tgrabiec
schema* @tgrabiec

View File

@@ -84,14 +84,3 @@ ninja build/<mode>/scylla
- Strive for simplicity and clarity, add complexity only when clearly justified
- Question requests: don't blindly implement requests - evaluate trade-offs, identify issues, and suggest better alternatives when appropriate
- Consider different approaches, weigh pros and cons, and recommend the best fit for the specific context
## Test Philosophy
- Performance matters. Tests should run as quickly as possible. Sleeps in the code are highly discouraged and should be avoided, to reduce run time and flakiness.
- Stability matters. Tests should be stable. New tests should be executed 100 times at least to ensure they pass 100 out of 100 times. (use --repeat 100 --max-failures 1 when running it)
- Unit tests should ideally test one thing and one thing only.
- Tests for bug fixes should run before the fix - and show the failure and after the fix - and show they now pass.
- Tests for bug fixes should have in their comments which bug fixes (GitHub or JIRA issue) they test.
- Tests in debug are always slower, so if needed, reduce number of iterations, rows, data used, cycles, etc. in debug mode.
- Tests should strive to be repeatable, and not use random input that will make their results unpredictable.
- Tests should consume as little resources as possible. Prefer running tests on a single node if it is sufficient, for example.

View File

@@ -62,7 +62,7 @@ def create_pull_request(repo, new_branch_name, base_branch_name, pr, backport_pr
if is_draft:
labels_to_add.append("conflicts")
pr_comment = f"@{pr.user.login} - This PR was marked as draft because it has conflicts\n"
pr_comment += "Please resolve them and remove the 'conflicts' label. The PR will be made ready for review automatically."
pr_comment += "Please resolve them and mark this PR as ready for review"
backport_pr.create_issue_comment(pr_comment)
# Apply all labels at once if we have any

View File

@@ -10,9 +10,6 @@ on:
types: [labeled, unlabeled]
branches: [master, next, enterprise]
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
jobs:
check-commit:
runs-on: ubuntu-latest
@@ -33,7 +30,7 @@ jobs:
echo "DEFAULT_BRANCH=master" >> $GITHUB_ENV
fi
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
uses: actions/checkout@v4
with:
repository: ${{ github.repository }}
ref: ${{ env.DEFAULT_BRANCH }}

View File

@@ -5,15 +5,12 @@ on:
types: [opened, reopened, edited]
branches: [branch-*]
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
jobs:
check-fixes-prefix:
runs-on: ubuntu-latest
steps:
- name: Check PR body for "Fixes" prefix patterns
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
uses: actions/github-script@v7
with:
script: |
const body = context.payload.pull_request.body;
@@ -21,7 +18,7 @@ jobs:
// Regular expression pattern to check for "Fixes" prefix
// Adjusted to dynamically insert the repository full name
const pattern = `Fixes:? ((?:#|${repo.replace('/', '\\/')}#|https://github\\.com/${repo.replace('/', '\\/')}/issues/)(\\d+)|(?:https://scylladb\\.atlassian\\.net/browse/)?([A-Z]+-\\d+))`;
const pattern = `Fixes:? (?:#|${repo.replace('/', '\\/')}#|https://github\\.com/${repo.replace('/', '\\/')}/issues/)(\\d+)`;
const regex = new RegExp(pattern);
if (!regex.test(body)) {

View File

@@ -12,9 +12,6 @@ on:
description: 'the md5sum for scylla executable'
value: ${{ jobs.build.outputs.md5sum }}
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
jobs:
read-toolchain:
uses: ./.github/workflows/read-toolchain.yaml
@@ -27,7 +24,7 @@ jobs:
outputs:
md5sum: ${{ steps.checksum.outputs.md5sum }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/checkout@v4
with:
submodules: recursive
- name: Generate the building system

View File

@@ -1,53 +0,0 @@
name: Backport with Jira Integration
on:
push:
branches:
- master
- next-*.*
- branch-*.*
pull_request_target:
types: [labeled, closed]
branches:
- master
- next
- next-*.*
- branch-*.*
jobs:
backport-on-push:
if: github.event_name == 'push'
uses: scylladb/github-automation/.github/workflows/backport-with-jira.yaml@main
with:
event_type: 'push'
base_branch: ${{ github.ref }}
commits: ${{ github.event.before }}..${{ github.sha }}
secrets:
gh_token: ${{ secrets.AUTO_BACKPORT_TOKEN }}
jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
backport-on-label:
if: github.event_name == 'pull_request_target' && github.event.action == 'labeled'
uses: scylladb/github-automation/.github/workflows/backport-with-jira.yaml@main
with:
event_type: 'labeled'
base_branch: refs/heads/${{ github.event.pull_request.base.ref }}
pull_request_number: ${{ github.event.pull_request.number }}
head_commit: ${{ github.event.pull_request.base.sha }}
label_name: ${{ github.event.label.name }}
pr_state: ${{ github.event.pull_request.state }}
secrets:
gh_token: ${{ secrets.AUTO_BACKPORT_TOKEN }}
jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
backport-chain:
if: github.event_name == 'pull_request_target' && github.event.action == 'closed' && github.event.pull_request.merged == true
uses: scylladb/github-automation/.github/workflows/backport-with-jira.yaml@main
with:
event_type: 'chain'
base_branch: refs/heads/${{ github.event.pull_request.base.ref }}
pull_request_number: ${{ github.event.pull_request.number }}
pr_body: ${{ github.event.pull_request.body }}
secrets:
gh_token: ${{ secrets.AUTO_BACKPORT_TOKEN }}
jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}

View File

@@ -0,0 +1,12 @@
name: Call Jira Status In Progress
on:
pull_request_target:
types: [opened]
jobs:
call-jira-status-in-progress:
uses: scylladb/github-automation/.github/workflows/main_update_jira_status_to_in_progress.yml@main
secrets:
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}

View File

@@ -0,0 +1,12 @@
name: Call Jira Status In Review
on:
pull_request_target:
types: [ready_for_review, review_requested]
jobs:
call-jira-status-in-review:
uses: scylladb/github-automation/.github/workflows/main_update_jira_status_to_in_review.yml@main
secrets:
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}

View File

@@ -0,0 +1,12 @@
name: Call Jira Status Ready For Merge
on:
pull_request_target:
types: [labeled]
jobs:
call-jira-status-update:
uses: scylladb/github-automation/.github/workflows/main_update_jira_status_to_ready_for_merge.yml@main
secrets:
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}

View File

@@ -1,41 +0,0 @@
name: Sync Jira Based on PR Events
on:
pull_request_target:
types: [opened, ready_for_review, review_requested, labeled, unlabeled, closed]
permissions:
contents: read
pull-requests: write
issues: write
jobs:
jira-sync-pr-opened:
if: github.event.action == 'opened'
uses: scylladb/github-automation/.github/workflows/main_jira_sync_pr_opened.yml@main
secrets:
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
jira-sync-in-review:
if: github.event.action == 'ready_for_review' || github.event.action == 'review_requested'
uses: scylladb/github-automation/.github/workflows/main_jira_sync_in_review.yml@main
secrets:
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
jira-sync-add-label:
if: github.event.action == 'labeled'
uses: scylladb/github-automation/.github/workflows/main_jira_sync_add_label.yml@main
secrets:
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
jira-status-remove-label:
if: github.event.action == 'unlabeled'
uses: scylladb/github-automation/.github/workflows/main_jira_sync_remove_label.yml@main
secrets:
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
jira-status-pr-closed:
if: github.event.action == 'closed'
uses: scylladb/github-automation/.github/workflows/main_jira_sync_pr_closed.yml@main
secrets:
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}

View File

@@ -1,14 +0,0 @@
name: Call Jira release creation for new milestone
on:
milestone:
types: [created]
jobs:
sync-milestone-to-jira:
uses: scylladb/github-automation/.github/workflows/main_sync_milestone_to_jira_release.yml@main
with:
# Comma-separated list of Jira project keys
jira_project_keys: "SCYLLADB,CUSTOMER"
secrets:
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}

View File

@@ -1,13 +0,0 @@
name: validate_pr_author_email
on:
pull_request_target:
types:
- opened
- synchronize
- reopened
jobs:
validate_pr_author_email:
uses: scylladb/github-automation/.github/workflows/validate_pr_author_email.yml@main

View File

@@ -9,7 +9,6 @@ env:
HEADER_CHECK_LINES: 10
LICENSE: "LicenseRef-ScyllaDB-Source-Available-1.0"
CHECKED_EXTENSIONS: ".cc .hh .py"
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
jobs:
check-license-headers:
@@ -20,7 +19,7 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
uses: actions/checkout@v4
with:
fetch-depth: 0
@@ -41,7 +40,7 @@ jobs:
- name: Comment on PR if check fails
if: failure()
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
uses: actions/github-script@v7
with:
script: |
const license = '${{ env.LICENSE }}';

View File

@@ -9,7 +9,6 @@ env:
# use the development branch explicitly
CLANG_VERSION: 21
BUILD_DIR: build
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
permissions: {}
@@ -33,7 +32,7 @@ jobs:
steps:
- run: |
sudo dnf -y install git
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/checkout@v4
with:
submodules: true
- name: Install build dependencies

View File

@@ -18,7 +18,6 @@ env:
BUILD_TYPE: RelWithDebInfo
BUILD_DIR: build
CLANG_TIDY_CHECKS: '-*,bugprone-use-after-move'
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
permissions: {}
@@ -43,7 +42,7 @@ jobs:
IMAGE: ${{ needs.read-toolchain.image }}
run: |
echo ${{ needs.read-toolchain.image }}
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/checkout@v4
with:
submodules: true
- run: |

View File

@@ -4,16 +4,14 @@ on:
branches:
- master
permissions: {}
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
jobs:
codespell:
name: Check for spelling errors
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: codespell-project/actions-codespell@8f01853be192eb0f849a5c7d721450e7a467c579 # v2.2
- uses: actions/checkout@v4
- uses: codespell-project/actions-codespell@master
with:
only_warn: 1
ignore_words_list: "ans,datas,fo,ser,ue,crate,nd,reenable,strat,stap,te,raison,iif,tread"
ignore_words_list: "ans,datas,fo,ser,ue,crate,nd,reenable,strat,stap,te,raison"
skip: "./.git,./build,./tools,*.js,*.lock,./test,./licenses,./redis/lolwut.cc,*.svg"

View File

@@ -12,16 +12,13 @@ on:
schedule:
- cron: '0 10 * * 1' # Runs every Monday at 10:00am
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
jobs:
notify_conflict_prs:
runs-on: ubuntu-latest
steps:
- name: Notify PR Authors of Conflicts
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
uses: actions/github-script@v7
with:
script: |
console.log("Starting conflict reminder script...");

View File

@@ -13,9 +13,6 @@ on:
permissions:
contents: read
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
jobs:
lint:
runs-on: ubuntu-latest
@@ -24,12 +21,12 @@ jobs:
security-events: write
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Differential ShellCheck
uses: redhat-plumbers-in-action/differential-shellcheck@d965e66ec0b3b2f821f75c8eff9b12442d9a7d1e # v5.5.6
uses: redhat-plumbers-in-action/differential-shellcheck@v5
with:
severity: warning
token: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -5,7 +5,6 @@ name: "Docs / Publish"
env:
FLAG: ${{ github.repository == 'scylladb/scylla-enterprise' && 'enterprise' || 'opensource' }}
DEFAULT_BRANCH: ${{ github.repository == 'scylladb/scylla-enterprise' && 'enterprise' || 'master' }}
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
on:
push:
@@ -19,18 +18,16 @@ on:
jobs:
release:
permissions:
contents: write
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
uses: actions/checkout@v4
with:
ref: ${{ env.DEFAULT_BRANCH }}
persist-credentials: false
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Set up env

View File

@@ -2,12 +2,8 @@ name: "Docs / Build PR"
# For more information,
# see https://sphinx-theme.scylladb.com/stable/deployment/production.html#available-workflows
permissions:
contents: read
env:
FLAG: ${{ github.repository == 'scylladb/scylla-enterprise' && 'enterprise' || 'opensource' }}
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
on:
pull_request:
@@ -23,12 +19,12 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
uses: actions/checkout@v4
with:
persist-credentials: false
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Set up env

View File

@@ -1,11 +1,5 @@
name: Docs / Validate metrics
permissions:
contents: read
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
on:
pull_request:
branches:
@@ -13,7 +7,7 @@ on:
- enterprise
paths:
- '**/*.cc'
- 'scripts/metrics-config.yml'
- 'scripts/metrics-config.yml'
- 'scripts/get_description.py'
- 'docs/_ext/scylladb_metrics.py'
@@ -21,20 +15,20 @@ jobs:
validate-metrics:
runs-on: ubuntu-latest
name: Check metrics documentation coverage
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
uses: actions/checkout@v4
with:
submodules: true
- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
uses: actions/setup-python@v6
with:
python-version: '3.10'
- name: Install dependencies
run: pip install PyYAML
- name: Validate metrics
run: python3 scripts/get_description.py --validate -c scripts/metrics-config.yml

View File

@@ -13,7 +13,6 @@ env:
# supposed to be processed by idl-compiler.py, so we don't check them using the cleaner
CLEANER_DIRS: test/unit exceptions alternator api auth cdc compaction db dht gms index lang message mutation mutation_writer node_ops raft redis replica service
SEASTAR_BAD_INCLUDE_OUTPUT_PATH: build/seastar-bad-include.log
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
permissions: {}
@@ -32,7 +31,7 @@ jobs:
runs-on: ubuntu-latest
container: ${{ needs.read-toolchain.outputs.image }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/checkout@v4
with:
submodules: true
- run: |
@@ -91,7 +90,7 @@ jobs:
| tee "$SEASTAR_BAD_INCLUDE_OUTPUT_PATH"
- run: |
echo "::remove-matcher owner=seastar-bad-include::"
- uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
- uses: actions/upload-artifact@v4
with:
name: Logs
path: |

View File

@@ -7,7 +7,6 @@ on:
env:
DEFAULT_BRANCH: 'master'
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
jobs:
mark-ready:
@@ -18,7 +17,7 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
uses: actions/checkout@v4
with:
repository: ${{ github.repository }}
ref: ${{ env.DEFAULT_BRANCH }}

View File

@@ -5,8 +5,6 @@ on:
branches:
- master
- next
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
jobs:
label:
if: github.event.pull_request.draft == false
@@ -17,7 +15,7 @@ jobs:
steps:
- name: Wait for label to be added
run: sleep 1m
- uses: mheap/github-action-required-labels@0ac283b4e65c1fb28ce6079dea5546ceca98ccbe # v5.5.2
- uses: mheap/github-action-required-labels@v5
with:
mode: minimum
count: 1

View File

@@ -7,18 +7,13 @@ on:
description: "the toolchain docker image"
value: ${{ jobs.read-toolchain.outputs.image }}
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
jobs:
read-toolchain:
runs-on: ubuntu-latest
permissions:
contents: read
outputs:
image: ${{ steps.read.outputs.image }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/checkout@v4
with:
sparse-checkout: tools/toolchain/image
sparse-checkout-cone-mode: false

View File

@@ -13,7 +13,6 @@ concurrency:
env:
BUILD_DIR: build
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
jobs:
read-toolchain:
@@ -30,12 +29,12 @@ jobs:
- RelWithDebInfo
- Dev
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/checkout@v4
with:
submodules: true
- run: |
rm -rf seastar
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/checkout@v4
with:
repository: scylladb/seastar
submodules: true

View File

@@ -7,9 +7,6 @@ on:
issues:
types: [labeled, unlabeled]
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
jobs:
label-sync:
if: ${{ github.repository == 'scylladb/scylladb' }}
@@ -24,7 +21,7 @@ jobs:
GITHUB_CONTEXT: ${{ toJson(github) }}
run: echo "$GITHUB_CONTEXT"
- name: Checkout repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
uses: actions/checkout@v4
with:
sparse-checkout: |
.github/scripts/sync_labels.py

View File

@@ -3,63 +3,19 @@ name: Trigger Scylla CI Route
on:
issue_comment:
types: [created]
pull_request_target:
types:
- unlabeled
jobs:
trigger-jenkins:
if: (github.event_name == 'issue_comment' && github.event.comment.user.login != 'scylladbbot') || github.event.label.name == 'conflicts'
if: github.event.comment.user.login != 'scylladbbot' && contains(github.event.comment.body, '@scylladbbot') && contains(github.event.comment.body, 'trigger-ci')
runs-on: ubuntu-latest
steps:
- name: Verify Org Membership
id: verify_author
env:
EVENT_NAME: ${{ github.event_name }}
PR_AUTHOR: ${{ github.event.pull_request.user.login }}
PR_ASSOCIATION: ${{ github.event.pull_request.author_association }}
COMMENT_AUTHOR: ${{ github.event.comment.user.login }}
COMMENT_ASSOCIATION: ${{ github.event.comment.author_association }}
shell: bash
run: |
if [[ "$EVENT_NAME" == "pull_request_target" ]]; then
AUTHOR="$PR_AUTHOR"
ASSOCIATION="$PR_ASSOCIATION"
else
AUTHOR="$COMMENT_AUTHOR"
ASSOCIATION="$COMMENT_ASSOCIATION"
fi
ORG="scylladb"
if gh api "/orgs/${ORG}/members/${AUTHOR}" --silent 2>/dev/null; then
echo "member=true" >> $GITHUB_OUTPUT
else
echo "::warning::${AUTHOR} is not a member of ${ORG}; skipping CI trigger."
echo "member=false" >> $GITHUB_OUTPUT
fi
- name: Validate Comment Trigger
if: github.event_name == 'issue_comment'
id: verify_comment
env:
COMMENT_BODY: ${{ github.event.comment.body }}
shell: bash
run: |
CLEAN_BODY=$(echo "$COMMENT_BODY" | grep -v '^[[:space:]]*>')
if echo "$CLEAN_BODY" | grep -qi '@scylladbbot' && echo "$CLEAN_BODY" | grep -qi 'trigger-ci'; then
echo "trigger=true" >> $GITHUB_OUTPUT
else
echo "trigger=false" >> $GITHUB_OUTPUT
fi
- name: Trigger Scylla-CI-Route Jenkins Job
if: steps.verify_author.outputs.member == 'true' && (github.event_name == 'pull_request_target' || steps.verify_comment.outputs.trigger == 'true')
env:
JENKINS_USER: ${{ secrets.JENKINS_USERNAME }}
JENKINS_API_TOKEN: ${{ secrets.JENKINS_TOKEN }}
JENKINS_URL: "https://jenkins.scylladb.com"
PR_NUMBER: "${{ github.event.issue.number || github.event.pull_request.number }}"
PR_REPO_NAME: "${{ github.event.repository.full_name }}"
run: |
PR_NUMBER=${{ github.event.issue.number }}
PR_REPO_NAME=${{ github.event.repository.full_name }}
curl -X POST "$JENKINS_URL/job/releng/job/Scylla-CI-Route/buildWithParameters?PR_NUMBER=$PR_NUMBER&PR_REPO_NAME=$PR_REPO_NAME" \
--user "$JENKINS_USER:$JENKINS_API_TOKEN" --fail
--user "$JENKINS_USER:$JENKINS_API_TOKEN" --fail -i -v

View File

@@ -5,10 +5,7 @@ on:
types: [opened, reopened, synchronize]
issue_comment:
types: [created]
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
jobs:
trigger-ci:
runs-on: ubuntu-latest
@@ -18,7 +15,7 @@ jobs:
GITHUB_CONTEXT: ${{ toJson(github) }}
run: echo "$GITHUB_CONTEXT"
- name: Checkout PR code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
uses: actions/checkout@v3
with:
fetch-depth: 0 # Needed to access full history
ref: ${{ github.event.pull_request.head.ref }}

View File

@@ -4,16 +4,13 @@ on:
schedule:
- cron: '10 8 * * *' # Runs daily at 8 AM
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
jobs:
reminder:
runs-on: ubuntu-latest
steps:
- name: Send reminders
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
uses: actions/github-script@v7
with:
script: |
const labelFilters = ['P0', 'P1', 'Field-Tier1','status/release blocker', 'status/regression'];

2
.gitmodules vendored
View File

@@ -1,6 +1,6 @@
[submodule "seastar"]
path = seastar
url = ../scylla-seastar
url = ../seastar
ignore = dirty
[submodule "swagger-ui"]
path = swagger-ui

View File

@@ -78,7 +78,7 @@ fi
# Default scylla product/version tags
PRODUCT=scylla
VERSION=2026.1.2
VERSION=2026.1.0-dev
if test -f version
then

View File

@@ -18,7 +18,6 @@ target_sources(alternator
consumed_capacity.cc
ttl.cc
parsed_expression_cache.cc
http_compression.cc
${cql_grammar_srcs})
target_include_directories(alternator
PUBLIC

View File

@@ -42,7 +42,7 @@ comparison_operator_type get_comparison_operator(const rjson::value& comparison_
if (!comparison_operator.IsString()) {
throw api_error::validation(fmt::format("Invalid comparison operator definition {}", rjson::print(comparison_operator)));
}
std::string op = rjson::to_string(comparison_operator);
std::string op = comparison_operator.GetString();
auto it = ops.find(op);
if (it == ops.end()) {
throw api_error::validation(fmt::format("Unsupported comparison operator {}", op));
@@ -377,8 +377,8 @@ bool check_compare(const rjson::value* v1, const rjson::value& v2, const Compara
return cmp(unwrap_number(*v1, cmp.diagnostic), unwrap_number(v2, cmp.diagnostic));
}
if (kv1.name == "S") {
return cmp(rjson::to_string_view(kv1.value),
rjson::to_string_view(kv2.value));
return cmp(std::string_view(kv1.value.GetString(), kv1.value.GetStringLength()),
std::string_view(kv2.value.GetString(), kv2.value.GetStringLength()));
}
if (kv1.name == "B") {
auto d_kv1 = unwrap_bytes(kv1.value, v1_from_query);
@@ -470,9 +470,9 @@ static bool check_BETWEEN(const rjson::value* v, const rjson::value& lb, const r
return check_BETWEEN(unwrap_number(*v, diag), unwrap_number(lb, diag), unwrap_number(ub, diag), bounds_from_query);
}
if (kv_v.name == "S") {
return check_BETWEEN(rjson::to_string_view(kv_v.value),
rjson::to_string_view(kv_lb.value),
rjson::to_string_view(kv_ub.value),
return check_BETWEEN(std::string_view(kv_v.value.GetString(), kv_v.value.GetStringLength()),
std::string_view(kv_lb.value.GetString(), kv_lb.value.GetStringLength()),
std::string_view(kv_ub.value.GetString(), kv_ub.value.GetStringLength()),
bounds_from_query);
}
if (kv_v.name == "B") {

View File

@@ -8,8 +8,6 @@
#include "consumed_capacity.hh"
#include "error.hh"
#include "utils/rjson.hh"
#include <fmt/format.h>
namespace alternator {
@@ -34,12 +32,12 @@ bool consumed_capacity_counter::should_add_capacity(const rjson::value& request)
if (!return_consumed->IsString()) {
throw api_error::validation("Non-string ReturnConsumedCapacity field in request");
}
std::string_view consumed = rjson::to_string_view(*return_consumed);
std::string consumed = return_consumed->GetString();
if (consumed == "INDEXES") {
throw api_error::validation("INDEXES consumed capacity is not supported");
}
if (consumed != "TOTAL") {
throw api_error::validation(fmt::format("Unknown consumed capacity {}", consumed));
throw api_error::validation("Unknown consumed capacity "+ consumed);
}
return true;
}

View File

@@ -28,7 +28,6 @@ static logging::logger logger("alternator_controller");
controller::controller(
sharded<gms::gossiper>& gossiper,
sharded<service::storage_proxy>& proxy,
sharded<service::storage_service>& ss,
sharded<service::migration_manager>& mm,
sharded<db::system_distributed_keyspace>& sys_dist_ks,
sharded<cdc::generation_service>& cdc_gen_svc,
@@ -40,7 +39,6 @@ controller::controller(
: protocol_server(sg)
, _gossiper(gossiper)
, _proxy(proxy)
, _ss(ss)
, _mm(mm)
, _sys_dist_ks(sys_dist_ks)
, _cdc_gen_svc(cdc_gen_svc)
@@ -91,7 +89,7 @@ future<> controller::start_server() {
auto get_timeout_in_ms = [] (const db::config& cfg) -> utils::updateable_value<uint32_t> {
return cfg.alternator_timeout_in_ms;
};
_executor.start(std::ref(_gossiper), std::ref(_proxy), std::ref(_ss), std::ref(_mm), std::ref(_sys_dist_ks),
_executor.start(std::ref(_gossiper), std::ref(_proxy), std::ref(_mm), std::ref(_sys_dist_ks),
sharded_parameter(get_cdc_metadata, std::ref(_cdc_gen_svc)), _ssg.value(),
sharded_parameter(get_timeout_in_ms, std::ref(_config))).get();
_server.start(std::ref(_executor), std::ref(_proxy), std::ref(_gossiper), std::ref(_auth_service), std::ref(_sl_controller)).get();
@@ -105,23 +103,11 @@ future<> controller::start_server() {
alternator_port = _config.alternator_port();
_listen_addresses.push_back({addr, *alternator_port});
}
std::optional<uint16_t> alternator_port_proxy_protocol;
if (_config.alternator_port_proxy_protocol()) {
alternator_port_proxy_protocol = _config.alternator_port_proxy_protocol();
_listen_addresses.push_back({addr, *alternator_port_proxy_protocol});
}
std::optional<uint16_t> alternator_https_port;
std::optional<uint16_t> alternator_https_port_proxy_protocol;
std::optional<tls::credentials_builder> creds;
if (_config.alternator_https_port() || _config.alternator_https_port_proxy_protocol()) {
if (_config.alternator_https_port()) {
alternator_https_port = _config.alternator_https_port();
_listen_addresses.push_back({addr, *alternator_https_port});
}
if (_config.alternator_https_port_proxy_protocol()) {
alternator_https_port_proxy_protocol = _config.alternator_https_port_proxy_protocol();
_listen_addresses.push_back({addr, *alternator_https_port_proxy_protocol});
}
if (_config.alternator_https_port()) {
alternator_https_port = _config.alternator_https_port();
_listen_addresses.push_back({addr, *alternator_https_port});
creds.emplace();
auto opts = _config.alternator_encryption_options();
if (opts.empty()) {
@@ -147,29 +133,20 @@ future<> controller::start_server() {
}
}
_server.invoke_on_all(
[this, addr, alternator_port, alternator_https_port, alternator_port_proxy_protocol, alternator_https_port_proxy_protocol, creds = std::move(creds)] (server& server) mutable {
return server.init(addr, alternator_port, alternator_https_port, alternator_port_proxy_protocol, alternator_https_port_proxy_protocol, creds,
[this, addr, alternator_port, alternator_https_port, creds = std::move(creds)] (server& server) mutable {
return server.init(addr, alternator_port, alternator_https_port, creds,
_config.alternator_enforce_authorization,
_config.alternator_warn_authorization,
_config.alternator_max_users_query_size_in_trace_output,
&_memory_limiter.local().get_semaphore(),
_config.max_concurrent_requests_per_shard);
}).handle_exception([this, addr, alternator_port, alternator_https_port, alternator_port_proxy_protocol, alternator_https_port_proxy_protocol] (std::exception_ptr ep) {
logger.error("Failed to set up Alternator HTTP server on {} port {}, TLS port {}, proxy-protocol port {}, TLS proxy-protocol port {}: {}",
addr,
alternator_port ? std::to_string(*alternator_port) : "OFF",
alternator_https_port ? std::to_string(*alternator_https_port) : "OFF",
alternator_port_proxy_protocol ? std::to_string(*alternator_port_proxy_protocol) : "OFF",
alternator_https_port_proxy_protocol ? std::to_string(*alternator_https_port_proxy_protocol) : "OFF",
ep);
}).handle_exception([this, addr, alternator_port, alternator_https_port] (std::exception_ptr ep) {
logger.error("Failed to set up Alternator HTTP server on {} port {}, TLS port {}: {}",
addr, alternator_port ? std::to_string(*alternator_port) : "OFF", alternator_https_port ? std::to_string(*alternator_https_port) : "OFF", ep);
return stop_server().then([ep = std::move(ep)] { return make_exception_future<>(ep); });
}).then([addr, alternator_port, alternator_https_port, alternator_port_proxy_protocol, alternator_https_port_proxy_protocol] {
logger.info("Alternator server listening on {}, HTTP port {}, HTTPS port {}, proxy-protocol port {}, TLS proxy-protocol port {}",
addr,
alternator_port ? std::to_string(*alternator_port) : "OFF",
alternator_https_port ? std::to_string(*alternator_https_port) : "OFF",
alternator_port_proxy_protocol ? std::to_string(*alternator_port_proxy_protocol) : "OFF",
alternator_https_port_proxy_protocol ? std::to_string(*alternator_https_port_proxy_protocol) : "OFF");
}).then([addr, alternator_port, alternator_https_port] {
logger.info("Alternator server listening on {}, HTTP port {}, HTTPS port {}",
addr, alternator_port ? std::to_string(*alternator_port) : "OFF", alternator_https_port ? std::to_string(*alternator_https_port) : "OFF");
}).get();
});
}
@@ -192,7 +169,7 @@ future<> controller::request_stop_server() {
});
}
future<utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>> controller::get_client_data() {
future<utils::chunked_vector<client_data>> controller::get_client_data() {
return _server.local().get_client_data();
}

View File

@@ -15,7 +15,6 @@
namespace service {
class storage_proxy;
class storage_service;
class migration_manager;
class memory_limiter;
}
@@ -58,7 +57,6 @@ class server;
class controller : public protocol_server {
sharded<gms::gossiper>& _gossiper;
sharded<service::storage_proxy>& _proxy;
sharded<service::storage_service>& _ss;
sharded<service::migration_manager>& _mm;
sharded<db::system_distributed_keyspace>& _sys_dist_ks;
sharded<cdc::generation_service>& _cdc_gen_svc;
@@ -76,7 +74,6 @@ public:
controller(
sharded<gms::gossiper>& gossiper,
sharded<service::storage_proxy>& proxy,
sharded<service::storage_service>& ss,
sharded<service::migration_manager>& mm,
sharded<db::system_distributed_keyspace>& sys_dist_ks,
sharded<cdc::generation_service>& cdc_gen_svc,
@@ -96,7 +93,7 @@ public:
// This virtual function is called (on each shard separately) when the
// virtual table "system.clients" is read. It is expected to generate a
// list of clients connected to this server (on this shard).
virtual future<utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>> get_client_data() override;
virtual future<utils::chunked_vector<client_data>> get_client_data() override;
};
}

View File

@@ -17,7 +17,6 @@
#include "auth/service.hh"
#include "db/config.hh"
#include "db/view/view_build_status.hh"
#include "locator/tablets.hh"
#include "mutation/tombstone.hh"
#include "locator/abstract_replication_strategy.hh"
#include "utils/log.hh"
@@ -68,14 +67,6 @@ using namespace std::chrono_literals;
logging::logger elogger("alternator-executor");
namespace std {
template <> struct hash<std::pair<sstring, sstring>> {
size_t operator () (const std::pair<sstring, sstring>& p) const {
return std::hash<sstring>()(p.first) * 1009 + std::hash<sstring>()(p.second) * 3;
}
};
}
namespace alternator {
// Alternator-specific table properties stored as hidden table tags:
@@ -257,66 +248,14 @@ static const rjson::value::Member& get_single_member(const rjson::value& v, cons
return *(v.MemberBegin());
}
class executor::describe_table_info_manager : public service::migration_listener::empty_listener {
executor &_executor;
struct table_info {
utils::simple_value_with_expiry<std::uint64_t> size_in_bytes;
};
std::unordered_map<std::pair<sstring, sstring>, table_info> info_for_tables;
bool active = false;
public:
describe_table_info_manager(executor& executor) : _executor(executor) {
_executor._proxy.data_dictionary().real_database_ptr()->get_notifier().register_listener(this);
active = true;
}
describe_table_info_manager(const describe_table_info_manager &) = delete;
describe_table_info_manager(describe_table_info_manager&&) = delete;
~describe_table_info_manager() {
if (active) {
on_fatal_internal_error(elogger, "describe_table_info_manager was not stopped before destruction");
}
}
describe_table_info_manager &operator = (const describe_table_info_manager &) = delete;
describe_table_info_manager &operator = (describe_table_info_manager&&) = delete;
static std::chrono::high_resolution_clock::time_point now() {
return std::chrono::high_resolution_clock::now();
}
std::optional<std::uint64_t> get_cached_size_in_bytes(const sstring &ks_name, const sstring &cf_name) const {
auto it = info_for_tables.find({ks_name, cf_name});
if (it != info_for_tables.end()) {
return it->second.size_in_bytes.get();
}
return std::nullopt;
}
void cache_size_in_bytes(sstring ks_name, sstring cf_name, std::uint64_t size_in_bytes, std::chrono::high_resolution_clock::time_point expiry) {
info_for_tables[{std::move(ks_name), std::move(cf_name)}].size_in_bytes.set_if_longer_expiry(size_in_bytes, expiry);
}
future<> stop() {
co_await _executor._proxy.data_dictionary().real_database_ptr()->get_notifier().unregister_listener(this);
active = false;
co_return;
}
void on_drop_column_family(const sstring& ks_name, const sstring& cf_name) override {
if (!ks_name.starts_with(executor::KEYSPACE_NAME_PREFIX)) return;
info_for_tables.erase({ks_name, cf_name});
}
};
executor::executor(gms::gossiper& gossiper,
service::storage_proxy& proxy,
service::storage_service& ss,
service::migration_manager& mm,
db::system_distributed_keyspace& sdks,
cdc::metadata& cdc_metadata,
smp_service_group ssg,
utils::updateable_value<uint32_t> default_timeout_in_ms)
: _gossiper(gossiper),
_ss(ss),
_proxy(proxy),
_mm(mm),
_sdks(sdks),
@@ -329,7 +268,6 @@ executor::executor(gms::gossiper& gossiper,
_stats))
{
s_default_timeout_in_ms = std::move(default_timeout_in_ms);
_describe_table_info_manager = std::make_unique<describe_table_info_manager>(*this);
register_metrics(_metrics, _stats);
}
@@ -481,7 +419,7 @@ static std::optional<std::string> find_table_name(const rjson::value& request) {
if (!table_name_value->IsString()) {
throw api_error::validation("Non-string TableName field in request");
}
std::string table_name = rjson::to_string(*table_name_value);
std::string table_name = table_name_value->GetString();
return table_name;
}
@@ -608,7 +546,7 @@ get_table_or_view(service::storage_proxy& proxy, const rjson::value& request) {
// does exist but the index does not (ValidationException).
if (proxy.data_dictionary().has_schema(keyspace_name, orig_table_name)) {
throw api_error::validation(
fmt::format("Requested resource not found: Index '{}' for table '{}'", rjson::to_string_view(*index_name), orig_table_name));
fmt::format("Requested resource not found: Index '{}' for table '{}'", index_name->GetString(), orig_table_name));
} else {
throw api_error::resource_not_found(
fmt::format("Requested resource not found: Table: {} not found", orig_table_name));
@@ -649,7 +587,7 @@ static std::string get_string_attribute(const rjson::value& value, std::string_v
throw api_error::validation(fmt::format("Expected string value for attribute {}, got: {}",
attribute_name, value));
}
return rjson::to_string(*attribute_value);
return std::string(attribute_value->GetString(), attribute_value->GetStringLength());
}
// Convenience function for getting the value of a boolean attribute, or a
@@ -814,44 +752,12 @@ static future<bool> is_view_built(
}
future<> executor::cache_newly_calculated_size_on_all_shards(schema_ptr schema, std::uint64_t size_in_bytes, std::chrono::nanoseconds ttl) {
auto expiry = describe_table_info_manager::now() + ttl;
return container().invoke_on_all(
[schema, size_in_bytes, expiry] (executor& exec) {
exec._describe_table_info_manager->cache_size_in_bytes(schema->ks_name(), schema->cf_name(), size_in_bytes, expiry);
});
}
future<> executor::fill_table_size(rjson::value &table_description, schema_ptr schema, bool deleting) {
auto cached_size = _describe_table_info_manager->get_cached_size_in_bytes(schema->ks_name(), schema->cf_name());
std::uint64_t total_size = 0;
if (cached_size) {
total_size = *cached_size;
} else {
// there's no point in trying to estimate value of table that is being deleted, as other nodes more often than not might
// move forward with deletion faster than we calculate the size
if (!deleting) {
total_size = co_await _ss.estimate_total_sstable_volume(schema->id(), service::storage_service::ignore_errors::yes);
const auto expiry = std::chrono::seconds{ _proxy.data_dictionary().get_config().alternator_describe_table_info_cache_validity_in_seconds() };
// Note: we don't care when the notification of other shards will finish, as long as it will be done
// it's possible to get into race condition (next DescribeTable comes to other shard, that new shard doesn't have
// the size yet, so it will calculate it again) - this is not a problem, because it will call cache_newly_calculated_size_on_all_shards
// with expiry, which is extremely unlikely to be exactly the same as the previous one, all shards will keep the size coming with expiry that is further into the future.
// In case of the same expiry, some shards will have different size, which means DescribeTable will return different values depending on the shard
// which is also fine, as the specification doesn't give precision guarantees of any kind.
co_await cache_newly_calculated_size_on_all_shards(schema, total_size, expiry);
}
}
rjson::add(table_description, "TableSizeBytes", total_size);
}
future<rjson::value> executor::fill_table_description(schema_ptr schema, table_status tbl_status, service::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit)
static future<rjson::value> fill_table_description(schema_ptr schema, table_status tbl_status, service::storage_proxy& proxy, service::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit)
{
rjson::value table_description = rjson::empty_object();
auto tags_ptr = db::get_tags_of_table(schema);
rjson::add(table_description, "TableName", rjson::from_string(schema->cf_name()));
co_await fill_table_size(table_description, schema, tbl_status == table_status::deleting);
auto creation_timestamp = get_table_creation_time(*schema);
@@ -895,7 +801,9 @@ future<rjson::value> executor::fill_table_description(schema_ptr schema, table_s
rjson::add(table_description["ProvisionedThroughput"], "WriteCapacityUnits", wcu);
rjson::add(table_description["ProvisionedThroughput"], "NumberOfDecreasesToday", 0);
data_dictionary::table t = _proxy.data_dictionary().find_column_family(schema);
data_dictionary::table t = proxy.data_dictionary().find_column_family(schema);
if (tbl_status != table_status::deleting) {
rjson::add(table_description, "CreationDateTime", rjson::value(creation_timestamp));
@@ -932,7 +840,7 @@ future<rjson::value> executor::fill_table_description(schema_ptr schema, table_s
// (for a built view) or CREATING+Backfilling (if view building
// is in progress).
if (!is_lsi) {
if (co_await is_view_built(vptr, _proxy, client_state, trace_state, permit)) {
if (co_await is_view_built(vptr, proxy, client_state, trace_state, permit)) {
rjson::add(view_entry, "IndexStatus", "ACTIVE");
} else {
rjson::add(view_entry, "IndexStatus", "CREATING");
@@ -960,8 +868,9 @@ future<rjson::value> executor::fill_table_description(schema_ptr schema, table_s
}
rjson::add(table_description, "AttributeDefinitions", std::move(attribute_definitions));
}
executor::supplement_table_stream_info(table_description, *schema, _proxy);
executor::supplement_table_stream_info(table_description, *schema, proxy);
// FIXME: still missing some response fields (issue #5026)
co_return table_description;
}
@@ -981,7 +890,7 @@ future<executor::request_return_type> executor::describe_table(client_state& cli
get_stats_from_schema(_proxy, *schema)->api_operations.describe_table++;
tracing::add_alternator_table_name(trace_state, schema->cf_name());
rjson::value table_description = co_await fill_table_description(schema, table_status::active, client_state, trace_state, permit);
rjson::value table_description = co_await fill_table_description(schema, table_status::active, _proxy, client_state, trace_state, permit);
rjson::value response = rjson::empty_object();
rjson::add(response, "Table", std::move(table_description));
elogger.trace("returning {}", response);
@@ -1084,7 +993,7 @@ future<executor::request_return_type> executor::delete_table(client_state& clien
auto& p = _proxy.container();
schema_ptr schema = get_table(_proxy, request);
rjson::value table_description = co_await fill_table_description(schema, table_status::deleting, client_state, trace_state, permit);
rjson::value table_description = co_await fill_table_description(schema, table_status::deleting, _proxy, client_state, trace_state, permit);
co_await verify_permission(_enforce_authorization, _warn_authorization, client_state, schema, auth::permission::DROP, _stats);
co_await _mm.container().invoke_on(0, [&, cs = client_state.move_to_other_shard()] (service::migration_manager& mm) -> future<> {
size_t retries = mm.get_concurrent_ddl_retries();
@@ -1171,8 +1080,8 @@ static void add_column(schema_builder& builder, const std::string& name, const r
}
for (auto it = attribute_definitions.Begin(); it != attribute_definitions.End(); ++it) {
const rjson::value& attribute_info = *it;
if (rjson::to_string_view(attribute_info["AttributeName"]) == name) {
std::string_view type = rjson::to_string_view(attribute_info["AttributeType"]);
if (attribute_info["AttributeName"].GetString() == name) {
auto type = attribute_info["AttributeType"].GetString();
data_type dt = parse_key_type(type);
if (computed_column) {
// Computed column for GSI (doesn't choose a real column as-is
@@ -1207,7 +1116,7 @@ static std::pair<std::string, std::string> parse_key_schema(const rjson::value&
throw api_error::validation("First element of KeySchema must be an object");
}
const rjson::value *v = rjson::find((*key_schema)[0], "KeyType");
if (!v || !v->IsString() || rjson::to_string_view(*v) != "HASH") {
if (!v || !v->IsString() || v->GetString() != std::string("HASH")) {
throw api_error::validation("First key in KeySchema must be a HASH key");
}
v = rjson::find((*key_schema)[0], "AttributeName");
@@ -1215,14 +1124,14 @@ static std::pair<std::string, std::string> parse_key_schema(const rjson::value&
throw api_error::validation("First key in KeySchema must have string AttributeName");
}
validate_attr_name_length(supplementary_context, v->GetStringLength(), true, "HASH key in KeySchema - ");
std::string hash_key = rjson::to_string(*v);
std::string hash_key = v->GetString();
std::string range_key;
if (key_schema->Size() == 2) {
if (!(*key_schema)[1].IsObject()) {
throw api_error::validation("Second element of KeySchema must be an object");
}
v = rjson::find((*key_schema)[1], "KeyType");
if (!v || !v->IsString() || rjson::to_string_view(*v) != "RANGE") {
if (!v || !v->IsString() || v->GetString() != std::string("RANGE")) {
throw api_error::validation("Second key in KeySchema must be a RANGE key");
}
v = rjson::find((*key_schema)[1], "AttributeName");
@@ -1648,7 +1557,8 @@ static future<> mark_view_schemas_as_built(utils::chunked_vector<mutation>& out,
}
}
future<executor::request_return_type> executor::create_table_on_shard0(service::client_state&& client_state, tracing::trace_state_ptr trace_state, rjson::value request, bool enforce_authorization, bool warn_authorization, const db::tablets_mode_t::mode tablets_mode) {
static future<executor::request_return_type> create_table_on_shard0(service::client_state&& client_state, tracing::trace_state_ptr trace_state, rjson::value request,
service::storage_proxy& sp, service::migration_manager& mm, gms::gossiper& gossiper, bool enforce_authorization, bool warn_authorization, stats& stats, const db::tablets_mode_t::mode tablets_mode) {
SCYLLA_ASSERT(this_shard_id() == 0);
// We begin by parsing and validating the content of the CreateTable
@@ -1835,7 +1745,7 @@ future<executor::request_return_type> executor::create_table_on_shard0(service::
rjson::value* stream_specification = rjson::find(request, "StreamSpecification");
if (stream_specification && stream_specification->IsObject()) {
if (executor::add_stream_options(*stream_specification, builder, _proxy)) {
if (executor::add_stream_options(*stream_specification, builder, sp)) {
validate_cdc_log_name_length(builder.cf_name());
}
}
@@ -1854,7 +1764,7 @@ future<executor::request_return_type> executor::create_table_on_shard0(service::
set_table_creation_time(tags_map, db_clock::now());
builder.add_extension(db::tags_extension::NAME, ::make_shared<db::tags_extension>(tags_map));
co_await verify_create_permission(enforce_authorization, warn_authorization, client_state, _stats);
co_await verify_create_permission(enforce_authorization, warn_authorization, client_state, stats);
schema_ptr schema = builder.build();
for (auto& view_builder : view_builders) {
@@ -1870,49 +1780,33 @@ future<executor::request_return_type> executor::create_table_on_shard0(service::
view_builder.with_view_info(schema, include_all_columns, ""/*where clause*/);
}
size_t retries = _mm.get_concurrent_ddl_retries();
size_t retries = mm.get_concurrent_ddl_retries();
for (;;) {
auto group0_guard = co_await _mm.start_group0_operation();
auto group0_guard = co_await mm.start_group0_operation();
auto ts = group0_guard.write_timestamp();
utils::chunked_vector<mutation> schema_mutations;
auto ksm = create_keyspace_metadata(keyspace_name, _proxy, _gossiper, ts, tags_map, _proxy.features(), tablets_mode);
locator::replication_strategy_params params(ksm->strategy_options(), ksm->initial_tablets(), ksm->consistency_option());
const auto& topo = _proxy.local_db().get_token_metadata().get_topology();
auto rs = locator::abstract_replication_strategy::create_replication_strategy(ksm->strategy_name(), params, topo);
auto ksm = create_keyspace_metadata(keyspace_name, sp, gossiper, ts, tags_map, sp.features(), tablets_mode);
// Alternator Streams doesn't yet work when the table uses tablets (#23838)
if (stream_specification && stream_specification->IsObject()) {
auto stream_enabled = rjson::find(*stream_specification, "StreamEnabled");
if (stream_enabled && stream_enabled->IsBool() && stream_enabled->GetBool()) {
locator::replication_strategy_params params(ksm->strategy_options(), ksm->initial_tablets(), ksm->consistency_option());
const auto& topo = sp.local_db().get_token_metadata().get_topology();
auto rs = locator::abstract_replication_strategy::create_replication_strategy(ksm->strategy_name(), params, topo);
if (rs->uses_tablets()) {
co_return api_error::validation("Streams not yet supported on a table using tablets (issue #23838). "
"If you want to use streams, create a table with vnodes by setting the tag 'system:initial_tablets' set to 'none'.");
}
}
}
// Creating an index in tablets mode requires the keyspace to be RF-rack-valid.
// GSI and LSI indexes are based on materialized views which require RF-rack-validity to avoid consistency issues.
if (!view_builders.empty() || _proxy.data_dictionary().get_config().rf_rack_valid_keyspaces()) {
try {
locator::assert_rf_rack_valid_keyspace(keyspace_name, _proxy.local_db().get_token_metadata_ptr(), *rs);
} catch (const std::invalid_argument& ex) {
if (!view_builders.empty()) {
co_return api_error::validation(fmt::format("GlobalSecondaryIndexes and LocalSecondaryIndexes on a table "
"using tablets require the number of racks in the cluster to be either 1 or 3"));
} else {
co_return api_error::validation(fmt::format("Cannot create table '{}' with tablets: the configuration "
"option 'rf_rack_valid_keyspaces' is enabled, which enforces that tables using tablets can only be created in clusters "
"that have either 1 or 3 racks", table_name));
}
}
}
try {
schema_mutations = service::prepare_new_keyspace_announcement(_proxy.local_db(), ksm, ts);
schema_mutations = service::prepare_new_keyspace_announcement(sp.local_db(), ksm, ts);
} catch (exceptions::already_exists_exception&) {
if (_proxy.data_dictionary().has_schema(keyspace_name, table_name)) {
if (sp.data_dictionary().has_schema(keyspace_name, table_name)) {
co_return api_error::resource_in_use(fmt::format("Table {} already exists", table_name));
}
}
if (_proxy.data_dictionary().try_find_table(schema->id())) {
if (sp.data_dictionary().try_find_table(schema->id())) {
// This should never happen, the ID is supposed to be unique
co_return api_error::internal(format("Table with ID {} already exists", schema->id()));
}
@@ -1921,9 +1815,9 @@ future<executor::request_return_type> executor::create_table_on_shard0(service::
for (schema_builder& view_builder : view_builders) {
schemas.push_back(view_builder.build());
}
co_await service::prepare_new_column_families_announcement(schema_mutations, _proxy, *ksm, schemas, ts);
co_await service::prepare_new_column_families_announcement(schema_mutations, sp, *ksm, schemas, ts);
if (ksm->uses_tablets()) {
co_await mark_view_schemas_as_built(schema_mutations, schemas, ts, _proxy);
co_await mark_view_schemas_as_built(schema_mutations, schemas, ts, sp);
}
// If a role is allowed to create a table, we must give it permissions to
@@ -1948,7 +1842,7 @@ future<executor::request_return_type> executor::create_table_on_shard0(service::
}
std::tie(schema_mutations, group0_guard) = co_await std::move(mc).extract();
try {
co_await _mm.announce(std::move(schema_mutations), std::move(group0_guard), fmt::format("alternator-executor: create {} table", table_name));
co_await mm.announce(std::move(schema_mutations), std::move(group0_guard), fmt::format("alternator-executor: create {} table", table_name));
break;
} catch (const service::group0_concurrent_modification& ex) {
elogger.info("Failed to execute CreateTable {} due to concurrent schema modifications. {}.",
@@ -1960,9 +1854,9 @@ future<executor::request_return_type> executor::create_table_on_shard0(service::
}
}
co_await _mm.wait_for_schema_agreement(_proxy.local_db(), db::timeout_clock::now() + 10s, nullptr);
co_await mm.wait_for_schema_agreement(sp.local_db(), db::timeout_clock::now() + 10s, nullptr);
rjson::value status = rjson::empty_object();
executor::supplement_table_info(request, *schema, _proxy);
executor::supplement_table_info(request, *schema, sp);
rjson::add(status, "TableDescription", std::move(request));
co_return rjson::print(std::move(status));
}
@@ -1971,11 +1865,10 @@ future<executor::request_return_type> executor::create_table(client_state& clien
_stats.api_operations.create_table++;
elogger.trace("Creating table {}", request);
co_return co_await _mm.container().invoke_on(0, [&, tr = tracing::global_trace_state_ptr(trace_state), request = std::move(request), &e = this->container(), client_state_other_shard = client_state.move_to_other_shard(), enforce_authorization = bool(_enforce_authorization), warn_authorization = bool(_warn_authorization)]
co_return co_await _mm.container().invoke_on(0, [&, tr = tracing::global_trace_state_ptr(trace_state), request = std::move(request), &sp = _proxy.container(), &g = _gossiper.container(), &e = this->container(), client_state_other_shard = client_state.move_to_other_shard(), enforce_authorization = bool(_enforce_authorization), warn_authorization = bool(_warn_authorization)]
(service::migration_manager& mm) mutable -> future<executor::request_return_type> {
const db::tablets_mode_t::mode tablets_mode = _proxy.data_dictionary().get_config().tablets_mode_for_new_keyspaces(); // type cast
// `invoke_on` hopped us to shard 0, but `this` points to `executor` is from 'old' shard, we need to hop it too.
co_return co_await e.local().create_table_on_shard0(client_state_other_shard.get(), tr, std::move(request), enforce_authorization, warn_authorization, std::move(tablets_mode));
co_return co_await create_table_on_shard0(client_state_other_shard.get(), tr, std::move(request), sp.local(), mm, g.local(), enforce_authorization, warn_authorization, e.local()._stats, std::move(tablets_mode));
});
}
@@ -1994,8 +1887,8 @@ future<executor::request_return_type> executor::create_table(client_state& clien
std::string def_type = type_to_string(def.type);
for (auto it = attribute_definitions.Begin(); it != attribute_definitions.End(); ++it) {
const rjson::value& attribute_info = *it;
if (rjson::to_string_view(attribute_info["AttributeName"]) == def.name_as_text()) {
std::string_view type = rjson::to_string_view(attribute_info["AttributeType"]);
if (attribute_info["AttributeName"].GetString() == def.name_as_text()) {
auto type = attribute_info["AttributeType"].GetString();
if (type != def_type) {
throw api_error::validation(fmt::format("AttributeDefinitions redefined {} to {} already a key attribute of type {} in this table", def.name_as_text(), type, def_type));
}
@@ -2126,13 +2019,6 @@ future<executor::request_return_type> executor::update_table(client_state& clien
co_return api_error::validation(fmt::format(
"LSI {} already exists in table {}, can't use same name for GSI", index_name, table_name));
}
try {
locator::assert_rf_rack_valid_keyspace(keyspace_name, p.local().local_db().get_token_metadata_ptr(),
p.local().local_db().find_keyspace(keyspace_name).get_replication_strategy());
} catch (const std::invalid_argument& ex) {
co_return api_error::validation(fmt::format("GlobalSecondaryIndexes on a table "
"using tablets require the number of racks in the cluster to be either 1 or 3"));
}
elogger.trace("Adding GSI {}", index_name);
// FIXME: read and handle "Projection" parameter. This will
@@ -2337,12 +2223,12 @@ void validate_value(const rjson::value& v, const char* caller) {
// The put_or_delete_item class builds the mutations needed by the PutItem and
// DeleteItem operations - either as stand-alone commands or part of a list
// of commands in BatchWriteItem.
// of commands in BatchWriteItems.
// put_or_delete_item splits each operation into two stages: Constructing the
// object parses and validates the user input (throwing exceptions if there
// are input errors). Later, build() generates the actual mutation, with a
// specified timestamp. This split is needed because of the peculiar needs of
// BatchWriteItem and LWT. BatchWriteItem needs all parsing to happen before
// BatchWriteItems and LWT. BatchWriteItems needs all parsing to happen before
// any writing happens (if one of the commands has an error, none of the
// writes should be done). LWT makes it impossible for the parse step to
// generate "mutation" objects, because the timestamp still isn't known.
@@ -2476,7 +2362,7 @@ put_or_delete_item::put_or_delete_item(const rjson::value& item, schema_ptr sche
_cells = std::vector<cell>();
_cells->reserve(item.MemberCount());
for (auto it = item.MemberBegin(); it != item.MemberEnd(); ++it) {
bytes column_name = to_bytes(rjson::to_string_view(it->name));
bytes column_name = to_bytes(it->name.GetString());
validate_value(it->value, "PutItem");
const column_definition* cdef = find_attribute(*schema, column_name);
validate_attr_name_length("", column_name.size(), cdef && cdef->is_primary_key());
@@ -2853,7 +2739,7 @@ future<executor::request_return_type> rmw_operation::execute(service::storage_pr
auto read_command = needs_read_before_write ?
previous_item_read_command(proxy, schema(), _ck, selection) :
nullptr;
return proxy.cas(schema(), std::move(*cas_shard), *this, read_command, to_partition_ranges(*schema(), _pk),
return proxy.cas(schema(), std::move(*cas_shard), shared_from_this(), read_command, to_partition_ranges(*schema(), _pk),
{timeout, std::move(permit), client_state, trace_state},
db::consistency_level::LOCAL_SERIAL, db::consistency_level::LOCAL_QUORUM, timeout, timeout, true, std::move(cdc_opts)).then([this, read_command, &wcu_total] (bool is_applied) mutable {
if (!is_applied) {
@@ -2897,10 +2783,10 @@ static void verify_all_are_used(const rjson::value* field,
return;
}
for (auto it = field->MemberBegin(); it != field->MemberEnd(); ++it) {
if (!used.contains(rjson::to_string(it->name))) {
if (!used.contains(it->name.GetString())) {
throw api_error::validation(
format("{} has spurious '{}', not used in {}",
field_name, rjson::to_string_view(it->name), operation));
field_name, it->name.GetString(), operation));
}
}
}
@@ -3114,7 +3000,7 @@ future<executor::request_return_type> executor::delete_item(client_state& client
}
static schema_ptr get_table_from_batch_request(const service::storage_proxy& proxy, const rjson::value::ConstMemberIterator& batch_request) {
sstring table_name = rjson::to_sstring(batch_request->name); // JSON keys are always strings
sstring table_name = batch_request->name.GetString(); // JSON keys are always strings
try {
return proxy.data_dictionary().find_schema(sstring(executor::KEYSPACE_NAME_PREFIX) + table_name, table_name);
} catch(data_dictionary::no_such_column_family&) {
@@ -3140,20 +3026,17 @@ struct primary_key_equal {
};
// This is a cas_request subclass for applying given put_or_delete_items to
// one partition using LWT as part as BatchWriteItem. This is a write-only
// one partition using LWT as part as BatchWriteItems. This is a write-only
// operation, not needing the previous value of the item (the mutation to be
// done is known prior to starting the operation). Nevertheless, we want to
// do this mutation via LWT to ensure that it is serialized with other LWT
// mutations to the same partition.
//
// The std::vector<put_or_delete_item> must remain alive until the
// storage_proxy::cas() future is resolved.
class put_or_delete_item_cas_request : public service::cas_request {
schema_ptr schema;
const std::vector<put_or_delete_item>& _mutation_builders;
std::vector<put_or_delete_item> _mutation_builders;
public:
put_or_delete_item_cas_request(schema_ptr s, const std::vector<put_or_delete_item>& b) :
schema(std::move(s)), _mutation_builders(b) { }
put_or_delete_item_cas_request(schema_ptr s, std::vector<put_or_delete_item>&& b) :
schema(std::move(s)), _mutation_builders(std::move(b)) { }
virtual ~put_or_delete_item_cas_request() = default;
virtual std::optional<mutation> apply(foreign_ptr<lw_shared_ptr<query::result>> qr, const query::partition_slice& slice, api::timestamp_type ts, cdc::per_request_options& cdc_opts) override {
std::optional<mutation> ret;
@@ -3169,48 +3052,20 @@ public:
}
};
future<> executor::cas_write(schema_ptr schema, service::cas_shard cas_shard, const dht::decorated_key& dk,
const std::vector<put_or_delete_item>& mutation_builders, service::client_state& client_state,
tracing::trace_state_ptr trace_state, service_permit permit)
{
if (!cas_shard.this_shard()) {
_stats.shard_bounce_for_lwt++;
return container().invoke_on(cas_shard.shard(), _ssg,
[cs = client_state.move_to_other_shard(),
&mb = mutation_builders,
&dk,
ks = schema->ks_name(),
cf = schema->cf_name(),
gt = tracing::global_trace_state_ptr(trace_state),
permit = std::move(permit)]
(executor& self) mutable {
return do_with(cs.get(), [&mb, &dk, ks = std::move(ks), cf = std::move(cf),
trace_state = tracing::trace_state_ptr(gt), &self]
(service::client_state& client_state) mutable {
auto schema = self._proxy.data_dictionary().find_schema(ks, cf);
service::cas_shard cas_shard(*schema, dk.token());
//FIXME: Instead of passing empty_service_permit() to the background operation,
// the current permit's lifetime should be prolonged, so that it's destructed
// only after all background operations are finished as well.
return self.cas_write(schema, std::move(cas_shard), dk, mb, client_state, std::move(trace_state), empty_service_permit());
});
});
}
static future<> cas_write(service::storage_proxy& proxy, schema_ptr schema, service::cas_shard cas_shard, dht::decorated_key dk, std::vector<put_or_delete_item>&& mutation_builders,
service::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit) {
auto timeout = executor::default_timeout();
auto op = std::make_unique<put_or_delete_item_cas_request>(schema, mutation_builders);
auto* op_ptr = op.get();
auto op = seastar::make_shared<put_or_delete_item_cas_request>(schema, std::move(mutation_builders));
auto cdc_opts = cdc::per_request_options{
.alternator = true,
.alternator_streams_increased_compatibility =
schema->cdc_options().enabled() && _proxy.data_dictionary().get_config().alternator_streams_increased_compatibility(),
schema->cdc_options().enabled() && proxy.data_dictionary().get_config().alternator_streams_increased_compatibility(),
};
return _proxy.cas(schema, std::move(cas_shard), *op_ptr, nullptr, to_partition_ranges(dk),
return proxy.cas(schema, std::move(cas_shard), op, nullptr, to_partition_ranges(dk),
{timeout, std::move(permit), client_state, trace_state},
db::consistency_level::LOCAL_SERIAL, db::consistency_level::LOCAL_QUORUM,
timeout, timeout, true, std::move(cdc_opts)).finally([op = std::move(op)]{}).discard_result();
// We discarded cas()'s future value ("is_applied") because BatchWriteItem
timeout, timeout, true, std::move(cdc_opts)).discard_result();
// We discarded cas()'s future value ("is_applied") because BatchWriteItems
// does not need to support conditional updates.
}
@@ -3232,11 +3087,13 @@ struct schema_decorated_key_equal {
// FIXME: if we failed writing some of the mutations, need to return a list
// of these failed mutations rather than fail the whole write (issue #5650).
future<> executor::do_batch_write(
static future<> do_batch_write(service::storage_proxy& proxy,
smp_service_group ssg,
std::vector<std::pair<schema_ptr, put_or_delete_item>> mutation_builders,
service::client_state& client_state,
tracing::trace_state_ptr trace_state,
service_permit permit) {
service_permit permit,
stats& stats) {
if (mutation_builders.empty()) {
return make_ready_future<>();
}
@@ -3258,7 +3115,7 @@ future<> executor::do_batch_write(
mutations.push_back(b.second.build(b.first, now));
any_cdc_enabled |= b.first->cdc_options().enabled();
}
return _proxy.mutate(std::move(mutations),
return proxy.mutate(std::move(mutations),
db::consistency_level::LOCAL_QUORUM,
executor::default_timeout(),
trace_state,
@@ -3267,48 +3124,55 @@ future<> executor::do_batch_write(
false,
cdc::per_request_options{
.alternator = true,
.alternator_streams_increased_compatibility = any_cdc_enabled && _proxy.data_dictionary().get_config().alternator_streams_increased_compatibility(),
.alternator_streams_increased_compatibility = any_cdc_enabled && proxy.data_dictionary().get_config().alternator_streams_increased_compatibility(),
});
} else {
// Do the write via LWT:
// Multiple mutations may be destined for the same partition, adding
// or deleting different items of one partition. Join them together
// because we can do them in one cas() call.
using map_type = std::unordered_map<schema_decorated_key,
std::vector<put_or_delete_item>,
schema_decorated_key_hash,
schema_decorated_key_equal>;
auto key_builders = std::make_unique<map_type>(1, schema_decorated_key_hash{}, schema_decorated_key_equal{});
for (auto&& b : std::move(mutation_builders)) {
auto [it, added] = key_builders->try_emplace(schema_decorated_key {
.schema = b.first,
.dk = dht::decorate_key(*b.first, b.second.pk())
});
std::unordered_map<schema_decorated_key, std::vector<put_or_delete_item>, schema_decorated_key_hash, schema_decorated_key_equal>
key_builders(1, schema_decorated_key_hash{}, schema_decorated_key_equal{});
for (auto& b : mutation_builders) {
auto dk = dht::decorate_key(*b.first, b.second.pk());
auto [it, added] = key_builders.try_emplace(schema_decorated_key{b.first, dk});
it->second.push_back(std::move(b.second));
}
auto* key_builders_ptr = key_builders.get();
return parallel_for_each(*key_builders_ptr, [this, &client_state, trace_state, permit = std::move(permit)] (const auto& e) {
_stats.write_using_lwt++;
return parallel_for_each(std::move(key_builders), [&proxy, &client_state, &stats, trace_state, ssg, permit = std::move(permit)] (auto& e) {
stats.write_using_lwt++;
auto desired_shard = service::cas_shard(*e.first.schema, e.first.dk.token());
auto s = e.first.schema;
if (desired_shard.this_shard()) {
return cas_write(proxy, e.first.schema, std::move(desired_shard), e.first.dk, std::move(e.second), client_state, trace_state, permit);
} else {
stats.shard_bounce_for_lwt++;
return proxy.container().invoke_on(desired_shard.shard(), ssg,
[cs = client_state.move_to_other_shard(),
mb = e.second,
dk = e.first.dk,
ks = e.first.schema->ks_name(),
cf = e.first.schema->cf_name(),
gt = tracing::global_trace_state_ptr(trace_state),
permit = std::move(permit)]
(service::storage_proxy& proxy) mutable {
return do_with(cs.get(), [&proxy, mb = std::move(mb), dk = std::move(dk), ks = std::move(ks), cf = std::move(cf),
trace_state = tracing::trace_state_ptr(gt)]
(service::client_state& client_state) mutable {
auto schema = proxy.data_dictionary().find_schema(ks, cf);
static const auto* injection_name = "alternator_executor_batch_write_wait";
return utils::get_local_injector().inject(injection_name, [s = std::move(s)] (auto& handler) -> future<> {
const auto ks = handler.get("keyspace");
const auto cf = handler.get("table");
const auto shard = std::atoll(handler.get("shard")->data());
if (ks == s->ks_name() && cf == s->cf_name() && shard == this_shard_id()) {
elogger.info("{}: hit", injection_name);
co_await handler.wait_for_message(std::chrono::steady_clock::now() + std::chrono::minutes{5});
elogger.info("{}: continue", injection_name);
}
}).then([&e, desired_shard = std::move(desired_shard),
&client_state, trace_state = std::move(trace_state), permit = std::move(permit), this]() mutable
{
return cas_write(e.first.schema, std::move(desired_shard), e.first.dk,
std::move(e.second), client_state, std::move(trace_state), std::move(permit));
});
}).finally([key_builders = std::move(key_builders)]{});
// The desired_shard on the original shard remains alive for the duration
// of cas_write on this shard and prevents any tablet operations.
// However, we need a local instance of cas_shard on this shard
// to pass it to sp::cas, so we just create a new one.
service::cas_shard cas_shard(*schema, dk.token());
//FIXME: Instead of passing empty_service_permit() to the background operation,
// the current permit's lifetime should be prolonged, so that it's destructed
// only after all background operations are finished as well.
return cas_write(proxy, schema, std::move(cas_shard), dk, std::move(mb), client_state, std::move(trace_state), empty_service_permit());
});
}).finally([desired_shard = std::move(desired_shard)]{});
}
});
}
}
@@ -3455,7 +3319,7 @@ future<executor::request_return_type> executor::batch_write_item(client_state& c
_stats.wcu_total[stats::DELETE_ITEM] += wcu_delete_units;
_stats.api_operations.batch_write_item_batch_total += total_items;
_stats.api_operations.batch_write_item_histogram.add(total_items);
co_await do_batch_write(std::move(mutation_builders), client_state, trace_state, std::move(permit));
co_await do_batch_write(_proxy, _ssg, std::move(mutation_builders), client_state, trace_state, std::move(permit), _stats);
// FIXME: Issue #5650: If we failed writing some of the updates,
// need to return a list of these failed updates in UnprocessedItems
// rather than fail the whole write (issue #5650).
@@ -3464,11 +3328,7 @@ future<executor::request_return_type> executor::batch_write_item(client_state& c
if (should_add_wcu) {
rjson::add(ret, "ConsumedCapacity", std::move(consumed_capacity));
}
auto duration = std::chrono::steady_clock::now() - start_time;
_stats.api_operations.batch_write_item_latency.mark(duration);
for (const auto& w : per_table_wcu) {
w.first->api_operations.batch_write_item_latency.mark(duration);
}
_stats.api_operations.batch_write_item_latency.mark(std::chrono::steady_clock::now() - start_time);
co_return rjson::print(std::move(ret));
}
@@ -3504,7 +3364,7 @@ static bool hierarchy_filter(rjson::value& val, const attribute_path_map_node<T>
}
rjson::value newv = rjson::empty_object();
for (auto it = v.MemberBegin(); it != v.MemberEnd(); ++it) {
std::string attr = rjson::to_string(it->name);
std::string attr = it->name.GetString();
auto x = members.find(attr);
if (x != members.end()) {
if (x->second) {
@@ -3724,7 +3584,7 @@ static std::optional<attrs_to_get> calculate_attrs_to_get(const rjson::value& re
const rjson::value& attributes_to_get = req["AttributesToGet"];
attrs_to_get ret;
for (auto it = attributes_to_get.Begin(); it != attributes_to_get.End(); ++it) {
attribute_path_map_add("AttributesToGet", ret, rjson::to_string(*it));
attribute_path_map_add("AttributesToGet", ret, it->GetString());
validate_attr_name_length("AttributesToGet", it->GetStringLength(), false);
}
if (ret.empty()) {
@@ -4390,12 +4250,12 @@ inline void update_item_operation::apply_attribute_updates(const std::unique_ptr
attribute_collector& modified_attrs, bool& any_updates, bool& any_deletes) const {
for (auto it = _attribute_updates->MemberBegin(); it != _attribute_updates->MemberEnd(); ++it) {
// Note that it.key() is the name of the column, *it is the operation
bytes column_name = to_bytes(rjson::to_string_view(it->name));
bytes column_name = to_bytes(it->name.GetString());
const column_definition* cdef = _schema->get_column_definition(column_name);
if (cdef && cdef->is_primary_key()) {
throw api_error::validation(format("UpdateItem cannot update key column {}", rjson::to_string_view(it->name)));
throw api_error::validation(format("UpdateItem cannot update key column {}", it->name.GetString()));
}
std::string action = rjson::to_string((it->value)["Action"]);
std::string action = (it->value)["Action"].GetString();
if (action == "DELETE") {
// The DELETE operation can do two unrelated tasks. Without a
// "Value" option, it is used to delete an attribute. With a
@@ -4979,12 +4839,7 @@ future<executor::request_return_type> executor::batch_get_item(client_state& cli
if (!some_succeeded && eptr) {
co_await coroutine::return_exception_ptr(std::move(eptr));
}
auto duration = std::chrono::steady_clock::now() - start_time;
_stats.api_operations.batch_get_item_latency.mark(duration);
for (const table_requests& rs : requests) {
lw_shared_ptr<stats> per_table_stats = get_stats_from_schema(_proxy, *rs.schema);
per_table_stats->api_operations.batch_get_item_latency.mark(duration);
}
_stats.api_operations.batch_get_item_latency.mark(std::chrono::steady_clock::now() - start_time);
if (is_big(response)) {
co_return make_streamed(std::move(response));
} else {
@@ -5597,7 +5452,7 @@ calculate_bounds_conditions(schema_ptr schema, const rjson::value& conditions) {
std::vector<query::clustering_range> ck_bounds;
for (auto it = conditions.MemberBegin(); it != conditions.MemberEnd(); ++it) {
sstring key = rjson::to_sstring(it->name);
std::string key = it->name.GetString();
const rjson::value& condition = it->value;
const rjson::value& comp_definition = rjson::get(condition, "ComparisonOperator");
@@ -5605,13 +5460,13 @@ calculate_bounds_conditions(schema_ptr schema, const rjson::value& conditions) {
const column_definition& pk_cdef = schema->partition_key_columns().front();
const column_definition* ck_cdef = schema->clustering_key_size() > 0 ? &schema->clustering_key_columns().front() : nullptr;
if (key == pk_cdef.name_as_text()) {
if (sstring(key) == pk_cdef.name_as_text()) {
if (!partition_ranges.empty()) {
throw api_error::validation("Currently only a single restriction per key is allowed");
}
partition_ranges.push_back(calculate_pk_bound(schema, pk_cdef, comp_definition, attr_list));
}
if (ck_cdef && key == ck_cdef->name_as_text()) {
if (ck_cdef && sstring(key) == ck_cdef->name_as_text()) {
if (!ck_bounds.empty()) {
throw api_error::validation("Currently only a single restriction per key is allowed");
}
@@ -6010,14 +5865,9 @@ future<executor::request_return_type> executor::list_tables(client_state& client
_stats.api_operations.list_tables++;
elogger.trace("Listing tables {}", request);
co_await utils::get_local_injector().inject("alternator_list_tables", [] (auto& handler) -> future<> {
handler.set("waiting", true);
co_await handler.wait_for_message(std::chrono::steady_clock::now() + std::chrono::minutes{5});
});
rjson::value* exclusive_start_json = rjson::find(request, "ExclusiveStartTableName");
rjson::value* limit_json = rjson::find(request, "Limit");
std::string exclusive_start = exclusive_start_json ? rjson::to_string(*exclusive_start_json) : "";
std::string exclusive_start = exclusive_start_json ? exclusive_start_json->GetString() : "";
int limit = limit_json ? limit_json->GetInt() : 100;
if (limit < 1 || limit > 100) {
co_return api_error::validation("Limit must be greater than 0 and no greater than 100");
@@ -6206,10 +6056,9 @@ future<> executor::start() {
}
future<> executor::stop() {
co_await _describe_table_info_manager->stop();
// disconnect from the value source, but keep the value unchanged.
s_default_timeout_in_ms = utils::updateable_value<uint32_t>{s_default_timeout_in_ms()};
co_await _parsed_expression_cache->stop();
return _parsed_expression_cache->stop();
}
} // namespace alternator

View File

@@ -17,13 +17,11 @@
#include "service/client_state.hh"
#include "service_permit.hh"
#include "db/timeout_clock.hh"
#include "db/config.hh"
#include "alternator/error.hh"
#include "stats.hh"
#include "utils/rjson.hh"
#include "utils/updateable_value.hh"
#include "utils/simple_value_with_expiry.hh"
#include "tracing/trace_state.hh"
@@ -42,8 +40,6 @@ namespace cql3::selection {
namespace service {
class storage_proxy;
class cas_shard;
class storage_service;
}
namespace cdc {
@@ -60,9 +56,7 @@ class schema_builder;
namespace alternator {
enum class table_status;
class rmw_operation;
class put_or_delete_item;
schema_ptr get_table(service::storage_proxy& proxy, const rjson::value& request);
bool is_alternator_keyspace(const sstring& ks_name);
@@ -140,7 +134,6 @@ class expression_cache;
class executor : public peering_sharded_service<executor> {
gms::gossiper& _gossiper;
service::storage_service& _ss;
service::storage_proxy& _proxy;
service::migration_manager& _mm;
db::system_distributed_keyspace& _sdks;
@@ -153,11 +146,6 @@ class executor : public peering_sharded_service<executor> {
std::unique_ptr<parsed::expression_cache> _parsed_expression_cache;
struct describe_table_info_manager;
std::unique_ptr<describe_table_info_manager> _describe_table_info_manager;
future<> cache_newly_calculated_size_on_all_shards(schema_ptr schema, std::uint64_t size_in_bytes, std::chrono::nanoseconds ttl);
future<> fill_table_size(rjson::value &table_description, schema_ptr schema, bool deleting);
public:
using client_state = service::client_state;
// request_return_type is the return type of the executor methods, which
@@ -183,7 +171,6 @@ public:
executor(gms::gossiper& gossiper,
service::storage_proxy& proxy,
service::storage_service& ss,
service::migration_manager& mm,
db::system_distributed_keyspace& sdks,
cdc::metadata& cdc_metadata,
@@ -231,18 +218,6 @@ private:
friend class rmw_operation;
static void describe_key_schema(rjson::value& parent, const schema&, std::unordered_map<std::string,std::string> * = nullptr, const std::map<sstring, sstring> *tags = nullptr);
future<rjson::value> fill_table_description(schema_ptr schema, table_status tbl_status, service::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit);
future<executor::request_return_type> create_table_on_shard0(service::client_state&& client_state, tracing::trace_state_ptr trace_state, rjson::value request, bool enforce_authorization, bool warn_authorization, const db::tablets_mode_t::mode tablets_mode);
future<> do_batch_write(
std::vector<std::pair<schema_ptr, put_or_delete_item>> mutation_builders,
service::client_state& client_state,
tracing::trace_state_ptr trace_state,
service_permit permit);
future<> cas_write(schema_ptr schema, service::cas_shard cas_shard, const dht::decorated_key& dk,
const std::vector<put_or_delete_item>& mutation_builders, service::client_state& client_state,
tracing::trace_state_ptr trace_state, service_permit permit);
public:
static void describe_key_schema(rjson::value& parent, const schema& schema, std::unordered_map<std::string,std::string>&, const std::map<sstring, sstring> *tags = nullptr);

View File

@@ -1,301 +0,0 @@
/*
* Copyright 2025-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#include "alternator/http_compression.hh"
#include "alternator/server.hh"
#include <seastar/coroutine/maybe_yield.hh>
#include <zlib.h>
static logging::logger slogger("alternator-http-compression");
namespace alternator {
static constexpr size_t compressed_buffer_size = 1024;
class zlib_compressor {
z_stream _zs;
temporary_buffer<char> _output_buf;
noncopyable_function<future<>(temporary_buffer<char>&&)> _write_func;
public:
zlib_compressor(bool gzip, int compression_level, noncopyable_function<future<>(temporary_buffer<char>&&)> write_func)
: _write_func(std::move(write_func)) {
memset(&_zs, 0, sizeof(_zs));
if (deflateInit2(&_zs, std::clamp(compression_level, Z_NO_COMPRESSION, Z_BEST_COMPRESSION), Z_DEFLATED,
(gzip ? 16 : 0) + MAX_WBITS, 8, Z_DEFAULT_STRATEGY) != Z_OK) {
// Should only happen if memory allocation fails
throw std::bad_alloc();
}
}
~zlib_compressor() {
deflateEnd(&_zs);
}
future<> close() {
return compress(nullptr, 0, true);
}
future<> compress(const char* buf, size_t len, bool is_last_chunk = false) {
_zs.next_in = reinterpret_cast<unsigned char*>(const_cast<char*>(buf));
_zs.avail_in = (uInt) len;
int mode = is_last_chunk ? Z_FINISH : Z_NO_FLUSH;
while(_zs.avail_in > 0 || is_last_chunk) {
co_await coroutine::maybe_yield();
if (_output_buf.empty()) {
if (is_last_chunk) {
uint32_t max_buffer_size = 0;
deflatePending(&_zs, &max_buffer_size, nullptr);
max_buffer_size += deflateBound(&_zs, _zs.avail_in) + 1;
_output_buf = temporary_buffer<char>(std::min(compressed_buffer_size, (size_t) max_buffer_size));
} else {
_output_buf = temporary_buffer<char>(compressed_buffer_size);
}
_zs.next_out = reinterpret_cast<unsigned char*>(_output_buf.get_write());
_zs.avail_out = compressed_buffer_size;
}
int e = deflate(&_zs, mode);
if (e < Z_OK) {
throw api_error::internal("Error during compression of response body");
}
if (e == Z_STREAM_END || _zs.avail_out < compressed_buffer_size / 4) {
_output_buf.trim(compressed_buffer_size - _zs.avail_out);
co_await _write_func(std::move(_output_buf));
if (e == Z_STREAM_END) {
break;
}
}
}
}
};
// Helper string_view functions for parsing Accept-Encoding header
struct case_insensitive_cmp_sv {
bool operator()(std::string_view s1, std::string_view s2) const {
return std::equal(s1.begin(), s1.end(), s2.begin(), s2.end(),
[](char a, char b) { return ::tolower(a) == ::tolower(b); });
}
};
static inline std::string_view trim_left(std::string_view sv) {
while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.front())))
sv.remove_prefix(1);
return sv;
}
static inline std::string_view trim_right(std::string_view sv) {
while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.back())))
sv.remove_suffix(1);
return sv;
}
static inline std::string_view trim(std::string_view sv) {
return trim_left(trim_right(sv));
}
inline std::vector<std::string_view> split(std::string_view text, char separator) {
std::vector<std::string_view> tokens;
if (text == "") {
return tokens;
}
while (true) {
auto pos = text.find_first_of(separator);
if (pos != std::string_view::npos) {
tokens.emplace_back(text.data(), pos);
text.remove_prefix(pos + 1);
} else {
tokens.emplace_back(text);
break;
}
}
return tokens;
}
constexpr response_compressor::compression_type response_compressor::get_compression_type(std::string_view encoding) {
for (size_t i = 0; i < static_cast<size_t>(compression_type::count); ++i) {
if (case_insensitive_cmp_sv{}(encoding, compression_names[i])) {
return static_cast<compression_type>(i);
}
}
return compression_type::unknown;
}
response_compressor::compression_type response_compressor::find_compression(std::string_view accept_encoding, size_t response_size) {
std::optional<float> ct_q[static_cast<size_t>(compression_type::count)];
ct_q[static_cast<size_t>(compression_type::none)] = std::numeric_limits<float>::min(); // enabled, but lowest priority
compression_type selected_ct = compression_type::none;
std::vector<std::string_view> entries = split(accept_encoding, ',');
for (auto& e : entries) {
std::vector<std::string_view> params = split(e, ';');
if (params.size() == 0) {
continue;
}
compression_type ct = get_compression_type(trim(params[0]));
if (ct == compression_type::unknown) {
continue; // ignore unknown encoding types
}
if (ct_q[static_cast<size_t>(ct)].has_value() && ct_q[static_cast<size_t>(ct)] != 0.0f) {
continue; // already processed this encoding
}
if (response_size < _threshold[static_cast<size_t>(ct)]) {
continue; // below threshold treat as unknown
}
for (size_t i = 1; i < params.size(); ++i) { // find "q=" parameter
auto pos = params[i].find("q=");
if (pos == std::string_view::npos) {
continue;
}
std::string_view param = params[i].substr(pos + 2);
param = trim(param);
// parse quality value
float q_value = 1.0f;
auto [ptr, ec] = std::from_chars(param.data(), param.data() + param.size(), q_value);
if (ec != std::errc() || ptr != param.data() + param.size()) {
continue;
}
if (q_value < 0.0) {
q_value = 0.0;
} else if (q_value > 1.0) {
q_value = 1.0;
}
ct_q[static_cast<size_t>(ct)] = q_value;
break; // we parsed quality value
}
if (!ct_q[static_cast<size_t>(ct)].has_value()) {
ct_q[static_cast<size_t>(ct)] = 1.0f; // default quality value
}
// keep the highest encoding (in the order, unless 'any')
if (selected_ct == compression_type::any) {
if (ct_q[static_cast<size_t>(ct)] >= ct_q[static_cast<size_t>(selected_ct)]) {
selected_ct = ct;
}
} else {
if (ct_q[static_cast<size_t>(ct)] > ct_q[static_cast<size_t>(selected_ct)]) {
selected_ct = ct;
}
}
}
if (selected_ct == compression_type::any) {
// select any not mentioned or highest quality
selected_ct = compression_type::none;
for (size_t i = 0; i < static_cast<size_t>(compression_type::compressions_count); ++i) {
if (!ct_q[i].has_value()) {
return static_cast<compression_type>(i);
}
if (ct_q[i] > ct_q[static_cast<size_t>(selected_ct)]) {
selected_ct = static_cast<compression_type>(i);
}
}
}
return selected_ct;
}
static future<chunked_content> compress(response_compressor::compression_type ct, const db::config& cfg, std::string str) {
chunked_content compressed;
auto write = [&compressed](temporary_buffer<char>&& buf) -> future<> {
compressed.push_back(std::move(buf));
return make_ready_future<>();
};
zlib_compressor compressor(ct != response_compressor::compression_type::deflate,
cfg.alternator_response_gzip_compression_level(), std::move(write));
co_await compressor.compress(str.data(), str.size(), true);
co_return compressed;
}
static sstring flatten(chunked_content&& cc) {
size_t total_size = 0;
for (const auto& chunk : cc) {
total_size += chunk.size();
}
sstring result = sstring{ sstring::initialized_later{}, total_size };
size_t offset = 0;
for (const auto& chunk : cc) {
std::copy(chunk.begin(), chunk.end(), result.begin() + offset);
offset += chunk.size();
}
return result;
}
future<std::unique_ptr<http::reply>> response_compressor::generate_reply(std::unique_ptr<http::reply> rep, sstring accept_encoding, const char* content_type, std::string&& response_body) {
response_compressor::compression_type ct = find_compression(accept_encoding, response_body.size());
if (ct != response_compressor::compression_type::none) {
rep->add_header("Content-Encoding", get_encoding_name(ct));
rep->set_content_type(content_type);
return compress(ct, cfg, std::move(response_body)).then([rep = std::move(rep)] (chunked_content compressed) mutable {
rep->_content = flatten(std::move(compressed));
return make_ready_future<std::unique_ptr<http::reply>>(std::move(rep));
});
} else {
// Note that despite the move, there is a copy here -
// as str is std::string and rep->_content is sstring.
rep->_content = std::move(response_body);
rep->set_content_type(content_type);
}
return make_ready_future<std::unique_ptr<http::reply>>(std::move(rep));
}
template<typename Compressor>
class compressed_data_sink_impl : public data_sink_impl {
output_stream<char> _out;
Compressor _compressor;
public:
template<typename... Args>
compressed_data_sink_impl(output_stream<char>&& out, Args&&... args)
: _out(std::move(out)), _compressor(std::forward<Args>(args)..., [this](temporary_buffer<char>&& buf) {
return _out.write(std::move(buf));
}) { }
future<> put(std::span<temporary_buffer<char>> data) override {
return data_sink_impl::fallback_put(data, [this] (temporary_buffer<char>&& buf) {
return do_put(std::move(buf));
});
}
private:
future<> do_put(temporary_buffer<char> buf) {
co_return co_await _compressor.compress(buf.get(), buf.size());
}
future<> close() override {
return _compressor.close().then([this] {
return _out.close();
});
}
};
executor::body_writer compress(response_compressor::compression_type ct, const db::config& cfg, executor::body_writer&& bw) {
return [bw = std::move(bw), ct, level = cfg.alternator_response_gzip_compression_level()](output_stream<char>&& out) mutable -> future<> {
output_stream_options opts;
opts.trim_to_size = true;
std::unique_ptr<data_sink_impl> data_sink_impl;
switch (ct) {
case response_compressor::compression_type::gzip:
data_sink_impl = std::make_unique<compressed_data_sink_impl<zlib_compressor>>(std::move(out), true, level);
break;
case response_compressor::compression_type::deflate:
data_sink_impl = std::make_unique<compressed_data_sink_impl<zlib_compressor>>(std::move(out), false, level);
break;
case response_compressor::compression_type::none:
case response_compressor::compression_type::any:
case response_compressor::compression_type::unknown:
on_internal_error(slogger,"Compression not selected");
default:
on_internal_error(slogger, "Unsupported compression type for data sink");
}
return bw(output_stream<char>(data_sink(std::move(data_sink_impl)), compressed_buffer_size, opts));
};
}
future<std::unique_ptr<http::reply>> response_compressor::generate_reply(std::unique_ptr<http::reply> rep, sstring accept_encoding, const char* content_type, executor::body_writer&& body_writer) {
response_compressor::compression_type ct = find_compression(accept_encoding, std::numeric_limits<size_t>::max());
if (ct != response_compressor::compression_type::none) {
rep->add_header("Content-Encoding", get_encoding_name(ct));
rep->write_body(content_type, compress(ct, cfg, std::move(body_writer)));
} else {
rep->write_body(content_type, std::move(body_writer));
}
return make_ready_future<std::unique_ptr<http::reply>>(std::move(rep));
}
} // namespace alternator

View File

@@ -1,91 +0,0 @@
/*
* Copyright 2025-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#pragma once
#include "alternator/executor.hh"
#include <seastar/http/httpd.hh>
#include "db/config.hh"
namespace alternator {
class response_compressor {
public:
enum class compression_type {
gzip,
deflate,
compressions_count,
any = compressions_count,
none,
count,
unknown = count
};
static constexpr std::string_view compression_names[] = {
"gzip",
"deflate",
"*",
"identity"
};
static sstring get_encoding_name(compression_type ct) {
return sstring(compression_names[static_cast<size_t>(ct)]);
}
static constexpr compression_type get_compression_type(std::string_view encoding);
sstring get_accepted_encoding(const http::request& req) {
if (get_threshold() == 0) {
return "";
}
return req.get_header("Accept-Encoding");
}
compression_type find_compression(std::string_view accept_encoding, size_t response_size);
response_compressor(const db::config& cfg)
: cfg(cfg)
,_gzip_level_observer(
cfg.alternator_response_gzip_compression_level.observe([this](int v) {
update_threshold();
}))
,_gzip_threshold_observer(
cfg.alternator_response_compression_threshold_in_bytes.observe([this](uint32_t v) {
update_threshold();
}))
{
update_threshold();
}
response_compressor(const response_compressor& rhs) : response_compressor(rhs.cfg) {}
private:
const db::config& cfg;
utils::observable<int>::observer _gzip_level_observer;
utils::observable<uint32_t>::observer _gzip_threshold_observer;
uint32_t _threshold[static_cast<size_t>(compression_type::count)];
size_t get_threshold() { return _threshold[static_cast<size_t>(compression_type::any)]; }
void update_threshold() {
_threshold[static_cast<size_t>(compression_type::none)] = std::numeric_limits<uint32_t>::max();
_threshold[static_cast<size_t>(compression_type::any)] = std::numeric_limits<uint32_t>::max();
uint32_t gzip = cfg.alternator_response_gzip_compression_level() <= 0 ? std::numeric_limits<uint32_t>::max()
: cfg.alternator_response_compression_threshold_in_bytes();
_threshold[static_cast<size_t>(compression_type::gzip)] = gzip;
_threshold[static_cast<size_t>(compression_type::deflate)] = gzip;
for (size_t i = 0; i < static_cast<size_t>(compression_type::compressions_count); ++i) {
if (_threshold[i] < _threshold[static_cast<size_t>(compression_type::any)]) {
_threshold[static_cast<size_t>(compression_type::any)] = _threshold[i];
}
}
}
public:
future<std::unique_ptr<http::reply>> generate_reply(std::unique_ptr<http::reply> rep,
sstring accept_encoding, const char* content_type, std::string&& response_body);
future<std::unique_ptr<http::reply>> generate_reply(std::unique_ptr<http::reply> rep,
sstring accept_encoding, const char* content_type, executor::body_writer&& body_writer);
};
}

View File

@@ -496,7 +496,7 @@ const std::pair<std::string, const rjson::value*> unwrap_set(const rjson::value&
return {"", nullptr};
}
auto it = v.MemberBegin();
const std::string it_key = rjson::to_string(it->name);
const std::string it_key = it->name.GetString();
if (it_key != "SS" && it_key != "BS" && it_key != "NS") {
return {std::move(it_key), nullptr};
}

View File

@@ -34,7 +34,6 @@
#include "client_data.hh"
#include "utils/updateable_value.hh"
#include <zlib.h>
#include "alternator/http_compression.hh"
static logging::logger slogger("alternator-server");
@@ -112,12 +111,9 @@ class api_handler : public handler_base {
// type applies to all replies, both success and error.
static constexpr const char* REPLY_CONTENT_TYPE = "application/x-amz-json-1.0";
public:
api_handler(const std::function<future<executor::request_return_type>(std::unique_ptr<request> req)>& _handle,
const db::config& config) : _response_compressor(config), _f_handle(
api_handler(const std::function<future<executor::request_return_type>(std::unique_ptr<request> req)>& _handle) : _f_handle(
[this, _handle](std::unique_ptr<request> req, std::unique_ptr<reply> rep) {
sstring accept_encoding = _response_compressor.get_accepted_encoding(*req);
return seastar::futurize_invoke(_handle, std::move(req)).then_wrapped(
[this, rep = std::move(rep), accept_encoding=std::move(accept_encoding)](future<executor::request_return_type> resf) mutable {
return seastar::futurize_invoke(_handle, std::move(req)).then_wrapped([this, rep = std::move(rep)](future<executor::request_return_type> resf) mutable {
if (resf.failed()) {
// Exceptions of type api_error are wrapped as JSON and
// returned to the client as expected. Other types of
@@ -137,20 +133,22 @@ public:
return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
}
auto res = resf.get();
return std::visit(overloaded_functor {
std::visit(overloaded_functor {
[&] (std::string&& str) {
return _response_compressor.generate_reply(std::move(rep), std::move(accept_encoding),
REPLY_CONTENT_TYPE, std::move(str));
// Note that despite the move, there is a copy here -
// as str is std::string and rep->_content is sstring.
rep->_content = std::move(str);
rep->set_content_type(REPLY_CONTENT_TYPE);
},
[&] (executor::body_writer&& body_writer) {
return _response_compressor.generate_reply(std::move(rep), std::move(accept_encoding),
REPLY_CONTENT_TYPE, std::move(body_writer));
rep->write_body(REPLY_CONTENT_TYPE, std::move(body_writer));
},
[&] (const api_error& err) {
generate_error_reply(*rep, err);
return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
}
}, std::move(res));
return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
});
}) { }
@@ -179,7 +177,6 @@ protected:
slogger.trace("api_handler error case: {}", rep._content);
}
response_compressor _response_compressor;
future_handler_function _f_handle;
};
@@ -374,40 +371,13 @@ future<std::string> server::verify_signature(const request& req, const chunked_c
for (const auto& header : signed_headers) {
signed_headers_map.emplace(header, std::string_view());
}
std::vector<std::string> modified_values;
for (auto& header : req._headers) {
std::string header_str;
header_str.resize(header.first.size());
std::transform(header.first.begin(), header.first.end(), header_str.begin(), ::tolower);
auto it = signed_headers_map.find(header_str);
if (it != signed_headers_map.end()) {
// replace multiple spaces in the header value header.second with
// a single space, as required by AWS SigV4 header canonization.
// If we modify the value, we need to save it in modified_values
// to keep it alive.
std::string value;
value.reserve(header.second.size());
bool prev_space = false;
bool modified = false;
for (char ch : header.second) {
if (ch == ' ') {
if (!prev_space) {
value += ch;
prev_space = true;
} else {
modified = true; // skip a space
}
} else {
value += ch;
prev_space = false;
}
}
if (modified) {
modified_values.emplace_back(std::move(value));
it->second = std::string_view(modified_values.back());
} else {
it->second = std::string_view(header.second);
}
it->second = std::string_view(header.second);
}
}
@@ -420,7 +390,6 @@ future<std::string> server::verify_signature(const request& req, const chunked_c
datestamp = std::move(datestamp),
signed_headers_str = std::move(signed_headers_str),
signed_headers_map = std::move(signed_headers_map),
modified_values = std::move(modified_values),
region = std::move(region),
service = std::move(service),
user_signature = std::move(user_signature)] (future<key_cache::value_ptr> key_ptr_fut) {
@@ -591,11 +560,11 @@ read_entire_stream(input_stream<char>& inp, size_t length_limit) {
class safe_gzip_zstream {
z_stream _zs;
public:
// If gzip is true, decode a gzip header (for "Content-Encoding: gzip").
// Otherwise, a zlib header (for "Content-Encoding: deflate").
safe_gzip_zstream(bool gzip = true) {
safe_gzip_zstream() {
memset(&_zs, 0, sizeof(_zs));
if (inflateInit2(&_zs, gzip ? 16 + MAX_WBITS : MAX_WBITS) != Z_OK) {
// The strange 16 + WMAX_BITS tells zlib to expect and decode
// a gzip header, not a zlib header.
if (inflateInit2(&_zs, 16 + MAX_WBITS) != Z_OK) {
// Should only happen if memory allocation fails
throw std::bad_alloc();
}
@@ -614,21 +583,19 @@ public:
}
};
// ungzip() takes a chunked_content of a compressed request body, and returns
// the uncompressed content as a chunked_content. If gzip is true, we expect
// gzip header (for "Content-Encoding: gzip"), if gzip is false, we expect a
// zlib header (for "Content-Encoding: deflate").
// ungzip() takes a chunked_content with a gzip-compressed request body,
// uncompresses it, and returns the uncompressed content as a chunked_content.
// If the uncompressed content exceeds length_limit, an error is thrown.
static future<chunked_content>
ungzip(chunked_content&& compressed_body, size_t length_limit, bool gzip = true) {
ungzip(chunked_content&& compressed_body, size_t length_limit) {
chunked_content ret;
// output_buf can be any size - when uncompressing input_buf, it doesn't
// need to fit in a single output_buf, we'll use multiple output_buf for
// a single input_buf if needed.
constexpr size_t OUTPUT_BUF_SIZE = 4096;
temporary_buffer<char> output_buf;
safe_gzip_zstream strm(gzip);
bool complete_stream = false; // empty input is not a valid gzip/deflate
safe_gzip_zstream strm;
bool complete_stream = false; // empty input is not a valid gzip
size_t total_out_bytes = 0;
for (const temporary_buffer<char>& input_buf : compressed_body) {
if (input_buf.empty()) {
@@ -731,8 +698,6 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
sstring content_encoding = req->get_header("Content-Encoding");
if (content_encoding == "gzip") {
content = co_await ungzip(std::move(content), request_content_length_limit);
} else if (content_encoding == "deflate") {
content = co_await ungzip(std::move(content), request_content_length_limit, false);
} else if (!content_encoding.empty()) {
// DynamoDB returns a 500 error for unsupported Content-Encoding.
// I'm not sure if this is the best error code, but let's do it too.
@@ -743,12 +708,8 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
// As long as the system_clients_entry object is alive, this request will
// be visible in the "system.clients" virtual table. When requested, this
// entry will be formatted by server::ongoing_request::make_client_data().
auto user_agent_header = co_await _connection_options_keys_and_values.get_or_load(req->get_header("User-Agent"), [] (const client_options_cache_key_type&) {
return make_ready_future<options_cache_value_type>(options_cache_value_type{});
});
auto system_clients_entry = _ongoing_requests.emplace(
req->get_client_address(), std::move(user_agent_header),
req->get_client_address(), req->get_header("User-Agent"),
username, current_scheduling_group(),
req->get_protocol_name() == "https");
@@ -793,7 +754,7 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
void server::set_routes(routes& r) {
api_handler* req_handler = new api_handler([this] (std::unique_ptr<request> req) mutable {
return handle_api_request(std::move(req));
}, _proxy.data_dictionary().get_config());
});
r.put(operation_type::POST, "/", req_handler);
r.put(operation_type::GET, "/", new health_handler(_pending_requests));
@@ -904,9 +865,7 @@ server::server(executor& exec, service::storage_proxy& proxy, gms::gossiper& gos
} {
}
future<> server::init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port,
std::optional<uint16_t> port_proxy_protocol, std::optional<uint16_t> https_port_proxy_protocol,
std::optional<tls::credentials_builder> creds,
future<> server::init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port, std::optional<tls::credentials_builder> creds,
utils::updateable_value<bool> enforce_authorization, utils::updateable_value<bool> warn_authorization, utils::updateable_value<uint64_t> max_users_query_size_in_trace_output,
semaphore* memory_limiter, utils::updateable_value<uint32_t> max_concurrent_requests) {
_memory_limiter = memory_limiter;
@@ -914,28 +873,20 @@ future<> server::init(net::inet_address addr, std::optional<uint16_t> port, std:
_warn_authorization = std::move(warn_authorization);
_max_concurrent_requests = std::move(max_concurrent_requests);
_max_users_query_size_in_trace_output = std::move(max_users_query_size_in_trace_output);
if (!port && !https_port && !port_proxy_protocol && !https_port_proxy_protocol) {
if (!port && !https_port) {
return make_exception_future<>(std::runtime_error("Either regular port or TLS port"
" must be specified in order to init an alternator HTTP server instance"));
}
return seastar::async([this, addr, port, https_port, port_proxy_protocol, https_port_proxy_protocol, creds] {
return seastar::async([this, addr, port, https_port, creds] {
_executor.start().get();
if (port || port_proxy_protocol) {
if (port) {
set_routes(_http_server._routes);
_http_server.set_content_streaming(true);
if (port) {
_http_server.listen(socket_address{addr, *port}).get();
}
if (port_proxy_protocol) {
listen_options lo;
lo.reuse_address = true;
lo.proxy_protocol = true;
_http_server.listen(socket_address{addr, *port_proxy_protocol}, lo).get();
}
_http_server.listen(socket_address{addr, *port}).get();
_enabled_servers.push_back(std::ref(_http_server));
}
if (https_port || https_port_proxy_protocol) {
if (https_port) {
set_routes(_https_server._routes);
_https_server.set_content_streaming(true);
@@ -955,15 +906,7 @@ future<> server::init(net::inet_address addr, std::optional<uint16_t> port, std:
} else {
_credentials = creds->build_server_credentials();
}
if (https_port) {
_https_server.listen(socket_address{addr, *https_port}, _credentials).get();
}
if (https_port_proxy_protocol) {
listen_options lo;
lo.reuse_address = true;
lo.proxy_protocol = true;
_https_server.listen(socket_address{addr, *https_port_proxy_protocol}, lo, _credentials).get();
}
_https_server.listen(socket_address{addr, *https_port}, _credentials).get();
_enabled_servers.push_back(std::ref(_https_server));
}
});
@@ -1036,15 +979,16 @@ client_data server::ongoing_request::make_client_data() const {
// and keep "driver_version" unset.
cd.driver_name = _user_agent;
// Leave "protocol_version" unset, it has no meaning in Alternator.
// Leave "hostname", "ssl_protocol" and "ssl_cipher_suite" unset for Alternator.
// Note: CQL sets ssl_protocol and ssl_cipher_suite via generic_server::connection base class.
// Leave "hostname", "ssl_protocol" and "ssl_cipher_suite" unset.
// As reported in issue #9216, we never set these fields in CQL
// either (see cql_server::connection::make_client_data()).
return cd;
}
future<utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>> server::get_client_data() {
utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>> ret;
future<utils::chunked_vector<client_data>> server::get_client_data() {
utils::chunked_vector<client_data> ret;
co_await _ongoing_requests.for_each_gently([&ret] (const ongoing_request& r) {
ret.emplace_back(make_foreign(std::make_unique<client_data>(r.make_client_data())));
ret.emplace_back(r.make_client_data());
});
co_return ret;
}

View File

@@ -55,7 +55,6 @@ class server : public peering_sharded_service<server> {
// though it isn't really relevant for Alternator which defines its own
// timeouts separately. We can create this object only once.
updateable_timeout_config _timeout_config;
client_options_cache_type _connection_options_keys_and_values;
alternator_callbacks_map _callbacks;
@@ -89,7 +88,7 @@ class server : public peering_sharded_service<server> {
// is called when reading the "system.clients" virtual table.
struct ongoing_request {
socket_address _client_address;
client_options_cache_entry_type _user_agent;
sstring _user_agent;
sstring _username;
scheduling_group _scheduling_group;
bool _is_https;
@@ -100,9 +99,7 @@ class server : public peering_sharded_service<server> {
public:
server(executor& executor, service::storage_proxy& proxy, gms::gossiper& gossiper, auth::service& service, qos::service_level_controller& sl_controller);
future<> init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port,
std::optional<uint16_t> port_proxy_protocol, std::optional<uint16_t> https_port_proxy_protocol,
std::optional<tls::credentials_builder> creds,
future<> init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port, std::optional<tls::credentials_builder> creds,
utils::updateable_value<bool> enforce_authorization, utils::updateable_value<bool> warn_authorization, utils::updateable_value<uint64_t> max_users_query_size_in_trace_output,
semaphore* memory_limiter, utils::updateable_value<uint32_t> max_concurrent_requests);
future<> stop();
@@ -110,7 +107,7 @@ public:
// table "system.clients" is read. It is expected to generate a list of
// clients connected to this server (on this shard). This function is
// called by alternator::controller::get_client_data().
future<utils::chunked_vector<foreign_ptr<std::unique_ptr<client_data>>>> get_client_data();
future<utils::chunked_vector<client_data>> get_client_data();
private:
void set_routes(seastar::httpd::routes& r);
// If verification succeeds, returns the authenticated user's username

View File

@@ -93,7 +93,7 @@ future<executor::request_return_type> executor::update_time_to_live(client_state
if (v->GetStringLength() < 1 || v->GetStringLength() > 255) {
co_return api_error::validation("The length of AttributeName must be between 1 and 255");
}
sstring attribute_name = rjson::to_sstring(*v);
sstring attribute_name(v->GetString(), v->GetStringLength());
co_await verify_permission(_enforce_authorization, _warn_authorization, client_state, schema, auth::permission::ALTER, _stats);
co_await db::modify_tags(_mm, schema->ks_name(), schema->cf_name(), [&](std::map<sstring, sstring>& tags_map) {
@@ -767,7 +767,7 @@ static future<bool> scan_table(
// by tasking another node to take over scanning of the dead node's primary
// ranges. What we do here is that this node will also check expiration
// on its *secondary* ranges - but only those whose primary owner is down.
auto tablet_secondary_replica = tablet_map.get_secondary_replica(*tablet, erm->get_topology()); // throws if no secondary replica
auto tablet_secondary_replica = tablet_map.get_secondary_replica(*tablet); // throws if no secondary replica
if (tablet_secondary_replica.host == my_host_id && tablet_secondary_replica.shard == this_shard_id()) {
if (!gossiper.is_alive(tablet_primary_replica.host)) {
co_await scan_tablet(*tablet, proxy, abort_source, page_sem, expiration_stats, scan_ctx, tablet_map);

View File

@@ -31,7 +31,6 @@ set(swagger_files
api-doc/column_family.json
api-doc/commitlog.json
api-doc/compaction_manager.json
api-doc/client_routes.json
api-doc/config.json
api-doc/cql_server_test.json
api-doc/endpoint_snitch_info.json
@@ -69,7 +68,6 @@ target_sources(api
PRIVATE
api.cc
cache_service.cc
client_routes.cc
collectd.cc
column_family.cc
commitlog.cc

View File

@@ -1,23 +0,0 @@
, "client_routes_entry": {
"id": "client_routes_entry",
"summary": "An entry storing client routes",
"properties": {
"connection_id": {"type": "string"},
"host_id": {"type": "string", "format": "uuid"},
"address": {"type": "string"},
"port": {"type": "integer"},
"tls_port": {"type": "integer"},
"alternator_port": {"type": "integer"},
"alternator_https_port": {"type": "integer"}
},
"required": ["connection_id", "host_id", "address"]
}
, "client_routes_key": {
"id": "client_routes_key",
"summary": "A key of client_routes_entry",
"properties": {
"connection_id": {"type": "string"},
"host_id": {"type": "string", "format": "uuid"}
}
}

View File

@@ -1,74 +0,0 @@
, "/v2/client-routes":{
"get": {
"description":"List all client route entries",
"operationId":"get_client_routes",
"tags":["client_routes"],
"produces":[
"application/json"
],
"parameters":[],
"responses":{
"200":{
"schema":{
"type":"array",
"items":{ "$ref":"#/definitions/client_routes_entry" }
}
},
"default":{
"description":"unexpected error",
"schema":{"$ref":"#/definitions/ErrorModel"}
}
}
},
"post": {
"description":"Upsert one or more client route entries",
"operationId":"set_client_routes",
"tags":["client_routes"],
"parameters":[
{
"name":"body",
"in":"body",
"required":true,
"schema":{
"type":"array",
"items":{ "$ref":"#/definitions/client_routes_entry" }
}
}
],
"responses":{
"200":{ "description": "OK" },
"default":{
"description":"unexpected error",
"schema":{ "$ref":"#/definitions/ErrorModel" }
}
}
},
"delete": {
"description":"Delete one or more client route entries",
"operationId":"delete_client_routes",
"tags":["client_routes"],
"parameters":[
{
"name":"body",
"in":"body",
"required":true,
"schema":{
"type":"array",
"items":{ "$ref":"#/definitions/client_routes_key" }
}
}
],
"responses":{
"200":{
"description": "OK"
},
"default":{
"description":"unexpected error",
"schema":{
"$ref":"#/definitions/ErrorModel"
}
}
}
}
}

View File

@@ -37,7 +37,6 @@
#include "raft.hh"
#include "gms/gossip_address_map.hh"
#include "service_levels.hh"
#include "client_routes.hh"
logging::logger apilog("api");
@@ -68,11 +67,9 @@ future<> set_server_init(http_context& ctx) {
rb02->set_api_doc(r);
rb02->register_api_file(r, "swagger20_header");
rb02->register_api_file(r, "metrics");
rb02->register_api_file(r, "client_routes");
rb->register_function(r, "system",
"The system related API");
rb02->add_definitions_file(r, "metrics");
rb02->add_definitions_file(r, "client_routes");
set_system(ctx, r);
rb->register_function(r, "error_injection",
"The error injection API");
@@ -132,16 +129,6 @@ future<> unset_server_storage_service(http_context& ctx) {
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_storage_service(ctx, r); });
}
future<> set_server_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr) {
return ctx.http_server.set_routes([&ctx, &cr] (routes& r) {
set_client_routes(ctx, r, cr);
});
}
future<> unset_server_client_routes(http_context& ctx) {
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_client_routes(ctx, r); });
}
future<> set_load_meter(http_context& ctx, service::load_meter& lm) {
return ctx.http_server.set_routes([&ctx, &lm] (routes& r) { set_load_meter(ctx, r, lm); });
}

View File

@@ -29,7 +29,6 @@ class storage_proxy;
class storage_service;
class raft_group0_client;
class raft_group_registry;
class client_routes_service;
} // namespace service
@@ -100,8 +99,6 @@ future<> set_server_snitch(http_context& ctx, sharded<locator::snitch_ptr>& snit
future<> unset_server_snitch(http_context& ctx);
future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, service::raft_group0_client&);
future<> unset_server_storage_service(http_context& ctx);
future<> set_server_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr);
future<> unset_server_client_routes(http_context& ctx);
future<> set_server_sstables_loader(http_context& ctx, sharded<sstables_loader>& sst_loader);
future<> unset_server_sstables_loader(http_context& ctx);
future<> set_server_view_builder(http_context& ctx, sharded<db::view::view_builder>& vb, sharded<gms::gossiper>& g);

View File

@@ -1,176 +0,0 @@
/*
* Copyright (C) 2025-present ScyllaDB
*
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#include <seastar/http/short_streams.hh>
#include "client_routes.hh"
#include "api/api.hh"
#include "service/storage_service.hh"
#include "service/client_routes.hh"
#include "utils/rjson.hh"
#include "api/api-doc/client_routes.json.hh"
using namespace seastar::httpd;
using namespace std::chrono_literals;
using namespace json;
extern logging::logger apilog;
namespace api {
static void validate_client_routes_endpoint(sharded<service::client_routes_service>& cr, sstring endpoint_name) {
if (!cr.local().get_feature_service().client_routes) {
apilog.warn("{}: called before the cluster feature was enabled", endpoint_name);
throw std::runtime_error(fmt::format("{} requires all nodes to support the CLIENT_ROUTES cluster feature", endpoint_name));
}
}
static sstring parse_string(const char* name, rapidjson::Value const& v) {
const auto it = v.FindMember(name);
if (it == v.MemberEnd()) {
throw bad_param_exception(fmt::format("Missing '{}'", name));
}
if (!it->value.IsString()) {
throw bad_param_exception(fmt::format("'{}' must be a string", name));
}
return {it->value.GetString(), it->value.GetStringLength()};
}
static std::optional<uint32_t> parse_port(const char* name, rapidjson::Value const& v) {
const auto it = v.FindMember(name);
if (it == v.MemberEnd()) {
return std::nullopt;
}
if (!it->value.IsInt()) {
throw bad_param_exception(fmt::format("'{}' must be an integer", name));
}
auto port = it->value.GetInt();
if (port < 1 || port > 65535) {
throw bad_param_exception(fmt::format("'{}' value={} is outside the allowed port range", name, port));
}
return port;
}
static std::vector<service::client_routes_service::client_route_entry> parse_set_client_array(const rapidjson::Document& root) {
if (!root.IsArray()) {
throw bad_param_exception("Body must be a JSON array");
}
std::vector<service::client_routes_service::client_route_entry> v;
v.reserve(root.GetArray().Size());
for (const auto& element : root.GetArray()) {
if (!element.IsObject()) { throw bad_param_exception("Each element must be object"); }
const auto port = parse_port("port", element);
const auto tls_port = parse_port("tls_port", element);
const auto alternator_port = parse_port("alternator_port", element);
const auto alternator_https_port = parse_port("alternator_https_port", element);
if (!port.has_value() && !tls_port.has_value() && !alternator_port.has_value() && !alternator_https_port.has_value()) {
throw bad_param_exception("At least one port field ('port', 'tls_port', 'alternator_port', 'alternator_https_port') must be specified");
}
v.emplace_back(
parse_string("connection_id", element),
utils::UUID{parse_string("host_id", element)},
parse_string("address", element),
port,
tls_port,
alternator_port,
alternator_https_port
);
}
return v;
}
static
future<json::json_return_type>
rest_set_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr, std::unique_ptr<http::request> req) {
validate_client_routes_endpoint(cr, "rest_set_client_routes");
rapidjson::Document root;
auto content = co_await util::read_entire_stream_contiguous(*req->content_stream);
root.Parse(content.c_str());
co_await cr.local().set_client_routes(parse_set_client_array(root));
co_return seastar::json::json_void();
}
static std::vector<service::client_routes_service::client_route_key> parse_delete_client_array(const rapidjson::Document& root) {
if (!root.IsArray()) {
throw bad_param_exception("Body must be a JSON array");
}
std::vector<service::client_routes_service::client_route_key> v;
v.reserve(root.GetArray().Size());
for (const auto& element : root.GetArray()) {
v.emplace_back(
parse_string("connection_id", element),
utils::UUID{parse_string("host_id", element)}
);
}
return v;
}
static
future<json::json_return_type>
rest_delete_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr, std::unique_ptr<http::request> req) {
validate_client_routes_endpoint(cr, "delete_client_routes");
rapidjson::Document root;
auto content = co_await util::read_entire_stream_contiguous(*req->content_stream);
root.Parse(content.c_str());
co_await cr.local().delete_client_routes(parse_delete_client_array(root));
co_return seastar::json::json_void();
}
static
future<json::json_return_type>
rest_get_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr, std::unique_ptr<http::request> req) {
validate_client_routes_endpoint(cr, "get_client_routes");
co_return co_await cr.invoke_on(0, [] (service::client_routes_service& cr) -> future<json::json_return_type> {
co_return json::json_return_type(stream_range_as_array(co_await cr.get_client_routes(), [](const service::client_routes_service::client_route_entry & entry) {
seastar::httpd::client_routes_json::client_routes_entry obj;
obj.connection_id = entry.connection_id;
obj.host_id = fmt::to_string(entry.host_id);
obj.address = entry.address;
if (entry.port.has_value()) { obj.port = entry.port.value(); }
if (entry.tls_port.has_value()) { obj.tls_port = entry.tls_port.value(); }
if (entry.alternator_port.has_value()) { obj.alternator_port = entry.alternator_port.value(); }
if (entry.alternator_https_port.has_value()) { obj.alternator_https_port = entry.alternator_https_port.value(); }
return obj;
}));
});
}
void set_client_routes(http_context& ctx, routes& r, sharded<service::client_routes_service>& cr) {
seastar::httpd::client_routes_json::set_client_routes.set(r, [&ctx, &cr] (std::unique_ptr<seastar::http::request> req) {
return rest_set_client_routes(ctx, cr, std::move(req));
});
seastar::httpd::client_routes_json::delete_client_routes.set(r, [&ctx, &cr] (std::unique_ptr<seastar::http::request> req) {
return rest_delete_client_routes(ctx, cr, std::move(req));
});
seastar::httpd::client_routes_json::get_client_routes.set(r, [&ctx, &cr] (std::unique_ptr<seastar::http::request> req) {
return rest_get_client_routes(ctx, cr, std::move(req));
});
}
void unset_client_routes(http_context& ctx, routes& r) {
seastar::httpd::client_routes_json::set_client_routes.unset(r);
seastar::httpd::client_routes_json::delete_client_routes.unset(r);
seastar::httpd::client_routes_json::get_client_routes.unset(r);
}
}

View File

@@ -1,20 +0,0 @@
/*
* Copyright (C) 2025-present ScyllaDB
*
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#pragma once
#include <seastar/core/sharded.hh>
#include <seastar/json/json_elements.hh>
#include "api/api_init.hh"
namespace api {
void set_client_routes(http_context& ctx, httpd::routes& r, sharded<service::client_routes_service>& cr);
void unset_client_routes(http_context& ctx, httpd::routes& r);
}

View File

@@ -515,15 +515,6 @@ void set_sstables_loader(http_context& ctx, routes& r, sharded<sstables_loader>&
auto sstables = parsed.GetArray() |
std::views::transform([] (const auto& s) { return sstring(rjson::to_string_view(s)); }) |
std::ranges::to<std::vector>();
apilog.info("Restore invoked with following parameters: keyspace={}, table={}, endpoint={}, bucket={}, prefix={}, sstables_count={}, scope={}, primary_replica_only={}",
keyspace,
table,
endpoint,
bucket,
prefix,
sstables.size(),
scope,
primary_replica_only);
auto task_id = co_await sst_loader.local().download_new_sstables(keyspace, table, prefix, std::move(sstables), endpoint, bucket, scope, primary_replica_only);
co_return json::json_return_type(fmt::to_string(task_id));
});
@@ -556,13 +547,17 @@ void set_view_builder(http_context& ctx, routes& r, sharded<db::view::view_build
vp.insert(b.second);
}
}
std::vector<sstring> res;
replica::database& db = vb.local().get_db();
auto uuid = validate_table(db, ks, cf_name);
replica::column_family& cf = db.find_column_family(uuid);
co_return cf.get_index_manager().list_indexes()
| std::views::transform([] (const auto& i) { return i.metadata().name(); })
| std::views::filter([&vp] (const auto& n) { return vp.contains(secondary_index::index_table_name(n)); })
| std::ranges::to<std::vector>();
res.reserve(cf.get_index_manager().list_indexes().size());
for (auto&& i : cf.get_index_manager().list_indexes()) {
if (vp.contains(secondary_index::index_table_name(i.metadata().name()))) {
res.emplace_back(i.metadata().name());
}
}
co_return res;
});
}
@@ -893,9 +888,7 @@ rest_exclude_node(sharded<service::storage_service>& ss, std::unique_ptr<http::r
}
apilog.info("exclude_node: hosts={}", hosts);
co_await ss.local().run_with_no_api_lock([hosts = std::move(hosts)] (service::storage_service& ss) {
return ss.mark_excluded(hosts);
});
co_await ss.local().mark_excluded(hosts);
co_return json_void();
}
@@ -1805,100 +1798,84 @@ rest_bind(FuncType func, BindArgs&... args) {
return std::bind_front(func, std::ref(args)...);
}
// Hold the storage_service async gate for the duration of async REST
// handlers so stop() drains in-flight requests before teardown.
// Synchronous handlers don't yield and need no gate.
static seastar::httpd::future_json_function
gated(sharded<service::storage_service>& ss, seastar::httpd::future_json_function fn) {
return [fn = std::move(fn), &ss](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
auto holder = ss.local().hold_async_gate();
co_return co_await fn(std::move(req));
};
}
static seastar::httpd::json_request_function
gated(sharded<service::storage_service>&, seastar::httpd::json_request_function fn) {
return fn;
}
void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_service>& ss, service::raft_group0_client& group0_client) {
ss::get_token_endpoint.set(r, gated(ss, rest_bind(rest_get_token_endpoint, ctx, ss)));
ss::toppartitions_generic.set(r, gated(ss, rest_bind(rest_toppartitions_generic, ctx)));
ss::get_release_version.set(r, gated(ss, rest_bind(rest_get_release_version, ss)));
ss::get_scylla_release_version.set(r, gated(ss, rest_bind(rest_get_scylla_release_version, ss)));
ss::get_schema_version.set(r, gated(ss, rest_bind(rest_get_schema_version, ss)));
ss::get_range_to_endpoint_map.set(r, gated(ss, rest_bind(rest_get_range_to_endpoint_map, ctx, ss)));
ss::get_pending_range_to_endpoint_map.set(r, gated(ss, rest_bind(rest_get_pending_range_to_endpoint_map, ctx)));
ss::describe_ring.set(r, gated(ss, rest_bind(rest_describe_ring, ctx, ss)));
ss::get_current_generation_number.set(r, gated(ss, rest_bind(rest_get_current_generation_number, ss)));
ss::get_natural_endpoints.set(r, gated(ss, rest_bind(rest_get_natural_endpoints, ctx, ss)));
ss::get_natural_endpoints_v2.set(r, gated(ss, rest_bind(rest_get_natural_endpoints_v2, ctx, ss)));
ss::cdc_streams_check_and_repair.set(r, gated(ss, rest_bind(rest_cdc_streams_check_and_repair, ss)));
ss::cleanup_all.set(r, gated(ss, rest_bind(rest_cleanup_all, ctx, ss)));
ss::reset_cleanup_needed.set(r, gated(ss, rest_bind(rest_reset_cleanup_needed, ctx, ss)));
ss::force_flush.set(r, gated(ss, rest_bind(rest_force_flush, ctx)));
ss::force_keyspace_flush.set(r, gated(ss, rest_bind(rest_force_keyspace_flush, ctx)));
ss::decommission.set(r, gated(ss, rest_bind(rest_decommission, ss)));
ss::move.set(r, gated(ss, rest_bind(rest_move, ss)));
ss::remove_node.set(r, gated(ss, rest_bind(rest_remove_node, ss)));
ss::exclude_node.set(r, gated(ss, rest_bind(rest_exclude_node, ss)));
ss::get_removal_status.set(r, gated(ss, rest_bind(rest_get_removal_status, ss)));
ss::force_remove_completion.set(r, gated(ss, rest_bind(rest_force_remove_completion, ss)));
ss::set_logging_level.set(r, gated(ss, rest_bind(rest_set_logging_level)));
ss::get_logging_levels.set(r, gated(ss, rest_bind(rest_get_logging_levels)));
ss::get_operation_mode.set(r, gated(ss, rest_bind(rest_get_operation_mode, ss)));
ss::is_starting.set(r, gated(ss, rest_bind(rest_is_starting, ss)));
ss::get_drain_progress.set(r, gated(ss, rest_bind(rest_get_drain_progress, ss)));
ss::drain.set(r, gated(ss, rest_bind(rest_drain, ss)));
ss::stop_gossiping.set(r, gated(ss, rest_bind(rest_stop_gossiping, ss)));
ss::start_gossiping.set(r, gated(ss, rest_bind(rest_start_gossiping, ss)));
ss::is_gossip_running.set(r, gated(ss, rest_bind(rest_is_gossip_running, ss)));
ss::stop_daemon.set(r, gated(ss, rest_bind(rest_stop_daemon)));
ss::is_initialized.set(r, gated(ss, rest_bind(rest_is_initialized, ss)));
ss::join_ring.set(r, gated(ss, rest_bind(rest_join_ring)));
ss::is_joined.set(r, gated(ss, rest_bind(rest_is_joined, ss)));
ss::is_incremental_backups_enabled.set(r, gated(ss, rest_bind(rest_is_incremental_backups_enabled, ctx)));
ss::set_incremental_backups_enabled.set(r, gated(ss, rest_bind(rest_set_incremental_backups_enabled, ctx)));
ss::rebuild.set(r, gated(ss, rest_bind(rest_rebuild, ss)));
ss::bulk_load.set(r, gated(ss, rest_bind(rest_bulk_load)));
ss::bulk_load_async.set(r, gated(ss, rest_bind(rest_bulk_load_async)));
ss::reschedule_failed_deletions.set(r, gated(ss, rest_bind(rest_reschedule_failed_deletions)));
ss::sample_key_range.set(r, gated(ss, rest_bind(rest_sample_key_range)));
ss::reset_local_schema.set(r, gated(ss, rest_bind(rest_reset_local_schema, ss)));
ss::set_trace_probability.set(r, gated(ss, rest_bind(rest_set_trace_probability)));
ss::get_trace_probability.set(r, gated(ss, rest_bind(rest_get_trace_probability)));
ss::get_slow_query_info.set(r, gated(ss, rest_bind(rest_get_slow_query_info)));
ss::set_slow_query.set(r, gated(ss, rest_bind(rest_set_slow_query)));
ss::deliver_hints.set(r, gated(ss, rest_bind(rest_deliver_hints)));
ss::get_cluster_name.set(r, gated(ss, rest_bind(rest_get_cluster_name, ss)));
ss::get_partitioner_name.set(r, gated(ss, rest_bind(rest_get_partitioner_name, ss)));
ss::get_tombstone_warn_threshold.set(r, gated(ss, rest_bind(rest_get_tombstone_warn_threshold)));
ss::set_tombstone_warn_threshold.set(r, gated(ss, rest_bind(rest_set_tombstone_warn_threshold)));
ss::get_tombstone_failure_threshold.set(r, gated(ss, rest_bind(rest_get_tombstone_failure_threshold)));
ss::set_tombstone_failure_threshold.set(r, gated(ss, rest_bind(rest_set_tombstone_failure_threshold)));
ss::get_batch_size_failure_threshold.set(r, gated(ss, rest_bind(rest_get_batch_size_failure_threshold)));
ss::set_batch_size_failure_threshold.set(r, gated(ss, rest_bind(rest_set_batch_size_failure_threshold)));
ss::set_hinted_handoff_throttle_in_kb.set(r, gated(ss, rest_bind(rest_set_hinted_handoff_throttle_in_kb)));
ss::get_exceptions.set(r, gated(ss, rest_bind(rest_get_exceptions, ss)));
ss::get_total_hints_in_progress.set(r, gated(ss, rest_bind(rest_get_total_hints_in_progress)));
ss::get_total_hints.set(r, gated(ss, rest_bind(rest_get_total_hints)));
ss::get_ownership.set(r, gated(ss, rest_bind(rest_get_ownership, ctx, ss)));
ss::get_effective_ownership.set(r, gated(ss, rest_bind(rest_get_effective_ownership, ctx, ss)));
ss::retrain_dict.set(r, gated(ss, rest_bind(rest_retrain_dict, ctx, ss, group0_client)));
ss::estimate_compression_ratios.set(r, gated(ss, rest_bind(rest_estimate_compression_ratios, ctx, ss)));
ss::sstable_info.set(r, gated(ss, rest_bind(rest_sstable_info, ctx)));
ss::reload_raft_topology_state.set(r, gated(ss, rest_bind(rest_reload_raft_topology_state, ss, group0_client)));
ss::upgrade_to_raft_topology.set(r, gated(ss, rest_bind(rest_upgrade_to_raft_topology, ss)));
ss::raft_topology_upgrade_status.set(r, gated(ss, rest_bind(rest_raft_topology_upgrade_status, ss)));
ss::raft_topology_get_cmd_status.set(r, gated(ss, rest_bind(rest_raft_topology_get_cmd_status, ss)));
ss::move_tablet.set(r, gated(ss, rest_bind(rest_move_tablet, ctx, ss)));
ss::add_tablet_replica.set(r, gated(ss, rest_bind(rest_add_tablet_replica, ctx, ss)));
ss::del_tablet_replica.set(r, gated(ss, rest_bind(rest_del_tablet_replica, ctx, ss)));
ss::repair_tablet.set(r, gated(ss, rest_bind(rest_repair_tablet, ctx, ss)));
ss::tablet_balancing_enable.set(r, gated(ss, rest_bind(rest_tablet_balancing_enable, ss)));
ss::quiesce_topology.set(r, gated(ss, rest_bind(rest_quiesce_topology, ss)));
sp::get_schema_versions.set(r, gated(ss, rest_bind(rest_get_schema_versions, ss)));
ss::drop_quarantined_sstables.set(r, gated(ss, rest_bind(rest_drop_quarantined_sstables, ctx, ss)));
ss::get_token_endpoint.set(r, rest_bind(rest_get_token_endpoint, ctx, ss));
ss::toppartitions_generic.set(r, rest_bind(rest_toppartitions_generic, ctx));
ss::get_release_version.set(r, rest_bind(rest_get_release_version, ss));
ss::get_scylla_release_version.set(r, rest_bind(rest_get_scylla_release_version, ss));
ss::get_schema_version.set(r, rest_bind(rest_get_schema_version, ss));
ss::get_range_to_endpoint_map.set(r, rest_bind(rest_get_range_to_endpoint_map, ctx, ss));
ss::get_pending_range_to_endpoint_map.set(r, rest_bind(rest_get_pending_range_to_endpoint_map, ctx));
ss::describe_ring.set(r, rest_bind(rest_describe_ring, ctx, ss));
ss::get_current_generation_number.set(r, rest_bind(rest_get_current_generation_number, ss));
ss::get_natural_endpoints.set(r, rest_bind(rest_get_natural_endpoints, ctx, ss));
ss::get_natural_endpoints_v2.set(r, rest_bind(rest_get_natural_endpoints_v2, ctx, ss));
ss::cdc_streams_check_and_repair.set(r, rest_bind(rest_cdc_streams_check_and_repair, ss));
ss::cleanup_all.set(r, rest_bind(rest_cleanup_all, ctx, ss));
ss::reset_cleanup_needed.set(r, rest_bind(rest_reset_cleanup_needed, ctx, ss));
ss::force_flush.set(r, rest_bind(rest_force_flush, ctx));
ss::force_keyspace_flush.set(r, rest_bind(rest_force_keyspace_flush, ctx));
ss::decommission.set(r, rest_bind(rest_decommission, ss));
ss::move.set(r, rest_bind(rest_move, ss));
ss::remove_node.set(r, rest_bind(rest_remove_node, ss));
ss::exclude_node.set(r, rest_bind(rest_exclude_node, ss));
ss::get_removal_status.set(r, rest_bind(rest_get_removal_status, ss));
ss::force_remove_completion.set(r, rest_bind(rest_force_remove_completion, ss));
ss::set_logging_level.set(r, rest_bind(rest_set_logging_level));
ss::get_logging_levels.set(r, rest_bind(rest_get_logging_levels));
ss::get_operation_mode.set(r, rest_bind(rest_get_operation_mode, ss));
ss::is_starting.set(r, rest_bind(rest_is_starting, ss));
ss::get_drain_progress.set(r, rest_bind(rest_get_drain_progress, ss));
ss::drain.set(r, rest_bind(rest_drain, ss));
ss::stop_gossiping.set(r, rest_bind(rest_stop_gossiping, ss));
ss::start_gossiping.set(r, rest_bind(rest_start_gossiping, ss));
ss::is_gossip_running.set(r, rest_bind(rest_is_gossip_running, ss));
ss::stop_daemon.set(r, rest_bind(rest_stop_daemon));
ss::is_initialized.set(r, rest_bind(rest_is_initialized, ss));
ss::join_ring.set(r, rest_bind(rest_join_ring));
ss::is_joined.set(r, rest_bind(rest_is_joined, ss));
ss::is_incremental_backups_enabled.set(r, rest_bind(rest_is_incremental_backups_enabled, ctx));
ss::set_incremental_backups_enabled.set(r, rest_bind(rest_set_incremental_backups_enabled, ctx));
ss::rebuild.set(r, rest_bind(rest_rebuild, ss));
ss::bulk_load.set(r, rest_bind(rest_bulk_load));
ss::bulk_load_async.set(r, rest_bind(rest_bulk_load_async));
ss::reschedule_failed_deletions.set(r, rest_bind(rest_reschedule_failed_deletions));
ss::sample_key_range.set(r, rest_bind(rest_sample_key_range));
ss::reset_local_schema.set(r, rest_bind(rest_reset_local_schema, ss));
ss::set_trace_probability.set(r, rest_bind(rest_set_trace_probability));
ss::get_trace_probability.set(r, rest_bind(rest_get_trace_probability));
ss::get_slow_query_info.set(r, rest_bind(rest_get_slow_query_info));
ss::set_slow_query.set(r, rest_bind(rest_set_slow_query));
ss::deliver_hints.set(r, rest_bind(rest_deliver_hints));
ss::get_cluster_name.set(r, rest_bind(rest_get_cluster_name, ss));
ss::get_partitioner_name.set(r, rest_bind(rest_get_partitioner_name, ss));
ss::get_tombstone_warn_threshold.set(r, rest_bind(rest_get_tombstone_warn_threshold));
ss::set_tombstone_warn_threshold.set(r, rest_bind(rest_set_tombstone_warn_threshold));
ss::get_tombstone_failure_threshold.set(r, rest_bind(rest_get_tombstone_failure_threshold));
ss::set_tombstone_failure_threshold.set(r, rest_bind(rest_set_tombstone_failure_threshold));
ss::get_batch_size_failure_threshold.set(r, rest_bind(rest_get_batch_size_failure_threshold));
ss::set_batch_size_failure_threshold.set(r, rest_bind(rest_set_batch_size_failure_threshold));
ss::set_hinted_handoff_throttle_in_kb.set(r, rest_bind(rest_set_hinted_handoff_throttle_in_kb));
ss::get_exceptions.set(r, rest_bind(rest_get_exceptions, ss));
ss::get_total_hints_in_progress.set(r, rest_bind(rest_get_total_hints_in_progress));
ss::get_total_hints.set(r, rest_bind(rest_get_total_hints));
ss::get_ownership.set(r, rest_bind(rest_get_ownership, ctx, ss));
ss::get_effective_ownership.set(r, rest_bind(rest_get_effective_ownership, ctx, ss));
ss::retrain_dict.set(r, rest_bind(rest_retrain_dict, ctx, ss, group0_client));
ss::estimate_compression_ratios.set(r, rest_bind(rest_estimate_compression_ratios, ctx, ss));
ss::sstable_info.set(r, rest_bind(rest_sstable_info, ctx));
ss::reload_raft_topology_state.set(r, rest_bind(rest_reload_raft_topology_state, ss, group0_client));
ss::upgrade_to_raft_topology.set(r, rest_bind(rest_upgrade_to_raft_topology, ss));
ss::raft_topology_upgrade_status.set(r, rest_bind(rest_raft_topology_upgrade_status, ss));
ss::raft_topology_get_cmd_status.set(r, rest_bind(rest_raft_topology_get_cmd_status, ss));
ss::move_tablet.set(r, rest_bind(rest_move_tablet, ctx, ss));
ss::add_tablet_replica.set(r, rest_bind(rest_add_tablet_replica, ctx, ss));
ss::del_tablet_replica.set(r, rest_bind(rest_del_tablet_replica, ctx, ss));
ss::repair_tablet.set(r, rest_bind(rest_repair_tablet, ctx, ss));
ss::tablet_balancing_enable.set(r, rest_bind(rest_tablet_balancing_enable, ss));
ss::quiesce_topology.set(r, rest_bind(rest_quiesce_topology, ss));
sp::get_schema_versions.set(r, rest_bind(rest_get_schema_versions, ss));
ss::drop_quarantined_sstables.set(r, rest_bind(rest_drop_quarantined_sstables, ctx, ss));
}
void unset_storage_service(http_context& ctx, routes& r) {
@@ -2043,14 +2020,12 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
auto tag = req->get_query_param("tag");
auto column_families = split(req->get_query_param("cf"), ",");
auto sfopt = req->get_query_param("sf");
db::snapshot_options opts = {
.skip_flush = strcasecmp(sfopt.c_str(), "true") == 0,
};
auto sf = db::snapshot_ctl::skip_flush(strcasecmp(sfopt.c_str(), "true") == 0);
std::vector<sstring> keynames = split(req->get_query_param("kn"), ",");
try {
if (column_families.empty()) {
co_await snap_ctl.local().take_snapshot(tag, keynames, opts);
co_await snap_ctl.local().take_snapshot(tag, keynames, sf);
} else {
if (keynames.empty()) {
throw httpd::bad_param_exception("The keyspace of column families must be specified");
@@ -2058,7 +2033,7 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
if (keynames.size() > 1) {
throw httpd::bad_param_exception("Only one keyspace allowed when specifying a column family");
}
co_await snap_ctl.local().take_column_family_snapshot(keynames[0], column_families, tag, opts);
co_await snap_ctl.local().take_column_family_snapshot(keynames[0], column_families, tag, sf);
}
co_return json_void();
} catch (...) {
@@ -2093,8 +2068,7 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
auto info = parse_scrub_options(ctx, std::move(req));
if (!info.snapshot_tag.empty()) {
db::snapshot_options opts = {.skip_flush = false};
co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, info.snapshot_tag, opts);
co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, info.snapshot_tag, db::snapshot_ctl::skip_flush::no);
}
compaction::compaction_stats stats;

View File

@@ -9,7 +9,6 @@
#include <seastar/core/chunked_fifo.hh>
#include <seastar/core/coroutine.hh>
#include <seastar/coroutine/exception.hh>
#include <seastar/coroutine/maybe_yield.hh>
#include <seastar/http/exception.hh>
#include "task_manager.hh"
@@ -265,7 +264,7 @@ void set_task_manager(http_context& ctx, routes& r, sharded<tasks::task_manager>
if (id) {
module->unregister_task(id);
}
co_await coroutine::maybe_yield();
co_await maybe_yield();
}
});
co_return json_void();

View File

@@ -146,8 +146,7 @@ void set_tasks_compaction_module(http_context& ctx, routes& r, sharded<service::
auto info = parse_scrub_options(ctx, std::move(req));
if (!info.snapshot_tag.empty()) {
db::snapshot_options opts = {.skip_flush = false};
co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, info.snapshot_tag, opts);
co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, info.snapshot_tag, db::snapshot_ctl::skip_flush::no);
}
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();

View File

@@ -53,10 +53,10 @@ static std::string json_escape(std::string_view str) {
}
future<> audit_syslog_storage_helper::syslog_send_helper(temporary_buffer<char> msg) {
future<> audit_syslog_storage_helper::syslog_send_helper(const sstring& msg) {
try {
auto lock = co_await get_units(_semaphore, 1, std::chrono::hours(1));
co_await _sender.send(_syslog_address, std::span(&msg, 1));
co_await _sender.send(_syslog_address, net::packet{msg.data(), msg.size()});
}
catch (const std::exception& e) {
auto error_msg = seastar::format(
@@ -90,7 +90,7 @@ future<> audit_syslog_storage_helper::start(const db::config& cfg) {
co_return;
}
co_await syslog_send_helper(temporary_buffer<char>::copy_of("Initializing syslog audit backend."));
co_await syslog_send_helper("Initializing syslog audit backend.");
}
future<> audit_syslog_storage_helper::stop() {
@@ -120,7 +120,7 @@ future<> audit_syslog_storage_helper::write(const audit_info* audit_info,
audit_info->table(),
username);
co_await syslog_send_helper(std::move(msg).release());
co_await syslog_send_helper(msg);
}
future<> audit_syslog_storage_helper::write_login(const sstring& username,
@@ -139,7 +139,7 @@ future<> audit_syslog_storage_helper::write_login(const sstring& username,
client_ip,
username);
co_await syslog_send_helper(std::move(msg).release());
co_await syslog_send_helper(msg.c_str());
}
}

View File

@@ -26,7 +26,7 @@ class audit_syslog_storage_helper : public storage_helper {
net::datagram_channel _sender;
seastar::semaphore _semaphore;
future<> syslog_send_helper(seastar::temporary_buffer<char> msg);
future<> syslog_send_helper(const sstring& msg);
public:
explicit audit_syslog_storage_helper(cql3::query_processor&, service::migration_manager&);
virtual ~audit_syslog_storage_helper();

View File

@@ -9,6 +9,7 @@
#include "auth/allow_all_authenticator.hh"
#include "service/migration_manager.hh"
#include "utils/alien_worker.hh"
#include "utils/class_registrator.hh"
namespace auth {
@@ -22,6 +23,7 @@ static const class_registrator<
cql3::query_processor&,
::service::raft_group0_client&,
::service::migration_manager&,
cache&> registration("org.apache.cassandra.auth.AllowAllAuthenticator");
cache&,
utils::alien_worker&> registration("org.apache.cassandra.auth.AllowAllAuthenticator");
}

View File

@@ -14,6 +14,7 @@
#include "auth/authenticator.hh"
#include "auth/cache.hh"
#include "auth/common.hh"
#include "utils/alien_worker.hh"
namespace cql3 {
class query_processor;
@@ -29,7 +30,7 @@ extern const std::string_view allow_all_authenticator_name;
class allow_all_authenticator final : public authenticator {
public:
allow_all_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&) {
allow_all_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&, utils::alien_worker&) {
}
virtual future<> start() override {

View File

@@ -15,7 +15,6 @@
#include "db/system_keyspace.hh"
#include "schema/schema.hh"
#include <iterator>
#include <seastar/core/abort_source.hh>
#include <seastar/coroutine/maybe_yield.hh>
#include <seastar/core/format.hh>
@@ -23,11 +22,9 @@ namespace auth {
logging::logger logger("auth-cache");
cache::cache(cql3::query_processor& qp, abort_source& as) noexcept
cache::cache(cql3::query_processor& qp) noexcept
: _current_version(0)
, _qp(qp)
, _loading_sem(1)
, _as(as) {
, _qp(qp) {
}
lw_shared_ptr<const cache::role_record> cache::get(const role_name_t& role) const noexcept {
@@ -119,8 +116,6 @@ future<> cache::load_all() {
co_return;
}
SCYLLA_ASSERT(this_shard_id() == 0);
auto units = co_await get_units(_loading_sem, 1, _as);
++_current_version;
logger.info("Loading all roles");
@@ -151,9 +146,6 @@ future<> cache::load_roles(std::unordered_set<role_name_t> roles) {
if (legacy_mode(_qp)) {
co_return;
}
SCYLLA_ASSERT(this_shard_id() == 0);
auto units = co_await get_units(_loading_sem, 1, _as);
for (const auto& name : roles) {
logger.info("Loading role {}", name);
auto role = co_await fetch_role(name);

View File

@@ -8,7 +8,6 @@
#pragma once
#include <seastar/core/abort_source.hh>
#include <unordered_set>
#include <unordered_map>
@@ -16,7 +15,6 @@
#include <seastar/core/future.hh>
#include <seastar/core/sharded.hh>
#include <seastar/core/shared_ptr.hh>
#include <seastar/core/semaphore.hh>
#include <absl/container/flat_hash_map.h>
@@ -43,7 +41,7 @@ public:
version_tag_t version; // used for seamless cache reloads
};
explicit cache(cql3::query_processor& qp, abort_source& as) noexcept;
explicit cache(cql3::query_processor& qp) noexcept;
lw_shared_ptr<const role_record> get(const role_name_t& role) const noexcept;
future<> load_all();
future<> load_roles(std::unordered_set<role_name_t> roles);
@@ -54,8 +52,6 @@ private:
roles_map _roles;
version_tag_t _current_version;
cql3::query_processor& _qp;
semaphore _loading_sem;
abort_source& _as;
future<lw_shared_ptr<role_record>> fetch_role(const role_name_t& role) const;
future<> prune_all() noexcept;

View File

@@ -8,7 +8,6 @@
*/
#include "auth/certificate_authenticator.hh"
#include "auth/cache.hh"
#include <boost/regex.hpp>
#include <fmt/ranges.h>
@@ -35,13 +34,13 @@ static const class_registrator<auth::authenticator
, cql3::query_processor&
, ::service::raft_group0_client&
, ::service::migration_manager&
, auth::cache&> cert_auth_reg(CERT_AUTH_NAME);
, utils::alien_worker&> cert_auth_reg(CERT_AUTH_NAME);
enum class auth::certificate_authenticator::query_source {
subject, altname
};
auth::certificate_authenticator::certificate_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, auth::cache&)
auth::certificate_authenticator::certificate_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, utils::alien_worker&)
: _queries([&] {
auto& conf = qp.db().get_config();
auto queries = conf.auth_certificate_role_queries();
@@ -76,9 +75,9 @@ auth::certificate_authenticator::certificate_authenticator(cql3::query_processor
throw std::invalid_argument(fmt::format("Invalid source: {}", map.at(cfg_source_attr)));
}
continue;
} catch (const std::out_of_range&) {
} catch (std::out_of_range&) {
// just fallthrough
} catch (const boost::regex_error&) {
} catch (boost::regex_error&) {
std::throw_with_nested(std::invalid_argument(fmt::format("Invalid query expression: {}", map.at(cfg_query_attr))));
}
}

View File

@@ -10,6 +10,7 @@
#pragma once
#include "auth/authenticator.hh"
#include "utils/alien_worker.hh"
#include <boost/regex_fwd.hpp> // IWYU pragma: keep
namespace cql3 {
@@ -25,15 +26,13 @@ class raft_group0_client;
namespace auth {
class cache;
extern const std::string_view certificate_authenticator_name;
class certificate_authenticator : public authenticator {
enum class query_source;
std::vector<std::pair<query_source, boost::regex>> _queries;
public:
certificate_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&);
certificate_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, utils::alien_worker&);
~certificate_authenticator();
future<> start() override;

View File

@@ -94,7 +94,7 @@ static future<> create_legacy_metadata_table_if_missing_impl(
try {
co_return co_await mm.announce(co_await ::service::prepare_new_column_family_announcement(qp.proxy(), table, ts),
std::move(group0_guard), format("auth: create {} metadata table", table->cf_name()));
} catch (const exceptions::already_exists_exception&) {}
} catch (exceptions::already_exists_exception&) {}
}
}

View File

@@ -256,7 +256,7 @@ future<> default_authorizer::revoke_all(std::string_view role_name, ::service::g
} else {
co_await collect_mutations(_qp, mc, query, {sstring(role_name)});
}
} catch (const exceptions::request_execution_exception& e) {
} catch (exceptions::request_execution_exception& e) {
alogger.warn("CassandraAuthorizer failed to revoke all permissions of {}: {}", role_name, e);
}
}
@@ -293,13 +293,13 @@ future<> default_authorizer::revoke_all_legacy(const resource& resource) {
[resource](auto ep) {
try {
std::rethrow_exception(ep);
} catch (const exceptions::request_execution_exception& e) {
} catch (exceptions::request_execution_exception& e) {
alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
}
});
});
} catch (const exceptions::request_execution_exception& e) {
} catch (exceptions::request_execution_exception& e) {
alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
return make_ready_future();
}

View File

@@ -32,8 +32,6 @@ namespace {
logger mylog{"ldap_role_manager"}; // `log` is taken by math.
constexpr std::string_view user_placeholder = "{USER}";
struct url_desc_deleter {
void operator()(LDAPURLDesc *p) {
ldap_free_urldesc(p);
@@ -42,141 +40,9 @@ struct url_desc_deleter {
using url_desc_ptr = std::unique_ptr<LDAPURLDesc, url_desc_deleter>;
/// Escapes LDAP filter assertion value per RFC 4515 Section 3.
/// The characters *, (, ), \, and NUL must be backslash-hex-escaped
/// to prevent filter injection when interpolating untrusted input.
sstring escape_filter_value(std::string_view value) {
size_t escapable_chars = 0;
for (unsigned char ch : value) {
switch (ch) {
case '*':
case '(':
case ')':
case '\\':
case '\0':
++escapable_chars;
break;
default:
break;
}
}
if (escapable_chars == 0) {
return sstring(value);
}
sstring escaped(value.size() + escapable_chars * 2, 0);
size_t pos = 0;
for (unsigned char ch : value) {
switch (ch) {
case '*':
escaped[pos++] = '\\';
escaped[pos++] = '2';
escaped[pos++] = 'a';
break;
case '(':
escaped[pos++] = '\\';
escaped[pos++] = '2';
escaped[pos++] = '8';
break;
case ')':
escaped[pos++] = '\\';
escaped[pos++] = '2';
escaped[pos++] = '9';
break;
case '\\':
escaped[pos++] = '\\';
escaped[pos++] = '5';
escaped[pos++] = 'c';
break;
case '\0':
escaped[pos++] = '\\';
escaped[pos++] = '0';
escaped[pos++] = '0';
break;
default:
escaped[pos++] = static_cast<char>(ch);
break;
}
}
return escaped;
}
/// Percent-encodes characters that are not RFC 3986 "unreserved"
/// (ALPHA / DIGIT / '-' / '.' / '_' / '~').
///
/// Uses explicit ASCII range checks instead of std::isalnum() because
/// the latter is locale-dependent and could pass non-ASCII characters
/// through unencoded under certain locale settings.
///
/// This is applied AFTER RFC 4515 filter escaping when the value is
/// substituted into an LDAP URL. It serves two purposes:
/// 1. Prevents URL-level metacharacters ('?', '#') from breaking
/// the URL structure parsed by ldap_url_parse.
/// 2. Prevents percent-decoding (which ldap_url_parse performs on
/// each component) from undoing the filter escaping, e.g. a
/// literal "%2a" in the username would otherwise decode to '*'.
sstring percent_encode_for_url(std::string_view value) {
static constexpr char hex[] = "0123456789ABCDEF";
size_t chars_to_encode = 0;
for (unsigned char ch : value) {
if (!((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9')
|| ch == '-' || ch == '.' || ch == '_' || ch == '~')) {
++chars_to_encode;
}
}
if (chars_to_encode == 0) {
return sstring(value);
}
sstring encoded(value.size() + chars_to_encode * 2, 0);
size_t pos = 0;
for (unsigned char ch : value) {
if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9')
|| ch == '-' || ch == '.' || ch == '_' || ch == '~') {
encoded[pos++] = static_cast<char>(ch);
} else {
encoded[pos++] = '%';
encoded[pos++] = hex[ch >> 4];
encoded[pos++] = hex[ch & 0x0F];
}
}
return encoded;
}
/// Checks whether \p sentinel appears in any parsed URL component
/// other than the filter (host, DN, attributes, extensions).
bool sentinel_outside_filter(const LDAPURLDesc& desc, std::string_view sentinel) {
auto contains = [&](const char* field) {
return field && std::string_view(field).find(sentinel) != std::string_view::npos;
};
if (contains(desc.lud_host) || contains(desc.lud_dn)) {
return true;
}
if (desc.lud_attrs) {
for (int i = 0; desc.lud_attrs[i]; ++i) {
if (contains(desc.lud_attrs[i])) {
return true;
}
}
}
if (desc.lud_exts) {
for (int i = 0; desc.lud_exts[i]; ++i) {
if (contains(desc.lud_exts[i])) {
return true;
}
}
}
return false;
}
url_desc_ptr parse_url(const sstring& url) {
url_desc_ptr parse_url(std::string_view url) {
LDAPURLDesc *desc = nullptr;
if (ldap_url_parse(url.c_str(), &desc)) {
if (ldap_url_parse(url.data(), &desc)) {
mylog.error("error in ldap_url_parse({})", url);
}
return url_desc_ptr(desc);
@@ -249,7 +115,6 @@ const resource_set& ldap_role_manager::protected_resources() const {
}
future<> ldap_role_manager::start() {
validate_query_template();
if (!parse_url(get_url("dummy-user"))) { // Just need host and port -- any user should do.
return make_exception_future(
std::runtime_error(fmt::format("error getting LDAP server address from template {}", _query_template)));
@@ -334,7 +199,7 @@ future<> ldap_role_manager::revoke(std::string_view, std::string_view, ::service
}
future<role_set> ldap_role_manager::query_granted(std::string_view grantee_name, recursive_role_query) {
const auto url = get_url(grantee_name);
const auto url = get_url(grantee_name.data());
auto desc = parse_url(url);
if (!desc) {
return make_exception_future<role_set>(std::runtime_error(format("Error parsing URL {}", url)));
@@ -466,46 +331,7 @@ future<> ldap_role_manager::remove_attribute(std::string_view role_name, std::st
}
sstring ldap_role_manager::get_url(std::string_view user) const {
// Two-layer encoding protects against injection:
// 1. RFC 4515 filter escaping neutralizes filter metacharacters (*, (, ), \, NUL)
// 2. URL percent-encoding prevents URL structure injection (?, #) and blocks
// ldap_url_parse's percent-decoding from undoing the filter escaping (%2a -> *)
return boost::replace_all_copy(_query_template, user_placeholder,
percent_encode_for_url(escape_filter_value(user)));
}
void ldap_role_manager::validate_query_template() const {
if (_query_template.find(user_placeholder) == sstring::npos) {
return;
}
// Substitute {USER} with a sentinel and let ldap_url_parse tell us
// which URL component it landed in. The sentinel is purely
// alphanumeric so it cannot affect URL parsing.
static constexpr std::string_view sentinel = "XLDAPSENTINELX";
sstring test_url = boost::replace_all_copy(_query_template, user_placeholder, sentinel);
auto desc = parse_url(test_url);
if (!desc) {
throw url_error(format("LDAP URL template is not a valid URL when {{USER}} is substituted: {}", _query_template));
}
// The sentinel must appear in the filter ...
if (!desc->lud_filter
|| std::string_view(desc->lud_filter).find(sentinel) == std::string_view::npos) {
throw url_error(format(
"LDAP URL template places {{USER}} outside the filter component. "
"RFC 4515 filter escaping only protects the filter; other components "
"(e.g. the base DN) require different escaping and are not supported. "
"Template: {}", _query_template));
}
// ... and nowhere else (host, DN, attributes, extensions).
if (sentinel_outside_filter(*desc, sentinel)) {
throw url_error(format(
"LDAP URL template places {{USER}} outside the filter component. "
"RFC 4515 filter escaping only protects the filter; other components "
"(e.g. the host) require different escaping and are not supported. "
"Template: {}", _query_template));
}
return boost::replace_all_copy(_query_template, "{USER}", user);
}
future<std::vector<cql3::description>> ldap_role_manager::describe_role_grants() {

View File

@@ -107,9 +107,6 @@ class ldap_role_manager : public role_manager {
/// Macro-expands _query_template, returning the result.
sstring get_url(std::string_view user) const;
/// Validates that {USER}, if present, is used only in the LDAP filter component.
void validate_query_template() const;
/// Used to auto-create roles returned by ldap.
future<> create_role(std::string_view role_name);

View File

@@ -49,7 +49,8 @@ static const class_registrator<
cql3::query_processor&,
::service::raft_group0_client&,
::service::migration_manager&,
cache&> password_auth_reg("org.apache.cassandra.auth.PasswordAuthenticator");
cache&,
utils::alien_worker&> password_auth_reg("org.apache.cassandra.auth.PasswordAuthenticator");
static thread_local auto rng_for_salt = std::default_random_engine(std::random_device{}());
@@ -63,13 +64,14 @@ std::string password_authenticator::default_superuser(const db::config& cfg) {
password_authenticator::~password_authenticator() {
}
password_authenticator::password_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache)
password_authenticator::password_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache, utils::alien_worker& hashing_worker)
: _qp(qp)
, _group0_client(g0)
, _migration_manager(mm)
, _cache(cache)
, _stopped(make_ready_future<>())
, _superuser(default_superuser(qp.db().get_config()))
, _hashing_worker(hashing_worker)
{}
static bool has_salted_hash(const cql3::untyped_result_set_row& row) {
@@ -328,18 +330,20 @@ future<authenticated_user> password_authenticator::authenticate(
}
salted_hash = role->salted_hash;
}
const bool password_match = co_await passwords::check(password, *salted_hash);
const bool password_match = co_await _hashing_worker.submit<bool>([password = std::move(password), salted_hash] {
return passwords::check(password, *salted_hash);
});
if (!password_match) {
throw exceptions::authentication_exception("Username and/or password are incorrect");
}
co_return username;
} catch (const std::system_error &) {
} catch (std::system_error &) {
std::throw_with_nested(exceptions::authentication_exception("Could not verify password"));
} catch (const exceptions::request_execution_exception& e) {
} catch (exceptions::request_execution_exception& e) {
std::throw_with_nested(exceptions::authentication_exception(e.what()));
} catch (const exceptions::authentication_exception& e) {
} catch (exceptions::authentication_exception& e) {
std::throw_with_nested(e);
} catch (const exceptions::unavailable_exception& e) {
} catch (exceptions::unavailable_exception& e) {
std::throw_with_nested(exceptions::authentication_exception(e.get_message()));
} catch (...) {
std::throw_with_nested(exceptions::authentication_exception("authentication failed"));

View File

@@ -18,6 +18,7 @@
#include "auth/passwords.hh"
#include "auth/cache.hh"
#include "service/raft/raft_group0_client.hh"
#include "utils/alien_worker.hh"
namespace db {
class config;
@@ -48,12 +49,13 @@ class password_authenticator : public authenticator {
shared_promise<> _superuser_created_promise;
// We used to also support bcrypt, SHA-256, and MD5 (ref. scylladb#24524).
constexpr static auth::passwords::scheme _scheme = passwords::scheme::sha_512;
utils::alien_worker& _hashing_worker;
public:
static db::consistency_level consistency_for_user(std::string_view role_name);
static std::string default_superuser(const db::config&);
password_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&);
password_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&, utils::alien_worker&);
~password_authenticator();

View File

@@ -7,8 +7,6 @@
*/
#include "auth/passwords.hh"
#include "utils/crypt_sha512.hh"
#include <seastar/core/coroutine.hh>
#include <cerrno>
@@ -23,46 +21,25 @@ static thread_local crypt_data tlcrypt = {};
namespace detail {
void verify_hashing_output(const char * res) {
if (!res || (res[0] == '*')) {
throw std::system_error(errno, std::system_category());
}
}
void verify_scheme(scheme scheme) {
const sstring random_part_of_salt = "aaaabbbbccccdddd";
const sstring salt = sstring(prefix_for_scheme(scheme)) + random_part_of_salt;
const char* e = crypt_r("fisk", salt.c_str(), &tlcrypt);
try {
verify_hashing_output(e);
} catch (const std::system_error& ex) {
throw no_supported_schemes();
if (e && (e[0] != '*')) {
return;
}
throw no_supported_schemes();
}
sstring hash_with_salt(const sstring& pass, const sstring& salt) {
auto res = crypt_r(pass.c_str(), salt.c_str(), &tlcrypt);
verify_hashing_output(res);
return res;
}
seastar::future<sstring> hash_with_salt_async(const sstring& pass, const sstring& salt) {
sstring res;
// Only SHA-512 hashes for passphrases shorter than 256 bytes can be computed using
// the __crypt_sha512 method. For other computations, we fall back to the
// crypt_r implementation from `<crypt.h>`, which can stall.
if (salt.starts_with(prefix_for_scheme(scheme::sha_512)) && pass.size() <= 255) {
char buf[128];
const char * output_ptr = co_await __crypt_sha512(pass.c_str(), salt.c_str(), buf);
verify_hashing_output(output_ptr);
res = output_ptr;
} else {
const char * output_ptr = crypt_r(pass.c_str(), salt.c_str(), &tlcrypt);
verify_hashing_output(output_ptr);
res = output_ptr;
if (!res || (res[0] == '*')) {
throw std::system_error(errno, std::system_category());
}
co_return res;
return res;
}
std::string_view prefix_for_scheme(scheme c) noexcept {
@@ -81,9 +58,8 @@ no_supported_schemes::no_supported_schemes()
: std::runtime_error("No allowed hashing schemes are supported on this system") {
}
seastar::future<bool> check(const sstring& pass, const sstring& salted_hash) {
const auto pwd_hash = co_await detail::hash_with_salt_async(pass, salted_hash);
co_return pwd_hash == salted_hash;
bool check(const sstring& pass, const sstring& salted_hash) {
return detail::hash_with_salt(pass, salted_hash) == salted_hash;
}
} // namespace auth::passwords

View File

@@ -11,7 +11,6 @@
#include <random>
#include <stdexcept>
#include <seastar/core/future.hh>
#include <seastar/core/sstring.hh>
#include "seastarx.hh"
@@ -76,23 +75,11 @@ sstring generate_salt(RandomNumberEngine& g, scheme scheme) {
///
/// Hash a password combined with an implementation-specific salt string.
/// Deprecated in favor of `hash_with_salt_async`. This function is still used
/// when generating password hashes for storage to ensure that
/// `hash_with_salt` and `hash_with_salt_async` produce identical results,
/// preserving backward compatibility.
///
/// \throws \ref std::system_error when an unexpected implementation-specific error occurs.
///
sstring hash_with_salt(const sstring& pass, const sstring& salt);
///
/// Async version of `hash_with_salt` that returns a future.
/// If possible, hashing uses `coroutine::maybe_yield` to prevent reactor stalls.
///
/// \throws \ref std::system_error when an unexpected implementation-specific error occurs.
///
seastar::future<sstring> hash_with_salt_async(const sstring& pass, const sstring& salt);
} // namespace detail
///
@@ -120,6 +107,6 @@ sstring hash(const sstring& pass, RandomNumberEngine& g, scheme scheme) {
///
/// \throws \ref std::system_error when an unexpected implementation-specific error occurs.
///
seastar::future<bool> check(const sstring& pass, const sstring& salted_hash);
bool check(const sstring& pass, const sstring& salted_hash);
} // namespace auth::passwords

View File

@@ -35,9 +35,10 @@ static const class_registrator<
cql3::query_processor&,
::service::raft_group0_client&,
::service::migration_manager&,
cache&> saslauthd_auth_reg("com.scylladb.auth.SaslauthdAuthenticator");
cache&,
utils::alien_worker&> saslauthd_auth_reg("com.scylladb.auth.SaslauthdAuthenticator");
saslauthd_authenticator::saslauthd_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, cache&)
saslauthd_authenticator::saslauthd_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, cache&, utils::alien_worker&)
: _socket_path(qp.db().get_config().saslauthd_socket_path())
{}

View File

@@ -12,6 +12,7 @@
#include "auth/authenticator.hh"
#include "auth/cache.hh"
#include "utils/alien_worker.hh"
namespace cql3 {
class query_processor;
@@ -29,7 +30,7 @@ namespace auth {
class saslauthd_authenticator : public authenticator {
sstring _socket_path; ///< Path to the domain socket on which saslauthd is listening.
public:
saslauthd_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&);
saslauthd_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&,utils::alien_worker&);
future<> start() override;

View File

@@ -191,7 +191,8 @@ service::service(
::service::migration_manager& mm,
const service_config& sc,
maintenance_socket_enabled used_by_maintenance_socket,
cache& cache)
cache& cache,
utils::alien_worker& hashing_worker)
: service(
std::move(c),
cache,
@@ -199,7 +200,7 @@ service::service(
g0,
mn,
create_object<authorizer>(sc.authorizer_java_name, qp, g0, mm),
create_object<authenticator>(sc.authenticator_java_name, qp, g0, mm, cache),
create_object<authenticator>(sc.authenticator_java_name, qp, g0, mm, cache, hashing_worker),
create_object<role_manager>(sc.role_manager_java_name, qp, g0, mm, cache),
used_by_maintenance_socket) {
}
@@ -225,7 +226,7 @@ future<> service::create_legacy_keyspace_if_missing(::service::migration_manager
try {
co_return co_await mm.announce(::service::prepare_new_keyspace_announcement(db.real_database(), ksm, ts),
std::move(group0_guard), seastar::format("auth_service: create {} keyspace", meta::legacy::AUTH_KS));
} catch (const ::service::group0_concurrent_modification&) {
} catch (::service::group0_concurrent_modification&) {
log.info("Concurrent operation is detected while creating {} keyspace, retrying.", meta::legacy::AUTH_KS);
}
}
@@ -876,6 +877,22 @@ future<> migrate_to_auth_v2(db::system_keyspace& sys_ks, ::service::raft_group0_
continue; // some tables might not have been created if they were not used
}
// use longer than usual timeout as we scan the whole table
// but not infinite or very long as we want to fail reasonably fast
const auto t = 5min;
const timeout_config tc{t, t, t, t, t, t, t};
::service::client_state cs(::service::client_state::internal_tag{}, tc);
::service::query_state qs(cs, empty_service_permit());
auto rows = co_await qp.execute_internal(
seastar::format("SELECT * FROM {}.{}", meta::legacy::AUTH_KS, cf_name),
db::consistency_level::ALL,
qs,
{},
cql3::query_processor::cache_internal::no);
if (rows->empty()) {
continue;
}
std::vector<sstring> col_names;
for (const auto& col : schema->all_columns()) {
col_names.push_back(col.name_as_cql_string());
@@ -884,51 +901,30 @@ future<> migrate_to_auth_v2(db::system_keyspace& sys_ks, ::service::raft_group0_
for (size_t i = 1; i < col_names.size(); ++i) {
val_binders_str += ", ?";
}
std::vector<mutation> collected;
// use longer than usual timeout as we scan the whole table
// but not infinite or very long as we want to fail reasonably fast
const auto t = 5min;
const timeout_config tc{t, t, t, t, t, t, t};
::service::client_state cs(::service::client_state::internal_tag{}, tc);
::service::query_state qs(cs, empty_service_permit());
co_await qp.query_internal(
seastar::format("SELECT * FROM {}.{}", meta::legacy::AUTH_KS, cf_name),
db::consistency_level::ALL,
{},
1000,
[&qp, &cf_name, &col_names, &val_binders_str, &schema, ts, &collected] (const cql3::untyped_result_set::row& row) -> future<stop_iteration> {
std::vector<data_value_or_unset> values;
for (const auto& col : schema->all_columns()) {
if (row.has(col.name_as_text())) {
values.push_back(
col.type->deserialize(row.get_blob_unfragmented(col.name_as_text())));
} else {
values.push_back(unset_value{});
}
for (const auto& row : *rows) {
std::vector<data_value_or_unset> values;
for (const auto& col : schema->all_columns()) {
if (row.has(col.name_as_text())) {
values.push_back(
col.type->deserialize(row.get_blob_unfragmented(col.name_as_text())));
} else {
values.push_back(unset_value{});
}
auto muts = co_await qp.get_mutations_internal(
seastar::format("INSERT INTO {}.{} ({}) VALUES ({})",
db::system_keyspace::NAME,
cf_name,
fmt::join(col_names, ", "),
val_binders_str),
internal_distributed_query_state(),
ts,
std::move(values));
if (muts.size() != 1) {
on_internal_error(log,
format("expecting single insert mutation, got {}", muts.size()));
}
collected.push_back(std::move(muts[0]));
co_return stop_iteration::no;
},
std::move(qs));
for (auto& m : collected) {
co_yield std::move(m);
}
auto muts = co_await qp.get_mutations_internal(
seastar::format("INSERT INTO {}.{} ({}) VALUES ({})",
db::system_keyspace::NAME,
cf_name,
fmt::join(col_names, ", "),
val_binders_str),
internal_distributed_query_state(),
ts,
std::move(values));
if (muts.size() != 1) {
on_internal_error(log,
format("expecting single insert mutation, got {}", muts.size()));
}
co_yield std::move(muts[0]);
}
}
co_yield co_await sys_ks.make_auth_version_mutation(ts,

View File

@@ -27,6 +27,7 @@
#include "cql3/description.hh"
#include "seastarx.hh"
#include "service/raft/raft_group0_client.hh"
#include "utils/alien_worker.hh"
#include "utils/observable.hh"
#include "utils/serialized_action.hh"
#include "service/maintenance_mode.hh"
@@ -130,7 +131,8 @@ public:
::service::migration_manager&,
const service_config&,
maintenance_socket_enabled,
cache&);
cache&,
utils::alien_worker&);
future<> start(::service::migration_manager&, db::system_keyspace&);

View File

@@ -192,7 +192,7 @@ future<> standard_role_manager::legacy_create_default_role_if_missing() {
{_superuser},
cql3::query_processor::cache_internal::no).discard_result();
log.info("Created default superuser role '{}'.", _superuser);
} catch (const exceptions::unavailable_exception& e) {
} catch(const exceptions::unavailable_exception& e) {
log.warn("Skipped default role setup: some nodes were not ready; will retry");
throw e;
}

View File

@@ -38,8 +38,8 @@ class transitional_authenticator : public authenticator {
public:
static const sstring PASSWORD_AUTHENTICATOR_NAME;
transitional_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache)
: transitional_authenticator(std::make_unique<password_authenticator>(qp, g0, mm, cache)) {
transitional_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache, utils::alien_worker& hashing_worker)
: transitional_authenticator(std::make_unique<password_authenticator>(qp, g0, mm, cache, hashing_worker)) {
}
transitional_authenticator(std::unique_ptr<authenticator> a)
: _authenticator(std::move(a)) {
@@ -81,7 +81,7 @@ public:
}).handle_exception([](auto ep) {
try {
std::rethrow_exception(ep);
} catch (const exceptions::authentication_exception&) {
} catch (exceptions::authentication_exception&) {
// return anon user
return make_ready_future<authenticated_user>(anonymous_user());
}
@@ -126,7 +126,7 @@ public:
virtual bytes evaluate_response(bytes_view client_response) override {
try {
return _sasl->evaluate_response(client_response);
} catch (const exceptions::authentication_exception&) {
} catch (exceptions::authentication_exception&) {
_complete = true;
return {};
}
@@ -141,7 +141,7 @@ public:
return _sasl->get_authenticated_user().handle_exception([](auto ep) {
try {
std::rethrow_exception(ep);
} catch (const exceptions::authentication_exception&) {
} catch (exceptions::authentication_exception&) {
// return anon user
return make_ready_future<authenticated_user>(anonymous_user());
}
@@ -241,7 +241,8 @@ static const class_registrator<
cql3::query_processor&,
::service::raft_group0_client&,
::service::migration_manager&,
auth::cache&> transitional_authenticator_reg(auth::PACKAGE_NAME + "TransitionalAuthenticator");
auth::cache&,
utils::alien_worker&> transitional_authenticator_reg(auth::PACKAGE_NAME + "TransitionalAuthenticator");
static const class_registrator<
auth::authorizer,

View File

@@ -204,7 +204,7 @@ future<topology_description> topology_description::clone_async() const {
for (const auto& entry : _entries) {
vec.push_back(entry);
co_await coroutine::maybe_yield();
co_await seastar::maybe_yield();
}
co_return topology_description{std::move(vec)};

View File

@@ -15,7 +15,7 @@
#include "mutation/tombstone.hh"
#include "schema/schema.hh"
#include <seastar/core/sstring.hh>
#include "seastar/core/sstring.hh"
#include "types/concrete_types.hh"
#include "types/types.hh"
#include "types/user.hh"

View File

@@ -10,9 +10,7 @@
#include <seastar/net/inet_address.hh>
#include <seastar/core/sstring.hh>
#include "seastarx.hh"
#include "utils/loading_shared_values.hh"
#include <list>
#include <optional>
enum class client_type {
@@ -29,20 +27,6 @@ enum class client_connection_stage {
ready,
};
// We implement a keys cache using a map-like utils::loading_shared_values container by storing empty values.
struct options_cache_value_type {};
using client_options_cache_type = utils::loading_shared_values<sstring, options_cache_value_type>;
using client_options_cache_entry_type = client_options_cache_type::entry_ptr;
using client_options_cache_key_type = client_options_cache_type::key_type;
// This struct represents a single OPTION key-value pair from the client's connection options.
// Both key and value are represented by corresponding "references" to their cached values.
// Each "reference" is effectively a lw_shared_ptr value.
struct client_option_key_value_cached_entry {
client_options_cache_entry_type key;
client_options_cache_entry_type value;
};
sstring to_string(client_connection_stage ct);
// Representation of a row in `system.clients'. std::optionals are for nullable cells.
@@ -53,8 +37,8 @@ struct client_data {
client_connection_stage connection_stage = client_connection_stage::established;
int32_t shard_id; /// ID of server-side shard which is processing the connection.
std::optional<client_options_cache_entry_type> driver_name;
std::optional<client_options_cache_entry_type> driver_version;
std::optional<sstring> driver_name;
std::optional<sstring> driver_version;
std::optional<sstring> hostname;
std::optional<int32_t> protocol_version;
std::optional<sstring> ssl_cipher_suite;
@@ -62,7 +46,6 @@ struct client_data {
std::optional<sstring> ssl_protocol;
std::optional<sstring> username;
std::optional<sstring> scheduling_group_name;
std::list<client_option_key_value_cached_entry> client_options;
sstring stage_str() const { return to_string(connection_stage); }
sstring client_type_str() const { return to_string(ct); }

View File

@@ -125,6 +125,10 @@ if(target_arch)
add_compile_options("-march=${target_arch}")
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
add_compile_options("SHELL:-Xclang -fexperimental-assignment-tracking=disabled")
endif()
function(maybe_limit_stack_usage_in_KB stack_usage_threshold_in_KB config)
math(EXPR _stack_usage_threshold_in_bytes "${stack_usage_threshold_in_KB} * 1024")
set(_stack_usage_threshold_flag "-Wstack-usage=${_stack_usage_threshold_in_bytes}")

View File

@@ -48,7 +48,6 @@
#include "mutation/mutation_fragment_stream_validator.hh"
#include "utils/assert.hh"
#include "utils/error_injection.hh"
#include "utils/chunked_vector.hh"
#include "utils/pretty_printers.hh"
#include "readers/multi_range.hh"
#include "readers/compacting.hh"
@@ -612,23 +611,23 @@ private:
}
// Called in a seastar thread
utils::chunked_vector<dht::partition_range>
dht::partition_range_vector
get_ranges_for_invalidation(const std::vector<sstables::shared_sstable>& sstables) {
// If owned ranges is disengaged, it means no cleanup work was done and
// so nothing needs to be invalidated.
if (!_owned_ranges) {
return {};
return dht::partition_range_vector{};
}
auto owned_ranges = dht::to_partition_ranges_chunked(*_owned_ranges).get();
auto owned_ranges = dht::to_partition_ranges(*_owned_ranges, utils::can_yield::yes);
auto non_owned_ranges = sstables
| std::views::transform([] (const sstables::shared_sstable& sst) {
seastar::thread::maybe_yield();
return dht::partition_range::make({sst->get_first_decorated_key(), true},
{sst->get_last_decorated_key(), true});
}) | std::ranges::to<utils::chunked_vector<dht::partition_range>>();
}) | std::ranges::to<dht::partition_range_vector>();
return dht::subtract_ranges(*_schema, std::move(non_owned_ranges), std::move(owned_ranges)).get();
return dht::subtract_ranges(*_schema, non_owned_ranges, std::move(owned_ranges)).get();
}
protected:
compaction(compaction_group_view& table_s, compaction_descriptor descriptor, compaction_data& cdata, compaction_progress_monitor& progress_monitor, use_backlog_tracker use_backlog_tracker)
@@ -719,8 +718,8 @@ protected:
compaction_completion_desc
get_compaction_completion_desc(std::vector<sstables::shared_sstable> input_sstables, std::vector<sstables::shared_sstable> output_sstables) {
auto ranges = get_ranges_for_invalidation(input_sstables);
return compaction_completion_desc{std::move(input_sstables), std::move(output_sstables), std::move(ranges)};
auto ranges_for_for_invalidation = get_ranges_for_invalidation(input_sstables);
return compaction_completion_desc{std::move(input_sstables), std::move(output_sstables), std::move(ranges_for_for_invalidation)};
}
// Tombstone expiration is enabled based on the presence of sstable set.

View File

@@ -16,7 +16,6 @@
#include "sstables/sstable_set.hh"
#include "compaction_fwd.hh"
#include "mutation_writer/token_group_based_splitting_writer.hh"
#include "utils/chunked_vector.hh"
namespace compaction {
@@ -39,7 +38,7 @@ struct compaction_completion_desc {
// New, fresh SSTables that should be added to SSTable set, replacing the old ones.
std::vector<sstables::shared_sstable> new_sstables;
// Set of compacted partition ranges that should be invalidated in the cache.
utils::chunked_vector<dht::partition_range> ranges_for_cache_invalidation;
dht::partition_range_vector ranges_for_cache_invalidation;
};
// creates a new SSTable for a given shard

View File

@@ -12,7 +12,6 @@
#include <seastar/core/condition-variable.hh>
#include "schema/schema_fwd.hh"
#include "sstables/open_info.hh"
#include "compaction_descriptor.hh"
class reader_permit;
@@ -45,7 +44,7 @@ public:
virtual compaction_strategy_state& get_compaction_strategy_state() noexcept = 0;
virtual reader_permit make_compaction_reader_permit() const = 0;
virtual sstables::sstables_manager& get_sstables_manager() noexcept = 0;
virtual sstables::shared_sstable make_sstable(sstables::sstable_state) const = 0;
virtual sstables::shared_sstable make_sstable() const = 0;
virtual sstables::sstable_writer_config configure_writer(sstring origin) const = 0;
virtual api::timestamp_type min_memtable_timestamp() const = 0;
virtual api::timestamp_type min_memtable_live_timestamp() const = 0;

View File

@@ -416,9 +416,7 @@ future<compaction_result> compaction_task_executor::compact_sstables(compaction_
descriptor.enable_garbage_collection(co_await sstable_set_for_tombstone_gc(t));
}
descriptor.creator = [&t] (shard_id) {
// All compaction types going through this path will work on normal input sstables only.
// Off-strategy, for example, waits until the sstables move out of staging state.
return t.make_sstable(sstables::sstable_state::normal);
return t.make_sstable();
};
descriptor.replacer = [this, &t, &on_replace, offstrategy] (compaction_completion_desc desc) {
t.get_compaction_strategy().notify_completion(t, desc.old_sstables, desc.new_sstables);
@@ -778,7 +776,6 @@ compaction_manager::get_incremental_repair_read_lock(compaction::compaction_grou
cmlog.debug("Get get_incremental_repair_read_lock for {} started", reason);
}
compaction::compaction_state& cs = get_compaction_state(&t);
auto gh = cs.gate.hold();
auto ret = co_await cs.incremental_repair_lock.hold_read_lock();
if (!reason.empty()) {
cmlog.debug("Get get_incremental_repair_read_lock for {} done", reason);
@@ -792,7 +789,6 @@ compaction_manager::get_incremental_repair_write_lock(compaction::compaction_gro
cmlog.debug("Get get_incremental_repair_write_lock for {} started", reason);
}
compaction::compaction_state& cs = get_compaction_state(&t);
auto gh = cs.gate.hold();
auto ret = co_await cs.incremental_repair_lock.hold_write_lock();
if (!reason.empty()) {
cmlog.debug("Get get_incremental_repair_write_lock for {} done", reason);
@@ -1128,10 +1124,7 @@ void compaction_manager::enable() {
_compaction_submission_timer.cancel();
_compaction_submission_timer.arm_periodic(periodic_compaction_submission_interval());
if (_waiting_reevaluation) {
on_internal_error(cmlog, "postponed compactions reevaluation is already running when enabling compaction manager");
}
_waiting_reevaluation.emplace(postponed_compactions_reevaluation());
_waiting_reevalution = postponed_compactions_reevaluation();
cmlog.info("Enabled");
}
@@ -1179,16 +1172,6 @@ void compaction_manager::reevaluate_postponed_compactions() noexcept {
_postponed_reevaluation.signal();
}
future<> compaction_manager::stop_postponed_compactions() noexcept {
auto waiting_reevaluation = std::exchange(_waiting_reevaluation, std::nullopt);
if (!waiting_reevaluation) {
return make_ready_future();
}
// Trigger a signal to properly exit from postponed_compactions_reevaluation() fiber
reevaluate_postponed_compactions();
return std::move(*waiting_reevaluation);
}
void compaction_manager::postpone_compaction_for_table(compaction_group_view* t) {
_postponed.insert(t);
}
@@ -1272,7 +1255,8 @@ future<> compaction_manager::drain() {
_compaction_submission_timer.cancel();
// Stop ongoing compactions, if the request has not been sent already and wait for them to stop.
co_await stop_ongoing_compactions("drain");
co_await stop_postponed_compactions();
// Trigger a signal to properly exit from postponed_compactions_reevaluation() fiber
reevaluate_postponed_compactions();
cmlog.info("Drained");
}
@@ -1280,15 +1264,9 @@ future<> compaction_manager::start(const db::config& cfg, utils::disk_space_moni
if (dsm && (this_shard_id() == 0)) {
_out_of_space_subscription = dsm->subscribe(cfg.critical_disk_utilization_level, [this] (auto threshold_reached) {
if (threshold_reached) {
return container().invoke_on_all([] (compaction_manager& cm) {
cm._in_critical_disk_utilization_mode = true;
return cm.drain();
});
return container().invoke_on_all([] (compaction_manager& cm) { return cm.drain(); });
}
return container().invoke_on_all([] (compaction_manager& cm) {
cm._in_critical_disk_utilization_mode = false;
cm.enable();
});
return container().invoke_on_all([] (compaction_manager& cm) { cm.enable(); });
});
}
@@ -1316,7 +1294,8 @@ future<> compaction_manager::really_do_stop() noexcept {
if (!_tasks.empty()) {
on_fatal_internal_error(cmlog, format("{} tasks still exist after being stopped", _tasks.size()));
}
co_await stop_postponed_compactions();
reevaluate_postponed_compactions();
co_await std::move(_waiting_reevalution);
co_await _sys_ks.close();
_weight_tracker.clear();
_compaction_submission_timer.cancel();
@@ -1538,9 +1517,7 @@ future<> compaction_manager::maybe_wait_for_sstable_count_reduction(compaction_g
| std::views::transform(std::mem_fn(&sstables::sstable::run_identifier))
| std::ranges::to<std::unordered_set>());
};
const auto threshold = utils::get_local_injector().inject_parameter<size_t>("set_sstable_count_reduction_threshold")
.value_or(size_t(std::max(schema->max_compaction_threshold(), 32)));
const auto threshold = size_t(std::max(schema->max_compaction_threshold(), 32));
auto count = co_await num_runs_for_compaction();
if (count <= threshold) {
cmlog.trace("No need to wait for sstable count reduction in {}: {} <= {}",
@@ -1555,7 +1532,9 @@ future<> compaction_manager::maybe_wait_for_sstable_count_reduction(compaction_g
auto& cstate = get_compaction_state(&t);
try {
while (can_perform_regular_compaction(t) && co_await num_runs_for_compaction() > threshold) {
co_await cstate.compaction_done.wait();
co_await cstate.compaction_done.wait([this, &t] {
return !can_perform_regular_compaction(t);
});
}
} catch (const broken_condition_variable&) {
co_return;
@@ -1868,10 +1847,6 @@ protected:
throw make_compaction_stopped_exception();
}
}, false);
if (utils::get_local_injector().is_enabled("split_sstable_force_stop_exception")) {
throw make_compaction_stopped_exception();
}
co_return co_await do_rewrite_sstable(std::move(sst));
}
};
@@ -2308,26 +2283,13 @@ future<compaction_manager::compaction_stats_opt> compaction_manager::perform_spl
return perform_task_on_all_files<split_compaction_task_executor>("split", info, t, std::move(options), std::move(owned_ranges_ptr), std::move(get_sstables), throw_if_stopping::no);
}
std::exception_ptr compaction_manager::make_disabled_exception(compaction::compaction_group_view& cg) {
std::exception_ptr ex;
if (_in_critical_disk_utilization_mode) {
ex = std::make_exception_ptr(std::runtime_error("critical disk utilization"));
} else {
ex = std::make_exception_ptr(compaction_stopped_exception(cg.schema()->ks_name(), cg.schema()->cf_name(), "compaction disabled"));
}
return ex;
}
future<std::vector<sstables::shared_sstable>>
compaction_manager::maybe_split_new_sstable(sstables::shared_sstable sst, compaction_group_view& t, compaction_type_options::split opt) {
compaction_manager::maybe_split_sstable(sstables::shared_sstable sst, compaction_group_view& t, compaction_type_options::split opt) {
if (!split_compaction_task_executor::sstable_needs_split(sst, opt)) {
co_return std::vector<sstables::shared_sstable>{sst};
}
// Throw an error if split cannot be performed due to e.g. out of space prevention.
// We don't want to prevent split because compaction is temporarily disabled on a view only for synchronization,
// which is unneeded against new sstables that aren't part of any set yet, so never use can_proceed(&t) here.
if (is_disabled()) {
co_return coroutine::exception(make_disabled_exception(t));
if (!can_proceed(&t)) {
co_return std::vector<sstables::shared_sstable>{sst};
}
std::vector<sstables::shared_sstable> ret;
@@ -2335,11 +2297,8 @@ compaction_manager::maybe_split_new_sstable(sstables::shared_sstable sst, compac
compaction_progress_monitor monitor;
compaction_data info = create_compaction_data();
compaction_descriptor desc = split_compaction_task_executor::make_descriptor(sst, opt);
desc.creator = [&t, sst] (shard_id _) {
// NOTE: preserves the sstable state, since we want the output to be on the same state as the original.
// For example, if base table has views, it's important that sstable produced by repair will be
// in the staging state.
return t.make_sstable(sst->state());
desc.creator = [&t] (shard_id _) {
return t.make_sstable();
};
desc.replacer = [&] (compaction_completion_desc d) {
std::move(d.new_sstables.begin(), d.new_sstables.end(), std::back_inserter(ret));
@@ -2415,8 +2374,6 @@ future<> compaction_manager::remove(compaction_group_view& t, sstring reason) no
if (!c_state.gate.is_closed()) {
auto close_gate = c_state.gate.close();
co_await stop_ongoing_compactions(reason, &t);
// Wait for users of incremental repair lock (can be either repair itself or maintenance compactions).
co_await c_state.incremental_repair_lock.write_lock();
co_await std::move(close_gate);
}

View File

@@ -114,8 +114,6 @@ private:
uint32_t _disabled_state_count = 0;
bool is_disabled() const { return _state != state::running || _disabled_state_count > 0; }
// precondition: is_disabled() is true.
std::exception_ptr make_disabled_exception(compaction::compaction_group_view& cg);
std::optional<future<>> _stop_future;
@@ -127,7 +125,7 @@ private:
// a sstable from being compacted twice.
std::unordered_set<sstables::shared_sstable> _compacting_sstables;
std::optional<future<>> _waiting_reevaluation;
future<> _waiting_reevalution = make_ready_future<>();
condition_variable _postponed_reevaluation;
// tables that wait for compaction but had its submission postponed due to ongoing compaction.
std::unordered_set<compaction::compaction_group_view*> _postponed;
@@ -175,7 +173,6 @@ private:
tombstone_gc_state _tombstone_gc_state;
utils::disk_space_monitor::subscription _out_of_space_subscription;
bool _in_critical_disk_utilization_mode = false;
private:
// Requires task->_compaction_state.gate to be held and task to be registered in _tasks.
future<compaction_stats_opt> perform_task(shared_ptr<compaction::compaction_task_executor> task, throw_if_stopping do_throw_if_stopping);
@@ -237,7 +234,6 @@ private:
future<> postponed_compactions_reevaluation();
void reevaluate_postponed_compactions() noexcept;
future<> stop_postponed_compactions() noexcept;
// Postpone compaction for a table that couldn't be executed due to ongoing
// similar-sized compaction.
void postpone_compaction_for_table(compaction::compaction_group_view* t);
@@ -380,8 +376,7 @@ public:
// Splits a single SSTable by segregating all its data according to the classifier.
// If SSTable doesn't need split, the same input SSTable is returned as output.
// If SSTable needs split, then output SSTables are returned and the input SSTable is deleted.
// Exception is thrown if the input sstable cannot be split due to e.g. out of space prevention.
future<std::vector<sstables::shared_sstable>> maybe_split_new_sstable(sstables::shared_sstable sst, compaction_group_view& t, compaction_type_options::split opt);
future<std::vector<sstables::shared_sstable>> maybe_split_sstable(sstables::shared_sstable sst, compaction_group_view& t, compaction_type_options::split opt);
// Run a custom job for a given table, defined by a function
// it completes when future returned by job is ready or returns immediately

View File

@@ -571,10 +571,10 @@ commitlog_total_space_in_mb: -1
# - "none": auditing is disabled (default)
# - "table": save audited events in audit.audit_log column family
# - "syslog": send audited events via syslog (depends on OS, but usually to /dev/log)
audit: "table"
# audit: "none"
#
# List of statement categories that should be audited.
audit_categories: "DCL,DDL,AUTH,ADMIN"
# audit_categories: "DCL,DDL,AUTH"
#
# List of tables that should be audited.
# audit_tables: "<keyspace_name>.<table_name>,<keyspace_name>.<table_name>"

View File

@@ -368,87 +368,6 @@ def find_ninja():
sys.exit(1)
def find_compiler(name):
"""
Find a compiler by name, skipping ccache wrapper directories.
This is useful when using sccache to avoid double-caching through ccache.
Args:
name: The compiler name (e.g., 'clang++', 'clang', 'gcc')
Returns:
Path to the compiler, skipping ccache directories, or None if not found.
"""
ccache_dirs = {'/usr/lib/ccache', '/usr/lib64/ccache'}
for path_dir in os.environ.get('PATH', '').split(os.pathsep):
# Skip ccache wrapper directories
if os.path.realpath(path_dir) in ccache_dirs or path_dir in ccache_dirs:
continue
candidate = os.path.join(path_dir, name)
if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
return candidate
return None
def resolve_compilers_for_compiler_cache(args, compiler_cache):
"""
When using a compiler cache, resolve compiler paths to avoid ccache directories.
This prevents double-caching when ccache symlinks are in PATH.
Args:
args: The argument namespace with cc and cxx attributes.
compiler_cache: Path to the compiler cache binary, or None.
"""
if not compiler_cache:
return
if not os.path.isabs(args.cxx):
real_cxx = find_compiler(args.cxx)
if real_cxx:
args.cxx = real_cxx
if not os.path.isabs(args.cc):
real_cc = find_compiler(args.cc)
if real_cc:
args.cc = real_cc
def find_compiler_cache(preference):
"""
Find a compiler cache based on the preference.
Args:
preference: One of 'auto', 'sccache', 'ccache', 'none', or a path to a binary.
Returns:
Path to the compiler cache binary, or None if not found/disabled.
"""
if preference == 'none':
return None
if preference == 'auto':
# Prefer sccache over ccache
for cache in ['sccache', 'ccache']:
path = which(cache)
if path:
return path
return None
if preference in ('sccache', 'ccache'):
path = which(preference)
if path:
return path
print(f"Warning: {preference} not found on PATH, disabling compiler cache")
return None
# Assume it's a path to a binary
if os.path.isfile(preference) and os.access(preference, os.X_OK):
return preference
print(f"Warning: compiler cache '{preference}' not found or not executable, disabling compiler cache")
return None
modes = {
'debug': {
'cxxflags': '-DDEBUG -DSANITIZE -DDEBUG_LSA_SANITIZER -DSCYLLA_ENABLE_ERROR_INJECTION',
@@ -725,9 +644,29 @@ raft_tests = set([
vector_search_tests = set([
'test/vector_search/vector_store_client_test',
'test/vector_search/load_balancer_test',
'test/vector_search/client_test',
'test/vector_search/filter_test',
'test/vector_search/rescoring_test'
'test/vector_search/client_test'
])
vector_search_validator_bin = 'vector-search-validator/bin/vector-search-validator'
vector_search_validator_deps = set([
'test/vector_search_validator/build-validator',
'test/vector_search_validator/Cargo.toml',
'test/vector_search_validator/crates/validator/Cargo.toml',
'test/vector_search_validator/crates/validator/src/main.rs',
'test/vector_search_validator/crates/validator-scylla/Cargo.toml',
'test/vector_search_validator/crates/validator-scylla/src/lib.rs',
'test/vector_search_validator/crates/validator-scylla/src/cql.rs',
])
vector_store_bin = 'vector-search-validator/bin/vector-store'
vector_store_deps = set([
'test/vector_search_validator/build-env',
'test/vector_search_validator/build-vector-store',
])
vector_search_validator_bins = set([
vector_search_validator_bin,
vector_store_bin,
])
wasms = set([
@@ -763,7 +702,7 @@ other = set([
'iotune',
])
all_artifacts = apps | cpp_apps | tests | other | wasms
all_artifacts = apps | cpp_apps | tests | other | wasms | vector_search_validator_bins
arg_parser = argparse.ArgumentParser('Configure scylla', add_help=False, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
arg_parser.add_argument('--out', dest='buildfile', action='store', default='build.ninja',
@@ -793,8 +732,6 @@ arg_parser.add_argument('--compiler', action='store', dest='cxx', default='clang
help='C++ compiler path')
arg_parser.add_argument('--c-compiler', action='store', dest='cc', default='clang',
help='C compiler path')
arg_parser.add_argument('--compiler-cache', action='store', dest='compiler_cache', default='auto',
help='Compiler cache to use: auto (default, prefers sccache), sccache, ccache, none, or a path to a binary')
add_tristate(arg_parser, name='dpdk', dest='dpdk', default=False,
help='Use dpdk (from seastar dpdk sources)')
arg_parser.add_argument('--dpdk-target', action='store', dest='dpdk_target', default='',
@@ -922,7 +859,6 @@ scylla_core = (['message/messaging_service.cc',
'utils/alien_worker.cc',
'utils/array-search.cc',
'utils/base64.cc',
'utils/crypt_sha512.cc',
'utils/logalloc.cc',
'utils/large_bitset.cc',
'utils/buffer_input_stream.cc',
@@ -1014,10 +950,6 @@ scylla_core = (['message/messaging_service.cc',
'cql3/functions/aggregate_fcts.cc',
'cql3/functions/castas_fcts.cc',
'cql3/functions/error_injection_fcts.cc',
'cql3/statements/strong_consistency/modification_statement.cc',
'cql3/statements/strong_consistency/select_statement.cc',
'cql3/statements/strong_consistency/statement_helpers.cc',
'cql3/functions/vector_similarity_fcts.cc',
'cql3/statements/cf_prop_defs.cc',
'cql3/statements/cf_statement.cc',
'cql3/statements/authentication_statement.cc',
@@ -1042,8 +974,8 @@ scylla_core = (['message/messaging_service.cc',
'cql3/statements/raw/parsed_statement.cc',
'cql3/statements/property_definitions.cc',
'cql3/statements/update_statement.cc',
'cql3/statements/broadcast_modification_statement.cc',
'cql3/statements/broadcast_select_statement.cc',
'cql3/statements/strongly_consistent_modification_statement.cc',
'cql3/statements/strongly_consistent_select_statement.cc',
'cql3/statements/delete_statement.cc',
'cql3/statements/prune_materialized_view_statement.cc',
'cql3/statements/batch_statement.cc',
@@ -1075,7 +1007,6 @@ scylla_core = (['message/messaging_service.cc',
'cql3/statements/list_service_level_attachments_statement.cc',
'cql3/statements/list_effective_service_level_statement.cc',
'cql3/statements/describe_statement.cc',
'cql3/statements/view_prop_defs.cc',
'cql3/update_parameters.cc',
'cql3/util.cc',
'cql3/ut_name.cc',
@@ -1131,6 +1062,7 @@ scylla_core = (['message/messaging_service.cc',
'db/hints/resource_manager.cc',
'db/hints/sync_point.cc',
'db/large_data_handler.cc',
'db/legacy_schema_migrator.cc',
'db/marshal/type_parser.cc',
'db/per_partition_rate_limit_options.cc',
'db/rate_limiter.cc',
@@ -1153,6 +1085,7 @@ scylla_core = (['message/messaging_service.cc',
'db/view/view_building_task_mutation_builder.cc',
'db/virtual_table.cc',
'db/virtual_tables.cc',
'db/system0_virtual_tables.cc',
'db/tablet_options.cc',
'db/object_storage_endpoint_param.cc',
'index/secondary_index_manager.cc',
@@ -1226,7 +1159,6 @@ scylla_core = (['message/messaging_service.cc',
'locator/topology.cc',
'locator/util.cc',
'service/client_state.cc',
'service/client_routes.cc',
'service/storage_service.cc',
'service/session.cc',
'service/task_manager_module.cc',
@@ -1334,9 +1266,6 @@ scylla_core = (['message/messaging_service.cc',
'lang/wasm.cc',
'lang/wasm_alien_thread_runner.cc',
'lang/wasm_instance_cache.cc',
'service/strong_consistency/groups_manager.cc',
'service/strong_consistency/coordinator.cc',
'service/strong_consistency/state_machine.cc',
'service/raft/group0_state_id_handler.cc',
'service/raft/group0_state_machine.cc',
'service/raft/group0_state_machine_merger.cc',
@@ -1366,7 +1295,6 @@ scylla_core = (['message/messaging_service.cc',
'vector_search/dns.cc',
'vector_search/client.cc',
'vector_search/clients.cc',
'vector_search/filter.cc',
'vector_search/truststore.cc'
] + [Antlr3Grammar('cql3/Cql.g')] \
+ scylla_raft_core
@@ -1391,8 +1319,6 @@ api = ['api/api.cc',
'api/storage_proxy.cc',
Json2Code('api/api-doc/cache_service.json'),
'api/cache_service.cc',
Json2Code('api/api-doc/client_routes.json'),
'api/client_routes.cc',
Json2Code('api/api-doc/collectd.json'),
'api/collectd.cc',
Json2Code('api/api-doc/endpoint_snitch_info.json'),
@@ -1442,7 +1368,6 @@ alternator = [
'alternator/auth.cc',
'alternator/streams.cc',
'alternator/ttl.cc',
'alternator/http_compression.cc'
]
idls = ['idl/gossip_digest.idl.hh',
@@ -1476,7 +1401,6 @@ idls = ['idl/gossip_digest.idl.hh',
'idl/hinted_handoff.idl.hh',
'idl/storage_proxy.idl.hh',
'idl/sstables.idl.hh',
'idl/strong_consistency/state_machine.idl.hh',
'idl/group0_state_machine.idl.hh',
'idl/mapreduce_request.idl.hh',
'idl/replica_exception.idl.hh',
@@ -1557,6 +1481,7 @@ deps = {
pure_boost_tests = set([
'test/boost/anchorless_list_test',
'test/boost/auth_passwords_test',
'test/boost/auth_resource_test',
'test/boost/big_decimal_test',
'test/boost/caching_options_test',
@@ -1689,7 +1614,6 @@ deps['test/boost/combined_tests'] += [
'test/boost/schema_registry_test.cc',
'test/boost/secondary_index_test.cc',
'test/boost/sessions_test.cc',
'test/boost/simple_value_with_expiry_test.cc',
'test/boost/sstable_compaction_test.cc',
'test/boost/sstable_compressor_factory_test.cc',
'test/boost/sstable_compression_config_test.cc',
@@ -1701,7 +1625,6 @@ deps['test/boost/combined_tests'] += [
'test/boost/tracing_test.cc',
'test/boost/user_function_test.cc',
'test/boost/user_types_test.cc',
'test/boost/vector_index_test.cc',
'test/boost/view_build_test.cc',
'test/boost/view_complex_test.cc',
'test/boost/view_schema_ckey_test.cc',
@@ -1773,20 +1696,6 @@ deps['test/raft/discovery_test'] = ['test/raft/discovery_test.cc',
deps['test/vector_search/vector_store_client_test'] = ['test/vector_search/vector_store_client_test.cc'] + scylla_tests_dependencies
deps['test/vector_search/load_balancer_test'] = ['test/vector_search/load_balancer_test.cc'] + scylla_tests_dependencies
deps['test/vector_search/client_test'] = ['test/vector_search/client_test.cc'] + scylla_tests_dependencies
deps['test/vector_search/filter_test'] = ['test/vector_search/filter_test.cc'] + scylla_tests_dependencies
deps['test/vector_search/rescoring_test'] = ['test/vector_search/rescoring_test.cc'] + scylla_tests_dependencies
boost_tests_prefixes = ["test/boost/", "test/vector_search/", "test/raft/", "test/manual/", "test/ldap/"]
# We need to link these files to all Boost tests to make sure that
# we can execute `--list_json_content` on them. That will produce
# a similar result as calling `--list_content={HRF,DOT}`.
# Unfortunately, to be able to do that, we're forced to link the
# relevant code by hand.
for key in deps.keys():
for prefix in boost_tests_prefixes:
if key.startswith(prefix):
deps[key] += ["test/lib/boost_tree_lister_injector.cc", "test/lib/boost_test_tree_lister.cc"]
wasm_deps = {}
@@ -2092,7 +2001,7 @@ def semicolon_separated(*flags):
def real_relpath(path, start):
return os.path.relpath(os.path.realpath(path), os.path.realpath(start))
def configure_seastar(build_dir, mode, mode_config, compiler_cache=None):
def configure_seastar(build_dir, mode, mode_config):
seastar_cxx_ld_flags = mode_config['cxx_ld_flags']
# We want to "undo" coverage for seastar if we have it enabled.
if args.coverage:
@@ -2139,10 +2048,6 @@ def configure_seastar(build_dir, mode, mode_config, compiler_cache=None):
'-DSeastar_IO_URING=ON',
]
if compiler_cache:
seastar_cmake_args += [f'-DCMAKE_CXX_COMPILER_LAUNCHER={compiler_cache}',
f'-DCMAKE_C_COMPILER_LAUNCHER={compiler_cache}']
if args.stack_guards is not None:
stack_guards = 'ON' if args.stack_guards else 'OFF'
seastar_cmake_args += ['-DSeastar_STACK_GUARDS={}'.format(stack_guards)]
@@ -2174,7 +2079,7 @@ def configure_seastar(build_dir, mode, mode_config, compiler_cache=None):
subprocess.check_call(seastar_cmd, shell=False, cwd=cmake_dir)
def configure_abseil(build_dir, mode, mode_config, compiler_cache=None):
def configure_abseil(build_dir, mode, mode_config):
abseil_cflags = mode_config['lib_cflags']
cxx_flags = mode_config['cxxflags']
if '-DSANITIZE' in cxx_flags:
@@ -2200,10 +2105,6 @@ def configure_abseil(build_dir, mode, mode_config, compiler_cache=None):
'-DABSL_PROPAGATE_CXX_STD=ON',
]
if compiler_cache:
abseil_cmake_args += [f'-DCMAKE_CXX_COMPILER_LAUNCHER={compiler_cache}',
f'-DCMAKE_C_COMPILER_LAUNCHER={compiler_cache}']
cmake_args = abseil_cmake_args[:]
abseil_build_dir = os.path.join(build_dir, mode, 'abseil')
abseil_cmd = ['cmake', '-G', 'Ninja', real_relpath('abseil', abseil_build_dir)] + cmake_args
@@ -2349,6 +2250,15 @@ def get_extra_cxxflags(mode, mode_config, cxx, debuginfo):
if debuginfo and mode_config['can_have_debug_info']:
cxxflags += ['-g', '-gz']
if 'clang' in cxx:
# Since AssignmentTracking was enabled by default in clang
# (llvm/llvm-project@de6da6ad55d3ca945195d1cb109cb8efdf40a52a)
# coroutine frame debugging info (`coro_frame_ty`) is broken.
#
# It seems that we aren't losing much by disabling AssigmentTracking,
# so for now we choose to disable it to get `coro_frame_ty` back.
cxxflags.append('-Xclang -fexperimental-assignment-tracking=disabled')
return cxxflags
@@ -2376,15 +2286,10 @@ def write_build_file(f,
scylla_product,
scylla_version,
scylla_release,
compiler_cache,
args):
use_precompiled_header = not args.disable_precompiled_header
warnings = get_warning_options(args.cxx)
rustc_target = pick_rustc_target('wasm32-wasi', 'wasm32-wasip1')
# If compiler cache is available, prefix the compiler with it
cxx_with_cache = f'{compiler_cache} {args.cxx}' if compiler_cache else args.cxx
# For Rust, sccache is used via RUSTC_WRAPPER environment variable
rustc_wrapper = f'RUSTC_WRAPPER={compiler_cache} ' if compiler_cache and 'sccache' in compiler_cache else ''
f.write(textwrap.dedent('''\
configure_args = {configure_args}
builddir = {outdir}
@@ -2447,7 +2352,7 @@ def write_build_file(f,
command = clang --target=wasm32 --no-standard-libraries -Wl,--export-all -Wl,--no-entry $in -o $out
description = C2WASM $out
rule rust2wasm
command = {rustc_wrapper}cargo build --target={rustc_target} --example=$example --locked --manifest-path=test/resource/wasm/rust/Cargo.toml --target-dir=$builddir/wasm/ $
command = cargo build --target={rustc_target} --example=$example --locked --manifest-path=test/resource/wasm/rust/Cargo.toml --target-dir=$builddir/wasm/ $
&& wasm-opt -Oz $builddir/wasm/{rustc_target}/debug/examples/$example.wasm -o $builddir/wasm/$example.wasm $
&& wasm-strip $builddir/wasm/$example.wasm
description = RUST2WASM $out
@@ -2463,7 +2368,7 @@ def write_build_file(f,
command = llvm-profdata merge $in -output=$out
''').format(configure_args=configure_args,
outdir=outdir,
cxx=cxx_with_cache,
cxx=args.cxx,
user_cflags=user_cflags,
warnings=warnings,
defines=defines,
@@ -2471,7 +2376,6 @@ def write_build_file(f,
user_ldflags=user_ldflags,
libs=libs,
rustc_target=rustc_target,
rustc_wrapper=rustc_wrapper,
link_pool_depth=link_pool_depth,
seastar_path=args.seastar_path,
ninja=ninja,
@@ -2556,15 +2460,16 @@ def write_build_file(f,
description = TEST {mode}
# This rule is unused for PGO stages. They use the rust lib from the parent mode.
rule rust_lib.{mode}
command = CARGO_BUILD_DEP_INFO_BASEDIR='.' {rustc_wrapper}cargo build --locked --manifest-path=rust/Cargo.toml --target-dir=$builddir/{mode} --profile=rust-{mode} $
command = CARGO_BUILD_DEP_INFO_BASEDIR='.' cargo build --locked --manifest-path=rust/Cargo.toml --target-dir=$builddir/{mode} --profile=rust-{mode} $
&& touch $out
description = RUST_LIB $out
''').format(mode=mode, antlr3_exec=args.antlr3_exec, fmt_lib=fmt_lib, test_repeat=args.test_repeat, test_timeout=args.test_timeout, rustc_wrapper=rustc_wrapper, **modeval))
''').format(mode=mode, antlr3_exec=args.antlr3_exec, fmt_lib=fmt_lib, test_repeat=args.test_repeat, test_timeout=args.test_timeout, **modeval))
f.write(
'build {mode}-build: phony {artifacts} {wasms}\n'.format(
'build {mode}-build: phony {artifacts} {wasms} {vector_search_validator_bins}\n'.format(
mode=mode,
artifacts=str.join(' ', ['$builddir/' + mode + '/' + x for x in sorted(build_artifacts - wasms)]),
artifacts=str.join(' ', ['$builddir/' + mode + '/' + x for x in sorted(build_artifacts - wasms - vector_search_validator_bins)]),
wasms = str.join(' ', ['$builddir/' + x for x in sorted(build_artifacts & wasms)]),
vector_search_validator_bins=str.join(' ', ['$builddir/' + x for x in sorted(build_artifacts & vector_search_validator_bins)]),
)
)
if profile_recipe := modes[mode].get('profile_recipe'):
@@ -2594,7 +2499,7 @@ def write_build_file(f,
continue
profile_dep = modes[mode].get('profile_target', "")
if binary in other or binary in wasms:
if binary in other or binary in wasms or binary in vector_search_validator_bins:
continue
srcs = deps[binary]
# 'scylla'
@@ -2622,7 +2527,7 @@ def write_build_file(f,
# In debug/sanitize modes, we compile with fsanitizers,
# so must use the same options during the link:
if '-DSANITIZE' in modes[mode]['cxxflags']:
f.write(' libs = -fsanitize=address -fsanitize=undefined -lubsan\n')
f.write(' libs = -fsanitize=address -fsanitize=undefined\n')
else:
f.write(' libs =\n')
f.write(f'build $builddir/{mode}/{binary}.stripped: strip $builddir/{mode}/{binary}\n')
@@ -2705,10 +2610,11 @@ def write_build_file(f,
)
f.write(
'build {mode}-test: test.{mode} {test_executables} $builddir/{mode}/scylla {wasms}\n'.format(
'build {mode}-test: test.{mode} {test_executables} $builddir/{mode}/scylla {wasms} {vector_search_validator_bins} \n'.format(
mode=mode,
test_executables=' '.join(['$builddir/{}/{}'.format(mode, binary) for binary in sorted(tests)]),
wasms=' '.join([f'$builddir/{binary}' for binary in sorted(wasms)]),
vector_search_validator_bins=' '.join([f'$builddir/{binary}' for binary in sorted(vector_search_validator_bins)]),
)
)
f.write(
@@ -2795,35 +2701,38 @@ def write_build_file(f,
seastar_dep = f'$builddir/{mode}/seastar/libseastar.{seastar_lib_ext}'
seastar_testing_dep = f'$builddir/{mode}/seastar/libseastar_testing.{seastar_lib_ext}'
f.write(f'build {seastar_dep}: ninja $builddir/{mode}/seastar/build.ninja | always {profile_dep}\n')
f.write('build {seastar_dep}: ninja $builddir/{mode}/seastar/build.ninja | always {profile_dep}\n'
.format(**locals()))
f.write(' pool = submodule_pool\n')
f.write(f' subdir = $builddir/{mode}/seastar\n')
f.write(' target = seastar\n')
f.write(f'build {seastar_testing_dep}: ninja $builddir/{mode}/seastar/build.ninja | always {profile_dep}\n')
f.write(' subdir = $builddir/{mode}/seastar\n'.format(**locals()))
f.write(' target = seastar\n'.format(**locals()))
f.write('build {seastar_testing_dep}: ninja $builddir/{mode}/seastar/build.ninja | always {profile_dep}\n'
.format(**locals()))
f.write(' pool = submodule_pool\n')
f.write(f' subdir = $builddir/{mode}/seastar\n')
f.write(' target = seastar_testing\n')
f.write(f' profile_dep = {profile_dep}\n')
f.write(' subdir = $builddir/{mode}/seastar\n'.format(**locals()))
f.write(' target = seastar_testing\n'.format(**locals()))
f.write(' profile_dep = {profile_dep}\n'.format(**locals()))
for lib in abseil_libs:
f.write(f'build $builddir/{mode}/abseil/{lib}: ninja $builddir/{mode}/abseil/build.ninja | always {profile_dep}\n')
f.write(f' pool = submodule_pool\n')
f.write(f' subdir = $builddir/{mode}/abseil\n')
f.write(f' target = {lib}\n')
f.write(f' profile_dep = {profile_dep}\n')
f.write('build $builddir/{mode}/abseil/{lib}: ninja $builddir/{mode}/abseil/build.ninja | always {profile_dep}\n'.format(**locals()))
f.write(' pool = submodule_pool\n')
f.write(' subdir = $builddir/{mode}/abseil\n'.format(**locals()))
f.write(' target = {lib}\n'.format(**locals()))
f.write(' profile_dep = {profile_dep}\n'.format(**locals()))
f.write(f'build $builddir/{mode}/stdafx.hh.pch: cxx_build_precompiled_header.{mode} stdafx.hh | {profile_dep} {seastar_dep} {abseil_dep} {gen_headers_dep} {pch_dep}\n')
f.write(f'build $builddir/{mode}/seastar/apps/iotune/iotune: ninja $builddir/{mode}/seastar/build.ninja | $builddir/{mode}/seastar/libseastar.{seastar_lib_ext}\n')
f.write('build $builddir/{mode}/seastar/apps/iotune/iotune: ninja $builddir/{mode}/seastar/build.ninja | $builddir/{mode}/seastar/libseastar.{seastar_lib_ext}\n'
.format(**locals()))
f.write(' pool = submodule_pool\n')
f.write(f' subdir = $builddir/{mode}/seastar\n')
f.write(' target = iotune\n')
f.write(f' profile_dep = {profile_dep}\n')
f.write(textwrap.dedent(f'''\
f.write(' subdir = $builddir/{mode}/seastar\n'.format(**locals()))
f.write(' target = iotune\n'.format(**locals()))
f.write(' profile_dep = {profile_dep}\n'.format(**locals()))
f.write(textwrap.dedent('''\
build $builddir/{mode}/iotune: copy $builddir/{mode}/seastar/apps/iotune/iotune
build $builddir/{mode}/iotune.stripped: strip $builddir/{mode}/iotune
build $builddir/{mode}/iotune.debug: phony $builddir/{mode}/iotune.stripped
'''))
''').format(**locals()))
if args.dist_only:
include_scylla_and_iotune = ''
include_scylla_and_iotune_stripped = ''
@@ -2832,16 +2741,16 @@ def write_build_file(f,
include_scylla_and_iotune = f'$builddir/{mode}/scylla $builddir/{mode}/iotune $builddir/{mode}/patchelf'
include_scylla_and_iotune_stripped = f'$builddir/{mode}/scylla.stripped $builddir/{mode}/iotune.stripped $builddir/{mode}/patchelf.stripped'
include_scylla_and_iotune_debug = f'$builddir/{mode}/scylla.debug $builddir/{mode}/iotune.debug'
f.write(f'build $builddir/{mode}/dist/tar/{scylla_product}-unstripped-{scylla_version}-{scylla_release}.{arch}.tar.gz: package {include_scylla_and_iotune} $builddir/SCYLLA-RELEASE-FILE $builddir/SCYLLA-VERSION-FILE $builddir/debian/debian $builddir/node_exporter/node_exporter | always\n')
f.write(f' mode = {mode}\n')
f.write(f'build $builddir/{mode}/dist/tar/{scylla_product}-{scylla_version}-{scylla_release}.{arch}.tar.gz: stripped_package {include_scylla_and_iotune_stripped} $builddir/SCYLLA-RELEASE-FILE $builddir/SCYLLA-VERSION-FILE $builddir/debian/debian $builddir/node_exporter/node_exporter.stripped | always\n')
f.write(f' mode = {mode}\n')
f.write(f'build $builddir/{mode}/dist/tar/{scylla_product}-debuginfo-{scylla_version}-{scylla_release}.{arch}.tar.gz: debuginfo_package {include_scylla_and_iotune_debug} $builddir/SCYLLA-RELEASE-FILE $builddir/SCYLLA-VERSION-FILE $builddir/debian/debian $builddir/node_exporter/node_exporter.debug | always\n')
f.write(f' mode = {mode}\n')
f.write(f'build $builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz: copy $builddir/{mode}/dist/tar/{scylla_product}-{scylla_version}-{scylla_release}.{arch}.tar.gz\n')
f.write(f' mode = {mode}\n')
f.write(f'build $builddir/{mode}/dist/tar/{scylla_product}-{arch}-package.tar.gz: copy $builddir/{mode}/dist/tar/{scylla_product}-{scylla_version}-{scylla_release}.{arch}.tar.gz\n')
f.write(f' mode = {mode}\n')
f.write('build $builddir/{mode}/dist/tar/{scylla_product}-unstripped-{scylla_version}-{scylla_release}.{arch}.tar.gz: package {include_scylla_and_iotune} $builddir/SCYLLA-RELEASE-FILE $builddir/SCYLLA-VERSION-FILE $builddir/debian/debian $builddir/node_exporter/node_exporter | always\n'.format(**locals()))
f.write(' mode = {mode}\n'.format(**locals()))
f.write('build $builddir/{mode}/dist/tar/{scylla_product}-{scylla_version}-{scylla_release}.{arch}.tar.gz: stripped_package {include_scylla_and_iotune_stripped} $builddir/SCYLLA-RELEASE-FILE $builddir/SCYLLA-VERSION-FILE $builddir/debian/debian $builddir/node_exporter/node_exporter.stripped | always\n'.format(**locals()))
f.write(' mode = {mode}\n'.format(**locals()))
f.write('build $builddir/{mode}/dist/tar/{scylla_product}-debuginfo-{scylla_version}-{scylla_release}.{arch}.tar.gz: debuginfo_package {include_scylla_and_iotune_debug} $builddir/SCYLLA-RELEASE-FILE $builddir/SCYLLA-VERSION-FILE $builddir/debian/debian $builddir/node_exporter/node_exporter.debug | always\n'.format(**locals()))
f.write(' mode = {mode}\n'.format(**locals()))
f.write('build $builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz: copy $builddir/{mode}/dist/tar/{scylla_product}-{scylla_version}-{scylla_release}.{arch}.tar.gz\n'.format(**locals()))
f.write(' mode = {mode}\n'.format(**locals()))
f.write('build $builddir/{mode}/dist/tar/{scylla_product}-{arch}-package.tar.gz: copy $builddir/{mode}/dist/tar/{scylla_product}-{scylla_version}-{scylla_release}.{arch}.tar.gz\n'.format(**locals()))
f.write(' mode = {mode}\n'.format(**locals()))
f.write(f'build $builddir/dist/{mode}/redhat: rpmbuild $builddir/{mode}/dist/tar/{scylla_product}-unstripped-{scylla_version}-{scylla_release}.{arch}.tar.gz\n')
f.write(f' mode = {mode}\n')
@@ -2876,6 +2785,19 @@ def write_build_file(f,
'build compiler-training: phony {}\n'.format(' '.join(['{mode}-compiler-training'.format(mode=mode) for mode in default_modes]))
)
f.write(textwrap.dedent(f'''\
rule build-vector-search-validator
command = test/vector_search_validator/build-validator $builddir
rule build-vector-store
command = test/vector_search_validator/build-vector-store $builddir
'''))
f.write(
'build $builddir/{vector_search_validator_bin}: build-vector-search-validator {}\n'.format(' '.join([dep for dep in sorted(vector_search_validator_deps)]), vector_search_validator_bin=vector_search_validator_bin)
)
f.write(
'build $builddir/{vector_store_bin}: build-vector-store {}\n'.format(' '.join([dep for dep in sorted(vector_store_deps)]), vector_store_bin=vector_store_bin)
)
f.write(textwrap.dedent(f'''\
build dist-unified-tar: phony {' '.join([f'$builddir/{mode}/dist/tar/{scylla_product}-unified-{scylla_version}-{scylla_release}.{arch}.tar.gz' for mode in default_modes])}
build dist-unified: phony dist-unified-tar
@@ -3001,9 +2923,6 @@ def create_build_system(args):
os.makedirs(outdir, exist_ok=True)
compiler_cache = find_compiler_cache(args.compiler_cache)
resolve_compilers_for_compiler_cache(args, compiler_cache)
scylla_product, scylla_version, scylla_release = generate_version(args.date_stamp)
for mode, mode_config in build_modes.items():
@@ -3020,8 +2939,8 @@ def create_build_system(args):
# {outdir}/{mode}/seastar/build.ninja, and
# {outdir}/{mode}/seastar/seastar.pc is queried for building flags
for mode, mode_config in build_modes.items():
configure_seastar(outdir, mode, mode_config, compiler_cache)
configure_abseil(outdir, mode, mode_config, compiler_cache)
configure_seastar(outdir, mode, mode_config)
configure_abseil(outdir, mode, mode_config)
user_cflags += ' -isystem abseil'
for mode, mode_config in build_modes.items():
@@ -3044,7 +2963,6 @@ def create_build_system(args):
scylla_product,
scylla_version,
scylla_release,
compiler_cache,
args)
generate_compdb('compile_commands.json', ninja, args.buildfile, selected_modes)
@@ -3087,10 +3005,6 @@ def configure_using_cmake(args):
selected_modes = args.selected_modes or default_modes
selected_configs = ';'.join(build_modes[mode].cmake_build_type for mode
in selected_modes)
compiler_cache = find_compiler_cache(args.compiler_cache)
resolve_compilers_for_compiler_cache(args, compiler_cache)
settings = {
'CMAKE_CONFIGURATION_TYPES': selected_configs,
'CMAKE_CROSS_CONFIGS': selected_configs,
@@ -3108,14 +3022,6 @@ def configure_using_cmake(args):
'Scylla_WITH_DEBUG_INFO' : 'ON' if args.debuginfo else 'OFF',
'Scylla_USE_PRECOMPILED_HEADER': 'OFF' if args.disable_precompiled_header else 'ON',
}
if compiler_cache:
settings['CMAKE_CXX_COMPILER_LAUNCHER'] = compiler_cache
settings['CMAKE_C_COMPILER_LAUNCHER'] = compiler_cache
# For Rust, sccache is used via RUSTC_WRAPPER
if 'sccache' in compiler_cache:
settings['Scylla_RUSTC_WRAPPER'] = compiler_cache
if args.date_stamp:
settings['Scylla_DATE_STAMP'] = args.date_stamp
if args.staticboost:
@@ -3147,7 +3053,7 @@ def configure_using_cmake(args):
if not args.dist_only:
for mode in selected_modes:
configure_seastar(build_dir, build_modes[mode].cmake_build_type, modes[mode], compiler_cache)
configure_seastar(build_dir, build_modes[mode].cmake_build_type, modes[mode])
cmake_command = ['cmake']
cmake_command += [f'-D{var}={value}' for var, value in settings.items()]

View File

@@ -47,10 +47,6 @@ target_sources(cql3
functions/aggregate_fcts.cc
functions/castas_fcts.cc
functions/error_injection_fcts.cc
statements/strong_consistency/select_statement.cc
statements/strong_consistency/modification_statement.cc
statements/strong_consistency/statement_helpers.cc
functions/vector_similarity_fcts.cc
statements/cf_prop_defs.cc
statements/cf_statement.cc
statements/authentication_statement.cc
@@ -75,8 +71,8 @@ target_sources(cql3
statements/raw/parsed_statement.cc
statements/property_definitions.cc
statements/update_statement.cc
statements/broadcast_modification_statement.cc
statements/broadcast_select_statement.cc
statements/strongly_consistent_modification_statement.cc
statements/strongly_consistent_select_statement.cc
statements/delete_statement.cc
statements/prune_materialized_view_statement.cc
statements/batch_statement.cc
@@ -108,7 +104,6 @@ target_sources(cql3
statements/list_service_level_attachments_statement.cc
statements/list_effective_service_level_statement.cc
statements/describe_statement.cc
statements/view_prop_defs.cc
update_parameters.cc
util.cc
ut_name.cc

View File

@@ -431,7 +431,6 @@ unaliasedSelector returns [uexpression tmp]
| K_TTL '(' c=cident ')' { tmp = column_mutation_attribute{column_mutation_attribute::attribute_kind::ttl,
unresolved_identifier{std::move(c)}}; }
| f=functionName args=selectionFunctionArgs { tmp = function_call{std::move(f), std::move(args)}; }
| f=similarityFunctionName args=vectorSimilarityArgs { tmp = function_call{std::move(f), std::move(args)}; }
| K_CAST '(' arg=unaliasedSelector K_AS t=native_type ')' { tmp = cast{.style = cast::cast_style::sql, .arg = std::move(arg), .type = std::move(t)}; }
)
( '.' fi=cident { tmp = field_selection{std::move(tmp), std::move(fi)}; }
@@ -446,18 +445,6 @@ selectionFunctionArgs returns [std::vector<expression> a]
')'
;
vectorSimilarityArgs returns [std::vector<expression> a]
: '(' ')'
| '(' v1=vectorSimilarityArg { a.push_back(std::move(v1)); }
( ',' vn=vectorSimilarityArg { a.push_back(std::move(vn)); } )*
')'
;
vectorSimilarityArg returns [uexpression a]
: s=unaliasedSelector { a = std::move(s); }
| v=value { a = std::move(v); }
;
countArgument
: '*'
| i=INTEGER { if (i->getText() != "1") {
@@ -898,10 +885,6 @@ pkDef[cql3::statements::create_table_statement::raw_statement& expr]
| '(' k1=ident { l.push_back(k1); } ( ',' kn=ident { l.push_back(kn); } )* ')' { $expr.add_key_aliases(l); }
;
cfamProperties[cql3::statements::cf_properties& expr]
: cfamProperty[expr] (K_AND cfamProperty[expr])*
;
cfamProperty[cql3::statements::cf_properties& expr]
: property[*$expr.properties()]
| K_COMPACT K_STORAGE { $expr.set_compact_storage(); }
@@ -939,22 +922,16 @@ typeColumns[create_type_statement& expr]
*/
createIndexStatement returns [std::unique_ptr<create_index_statement> expr]
@init {
auto idx_props = make_shared<index_specific_prop_defs>();
auto props = index_prop_defs();
auto props = make_shared<index_prop_defs>();
bool if_not_exists = false;
auto name = ::make_shared<cql3::index_name>();
std::vector<::shared_ptr<index_target::raw>> targets;
}
: K_CREATE (K_CUSTOM { idx_props->is_custom = true; })? K_INDEX (K_IF K_NOT K_EXISTS { if_not_exists = true; } )?
: K_CREATE (K_CUSTOM { props->is_custom = true; })? K_INDEX (K_IF K_NOT K_EXISTS { if_not_exists = true; } )?
(idxName[*name])? K_ON cf=columnFamilyName '(' (target1=indexIdent { targets.emplace_back(target1); } (',' target2=indexIdent { targets.emplace_back(target2); } )*)? ')'
(K_USING cls=STRING_LITERAL { idx_props->custom_class = sstring{$cls.text}; })?
(K_WITH cfamProperties[props])?
{
props.extract_index_specific_properties_to(*idx_props);
view_prop_defs view_props = std::move(props).into_view_prop_defs();
$expr = std::make_unique<create_index_statement>(cf, name, targets, std::move(idx_props), std::move(view_props), if_not_exists);
}
(K_USING cls=STRING_LITERAL { props->custom_class = sstring{$cls.text}; })?
(K_WITH properties[*props])?
{ $expr = std::make_unique<create_index_statement>(cf, name, targets, props, if_not_exists); }
;
indexIdent returns [::shared_ptr<index_target::raw> id]
@@ -1102,9 +1079,9 @@ alterTypeStatement returns [std::unique_ptr<alter_type_statement> expr]
*/
alterViewStatement returns [std::unique_ptr<alter_view_statement> expr]
@init {
auto props = cql3::statements::view_prop_defs();
auto props = cql3::statements::cf_prop_defs();
}
: K_ALTER K_MATERIALIZED K_VIEW cf=columnFamilyName K_WITH properties[*props.properties()]
: K_ALTER K_MATERIALIZED K_VIEW cf=columnFamilyName K_WITH properties[props]
{
$expr = std::make_unique<alter_view_statement>(std::move(cf), std::move(props));
}
@@ -1706,10 +1683,6 @@ functionName returns [cql3::functions::function_name s]
: (ks=keyspaceName '.')? f=allowedFunctionName { $s.keyspace = std::move(ks); $s.name = std::move(f); }
;
similarityFunctionName returns [cql3::functions::function_name s]
: f=allowedSimilarityFunctionName { $s = cql3::functions::function_name::native_function(std::move(f)); }
;
allowedFunctionName returns [sstring s]
: f=IDENT { $s = $f.text; std::transform(s.begin(), s.end(), s.begin(), ::tolower); }
| f=QUOTED_NAME { $s = $f.text; }
@@ -1718,11 +1691,6 @@ allowedFunctionName returns [sstring s]
| K_COUNT { $s = "count"; }
;
allowedSimilarityFunctionName returns [sstring s]
: f=(K_SIMILARITY_COSINE | K_SIMILARITY_EUCLIDEAN | K_SIMILARITY_DOT_PRODUCT)
{ $s = $f.text; std::transform(s.begin(), s.end(), s.begin(), ::tolower); }
;
functionArgs returns [std::vector<expression> a]
: '(' ')'
| '(' t1=term { a.push_back(std::move(t1)); }
@@ -2419,10 +2387,6 @@ K_MUTATION_FRAGMENTS: M U T A T I O N '_' F R A G M E N T S;
K_VECTOR_SEARCH_INDEXING: V E C T O R '_' S E A R C H '_' I N D E X I N G;
K_SIMILARITY_EUCLIDEAN: S I M I L A R I T Y '_' E U C L I D E A N;
K_SIMILARITY_COSINE: S I M I L A R I T Y '_' C O S I N E;
K_SIMILARITY_DOT_PRODUCT: S I M I L A R I T Y '_' D O T '_' P R O D U C T;
// Case-insensitive alpha characters
fragment A: ('a'|'A');
fragment B: ('b'|'B');

View File

@@ -25,11 +25,6 @@ public:
NOT_ASSIGNABLE,
};
struct vector_test_result {
test_result result;
std::optional<size_t> dimension_opt;
};
static bool is_assignable(test_result tr) {
return tr != test_result::NOT_ASSIGNABLE;
}
@@ -49,8 +44,6 @@ public:
*/
virtual test_result test_assignment(data_dictionary::database db, const sstring& keyspace, const schema* schema_opt, const column_specification& receiver) const = 0;
virtual vector_test_result test_assignment_any_size_float_vector() const = 0;
virtual std::optional<data_type> assignment_testable_type_opt() const = 0;
// for error reporting

View File

@@ -10,7 +10,6 @@
#include "utils/assert.hh"
#include "cql3/column_specification.hh"
#include "cql3/column_identifier.hh"
namespace cql3 {
@@ -32,12 +31,4 @@ bool column_specification::all_in_same_table(const std::vector<lw_shared_ptr<col
});
}
lw_shared_ptr<column_specification> make_column_spec(std::string_view ks_name, std::string_view cf_name, sstring name, data_type type) {
return make_lw_shared<column_specification>(
ks_name,
cf_name,
::make_shared<column_identifier>(std::move(name), true),
std::move(type));
}
}

View File

@@ -42,6 +42,4 @@ public:
static bool all_in_same_table(const std::vector<lw_shared_ptr<column_specification>>& names);
};
lw_shared_ptr<column_specification> make_column_spec(std::string_view ks_name, std::string_view cf_name, sstring name, data_type type);
}

View File

@@ -1434,112 +1434,6 @@ test_assignment(const expression& expr, data_dictionary::database db, const sstr
}, expr);
}
template <cql3_type::kind... Kinds>
assignment_testable::vector_test_result
test_assignment_any_size_float_vector(const expression& expr) {
using test_result = assignment_testable::vector_test_result;
const test_result NOT_ASSIGNABLE = {assignment_testable::test_result::NOT_ASSIGNABLE, std::nullopt};
const test_result WEAKLY_ASSIGNABLE = {assignment_testable::test_result::WEAKLY_ASSIGNABLE, std::nullopt};
auto is_float_or_bind = [] (const expression& e) {
return expr::visit(overloaded_functor{
[] (const bind_variable&) {
return true;
},
[] (const untyped_constant& uc) {
return uc.partial_type == untyped_constant::type_class::floating_point
|| uc.partial_type == untyped_constant::type_class::integer;
},
[] (const constant& value) {
auto kind = value.type->as_cql3_type().get_kind();
return cql3_type::kind_enum_set::frozen<Kinds...>().contains(kind);
},
[] (const auto&) {
return false;
},
}, e);
};
auto validate_assignment = [&] (const data_type& dt) -> test_result {
auto vt = dynamic_pointer_cast<const vector_type_impl>(dt->underlying_type());
if (!vt) {
return NOT_ASSIGNABLE;
}
auto elem_kind = vt->get_elements_type()->as_cql3_type().get_kind();
if (cql3_type::kind_enum_set::frozen<Kinds...>().contains(elem_kind)) {
return {assignment_testable::test_result::WEAKLY_ASSIGNABLE, vt->get_dimension()};
}
return NOT_ASSIGNABLE;
};
return expr::visit(overloaded_functor{
[&] (const constant& value) -> test_result {
return validate_assignment(value.type);
},
[&] (const binary_operator&) -> test_result {
return NOT_ASSIGNABLE;
},
[&] (const conjunction&) -> test_result {
return NOT_ASSIGNABLE;
},
[&] (const column_value& col_val) -> test_result {
return validate_assignment(col_val.col->type);
},
[&] (const subscript&) -> test_result {
return NOT_ASSIGNABLE;
},
[&] (const unresolved_identifier& ui) -> test_result {
return NOT_ASSIGNABLE;
},
[&] (const column_mutation_attribute& cma) -> test_result {
return NOT_ASSIGNABLE;
},
[&] (const function_call& fc) -> test_result {
return NOT_ASSIGNABLE;
},
[&] (const cast& c) -> test_result {
return NOT_ASSIGNABLE;
},
[&] (const field_selection& fs) -> test_result {
return NOT_ASSIGNABLE;
},
[&] (const bind_variable& bv) -> test_result {
return WEAKLY_ASSIGNABLE;
},
[&] (const untyped_constant& uc) -> test_result {
return uc.partial_type == untyped_constant::type_class::null
? WEAKLY_ASSIGNABLE
: NOT_ASSIGNABLE;
},
[&] (const tuple_constructor& tc) -> test_result {
return NOT_ASSIGNABLE;
},
[&] (const collection_constructor& c) -> test_result {
switch (c.style) {
case collection_constructor::style_type::list_or_vector: {
if(std::ranges::all_of(c.elements, is_float_or_bind)) {
return {assignment_testable::test_result::WEAKLY_ASSIGNABLE, c.elements.size()};
}
return NOT_ASSIGNABLE;
}
case collection_constructor::style_type::set: return NOT_ASSIGNABLE;
case collection_constructor::style_type::map: return NOT_ASSIGNABLE;
case collection_constructor::style_type::vector:
on_internal_error(expr_logger, "vector style type found in test_assignment, should have been introduced post-prepare");
}
on_internal_error(expr_logger, fmt::format("unexpected collection_constructor style {}", static_cast<unsigned>(c.style)));
},
[&] (const usertype_constructor& uc) -> test_result {
return NOT_ASSIGNABLE;
},
[&] (const temporary& t) -> test_result {
return NOT_ASSIGNABLE;
},
}, expr);
}
assignment_testable::vector_test_result
test_assignment_any_size_float_vector(const expression& expr) {
return test_assignment_any_size_float_vector<cql3_type::kind::FLOAT, cql3_type::kind::DOUBLE>(expr);
}
expression
prepare_expression(const expression& expr, data_dictionary::database db, const sstring& keyspace, const schema* schema_opt, lw_shared_ptr<column_specification> receiver) {
auto e_opt = try_prepare_expression(expr, db, keyspace, schema_opt, std::move(receiver));
@@ -1573,9 +1467,6 @@ public:
virtual test_result test_assignment(data_dictionary::database db, const sstring& keyspace, const schema* schema_opt, const column_specification& receiver) const override {
return expr::test_assignment(_e, db, keyspace, schema_opt, receiver);
}
virtual vector_test_result test_assignment_any_size_float_vector() const override {
return expr::test_assignment_any_size_float_vector(_e);
}
virtual sstring assignment_testable_source_context() const override {
return fmt::format("{}", _e);
}

View File

@@ -16,7 +16,6 @@
#include "cql3/functions/user_function.hh"
#include "cql3/functions/user_aggregate.hh"
#include "cql3/functions/uuid_fcts.hh"
#include "cql3/functions/vector_similarity_fcts.hh"
#include "data_dictionary/data_dictionary.hh"
#include "as_json_function.hh"
#include "cql3/prepare_context.hh"
@@ -399,14 +398,6 @@ functions::get(data_dictionary::database db,
}
});
const auto func_name = name.has_keyspace() ? name : name.as_native_function();
if (SIMILARITY_FUNCTIONS.contains(func_name)) {
auto arg_types = retrieve_vector_arg_types(func_name, provided_args);
auto fun = ::make_shared<vector_similarity_fct>(func_name.name, arg_types);
validate_types(db, keyspace, schema.get(), fun, provided_args, receiver_ks, receiver_cf);
return fun;
}
if (name.has_keyspace()
? name == TOKEN_FUNCTION_NAME
: name.name == TOKEN_FUNCTION_NAME.name) {

View File

@@ -1,184 +0,0 @@
/*
* Copyright (C) 2025-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#include "vector_similarity_fcts.hh"
#include "types/types.hh"
#include "types/vector.hh"
#include "exceptions/exceptions.hh"
#include <span>
#include <bit>
namespace cql3 {
namespace functions {
namespace detail {
std::vector<float> extract_float_vector(const bytes_opt& param, size_t dimension) {
if (!param) {
throw exceptions::invalid_request_exception("Cannot extract float vector from null parameter");
}
const size_t expected_size = dimension * sizeof(float);
if (param->size() != expected_size) {
throw exceptions::invalid_request_exception(
fmt::format("Invalid vector size: expected {} bytes for {} floats, got {} bytes",
expected_size, dimension, param->size()));
}
std::vector<float> result;
result.reserve(dimension);
bytes_view view(*param);
for (size_t i = 0; i < dimension; ++i) {
// read_simple handles network byte order (big-endian) conversion
uint32_t raw = read_simple<uint32_t>(view);
result.push_back(std::bit_cast<float>(raw));
}
return result;
}
} // namespace detail
namespace {
// The computations of similarity scores match the exact formulas of Cassandra's (jVector's) implementation to ensure compatibility.
// There exist tests checking the compliance of the results.
// Reference:
// https://github.com/datastax/jvector/blob/f967f1c9249035b63b55a566fac7d4dc38380349/jvector-base/src/main/java/io/github/jbellis/jvector/vector/VectorSimilarityFunction.java#L36-L69
// You should only use this function if you need to preserve the original vectors and cannot normalize
// them in advance.
float compute_cosine_similarity(std::span<const float> v1, std::span<const float> v2) {
double dot_product = 0.0;
double squared_norm_a = 0.0;
double squared_norm_b = 0.0;
for (size_t i = 0; i < v1.size(); ++i) {
double a = v1[i];
double b = v2[i];
dot_product += a * b;
squared_norm_a += a * a;
squared_norm_b += b * b;
}
if (squared_norm_a == 0 || squared_norm_b == 0) {
return std::numeric_limits<float>::quiet_NaN();
}
// The cosine similarity is in the range [-1, 1].
// It is mapped to a similarity score in the range [0, 1] (-1 -> 0, 1 -> 1)
// for consistency with other similarity functions.
return (1 + (dot_product / (std::sqrt(squared_norm_a * squared_norm_b)))) / 2;
}
float compute_euclidean_similarity(std::span<const float> v1, std::span<const float> v2) {
double sum = 0.0;
for (size_t i = 0; i < v1.size(); ++i) {
double a = v1[i];
double b = v2[i];
double diff = a - b;
sum += diff * diff;
}
// The squared Euclidean (L2) distance is of range [0, inf).
// It is mapped to a similarity score in the range (0, 1] (0 -> 1, inf -> 0)
// for consistency with other similarity functions.
return (1 / (1 + sum));
}
// Assumes that both vectors are L2-normalized.
// This similarity is intended as an optimized way to perform cosine similarity calculation.
float compute_dot_product_similarity(std::span<const float> v1, std::span<const float> v2) {
double dot_product = 0.0;
for (size_t i = 0; i < v1.size(); ++i) {
double a = v1[i];
double b = v2[i];
dot_product += a * b;
}
// The dot product is in the range [-1, 1] for L2-normalized vectors.
// It is mapped to a similarity score in the range [0, 1] (-1 -> 0, 1 -> 1)
// for consistency with other similarity functions.
return ((1 + dot_product) / 2);
}
} // namespace
thread_local const std::unordered_map<function_name, similarity_function_t> SIMILARITY_FUNCTIONS = {
{SIMILARITY_COSINE_FUNCTION_NAME, compute_cosine_similarity},
{SIMILARITY_EUCLIDEAN_FUNCTION_NAME, compute_euclidean_similarity},
{SIMILARITY_DOT_PRODUCT_FUNCTION_NAME, compute_dot_product_similarity},
};
std::vector<data_type> retrieve_vector_arg_types(const function_name& name, const std::vector<shared_ptr<assignment_testable>>& provided_args) {
if (provided_args.size() != 2) {
throw exceptions::invalid_request_exception(fmt::format("Invalid number of arguments for function {}(vector<float, n>, vector<float, n>)", name));
}
auto [first_result, first_dim_opt] = provided_args[0]->test_assignment_any_size_float_vector();
auto [second_result, second_dim_opt] = provided_args[1]->test_assignment_any_size_float_vector();
auto invalid_type_error_message = [&name](const shared_ptr<assignment_testable>& arg) {
auto type = arg->assignment_testable_type_opt();
const auto& source_context = arg->assignment_testable_source_context();
if (type) {
return fmt::format("Function {} requires a float vector argument, but found {} of type {}", name, source_context, type.value()->cql3_type_name());
} else {
return fmt::format("Function {} requires a float vector argument, but found {}", name, source_context);
}
};
if (!is_assignable(first_result)) {
throw exceptions::invalid_request_exception(invalid_type_error_message(provided_args[0]));
}
if (!is_assignable(second_result)) {
throw exceptions::invalid_request_exception(invalid_type_error_message(provided_args[1]));
}
if (!first_dim_opt && !second_dim_opt) {
throw exceptions::invalid_request_exception(fmt::format("Cannot infer type of argument {} for function {}(vector<float, n>, vector<float, n>)",
provided_args[0]->assignment_testable_source_context(), name));
}
if (first_dim_opt && second_dim_opt) {
if (*first_dim_opt != *second_dim_opt) {
throw exceptions::invalid_request_exception(fmt::format(
"All arguments must have the same vector dimensions, but found vector<float, {}> and vector<float, {}>", *first_dim_opt, *second_dim_opt));
}
}
size_t dimension = first_dim_opt ? *first_dim_opt : *second_dim_opt;
auto type = vector_type_impl::get_instance(float_type, dimension);
return {type, type};
}
bytes_opt vector_similarity_fct::execute(std::span<const bytes_opt> parameters) {
if (std::any_of(parameters.begin(), parameters.end(), [](const auto& param) {
return !param;
})) {
return std::nullopt;
}
// Extract dimension from the vector type
const auto& type = static_cast<const vector_type_impl&>(*arg_types()[0]);
size_t dimension = type.get_dimension();
// Optimized path: extract floats directly from bytes, bypassing data_value overhead
std::vector<float> v1 = detail::extract_float_vector(parameters[0], dimension);
std::vector<float> v2 = detail::extract_float_vector(parameters[1], dimension);
float result = SIMILARITY_FUNCTIONS.at(_name)(v1, v2);
return float_type->decompose(result);
}
} // namespace functions
} // namespace cql3

View File

@@ -1,47 +0,0 @@
/*
* Copyright (C) 2025-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#pragma once
#include "native_scalar_function.hh"
#include "cql3/assignment_testable.hh"
#include "cql3/functions/function_name.hh"
#include <span>
namespace cql3 {
namespace functions {
static const function_name SIMILARITY_COSINE_FUNCTION_NAME = function_name::native_function("similarity_cosine");
static const function_name SIMILARITY_EUCLIDEAN_FUNCTION_NAME = function_name::native_function("similarity_euclidean");
static const function_name SIMILARITY_DOT_PRODUCT_FUNCTION_NAME = function_name::native_function("similarity_dot_product");
using similarity_function_t = float (*)(std::span<const float>, std::span<const float>);
extern thread_local const std::unordered_map<function_name, similarity_function_t> SIMILARITY_FUNCTIONS;
std::vector<data_type> retrieve_vector_arg_types(const function_name& name, const std::vector<shared_ptr<assignment_testable>>& provided_args);
class vector_similarity_fct : public native_scalar_function {
public:
vector_similarity_fct(const sstring& name, const std::vector<data_type>& arg_types)
: native_scalar_function(name, float_type, arg_types) {
}
virtual bytes_opt execute(std::span<const bytes_opt> parameters) override;
};
namespace detail {
// Extract float vector directly from serialized bytes, bypassing data_value overhead.
// This is an internal API exposed for testing purposes.
// Vector<float, N> wire format: N floats as big-endian uint32_t values, 4 bytes each.
std::vector<float> extract_float_vector(const bytes_opt& param, size_t dimension);
} // namespace detail
} // namespace functions
} // namespace cql3

View File

@@ -105,7 +105,6 @@ public:
static const std::chrono::minutes entry_expiry;
using key_type = prepared_cache_key_type;
using pinned_value_type = cache_value_ptr;
using value_type = checked_weak_ptr;
using statement_is_too_big = typename cache_type::entry_is_too_big;
@@ -117,14 +116,9 @@ public:
: _cache(size, entry_expiry, logger)
{}
template <typename LoadFunc>
future<pinned_value_type> get_pinned(const key_type& key, LoadFunc&& load) {
return _cache.get_ptr(key.key(), [load = std::forward<LoadFunc>(load)] (const cache_key_type&) { return load(); });
}
template <typename LoadFunc>
future<value_type> get(const key_type& key, LoadFunc&& load) {
return get_pinned(key, std::forward<LoadFunc>(load)).then([] (cache_value_ptr v_ptr) {
return _cache.get_ptr(key.key(), [load = std::forward<LoadFunc>(load)] (const cache_key_type&) { return load(); }).then([] (cache_value_ptr v_ptr) {
return make_ready_future<value_type>((*v_ptr)->checked_weak_from_this());
});
}

View File

@@ -14,7 +14,6 @@
#include <seastar/core/shared_ptr.hh>
#include <seastar/coroutine/parallel_for_each.hh>
#include <seastar/coroutine/as_future.hh>
#include <seastar/coroutine/try_future.hh>
#include "service/storage_proxy.hh"
#include "service/migration_manager.hh"
@@ -48,10 +47,8 @@ const std::chrono::minutes prepared_statements_cache::entry_expiry = std::chrono
struct query_processor::remote {
remote(service::migration_manager& mm, service::mapreduce_service& fwd,
service::storage_service& ss, service::raft_group0_client& group0_client,
service::strong_consistency::coordinator& _sc_coordinator)
service::storage_service& ss, service::raft_group0_client& group0_client)
: mm(mm), mapreducer(fwd), ss(ss), group0_client(group0_client)
, sc_coordinator(_sc_coordinator)
, gate("query_processor::remote")
{}
@@ -59,7 +56,6 @@ struct query_processor::remote {
service::mapreduce_service& mapreducer;
service::storage_service& ss;
service::raft_group0_client& group0_client;
service::strong_consistency::coordinator& sc_coordinator;
seastar::named_gate gate;
};
@@ -68,10 +64,6 @@ bool query_processor::topology_global_queue_empty() {
return remote().first.get().ss.topology_global_queue_empty();
}
future<bool> query_processor::ongoing_rf_change(const service::group0_guard& guard, sstring ks) {
return remote().first.get().ss.ongoing_rf_change(guard, std::move(ks));
}
static service::query_state query_state_for_internal_call() {
return {service::client_state::for_internal_calls(), empty_service_permit()};
}
@@ -517,16 +509,9 @@ query_processor::~query_processor() {
}
}
std::pair<std::reference_wrapper<service::strong_consistency::coordinator>, gate::holder>
query_processor::acquire_strongly_consistent_coordinator() {
auto [remote_, holder] = remote();
return {remote_.get().sc_coordinator, std::move(holder)};
}
void query_processor::start_remote(service::migration_manager& mm, service::mapreduce_service& mapreducer,
service::storage_service& ss, service::raft_group0_client& group0_client,
service::strong_consistency::coordinator& sc_coordinator) {
_remote = std::make_unique<struct remote>(mm, mapreducer, ss, group0_client, sc_coordinator);
service::storage_service& ss, service::raft_group0_client& group0_client) {
_remote = std::make_unique<struct remote>(mm, mapreducer, ss, group0_client);
}
future<> query_processor::stop_remote() {
@@ -697,7 +682,7 @@ future<::shared_ptr<cql_transport::messages::result_message::prepared>>
query_processor::prepare(sstring query_string, const service::client_state& client_state, cql3::dialect d) {
try {
auto key = compute_id(query_string, client_state.get_raw_keyspace(), d);
auto prep_entry = co_await _prepared_cache.get_pinned(key, [this, &query_string, &client_state, d] {
auto prep_ptr = co_await _prepared_cache.get(key, [this, &query_string, &client_state, d] {
auto prepared = get_statement(query_string, client_state, d);
prepared->calculate_metadata_id();
auto bound_terms = prepared->statement->get_bound_terms();
@@ -711,13 +696,13 @@ query_processor::prepare(sstring query_string, const service::client_state& clie
return make_ready_future<std::unique_ptr<statements::prepared_statement>>(std::move(prepared));
});
co_await utils::get_local_injector().inject(
"query_processor_prepare_wait_after_cache_get",
utils::wait_for_message(std::chrono::seconds(60)));
auto msg = ::make_shared<result_message::prepared::cql>(prepared_cache_key_type::cql_id(key), std::move(prep_entry),
const auto& warnings = prep_ptr->warnings;
const auto msg = ::make_shared<result_message::prepared::cql>(prepared_cache_key_type::cql_id(key), std::move(prep_ptr),
client_state.is_protocol_extension_set(cql_transport::cql_protocol_extension::LWT_ADD_METADATA_MARK));
co_return std::move(msg);
for (const auto& w : warnings) {
msg->add_warning(w);
}
co_return ::shared_ptr<cql_transport::messages::result_message::prepared>(std::move(msg));
} catch(typename prepared_statements_cache::statement_is_too_big&) {
throw prepared_statement_is_too_big(query_string);
}
@@ -870,7 +855,6 @@ struct internal_query_state {
sstring query_string;
std::unique_ptr<query_options> opts;
statements::prepared_statement::checked_weak_ptr p;
std::optional<service::query_state> qs;
bool more_results = true;
};
@@ -878,14 +862,10 @@ internal_query_state query_processor::create_paged_state(
const sstring& query_string,
db::consistency_level cl,
const data_value_list& values,
int32_t page_size,
std::optional<service::query_state> qs) {
int32_t page_size) {
auto p = prepare_internal(query_string);
auto opts = make_internal_options(p, values, cl, page_size);
if (!qs) {
qs.emplace(query_state_for_internal_call());
}
return internal_query_state{query_string, std::make_unique<cql3::query_options>(std::move(opts)), std::move(p), std::move(qs), true};
return internal_query_state{query_string, std::make_unique<cql3::query_options>(std::move(opts)), std::move(p), true};
}
bool query_processor::has_more_results(cql3::internal_query_state& state) const {
@@ -908,8 +888,9 @@ future<> query_processor::for_each_cql_result(
future<::shared_ptr<untyped_result_set>>
query_processor::execute_paged_internal(internal_query_state& state) {
state.p->statement->validate(*this, service::client_state::for_internal_calls());
auto qs = query_state_for_internal_call();
::shared_ptr<cql_transport::messages::result_message> msg =
co_await state.p->statement->execute(*this, *state.qs, *state.opts, std::nullopt);
co_await state.p->statement->execute(*this, qs, *state.opts, std::nullopt);
class visitor : public result_message::visitor_base {
internal_query_state& _state;
@@ -1008,7 +989,7 @@ query_processor::execute_with_params(
auto opts = make_internal_options(p, values, cl);
auto statement = p->statement;
auto msg = co_await coroutine::try_future(execute_maybe_with_guard(query_state, std::move(statement), opts, &query_processor::do_execute_with_params));
auto msg = co_await execute_maybe_with_guard(query_state, std::move(statement), opts, &query_processor::do_execute_with_params);
co_return ::make_shared<untyped_result_set>(msg);
}
@@ -1018,7 +999,7 @@ query_processor::do_execute_with_params(
shared_ptr<cql_statement> statement,
const query_options& options, std::optional<service::group0_guard> guard) {
statement->validate(*this, service::client_state::for_internal_calls());
co_return co_await coroutine::try_future(statement->execute(*this, query_state, options, std::move(guard)));
co_return co_await statement->execute(*this, query_state, options, std::move(guard));
}
@@ -1029,11 +1010,6 @@ query_processor::execute_batch_without_checking_exception_message(
query_options& options,
std::unordered_map<prepared_cache_key_type, authorized_prepared_statements_cache::value_type> pending_authorization_entries) {
auto access_future = co_await coroutine::as_future(batch->check_access(*this, query_state.get_client_state()));
bool failed = access_future.failed();
co_await audit::inspect(batch, query_state, options, failed);
if (failed) {
std::rethrow_exception(access_future.get_exception());
}
co_await coroutine::parallel_for_each(pending_authorization_entries, [this, &query_state] (auto& e) -> future<> {
try {
co_await _authorized_prepared_cache.insert(*query_state.get_client_state().user(), e.first, std::move(e.second));
@@ -1041,6 +1017,11 @@ query_processor::execute_batch_without_checking_exception_message(
log.error("failed to cache the entry: {}", std::current_exception());
}
});
bool failed = access_future.failed();
co_await audit::inspect(batch, query_state, options, failed);
if (access_future.failed()) {
std::rethrow_exception(access_future.get_exception());
}
batch->validate();
batch->validate(*this, query_state.get_client_state());
_stats.queries_by_cl[size_t(options.get_consistency())] += batch->get_statements().size();
@@ -1216,9 +1197,8 @@ future<> query_processor::query_internal(
db::consistency_level cl,
const data_value_list& values,
int32_t page_size,
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f,
std::optional<service::query_state> qs) {
auto query_state = create_paged_state(query_string, cl, values, page_size, std::move(qs));
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f) {
auto query_state = create_paged_state(query_string, cl, values, page_size);
co_return co_await for_each_cql_result(query_state, std::move(f));
}

Some files were not shown because too many files have changed in this diff Show More