mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-21 09:00:35 +00:00
Compare commits
3 Commits
next
...
per_sl_cou
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d193740ee5 | ||
|
|
70e114922f | ||
|
|
0c143afea9 |
8
.github/CODEOWNERS
vendored
8
.github/CODEOWNERS
vendored
@@ -32,8 +32,8 @@ counters* @nuivall
|
||||
tests/counter_test* @nuivall
|
||||
|
||||
# DOCS
|
||||
/docs/ @annastuchlik @tzach
|
||||
/docs/alternator/ @annastuchlik @tzach @nyh
|
||||
docs/* @annastuchlik @tzach
|
||||
docs/alternator @annastuchlik @tzach @nyh
|
||||
|
||||
# GOSSIP
|
||||
gms/* @tgrabiec @asias @kbr-scylla
|
||||
@@ -92,10 +92,6 @@ test/boost/querier_cache_test.cc @denesb
|
||||
# PYTEST-BASED CQL TESTS
|
||||
test/cqlpy/* @nyh
|
||||
|
||||
# TEST FRAMEWORK
|
||||
test/pylib/* @xtrey
|
||||
test.py @xtrey
|
||||
|
||||
# RAFT
|
||||
raft/* @kbr-scylla @gleb-cloudius @kostja
|
||||
test/raft/* @kbr-scylla @gleb-cloudius @kostja
|
||||
|
||||
33
.github/copilot-instructions.md
vendored
33
.github/copilot-instructions.md
vendored
@@ -5,14 +5,13 @@ High-performance distributed NoSQL database. Core values: performance, correctne
|
||||
|
||||
## Build System
|
||||
|
||||
### Using native OS environment
|
||||
### Modern Build (configure.py + ninja)
|
||||
```bash
|
||||
# Configure (run once)
|
||||
./configure.py
|
||||
# Configure (run once per mode, or when switching modes)
|
||||
./configure.py --mode=<mode> # mode: dev, debug, release, sanitize
|
||||
|
||||
# Build everything
|
||||
ninja <mode>-build # modes: dev, debug, release, sanitize
|
||||
# dev is recommended for development (fastest compilation)
|
||||
ninja <mode>-build # e.g., ninja dev-build
|
||||
|
||||
# Build Scylla binary only (sufficient for Python integration tests)
|
||||
ninja build/<mode>/scylla
|
||||
@@ -21,9 +20,6 @@ ninja build/<mode>/scylla
|
||||
ninja build/<mode>/test/boost/<test_name>
|
||||
```
|
||||
|
||||
### Using frozen toolchain (Docker)
|
||||
Prefix any build command with `./tools/toolchain/dbuild`.
|
||||
|
||||
## Running Tests
|
||||
|
||||
### C++ Unit Tests
|
||||
@@ -40,9 +36,9 @@ Prefix any build command with `./tools/toolchain/dbuild`.
|
||||
```
|
||||
|
||||
**Important:**
|
||||
- Use full path with `.cc` extension (e.g., `test/boost/memtable_test.cc`)
|
||||
- Use full path with `.cc` extension (e.g., `test/boost/test_name.cc`, not `boost/test_name`)
|
||||
- To run a single test case, append `::<test_case_name>` to the file path
|
||||
- If you encounter permission issues with cgroup metrics, add `--no-gather-metrics` to the `./test.py` command
|
||||
- If you encounter permission issues with cgroup metric gathering, add `--no-gather-metrics` flag
|
||||
|
||||
**Rebuilding Tests:**
|
||||
- test.py does NOT automatically rebuild when test source files are modified
|
||||
@@ -64,21 +60,25 @@ ninja build/<mode>/scylla
|
||||
# Run a single test case from a file
|
||||
./test.py --mode=<mode> test/<suite>/<test_name>.py::<test_function_name>
|
||||
|
||||
# Run all tests in a directory
|
||||
./test.py --mode=<mode> test/<suite>/
|
||||
|
||||
# Examples
|
||||
./test.py --mode=dev test/alternator/
|
||||
./test.py --mode=dev test/cqlpy/test_json.py
|
||||
./test.py --mode=dev test/cluster/test_raft_voters.py::test_raft_limited_voters_retain_coordinator
|
||||
./test.py --mode=dev test/cqlpy/test_json.py
|
||||
|
||||
# Optional flags
|
||||
./test.py --mode=dev test/cluster/test_raft_no_quorum.py -v --repeat 5
|
||||
./test.py --mode=dev test/cluster/test_raft_no_quorum.py -v # Verbose output
|
||||
./test.py --mode=dev test/cluster/test_raft_no_quorum.py --repeat 5 # Repeat test 5 times
|
||||
```
|
||||
|
||||
**Important:**
|
||||
- Use full path with `.py` extension
|
||||
- Use full path with `.py` extension (e.g., `test/cluster/test_raft_no_quorum.py`, not `cluster/test_raft_no_quorum`)
|
||||
- To run a single test case, append `::<test_function_name>` to the file path
|
||||
- Add `-v` for verbose output
|
||||
- Add `--repeat <num>` to repeat a test multiple times
|
||||
- After modifying C++ source files, only rebuild the Scylla binary for Python tests
|
||||
- After modifying C++ source files, only rebuild the Scylla binary for Python tests - building the entire repository is unnecessary
|
||||
|
||||
## Code Philosophy
|
||||
- Performance matters in hot paths (data read/write, inner loops)
|
||||
@@ -92,13 +92,10 @@ ninja build/<mode>/scylla
|
||||
## Test Philosophy
|
||||
- Performance matters. Tests should run as quickly as possible. Sleeps in the code are highly discouraged and should be avoided, to reduce run time and flakiness.
|
||||
- Stability matters. Tests should be stable. New tests should be executed 100 times at least to ensure they pass 100 out of 100 times. (use --repeat 100 --max-failures 1 when running it)
|
||||
- Unit tests should ideally test one thing only.
|
||||
- Unit tests should ideally test one thing and one thing only.
|
||||
- Tests for bug fixes should run before the fix - and show the failure and after the fix - and show they now pass.
|
||||
- Tests for bug fixes should have in their comments which bug fixes (GitHub or JIRA issue) they test.
|
||||
- Tests in debug are always slower, so if needed, reduce number of iterations, rows, data used, cycles, etc. in debug mode.
|
||||
- Tests should strive to be repeatable, and not use random input that will make their results unpredictable.
|
||||
- Tests should consume as little resources as possible. Prefer running tests on a single node if it is sufficient, for example.
|
||||
|
||||
## New Files
|
||||
- Include `LicenseRef-ScyllaDB-Source-Available-1.1` in the SPDX header
|
||||
- Use the current year for new files; for existing code keep the year as is
|
||||
|
||||
14
.github/instructions/cpp.instructions.md
vendored
14
.github/instructions/cpp.instructions.md
vendored
@@ -25,8 +25,6 @@ applyTo: "**/*.{cc,hh}"
|
||||
- Use `seastar::gate` for shutdown coordination
|
||||
- Use `seastar::semaphore` for resource limiting (not `std::mutex`)
|
||||
- Break long loops with `maybe_yield()` to avoid reactor stalls
|
||||
- Most Scylla code runs on a single shard where atomics are unnecessary
|
||||
- Use Seastar message passing for cross-shard communication
|
||||
|
||||
## Coroutines
|
||||
```cpp
|
||||
@@ -38,16 +36,10 @@ seastar::future<T> func() {
|
||||
|
||||
## Error Handling
|
||||
- Throw exceptions for errors (futures propagate them automatically)
|
||||
- In coroutines, use `co_await coroutine::return_exception_ptr()` or `co_return coroutine::exception()` to avoid the overhead of throwing
|
||||
- In data path: avoid exceptions, use `std::expected` (or `boost::outcome`) instead
|
||||
- Use standard exceptions (`std::runtime_error`, `std::invalid_argument`)
|
||||
- Database-specific: throw appropriate schema/query exceptions
|
||||
|
||||
## Invariant Checking
|
||||
- Prefer `throwing_assert()` (`utils/assert.hh`), it logs and throws instead of aborting
|
||||
- Use `SCYLLA_ASSERT` where critical to system stability where no clean shutdown is possible, it aborts
|
||||
- Use `on_internal_error()` for should-never-happen conditions that should be logged with backtrace
|
||||
|
||||
## Performance
|
||||
- Pass large objects by `const&` or `&&` (move semantics)
|
||||
- Use `std::string_view` for non-owning string references
|
||||
@@ -76,7 +68,7 @@ seastar::future<T> func() {
|
||||
- Use `#pragma once`
|
||||
- Include order: own header, C++ std, Seastar, Boost, project headers
|
||||
- Forward declare when possible
|
||||
- Never `using namespace` in headers. Exception: most headers include `seastarx.hh`, which provides `using namespace seastar` project-wide.
|
||||
- Never `using namespace` in headers (exception: `using namespace seastar` is globally available via `seastarx.hh`)
|
||||
|
||||
## Documentation
|
||||
- Public APIs require clear documentation
|
||||
@@ -109,8 +101,10 @@ seastar::future<T> func() {
|
||||
- `malloc`/`free`
|
||||
- `printf` family (use logging or fmt)
|
||||
- Raw pointers for ownership
|
||||
- `using namespace` in headers
|
||||
- Blocking operations: `std::sleep`, `std::read`, `std::mutex` (use Seastar equivalents)
|
||||
- New ad-hoc macros (prefer `inline`, `constexpr`, or templates; established project macros like `SCYLLA_ASSERT` are fine)
|
||||
- `std::atomic` (reserved for very special circumstances only)
|
||||
- Macros (use `inline`, `constexpr`, or templates instead)
|
||||
|
||||
## Testing
|
||||
When modifying existing code, follow TDD: create/update test first, then implement.
|
||||
|
||||
4
.github/instructions/python.instructions.md
vendored
4
.github/instructions/python.instructions.md
vendored
@@ -7,7 +7,7 @@ applyTo: "**/*.py"
|
||||
**Important:** Match existing code style. Some directories (like `test/cqlpy` and `test/alternator`) prefer simplicity over type hints and docstrings.
|
||||
|
||||
## Style
|
||||
- Match style of the file and directory you are editing; fall back to PEP 8 if unclear
|
||||
- Follow PEP 8
|
||||
- Use type hints for function signatures (unless directory style omits them)
|
||||
- Use f-strings for formatting
|
||||
- Line length: 160 characters max
|
||||
@@ -25,7 +25,7 @@ from cassandra.cluster import Cluster
|
||||
from test.utils import setup_keyspace
|
||||
```
|
||||
|
||||
Avoid wildcard imports (`from module import *`).
|
||||
Never use `from module import *`
|
||||
|
||||
## Documentation
|
||||
All public functions/classes need docstrings (unless the current directory conventions omit them):
|
||||
|
||||
@@ -10,9 +10,6 @@ on:
|
||||
types: [labeled, unlabeled]
|
||||
branches: [master, next, enterprise]
|
||||
|
||||
env:
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
|
||||
jobs:
|
||||
check-commit:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -33,7 +30,7 @@ jobs:
|
||||
echo "DEFAULT_BRANCH=master" >> $GITHUB_ENV
|
||||
fi
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: ${{ github.repository }}
|
||||
ref: ${{ env.DEFAULT_BRANCH }}
|
||||
|
||||
@@ -5,9 +5,6 @@ on:
|
||||
types: [opened, reopened, edited]
|
||||
branches: [branch-*]
|
||||
|
||||
env:
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
|
||||
jobs:
|
||||
check-fixes-prefix:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -16,7 +13,7 @@ jobs:
|
||||
issues: write
|
||||
steps:
|
||||
- name: Check PR body for "Fixes" prefix patterns
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const body = context.payload.pull_request.body;
|
||||
|
||||
5
.github/workflows/build-scylla.yaml
vendored
5
.github/workflows/build-scylla.yaml
vendored
@@ -12,9 +12,6 @@ on:
|
||||
description: 'the md5sum for scylla executable'
|
||||
value: ${{ jobs.build.outputs.md5sum }}
|
||||
|
||||
env:
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
|
||||
jobs:
|
||||
read-toolchain:
|
||||
uses: ./.github/workflows/read-toolchain.yaml
|
||||
@@ -27,7 +24,7 @@ jobs:
|
||||
outputs:
|
||||
md5sum: ${{ steps.checksum.outputs.md5sum }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: recursive
|
||||
- name: Generate the building system
|
||||
|
||||
5
.github/workflows/check-license-header.yaml
vendored
5
.github/workflows/check-license-header.yaml
vendored
@@ -9,7 +9,6 @@ env:
|
||||
HEADER_CHECK_LINES: 10
|
||||
LICENSE: "LicenseRef-ScyllaDB-Source-Available-1.1"
|
||||
CHECKED_EXTENSIONS: ".cc .hh .py"
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
|
||||
jobs:
|
||||
check-license-headers:
|
||||
@@ -20,7 +19,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -41,7 +40,7 @@ jobs:
|
||||
|
||||
- name: Comment on PR if check fails
|
||||
if: failure()
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const license = '${{ env.LICENSE }}';
|
||||
|
||||
3
.github/workflows/clang-nightly.yaml
vendored
3
.github/workflows/clang-nightly.yaml
vendored
@@ -9,7 +9,6 @@ env:
|
||||
# use the development branch explicitly
|
||||
CLANG_VERSION: 21
|
||||
BUILD_DIR: build
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
|
||||
permissions: {}
|
||||
|
||||
@@ -33,7 +32,7 @@ jobs:
|
||||
steps:
|
||||
- run: |
|
||||
sudo dnf -y install git
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Install build dependencies
|
||||
|
||||
3
.github/workflows/clang-tidy.yaml
vendored
3
.github/workflows/clang-tidy.yaml
vendored
@@ -18,7 +18,6 @@ env:
|
||||
BUILD_TYPE: RelWithDebInfo
|
||||
BUILD_DIR: build
|
||||
CLANG_TIDY_CHECKS: '-*,bugprone-use-after-move'
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
|
||||
permissions: {}
|
||||
|
||||
@@ -43,7 +42,7 @@ jobs:
|
||||
IMAGE: ${{ needs.read-toolchain.image }}
|
||||
run: |
|
||||
echo ${{ needs.read-toolchain.image }}
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- run: |
|
||||
|
||||
@@ -7,16 +7,13 @@ on:
|
||||
permissions:
|
||||
issues: write
|
||||
|
||||
env:
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
|
||||
jobs:
|
||||
comment-and-close:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Comment and close if author email is scylladb.com
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
|
||||
6
.github/workflows/codespell.yaml
vendored
6
.github/workflows/codespell.yaml
vendored
@@ -4,15 +4,13 @@ on:
|
||||
branches:
|
||||
- master
|
||||
permissions: {}
|
||||
env:
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
jobs:
|
||||
codespell:
|
||||
name: Check for spelling errors
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: codespell-project/actions-codespell@8f01853be192eb0f849a5c7d721450e7a467c579 # v2.2
|
||||
- uses: actions/checkout@v4
|
||||
- uses: codespell-project/actions-codespell@master
|
||||
with:
|
||||
only_warn: 1
|
||||
ignore_words_list: "ans,datas,fo,ser,ue,crate,nd,reenable,strat,stap,te,raison,iif,tread"
|
||||
|
||||
38
.github/workflows/compare-build-systems.yaml
vendored
38
.github/workflows/compare-build-systems.yaml
vendored
@@ -1,38 +0,0 @@
|
||||
name: Compare Build Systems
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
paths:
|
||||
- 'configure.py'
|
||||
- '**/CMakeLists.txt'
|
||||
- 'cmake/**'
|
||||
- 'scripts/compare_build_systems.py'
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
# cancel the in-progress run upon a repush
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
read-toolchain:
|
||||
uses: ./.github/workflows/read-toolchain.yaml
|
||||
compare:
|
||||
name: Compare configure.py vs CMake
|
||||
needs:
|
||||
- read-toolchain
|
||||
runs-on: ubuntu-latest
|
||||
container: ${{ needs.read-toolchain.outputs.image }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Compare build systems
|
||||
run: |
|
||||
git config --global --add safe.directory $GITHUB_WORKSPACE
|
||||
python3 scripts/compare_build_systems.py --ci
|
||||
5
.github/workflows/conflict_reminder.yaml
vendored
5
.github/workflows/conflict_reminder.yaml
vendored
@@ -12,16 +12,13 @@ on:
|
||||
schedule:
|
||||
- cron: '0 10 * * 1' # Runs every Monday at 10:00am
|
||||
|
||||
env:
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
|
||||
jobs:
|
||||
notify_conflict_prs:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Notify PR Authors of Conflicts
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
console.log("Starting conflict reminder script...");
|
||||
|
||||
@@ -13,9 +13,6 @@ on:
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -24,12 +21,12 @@ jobs:
|
||||
security-events: write
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Differential ShellCheck
|
||||
uses: redhat-plumbers-in-action/differential-shellcheck@d965e66ec0b3b2f821f75c8eff9b12442d9a7d1e # v5.5.6
|
||||
uses: redhat-plumbers-in-action/differential-shellcheck@v5
|
||||
with:
|
||||
severity: warning
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
7
.github/workflows/docs-pages.yaml
vendored
7
.github/workflows/docs-pages.yaml
vendored
@@ -5,7 +5,6 @@ name: "Docs / Publish"
|
||||
env:
|
||||
FLAG: ${{ github.repository == 'scylladb/scylla-enterprise' && 'enterprise' || 'opensource' }}
|
||||
DEFAULT_BRANCH: ${{ github.repository == 'scylladb/scylla-enterprise' && 'enterprise' || 'master' }}
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
|
||||
on:
|
||||
push:
|
||||
@@ -26,17 +25,17 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ env.DEFAULT_BRANCH }}
|
||||
persist-credentials: false
|
||||
fetch-depth: 0
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0
|
||||
uses: astral-sh/setup-uv@v6
|
||||
- name: Set up env
|
||||
run: make -C docs FLAG="${{ env.FLAG }}" setupenv
|
||||
- name: Build docs
|
||||
|
||||
7
.github/workflows/docs-pr.yaml
vendored
7
.github/workflows/docs-pr.yaml
vendored
@@ -7,7 +7,6 @@ permissions:
|
||||
|
||||
env:
|
||||
FLAG: ${{ github.repository == 'scylladb/scylla-enterprise' && 'enterprise' || 'opensource' }}
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
@@ -23,16 +22,16 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
persist-credentials: false
|
||||
fetch-depth: 0
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.12"
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0
|
||||
uses: astral-sh/setup-uv@v6
|
||||
- name: Set up env
|
||||
run: make -C docs FLAG="${{ env.FLAG }}" setupenv
|
||||
- name: Build docs
|
||||
|
||||
7
.github/workflows/docs-validate-metrics.yml
vendored
7
.github/workflows/docs-validate-metrics.yml
vendored
@@ -3,9 +3,6 @@ name: Docs / Validate metrics
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
@@ -24,12 +21,12 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: '3.10'
|
||||
|
||||
|
||||
5
.github/workflows/iwyu.yaml
vendored
5
.github/workflows/iwyu.yaml
vendored
@@ -13,7 +13,6 @@ env:
|
||||
# supposed to be processed by idl-compiler.py, so we don't check them using the cleaner
|
||||
CLEANER_DIRS: test/unit exceptions alternator api auth cdc compaction db dht gms index lang message mutation mutation_writer node_ops raft redis replica service
|
||||
SEASTAR_BAD_INCLUDE_OUTPUT_PATH: build/seastar-bad-include.log
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -33,7 +32,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
container: ${{ needs.read-toolchain.outputs.image }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- name: Generate compilation database
|
||||
@@ -90,7 +89,7 @@ jobs:
|
||||
| tee "$SEASTAR_BAD_INCLUDE_OUTPUT_PATH"
|
||||
- run: |
|
||||
echo "::remove-matcher owner=seastar-bad-include::"
|
||||
- uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: Logs
|
||||
path: |
|
||||
|
||||
@@ -7,7 +7,6 @@ on:
|
||||
|
||||
env:
|
||||
DEFAULT_BRANCH: 'master'
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
|
||||
jobs:
|
||||
mark-ready:
|
||||
@@ -18,7 +17,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: ${{ github.repository }}
|
||||
ref: ${{ env.DEFAULT_BRANCH }}
|
||||
|
||||
@@ -5,8 +5,6 @@ on:
|
||||
branches:
|
||||
- master
|
||||
- next
|
||||
env:
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
jobs:
|
||||
label:
|
||||
if: github.event.pull_request.draft == false
|
||||
@@ -17,7 +15,7 @@ jobs:
|
||||
steps:
|
||||
- name: Wait for label to be added
|
||||
run: sleep 1m
|
||||
- uses: mheap/github-action-required-labels@0ac283b4e65c1fb28ce6079dea5546ceca98ccbe # v5.5.2
|
||||
- uses: mheap/github-action-required-labels@v5
|
||||
with:
|
||||
mode: minimum
|
||||
count: 1
|
||||
|
||||
5
.github/workflows/read-toolchain.yaml
vendored
5
.github/workflows/read-toolchain.yaml
vendored
@@ -7,9 +7,6 @@ on:
|
||||
description: "the toolchain docker image"
|
||||
value: ${{ jobs.read-toolchain.outputs.image }}
|
||||
|
||||
env:
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
|
||||
jobs:
|
||||
read-toolchain:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -18,7 +15,7 @@ jobs:
|
||||
outputs:
|
||||
image: ${{ steps.read.outputs.image }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
sparse-checkout: tools/toolchain/image
|
||||
sparse-checkout-cone-mode: false
|
||||
|
||||
5
.github/workflows/seastar.yaml
vendored
5
.github/workflows/seastar.yaml
vendored
@@ -13,7 +13,6 @@ concurrency:
|
||||
|
||||
env:
|
||||
BUILD_DIR: build
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
|
||||
jobs:
|
||||
read-toolchain:
|
||||
@@ -30,12 +29,12 @@ jobs:
|
||||
- RelWithDebInfo
|
||||
- Dev
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- run: |
|
||||
rm -rf seastar
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
repository: scylladb/seastar
|
||||
submodules: true
|
||||
|
||||
5
.github/workflows/sync-labels.yaml
vendored
5
.github/workflows/sync-labels.yaml
vendored
@@ -7,9 +7,6 @@ on:
|
||||
issues:
|
||||
types: [labeled, unlabeled]
|
||||
|
||||
env:
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
|
||||
jobs:
|
||||
label-sync:
|
||||
if: ${{ github.repository == 'scylladb/scylladb' }}
|
||||
@@ -24,7 +21,7 @@ jobs:
|
||||
GITHUB_CONTEXT: ${{ toJson(github) }}
|
||||
run: echo "$GITHUB_CONTEXT"
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
sparse-checkout: |
|
||||
.github/scripts/sync_labels.py
|
||||
|
||||
7
.github/workflows/trigger_ci.yaml
vendored
7
.github/workflows/trigger_ci.yaml
vendored
@@ -5,10 +5,7 @@ on:
|
||||
types: [opened, reopened, synchronize]
|
||||
issue_comment:
|
||||
types: [created]
|
||||
|
||||
env:
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
|
||||
|
||||
jobs:
|
||||
trigger-ci:
|
||||
runs-on: ubuntu-latest
|
||||
@@ -18,7 +15,7 @@ jobs:
|
||||
GITHUB_CONTEXT: ${{ toJson(github) }}
|
||||
run: echo "$GITHUB_CONTEXT"
|
||||
- name: Checkout PR code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: 0 # Needed to access full history
|
||||
ref: ${{ github.event.pull_request.head.ref }}
|
||||
|
||||
5
.github/workflows/urgent_issue_reminder.yml
vendored
5
.github/workflows/urgent_issue_reminder.yml
vendored
@@ -4,16 +4,13 @@ on:
|
||||
schedule:
|
||||
- cron: '10 8 * * *' # Runs daily at 8 AM
|
||||
|
||||
env:
|
||||
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
|
||||
|
||||
jobs:
|
||||
reminder:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Send reminders
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
|
||||
uses: actions/github-script@v7
|
||||
with:
|
||||
script: |
|
||||
const labelFilters = ['P0', 'P1', 'Field-Tier1','status/release blocker', 'status/regression'];
|
||||
|
||||
16
AGENTS.md
16
AGENTS.md
@@ -1,16 +0,0 @@
|
||||
# ScyllaDB — AI Agent Instructions
|
||||
|
||||
This file routes you to the relevant instruction files.
|
||||
Do NOT load all files at once — read only what applies to your current task.
|
||||
|
||||
## Instruction Files
|
||||
|
||||
- `.github/copilot-instructions.md` — build system, test runner, code philosophy, test philosophy
|
||||
- `.github/instructions/cpp.instructions.md` — C++ style, Seastar patterns, memory, error handling (for `*.cc`, `*.hh`)
|
||||
- `.github/instructions/python.instructions.md` — Python style, testing conventions (for `*.py`)
|
||||
|
||||
## Which files to read
|
||||
|
||||
- **Always read** `.github/copilot-instructions.md` for build/test commands and project values
|
||||
- **If editing C++ files** (`*.cc`, `*.hh`): also read `.github/instructions/cpp.instructions.md`
|
||||
- **If editing Python files** (`*.py`): also read `.github/instructions/python.instructions.md`
|
||||
@@ -9,8 +9,6 @@ target_sources(alternator
|
||||
controller.cc
|
||||
server.cc
|
||||
executor.cc
|
||||
executor_read.cc
|
||||
executor_util.cc
|
||||
stats.cc
|
||||
serialization.cc
|
||||
expressions.cc
|
||||
|
||||
@@ -1,253 +0,0 @@
|
||||
/*
|
||||
* Copyright 2019-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <variant>
|
||||
|
||||
#include "utils/rjson.hh"
|
||||
#include "utils/overloaded_functor.hh"
|
||||
#include "alternator/error.hh"
|
||||
#include "alternator/expressions_types.hh"
|
||||
|
||||
namespace alternator {
|
||||
|
||||
// An attribute_path_map object is used to hold data for various attributes
|
||||
// paths (parsed::path) in a hierarchy of attribute paths. Each attribute path
|
||||
// has a root attribute, and then modified by member and index operators -
|
||||
// for example in "a.b[2].c" we have "a" as the root, then ".b" member, then
|
||||
// "[2]" index, and finally ".c" member.
|
||||
// Data can be added to an attribute_path_map using the add() function, but
|
||||
// requires that attributes with data not be *overlapping* or *conflicting*:
|
||||
//
|
||||
// 1. Two attribute paths which are identical or an ancestor of one another
|
||||
// are considered *overlapping* and not allowed. If a.b.c has data,
|
||||
// we can't add more data in a.b.c or any of its descendants like a.b.c.d.
|
||||
//
|
||||
// 2. Two attribute paths which need the same parent to have both a member and
|
||||
// an index are considered *conflicting* and not allowed. E.g., if a.b has
|
||||
// data, you can't add a[1]. The meaning of adding both would be that the
|
||||
// attribute a is both a map and an array, which isn't sensible.
|
||||
//
|
||||
// These two requirements are common to the two places where Alternator uses
|
||||
// this abstraction to describe how a hierarchical item is to be transformed:
|
||||
//
|
||||
// 1. In ProjectExpression: for filtering from a full top-level attribute
|
||||
// only the parts for which user asked in ProjectionExpression.
|
||||
//
|
||||
// 2. In UpdateExpression: for taking the previous value of a top-level
|
||||
// attribute, and modifying it based on the instructions in the user
|
||||
// wrote in UpdateExpression.
|
||||
|
||||
template<typename T>
|
||||
class attribute_path_map_node {
|
||||
public:
|
||||
using data_t = T;
|
||||
// We need the extra unique_ptr<> here because libstdc++ unordered_map
|
||||
// doesn't work with incomplete types :-(
|
||||
using members_t = std::unordered_map<std::string, std::unique_ptr<attribute_path_map_node<T>>>;
|
||||
// The indexes list is sorted because DynamoDB requires handling writes
|
||||
// beyond the end of a list in index order.
|
||||
using indexes_t = std::map<unsigned, std::unique_ptr<attribute_path_map_node<T>>>;
|
||||
// The prohibition on "overlap" and "conflict" explained above means
|
||||
// That only one of data, members or indexes is non-empty.
|
||||
std::optional<std::variant<data_t, members_t, indexes_t>> _content;
|
||||
|
||||
bool is_empty() const { return !_content; }
|
||||
bool has_value() const { return _content && std::holds_alternative<data_t>(*_content); }
|
||||
bool has_members() const { return _content && std::holds_alternative<members_t>(*_content); }
|
||||
bool has_indexes() const { return _content && std::holds_alternative<indexes_t>(*_content); }
|
||||
// get_members() assumes that has_members() is true
|
||||
members_t& get_members() { return std::get<members_t>(*_content); }
|
||||
const members_t& get_members() const { return std::get<members_t>(*_content); }
|
||||
indexes_t& get_indexes() { return std::get<indexes_t>(*_content); }
|
||||
const indexes_t& get_indexes() const { return std::get<indexes_t>(*_content); }
|
||||
T& get_value() { return std::get<T>(*_content); }
|
||||
const T& get_value() const { return std::get<T>(*_content); }
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
using attribute_path_map = std::unordered_map<std::string, attribute_path_map_node<T>>;
|
||||
|
||||
using attrs_to_get_node = attribute_path_map_node<std::monostate>;
|
||||
// attrs_to_get lists which top-level attribute are needed, and possibly also
|
||||
// which part of the top-level attribute is really needed (when nested
|
||||
// attribute paths appeared in the query).
|
||||
// Most code actually uses optional<attrs_to_get>. There, a disengaged
|
||||
// optional means we should get all attributes, not specific ones.
|
||||
using attrs_to_get = attribute_path_map<std::monostate>;
|
||||
|
||||
// takes a given JSON value and drops its parts which weren't asked to be
// kept. It modifies the given JSON value, or returns false to signify that
// the entire object should be dropped.
// Note that the JSON value is assumed to be encoded using the DynamoDB
// conventions - i.e., it is really a map whose key has a type string,
// and the value is the real object.
//
// "h" is the hierarchy of requested sub-paths for this value: if h has
// members (or indexes), only the listed map members (or list positions)
// survive, each possibly filtered further recursively. If h has neither
// members nor indexes, the value is kept whole.
template<typename T>
bool hierarchy_filter(rjson::value& val, const attribute_path_map_node<T>& h) {
    if (!val.IsObject() || val.MemberCount() != 1) {
        // This shouldn't happen. We shouldn't have stored malformed objects.
        // But today Alternator does not validate the structure of nested
        // documents before storing them, so this can happen on read.
        throw api_error::internal(format("Malformed value object read: {}", val));
    }
    // DynamoDB encoding: the single member's name is the type ("M" for map,
    // "L" for list, etc.), and its value is the actual content.
    const char* type = val.MemberBegin()->name.GetString();
    rjson::value& v = val.MemberBegin()->value;
    if (h.has_members()) {
        const auto& members = h.get_members();
        if (type[0] != 'M' || !v.IsObject()) {
            // If v is not an object (dictionary, map), none of the members
            // can match.
            return false;
        }
        // Build a new map holding only the requested members, moving each
        // kept value out of v (v is discarded below anyway).
        rjson::value newv = rjson::empty_object();
        for (auto it = v.MemberBegin(); it != v.MemberEnd(); ++it) {
            std::string attr = rjson::to_string(it->name);
            auto x = members.find(attr);
            if (x != members.end()) {
                if (x->second) {
                    // Only a part of this attribute is to be filtered, do it.
                    if (hierarchy_filter(it->value, *x->second)) {
                        // because newv started empty and attr are unique
                        // (keys of v), we can use add() here
                        rjson::add_with_string_name(newv, attr, std::move(it->value));
                    }
                } else {
                    // The entire attribute is to be kept
                    rjson::add_with_string_name(newv, attr, std::move(it->value));
                }
            }
        }
        if (newv.MemberCount() == 0) {
            // Nothing survived the filter - drop the whole object.
            return false;
        }
        v = newv;
    } else if (h.has_indexes()) {
        const auto& indexes = h.get_indexes();
        if (type[0] != 'L' || !v.IsArray()) {
            // Index paths can only match a list value.
            return false;
        }
        // Build a new list holding only the requested positions, in order
        // (indexes_t is a sorted map, and we scan i in increasing order).
        rjson::value newv = rjson::empty_array();
        const auto& a = v.GetArray();
        for (unsigned i = 0; i < v.Size(); i++) {
            auto x = indexes.find(i);
            if (x != indexes.end()) {
                if (x->second) {
                    if (hierarchy_filter(a[i], *x->second)) {
                        rjson::push_back(newv, std::move(a[i]));
                    }
                } else {
                    // The entire attribute is to be kept
                    rjson::push_back(newv, std::move(a[i]));
                }
            }
        }
        if (newv.Size() == 0) {
            return false;
        }
        v = newv;
    }
    // If h had neither members nor indexes, val is kept unmodified.
    return true;
}
|
||||
|
||||
// Add a path to an attribute_path_map. Throws a validation error if the path
|
||||
// "overlaps" with one already in the filter (one is a sub-path of the other)
|
||||
// or "conflicts" with it (both a member and index is requested).
|
||||
template<typename T>
|
||||
void attribute_path_map_add(const char* source, attribute_path_map<T>& map, const parsed::path& p, T value = {}) {
|
||||
using node = attribute_path_map_node<T>;
|
||||
// The first step is to look for the top-level attribute (p.root()):
|
||||
auto it = map.find(p.root());
|
||||
if (it == map.end()) {
|
||||
if (p.has_operators()) {
|
||||
it = map.emplace(p.root(), node {std::nullopt}).first;
|
||||
} else {
|
||||
(void) map.emplace(p.root(), node {std::move(value)}).first;
|
||||
// Value inserted for top-level node. We're done.
|
||||
return;
|
||||
}
|
||||
} else if(!p.has_operators()) {
|
||||
// If p is top-level and we already have it or a part of it
|
||||
// in map, it's a forbidden overlapping path.
|
||||
throw api_error::validation(fmt::format(
|
||||
"Invalid {}: two document paths overlap at {}", source, p.root()));
|
||||
} else if (it->second.has_value()) {
|
||||
// If we're here, it != map.end() && p.has_operators && it->second.has_value().
|
||||
// This means the top-level attribute already has a value, and we're
|
||||
// trying to add a non-top-level value. It's an overlap.
|
||||
throw api_error::validation(fmt::format("Invalid {}: two document paths overlap at {}", source, p.root()));
|
||||
}
|
||||
node* h = &it->second;
|
||||
// The second step is to walk h from the top-level node to the inner node
|
||||
// where we're supposed to insert the value:
|
||||
for (const auto& op : p.operators()) {
|
||||
std::visit(overloaded_functor {
|
||||
[&] (const std::string& member) {
|
||||
if (h->is_empty()) {
|
||||
*h = node {typename node::members_t()};
|
||||
} else if (h->has_indexes()) {
|
||||
throw api_error::validation(format("Invalid {}: two document paths conflict at {}", source, p));
|
||||
} else if (h->has_value()) {
|
||||
throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
|
||||
}
|
||||
typename node::members_t& members = h->get_members();
|
||||
auto it = members.find(member);
|
||||
if (it == members.end()) {
|
||||
it = members.insert({member, std::make_unique<node>()}).first;
|
||||
}
|
||||
h = it->second.get();
|
||||
},
|
||||
[&] (unsigned index) {
|
||||
if (h->is_empty()) {
|
||||
*h = node {typename node::indexes_t()};
|
||||
} else if (h->has_members()) {
|
||||
throw api_error::validation(format("Invalid {}: two document paths conflict at {}", source, p));
|
||||
} else if (h->has_value()) {
|
||||
throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
|
||||
}
|
||||
typename node::indexes_t& indexes = h->get_indexes();
|
||||
auto it = indexes.find(index);
|
||||
if (it == indexes.end()) {
|
||||
it = indexes.insert({index, std::make_unique<node>()}).first;
|
||||
}
|
||||
h = it->second.get();
|
||||
}
|
||||
}, op);
|
||||
}
|
||||
// Finally, insert the value in the node h.
|
||||
if (h->is_empty()) {
|
||||
*h = node {std::move(value)};
|
||||
} else {
|
||||
throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
|
||||
}
|
||||
}
|
||||
|
||||
// A very simplified version of the above function for the special case of
|
||||
// adding only top-level attribute. It's not only simpler, we also use a
|
||||
// different error message, referring to a "duplicate attribute" instead of
|
||||
// "overlapping paths". DynamoDB also has this distinction (errors in
|
||||
// AttributesToGet refer to duplicates, not overlaps, but errors in
|
||||
// ProjectionExpression refer to overlap - even if it's an exact duplicate).
|
||||
template<typename T>
|
||||
void attribute_path_map_add(const char* source, attribute_path_map<T>& map, const std::string& attr, T value = {}) {
|
||||
using node = attribute_path_map_node<T>;
|
||||
auto it = map.find(attr);
|
||||
if (it == map.end()) {
|
||||
map.emplace(attr, node {std::move(value)});
|
||||
} else {
|
||||
throw api_error::validation(fmt::format(
|
||||
"Invalid {}: Duplicate attribute: {}", source, attr));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace alternator
|
||||
@@ -18,7 +18,6 @@
|
||||
#include "service/memory_limiter.hh"
|
||||
#include "auth/service.hh"
|
||||
#include "service/qos/service_level_controller.hh"
|
||||
#include "vector_search/vector_store_client.hh"
|
||||
|
||||
using namespace seastar;
|
||||
|
||||
@@ -32,12 +31,10 @@ controller::controller(
|
||||
sharded<service::storage_service>& ss,
|
||||
sharded<service::migration_manager>& mm,
|
||||
sharded<db::system_distributed_keyspace>& sys_dist_ks,
|
||||
sharded<db::system_keyspace>& sys_ks,
|
||||
sharded<cdc::generation_service>& cdc_gen_svc,
|
||||
sharded<service::memory_limiter>& memory_limiter,
|
||||
sharded<auth::service>& auth_service,
|
||||
sharded<qos::service_level_controller>& sl_controller,
|
||||
sharded<vector_search::vector_store_client>& vsc,
|
||||
const db::config& config,
|
||||
seastar::scheduling_group sg)
|
||||
: protocol_server(sg)
|
||||
@@ -46,12 +43,10 @@ controller::controller(
|
||||
, _ss(ss)
|
||||
, _mm(mm)
|
||||
, _sys_dist_ks(sys_dist_ks)
|
||||
, _sys_ks(sys_ks)
|
||||
, _cdc_gen_svc(cdc_gen_svc)
|
||||
, _memory_limiter(memory_limiter)
|
||||
, _auth_service(auth_service)
|
||||
, _sl_controller(sl_controller)
|
||||
, _vsc(vsc)
|
||||
, _config(config)
|
||||
{
|
||||
}
|
||||
@@ -96,8 +91,8 @@ future<> controller::start_server() {
|
||||
auto get_timeout_in_ms = [] (const db::config& cfg) -> utils::updateable_value<uint32_t> {
|
||||
return cfg.alternator_timeout_in_ms;
|
||||
};
|
||||
_executor.start(std::ref(_gossiper), std::ref(_proxy), std::ref(_ss), std::ref(_mm), std::ref(_sys_dist_ks), std::ref(_sys_ks),
|
||||
sharded_parameter(get_cdc_metadata, std::ref(_cdc_gen_svc)), std::ref(_vsc), _ssg.value(),
|
||||
_executor.start(std::ref(_gossiper), std::ref(_proxy), std::ref(_ss), std::ref(_mm), std::ref(_sys_dist_ks),
|
||||
sharded_parameter(get_cdc_metadata, std::ref(_cdc_gen_svc)), _ssg.value(),
|
||||
sharded_parameter(get_timeout_in_ms, std::ref(_config))).get();
|
||||
_server.start(std::ref(_executor), std::ref(_proxy), std::ref(_gossiper), std::ref(_auth_service), std::ref(_sl_controller)).get();
|
||||
// Note: from this point on, if start_server() throws for any reason,
|
||||
|
||||
@@ -22,7 +22,6 @@ class memory_limiter;
|
||||
|
||||
namespace db {
|
||||
class system_distributed_keyspace;
|
||||
class system_keyspace;
|
||||
class config;
|
||||
}
|
||||
|
||||
@@ -44,10 +43,6 @@ namespace qos {
|
||||
class service_level_controller;
|
||||
}
|
||||
|
||||
namespace vector_search {
|
||||
class vector_store_client;
|
||||
}
|
||||
|
||||
namespace alternator {
|
||||
|
||||
// This is the official DynamoDB API version.
|
||||
@@ -66,12 +61,10 @@ class controller : public protocol_server {
|
||||
sharded<service::storage_service>& _ss;
|
||||
sharded<service::migration_manager>& _mm;
|
||||
sharded<db::system_distributed_keyspace>& _sys_dist_ks;
|
||||
sharded<db::system_keyspace>& _sys_ks;
|
||||
sharded<cdc::generation_service>& _cdc_gen_svc;
|
||||
sharded<service::memory_limiter>& _memory_limiter;
|
||||
sharded<auth::service>& _auth_service;
|
||||
sharded<qos::service_level_controller>& _sl_controller;
|
||||
sharded<vector_search::vector_store_client>& _vsc;
|
||||
const db::config& _config;
|
||||
|
||||
std::vector<socket_address> _listen_addresses;
|
||||
@@ -86,12 +79,10 @@ public:
|
||||
sharded<service::storage_service>& ss,
|
||||
sharded<service::migration_manager>& mm,
|
||||
sharded<db::system_distributed_keyspace>& sys_dist_ks,
|
||||
sharded<db::system_keyspace>& sys_ks,
|
||||
sharded<cdc::generation_service>& cdc_gen_svc,
|
||||
sharded<service::memory_limiter>& memory_limiter,
|
||||
sharded<auth::service>& auth_service,
|
||||
sharded<qos::service_level_controller>& sl_controller,
|
||||
sharded<vector_search::vector_store_client>& vsc,
|
||||
const db::config& config,
|
||||
seastar::scheduling_group sg);
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -9,9 +9,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <seastar/core/future.hh>
|
||||
#include "audit/audit.hh"
|
||||
#include "seastarx.hh"
|
||||
#include <seastar/core/future.hh>
|
||||
#include <seastar/core/sharded.hh>
|
||||
#include <seastar/util/noncopyable_function.hh>
|
||||
|
||||
@@ -22,23 +20,15 @@
|
||||
#include "db/config.hh"
|
||||
|
||||
#include "alternator/error.hh"
|
||||
#include "alternator/attribute_path.hh"
|
||||
#include "alternator/stats.hh"
|
||||
#include "alternator/executor_util.hh"
|
||||
|
||||
#include "stats.hh"
|
||||
#include "utils/rjson.hh"
|
||||
#include "utils/updateable_value.hh"
|
||||
#include "utils/simple_value_with_expiry.hh"
|
||||
|
||||
#include "tracing/trace_state.hh"
|
||||
|
||||
|
||||
namespace db {
|
||||
class system_distributed_keyspace;
|
||||
class system_keyspace;
|
||||
}
|
||||
|
||||
namespace audit {
|
||||
class audit_info_alternator;
|
||||
}
|
||||
|
||||
namespace query {
|
||||
@@ -56,10 +46,6 @@ namespace service {
|
||||
class storage_service;
|
||||
}
|
||||
|
||||
namespace vector_search {
|
||||
class vector_store_client;
|
||||
}
|
||||
|
||||
namespace cdc {
|
||||
class metadata;
|
||||
}
|
||||
@@ -72,13 +58,82 @@ class gossiper;
|
||||
|
||||
class schema_builder;
|
||||
|
||||
|
||||
namespace alternator {
|
||||
|
||||
enum class table_status;
|
||||
class rmw_operation;
|
||||
class put_or_delete_item;
|
||||
|
||||
schema_ptr get_table(service::storage_proxy& proxy, const rjson::value& request);
|
||||
bool is_alternator_keyspace(const sstring& ks_name);
|
||||
// Wraps the db::get_tags_of_table and throws if the table is missing the tags extension.
|
||||
const std::map<sstring, sstring>& get_tags_of_table_or_throw(schema_ptr schema);
|
||||
|
||||
// An attribute_path_map object is used to hold data for various attributes
|
||||
// paths (parsed::path) in a hierarchy of attribute paths. Each attribute path
|
||||
// has a root attribute, and then modified by member and index operators -
|
||||
// for example in "a.b[2].c" we have "a" as the root, then ".b" member, then
|
||||
// "[2]" index, and finally ".c" member.
|
||||
// Data can be added to an attribute_path_map using the add() function, but
|
||||
// requires that attributes with data not be *overlapping* or *conflicting*:
|
||||
//
|
||||
// 1. Two attribute paths which are identical or an ancestor of one another
|
||||
// are considered *overlapping* and not allowed. If a.b.c has data,
|
||||
// we can't add more data in a.b.c or any of its descendants like a.b.c.d.
|
||||
//
|
||||
// 2. Two attribute paths which need the same parent to have both a member and
|
||||
// an index are considered *conflicting* and not allowed. E.g., if a.b has
|
||||
// data, you can't add a[1]. The meaning of adding both would be that the
|
||||
// attribute a is both a map and an array, which isn't sensible.
|
||||
//
|
||||
// These two requirements are common to the two places where Alternator uses
|
||||
// this abstraction to describe how a hierarchical item is to be transformed:
|
||||
//
|
||||
// 1. In ProjectExpression: for filtering from a full top-level attribute
|
||||
// only the parts for which user asked in ProjectionExpression.
|
||||
//
|
||||
// 2. In UpdateExpression: for taking the previous value of a top-level
|
||||
// attribute, and modifying it based on the instructions in the user
|
||||
// wrote in UpdateExpression.
|
||||
|
||||
// One node of a hierarchy of attribute paths. A node either holds a value
// (data_t), or a set of named sub-nodes (members_t, for map members), or a
// set of numbered sub-nodes (indexes_t, for list positions) - or is empty.
template<typename T>
class attribute_path_map_node {
public:
    using data_t = T;
    // We need the extra unique_ptr<> here because libstdc++ unordered_map
    // doesn't work with incomplete types :-(
    using members_t = std::unordered_map<std::string, std::unique_ptr<attribute_path_map_node<T>>>;
    // The indexes list is sorted because DynamoDB requires handling writes
    // beyond the end of a list in index order.
    using indexes_t = std::map<unsigned, std::unique_ptr<attribute_path_map_node<T>>>;
    // The prohibition on "overlap" and "conflict" explained above means
    // that only one of data, members or indexes is non-empty.
    std::optional<std::variant<data_t, members_t, indexes_t>> _content;

    // Exactly one of has_value()/has_members()/has_indexes() is true for a
    // non-empty node; all three are false when is_empty().
    bool is_empty() const { return !_content; }
    bool has_value() const { return _content && std::holds_alternative<data_t>(*_content); }
    bool has_members() const { return _content && std::holds_alternative<members_t>(*_content); }
    bool has_indexes() const { return _content && std::holds_alternative<indexes_t>(*_content); }
    // get_members() assumes that has_members() is true
    members_t& get_members() { return std::get<members_t>(*_content); }
    const members_t& get_members() const { return std::get<members_t>(*_content); }
    // get_indexes() assumes that has_indexes() is true
    indexes_t& get_indexes() { return std::get<indexes_t>(*_content); }
    const indexes_t& get_indexes() const { return std::get<indexes_t>(*_content); }
    // get_value() assumes that has_value() is true
    T& get_value() { return std::get<T>(*_content); }
    const T& get_value() const { return std::get<T>(*_content); }
};
|
||||
|
||||
template<typename T>
|
||||
using attribute_path_map = std::unordered_map<std::string, attribute_path_map_node<T>>;
|
||||
|
||||
using attrs_to_get_node = attribute_path_map_node<std::monostate>;
|
||||
// attrs_to_get lists which top-level attribute are needed, and possibly also
|
||||
// which part of the top-level attribute is really needed (when nested
|
||||
// attribute paths appeared in the query).
|
||||
// Most code actually uses optional<attrs_to_get>. There, a disengaged
|
||||
// optional means we should get all attributes, not specific ones.
|
||||
using attrs_to_get = attribute_path_map<std::monostate>;
|
||||
|
||||
namespace parsed {
|
||||
class expression_cache;
|
||||
}
|
||||
@@ -89,12 +144,9 @@ class executor : public peering_sharded_service<executor> {
|
||||
service::storage_proxy& _proxy;
|
||||
service::migration_manager& _mm;
|
||||
db::system_distributed_keyspace& _sdks;
|
||||
db::system_keyspace& _system_keyspace;
|
||||
cdc::metadata& _cdc_metadata;
|
||||
vector_search::vector_store_client& _vsc;
|
||||
utils::updateable_value<bool> _enforce_authorization;
|
||||
utils::updateable_value<bool> _warn_authorization;
|
||||
seastar::sharded<audit::audit>& _audit;
|
||||
// An smp_service_group to be used for limiting the concurrency when
|
||||
// forwarding Alternator request between shards - if necessary for LWT.
|
||||
smp_service_group _ssg;
|
||||
@@ -119,6 +171,7 @@ public:
|
||||
// is written in chunks to the output_stream. This allows for efficient
|
||||
// handling of large responses without needing to allocate a large buffer
|
||||
// in memory.
|
||||
using body_writer = noncopyable_function<future<>(output_stream<char>&&)>;
|
||||
using request_return_type = std::variant<std::string, body_writer, api_error>;
|
||||
stats _stats;
|
||||
// The metric_groups object holds this stat object's metrics registered
|
||||
@@ -133,60 +186,53 @@ public:
|
||||
service::storage_service& ss,
|
||||
service::migration_manager& mm,
|
||||
db::system_distributed_keyspace& sdks,
|
||||
db::system_keyspace& system_keyspace,
|
||||
cdc::metadata& cdc_metadata,
|
||||
vector_search::vector_store_client& vsc,
|
||||
smp_service_group ssg,
|
||||
utils::updateable_value<uint32_t> default_timeout_in_ms);
|
||||
~executor();
|
||||
|
||||
future<request_return_type> create_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> describe_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> delete_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> update_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> put_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> get_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> delete_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> update_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> list_tables(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> scan(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> describe_endpoints(client_state& client_state, service_permit permit, rjson::value request, std::string host_header, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> batch_write_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> batch_get_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> query(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> tag_resource(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> untag_resource(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> list_tags_of_resource(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> update_time_to_live(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> describe_time_to_live(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> list_streams(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> describe_stream(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> get_shard_iterator(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> get_records(client_state& client_state, tracing::trace_state_ptr, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> describe_continuous_backups(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<request_return_type> create_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> describe_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> delete_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> update_table(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> put_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> get_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> delete_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> update_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> list_tables(client_state& client_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> scan(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> describe_endpoints(client_state& client_state, service_permit permit, rjson::value request, std::string host_header);
|
||||
future<request_return_type> batch_write_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> batch_get_item(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> query(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> tag_resource(client_state& client_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> untag_resource(client_state& client_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> list_tags_of_resource(client_state& client_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> update_time_to_live(client_state& client_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> describe_time_to_live(client_state& client_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> list_streams(client_state& client_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> describe_stream(client_state& client_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> get_shard_iterator(client_state& client_state, service_permit permit, rjson::value request);
|
||||
future<request_return_type> get_records(client_state& client_state, tracing::trace_state_ptr, service_permit permit, rjson::value request);
|
||||
future<request_return_type> describe_continuous_backups(client_state& client_state, service_permit permit, rjson::value request);
|
||||
|
||||
future<> start();
|
||||
future<> stop();
|
||||
|
||||
static sstring table_name(const schema&);
|
||||
static db::timeout_clock::time_point default_timeout();
|
||||
private:
|
||||
static thread_local utils::updateable_value<uint32_t> s_default_timeout_in_ms;
|
||||
public:
|
||||
static schema_ptr find_table(service::storage_proxy&, std::string_view table_name);
|
||||
static schema_ptr find_table(service::storage_proxy&, const rjson::value& request);
|
||||
|
||||
private:
|
||||
friend class rmw_operation;
|
||||
|
||||
// Helper to set up auditing for an Alternator operation. Checks whether
|
||||
// the operation should be audited (via will_log()) and if so, allocates
|
||||
// and populates audit_info. No allocation occurs when auditing is disabled.
|
||||
void maybe_audit(std::unique_ptr<audit::audit_info_alternator>& audit_info,
|
||||
audit::statement_category category,
|
||||
std::string_view ks_name,
|
||||
std::string_view table_name,
|
||||
std::string_view operation_name,
|
||||
const rjson::value& request,
|
||||
std::optional<db::consistency_level> cl = std::nullopt);
|
||||
|
||||
static void describe_key_schema(rjson::value& parent, const schema&, std::unordered_map<std::string,std::string> * = nullptr, const std::map<sstring, sstring> *tags = nullptr);
|
||||
future<rjson::value> fill_table_description(schema_ptr schema, table_status tbl_status, service::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit);
|
||||
future<executor::request_return_type> create_table_on_shard0(service::client_state&& client_state, tracing::trace_state_ptr trace_state, rjson::value request, bool enforce_authorization,
|
||||
bool warn_authorization, const db::tablets_mode_t::mode tablets_mode, std::unique_ptr<audit::audit_info_alternator>& audit_info);
|
||||
future<executor::request_return_type> create_table_on_shard0(service::client_state&& client_state, tracing::trace_state_ptr trace_state, rjson::value request, bool enforce_authorization, bool warn_authorization, const db::tablets_mode_t::mode tablets_mode);
|
||||
|
||||
future<> do_batch_write(
|
||||
std::vector<std::pair<schema_ptr, put_or_delete_item>> mutation_builders,
|
||||
@@ -199,34 +245,60 @@ private:
|
||||
tracing::trace_state_ptr trace_state, service_permit permit);
|
||||
|
||||
public:
|
||||
static void describe_key_schema(rjson::value& parent, const schema& schema, std::unordered_map<std::string,std::string>&, const std::map<sstring, sstring> *tags = nullptr);
|
||||
|
||||
static std::optional<rjson::value> describe_single_item(schema_ptr,
|
||||
const query::partition_slice&,
|
||||
const cql3::selection::selection&,
|
||||
const query::result&,
|
||||
const std::optional<attrs_to_get>&,
|
||||
uint64_t* = nullptr);
|
||||
|
||||
// Converts a multi-row selection result to JSON compatible with DynamoDB.
|
||||
// For each row, this method calls item_callback, which takes the size of
|
||||
// the item as the parameter.
|
||||
static future<std::vector<rjson::value>> describe_multi_item(schema_ptr schema,
|
||||
const query::partition_slice&& slice,
|
||||
shared_ptr<cql3::selection::selection> selection,
|
||||
foreign_ptr<lw_shared_ptr<query::result>> query_result,
|
||||
shared_ptr<const std::optional<attrs_to_get>> attrs_to_get,
|
||||
noncopyable_function<void(uint64_t)> item_callback = {});
|
||||
|
||||
static void describe_single_item(const cql3::selection::selection&,
|
||||
const std::vector<managed_bytes_opt>&,
|
||||
const std::optional<attrs_to_get>&,
|
||||
rjson::value&,
|
||||
uint64_t* item_length_in_bytes = nullptr,
|
||||
bool = false);
|
||||
|
||||
static bool add_stream_options(const rjson::value& stream_spec, schema_builder&, service::storage_proxy& sp);
|
||||
static void supplement_table_info(rjson::value& descr, const schema& schema, service::storage_proxy& sp);
|
||||
static void supplement_table_stream_info(rjson::value& descr, const schema& schema, const service::storage_proxy& sp);
|
||||
};
|
||||
|
||||
// returns table creation time in seconds since epoch for `db_clock`
|
||||
double get_table_creation_time(const schema &schema);
|
||||
// is_big() checks approximately if the given JSON value is "bigger" than
|
||||
// the given big_size number of bytes. The goal is to *quickly* detect
|
||||
// oversized JSON that, for example, is too large to be serialized to a
|
||||
// contiguous string - we don't need an accurate size for that. Moreover,
|
||||
// as soon as we detect that the JSON is indeed "big", we can return true
|
||||
// and don't need to continue calculating its exact size.
|
||||
// For simplicity, we use a recursive implementation. This is fine because
|
||||
// Alternator limits the depth of JSONs it reads from inputs, and doesn't
|
||||
// add more than a couple of levels in its own output construction.
|
||||
bool is_big(const rjson::value& val, int big_size = 100'000);
|
||||
|
||||
// result of parsing ARN (Amazon Resource Name)
|
||||
// ARN format is `arn:<partition>:<service>:<region>:<account-id>:<resource-type>/<resource-id>/<postfix>`
|
||||
// we ignore partition, service and account-id
|
||||
// resource-type must be string "table"
|
||||
// resource-id will be returned as table_name
|
||||
// region will be returned as keyspace_name
|
||||
// postfix is a string after resource-id and will be returned as is (whole), including separator.
|
||||
// Parsed components of an ARN (see the format description above).
struct arn_parts {
    // The ARN's <region> field, which Alternator maps to a keyspace name.
    std::string_view keyspace_name;
    // The ARN's <resource-id> field - the table name.
    std::string_view table_name;
    // Everything following <resource-id>, returned verbatim including the
    // leading separator; empty when the ARN ends at the resource-id.
    std::string_view postfix;
};
|
||||
// arn - arn to parse
|
||||
// arn_field_name - identifier of the ARN, used only when reporting an error (in error messages), for example "Incorrect resource identifier `<arn_field_name>`"
|
||||
// type_name - used only when reporting an error (in error messages), for example "... is not a valid <type_name> ARN ..."
|
||||
// expected_postfix - optional filter of postfix value (part of ARN after resource-id, including separator, see comments for struct arn_parts).
|
||||
// If is empty - then postfix value must be empty as well
|
||||
// if not empty - postfix value must start with expected_postfix, but might be longer
|
||||
arn_parts parse_arn(std::string_view arn, std::string_view arn_field_name, std::string_view type_name, std::string_view expected_postfix);
|
||||
// Check CQL's Role-Based Access Control (RBAC) permission (MODIFY,
|
||||
// SELECT, DROP, etc.) on the given table. When permission is denied an
|
||||
// appropriate user-readable api_error::access_denied is thrown.
|
||||
future<> verify_permission(bool enforce_authorization, bool warn_authorization, const service::client_state&, const schema_ptr&, auth::permission, alternator::stats& stats);
|
||||
|
||||
/**
|
||||
* Make return type for serializing the object "streamed",
|
||||
* i.e. direct to HTTP output stream. Note: only useful for
|
||||
* (very) large objects as there are overhead issues with this
|
||||
* as well, but for massive lists of return objects this can
|
||||
* help avoid large allocations/many re-allocs
|
||||
*/
|
||||
executor::body_writer make_streamed(rjson::value&&);
|
||||
|
||||
// The format is ks1|ks2|ks3... and table1|table2|table3...
|
||||
sstring print_names_for_audit(const std::set<sstring>& names);
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,559 +0,0 @@
|
||||
/*
|
||||
* Copyright 2019-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
|
||||
*/
|
||||
|
||||
#include "alternator/executor_util.hh"
|
||||
#include "alternator/executor.hh"
|
||||
#include "alternator/error.hh"
|
||||
#include "auth/resource.hh"
|
||||
#include "auth/service.hh"
|
||||
#include "cdc/log.hh"
|
||||
#include "data_dictionary/data_dictionary.hh"
|
||||
#include "db/tags/utils.hh"
|
||||
#include "replica/database.hh"
|
||||
#include "cql3/selection/selection.hh"
|
||||
#include "cql3/result_set.hh"
|
||||
#include "serialization.hh"
|
||||
#include "service/storage_proxy.hh"
|
||||
#include "types/map.hh"
|
||||
#include <fmt/format.h>
|
||||
|
||||
namespace alternator {
|
||||
|
||||
extern logging::logger elogger; // from executor.cc
|
||||
|
||||
// Fetch an integer-typed attribute from a JSON request object.
// Returns an empty optional when the attribute is absent; throws a
// descriptive validation error when it is present but not an integer.
std::optional<int> get_int_attribute(const rjson::value& value, std::string_view attribute_name) {
    const rjson::value* found = rjson::find(value, attribute_name);
    if (found == nullptr) {
        return std::nullopt;
    }
    if (!found->IsInt()) {
        throw api_error::validation(fmt::format("Expected integer value for attribute {}, got: {}",
                attribute_name, value));
    }
    return found->GetInt();
}
|
||||
|
||||
// Fetch a string-typed attribute from a JSON request object.
// Returns default_return when the attribute is absent; throws a
// descriptive validation error when it is present but not a string.
std::string get_string_attribute(const rjson::value& value, std::string_view attribute_name, const char* default_return) {
    const rjson::value* found = rjson::find(value, attribute_name);
    if (found == nullptr) {
        return default_return;
    }
    if (!found->IsString()) {
        throw api_error::validation(fmt::format("Expected string value for attribute {}, got: {}",
                attribute_name, value));
    }
    return rjson::to_string(*found);
}
|
||||
|
||||
// Fetch a boolean-typed attribute from a JSON request object.
// Returns default_return when the attribute is absent; throws a
// descriptive validation error when it is present but not a boolean.
bool get_bool_attribute(const rjson::value& value, std::string_view attribute_name, bool default_return) {
    const rjson::value* found = rjson::find(value, attribute_name);
    if (found == nullptr) {
        return default_return;
    }
    if (!found->IsBool()) {
        throw api_error::validation(fmt::format("Expected boolean value for attribute {}, got: {}",
                attribute_name, value));
    }
    return found->GetBool();
}
|
||||
|
||||
// Extract the "TableName" field from a request, or an empty optional when
// the field is missing. Throws a validation error when the field is
// present but is not a string.
std::optional<std::string> find_table_name(const rjson::value& request) {
    const rjson::value* name_field = rjson::find(request, "TableName");
    if (name_field == nullptr) {
        return std::nullopt;
    }
    if (!name_field->IsString()) {
        throw api_error::validation("Non-string TableName field in request");
    }
    return rjson::to_string(*name_field);
}
|
||||
|
||||
// Extract the "TableName" field from a request. Unlike find_table_name(),
// a missing field is an error here and produces a validation api_error.
std::string get_table_name(const rjson::value& request) {
    std::optional<std::string> name = find_table_name(request);
    if (!name.has_value()) {
        throw api_error::validation("Missing TableName field in request");
    }
    return std::move(*name);
}
|
||||
|
||||
// Resolve the table named by the request's "TableName" field into a schema.
// Returns nullptr when the request has no TableName at all; other failures
// (bad name, nonexistent table) throw from the callees.
schema_ptr find_table(service::storage_proxy& proxy, const rjson::value& request) {
    if (auto table_name = find_table_name(request)) {
        return find_table(proxy, *table_name);
    }
    return nullptr;
}
|
||||
|
||||
// Resolve an Alternator table name into its schema. Alternator stores each
// table in its own keyspace, named by prefixing the table name with
// KEYSPACE_NAME_PREFIX. Throws resource_not_found (or a validation error
// for a malformed name) when the table does not exist.
schema_ptr find_table(service::storage_proxy& proxy, std::string_view table_name) {
    const sstring ks_name = sstring(executor::KEYSPACE_NAME_PREFIX) + sstring(table_name);
    try {
        return proxy.data_dictionary().find_schema(ks_name, table_name);
    } catch (data_dictionary::no_such_column_family&) {
        // DynamoDB returns validation error even when table does not exist
        // and the table name is invalid.
        validate_table_name(table_name);

        throw api_error::resource_not_found(
                fmt::format("Requested resource not found: Table: {} not found", table_name));
    }
}
|
||||
|
||||
// Extract the table schema from a request's "TableName" field, with full
// validation: throws when the field is missing, malformed, or names a
// nonexistent table.
schema_ptr get_table(service::storage_proxy& proxy, const rjson::value& request) {
    schema_ptr schema = find_table(proxy, request);
    if (schema) {
        return schema;
    }
    // find_table() returned nullptr only because TableName was absent
    // (any other failure throws). Slow path: re-run get_table_name()
    // purely to produce the canonical "Missing TableName" error.
    get_table_name(request);
    return schema;
}
|
||||
|
||||
// Return the CQL type of Alternator's attributes column: a multi-cell
// map<text, blob> holding an item's non-key attributes in serialized form.
// The type instance is created once per shard (thread_local) and reused.
map_type attrs_type() {
    static thread_local auto t = map_type_impl::get_instance(utf8_type, bytes_type, true);
    return t;
}
|
||||
|
||||
// Return the tags map of the given table, throwing a validation error when
// the table carries no valid tagging information.
const std::map<sstring, sstring>& get_tags_of_table_or_throw(schema_ptr schema) {
    auto tags_ptr = db::get_tags_of_table(schema);
    if (!tags_ptr) {
        throw api_error::validation(format("Table {} does not have valid tagging information", schema->ks_name()));
    }
    return *tags_ptr;
}
|
||||
|
||||
bool is_alternator_keyspace(std::string_view ks_name) {
|
||||
return ks_name.starts_with(executor::KEYSPACE_NAME_PREFIX);
|
||||
}
|
||||
|
||||
// This tag is set on a GSI when the user did not specify a range key, causing
|
||||
// Alternator to add the base table's range key as a spurious range key. It is
|
||||
// used by describe_key_schema() to suppress reporting that key.
|
||||
extern const sstring SPURIOUS_RANGE_KEY_ADDED_TO_GSI_AND_USER_DIDNT_SPECIFY_RANGE_KEY_TAG_KEY;
|
||||
|
||||
// Add a DynamoDB-style "KeySchema" array to the JSON object `parent`,
// describing the given table's partition ("HASH") and clustering ("RANGE")
// key columns. If attribute_types is non-null, it is also filled with the
// DynamoDB type string of each reported key column. If `tags` is non-null
// and contains the "spurious range key" marker (set on a GSI where the
// user did not ask for a range key), clustering keys are suppressed from
// the output entirely.
void describe_key_schema(rjson::value& parent, const schema& schema, std::unordered_map<std::string, std::string>* attribute_types, const std::map<sstring, sstring>* tags) {
    rjson::value key_schema = rjson::empty_array();
    const bool ignore_range_keys_as_spurious = tags != nullptr && tags->contains(SPURIOUS_RANGE_KEY_ADDED_TO_GSI_AND_USER_DIDNT_SPECIFY_RANGE_KEY_TAG_KEY);

    for (const column_definition& cdef : schema.partition_key_columns()) {
        rjson::value key = rjson::empty_object();
        rjson::add(key, "AttributeName", rjson::from_string(cdef.name_as_text()));
        rjson::add(key, "KeyType", "HASH");
        rjson::push_back(key_schema, std::move(key));
        if (attribute_types) {
            (*attribute_types)[cdef.name_as_text()] = type_to_string(cdef.type);
        }
    }
    if (!ignore_range_keys_as_spurious) {
        // NOTE: user requested key (there can be at most one) will always come first.
        // There might be more keys following it, which were added, but those were
        // not requested by the user, so we ignore them.
        for (const column_definition& cdef : schema.clustering_key_columns()) {
            rjson::value key = rjson::empty_object();
            rjson::add(key, "AttributeName", rjson::from_string(cdef.name_as_text()));
            rjson::add(key, "KeyType", "RANGE");
            rjson::push_back(key_schema, std::move(key));
            if (attribute_types) {
                (*attribute_types)[cdef.name_as_text()] = type_to_string(cdef.type);
            }
            // Only the first clustering column is reported - see NOTE above.
            break;
        }
    }
    rjson::add(parent, "KeySchema", std::move(key_schema));
}
|
||||
|
||||
// Check if the given string has valid characters for a table name, i.e. only
|
||||
// a-z, A-Z, 0-9, _ (underscore), - (dash), . (dot). Note that this function
|
||||
// does not check the length of the name - instead, use validate_table_name()
|
||||
// to validate both the characters and the length.
|
||||
// Check if the given string has valid characters for a table name, i.e.
// only a-z, A-Z, 0-9, _ (underscore), - (dash), . (dot). Note that this
// function does not check the length of the name - use
// validate_table_name() to validate both characters and length.
static bool valid_table_name_chars(std::string_view name) {
    for (char c : name) {
        const bool ok = (c >= 'a' && c <= 'z')
                || (c >= 'A' && c <= 'Z')
                || (c >= '0' && c <= '9')
                || c == '_'
                || c == '-'
                || c == '.';
        if (!ok) {
            return false;
        }
    }
    return true;
}
|
||||
|
||||
std::string view_name(std::string_view table_name, std::string_view index_name, const std::string& delim, bool validate_len) {
|
||||
if (index_name.length() < 3) {
|
||||
throw api_error::validation("IndexName must be at least 3 characters long");
|
||||
}
|
||||
if (!valid_table_name_chars(index_name)) {
|
||||
throw api_error::validation(
|
||||
fmt::format("IndexName '{}' must satisfy regular expression pattern: [a-zA-Z0-9_.-]+", index_name));
|
||||
}
|
||||
std::string ret = std::string(table_name) + delim + std::string(index_name);
|
||||
if (ret.length() > max_auxiliary_table_name_length && validate_len) {
|
||||
throw api_error::validation(
|
||||
fmt::format("The total length of TableName ('{}') and IndexName ('{}') cannot exceed {} characters",
|
||||
table_name, index_name, max_auxiliary_table_name_length - delim.size()));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Build the name of the materialized view backing a GSI (Global Secondary
// Index): "<table>:<index>". Index-name validation (and, optionally, the
// combined-length check) is performed by view_name().
std::string gsi_name(std::string_view table_name, std::string_view index_name, bool validate_len) {
    return view_name(table_name, index_name, ":", validate_len);
}
|
||||
|
||||
// Build the name of the materialized view backing an LSI (Local Secondary
// Index): "<table>!:<index>". The "!:" delimiter distinguishes LSI views
// from GSI views (which use ":"). Validation is performed by view_name().
std::string lsi_name(std::string_view table_name, std::string_view index_name, bool validate_len) {
    return view_name(table_name, index_name, "!:", validate_len);
}
|
||||
|
||||
// Verify that a request's Key object has exactly one member per key column
// of the schema: the partition key, plus a range key when the schema has
// clustering columns. Throws a validation error otherwise.
void check_key(const rjson::value& key, const schema_ptr& schema) {
    const unsigned expected_members = schema->clustering_key_size() ? 2 : 1;
    if (key.MemberCount() != expected_members) {
        throw api_error::validation("Given key attribute not in schema");
    }
}
|
||||
|
||||
// Verify that every member of the JSON object `field` appears in the
// `used` set, throwing a validation error naming the first spurious entry
// otherwise. A null `field` (the attribute was absent from the request) is
// fine. `field_name` and `operation` are used only in the error message.
void verify_all_are_used(const rjson::value* field,
        const std::unordered_set<std::string>& used, const char* field_name, const char* operation) {
    if (!field) {
        return;
    }
    for (auto it = field->MemberBegin(); it != field->MemberEnd(); ++it) {
        if (!used.contains(rjson::to_string(it->name))) {
            throw api_error::validation(
                format("{} has spurious '{}', not used in {}",
                       field_name, rjson::to_string_view(it->name), operation));
        }
    }
}
|
||||
|
||||
// This function increments the authorization_failures counter, and may also
// log a warn-level message and/or throw an access_denied exception, depending
// on what enforce_authorization and warn_authorization are set to.
// Note that if enforce_authorization is false, this function will return
// without throwing. So a caller that doesn't want to continue after an
// authorization_error must explicitly return after calling this function.
static void authorization_error(stats& stats, bool enforce_authorization, bool warn_authorization, std::string msg) {
    // The failure is counted regardless of whether we enforce or only warn.
    stats.authorization_failures++;
    if (enforce_authorization) {
        if (warn_authorization) {
            // Log in addition to failing the request below.
            elogger.warn("alternator_warn_authorization=true: {}", msg);
        }
        throw api_error::access_denied(std::move(msg));
    } else {
        if (warn_authorization) {
            // Permissive mode: only report what *would* be denied.
            elogger.warn("If you set alternator_enforce_authorization=true the following will be enforced: {}", msg);
        }
    }
}
|
||||
|
||||
// Check CQL Role-Based Access Control for `permission_to_check` on the
// given table. Depending on the enforce/warn flags, a denial either throws
// api_error::access_denied, logs a warning, or both (see
// authorization_error() above). A fast no-op when both flags are off.
future<> verify_permission(
        bool enforce_authorization,
        bool warn_authorization,
        const service::client_state& client_state,
        const schema_ptr& schema,
        auth::permission permission_to_check,
        stats& stats) {
    if (!enforce_authorization && !warn_authorization) {
        // Authorization fully disabled - nothing to check.
        co_return;
    }
    // Unfortunately, the fix for issue #23218 did not modify the function
    // that we use here - check_has_permissions(). So if we want to allow
    // writes to internal tables (from try_get_internal_table()) only to a
    // superuser, we need to explicitly check it here.
    if (permission_to_check == auth::permission::MODIFY && is_internal_keyspace(schema->ks_name())) {
        if (!client_state.user() ||
            !client_state.user()->name ||
            !co_await client_state.get_auth_service()->underlying_role_manager().is_superuser(*client_state.user()->name)) {
            sstring username = "<anonymous>";
            if (client_state.user() && client_state.user()->name) {
                username = client_state.user()->name.value();
            }
            authorization_error(stats, enforce_authorization, warn_authorization, fmt::format(
                "Write access denied on internal table {}.{} to role {} because it is not a superuser",
                schema->ks_name(), schema->cf_name(), username));
            // authorization_error() may return (warn-only mode); the
            // superuser requirement supersedes the regular check below.
            co_return;
        }
    }
    // Regular RBAC check on the specific table resource.
    auto resource = auth::make_data_resource(schema->ks_name(), schema->cf_name());
    if (!client_state.user() || !client_state.user()->name ||
        !co_await client_state.check_has_permission(auth::command_desc(permission_to_check, resource))) {
        sstring username = "<anonymous>";
        if (client_state.user() && client_state.user()->name) {
            username = client_state.user()->name.value();
        }
        // Using exceptions for errors makes this function faster in the
        // success path (when the operation is allowed).
        authorization_error(stats, enforce_authorization, warn_authorization, fmt::format(
            "{} access on table {}.{} is denied to role {}, client address {}",
            auth::permissions::to_string(permission_to_check),
            schema->ks_name(), schema->cf_name(), username, client_state.get_client_address()));
    }
}
|
||||
|
||||
// Similar to verify_permission() above, but just for CREATE operations.
// Those do not operate on any specific table, so require permissions on
// ALL KEYSPACES instead of any specific table.
future<> verify_create_permission(bool enforce_authorization, bool warn_authorization, const service::client_state& client_state, stats& stats) {
    if (!enforce_authorization && !warn_authorization) {
        // Authorization fully disabled - nothing to check.
        co_return;
    }
    // The root of the "data" resource hierarchy, i.e. ALL KEYSPACES.
    auto resource = auth::resource(auth::resource_kind::data);
    if (!co_await client_state.check_has_permission(auth::command_desc(auth::permission::CREATE, resource))) {
        sstring username = "<anonymous>";
        if (client_state.user() && client_state.user()->name) {
            username = client_state.user()->name.value();
        }
        // May throw, warn, or both, depending on the configuration flags.
        authorization_error(stats, enforce_authorization, warn_authorization, fmt::format(
            "CREATE access on ALL KEYSPACES is denied to role {}", username));
    }
}
|
||||
|
||||
// Resolve a name of the form "<INTERNAL_TABLE_PREFIX><ks>.<cf>" into the
// schema of an internal (system) table. Returns nullptr when the name does
// not carry the internal prefix, has no '.' separator, or names a keyspace
// that is not internal (non-internal keyspaces are refused to avoid
// leaking access through this path). Throws resource_not_found (or a
// validation error for a malformed name) when everything matches but the
// table itself is missing.
schema_ptr try_get_internal_table(const data_dictionary::database& db, std::string_view table_name) {
    // The prefix must appear at the very start of the name. starts_with()
    // expresses that directly, and avoids find()'s full scan of the name
    // (the original `find(...) != 0` searched every position just to learn
    // the prefix is not at position 0).
    if (!table_name.starts_with(executor::INTERNAL_TABLE_PREFIX)) {
        return schema_ptr{};
    }
    table_name.remove_prefix(executor::INTERNAL_TABLE_PREFIX.size());
    size_t delim = table_name.find_first_of('.');
    if (delim == std::string_view::npos) {
        return schema_ptr{};
    }
    std::string_view ks_name = table_name.substr(0, delim);
    table_name.remove_prefix(ks_name.size() + 1);
    // Only internal keyspaces can be accessed to avoid leakage
    auto ks = db.try_find_keyspace(ks_name);
    if (!ks || !ks->is_internal()) {
        return schema_ptr{};
    }
    try {
        return db.find_schema(ks_name, table_name);
    } catch (data_dictionary::no_such_column_family&) {
        // DynamoDB returns validation error even when table does not exist
        // and the table name is invalid.
        validate_table_name(table_name);
        throw api_error::resource_not_found(
            fmt::format("Requested resource not found: Internal table: {}.{} not found", ks_name, table_name));
    }
}
|
||||
|
||||
// Look up the schema of a table named as a key in a BatchGetItem /
// BatchWriteItem request map (the iterator points at the per-table entry).
// Throws resource_not_found - or a validation error when the name is
// malformed - if the table does not exist.
schema_ptr get_table_from_batch_request(const service::storage_proxy& proxy, const rjson::value::ConstMemberIterator& batch_request) {
    sstring table_name = rjson::to_sstring(batch_request->name); // JSON keys are always strings
    try {
        return proxy.data_dictionary().find_schema(sstring(executor::KEYSPACE_NAME_PREFIX) + table_name, table_name);
    } catch(data_dictionary::no_such_column_family&) {
        // DynamoDB returns validation error even when table does not exist
        // and the table name is invalid.
        validate_table_name(table_name);
        throw api_error::resource_not_found(format("Requested resource not found: Table: {} not found", table_name));
    }
}
|
||||
|
||||
// Return the per-table Alternator statistics object for the given schema,
// lazily creating it on first use. If the table was dropped while the
// operation is still in flight, find_column_family() throws; in that case
// a fresh throw-away stats object is returned instead - harmless, since
// the dropped table's metrics go away with it anyway.
lw_shared_ptr<stats> get_stats_from_schema(service::storage_proxy& sp, const schema& schema) {
    try {
        replica::table& table = sp.local_db().find_column_family(schema.id());
        // Lazily create the per-table stats holder on first access.
        if (!table.get_stats().alternator_stats) {
            table.get_stats().alternator_stats = seastar::make_shared<table_stats>(schema.ks_name(), schema.cf_name());
        }
        return table.get_stats().alternator_stats->_stats;
    } catch (std::runtime_error&) {
        // If we're here it means that a table we are currently working on was deleted before the
        // operation completed, returning a temporary object is fine, if the table get deleted so will its metrics
        return make_lw_shared<stats>();
    }
}
|
||||
|
||||
// Convert one CQL result row of an Alternator table into a DynamoDB-style
// JSON item, written into `item` (expected to start empty). Real schema
// (key) columns are emitted directly; the special ":attrs" map column is
// unpacked into top-level attributes. When attrs_to_get is engaged, only
// the requested attributes (or requested sub-paths, via hierarchy_filter())
// are kept - unless include_all_embedded_attributes forces all ":attrs"
// entries through. If item_length_in_bytes is non-null it is incremented by
// an approximation of the item's DynamoDB length; note this counts even
// attributes that attrs_to_get filters out of the output.
void describe_single_item(const cql3::selection::selection& selection,
        const std::vector<managed_bytes_opt>& result_row,
        const std::optional<attrs_to_get>& attrs_to_get,
        rjson::value& item,
        uint64_t* item_length_in_bytes,
        bool include_all_embedded_attributes)
{
    const auto& columns = selection.get_columns();
    auto column_it = columns.begin();
    for (const managed_bytes_opt& cell : result_row) {
        if (!cell) {
            // This column has no value in the row - skip it, but keep the
            // column iterator in step with the cell iterator.
            ++column_it;
            continue;
        }
        std::string column_name = (*column_it)->name_as_text();
        if (column_name != executor::ATTRS_COLUMN_NAME) {
            // A real schema column (a key column).
            if (item_length_in_bytes) {
                (*item_length_in_bytes) += column_name.length() + cell->size();
            }
            if (!attrs_to_get || attrs_to_get->contains(column_name)) {
                // item is expected to start empty, and column_name are unique
                // so add() makes sense
                rjson::add_with_string_name(item, column_name, rjson::empty_object());
                rjson::value& field = item[column_name.c_str()];
                cell->with_linearized([&] (bytes_view linearized_cell) {
                    rjson::add_with_string_name(field, type_to_string((*column_it)->type), json_key_column_value(linearized_cell, **column_it));
                });
            }
        } else {
            // The ":attrs" map column: unpack each serialized entry into a
            // top-level attribute of the JSON item.
            auto deserialized = attrs_type()->deserialize(*cell);
            auto keys_and_values = value_cast<map_type_impl::native_type>(deserialized);
            for (auto entry : keys_and_values) {
                std::string attr_name = value_cast<sstring>(entry.first);
                if (item_length_in_bytes) {
                    (*item_length_in_bytes) += attr_name.length();
                }
                if (include_all_embedded_attributes || !attrs_to_get || attrs_to_get->contains(attr_name)) {
                    bytes value = value_cast<bytes>(entry.second);
                    if (item_length_in_bytes && value.length()) {
                        // ScyllaDB uses one extra byte compared to DynamoDB for the bytes length
                        (*item_length_in_bytes) += value.length() - 1;
                    }
                    rjson::value v = deserialize_item(value);
                    if (attrs_to_get) {
                        auto it = attrs_to_get->find(attr_name);
                        if (it != attrs_to_get->end()) {
                            // attrs_to_get may have asked for only part of
                            // this attribute. hierarchy_filter() modifies v,
                            // and returns false when nothing is to be kept.
                            if (!hierarchy_filter(v, it->second)) {
                                continue;
                            }
                        }
                    }
                    // item is expected to start empty, and attribute
                    // names are unique so add() makes sense
                    rjson::add_with_string_name(item, attr_name, std::move(v));
                } else if (item_length_in_bytes) {
                    // Filtered out of the output, but still counted in the
                    // item's length (same one-extra-byte adjustment).
                    (*item_length_in_bytes) += value_cast<bytes>(entry.second).length() - 1;
                }
            }
        }
        ++column_it;
    }
}
|
||||
|
||||
// Build the JSON description of the single item contained in a CQL query
// result. Returns an empty optional when the result holds no row (the
// DynamoDB response must then omit the Item member entirely) and throws
// std::logic_error when the result unexpectedly holds more than one row
// (callers with multi-row results must use describe_multi_item()).
// If item_length_in_bytes is non-null it is incremented by the item's
// approximate length (1 byte for an empty result).
std::optional<rjson::value> describe_single_item(schema_ptr schema,
        const query::partition_slice& slice,
        const cql3::selection::selection& selection,
        const query::result& query_result,
        const std::optional<attrs_to_get>& attrs_to_get,
        uint64_t* item_length_in_bytes) {
    rjson::value item = rjson::empty_object();

    // Materialize the raw query result into a CQL result set.
    cql3::selection::result_set_builder builder(selection, gc_clock::now());
    query::result_view::consume(query_result, slice, cql3::selection::result_set_builder::visitor(builder, *schema, selection));

    auto result_set = builder.build();
    if (result_set->empty()) {
        if (item_length_in_bytes) {
            // empty results is counted as having a minimal length (e.g. 1 byte).
            (*item_length_in_bytes) += 1;
        }
        // If there is no matching item, we're supposed to return an empty
        // object without an Item member - not one with an empty Item member
        return {};
    }
    if (result_set->size() > 1) {
        // If the result set contains multiple rows, the code should have
        // called describe_multi_item(), not this function.
        throw std::logic_error("describe_single_item() asked to describe multiple items");
    }
    describe_single_item(selection, *result_set->rows().begin(), attrs_to_get, item, item_length_in_bytes);
    return item;
}
|
||||
|
||||
static void check_big_array(const rjson::value& val, int& size_left);
|
||||
static void check_big_object(const rjson::value& val, int& size_left);
|
||||
|
||||
// For simplicity, we use a recursive implementation. This is fine because
|
||||
// Alternator limits the depth of JSONs it reads from inputs, and doesn't
|
||||
// add more than a couple of levels in its own output construction.
|
||||
// Quickly and approximately decide whether the JSON value would exceed
// big_size bytes when serialized. The helpers decrement the remaining
// budget and we report "big" as soon as it goes negative, so a huge value
// is detected without computing its exact size. Scalars other than strings
// are never considered big.
bool is_big(const rjson::value& val, int big_size) {
    if (val.IsString()) {
        return ssize_t(val.GetStringLength()) > big_size;
    }
    if (val.IsObject()) {
        check_big_object(val, big_size);
        return big_size < 0;
    }
    if (val.IsArray()) {
        check_big_array(val, big_size);
        return big_size < 0;
    }
    return false;
}
|
||||
|
||||
// Subtract an estimate of the serialized size of the JSON array `val` from
// size_left, returning early once the budget has gone negative.
static void check_big_array(const rjson::value& val, int& size_left) {
    // Assume a fixed size of 10 bytes for each number, boolean, etc., or
    // beginning of a sub-object. This doesn't have to be accurate.
    size_left -= 10 * val.Size();
    for (const auto& element : val.GetArray()) {
        if (size_left < 0) {
            // Already known to be "big" - no need to keep counting.
            return;
        }
        // Recurse only into containers; leaves (which usually greatly
        // outnumber the trunk) are handled inline to avoid call overhead.
        if (element.IsArray()) {
            check_big_array(element, size_left);
        } else if (element.IsObject()) {
            check_big_object(element, size_left);
        } else if (element.IsString()) {
            size_left -= element.GetStringLength();
        }
    }
}
|
||||
|
||||
// Subtract an estimate of the serialized size of the JSON object `val`
// (keys included) from size_left, returning early once the budget has gone
// negative.
static void check_big_object(const rjson::value& val, int& size_left) {
    // Flat 10-byte charge per member for punctuation and scalar values.
    size_left -= 10 * val.MemberCount();
    for (const auto& member : val.GetObject()) {
        if (size_left < 0) {
            // Already known to be "big" - stop counting.
            return;
        }
        size_left -= member.name.GetStringLength();
        // Recurse only into containers; string leaves are handled inline.
        if (member.value.IsArray()) {
            check_big_array(member.value, size_left);
        } else if (member.value.IsObject()) {
            check_big_object(member.value, size_left);
        } else if (member.value.IsString()) {
            size_left -= member.value.GetStringLength();
        }
    }
}
|
||||
|
||||
void validate_table_name(std::string_view name, const char* source) {
|
||||
if (name.length() < 3 || name.length() > max_table_name_length) {
|
||||
throw api_error::validation(
|
||||
format("{} must be at least 3 characters long and at most {} characters long", source, max_table_name_length));
|
||||
}
|
||||
if (!valid_table_name_chars(name)) {
|
||||
throw api_error::validation(
|
||||
format("{} must satisfy regular expression pattern: [a-zA-Z0-9_.-]+", source));
|
||||
}
|
||||
}
|
||||
|
||||
void validate_cdc_log_name_length(std::string_view table_name) {
|
||||
if (cdc::log_name(table_name).length() > max_auxiliary_table_name_length) {
|
||||
// CDC will add cdc_log_suffix ("_scylla_cdc_log") to the table name
|
||||
// to create its log table, and this will exceed the maximum allowed
|
||||
// length. To provide a more helpful error message, we assume that
|
||||
// cdc::log_name() always adds a suffix of the same length.
|
||||
int suffix_len = cdc::log_name(table_name).length() - table_name.length();
|
||||
throw api_error::validation(fmt::format("Streams or vector search cannot be enabled on a table whose name is longer than {} characters: {}",
|
||||
max_auxiliary_table_name_length - suffix_len, table_name));
|
||||
}
|
||||
}
|
||||
|
||||
// Build a body_writer that serializes `value` directly to the HTTP output
// stream. The JSON value is moved into the closure so it stays alive for
// the whole asynchronous write. Useful only for (very) large responses -
// see the declaration's comment in the header.
body_writer make_streamed(rjson::value&& value) {
    return [value = std::move(value)](output_stream<char>&& _out) mutable -> future<> {
        // Take ownership of the stream so it is always closed below, even
        // when printing fails.
        auto out = std::move(_out);
        std::exception_ptr ex;
        try {
            co_await rjson::print(value, out);
        } catch (...) {
            // Remember the failure; the stream must still be closed and
            // the (possibly huge) JSON value destroyed before rethrowing.
            ex = std::current_exception();
        }
        co_await out.close();
        // Destroy the potentially large JSON value incrementally, without
        // a long reactor stall.
        co_await rjson::destroy_gently(std::move(value));
        if (ex) {
            co_await coroutine::return_exception_ptr(std::move(ex));
        }
    };
}
|
||||
|
||||
} // namespace alternator
|
||||
@@ -1,247 +0,0 @@
|
||||
/*
|
||||
* Copyright 2019-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
|
||||
*/
|
||||
|
||||
// This header file, and the implementation file executor_util.cc, contain
|
||||
// various utility functions that are reused in many different operations
|
||||
// (API requests) across Alternator's code - in files such as executor.cc,
|
||||
// executor_read.cc, streams.cc, ttl.cc, and more. These utility functions
|
||||
// include things like extracting and validating pieces from a JSON request,
|
||||
// checking permissions, constructing auxiliary table names, and more.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
|
||||
#include <seastar/core/future.hh>
|
||||
#include <seastar/util/noncopyable_function.hh>
|
||||
|
||||
#include "utils/rjson.hh"
|
||||
#include "schema/schema_fwd.hh"
|
||||
#include "types/types.hh"
|
||||
#include "auth/permission.hh"
|
||||
#include "alternator/stats.hh"
|
||||
#include "alternator/attribute_path.hh"
|
||||
#include "utils/managed_bytes.hh"
|
||||
|
||||
namespace query { class partition_slice; class result; }
|
||||
namespace cql3::selection { class selection; }
|
||||
namespace data_dictionary { class database; }
|
||||
namespace service { class storage_proxy; class client_state; }
|
||||
|
||||
namespace alternator {
|
||||
|
||||
/// The body_writer is used for streaming responses - where the response body
|
||||
/// is written in chunks to the output_stream. This allows for efficient
|
||||
/// handling of large responses without needing to allocate a large buffer in
|
||||
/// memory. It is one of the variants of executor::request_return_type.
|
||||
using body_writer = noncopyable_function<future<>(output_stream<char>&&)>;
|
||||
|
||||
/// Get the value of an integer attribute, or an empty optional if it is
|
||||
/// missing. If the attribute exists, but is not an integer, a descriptive
|
||||
/// api_error is thrown.
|
||||
std::optional<int> get_int_attribute(const rjson::value& value, std::string_view attribute_name);
|
||||
|
||||
/// Get the value of a string attribute, or a default value if it is missing.
|
||||
/// If the attribute exists, but is not a string, a descriptive api_error is
|
||||
/// thrown.
|
||||
std::string get_string_attribute(const rjson::value& value, std::string_view attribute_name, const char* default_return);
|
||||
|
||||
/// Get the value of a boolean attribute, or a default value if it is missing.
|
||||
/// If the attribute exists, but is not a bool, a descriptive api_error is
|
||||
/// thrown.
|
||||
bool get_bool_attribute(const rjson::value& value, std::string_view attribute_name, bool default_return);
|
||||
|
||||
/// Extract table name from a request.
|
||||
/// Most requests expect the table's name to be listed in a "TableName" field.
|
||||
/// get_table_name() returns the name or api_error in case the table name is
|
||||
/// missing or not a string.
|
||||
std::string get_table_name(const rjson::value& request);
|
||||
|
||||
/// find_table_name() is like get_table_name() except that it returns an
|
||||
/// optional table name - it returns an empty optional when the TableName
|
||||
/// is missing from the request, instead of throwing as get_table_name()
|
||||
/// does. However, find_table_name() still throws if a TableName exists but
|
||||
/// is not a string.
|
||||
std::optional<std::string> find_table_name(const rjson::value& request);
|
||||
|
||||
/// Extract table schema from a request.
|
||||
/// Many requests expect the table's name to be listed in a "TableName" field
|
||||
/// and need to look it up as an existing table. The get_table() function
|
||||
/// does this, with the appropriate validation and api_error in case the table
|
||||
/// name is missing, invalid or the table doesn't exist. If everything is
|
||||
/// successful, it returns the table's schema.
|
||||
schema_ptr get_table(service::storage_proxy& proxy, const rjson::value& request);
|
||||
|
||||
/// This find_table() variant is like get_table() excepts that it returns a
|
||||
/// nullptr instead of throwing if the request does not mention a TableName.
|
||||
/// In other cases of errors (i.e., a table is mentioned but doesn't exist)
|
||||
/// this function throws too.
|
||||
schema_ptr find_table(service::storage_proxy& proxy, const rjson::value& request);
|
||||
|
||||
/// This find_table() variant is like the previous one except that it takes
|
||||
/// the table name directly instead of a request object. It is used in cases
|
||||
/// where we already have the table name extracted from the request.
|
||||
schema_ptr find_table(service::storage_proxy& proxy, std::string_view table_name);
|
||||
|
||||
// We would have liked to support table names up to 255 bytes, like DynamoDB.
|
||||
// But Scylla creates a directory whose name is the table's name plus 33
|
||||
// bytes (dash and UUID), and since directory names are limited to 255 bytes,
|
||||
// we need to limit table names to 222 bytes, instead of 255. See issue #4480.
|
||||
// We actually have two limits here,
|
||||
// * max_table_name_length is the limit that Alternator will impose on names
|
||||
// of new Alternator tables.
|
||||
// * max_auxiliary_table_name_length is the potentially higher absolute limit
|
||||
// that Scylla imposes on the names of auxiliary tables that Alternator
|
||||
// wants to create internally - i.e. materialized views or CDC log tables.
|
||||
// The second limit might mean that it is not possible to add a GSI to an
|
||||
// existing table, because the name of the new auxiliary table may go over
|
||||
// the limit. The second limit is also one of the reasons why the first limit
|
||||
// is set lower than 222 - to have room to enable streams which add the extra
|
||||
// suffix "_scylla_cdc_log" to the table name.
|
||||
inline constexpr int max_table_name_length = 192;
|
||||
inline constexpr int max_auxiliary_table_name_length = 222;
|
||||
|
||||
/// validate_table_name() validates the TableName parameter in a request - it
|
||||
/// should be called in CreateTable, and in other requests only when noticing
|
||||
/// that the named table doesn't exist.
|
||||
/// The DynamoDB developer guide, https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/HowItWorks.NamingRulesDataTypes.html#HowItWorks.NamingRules
|
||||
/// specifies that table "names must be between 3 and 255 characters long and
|
||||
/// can contain only the following characters: a-z, A-Z, 0-9, _ (underscore),
|
||||
/// - (dash), . (dot)". However, Alternator only allows max_table_name_length
|
||||
/// characters (see above) - not 255.
|
||||
/// validate_table_name() throws the appropriate api_error if this validation
|
||||
/// fails.
|
||||
void validate_table_name(std::string_view name, const char* source = "TableName");
|
||||
|
||||
/// Validate that a CDC log table could be created for the base table with a
|
||||
/// given table_name, and if not, throw a user-visible api_error::validation.
|
||||
/// It is not possible to create a CDC log table if the table name is so long
|
||||
/// that adding the 15-character suffix "_scylla_cdc_log" (cdc_log_suffix)
|
||||
/// makes it go over max_auxiliary_table_name_length.
|
||||
/// Note that if max_table_name_length is set to less than 207 (which is
|
||||
/// max_auxiliary_table_name_length-15), then this function will never
|
||||
/// fail. However, it's still important to call it in UpdateTable, in case
|
||||
/// we have pre-existing tables with names longer than this to avoid #24598.
|
||||
void validate_cdc_log_name_length(std::string_view table_name);
|
||||
|
||||
/// Checks if a keyspace, given by its name, is an Alternator keyspace.
|
||||
/// This just checks if the name begins in executor::KEYSPACE_NAME_PREFIX,
|
||||
/// a prefix that all keyspaces created by Alternator's CreateTable use.
|
||||
bool is_alternator_keyspace(std::string_view ks_name);
|
||||
|
||||
/// Wraps db::get_tags_of_table() and throws api_error::validation if the
|
||||
/// table is missing the tags extension.
|
||||
const std::map<sstring, sstring>& get_tags_of_table_or_throw(schema_ptr schema);
|
||||
|
||||
/// Returns a type object representing the type of the ":attrs" column used
|
||||
/// by Alternator to store all non-key attribute. This type is a map from
|
||||
/// string (attribute name) to bytes (serialized attribute value).
|
||||
map_type attrs_type();
|
||||
|
||||
// In DynamoDB index names are local to a table, while in Scylla, materialized
|
||||
// view names are global (in a keyspace). So we need to compose a unique name
|
||||
// for the view taking into account both the table's name and the index name.
|
||||
// We concatenate the table and index name separated by a delim character
|
||||
// (a character not allowed by DynamoDB in ordinary table names, default: ":").
|
||||
// The downside of this approach is that it limits the sum of the lengths,
|
||||
// instead of each component individually as DynamoDB does.
|
||||
// The view_name() function assumes the table_name has already been validated
|
||||
// but validates the legality of index_name and the combination of both.
|
||||
std::string view_name(std::string_view table_name, std::string_view index_name,
|
||||
const std::string& delim = ":", bool validate_len = true);
|
||||
std::string gsi_name(std::string_view table_name, std::string_view index_name,
|
||||
bool validate_len = true);
|
||||
std::string lsi_name(std::string_view table_name, std::string_view index_name,
|
||||
bool validate_len = true);
|
||||
|
||||
/// After calling pk_from_json() and ck_from_json() to extract the pk and ck
|
||||
/// components of a key, and if that succeeded, call check_key() to further
|
||||
/// check that the key doesn't have any spurious components.
|
||||
void check_key(const rjson::value& key, const schema_ptr& schema);
|
||||
|
||||
/// Fail with api_error::validation if the expression if has unused attribute
|
||||
/// names or values. This is how DynamoDB behaves, so we do too.
|
||||
void verify_all_are_used(const rjson::value* field,
|
||||
const std::unordered_set<std::string>& used,
|
||||
const char* field_name,
|
||||
const char* operation);
|
||||
|
||||
/// Check CQL's Role-Based Access Control (RBAC) permission (MODIFY,
|
||||
/// SELECT, DROP, etc.) on the given table. When permission is denied an
|
||||
/// appropriate user-readable api_error::access_denied is thrown.
|
||||
future<> verify_permission(bool enforce_authorization, bool warn_authorization, const service::client_state&, const schema_ptr&, auth::permission, stats& stats);
|
||||
|
||||
/// Similar to verify_permission() above, but just for CREATE operations.
|
||||
/// Those do not operate on any specific table, so require permissions on
|
||||
/// ALL KEYSPACES instead of any specific table.
|
||||
future<> verify_create_permission(bool enforce_authorization, bool warn_authorization, const service::client_state&, stats& stats);
|
||||
|
||||
// Sets a KeySchema JSON array inside the given parent object describing the
|
||||
// key attributes of the given schema as HASH or RANGE keys. Additionally,
|
||||
// adds mappings from key attribute names to their DynamoDB type string into
|
||||
// attribute_types.
|
||||
void describe_key_schema(rjson::value& parent, const schema&, std::unordered_map<std::string, std::string>* attribute_types = nullptr, const std::map<sstring, sstring>* tags = nullptr);
|
||||
|
||||
/// is_big() checks approximately if the given JSON value is "bigger" than
|
||||
/// the given big_size number of bytes. The goal is to *quickly* detect
|
||||
/// oversized JSON that, for example, is too large to be serialized to a
|
||||
/// contiguous string - we don't need an accurate size for that. Moreover,
|
||||
/// as soon as we detect that the JSON is indeed "big", we can return true
|
||||
/// and don't need to continue calculating its exact size.
|
||||
bool is_big(const rjson::value& val, int big_size = 100'000);
|
||||
|
||||
/// try_get_internal_table() handles the special case that the given table_name
|
||||
/// begins with INTERNAL_TABLE_PREFIX (".scylla.alternator."). In that case,
|
||||
/// this function assumes that the rest of the name refers to an internal
|
||||
/// Scylla table (e.g., system table) and returns the schema of that table -
|
||||
/// or an exception if it doesn't exist. Otherwise, if table_name does not
|
||||
/// start with INTERNAL_TABLE_PREFIX, this function returns an empty schema_ptr
|
||||
/// and the caller should look for a normal Alternator table with that name.
|
||||
schema_ptr try_get_internal_table(const data_dictionary::database& db, std::string_view table_name);
|
||||
|
||||
/// get_table_from_batch_request() is used by batch write/read operations to
|
||||
/// look up the schema for a table named in a batch request, by the JSON member
|
||||
/// name (which is the table name in a BatchWriteItem or BatchGetItem request).
|
||||
schema_ptr get_table_from_batch_request(const service::storage_proxy& proxy, const rjson::value::ConstMemberIterator& batch_request);
|
||||
|
||||
/// Returns (or lazily creates) the per-table stats object for the given schema.
|
||||
/// If the table has been deleted, returns a temporary stats object.
|
||||
lw_shared_ptr<stats> get_stats_from_schema(service::storage_proxy& sp, const schema& schema);
|
||||
|
||||
/// Writes one item's attributes into `item` from the given selection result
|
||||
/// row. If include_all_embedded_attributes is true, all attributes from the
|
||||
/// ATTRS_COLUMN map column are included regardless of attrs_to_get.
|
||||
void describe_single_item(const cql3::selection::selection&,
|
||||
const std::vector<managed_bytes_opt>&,
|
||||
const std::optional<attrs_to_get>&,
|
||||
rjson::value&,
|
||||
uint64_t* item_length_in_bytes = nullptr,
|
||||
bool include_all_embedded_attributes = false);
|
||||
|
||||
/// Converts a single result row to a JSON item, or returns an empty optional
|
||||
/// if the result is empty.
|
||||
std::optional<rjson::value> describe_single_item(schema_ptr,
|
||||
const query::partition_slice&,
|
||||
const cql3::selection::selection&,
|
||||
const query::result&,
|
||||
const std::optional<attrs_to_get>&,
|
||||
uint64_t* item_length_in_bytes = nullptr);
|
||||
|
||||
/// Make a body_writer (function that can write output incrementally to the
|
||||
/// HTTP stream) from the given JSON object.
|
||||
/// Note: only useful for (very) large objects as there are overhead issues
|
||||
/// with this as well, but for massive lists of return objects this can
|
||||
/// help avoid large allocations/many re-allocs.
|
||||
body_writer make_streamed(rjson::value&&);
|
||||
|
||||
} // namespace alternator
|
||||
@@ -744,7 +744,7 @@ void validate_attr_name_length(std::string_view supplementary_context, size_t at
|
||||
constexpr const size_t DYNAMODB_NONKEY_ATTR_NAME_SIZE_MAX = 65535;
|
||||
|
||||
const size_t max_length = is_key ? DYNAMODB_KEY_ATTR_NAME_SIZE_MAX : DYNAMODB_NONKEY_ATTR_NAME_SIZE_MAX;
|
||||
if (attr_name_length > max_length || attr_name_length == 0) {
|
||||
if (attr_name_length > max_length) {
|
||||
std::string error_msg;
|
||||
if (!error_msg_prefix.empty()) {
|
||||
error_msg += error_msg_prefix;
|
||||
@@ -754,11 +754,7 @@ void validate_attr_name_length(std::string_view supplementary_context, size_t at
|
||||
error_msg += supplementary_context;
|
||||
error_msg += " - ";
|
||||
}
|
||||
if (attr_name_length == 0) {
|
||||
error_msg += "Empty attribute name";
|
||||
} else {
|
||||
error_msg += fmt::format("Attribute name is too large, must be less than {} bytes", std::to_string(max_length + 1));
|
||||
}
|
||||
error_msg += fmt::format("Attribute name is too large, must be less than {} bytes", std::to_string(max_length + 1));
|
||||
throw api_error::validation(error_msg);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -264,7 +264,7 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
body_writer compress(response_compressor::compression_type ct, const db::config& cfg, body_writer&& bw) {
|
||||
executor::body_writer compress(response_compressor::compression_type ct, const db::config& cfg, executor::body_writer&& bw) {
|
||||
return [bw = std::move(bw), ct, level = cfg.alternator_response_gzip_compression_level()](output_stream<char>&& out) mutable -> future<> {
|
||||
output_stream_options opts;
|
||||
opts.trim_to_size = true;
|
||||
@@ -287,7 +287,7 @@ body_writer compress(response_compressor::compression_type ct, const db::config&
|
||||
};
|
||||
}
|
||||
|
||||
future<std::unique_ptr<http::reply>> response_compressor::generate_reply(std::unique_ptr<http::reply> rep, sstring accept_encoding, const char* content_type, body_writer&& body_writer) {
|
||||
future<std::unique_ptr<http::reply>> response_compressor::generate_reply(std::unique_ptr<http::reply> rep, sstring accept_encoding, const char* content_type, executor::body_writer&& body_writer) {
|
||||
response_compressor::compression_type ct = find_compression(accept_encoding, std::numeric_limits<size_t>::max());
|
||||
if (ct != response_compressor::compression_type::none) {
|
||||
rep->add_header("Content-Encoding", get_encoding_name(ct));
|
||||
|
||||
@@ -85,7 +85,7 @@ public:
|
||||
future<std::unique_ptr<http::reply>> generate_reply(std::unique_ptr<http::reply> rep,
|
||||
sstring accept_encoding, const char* content_type, std::string&& response_body);
|
||||
future<std::unique_ptr<http::reply>> generate_reply(std::unique_ptr<http::reply> rep,
|
||||
sstring accept_encoding, const char* content_type, body_writer&& body_writer);
|
||||
sstring accept_encoding, const char* content_type, executor::body_writer&& body_writer);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -14,12 +14,12 @@
|
||||
#include "types/concrete_types.hh"
|
||||
#include "types/json_utils.hh"
|
||||
#include "mutation/position_in_partition.hh"
|
||||
#include "alternator/executor_util.hh"
|
||||
|
||||
static logging::logger slogger("alternator-serialization");
|
||||
|
||||
namespace alternator {
|
||||
|
||||
bool is_alternator_keyspace(const sstring& ks_name);
|
||||
|
||||
type_info type_info_from_string(std::string_view type) {
|
||||
static thread_local const std::unordered_map<std::string_view, type_info> type_infos = {
|
||||
|
||||
@@ -7,8 +7,6 @@
|
||||
*/
|
||||
|
||||
#include "alternator/server.hh"
|
||||
#include "audit/audit.hh"
|
||||
#include "alternator/executor_util.hh"
|
||||
#include "gms/application_state.hh"
|
||||
#include "utils/log.hh"
|
||||
#include <fmt/ranges.h>
|
||||
@@ -144,7 +142,7 @@ public:
|
||||
return _response_compressor.generate_reply(std::move(rep), std::move(accept_encoding),
|
||||
REPLY_CONTENT_TYPE, std::move(str));
|
||||
},
|
||||
[&] (body_writer&& body_writer) {
|
||||
[&] (executor::body_writer&& body_writer) {
|
||||
return _response_compressor.generate_reply(std::move(rep), std::move(accept_encoding),
|
||||
REPLY_CONTENT_TYPE, std::move(body_writer));
|
||||
},
|
||||
@@ -787,25 +785,12 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
|
||||
auto f = [this, content = std::move(content), &callback = callback_it->second,
|
||||
client_state = std::move(client_state), trace_state = std::move(trace_state),
|
||||
units = std::move(units), req = std::move(req)] () mutable -> future<executor::request_return_type> {
|
||||
rjson::value json_request = co_await _json_parser.parse(std::move(content));
|
||||
if (!json_request.IsObject()) {
|
||||
co_return api_error::validation("Request content must be an object");
|
||||
}
|
||||
std::unique_ptr<audit::audit_info_alternator> audit_info;
|
||||
std::exception_ptr ex = {};
|
||||
executor::request_return_type ret;
|
||||
try {
|
||||
ret = co_await callback(_executor, client_state, trace_state, make_service_permit(std::move(units)), std::move(json_request), std::move(req), audit_info);
|
||||
} catch (...) {
|
||||
ex = std::current_exception();
|
||||
}
|
||||
if (audit_info) {
|
||||
co_await audit::inspect(*audit_info, client_state, ex != nullptr);
|
||||
}
|
||||
if (ex) {
|
||||
co_return coroutine::exception(std::move(ex));
|
||||
}
|
||||
co_return ret;
|
||||
rjson::value json_request = co_await _json_parser.parse(std::move(content));
|
||||
if (!json_request.IsObject()) {
|
||||
co_return api_error::validation("Request content must be an object");
|
||||
}
|
||||
co_return co_await callback(_executor, client_state, trace_state,
|
||||
make_service_permit(std::move(units)), std::move(json_request), std::move(req));
|
||||
};
|
||||
co_return co_await _sl_controller.with_user_service_level(user, std::ref(f));
|
||||
}
|
||||
@@ -849,77 +834,77 @@ server::server(executor& exec, service::storage_proxy& proxy, gms::gossiper& gos
|
||||
, _pending_requests("alternator::server::pending_requests")
|
||||
, _timeout_config(_proxy.data_dictionary().get_config())
|
||||
, _callbacks{
|
||||
{"CreateTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.create_table(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
|
||||
{"CreateTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.create_table(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"DescribeTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.describe_table(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
|
||||
{"DescribeTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.describe_table(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"DeleteTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.delete_table(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
|
||||
{"DeleteTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.delete_table(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"UpdateTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.update_table(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
|
||||
{"UpdateTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.update_table(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"PutItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.put_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
|
||||
{"PutItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.put_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"UpdateItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.update_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
|
||||
{"UpdateItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.update_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"GetItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.get_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
|
||||
{"GetItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.get_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"DeleteItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.delete_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
|
||||
{"DeleteItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.delete_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"ListTables", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.list_tables(client_state, std::move(permit), std::move(json_request), audit_info);
|
||||
{"ListTables", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.list_tables(client_state, std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"Scan", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.scan(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
|
||||
{"Scan", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.scan(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"DescribeEndpoints", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.describe_endpoints(client_state, std::move(permit), std::move(json_request), req->get_header("Host"), audit_info);
|
||||
{"DescribeEndpoints", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.describe_endpoints(client_state, std::move(permit), std::move(json_request), req->get_header("Host"));
|
||||
}},
|
||||
{"BatchWriteItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.batch_write_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
|
||||
{"BatchWriteItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.batch_write_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"BatchGetItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.batch_get_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
|
||||
{"BatchGetItem", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.batch_get_item(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"Query", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.query(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
|
||||
{"Query", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.query(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"TagResource", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.tag_resource(client_state, std::move(permit), std::move(json_request), audit_info);
|
||||
{"TagResource", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.tag_resource(client_state, std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"UntagResource", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.untag_resource(client_state, std::move(permit), std::move(json_request), audit_info);
|
||||
{"UntagResource", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.untag_resource(client_state, std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"ListTagsOfResource", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.list_tags_of_resource(client_state, std::move(permit), std::move(json_request), audit_info);
|
||||
{"ListTagsOfResource", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.list_tags_of_resource(client_state, std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"UpdateTimeToLive", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.update_time_to_live(client_state, std::move(permit), std::move(json_request), audit_info);
|
||||
{"UpdateTimeToLive", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.update_time_to_live(client_state, std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"DescribeTimeToLive", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.describe_time_to_live(client_state, std::move(permit), std::move(json_request), audit_info);
|
||||
{"DescribeTimeToLive", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.describe_time_to_live(client_state, std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"ListStreams", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.list_streams(client_state, std::move(permit), std::move(json_request), audit_info);
|
||||
{"ListStreams", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.list_streams(client_state, std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"DescribeStream", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.describe_stream(client_state, std::move(permit), std::move(json_request), audit_info);
|
||||
{"DescribeStream", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.describe_stream(client_state, std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"GetShardIterator", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.get_shard_iterator(client_state, std::move(permit), std::move(json_request), audit_info);
|
||||
{"GetShardIterator", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.get_shard_iterator(client_state, std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"GetRecords", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.get_records(client_state, std::move(trace_state), std::move(permit), std::move(json_request), audit_info);
|
||||
{"GetRecords", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.get_records(client_state, std::move(trace_state), std::move(permit), std::move(json_request));
|
||||
}},
|
||||
{"DescribeContinuousBackups", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
return e.describe_continuous_backups(client_state, std::move(permit), std::move(json_request), audit_info);
|
||||
{"DescribeContinuousBackups", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {
|
||||
return e.describe_continuous_backups(client_state, std::move(permit), std::move(json_request));
|
||||
}},
|
||||
} {
|
||||
}
|
||||
|
||||
@@ -34,7 +34,7 @@ class server : public peering_sharded_service<server> {
|
||||
// DynamoDB also has the same limit set to 16 MB.
|
||||
static constexpr size_t request_content_length_limit = 16*MB;
|
||||
using alternator_callback = std::function<future<executor::request_return_type>(executor&, executor::client_state&,
|
||||
tracing::trace_state_ptr, service_permit, rjson::value, std::unique_ptr<http::request>, std::unique_ptr<audit::audit_info_alternator>&)>;
|
||||
tracing::trace_state_ptr, service_permit, rjson::value, std::unique_ptr<http::request>)>;
|
||||
using alternator_callbacks_map = std::unordered_map<std::string_view, alternator_callback>;
|
||||
|
||||
httpd::http_server _http_server;
|
||||
|
||||
@@ -7,8 +7,6 @@
|
||||
*/
|
||||
|
||||
#include <type_traits>
|
||||
#include <ranges>
|
||||
#include <generator>
|
||||
#include <boost/lexical_cast.hpp>
|
||||
#include <boost/io/ios_state.hpp>
|
||||
#include <boost/multiprecision/cpp_int.hpp>
|
||||
@@ -26,15 +24,12 @@
|
||||
#include "cql3/selection/selection.hh"
|
||||
#include "cql3/result_set.hh"
|
||||
#include "cql3/column_identifier.hh"
|
||||
#include "replica/database.hh"
|
||||
#include "schema/schema_builder.hh"
|
||||
#include "service/storage_proxy.hh"
|
||||
#include "gms/feature.hh"
|
||||
#include "gms/feature_service.hh"
|
||||
|
||||
#include "executor.hh"
|
||||
#include "streams.hh"
|
||||
#include "alternator/executor_util.hh"
|
||||
#include "data_dictionary/data_dictionary.hh"
|
||||
#include "utils/rjson.hh"
|
||||
|
||||
@@ -96,77 +91,43 @@ static sstring stream_label(const schema& log_schema) {
|
||||
return seastar::json::formatter::to_json(tm);
|
||||
}
|
||||
|
||||
// Debug printer for cdc::stream_id - used only for logging/debugging, not for
|
||||
// serialization or user-visible output. We print both signed and unsigned value
|
||||
// as we use both.
|
||||
template <>
|
||||
struct fmt::formatter<cdc::stream_id> : fmt::formatter<string_view> {
|
||||
template <typename FormatContext>
|
||||
auto format(const cdc::stream_id &id, FormatContext& ctx) const {
|
||||
fmt::format_to(ctx.out(), "{} ", id.token());
|
||||
|
||||
for (auto b : id.to_bytes()) {
|
||||
fmt::format_to(ctx.out(), "{:02x}", (unsigned char)b);
|
||||
}
|
||||
return ctx.out();
|
||||
}
|
||||
};
|
||||
|
||||
namespace alternator {
|
||||
// stream arn has certain format (see https://docs.aws.amazon.com/IAM/latest/UserGuide/reference-arns.html)
|
||||
// we need to follow it as Kinesis Client Library does check
|
||||
// NOTE: we're holding inside a name of cdc log table, not a user table
|
||||
class stream_arn {
|
||||
std::string _arn;
|
||||
size_t _table_name_offset, _table_name_size;
|
||||
size_t _keyspace_name_offset, _keyspace_name_size;
|
||||
|
||||
void _initialize_offsets() {
|
||||
auto parts = parse_arn(_arn, "StreamArn", "stream", "/stream/");
|
||||
_table_name_offset = parts.table_name.data() - _arn.data();
|
||||
_table_name_size = parts.table_name.size();
|
||||
_keyspace_name_offset = parts.keyspace_name.data() - _arn.data();
|
||||
_keyspace_name_size = parts.keyspace_name.size();
|
||||
}
|
||||
// stream arn _has_ to be 37 or more characters long. ugh...
|
||||
// see https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_streams_DescribeStream.html#API_streams_DescribeStream_RequestSyntax
|
||||
// UUID is 36 bytes as string (including dashes).
|
||||
// Prepend a version/type marker -> 37
|
||||
class stream_arn : public utils::UUID {
|
||||
public:
|
||||
// ARN to get table name from
|
||||
stream_arn(std::string arn) : _arn(std::move(arn)) {
|
||||
_initialize_offsets();
|
||||
using UUID = utils::UUID;
|
||||
static constexpr char marker = 'S';
|
||||
|
||||
stream_arn() = default;
|
||||
stream_arn(const UUID& uuid)
|
||||
: UUID(uuid)
|
||||
{}
|
||||
stream_arn(const table_id& tid)
|
||||
: UUID(tid.uuid())
|
||||
{}
|
||||
stream_arn(std::string_view v)
|
||||
: UUID(v.substr(1))
|
||||
{
|
||||
if (v[0] != marker) {
|
||||
throw std::invalid_argument(std::string(v));
|
||||
}
|
||||
}
|
||||
// NOTE: it must be a schema of a CDC log table, not a base table, because that's what we are encoding in ARN and returning to users.
|
||||
// we need base schema for creation time
|
||||
stream_arn(schema_ptr s, schema_ptr base_schema) {
|
||||
auto creation_time = get_table_creation_time(*base_schema);
|
||||
auto now = std::chrono::system_clock::time_point{ std::chrono::duration_cast<std::chrono::system_clock::duration>(std::chrono::duration<double>(creation_time)) };
|
||||
|
||||
// KCL checks for arn / aws / dynamodb and account-id being a number
|
||||
_arn = fmt::format("arn:aws:dynamodb:us-east-1:000000000000:table/{}@{}/stream/{:%FT%T}", s->ks_name(), s->cf_name(), now);
|
||||
|
||||
_initialize_offsets();
|
||||
}
|
||||
|
||||
std::string_view unparsed() const { return _arn; }
|
||||
std::string_view table_name() const { return std::string_view{ _arn }.substr(_table_name_offset, _table_name_size); }
|
||||
std::string_view keyspace_name() const { return std::string_view{ _arn }.substr(_keyspace_name_offset, _keyspace_name_size); }
|
||||
friend std::ostream& operator<<(std::ostream& os, const stream_arn& arn) {
|
||||
os << arn._arn;
|
||||
return os;
|
||||
const UUID& uuid = arn;
|
||||
return os << marker << uuid;
|
||||
}
|
||||
friend std::istream& operator>>(std::istream& is, stream_arn& arn) {
|
||||
std::string s;
|
||||
is >> s;
|
||||
arn = stream_arn(s);
|
||||
return is;
|
||||
}
|
||||
};
|
||||
|
||||
// NOTE: this will return schema for cdc log table, not the base table.
|
||||
static schema_ptr get_schema_from_arn(service::storage_proxy& proxy, const stream_arn& arn)
|
||||
{
|
||||
if (!cdc::is_log_name(arn.table_name())) {
|
||||
throw api_error::resource_not_found(fmt::format("{} as found in ARN {} is not a valid name for a CDC table", arn.table_name(), arn.unparsed()));
|
||||
}
|
||||
try {
|
||||
return proxy.data_dictionary().find_schema(arn.keyspace_name(), arn.table_name());
|
||||
} catch(data_dictionary::no_such_column_family&) {
|
||||
throw api_error::resource_not_found(fmt::format("`{}` is not a valid StreamArn - table {} not found", arn.unparsed(), arn.table_name()));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace alternator
|
||||
|
||||
template<typename ValueType>
|
||||
@@ -176,12 +137,11 @@ struct rapidjson::internal::TypeHelper<ValueType, alternator::stream_arn>
|
||||
|
||||
namespace alternator {
|
||||
|
||||
future<alternator::executor::request_return_type> alternator::executor::list_streams(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
future<alternator::executor::request_return_type> alternator::executor::list_streams(client_state& client_state, service_permit permit, rjson::value request) {
|
||||
_stats.api_operations.list_streams++;
|
||||
|
||||
auto limit = rjson::get_opt<int>(request, "Limit").value_or(100);
|
||||
auto streams_start = rjson::get_opt<stream_arn>(request, "ExclusiveStartStreamArn");
|
||||
|
||||
auto table = find_table(_proxy, request);
|
||||
auto db = _proxy.data_dictionary();
|
||||
|
||||
@@ -189,11 +149,6 @@ future<alternator::executor::request_return_type> alternator::executor::list_str
|
||||
throw api_error::validation("Limit must be 1 or more");
|
||||
}
|
||||
|
||||
// Audit the input table name (if specified), not the output table names.
|
||||
maybe_audit(audit_info, audit::statement_category::QUERY,
|
||||
table ? table->ks_name() : "", table ? table->cf_name() : "",
|
||||
"ListStreams", request);
|
||||
|
||||
std::vector<data_dictionary::table> cfs;
|
||||
|
||||
if (table) {
|
||||
@@ -207,64 +162,63 @@ future<alternator::executor::request_return_type> alternator::executor::list_str
|
||||
cfs = db.get_tables();
|
||||
}
|
||||
|
||||
// We need to sort the tables to ensure a stable order for paging.
|
||||
// We sort by keyspace and table name, which will also allow us to skip to
|
||||
// the right position by ExclusiveStartStreamArn.
|
||||
auto cmp = [](std::string_view ks1, std::string_view cf1, std::string_view ks2, std::string_view cf2) {
|
||||
return ks1 == ks2 ? cf1 < cf2 : ks1 < ks2;
|
||||
};
|
||||
// # 12601 (maybe?) - sort the set of tables on ID. This should ensure we never
|
||||
// generate duplicates in a paged listing here. Can obviously miss things if they
|
||||
// are added between paged calls and end up with a "smaller" UUID/ARN, but that
|
||||
// is to be expected.
|
||||
if (std::cmp_less(limit, cfs.size()) || streams_start) {
|
||||
std::sort(cfs.begin(), cfs.end(),
|
||||
[&cmp](const data_dictionary::table& t1, const data_dictionary::table& t2) {
|
||||
return cmp(t1.schema()->ks_name(), t1.schema()->cf_name(),
|
||||
t2.schema()->ks_name(), t2.schema()->cf_name());
|
||||
});
|
||||
std::sort(cfs.begin(), cfs.end(), [](const data_dictionary::table& t1, const data_dictionary::table& t2) {
|
||||
return t1.schema()->id().uuid() < t2.schema()->id().uuid();
|
||||
});
|
||||
}
|
||||
|
||||
auto i = cfs.begin();
|
||||
auto e = cfs.end();
|
||||
|
||||
if (streams_start) {
|
||||
i = std::upper_bound(i, e, *streams_start,
|
||||
[&cmp](const stream_arn& arn, const data_dictionary::table& t) {
|
||||
return cmp(arn.keyspace_name(), arn.table_name(),
|
||||
t.schema()->ks_name(), t.schema()->cf_name());
|
||||
});
|
||||
i = std::find_if(i, e, [&](const data_dictionary::table& t) {
|
||||
return t.schema()->id().uuid() == streams_start
|
||||
&& cdc::get_base_table(db.real_database(), *t.schema())
|
||||
&& is_alternator_keyspace(t.schema()->ks_name())
|
||||
;
|
||||
});
|
||||
if (i != e) {
|
||||
++i;
|
||||
}
|
||||
}
|
||||
|
||||
auto ret = rjson::empty_object();
|
||||
auto streams = rjson::empty_array();
|
||||
std::optional<std::string> last;
|
||||
|
||||
std::optional<stream_arn> last;
|
||||
|
||||
for (;limit > 0 && i != e; ++i) {
|
||||
auto s = i->schema();
|
||||
auto& ks_name = s->ks_name();
|
||||
auto& cf_name = s->cf_name();
|
||||
|
||||
if (!is_alternator_keyspace(ks_name)) {
|
||||
continue;
|
||||
}
|
||||
if (cdc::is_log_for_some_table(db.real_database(), ks_name, cf_name)) {
|
||||
rjson::value new_entry = rjson::empty_object();
|
||||
|
||||
auto arn = stream_arn{ i->schema(), cdc::get_base_table(db.real_database(), *i->schema()) };
|
||||
last = std::string(arn.unparsed());
|
||||
rjson::add(new_entry, "StreamArn", arn);
|
||||
last = i->schema()->id();
|
||||
rjson::add(new_entry, "StreamArn", *last);
|
||||
rjson::add(new_entry, "StreamLabel", rjson::from_string(stream_label(*s)));
|
||||
rjson::add(new_entry, "TableName", rjson::from_string(cdc::base_name(s->cf_name())));
|
||||
rjson::add(new_entry, "TableName", rjson::from_string(cdc::base_name(table_name(*s))));
|
||||
rjson::push_back(streams, std::move(new_entry));
|
||||
|
||||
--limit;
|
||||
}
|
||||
}
|
||||
|
||||
rjson::add(ret, "Streams", std::move(streams));
|
||||
|
||||
// Only emit LastEvaluatedStreamArn when we stopped because we hit the
|
||||
// limit (limit == 0), meaning there may be more streams to list.
|
||||
// If we exhausted all tables naturally (limit > 0), there are no more
|
||||
// streams, so we must not emit a cookie.
|
||||
if (last && limit == 0) {
|
||||
rjson::add(ret, "LastEvaluatedStreamArn", rjson::from_string(*last));
|
||||
if (last) {
|
||||
rjson::add(ret, "LastEvaluatedStreamArn", *last);
|
||||
}
|
||||
|
||||
return make_ready_future<executor::request_return_type>(rjson::print(std::move(ret)));
|
||||
}
|
||||
|
||||
@@ -476,7 +430,7 @@ using namespace std::chrono_literals;
|
||||
// Dynamo docs says no data shall live longer than 24h.
|
||||
static constexpr auto dynamodb_streams_max_window = 24h;
|
||||
|
||||
// find the parent Streams shard in previous generation for the given child Streams shard
|
||||
// find the parent shard in previous generation for the given child shard
|
||||
// takes care of wrap-around case in vnodes
|
||||
// prev_streams must be sorted by token
|
||||
const cdc::stream_id& find_parent_shard_in_previous_generation(db_clock::time_point prev_timestamp, const utils::chunked_vector<cdc::stream_id> &prev_streams, const cdc::stream_id &child) {
|
||||
@@ -495,305 +449,7 @@ const cdc::stream_id& find_parent_shard_in_previous_generation(db_clock::time_po
|
||||
return *it;
|
||||
}
|
||||
|
||||
// The function compare_lexicographically() below sorts stream shard ids in the
|
||||
// way we need to present them in our output. However, when processing lists of
|
||||
// shards internally, especially for finding child shards, it's more convenient
|
||||
// for us to sort the shard ids by the different function defined here -
|
||||
// compare_by_token(). It sorts the ids by numeric token (the end token of the
|
||||
// token range belonging to this shard), and makes algorithms like lower_bound()
|
||||
// possible.
|
||||
static bool compare_by_token(const cdc::stream_id& id1, const cdc::stream_id& id2) {
|
||||
return id1.token() < id2.token();
|
||||
}
|
||||
|
||||
// #7409 - shards must be returned in lexicographical order.
|
||||
// Normal bytes compare is string_traits<int8_t>::compare,
|
||||
// thus bytes 0x8000 is less than 0x0000. Instead, we need to use unsigned compare.
|
||||
// KCL depends on this ordering, so we need to adhere.
|
||||
static bool compare_lexicographically(const cdc::stream_id& id1, const cdc::stream_id& id2) {
|
||||
return compare_unsigned(id1.to_bytes(), id2.to_bytes()) < 0;
|
||||
}
|
||||
|
||||
stream_id_range::stream_id_range(
|
||||
utils::chunked_vector<cdc::stream_id> &items,
|
||||
utils::chunked_vector<cdc::stream_id>::iterator lo1,
|
||||
utils::chunked_vector<cdc::stream_id>::iterator end1) : stream_id_range(items, lo1, end1, items.end(), items.end()) {}
|
||||
stream_id_range::stream_id_range(
|
||||
utils::chunked_vector<cdc::stream_id> &items,
|
||||
utils::chunked_vector<cdc::stream_id>::iterator lo1,
|
||||
utils::chunked_vector<cdc::stream_id>::iterator end1,
|
||||
utils::chunked_vector<cdc::stream_id>::iterator lo2,
|
||||
utils::chunked_vector<cdc::stream_id>::iterator end2)
|
||||
: _lo1(lo1)
|
||||
, _end1(end1)
|
||||
, _lo2(lo2)
|
||||
, _end2(end2)
|
||||
{
|
||||
if (_lo2 != items.end()) {
|
||||
if (_lo1 != items.begin()) {
|
||||
on_internal_error(slogger, fmt::format("Invalid stream_id_range: _lo1 != items.begin()"));
|
||||
}
|
||||
if (_end2 != items.end()) {
|
||||
on_internal_error(slogger, fmt::format("Invalid stream_id_range: _end2 != items.end()"));
|
||||
}
|
||||
}
|
||||
if (_end1 > _lo2)
|
||||
on_internal_error(slogger, fmt::format("Invalid stream_id_range: _end1 > _lo2"));
|
||||
}
|
||||
|
||||
void stream_id_range::set_starting_position(const cdc::stream_id &update_to) {
|
||||
_skip_to = &update_to;
|
||||
}
|
||||
|
||||
void stream_id_range::prepare_for_iterating()
|
||||
{
|
||||
if (_prepared) return;
|
||||
_prepared = true;
|
||||
// here we deal with unfortunate possibility of wrap around range - in which case we actually have
|
||||
// two ranges (lo1, end1) and (lo2, end2), where lo1 will be begin() and end2 will be end().
|
||||
// the whole range needs to be sorted by `compare_lexicographically`, so we have to manually merge two ranges together and then sort them.
|
||||
// We also need to apply starting position update, if it was set, after merging and sorting.
|
||||
if (_end1 > _lo2)
|
||||
on_internal_error(slogger, fmt::format("Invalid stream_id_range: _end1 > _lo2"));
|
||||
|
||||
auto tgt = _end1;
|
||||
auto src = _lo2;
|
||||
// just try to move second range just after first one - if we have only one range,
|
||||
// second range will be empty and nothing will happen here
|
||||
for(; src != _end2; ++src, ++tgt) {
|
||||
std::swap(*tgt, *src);
|
||||
}
|
||||
// sort merged ranges by compare_lexicographically
|
||||
std::sort(_lo1, tgt, compare_lexicographically);
|
||||
|
||||
// apply starting position update if it was set
|
||||
// as a sanity check we require to find EXACT token match
|
||||
if (_skip_to) {
|
||||
auto it = std::lower_bound(_lo1, tgt, *_skip_to, compare_lexicographically);
|
||||
if (it == tgt || it->token() != _skip_to->token()) {
|
||||
slogger.info("Could not find starting position update shard id {}", *_skip_to);
|
||||
} else {
|
||||
_lo1 = std::next(it);
|
||||
}
|
||||
}
|
||||
_end1 = tgt;
|
||||
}
|
||||
|
||||
// the function returns `stream_id_range` that will allow iteration over children Streams shards for the Streams shard `parent`
|
||||
// a child Streams shard is defined as a Streams shard that touches token range that was previously covered by `parent` Streams shard
|
||||
// Streams shard contains a token, that represents end of the token range for that Streams shard (inclusive)
|
||||
// begginning of the token range is defined by previous Streams shard's token + 1
|
||||
// NOTE: With vnodes, ranges of Streams' shards wrap, while with tablets the biggest allowed token number is always a range end.
|
||||
// NOTE: both streams generation are guaranteed to cover whole range and be non-empty
|
||||
// NOTE: it's possible to get more than one stream shard with the same token value (thus some of those stream shards will be empty) -
|
||||
// for simplicity we will emit empty stream shards as well.
|
||||
//
|
||||
// to find children we will first find parent Streams shard in parent_streams by its token
|
||||
// then we will find previous Streams shard in parent stream - that will determine range
|
||||
// then based on the range we will find children Streams shards in current_streams
|
||||
// NOTE: function sorts / reorders current_streams
|
||||
// NOTE: function assumes parent_streams is sorted by compare_by_token and it doesn't modify it
|
||||
stream_id_range find_children_range_from_parent_token(
|
||||
const utils::chunked_vector<cdc::stream_id>& parent_streams,
|
||||
utils::chunked_vector<cdc::stream_id>& current_streams,
|
||||
cdc::stream_id parent,
|
||||
bool uses_tablets
|
||||
) {
|
||||
// sanity checks for required preconditions
|
||||
if (parent_streams.empty()) {
|
||||
on_internal_error(slogger, fmt::format("parent_streams is empty") );
|
||||
}
|
||||
if (current_streams.empty()) {
|
||||
on_internal_error(slogger, fmt::format("current_streams is empty") );
|
||||
}
|
||||
|
||||
// first let's cover obvious cases
|
||||
// if we have only one parent Streams shard, then all children belong to it
|
||||
if (parent_streams.size() == 1) {
|
||||
return stream_id_range{ current_streams, current_streams.begin(), current_streams.end() };
|
||||
}
|
||||
// if we have only one current Streams shard, then every parent maps to it
|
||||
if (current_streams.size() == 1) {
|
||||
return stream_id_range{ current_streams, current_streams.begin(), current_streams.end() };
|
||||
}
|
||||
|
||||
// find parent Streams shard in parent_streams, it must be present and have exact match
|
||||
auto parent_shard_end_it = std::lower_bound(parent_streams.begin(), parent_streams.end(), parent.token(), [](const cdc::stream_id& id, const dht::token& t) {
|
||||
return id.token() < t;
|
||||
});
|
||||
if (parent_shard_end_it == parent_streams.end() || parent_shard_end_it->token() != parent.token()) {
|
||||
throw api_error::validation(fmt::format("Invalid ShardFilter.ShardId value - shard {} not found", parent));
|
||||
}
|
||||
|
||||
std::sort(current_streams.begin(), current_streams.end(), compare_by_token);
|
||||
|
||||
utils::chunked_vector<cdc::stream_id>::iterator child_shard_begin_it;
|
||||
// upper_bound gives us the first element with token strictly greater than
|
||||
// parent's end token - this is the correct one-past-end for an inclusive
|
||||
// boundary and handles duplicate tokens (multiple children sharing a token)
|
||||
auto child_shard_end_it = std::upper_bound(current_streams.begin(), current_streams.end(), parent_shard_end_it->token(), [](const dht::token& t, const cdc::stream_id& id) {
|
||||
return t < id.token();
|
||||
});
|
||||
|
||||
if (uses_tablets) {
|
||||
// tablets version - tablets don't wrap around and last token is always present
|
||||
// let's assume we've parent (first line) and child generation (second line):
|
||||
// NOTE: token space doesn't wrap around - instead we have a guarantee that last token
|
||||
// will be present as one of the shards
|
||||
// P=| 1 2 3 4|
|
||||
// C=| a b c d e|
|
||||
// we want to find children for each token from parent:
|
||||
// 1 -> a,b
|
||||
// 2 -> c
|
||||
// 3 -> d
|
||||
// 4 -> d, e
|
||||
// first we find token in P that is end of range of parent - parent_shard_end_it
|
||||
// - if parent_shard_end_it - 1 exists
|
||||
// - we take it as parent_shard_begin_it
|
||||
// - find the first child with token > parent_shard_begin_it and set it to child_shard_begin_it
|
||||
// - else previous one to parent_shard_end_it does not exist
|
||||
// - set child_shard_begin_it = C.begin()
|
||||
// - find the first child with token > parent_shard_end_it and set it to child_shard_end_it
|
||||
// - range [child_shard_begin_it, child_shard_end_it) represents children
|
||||
|
||||
// When the parent's end token is not directly present in the children
|
||||
// (merge scenario: several parent shards merged into fewer children),
|
||||
// the child whose range absorbs the parent's end is the first child
|
||||
// with token > parent_end_token. upper_bound already points there,
|
||||
// so we advance past it to include it in the [begin, end) range.
|
||||
if (child_shard_end_it == current_streams.begin() || std::prev(child_shard_end_it)->token() != parent_shard_end_it->token()) {
|
||||
if (child_shard_end_it == current_streams.end()) {
|
||||
on_internal_error(slogger, fmt::format("parent end token not present in children tokens and no child with greater token exists, for parent shard id {}, got parent shards [{}] and children shards [{}]",
|
||||
parent, fmt::join(parent_streams, "; "), fmt::join(current_streams, "; ")));
|
||||
}
|
||||
++child_shard_end_it;
|
||||
}
|
||||
|
||||
// end of parent token is also first token in parent streams - it means beginning of the parent's range
|
||||
// is the beginning of the token space - this means first child stream will be start of the children range
|
||||
if (parent_shard_end_it == parent_streams.begin()) {
|
||||
child_shard_begin_it = current_streams.begin();
|
||||
} else {
|
||||
// normal case - we have previous parent Streams shard that determines beginning of the range (exclusive)
|
||||
// upper_bound skips past all children at the previous parent's token (including duplicates)
|
||||
auto parent_shard_begin_it = std::prev(parent_shard_end_it);
|
||||
child_shard_begin_it = std::upper_bound(current_streams.begin(), current_streams.end(), parent_shard_begin_it->token(), [](const dht::token& t, const cdc::stream_id& id) {
|
||||
return t < id.token();
|
||||
});
|
||||
}
|
||||
|
||||
// simple range
|
||||
return stream_id_range{ current_streams, child_shard_begin_it, child_shard_end_it };
|
||||
} else {
|
||||
// vnodes version - vnodes wrap around
|
||||
// wrapping around make whole algorithm extremely confusing, because we wrap around on two levels,
|
||||
// both parent Streams shard might wrap around and children range might wrap around as well
|
||||
|
||||
// helper function to find a range in current_streams based on range from parent_streams, but without wrap around
|
||||
// if lo is not set, it means start from beginning of current_streams
|
||||
// if end is not set, it means go until end of current_streams
|
||||
auto find_range_in_children = [&](std::optional<utils::chunked_vector<cdc::stream_id>::const_iterator> lo, std::optional<utils::chunked_vector<cdc::stream_id>::const_iterator> end) -> std::pair<utils::chunked_vector<cdc::stream_id>::iterator, utils::chunked_vector<cdc::stream_id>::iterator> {
|
||||
utils::chunked_vector<cdc::stream_id>::iterator res_lo, res_end;
|
||||
if (!lo) {
|
||||
// beginning of the range
|
||||
res_lo = current_streams.begin();
|
||||
} else {
|
||||
// we use upper_bound as beginning of the range is exclusive
|
||||
res_lo = std::upper_bound(current_streams.begin(), current_streams.end(), (*lo)->token(), [](const dht::token& t, const cdc::stream_id& id) {
|
||||
return t < id.token();
|
||||
});
|
||||
}
|
||||
if (!end) {
|
||||
// end of the range
|
||||
res_end = current_streams.end();
|
||||
} else {
|
||||
// end of the range is inclusive, so we use upper_bound to find the first element
|
||||
// with token strictly greater than the end token - this correctly handles the case
|
||||
// where multiple children share the same token (e.g. small vnodes where several
|
||||
// shards fall back to the vnode-end token)
|
||||
res_end = std::upper_bound(current_streams.begin(), current_streams.end(), (*end)->token(), [](const dht::token& t, const cdc::stream_id& id) {
|
||||
return t < id.token();
|
||||
});
|
||||
// When the parent's end token is not directly present in the
|
||||
// children (merge scenario), the child whose range absorbs the
|
||||
// parent's end is at res_end. Advance past it so that the
|
||||
// half-open range [res_lo, res_end) includes it.
|
||||
if (res_end != current_streams.end() &&
|
||||
(res_end == current_streams.begin() || std::prev(res_end)->token() != (*end)->token())) {
|
||||
++res_end;
|
||||
}
|
||||
}
|
||||
return { res_lo, res_end };
|
||||
};
|
||||
auto parent_shard_begin_it = parent_shard_end_it;
|
||||
if (parent_shard_begin_it == parent_streams.begin()) {
|
||||
// end of the parent Streams shard is also first token in parent streams - it means wrap around case for parent
|
||||
// beginning of the parent's range is the last token in the parent streams
|
||||
// for example:
|
||||
// P=| 0 10 |
|
||||
// C=| -20 -10 |
|
||||
// searching for parent Streams shard at 0 will get us here - end of the parent is the first parent Streams shard
|
||||
// so beginning of the parent's range is the last parent Streams shard (10)
|
||||
parent_shard_begin_it = std::prev(parent_streams.end());
|
||||
|
||||
// we find two unwrapped ranges here - from beginning of current_streams to the end of the parent's range
|
||||
// (end is inclusive) - in our example it's (-inf, 0]
|
||||
auto [ lo1, end1 ] = find_range_in_children(std::nullopt, parent_shard_end_it);
|
||||
// and from the beginning of the parent's range (exclusive) to the end of current_streams
|
||||
// our example is (10, +inf)
|
||||
auto [ lo2, end2 ] = find_range_in_children(parent_shard_begin_it, std::nullopt);
|
||||
|
||||
// in rare cases those two ranges might overlap - so we check and merge if needed
|
||||
// for example:
|
||||
// P=| -30 -20 |
|
||||
// C=| -40 -10 |
|
||||
// searching for parent Streams shard at -30 will get us here - end of the parent is -30, beginning is -20
|
||||
// first search will give us (-inf, +inf) with end1 pointing to current_streams.end()
|
||||
// (because the range needs to include -10 position, so the iterator will point to the next one after - end of the current_streams)
|
||||
// second search will give us [-10, +inf) with lo2 pointing to current_streams[1]
|
||||
// which is less then end1 - so we need to merge those two ranges
|
||||
if (lo2 < end1) {
|
||||
assert(lo1 <= lo2);
|
||||
assert(end1 <= end2);
|
||||
end1 = end2;
|
||||
lo2 = end2 = current_streams.end();
|
||||
}
|
||||
return stream_id_range{ current_streams, lo1, end1, lo2, end2 };
|
||||
} else {
|
||||
// simpler case - parent doesn't wrap around and we have both begin and end in normal order
|
||||
// we search for single unwrapped range and adjust later if needed
|
||||
--parent_shard_begin_it;
|
||||
auto [ lo1, end1 ] = find_range_in_children(parent_shard_begin_it, parent_shard_end_it);
|
||||
auto lo2 = current_streams.end();
|
||||
auto end2 = current_streams.end();
|
||||
|
||||
// it's possible for simple case to still wrap around, when parent range lies after all children Streams shards
|
||||
// for example:
|
||||
// P=| 0 10 |
|
||||
// C=| -20 -10 |
|
||||
// when searching for parent shart at 0, we get parent range [0, 10)
|
||||
// unwrapped search will produce empty range and miss -20 child Streams shard, which is actually
|
||||
// owner of [0, 10) range (and is also a first Streams shard in current generation)
|
||||
// note, that searching for 0 parent will give correct result, but because algorithm in that case
|
||||
// detects wrap around case and chooses different if
|
||||
if (parent_shard_end_it->token() > current_streams.back().token() && lo1 != current_streams.begin()) {
|
||||
// wrap around case - children at the beginning of the sorted array
|
||||
// wrap around the ring and cover the parent's range. Include all
|
||||
// children sharing the first token (duplicate tokens are possible
|
||||
// for small vnodes where multiple shards fall back to the same token)
|
||||
end2 = lo2 = current_streams.begin();
|
||||
while(end2 != current_streams.end() && end2->token() == current_streams.front().token()) {
|
||||
++end2;
|
||||
}
|
||||
std::swap(lo1, lo2);
|
||||
std::swap(end1, end2);
|
||||
}
|
||||
return stream_id_range{ current_streams, lo1, end1, lo2, end2 };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
future<executor::request_return_type> executor::describe_stream(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
future<executor::request_return_type> executor::describe_stream(client_state& client_state, service_permit permit, rjson::value request) {
|
||||
_stats.api_operations.describe_stream++;
|
||||
|
||||
auto limit = rjson::get_opt<int>(request, "Limit").value_or(100); // according to spec
|
||||
@@ -803,11 +459,12 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
|
||||
// I.e. unparsable arn -> error.
|
||||
auto stream_arn = rjson::get<alternator::stream_arn>(request, "StreamArn");
|
||||
|
||||
schema_ptr bs;
|
||||
schema_ptr schema, bs;
|
||||
auto db = _proxy.data_dictionary();
|
||||
auto schema = get_schema_from_arn(_proxy, stream_arn);
|
||||
|
||||
try {
|
||||
auto cf = db.find_column_family(table_id(stream_arn));
|
||||
schema = cf.schema();
|
||||
bs = cdc::get_base_table(db.real_database(), *schema);
|
||||
} catch (...) {
|
||||
}
|
||||
@@ -815,12 +472,6 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
|
||||
if (!schema || !bs || !is_alternator_keyspace(schema->ks_name())) {
|
||||
throw api_error::resource_not_found("Invalid StreamArn");
|
||||
}
|
||||
auto normal_token_owners = _proxy.get_token_metadata_ptr()->count_normal_token_owners();
|
||||
|
||||
// _sdks.cdc_get_versioned_streams() uses quorum_if_many() underneath, which uses CL=QUORUM for many token owners and CL=ONE otherwise.
|
||||
auto describe_cl = (normal_token_owners > 1) ? db::consistency_level::QUORUM : db::consistency_level::ONE;
|
||||
maybe_audit(audit_info, audit::statement_category::QUERY, schema->ks_name(),
|
||||
bs->cf_name() + "|" + schema->cf_name(), "DescribeStream", request, describe_cl);
|
||||
|
||||
if (limit < 1) {
|
||||
throw api_error::validation("Limit must be 1 or more");
|
||||
@@ -845,8 +496,6 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
|
||||
} else {
|
||||
status = "ENABLED";
|
||||
}
|
||||
} else if (opts.enable_requested()) {
|
||||
status = "ENABLING";
|
||||
}
|
||||
|
||||
auto ttl = std::chrono::seconds(opts.ttl());
|
||||
@@ -855,9 +504,9 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
|
||||
|
||||
stream_view_type type = cdc_options_to_steam_view_type(opts);
|
||||
|
||||
rjson::add(stream_desc, "StreamArn", stream_arn);
|
||||
rjson::add(stream_desc, "StreamArn", alternator::stream_arn(schema->id()));
|
||||
rjson::add(stream_desc, "StreamViewType", type);
|
||||
rjson::add(stream_desc, "TableName", rjson::from_string(bs->cf_name()));
|
||||
rjson::add(stream_desc, "TableName", rjson::from_string(table_name(*bs)));
|
||||
|
||||
describe_key_schema(stream_desc, *bs);
|
||||
|
||||
@@ -869,48 +518,13 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
|
||||
// TODO: label
|
||||
// TODO: creation time
|
||||
|
||||
std::map<db_clock::time_point, cdc::streams_version> topologies;
|
||||
auto normal_token_owners = _proxy.get_token_metadata_ptr()->count_normal_token_owners();
|
||||
|
||||
// filter out cdc generations older than the table or now() - cdc::ttl (typically dynamodb_streams_max_window - 24h)
|
||||
if (schema->table().uses_tablets()) {
|
||||
// We can't use table creation time here, as tablets might report a
|
||||
// generation timestamp just before table creation. This is safe
|
||||
// because CDC generations are per-table and cannot pre-date the
|
||||
// table, so expanding the window won't pull in unrelated data.
|
||||
auto low_ts = db_clock::now() - ttl;
|
||||
topologies = co_await _system_keyspace.read_cdc_for_tablets_versioned_streams(bs->ks_name(), bs->cf_name(), low_ts);
|
||||
} else {
|
||||
auto normal_token_owners = _proxy.get_token_metadata_ptr()->count_normal_token_owners();
|
||||
auto low_ts = std::max(as_timepoint(schema->id()), db_clock::now() - ttl);
|
||||
topologies = co_await _sdks.cdc_get_versioned_streams(low_ts, { normal_token_owners });
|
||||
}
|
||||
|
||||
const auto e = topologies.end();
|
||||
std::optional<shard_id> shard_filter;
|
||||
|
||||
if (const rjson::value *shard_filter_obj = rjson::find(request, "ShardFilter")) {
|
||||
if (!shard_filter_obj->IsObject()) {
|
||||
throw api_error::validation("Invalid ShardFilter value - must be object");
|
||||
}
|
||||
std::string type;
|
||||
try {
|
||||
type = rjson::get<std::string>(*shard_filter_obj, "Type");
|
||||
} catch (...) {
|
||||
throw api_error::validation("Invalid ShardFilter.Type value - must be string `CHILD_SHARDS`");
|
||||
}
|
||||
if (type != "CHILD_SHARDS") {
|
||||
throw api_error::validation("Invalid ShardFilter.Type value - must be string `CHILD_SHARDS`");
|
||||
}
|
||||
try {
|
||||
shard_filter = rjson::get<shard_id>(*shard_filter_obj, "ShardId");
|
||||
} catch (const std::exception &e) {
|
||||
throw api_error::validation(fmt::format("Invalid ShardFilter.ShardId value - not a valid ShardId: {}", e.what()));
|
||||
}
|
||||
if (topologies.find(shard_filter->time) == topologies.end()) {
|
||||
throw api_error::validation(fmt::format("Invalid ShardFilter.ShardId value - corresponding generation not found: {}", shard_filter->id));
|
||||
}
|
||||
}
|
||||
auto low_ts = std::max(as_timepoint(schema->id()), db_clock::now() - ttl);
|
||||
|
||||
std::map<db_clock::time_point, cdc::streams_version> topologies = co_await _sdks.cdc_get_versioned_streams(low_ts, { normal_token_owners });
|
||||
auto e = topologies.end();
|
||||
auto prev = e;
|
||||
auto shards = rjson::empty_array();
|
||||
|
||||
@@ -922,6 +536,25 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
|
||||
i = topologies.find(shard_start->time);
|
||||
}
|
||||
|
||||
// for parent-child stuff we need id:s to be sorted by token
|
||||
// (see explanation above) since we want to find closest
|
||||
// token boundary when determining parent.
|
||||
// #7346 - we processed and searched children/parents in
|
||||
// stored order, which is not necessarily token order,
|
||||
// so the finding of "closest" token boundary (using upper bound)
|
||||
// could give somewhat weird results.
|
||||
static auto token_cmp = [](const cdc::stream_id& id1, const cdc::stream_id& id2) {
|
||||
return id1.token() < id2.token();
|
||||
};
|
||||
|
||||
// #7409 - shards must be returned in lexicographical order,
|
||||
// normal bytes compare is string_traits<int8_t>::compare.
|
||||
// thus bytes 0x8000 is less than 0x0000. By doing unsigned
|
||||
// compare instead we inadvertently will sort in string lexical.
|
||||
static auto id_cmp = [](const cdc::stream_id& id1, const cdc::stream_id& id2) {
|
||||
return compare_unsigned(id1.to_bytes(), id2.to_bytes()) < 0;
|
||||
};
|
||||
|
||||
// need a prev even if we are skipping stuff
|
||||
if (i != topologies.begin()) {
|
||||
prev = std::prev(i);
|
||||
@@ -930,18 +563,24 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
|
||||
for (; limit > 0 && i != e; prev = i, ++i) {
|
||||
auto& [ts, sv] = *i;
|
||||
|
||||
if (shard_filter && (prev == e || prev->first != shard_filter->time)) {
|
||||
shard_start = std::nullopt;
|
||||
continue;
|
||||
}
|
||||
last = std::nullopt;
|
||||
|
||||
auto lo = sv.streams.begin();
|
||||
auto end = sv.streams.end();
|
||||
|
||||
// #7409 - shards must be returned in lexicographical order,
|
||||
std::sort(sv.streams.begin(), sv.streams.end(), compare_lexicographically);
|
||||
if (prev != e) {
|
||||
std::sort(lo, end, id_cmp);
|
||||
|
||||
if (shard_start) {
|
||||
// find next shard position
|
||||
lo = std::upper_bound(lo, end, shard_start->id, id_cmp);
|
||||
shard_start = std::nullopt;
|
||||
}
|
||||
|
||||
if (lo != end && prev != e) {
|
||||
// We want older stuff sorted in token order so we can find matching
|
||||
// token range when determining parent Streams shard.
|
||||
std::stable_sort(prev->second.streams.begin(), prev->second.streams.end(), compare_by_token);
|
||||
// token range when determining parent shard.
|
||||
std::stable_sort(prev->second.streams.begin(), prev->second.streams.end(), token_cmp);
|
||||
}
|
||||
|
||||
auto expired = [&]() -> std::optional<db_clock::time_point> {
|
||||
@@ -954,29 +593,9 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
|
||||
return j->first + confidence_interval(db);
|
||||
}();
|
||||
|
||||
std::optional<stream_id_range> shard_range;
|
||||
while (lo != end) {
|
||||
auto& id = *lo++;
|
||||
|
||||
if (shard_filter) {
|
||||
// sanity check - we should never get here as there is if above (`shard_filter && prev == e` => `continue`)
|
||||
if (prev == e) {
|
||||
on_internal_error(slogger, fmt::format("Could not find parent generation for shard id {}, got generations [{}]", shard_filter->id, fmt::join(topologies | std::ranges::views::keys, "; ")));
|
||||
}
|
||||
|
||||
const bool uses_tablets = schema->table().uses_tablets();
|
||||
shard_range = find_children_range_from_parent_token(
|
||||
prev->second.streams,
|
||||
i->second.streams,
|
||||
shard_filter->id,
|
||||
uses_tablets
|
||||
);
|
||||
} else {
|
||||
shard_range = stream_id_range{ i->second.streams, i->second.streams.begin(), i->second.streams.end() };
|
||||
}
|
||||
if (shard_start) {
|
||||
shard_range->set_starting_position(shard_start->id);
|
||||
}
|
||||
shard_range->prepare_for_iterating();
|
||||
for(const auto &id : *shard_range) {
|
||||
auto shard = rjson::empty_object();
|
||||
|
||||
if (prev != e) {
|
||||
@@ -1001,7 +620,6 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
|
||||
|
||||
last = std::nullopt;
|
||||
}
|
||||
shard_start = std::nullopt;
|
||||
}
|
||||
|
||||
if (last) {
|
||||
@@ -1102,7 +720,7 @@ struct rapidjson::internal::TypeHelper<ValueType, alternator::shard_iterator_typ
|
||||
|
||||
namespace alternator {
|
||||
|
||||
future<executor::request_return_type> executor::get_shard_iterator(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
future<executor::request_return_type> executor::get_shard_iterator(client_state& client_state, service_permit permit, rjson::value request) {
|
||||
_stats.api_operations.get_shard_iterator++;
|
||||
|
||||
auto type = rjson::get<shard_iterator_type>(request, "ShardIteratorType");
|
||||
@@ -1118,22 +736,18 @@ future<executor::request_return_type> executor::get_shard_iterator(client_state&
|
||||
auto stream_arn = rjson::get<alternator::stream_arn>(request, "StreamArn");
|
||||
auto db = _proxy.data_dictionary();
|
||||
|
||||
schema_ptr schema = nullptr;
|
||||
std::optional<shard_id> sid;
|
||||
auto schema = get_schema_from_arn(_proxy, stream_arn);
|
||||
schema_ptr base_schema = nullptr;
|
||||
|
||||
try {
|
||||
base_schema = cdc::get_base_table(db.real_database(), *schema);
|
||||
auto cf = db.find_column_family(table_id(stream_arn));
|
||||
schema = cf.schema();
|
||||
sid = rjson::get<shard_id>(request, "ShardId");
|
||||
} catch (...) {
|
||||
}
|
||||
if (!schema || !base_schema || !is_alternator_keyspace(schema->ks_name())) {
|
||||
if (!schema || !cdc::get_base_table(db.real_database(), *schema) || !is_alternator_keyspace(schema->ks_name())) {
|
||||
throw api_error::resource_not_found("Invalid StreamArn");
|
||||
}
|
||||
|
||||
// Uses only node-local context (the metadata) to generate response
|
||||
maybe_audit(audit_info, audit::statement_category::QUERY, schema->ks_name(),
|
||||
base_schema->cf_name() + "|" + schema->cf_name(), "GetShardIterator", request);
|
||||
|
||||
if (!sid) {
|
||||
throw api_error::resource_not_found("Invalid ShardId");
|
||||
}
|
||||
@@ -1162,10 +776,11 @@ future<executor::request_return_type> executor::get_shard_iterator(client_state&
|
||||
break;
|
||||
}
|
||||
|
||||
shard_iterator iter(schema->id().uuid(), *sid, threshold, inclusive_of_threshold);
|
||||
shard_iterator iter(stream_arn, *sid, threshold, inclusive_of_threshold);
|
||||
|
||||
auto ret = rjson::empty_object();
|
||||
rjson::add(ret, "ShardIterator", iter);
|
||||
|
||||
return make_ready_future<executor::request_return_type>(rjson::print(std::move(ret)));
|
||||
}
|
||||
|
||||
@@ -1208,7 +823,7 @@ namespace alternator {
|
||||
};
|
||||
}
|
||||
|
||||
future<executor::request_return_type> executor::get_records(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
future<executor::request_return_type> executor::get_records(client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value request) {
|
||||
_stats.api_operations.get_records++;
|
||||
auto start_time = std::chrono::steady_clock::now();
|
||||
|
||||
@@ -1234,17 +849,16 @@ future<executor::request_return_type> executor::get_records(client_state& client
|
||||
if (!schema || !base || !is_alternator_keyspace(schema->ks_name())) {
|
||||
co_return api_error::resource_not_found(fmt::to_string(iter.table));
|
||||
}
|
||||
db::consistency_level cl = db::consistency_level::LOCAL_QUORUM;
|
||||
|
||||
maybe_audit(audit_info, audit::statement_category::QUERY, schema->ks_name(),
|
||||
base->cf_name() + "|" + schema->cf_name(), "GetRecords", request, cl);
|
||||
|
||||
tracing::add_table_name(trace_state, schema->ks_name(), schema->cf_name());
|
||||
|
||||
co_await verify_permission(_enforce_authorization, _warn_authorization, client_state, schema, auth::permission::SELECT, _stats);
|
||||
|
||||
db::consistency_level cl = db::consistency_level::LOCAL_QUORUM;
|
||||
partition_key pk = iter.shard.id.to_partition_key(*schema);
|
||||
|
||||
dht::partition_range_vector partition_ranges{ dht::partition_range::make_singular(dht::decorate_key(*schema, pk)) };
|
||||
|
||||
auto high_ts = db_clock::now() - confidence_interval(db);
|
||||
auto high_uuid = utils::UUID_gen::min_time_UUID(high_ts.time_since_epoch());
|
||||
auto lo = clustering_key_prefix::from_exploded(*schema, { iter.threshold.serialize() });
|
||||
@@ -1324,17 +938,17 @@ future<executor::request_return_type> executor::get_records(client_state& client
|
||||
|
||||
auto& metadata = result_set->get_metadata();
|
||||
|
||||
auto op_index = std::distance(metadata.get_names().begin(),
|
||||
auto op_index = std::distance(metadata.get_names().begin(),
|
||||
std::find_if(metadata.get_names().begin(), metadata.get_names().end(), [](const lw_shared_ptr<cql3::column_specification>& cdef) {
|
||||
return cdef->name->name() == op_column_name;
|
||||
})
|
||||
);
|
||||
auto ts_index = std::distance(metadata.get_names().begin(),
|
||||
auto ts_index = std::distance(metadata.get_names().begin(),
|
||||
std::find_if(metadata.get_names().begin(), metadata.get_names().end(), [](const lw_shared_ptr<cql3::column_specification>& cdef) {
|
||||
return cdef->name->name() == timestamp_column_name;
|
||||
})
|
||||
);
|
||||
auto eor_index = std::distance(metadata.get_names().begin(),
|
||||
auto eor_index = std::distance(metadata.get_names().begin(),
|
||||
std::find_if(metadata.get_names().begin(), metadata.get_names().end(), [](const lw_shared_ptr<cql3::column_specification>& cdef) {
|
||||
return cdef->name->name() == eor_column_name;
|
||||
})
|
||||
@@ -1379,19 +993,19 @@ future<executor::request_return_type> executor::get_records(client_state& client
|
||||
/**
|
||||
* We merge rows with same timestamp into a single event.
|
||||
* This is pretty much needed, because a CDC row typically
|
||||
* encodes ~half the info of an alternator write.
|
||||
*
|
||||
* encodes ~half the info of an alternator write.
|
||||
*
|
||||
* A big, big downside to how alternator records are written
|
||||
* (i.e. CQL), is that the distinction between INSERT and UPDATE
|
||||
* is somewhat lost/unmappable to actual eventName.
|
||||
* is somewhat lost/unmappable to actual eventName.
|
||||
* A write (currently) always looks like an insert+modify
|
||||
* regardless whether we wrote existing record or not.
|
||||
*
|
||||
* Maybe RMW ops could be done slightly differently so
|
||||
* regardless whether we wrote existing record or not.
|
||||
*
|
||||
* Maybe RMW ops could be done slightly differently so
|
||||
* we can distinguish them here...
|
||||
*
|
||||
*
|
||||
* For now, all writes will become MODIFY.
|
||||
*
|
||||
*
|
||||
* Note: we do not check the current pre/post
|
||||
* flags on CDC log, instead we use data to
|
||||
* drive what is returned. This is (afaict)
|
||||
@@ -1470,15 +1084,9 @@ future<executor::request_return_type> executor::get_records(client_state& client
|
||||
}
|
||||
|
||||
// ugh. figure out if we are and end-of-shard
|
||||
auto normal_token_owners = _proxy.get_token_metadata_ptr()->count_normal_token_owners();
|
||||
|
||||
db_clock::time_point ts;
|
||||
if (schema->table().uses_tablets()) {
|
||||
ts = co_await _system_keyspace.read_cdc_for_tablets_current_generation_timestamp(base->ks_name(), base->cf_name());
|
||||
} else {
|
||||
auto normal_token_owners = _proxy.get_token_metadata_ptr()->count_normal_token_owners();
|
||||
ts = co_await _sdks.cdc_current_generation_timestamp({ normal_token_owners });
|
||||
}
|
||||
|
||||
db_clock::time_point ts = co_await _sdks.cdc_current_generation_timestamp({ normal_token_owners });
|
||||
auto& shard = iter.shard;
|
||||
|
||||
if (shard.time < ts && ts < high_ts) {
|
||||
@@ -1514,7 +1122,6 @@ bool executor::add_stream_options(const rjson::value& stream_specification, sche
|
||||
|
||||
cdc::options opts;
|
||||
opts.enabled(true);
|
||||
opts.tablet_merge_blocked(true);
|
||||
// cdc::delta_mode is ignored by Alternator, so aim for the least overhead.
|
||||
opts.set_delta_mode(cdc::delta_mode::keys);
|
||||
opts.ttl(std::chrono::duration_cast<std::chrono::seconds>(dynamodb_streams_max_window).count());
|
||||
@@ -1549,30 +1156,24 @@ void executor::supplement_table_stream_info(rjson::value& descr, const schema& s
|
||||
if (opts.enabled()) {
|
||||
auto db = sp.data_dictionary();
|
||||
auto cf = db.find_table(schema.ks_name(), cdc::log_name(schema.cf_name()));
|
||||
stream_arn arn(cf.schema(), cdc::get_base_table(db.real_database(), *cf.schema()));
|
||||
stream_arn arn(cf.schema()->id());
|
||||
rjson::add(descr, "LatestStreamArn", arn);
|
||||
rjson::add(descr, "LatestStreamLabel", rjson::from_string(stream_label(*cf.schema())));
|
||||
} else if (!opts.enable_requested()) {
|
||||
return;
|
||||
}
|
||||
// For both enabled() and enable_requested():
|
||||
// DynamoDB returns StreamEnabled=true in StreamSpecification even when
|
||||
// the stream status is ENABLING (not yet fully active). We mirror this
|
||||
// behavior: enable_requested means the user asked for streams but CDC
|
||||
// is not yet finalized, so we still report StreamEnabled=true.
|
||||
auto stream_desc = rjson::empty_object();
|
||||
rjson::add(stream_desc, "StreamEnabled", true);
|
||||
|
||||
auto mode = stream_view_type::KEYS_ONLY;
|
||||
if (opts.preimage() && opts.postimage()) {
|
||||
mode = stream_view_type::NEW_AND_OLD_IMAGES;
|
||||
} else if (opts.preimage()) {
|
||||
mode = stream_view_type::OLD_IMAGE;
|
||||
} else if (opts.postimage()) {
|
||||
mode = stream_view_type::NEW_IMAGE;
|
||||
auto stream_desc = rjson::empty_object();
|
||||
rjson::add(stream_desc, "StreamEnabled", true);
|
||||
|
||||
auto mode = stream_view_type::KEYS_ONLY;
|
||||
if (opts.preimage() && opts.postimage()) {
|
||||
mode = stream_view_type::NEW_AND_OLD_IMAGES;
|
||||
} else if (opts.preimage()) {
|
||||
mode = stream_view_type::OLD_IMAGE;
|
||||
} else if (opts.postimage()) {
|
||||
mode = stream_view_type::NEW_IMAGE;
|
||||
}
|
||||
rjson::add(stream_desc, "StreamViewType", mode);
|
||||
rjson::add(descr, "StreamSpecification", std::move(stream_desc));
|
||||
}
|
||||
rjson::add(stream_desc, "StreamViewType", mode);
|
||||
rjson::add(descr, "StreamSpecification", std::move(stream_desc));
|
||||
}
|
||||
|
||||
} // namespace alternator
|
||||
|
||||
@@ -1,62 +0,0 @@
|
||||
/*
|
||||
* Copyright 2026-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "utils/chunked_vector.hh"
|
||||
#include "cdc/generation.hh"
|
||||
#include <generator>
|
||||
|
||||
namespace cdc {
|
||||
class stream_id;
|
||||
}
|
||||
|
||||
namespace alternator {
|
||||
class stream_id_range {
|
||||
// helper class for manipulating (possibly wrapped around) range of stream_ids
|
||||
// it holds one or two ranges [lo1, end1) and [lo2, end2)
|
||||
// if the range doesn't wrap around, then lo2 == end2 == items.end()
|
||||
// if the range wraps around, then
|
||||
// `lo1 == items.begin() and end2 == items.end()` must be true
|
||||
// the object doesn't own `items`, but it does manipulate it - it will
|
||||
// reorder elements (so both ranges were next to each other) and sort them by unsigned comparison
|
||||
// usage - create an object with needed ranges. before iteration call `prepare_for_iterating` method -
|
||||
// it will reorder elements of `items` array to what is needed and then call begin / end pair.
|
||||
// note - `items` array will be modified - elements will be reordered, but no elements will be added or removed.
|
||||
// `items` array must stay intact as long as iteration is in progress.
|
||||
utils::chunked_vector<cdc::stream_id>::iterator _lo1 = {}, _end1 = {}, _lo2 = {}, _end2 = {};
|
||||
const cdc::stream_id* _skip_to = nullptr;
|
||||
bool _prepared = false;
|
||||
public:
|
||||
stream_id_range(
|
||||
utils::chunked_vector<cdc::stream_id> &items,
|
||||
utils::chunked_vector<cdc::stream_id>::iterator lo1,
|
||||
utils::chunked_vector<cdc::stream_id>::iterator end1);
|
||||
stream_id_range(
|
||||
utils::chunked_vector<cdc::stream_id> &items,
|
||||
utils::chunked_vector<cdc::stream_id>::iterator lo1,
|
||||
utils::chunked_vector<cdc::stream_id>::iterator end1,
|
||||
utils::chunked_vector<cdc::stream_id>::iterator lo2,
|
||||
utils::chunked_vector<cdc::stream_id>::iterator end2);
|
||||
|
||||
void set_starting_position(const cdc::stream_id &update_to);
|
||||
// Must be called after construction and after set_starting_position()
|
||||
// (if used), but before begin()/end() iteration.
|
||||
void prepare_for_iterating();
|
||||
|
||||
utils::chunked_vector<cdc::stream_id>::iterator begin() const { return _lo1; }
|
||||
utils::chunked_vector<cdc::stream_id>::iterator end() const { return _end1; }
|
||||
};
|
||||
|
||||
stream_id_range find_children_range_from_parent_token(
|
||||
const utils::chunked_vector<cdc::stream_id>& parent_streams,
|
||||
utils::chunked_vector<cdc::stream_id>& current_streams,
|
||||
cdc::stream_id parent,
|
||||
bool uses_tablets
|
||||
);
|
||||
}
|
||||
@@ -44,7 +44,6 @@
|
||||
#include "cql3/query_options.hh"
|
||||
#include "cql3/column_identifier.hh"
|
||||
#include "alternator/executor.hh"
|
||||
#include "alternator/executor_util.hh"
|
||||
#include "alternator/controller.hh"
|
||||
#include "alternator/serialization.hh"
|
||||
#include "alternator/ttl_tag.hh"
|
||||
@@ -59,17 +58,13 @@ static logging::logger tlogger("alternator_ttl");
|
||||
|
||||
namespace alternator {
|
||||
|
||||
future<executor::request_return_type> executor::update_time_to_live(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
future<executor::request_return_type> executor::update_time_to_live(client_state& client_state, service_permit permit, rjson::value request) {
|
||||
_stats.api_operations.update_time_to_live++;
|
||||
if (!_proxy.features().alternator_ttl) {
|
||||
co_return api_error::unknown_operation("UpdateTimeToLive not yet supported. Upgrade all nodes to a version that supports it.");
|
||||
}
|
||||
|
||||
schema_ptr schema = get_table(_proxy, request);
|
||||
|
||||
maybe_audit(audit_info, audit::statement_category::DDL,
|
||||
schema->ks_name(), schema->cf_name(), "UpdateTimeToLive", request);
|
||||
|
||||
rjson::value* spec = rjson::find(request, "TimeToLiveSpecification");
|
||||
if (!spec || !spec->IsObject()) {
|
||||
co_return api_error::validation("UpdateTimeToLive missing mandatory TimeToLiveSpecification");
|
||||
@@ -119,13 +114,9 @@ future<executor::request_return_type> executor::update_time_to_live(client_state
|
||||
co_return rjson::print(std::move(response));
|
||||
}
|
||||
|
||||
future<executor::request_return_type> executor::describe_time_to_live(client_state& client_state, service_permit permit, rjson::value request, std::unique_ptr<audit::audit_info_alternator>& audit_info) {
|
||||
future<executor::request_return_type> executor::describe_time_to_live(client_state& client_state, service_permit permit, rjson::value request) {
|
||||
_stats.api_operations.describe_time_to_live++;
|
||||
schema_ptr schema = get_table(_proxy, request);
|
||||
|
||||
maybe_audit(audit_info, audit::statement_category::QUERY,
|
||||
schema->ks_name(), schema->cf_name(), "DescribeTimeToLive", request);
|
||||
|
||||
std::map<sstring, sstring> tags_map = get_tags_of_table_or_throw(schema);
|
||||
rjson::value desc = rjson::empty_object();
|
||||
auto i = tags_map.find(TTL_TAG_KEY);
|
||||
|
||||
@@ -82,16 +82,15 @@ void set_config(std::shared_ptr < api_registry_builder20 > rb, http_context& ctx
|
||||
});
|
||||
});
|
||||
|
||||
cs::find_config_id.set(r, [&cfg] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
auto id = req->get_path_param("id");
|
||||
auto value = co_await cfg.value_as_json_string_for_name(id);
|
||||
if (!value) {
|
||||
throw bad_param_exception(sstring("No such config entry: ") + id);
|
||||
cs::find_config_id.set(r, [&cfg] (const_req r) {
|
||||
auto id = r.get_path_param("id");
|
||||
for (auto&& cfg_ref : cfg.values()) {
|
||||
auto&& cfg = cfg_ref.get();
|
||||
if (id == cfg.name()) {
|
||||
return cfg.value_as_json();
|
||||
}
|
||||
}
|
||||
//value is already a json string
|
||||
json::json_return_type ret{json::json_void()};
|
||||
ret._res = std::move(*value);
|
||||
co_return ret;
|
||||
throw bad_param_exception(sstring("No such config entry: ") + id);
|
||||
});
|
||||
|
||||
sp::get_rpc_timeout.set(r, [&cfg](const_req req) {
|
||||
|
||||
@@ -123,13 +123,12 @@ static future<json::json_return_type> sum_estimated_histogram(sharded<service::
|
||||
});
|
||||
}
|
||||
|
||||
static future<json::json_return_type> sum_estimated_histogram(sharded<service::storage_proxy>& proxy, service::storage_proxy_stats::cas_contention_histogram service::storage_proxy_stats::stats::*f) {
|
||||
static future<json::json_return_type> sum_estimated_histogram(sharded<service::storage_proxy>& proxy, utils::estimated_histogram service::storage_proxy_stats::stats::*f) {
|
||||
|
||||
return two_dimensional_map_reduce(proxy, f, utils::estimated_histogram_with_max_merge<service::storage_proxy_stats::cas_contention_histogram::MAX>,
|
||||
service::storage_proxy_stats::cas_contention_histogram()).then([](const service::storage_proxy_stats::cas_contention_histogram& val) {
|
||||
return two_dimensional_map_reduce(proxy, f, utils::estimated_histogram_merge,
|
||||
utils::estimated_histogram()).then([](const utils::estimated_histogram& val) {
|
||||
utils_json::estimated_histogram res;
|
||||
res.bucket_offsets = val.get_buckets_offsets();
|
||||
res.buckets = val.get_buckets_counts();
|
||||
res = val;
|
||||
return make_ready_future<json::json_return_type>(res);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1743,11 +1743,11 @@ rest_get_vnode_tablet_migration(http_context& ctx, sharded<service::storage_serv
|
||||
throw std::runtime_error("vnodes-to-tablets migration requires all nodes to support the VNODES_TO_TABLETS_MIGRATIONS cluster feature");
|
||||
}
|
||||
auto keyspace = validate_keyspace(ctx, req);
|
||||
auto status = co_await ss.local().get_tablets_migration_status_with_node_details(keyspace);
|
||||
auto status = co_await ss.local().get_tablets_migration_status(keyspace);
|
||||
|
||||
ss::vnode_tablet_migration_status result;
|
||||
result.keyspace = status.keyspace;
|
||||
result.status = fmt::format("{}", status.status);
|
||||
result.status = status.status;
|
||||
result.nodes._set = true;
|
||||
for (const auto& node : status.nodes) {
|
||||
ss::vnode_tablet_migration_node_status n;
|
||||
|
||||
@@ -126,13 +126,6 @@ static std::map<sstring, std::set<sstring>> parse_audit_tables(const sstring& da
|
||||
}
|
||||
boost::trim(parts[0]);
|
||||
boost::trim(parts[1]);
|
||||
// The real keyspace name of an Alternator table T is
|
||||
// "alternator_T". The audit_tables config flag uses the format
|
||||
// "alternator.T" to refer to such tables, so we expand it here
|
||||
// to the real keyspace name.
|
||||
if (parts[0] == "alternator") {
|
||||
parts[0] = "alternator_" + parts[1];
|
||||
}
|
||||
result[parts[0]].insert(std::move(parts[1]));
|
||||
}
|
||||
}
|
||||
@@ -235,55 +228,27 @@ future<> audit::shutdown() {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
future<> audit::log(const audit_info& audit_info, const service::client_state& client_state, std::optional<db::consistency_level> cl, bool error) {
|
||||
future<> audit::log(const audit_info* audit_info, service::query_state& query_state, const cql3::query_options& options, bool error) {
|
||||
const service::client_state& client_state = query_state.get_client_state();
|
||||
socket_address node_ip = _token_metadata.get()->get_topology().my_address().addr();
|
||||
db::consistency_level cl = options.get_consistency();
|
||||
thread_local static sstring no_username("undefined");
|
||||
static const sstring anonymous_username("anonymous");
|
||||
const sstring& username = client_state.user() ? client_state.user()->name.value_or(anonymous_username) : no_username;
|
||||
socket_address client_ip = client_state.get_client_address().addr();
|
||||
socket_address node_ip = _token_metadata.get()->get_topology().my_address().addr();
|
||||
if (logger.is_enabled(logging::log_level::debug)) {
|
||||
logger.debug("Log written: node_ip {} category {} cl {} error {} keyspace {} query '{}' client_ip {} table {} username {}",
|
||||
node_ip, audit_info.category_string(), cl, error, audit_info.keyspace(),
|
||||
audit_info.query(), client_ip, audit_info.table(), username);
|
||||
node_ip, audit_info->category_string(), cl, error, audit_info->keyspace(),
|
||||
audit_info->query(), client_ip, audit_info->table(), username);
|
||||
}
|
||||
return futurize_invoke(std::mem_fn(&storage_helper::write), _storage_helper_ptr, &audit_info, node_ip, client_ip, cl, username, error)
|
||||
return futurize_invoke(std::mem_fn(&storage_helper::write), _storage_helper_ptr, audit_info, node_ip, client_ip, cl, username, error)
|
||||
.handle_exception([audit_info, node_ip, client_ip, cl, username, error] (auto ep) {
|
||||
logger.error("Unexpected exception when writing log with: node_ip {} category {} cl {} error {} keyspace {} query '{}' client_ip {} table {} username {} exception {}",
|
||||
node_ip, audit_info.category_string(), cl, error, audit_info.keyspace(),
|
||||
audit_info.query(), client_ip, audit_info.table(), username, ep);
|
||||
node_ip, audit_info->category_string(), cl, error, audit_info->keyspace(),
|
||||
audit_info->query(), client_ip, audit_info->table(),username, ep);
|
||||
});
|
||||
}
|
||||
|
||||
static future<> maybe_log(const audit_info& audit_info, const service::client_state& client_state, std::optional<db::consistency_level> cl, bool error) {
|
||||
if(audit::audit_instance().local_is_initialized() && audit::local_audit_instance().should_log(audit_info)) {
|
||||
return audit::local_audit_instance().log(audit_info, client_state, cl, error);
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
static future<> inspect(const audit_info& audit_info, const service::query_state& query_state, const cql3::query_options& options, bool error) {
|
||||
return maybe_log(audit_info, query_state.get_client_state(), options.get_consistency(), error);
|
||||
}
|
||||
|
||||
future<> inspect(shared_ptr<cql3::cql_statement> statement, const service::query_state& query_state, const cql3::query_options& options, bool error) {
|
||||
const auto audit_info = statement->get_audit_info();
|
||||
if (audit_info == nullptr) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
if (audit_info->batch()) {
|
||||
cql3::statements::batch_statement* batch = static_cast<cql3::statements::batch_statement*>(statement.get());
|
||||
return do_for_each(batch->statements().begin(), batch->statements().end(), [&query_state, &options, error] (auto&& m) {
|
||||
return inspect(m.statement, query_state, options, error);
|
||||
});
|
||||
} else {
|
||||
return inspect(*audit_info, query_state, options, error);
|
||||
}
|
||||
}
|
||||
|
||||
future<> inspect(const audit_info_alternator& ai, const service::client_state& client_state, bool error) {
|
||||
return maybe_log(static_cast<const audit_info&>(ai), client_state, ai.get_cl(), error);
|
||||
}
|
||||
|
||||
future<> audit::log_login(const sstring& username, socket_address client_ip, bool error) noexcept {
|
||||
socket_address node_ip = _token_metadata.get()->get_topology().my_address().addr();
|
||||
if (logger.is_enabled(logging::log_level::debug)) {
|
||||
@@ -297,6 +262,24 @@ future<> audit::log_login(const sstring& username, socket_address client_ip, boo
|
||||
});
|
||||
}
|
||||
|
||||
future<> inspect(shared_ptr<cql3::cql_statement> statement, service::query_state& query_state, const cql3::query_options& options, bool error) {
|
||||
auto audit_info = statement->get_audit_info();
|
||||
if (!audit_info) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
if (audit_info->batch()) {
|
||||
cql3::statements::batch_statement* batch = static_cast<cql3::statements::batch_statement*>(statement.get());
|
||||
return do_for_each(batch->statements().begin(), batch->statements().end(), [&query_state, &options, error] (auto&& m) {
|
||||
return inspect(m.statement, query_state, options, error);
|
||||
});
|
||||
} else {
|
||||
if (audit::local_audit_instance().should_log(audit_info)) {
|
||||
return audit::local_audit_instance().log(audit_info, query_state, options, error);
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}
|
||||
}
|
||||
|
||||
future<> inspect_login(const sstring& username, socket_address client_ip, bool error) {
|
||||
if (!audit::audit_instance().local_is_initialized() || !audit::local_audit_instance().should_log_login()) {
|
||||
return make_ready_future<>();
|
||||
@@ -309,21 +292,13 @@ bool audit::should_log_table(const sstring& keyspace, const sstring& name) const
|
||||
return keyspace_it != _audited_tables.cend() && keyspace_it->second.find(name) != keyspace_it->second.cend();
|
||||
}
|
||||
|
||||
bool audit::should_log(const audit_info& audit_info) const {
|
||||
return will_log(audit_info.category(), audit_info.keyspace(), audit_info.table());
|
||||
}
|
||||
|
||||
bool audit::will_log(statement_category cat, std::string_view keyspace, std::string_view table) const {
|
||||
// If keyspace is empty (e.g., ListTables, or batch operations spanning
|
||||
// multiple tables), the operation cannot be filtered by keyspace/table,
|
||||
// so it is logged whenever the category matches.
|
||||
return _audited_categories.contains(cat)
|
||||
&& (keyspace.empty()
|
||||
|| _audited_keyspaces.find(sstring(keyspace)) != _audited_keyspaces.cend()
|
||||
|| should_log_table(sstring(keyspace), sstring(table))
|
||||
|| cat == statement_category::AUTH
|
||||
|| cat == statement_category::ADMIN
|
||||
|| cat == statement_category::DCL);
|
||||
bool audit::should_log(const audit_info* audit_info) const {
|
||||
return _audited_categories.contains(audit_info->category())
|
||||
&& (_audited_keyspaces.find(audit_info->keyspace()) != _audited_keyspaces.cend()
|
||||
|| should_log_table(audit_info->keyspace(), audit_info->table())
|
||||
|| audit_info->category() == statement_category::AUTH
|
||||
|| audit_info->category() == statement_category::ADMIN
|
||||
|| audit_info->category() == statement_category::DCL);
|
||||
}
|
||||
|
||||
template<class T>
|
||||
|
||||
@@ -10,15 +10,14 @@
|
||||
#include "seastarx.hh"
|
||||
#include "utils/log.hh"
|
||||
#include "utils/observable.hh"
|
||||
#include "service/client_state.hh"
|
||||
#include "db/consistency_level_type.hh"
|
||||
#include "db/consistency_level.hh"
|
||||
#include "locator/token_metadata_fwd.hh"
|
||||
#include <seastar/core/sharded.hh>
|
||||
#include <seastar/util/log.hh>
|
||||
|
||||
#include "enum_set.hh"
|
||||
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
|
||||
namespace db {
|
||||
|
||||
@@ -71,15 +70,12 @@ using category_set = enum_set<super_enum<statement_category, statement_category:
|
||||
statement_category::AUTH,
|
||||
statement_category::ADMIN>>;
|
||||
|
||||
// Holds the audit metadata for a single request: the operation category,
|
||||
// target keyspace/table, and the query string to be logged.
|
||||
class audit_info {
|
||||
protected:
|
||||
class audit_info final {
|
||||
statement_category _category;
|
||||
sstring _keyspace;
|
||||
sstring _table;
|
||||
sstring _query;
|
||||
bool _batch; // used only for unpacking batches in CQL, not relevant for Alternator
|
||||
bool _batch;
|
||||
public:
|
||||
audit_info(statement_category cat, sstring keyspace, sstring table, bool batch)
|
||||
: _category(cat)
|
||||
@@ -87,17 +83,8 @@ public:
|
||||
, _table(std::move(table))
|
||||
, _batch(batch)
|
||||
{ }
|
||||
// 'operation' is for the cases where the query string does not contain it, like with Alternator
|
||||
audit_info& set_query_string(std::string_view query_string, std::string_view operation = {}) {
|
||||
return set_query_string(sstring(query_string), sstring(operation));
|
||||
}
|
||||
audit_info& set_query_string(const sstring& query_string, const sstring& operation = "") {
|
||||
if(!operation.empty()) {
|
||||
_query = operation + "|" + query_string;
|
||||
} else {
|
||||
_query = query_string;
|
||||
}
|
||||
return *this;
|
||||
void set_query_string(const std::string_view& query_string) {
|
||||
_query = sstring(query_string);
|
||||
}
|
||||
const sstring& keyspace() const { return _keyspace; }
|
||||
const sstring& table() const { return _table; }
|
||||
@@ -109,23 +96,6 @@ public:
|
||||
|
||||
using audit_info_ptr = std::unique_ptr<audit_info>;
|
||||
|
||||
// Audit info for Alternator requests.
|
||||
// Unlike CQL, where the consistency level is available from query_options and
|
||||
// passed separately to audit::log(), Alternator has no query_options, so we
|
||||
// store the CL inside the audit_info object.
|
||||
// Consistency level is optional: only data read/write operations (GetItem,
|
||||
// PutItem, Query, Scan, etc.) have a meaningful CL. Schema operations and
|
||||
// metadata queries pass std::nullopt.
|
||||
class audit_info_alternator final : public audit_info {
|
||||
std::optional<db::consistency_level> _cl;
|
||||
public:
|
||||
audit_info_alternator(statement_category cat, sstring keyspace, sstring table, std::optional<db::consistency_level> cl = std::nullopt)
|
||||
: audit_info(cat, std::move(keyspace), std::move(table), false), _cl(cl)
|
||||
{}
|
||||
|
||||
std::optional<db::consistency_level> get_cl() const { return _cl; }
|
||||
};
|
||||
|
||||
class storage_helper;
|
||||
|
||||
class audit final : public seastar::async_sharded_service<audit> {
|
||||
@@ -172,15 +142,13 @@ public:
|
||||
future<> start(const db::config& cfg);
|
||||
future<> stop();
|
||||
future<> shutdown();
|
||||
bool should_log(const audit_info& audit_info) const;
|
||||
bool will_log(statement_category cat, std::string_view keyspace = {}, std::string_view table = {}) const;
|
||||
bool should_log(const audit_info* audit_info) const;
|
||||
bool should_log_login() const { return _audited_categories.contains(statement_category::AUTH); }
|
||||
future<> log(const audit_info& audit_info, const service::client_state& client_state, std::optional<db::consistency_level> cl, bool error);
|
||||
future<> log(const audit_info* audit_info, service::query_state& query_state, const cql3::query_options& options, bool error);
|
||||
future<> log_login(const sstring& username, socket_address client_ip, bool error) noexcept;
|
||||
};
|
||||
|
||||
future<> inspect(const audit_info_alternator& audit_info, const service::client_state& client_state, bool error);
|
||||
future<> inspect(shared_ptr<cql3::cql_statement> statement, const service::query_state& query_state, const cql3::query_options& options, bool error);
|
||||
future<> inspect(shared_ptr<cql3::cql_statement> statement, service::query_state& query_state, const cql3::query_options& options, bool error);
|
||||
|
||||
future<> inspect_login(const sstring& username, socket_address client_ip, bool error);
|
||||
|
||||
|
||||
@@ -38,8 +38,7 @@ audit_cf_storage_helper::audit_cf_storage_helper(cql3::query_processor& qp, serv
|
||||
"source inet, "
|
||||
"username text, "
|
||||
"error boolean, "
|
||||
"PRIMARY KEY ((date, node), event_time))"
|
||||
" WITH caching = {{'keys': 'NONE', 'rows_per_partition': 'NONE', 'enabled': 'false'}}",
|
||||
"PRIMARY KEY ((date, node), event_time))",
|
||||
KEYSPACE_NAME, TABLE_NAME),
|
||||
fmt::format("INSERT INTO {}.{} ("
|
||||
"date,"
|
||||
@@ -130,7 +129,7 @@ future<> audit_cf_storage_helper::stop() {
|
||||
future<> audit_cf_storage_helper::write(const audit_info* audit_info,
|
||||
socket_address node_ip,
|
||||
socket_address client_ip,
|
||||
std::optional<db::consistency_level> cl,
|
||||
db::consistency_level cl,
|
||||
const sstring& username,
|
||||
bool error) {
|
||||
return _table.insert(_qp, _mm, _dummy_query_state, make_data, audit_info, node_ip, client_ip, cl, username, error);
|
||||
@@ -146,7 +145,7 @@ future<> audit_cf_storage_helper::write_login(const sstring& username,
|
||||
cql3::query_options audit_cf_storage_helper::make_data(const audit_info* audit_info,
|
||||
socket_address node_ip,
|
||||
socket_address client_ip,
|
||||
std::optional<db::consistency_level> cl,
|
||||
db::consistency_level cl,
|
||||
const sstring& username,
|
||||
bool error) {
|
||||
auto time = std::chrono::system_clock::now();
|
||||
@@ -155,7 +154,7 @@ cql3::query_options audit_cf_storage_helper::make_data(const audit_info* audit_i
|
||||
auto date = millis_since_epoch / ticks_per_day * ticks_per_day;
|
||||
thread_local static int64_t last_nanos = 0;
|
||||
auto time_id = utils::UUID_gen::get_time_UUID(table_helper::make_monotonic_UUID_tp(last_nanos, time));
|
||||
auto consistency_level = cl ? format("{}", *cl) : sstring("");
|
||||
auto consistency_level = fmt::format("{}", cl);
|
||||
std::vector<cql3::raw_value> values {
|
||||
cql3::raw_value::make_value(timestamp_type->decompose(date)),
|
||||
cql3::raw_value::make_value(inet_addr_type->decompose(node_ip.addr())),
|
||||
|
||||
@@ -37,7 +37,7 @@ class audit_cf_storage_helper : public storage_helper {
|
||||
static cql3::query_options make_data(const audit_info* audit_info,
|
||||
socket_address node_ip,
|
||||
socket_address client_ip,
|
||||
std::optional<db::consistency_level> cl,
|
||||
db::consistency_level cl,
|
||||
const sstring& username,
|
||||
bool error);
|
||||
static cql3::query_options make_login_data(socket_address node_ip,
|
||||
@@ -55,7 +55,7 @@ public:
|
||||
virtual future<> write(const audit_info* audit_info,
|
||||
socket_address node_ip,
|
||||
socket_address client_ip,
|
||||
std::optional<db::consistency_level> cl,
|
||||
db::consistency_level cl,
|
||||
const sstring& username,
|
||||
bool error) override;
|
||||
virtual future<> write_login(const sstring& username,
|
||||
|
||||
@@ -42,7 +42,7 @@ future<> audit_composite_storage_helper::stop() {
|
||||
future<> audit_composite_storage_helper::write(const audit_info* audit_info,
|
||||
socket_address node_ip,
|
||||
socket_address client_ip,
|
||||
std::optional<db::consistency_level> cl,
|
||||
db::consistency_level cl,
|
||||
const sstring& username,
|
||||
bool error) {
|
||||
return seastar::parallel_for_each(
|
||||
|
||||
@@ -25,7 +25,7 @@ public:
|
||||
virtual future<> write(const audit_info* audit_info,
|
||||
socket_address node_ip,
|
||||
socket_address client_ip,
|
||||
std::optional<db::consistency_level> cl,
|
||||
db::consistency_level cl,
|
||||
const sstring& username,
|
||||
bool error) override;
|
||||
virtual future<> write_login(const sstring& username,
|
||||
|
||||
@@ -101,19 +101,18 @@ future<> audit_syslog_storage_helper::stop() {
|
||||
future<> audit_syslog_storage_helper::write(const audit_info* audit_info,
|
||||
socket_address node_ip,
|
||||
socket_address client_ip,
|
||||
std::optional<db::consistency_level> cl,
|
||||
db::consistency_level cl,
|
||||
const sstring& username,
|
||||
bool error) {
|
||||
auto now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
|
||||
tm time;
|
||||
localtime_r(&now, &time);
|
||||
auto cl_str = cl ? format("{}", *cl) : sstring("");
|
||||
sstring msg = seastar::format(R"(<{}>{:%h %e %T} scylla-audit: node="{}", category="{}", cl="{}", error="{}", keyspace="{}", query="{}", client_ip="{}", table="{}", username="{}")",
|
||||
LOG_NOTICE | LOG_USER,
|
||||
time,
|
||||
node_ip,
|
||||
audit_info->category_string(),
|
||||
cl_str,
|
||||
cl,
|
||||
(error ? "true" : "false"),
|
||||
audit_info->keyspace(),
|
||||
json_escape(audit_info->query()),
|
||||
|
||||
@@ -35,7 +35,7 @@ public:
|
||||
virtual future<> write(const audit_info* audit_info,
|
||||
socket_address node_ip,
|
||||
socket_address client_ip,
|
||||
std::optional<db::consistency_level> cl,
|
||||
db::consistency_level cl,
|
||||
const sstring& username,
|
||||
bool error) override;
|
||||
virtual future<> write_login(const sstring& username,
|
||||
|
||||
@@ -22,7 +22,7 @@ public:
|
||||
virtual future<> write(const audit_info* audit_info,
|
||||
socket_address node_ip,
|
||||
socket_address client_ip,
|
||||
std::optional<db::consistency_level> cl,
|
||||
db::consistency_level cl,
|
||||
const sstring& username,
|
||||
bool error) = 0;
|
||||
virtual future<> write_login(const sstring& username,
|
||||
|
||||
@@ -31,8 +31,6 @@ namespace {
|
||||
|
||||
logger mylog{"ldap_role_manager"}; // `log` is taken by math.
|
||||
|
||||
constexpr std::string_view user_placeholder = "{USER}";
|
||||
|
||||
struct url_desc_deleter {
|
||||
void operator()(LDAPURLDesc *p) {
|
||||
ldap_free_urldesc(p);
|
||||
@@ -41,141 +39,9 @@ struct url_desc_deleter {
|
||||
|
||||
using url_desc_ptr = std::unique_ptr<LDAPURLDesc, url_desc_deleter>;
|
||||
|
||||
/// Escapes LDAP filter assertion value per RFC 4515 Section 3.
|
||||
/// The characters *, (, ), \, and NUL must be backslash-hex-escaped
|
||||
/// to prevent filter injection when interpolating untrusted input.
|
||||
sstring escape_filter_value(std::string_view value) {
|
||||
size_t escapable_chars = 0;
|
||||
for (unsigned char ch : value) {
|
||||
switch (ch) {
|
||||
case '*':
|
||||
case '(':
|
||||
case ')':
|
||||
case '\\':
|
||||
case '\0':
|
||||
++escapable_chars;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (escapable_chars == 0) {
|
||||
return sstring(value);
|
||||
}
|
||||
|
||||
sstring escaped(value.size() + escapable_chars * 2, 0);
|
||||
size_t pos = 0;
|
||||
for (unsigned char ch : value) {
|
||||
switch (ch) {
|
||||
case '*':
|
||||
escaped[pos++] = '\\';
|
||||
escaped[pos++] = '2';
|
||||
escaped[pos++] = 'a';
|
||||
break;
|
||||
case '(':
|
||||
escaped[pos++] = '\\';
|
||||
escaped[pos++] = '2';
|
||||
escaped[pos++] = '8';
|
||||
break;
|
||||
case ')':
|
||||
escaped[pos++] = '\\';
|
||||
escaped[pos++] = '2';
|
||||
escaped[pos++] = '9';
|
||||
break;
|
||||
case '\\':
|
||||
escaped[pos++] = '\\';
|
||||
escaped[pos++] = '5';
|
||||
escaped[pos++] = 'c';
|
||||
break;
|
||||
case '\0':
|
||||
escaped[pos++] = '\\';
|
||||
escaped[pos++] = '0';
|
||||
escaped[pos++] = '0';
|
||||
break;
|
||||
default:
|
||||
escaped[pos++] = static_cast<char>(ch);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return escaped;
|
||||
}
|
||||
|
||||
/// Percent-encodes characters that are not RFC 3986 "unreserved"
|
||||
/// (ALPHA / DIGIT / '-' / '.' / '_' / '~').
|
||||
///
|
||||
/// Uses explicit ASCII range checks instead of std::isalnum() because
|
||||
/// the latter is locale-dependent and could pass non-ASCII characters
|
||||
/// through unencoded under certain locale settings.
|
||||
///
|
||||
/// This is applied AFTER RFC 4515 filter escaping when the value is
|
||||
/// substituted into an LDAP URL. It serves two purposes:
|
||||
/// 1. Prevents URL-level metacharacters ('?', '#') from breaking
|
||||
/// the URL structure parsed by ldap_url_parse.
|
||||
/// 2. Prevents percent-decoding (which ldap_url_parse performs on
|
||||
/// each component) from undoing the filter escaping, e.g. a
|
||||
/// literal "%2a" in the username would otherwise decode to '*'.
|
||||
sstring percent_encode_for_url(std::string_view value) {
|
||||
static constexpr char hex[] = "0123456789ABCDEF";
|
||||
|
||||
size_t chars_to_encode = 0;
|
||||
for (unsigned char ch : value) {
|
||||
if (!((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9')
|
||||
|| ch == '-' || ch == '.' || ch == '_' || ch == '~')) {
|
||||
++chars_to_encode;
|
||||
}
|
||||
}
|
||||
|
||||
if (chars_to_encode == 0) {
|
||||
return sstring(value);
|
||||
}
|
||||
|
||||
sstring encoded(value.size() + chars_to_encode * 2, 0);
|
||||
size_t pos = 0;
|
||||
for (unsigned char ch : value) {
|
||||
if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9')
|
||||
|| ch == '-' || ch == '.' || ch == '_' || ch == '~') {
|
||||
encoded[pos++] = static_cast<char>(ch);
|
||||
} else {
|
||||
encoded[pos++] = '%';
|
||||
encoded[pos++] = hex[ch >> 4];
|
||||
encoded[pos++] = hex[ch & 0x0F];
|
||||
}
|
||||
}
|
||||
|
||||
return encoded;
|
||||
}
|
||||
|
||||
/// Checks whether \p sentinel appears in any parsed URL component
|
||||
/// other than the filter (host, DN, attributes, extensions).
|
||||
bool sentinel_outside_filter(const LDAPURLDesc& desc, std::string_view sentinel) {
|
||||
auto contains = [&](const char* field) {
|
||||
return field && std::string_view(field).find(sentinel) != std::string_view::npos;
|
||||
};
|
||||
if (contains(desc.lud_host) || contains(desc.lud_dn)) {
|
||||
return true;
|
||||
}
|
||||
if (desc.lud_attrs) {
|
||||
for (int i = 0; desc.lud_attrs[i]; ++i) {
|
||||
if (contains(desc.lud_attrs[i])) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (desc.lud_exts) {
|
||||
for (int i = 0; desc.lud_exts[i]; ++i) {
|
||||
if (contains(desc.lud_exts[i])) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
url_desc_ptr parse_url(const sstring& url) {
|
||||
url_desc_ptr parse_url(std::string_view url) {
|
||||
LDAPURLDesc *desc = nullptr;
|
||||
if (ldap_url_parse(url.c_str(), &desc)) {
|
||||
if (ldap_url_parse(url.data(), &desc)) {
|
||||
mylog.error("error in ldap_url_parse({})", url);
|
||||
}
|
||||
return url_desc_ptr(desc);
|
||||
@@ -246,7 +112,6 @@ const resource_set& ldap_role_manager::protected_resources() const {
|
||||
}
|
||||
|
||||
future<> ldap_role_manager::start() {
|
||||
validate_query_template();
|
||||
if (!parse_url(get_url("dummy-user"))) { // Just need host and port -- any user should do.
|
||||
return make_exception_future(
|
||||
std::runtime_error(fmt::format("error getting LDAP server address from template {}", _query_template)));
|
||||
@@ -351,7 +216,7 @@ future<> ldap_role_manager::revoke(std::string_view, std::string_view, ::service
|
||||
}
|
||||
|
||||
future<role_set> ldap_role_manager::query_granted(std::string_view grantee_name, recursive_role_query) {
|
||||
const auto url = get_url(grantee_name);
|
||||
const auto url = get_url(grantee_name.data());
|
||||
auto desc = parse_url(url);
|
||||
if (!desc) {
|
||||
return make_exception_future<role_set>(std::runtime_error(format("Error parsing URL {}", url)));
|
||||
@@ -483,46 +348,7 @@ future<> ldap_role_manager::remove_attribute(std::string_view role_name, std::st
|
||||
}
|
||||
|
||||
sstring ldap_role_manager::get_url(std::string_view user) const {
|
||||
// Two-layer encoding protects against injection:
|
||||
// 1. RFC 4515 filter escaping neutralizes filter metacharacters (*, (, ), \, NUL)
|
||||
// 2. URL percent-encoding prevents URL structure injection (?, #) and blocks
|
||||
// ldap_url_parse's percent-decoding from undoing the filter escaping (%2a -> *)
|
||||
return boost::replace_all_copy(_query_template, user_placeholder,
|
||||
percent_encode_for_url(escape_filter_value(user)));
|
||||
}
|
||||
|
||||
void ldap_role_manager::validate_query_template() const {
|
||||
if (_query_template.find(user_placeholder) == sstring::npos) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Substitute {USER} with a sentinel and let ldap_url_parse tell us
|
||||
// which URL component it landed in. The sentinel is purely
|
||||
// alphanumeric so it cannot affect URL parsing.
|
||||
static constexpr std::string_view sentinel = "XLDAPSENTINELX";
|
||||
sstring test_url = boost::replace_all_copy(_query_template, user_placeholder, sentinel);
|
||||
auto desc = parse_url(test_url);
|
||||
if (!desc) {
|
||||
throw url_error(format("LDAP URL template is not a valid URL when {{USER}} is substituted: {}", _query_template));
|
||||
}
|
||||
|
||||
// The sentinel must appear in the filter ...
|
||||
if (!desc->lud_filter
|
||||
|| std::string_view(desc->lud_filter).find(sentinel) == std::string_view::npos) {
|
||||
throw url_error(format(
|
||||
"LDAP URL template places {{USER}} outside the filter component. "
|
||||
"RFC 4515 filter escaping only protects the filter; other components "
|
||||
"(e.g. the base DN) require different escaping and are not supported. "
|
||||
"Template: {}", _query_template));
|
||||
}
|
||||
// ... and nowhere else (host, DN, attributes, extensions).
|
||||
if (sentinel_outside_filter(*desc, sentinel)) {
|
||||
throw url_error(format(
|
||||
"LDAP URL template places {{USER}} outside the filter component. "
|
||||
"RFC 4515 filter escaping only protects the filter; other components "
|
||||
"(e.g. the host) require different escaping and are not supported. "
|
||||
"Template: {}", _query_template));
|
||||
}
|
||||
return boost::replace_all_copy(_query_template, "{USER}", user);
|
||||
}
|
||||
|
||||
future<std::vector<cql3::description>> ldap_role_manager::describe_role_grants() {
|
||||
|
||||
@@ -115,9 +115,6 @@ class ldap_role_manager : public role_manager {
|
||||
/// Macro-expands _query_template, returning the result.
|
||||
sstring get_url(std::string_view user) const;
|
||||
|
||||
/// Validates that {USER}, if present, is used only in the LDAP filter component.
|
||||
void validate_query_template() const;
|
||||
|
||||
/// Used to auto-create roles returned by ldap.
|
||||
future<> create_role(std::string_view role_name);
|
||||
|
||||
|
||||
@@ -35,15 +35,6 @@ enum class image_mode : uint8_t {
|
||||
|
||||
class options final {
|
||||
std::optional<bool> _enabled;
|
||||
bool _enable_requested = false;
|
||||
// When CDC is employed for the purpose of Alternator Streams and tablets are used,
|
||||
// tablet merges need to be blocked due to limitations of DynamoDB Streams API.
|
||||
// DynamoDB Streams allows to specify a single parent for a stream.
|
||||
// In ScyllaDB, there is a one-to-one association between streams and tablets,
|
||||
// so merging tablets means also merging streams. A merged stream has two parents and both
|
||||
// need to be done reading from before reading from the newly merged tablet. This is impossible
|
||||
// to be conveyed with DynamoDB Streams API and the result can be reordering of events in Streams.
|
||||
bool _tablet_merge_blocked = false;
|
||||
image_mode _preimage = image_mode::off;
|
||||
bool _postimage = false;
|
||||
delta_mode _delta_mode = delta_mode::full;
|
||||
@@ -57,8 +48,6 @@ public:
|
||||
|
||||
bool enabled() const { return _enabled.value_or(false); }
|
||||
bool is_enabled_set() const { return _enabled.has_value(); }
|
||||
bool enable_requested() const { return _enable_requested; }
|
||||
bool tablet_merge_blocked() const { return _tablet_merge_blocked; }
|
||||
bool preimage() const { return _preimage != image_mode::off; }
|
||||
bool full_preimage() const { return _preimage == image_mode::full; }
|
||||
bool postimage() const { return _postimage; }
|
||||
@@ -67,17 +56,6 @@ public:
|
||||
int ttl() const { return _ttl; }
|
||||
|
||||
void enabled(bool b) { _enabled = b; }
|
||||
// For the cases when enabling cannot be immediately enforced, like with Alternator Streams
|
||||
// which is incompatible with tablet merges, we need to be able to defer actual enablement
|
||||
// until any in-progress tablet merges complete. We expect that finalization happens
|
||||
// promptly: on_update_column_family callback in topology_coordinator.cc wakes up
|
||||
// the topology coordinator to run maybe_finalize_pending_stream_enables shortly
|
||||
// after the DDL. However, there is SCYLLADB-1304
|
||||
void enable_requested(bool b = true) { _enable_requested = b; }
|
||||
// Persistent flag checked by the tablet allocator to suppress new merge
|
||||
// decisions. Always set when Alternator Streams are enabled; inert on
|
||||
// vnode tables.
|
||||
void tablet_merge_blocked(bool b = true) { _tablet_merge_blocked = b; }
|
||||
void preimage(bool b) { preimage(b ? image_mode::on : image_mode::off); }
|
||||
void preimage(image_mode m) { _preimage = m; }
|
||||
void postimage(bool b) { _postimage = b; }
|
||||
|
||||
@@ -16,11 +16,8 @@
|
||||
#include "keys/keys.hh"
|
||||
#include "replica/database.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "db/schema_tables.hh"
|
||||
#include "dht/token-sharding.hh"
|
||||
#include "locator/token_metadata.hh"
|
||||
#include "locator/tablets.hh"
|
||||
#include "schema/schema_builder.hh"
|
||||
#include "types/set.hh"
|
||||
#include "utils/assert.hh"
|
||||
#include "utils/error_injection.hh"
|
||||
@@ -32,7 +29,6 @@
|
||||
#include "cdc/cdc_options.hh"
|
||||
#include "cdc/generation_service.hh"
|
||||
#include "cdc/log.hh"
|
||||
#include "service/migration_listener.hh"
|
||||
|
||||
extern logging::logger cdc_log;
|
||||
|
||||
@@ -780,59 +776,4 @@ future<> generation_service::garbage_collect_cdc_streams(utils::chunked_vector<c
|
||||
}
|
||||
}
|
||||
|
||||
future<utils::chunked_vector<canonical_mutation>> generation_service::maybe_finalize_pending_stream_enables(const locator::token_metadata& tm, api::timestamp_type ts) {
|
||||
utils::chunked_vector<canonical_mutation> muts;
|
||||
|
||||
if (utils::get_local_injector().enter("delay_cdc_stream_finalization")) {
|
||||
co_return std::move(muts);
|
||||
}
|
||||
|
||||
co_await _db.get_tables_metadata().for_each_table_gently([&] (table_id id, lw_shared_ptr<replica::table> t) -> future<> {
|
||||
auto s = t->schema();
|
||||
if (!s->cdc_options().enable_requested()) {
|
||||
co_return;
|
||||
}
|
||||
|
||||
// Only tablet tables can have enable_requested set
|
||||
if (!tm.tablets().has_tablet_map(id)) {
|
||||
co_return;
|
||||
}
|
||||
|
||||
auto& tmap = tm.tablets().get_tablet_map(id);
|
||||
if (tmap.needs_merge()) {
|
||||
cdc_log.debug("Table {}.{}: deferring stream enablement, tablet merge still in progress", s->ks_name(), s->cf_name());
|
||||
co_return;
|
||||
}
|
||||
|
||||
cdc_log.info("Table {}.{}: finalizing deferred stream enablement (no in-progress merges)", s->ks_name(), s->cf_name());
|
||||
|
||||
// Build a new schema with enabled=true, enable_requested=false
|
||||
schema_builder builder(s);
|
||||
cdc::options new_opts = s->cdc_options();
|
||||
new_opts.enabled(true);
|
||||
new_opts.enable_requested(false);
|
||||
new_opts.tablet_merge_blocked(true);
|
||||
builder.with_cdc_options(new_opts);
|
||||
auto new_schema = builder.build();
|
||||
|
||||
// Generate the schema mutation (table metadata update only, no columns/indices changed)
|
||||
utils::chunked_vector<mutation> schema_muts;
|
||||
db::schema_tables::add_table_or_view_to_schema_mutation(new_schema, ts, false, schema_muts);
|
||||
|
||||
// Trigger the CDC migration listener hook which creates the CDC log table.
|
||||
// This runs on_before_update_column_family listeners (including CDC's own
|
||||
// listener that creates/updates the log table schema).
|
||||
co_await seastar::async([&] {
|
||||
_db.get_notifier().before_update_column_family(*new_schema, *s, schema_muts, ts);
|
||||
});
|
||||
|
||||
for (auto& m : schema_muts) {
|
||||
muts.emplace_back(canonical_mutation(m));
|
||||
co_await coroutine::maybe_yield();
|
||||
}
|
||||
});
|
||||
|
||||
co_return std::move(muts);
|
||||
}
|
||||
|
||||
} // namespace cdc
|
||||
|
||||
@@ -18,7 +18,6 @@ class system_keyspace;
|
||||
|
||||
namespace locator {
|
||||
class tablet_map;
|
||||
class token_metadata;
|
||||
}
|
||||
|
||||
namespace cdc {
|
||||
@@ -65,12 +64,6 @@ public:
|
||||
|
||||
future<> generate_tablet_resize_update(utils::chunked_vector<canonical_mutation>& muts, table_id table, const locator::tablet_map& new_tablet_map, api::timestamp_type ts);
|
||||
|
||||
// Check for tables with enable_requested CDC option and finalize their
|
||||
// stream enablement if no in-progress tablet merges remain.
|
||||
// Returns schema mutations that transition enable_requested -> enabled,
|
||||
// including CDC log table creation side effects.
|
||||
future<utils::chunked_vector<canonical_mutation>> maybe_finalize_pending_stream_enables(const locator::token_metadata& tm, api::timestamp_type ts);
|
||||
|
||||
future<utils::chunked_vector<mutation>> garbage_collect_cdc_streams_for_table(table_id table, std::optional<std::chrono::seconds> ttl, api::timestamp_type ts);
|
||||
future<> garbage_collect_cdc_streams(utils::chunked_vector<canonical_mutation>& muts, api::timestamp_type ts);
|
||||
|
||||
|
||||
205
cdc/log.cc
205
cdc/log.cc
@@ -8,7 +8,7 @@
|
||||
|
||||
#include <utility>
|
||||
#include <algorithm>
|
||||
#include <unordered_set>
|
||||
|
||||
#include <boost/range/irange.hpp>
|
||||
#include <seastar/core/thread.hh>
|
||||
#include <seastar/core/metrics.hh>
|
||||
@@ -47,7 +47,6 @@
|
||||
#include "tracing/trace_state.hh"
|
||||
#include "stats.hh"
|
||||
#include "utils/labels.hh"
|
||||
#include "alternator/executor.hh"
|
||||
|
||||
namespace std {
|
||||
|
||||
@@ -196,7 +195,7 @@ public:
|
||||
for (auto sp : cfms) {
|
||||
const auto& schema = *sp;
|
||||
|
||||
if (!cdc_enabled(schema)) {
|
||||
if (!schema.cdc_options().enabled()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -465,18 +464,6 @@ cdc::options::options(const std::map<sstring, sstring>& map) {
|
||||
if (_ttl < 0) {
|
||||
throw exceptions::configuration_exception("Invalid CDC option: ttl must be >= 0");
|
||||
}
|
||||
} else if (key == "enable_requested") {
|
||||
if (is_true || is_false) {
|
||||
_enable_requested = is_true;
|
||||
} else {
|
||||
throw exceptions::configuration_exception("Invalid value for CDC option \"enable_requested\": " + p.second);
|
||||
}
|
||||
} else if (key == "tablet_merge_blocked") {
|
||||
if (is_true || is_false) {
|
||||
_tablet_merge_blocked = is_true;
|
||||
} else {
|
||||
throw exceptions::configuration_exception("Invalid value for CDC option \"tablet_merge_blocked\": " + p.second);
|
||||
}
|
||||
} else {
|
||||
throw exceptions::configuration_exception("Invalid CDC option: " + p.first);
|
||||
}
|
||||
@@ -484,7 +471,7 @@ cdc::options::options(const std::map<sstring, sstring>& map) {
|
||||
}
|
||||
|
||||
std::map<sstring, sstring> cdc::options::to_map() const {
|
||||
if (!is_enabled_set() && !_enable_requested) {
|
||||
if (!is_enabled_set()) {
|
||||
return {};
|
||||
}
|
||||
|
||||
@@ -494,8 +481,6 @@ std::map<sstring, sstring> cdc::options::to_map() const {
|
||||
{ "postimage", _postimage ? "true" : "false" },
|
||||
{ "delta", fmt::format("{}", _delta_mode) },
|
||||
{ "ttl", std::to_string(_ttl) },
|
||||
{ "enable_requested", enable_requested() ? "true" : "false" },
|
||||
{ "tablet_merge_blocked", _tablet_merge_blocked ? "true" : "false" },
|
||||
};
|
||||
}
|
||||
|
||||
@@ -504,9 +489,7 @@ sstring cdc::options::to_sstring() const {
|
||||
}
|
||||
|
||||
bool cdc::options::operator==(const options& o) const {
|
||||
return enabled() == o.enabled() && enable_requested() == o.enable_requested()
|
||||
&& _tablet_merge_blocked == o._tablet_merge_blocked
|
||||
&& _preimage == o._preimage && _postimage == o._postimage && _ttl == o._ttl
|
||||
return enabled() == o.enabled() && _preimage == o._preimage && _postimage == o._postimage && _ttl == o._ttl
|
||||
&& _delta_mode == o._delta_mode;
|
||||
}
|
||||
|
||||
@@ -1085,14 +1068,6 @@ public:
|
||||
return create_ck(_batch_no - 1);
|
||||
}
|
||||
|
||||
api::timestamp_type get_timestamp() const {
|
||||
return _ts;
|
||||
}
|
||||
|
||||
ttl_opt get_ttl() const {
|
||||
return _ttl;
|
||||
}
|
||||
|
||||
// A common pattern is to allocate a row and then immediately set its `cdc$operation` column.
|
||||
clustering_key allocate_new_log_row(operation op) {
|
||||
auto log_ck = allocate_new_log_row();
|
||||
@@ -1234,25 +1209,15 @@ struct process_row_visitor {
|
||||
row_states_map& _clustering_row_states;
|
||||
|
||||
const bool _generate_delta_values = true;
|
||||
|
||||
// true if we are processing changes that were produced by Alternator
|
||||
const bool _alternator;
|
||||
|
||||
// will be set to true, if any kind of change in row will be detected. Used only, when processing Alternator's changes.
|
||||
bool _alternator_any_value_changed = false;
|
||||
|
||||
// will be set to true, if Alternator's collection column (:attrs) will be modified only by removing elements
|
||||
// Used only, when processing Alternator's changes.
|
||||
bool _alternator_only_deletes = false;
|
||||
|
||||
process_row_visitor(
|
||||
const clustering_key& log_ck, stats::part_type_set& touched_parts, log_mutation_builder& builder,
|
||||
bool enable_updating_state, const clustering_key* base_ck, cell_map* row_state,
|
||||
row_states_map& clustering_row_states, bool generate_delta_values, bool alternator = false)
|
||||
row_states_map& clustering_row_states, bool generate_delta_values)
|
||||
: _log_ck(log_ck), _touched_parts(touched_parts), _builder(builder),
|
||||
_enable_updating_state(enable_updating_state), _base_ck(base_ck), _row_state(row_state),
|
||||
_clustering_row_states(clustering_row_states),
|
||||
_generate_delta_values(generate_delta_values), _alternator(alternator)
|
||||
_generate_delta_values(generate_delta_values)
|
||||
{}
|
||||
|
||||
void update_row_state(const column_definition& cdef, managed_bytes_opt value) {
|
||||
@@ -1262,17 +1227,7 @@ struct process_row_visitor {
|
||||
auto [it, _] = _clustering_row_states.try_emplace(*_base_ck);
|
||||
_row_state = &it->second;
|
||||
}
|
||||
auto [ it, inserted ] = _row_state->insert({ &cdef, std::nullopt });
|
||||
|
||||
// we ignore `_alternator_any_value_changed` for non-alternator changes.
|
||||
// we don't filter if `_enable_updating_state` is false, as on top of needing pre image
|
||||
// we also need cdc to build post image for us
|
||||
// we add check for `_alternator` here for performance reasons - no point in byte compare objects
|
||||
// if the return value will be ignored
|
||||
if (_alternator && _enable_updating_state) {
|
||||
_alternator_any_value_changed = _alternator_any_value_changed || it->second != value;
|
||||
}
|
||||
it->second = std::move(value);
|
||||
(*_row_state)[&cdef] = std::move(value);
|
||||
}
|
||||
|
||||
void live_atomic_cell(const column_definition& cdef, const atomic_cell_view& cell) {
|
||||
@@ -1422,8 +1377,6 @@ struct process_row_visitor {
|
||||
auto&& deleted_keys = std::get<1>(result);
|
||||
auto&& added_cells = std::get<2>(result);
|
||||
|
||||
_alternator_only_deletes = cdef.name_as_text() == alternator::executor::ATTRS_COLUMN_NAME && !deleted_keys.empty() && !added_cells.has_value();
|
||||
|
||||
// FIXME: we're doing redundant work: first we serialize the set of deleted keys into a blob,
|
||||
// then we deserialize again when merging images below
|
||||
managed_bytes_opt deleted_elements = std::nullopt;
|
||||
@@ -1481,31 +1434,12 @@ struct process_change_visitor {
|
||||
const bool _enable_updating_state = false;
|
||||
|
||||
row_states_map& _clustering_row_states;
|
||||
|
||||
// clustering keys' as bytes of rows that should be ignored, when writing cdc log changes
|
||||
// filtering will be done in `clean_up_noop_rows` function. Used only, when processing Alternator's changes.
|
||||
// Since Alternator clustering key is always at most single column, we store unpacked clustering key.
|
||||
// If Alternator table is without clustering key, that means partition has at most one row, any value present
|
||||
// in _alternator_clustering_keys_to_ignore will make us ignore that single row -
|
||||
// we will use an empty bytes object.
|
||||
std::unordered_set<bytes>& _alternator_clustering_keys_to_ignore;
|
||||
|
||||
cell_map& _static_row_state;
|
||||
|
||||
const bool _alternator_schema_has_no_clustering_key = false;
|
||||
|
||||
const bool _is_update = false;
|
||||
|
||||
const bool _generate_delta_values = true;
|
||||
|
||||
// only called, when processing Alternator's change
|
||||
void alternator_add_ckey_to_rows_to_ignore(const clustering_key& ckey) {
|
||||
throwing_assert(_request_options.alternator);
|
||||
auto res = ckey.explode();
|
||||
auto ckey_exploded = !res.empty() ? res[0] : bytes{};
|
||||
_alternator_clustering_keys_to_ignore.insert(ckey_exploded);
|
||||
}
|
||||
|
||||
void static_row_cells(auto&& visit_row_cells) {
|
||||
_touched_parts.set<stats::part_type::STATIC_ROW>();
|
||||
|
||||
@@ -1537,29 +1471,16 @@ struct process_change_visitor {
|
||||
}
|
||||
};
|
||||
|
||||
auto row_state = get_row_state(_clustering_row_states, ckey);
|
||||
clustering_row_cells_visitor v(
|
||||
log_ck, _touched_parts, _builder,
|
||||
_enable_updating_state, &ckey, row_state,
|
||||
_clustering_row_states, _generate_delta_values, _request_options.alternator);
|
||||
_enable_updating_state, &ckey, get_row_state(_clustering_row_states, ckey),
|
||||
_clustering_row_states, _generate_delta_values);
|
||||
if (_is_update && _request_options.alternator) {
|
||||
v._marker_op = row_state ? operation::update : operation::insert;
|
||||
v._marker_op = operation::update;
|
||||
}
|
||||
visit_row_cells(v);
|
||||
|
||||
if (_enable_updating_state) {
|
||||
if (_request_options.alternator && !v._alternator_any_value_changed) {
|
||||
// we need additional checks here:
|
||||
// - without `row_state != nullptr` inserting new key without additional fields (so only partition / clustering key) would be
|
||||
// treated as no-change, because without additional fields given by the user `v` visitor won't visit any cells
|
||||
// and _alternator_any_value_changed will be false (thus item will be skipped),
|
||||
// - without `row_state == nullptr && v._alternator_only_deletes` check we won't properly ignore
|
||||
// column deletes for existing items, but without the column we want to delete -
|
||||
// item exists (so row_state != nullptr), but we delete non-existing column, so no-op
|
||||
if (row_state != nullptr || (row_state == nullptr && v._alternator_only_deletes)) {
|
||||
alternator_add_ckey_to_rows_to_ignore(ckey);
|
||||
}
|
||||
}
|
||||
// #7716: if there are no regular columns, our visitor would not have visited any cells,
|
||||
// hence it would not have created a row_state for this row. In effect, postimage wouldn't be produced.
|
||||
// Ensure that the row state exists.
|
||||
@@ -1576,12 +1497,8 @@ struct process_change_visitor {
|
||||
auto log_ck = _builder.allocate_new_log_row(_row_delete_op);
|
||||
_builder.set_clustering_columns(log_ck, ckey);
|
||||
|
||||
if (_enable_updating_state) {
|
||||
if (get_row_state(_clustering_row_states, ckey)) {
|
||||
_clustering_row_states.erase(ckey);
|
||||
} else if (_request_options.alternator) {
|
||||
alternator_add_ckey_to_rows_to_ignore(ckey);
|
||||
}
|
||||
if (_enable_updating_state && get_row_state(_clustering_row_states, ckey)) {
|
||||
_clustering_row_states.erase(ckey);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1623,22 +1540,6 @@ struct process_change_visitor {
|
||||
_touched_parts.set<stats::part_type::PARTITION_DELETE>();
|
||||
auto log_ck = _builder.allocate_new_log_row(_partition_delete_op);
|
||||
if (_enable_updating_state) {
|
||||
if (_request_options.alternator && _alternator_schema_has_no_clustering_key && _clustering_row_states.empty()) {
|
||||
// Alternator's table can be with or without clustering key. If the clustering key exists,
|
||||
// delete request will be `clustered_row_delete` and will be hanlded there.
|
||||
// If the clustering key doesn't exist, delete request will be `partition_delete` and will be handled here.
|
||||
// The no-clustering-key case is slightly tricky, because insert of such item is handled by `clustered_row_cells`
|
||||
// and has some value as clustering_key (the value currently seems to be empty bytes object).
|
||||
// We don't want to rely on knowing the value exactly, instead we rely on the fact that
|
||||
// there will be at most one item in a partition. So if `_clustering_row_states` is empty,
|
||||
// we know the delete is for a non-existing item and we should ignore it.
|
||||
// If `_clustering_row_states` is not empty, then we know the delete is for an existing item
|
||||
// we should log it and clear `_clustering_row_states`.
|
||||
// The same logic applies to `alternator_add_ckey_to_rows_to_ignore` call in `clustered_row_delete`
|
||||
// we need to insert "anything" for no-clustering-key case, so further logic will check
|
||||
// if map is empty or not and will know if it should ignore the single partition item and keep it.
|
||||
alternator_add_ckey_to_rows_to_ignore({});
|
||||
}
|
||||
_clustering_row_states.clear();
|
||||
}
|
||||
}
|
||||
@@ -1746,47 +1647,6 @@ private:
|
||||
|
||||
stats::part_type_set _touched_parts;
|
||||
|
||||
std::unordered_set<bytes> _alternator_clustering_keys_to_ignore;
|
||||
const column_definition* _alternator_clustering_key_column = nullptr;
|
||||
|
||||
// the function will process mutations and remove rows that are in _alternator_clustering_keys_to_ignore
|
||||
// we need to take care and reindex clustering keys (cdc$batch_seq_no)
|
||||
// this is used for Alternator's changes only
|
||||
// NOTE: `_alternator_clustering_keys_to_ignore` must be not empty.
|
||||
mutation clean_up_noop_rows(mutation mut) {
|
||||
throwing_assert(!_alternator_clustering_keys_to_ignore.empty());
|
||||
auto after_mut = mutation(_log_schema, mut.key());
|
||||
if (!_alternator_clustering_key_column) {
|
||||
// no clustering key - only single row per partition
|
||||
// since _alternator_clustering_keys_to_ignore is not empty we need to drop that single row
|
||||
// so we just return empty mutation instead
|
||||
return after_mut;
|
||||
}
|
||||
int batch_seq = 0;
|
||||
for (rows_entry &row : mut.partition().mutable_non_dummy_rows()) {
|
||||
auto cell = row.row().cells().find_cell(_alternator_clustering_key_column->id);
|
||||
if (cell) {
|
||||
auto val = cell->as_atomic_cell(*_alternator_clustering_key_column).value().linearize();
|
||||
|
||||
if (_alternator_clustering_keys_to_ignore.contains(val)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
auto new_key = _builder->create_ck(batch_seq++);
|
||||
after_mut.partition().clustered_row(*_log_schema, std::move(new_key)) = std::move(row.row());
|
||||
}
|
||||
|
||||
if (batch_seq > 0) {
|
||||
// update end_of_batch marker
|
||||
// we don't need to clear previous one, as we only removed rows
|
||||
// we need to set it on the last row, because original last row might have been deleted
|
||||
// batch_seq == 0 -> no rows, after_mut is empty, all entries were dropped and there's nothing to write to cdc log
|
||||
auto last_key = _builder->create_ck(batch_seq - 1);
|
||||
after_mut.set_cell(last_key, log_meta_column_name_bytes("end_of_batch"), data_value(true), _builder->get_timestamp(), _builder->get_ttl());
|
||||
}
|
||||
|
||||
return after_mut;
|
||||
}
|
||||
public:
|
||||
transformer(db_context ctx, schema_ptr s, dht::decorated_key dk, const per_request_options& options)
|
||||
: _ctx(ctx)
|
||||
@@ -1796,20 +1656,7 @@ public:
|
||||
, _options(options)
|
||||
, _clustering_row_states(0, clustering_key::hashing(*_schema), clustering_key::equality(*_schema))
|
||||
, _uses_tablets(ctx._proxy.get_db().local().find_keyspace(_schema->ks_name()).uses_tablets())
|
||||
, _alternator_clustering_keys_to_ignore()
|
||||
{
|
||||
if (_options.alternator) {
|
||||
auto cks = _schema->clustering_key_columns();
|
||||
const column_definition *ck_def = nullptr;
|
||||
if (!cks.empty()) {
|
||||
auto it = _log_schema->columns_by_name().find(cks.front().name());
|
||||
if (it == _log_schema->columns_by_name().end()) {
|
||||
on_internal_error(cdc_log, fmt::format("failed to find clustering key `{}` in cdc log table `{}`", cks.front().name(), _log_schema->id()));
|
||||
}
|
||||
ck_def = it->second;
|
||||
}
|
||||
_alternator_clustering_key_column = ck_def;
|
||||
}
|
||||
}
|
||||
|
||||
// DON'T move the transformer after this
|
||||
@@ -1817,10 +1664,7 @@ public:
|
||||
const auto stream_id = _uses_tablets ? _ctx._cdc_metadata.get_tablet_stream(_log_schema->id(), ts, _dk.token()) : _ctx._cdc_metadata.get_vnode_stream(ts, _dk.token());
|
||||
_result_mutations.emplace_back(_log_schema, stream_id.to_partition_key(*_log_schema));
|
||||
_builder.emplace(_result_mutations.back(), ts, _dk.key(), *_schema);
|
||||
// alternator_streams_increased_compatibility set to true reads preimage, but we need to set
|
||||
// _enable_updating_state to true to keep track of changes and produce correct pre/post images even
|
||||
// if upper layer didn't request them explicitly.
|
||||
_enable_updating_state = _schema->cdc_options().postimage() || (!is_last && _schema->cdc_options().preimage()) || (_options.alternator && _options.alternator_streams_increased_compatibility);
|
||||
_enable_updating_state = _schema->cdc_options().postimage() || (!is_last && _schema->cdc_options().preimage());
|
||||
}
|
||||
|
||||
void produce_preimage(const clustering_key* ck, const one_kind_column_set& columns_to_include) override {
|
||||
@@ -1917,9 +1761,7 @@ public:
|
||||
._builder = *_builder,
|
||||
._enable_updating_state = _enable_updating_state,
|
||||
._clustering_row_states = _clustering_row_states,
|
||||
._alternator_clustering_keys_to_ignore = _alternator_clustering_keys_to_ignore,
|
||||
._static_row_state = _static_row_state,
|
||||
._alternator_schema_has_no_clustering_key = (_alternator_clustering_key_column == nullptr),
|
||||
._is_update = _is_update,
|
||||
._generate_delta_values = generate_delta_values(_builder->base_schema())
|
||||
};
|
||||
@@ -1929,19 +1771,10 @@ public:
|
||||
void end_record() override {
|
||||
SCYLLA_ASSERT(_builder);
|
||||
_builder->end_record();
|
||||
}
|
||||
|
||||
if (_options.alternator && !_alternator_clustering_keys_to_ignore.empty()) {
|
||||
// we filter mutations for Alternator's changes here.
|
||||
// We do it per mutation object (user might submit a batch of those in one go
|
||||
// and some might be splitted because of different timestamps),
|
||||
// ignore key set is cleared afterwards.
|
||||
// If single mutation object contains two separate changes to the same row
|
||||
// and at least one of them is ignored, all of them will be ignored.
|
||||
// This is not possible in Alternator - Alternator spec forbids reusing
|
||||
// primary key in single batch.
|
||||
_result_mutations.back() = clean_up_noop_rows(std::move(_result_mutations.back()));
|
||||
_alternator_clustering_keys_to_ignore.clear();
|
||||
}
|
||||
const row_states_map& clustering_row_states() const override {
|
||||
return _clustering_row_states;
|
||||
}
|
||||
|
||||
// Takes and returns generated cdc log mutations and associated statistics about parts touched during transformer's lifetime.
|
||||
@@ -2180,7 +2013,7 @@ cdc::cdc_service::impl::augment_mutation_call(lowres_clock::time_point timeout,
|
||||
tracing::trace(tr_state, "CDC: Preimage not enabled for the table, not querying current value of {}", m.decorated_key());
|
||||
}
|
||||
|
||||
return f.then([trans = std::move(trans), &mutations, idx, tr_state, &details, &options] (lw_shared_ptr<cql3::untyped_result_set> rs) mutable {
|
||||
return f.then([alternator_increased_compatibility, trans = std::move(trans), &mutations, idx, tr_state, &details, &options] (lw_shared_ptr<cql3::untyped_result_set> rs) mutable {
|
||||
auto& m = mutations[idx];
|
||||
auto& s = m.schema();
|
||||
|
||||
@@ -2198,10 +2031,10 @@ cdc::cdc_service::impl::augment_mutation_call(lowres_clock::time_point timeout,
|
||||
if (should_split(m, options)) {
|
||||
tracing::trace(tr_state, "CDC: Splitting {}", m.decorated_key());
|
||||
details.was_split = true;
|
||||
process_changes_with_splitting(m, trans, preimage, postimage);
|
||||
process_changes_with_splitting(m, trans, preimage, postimage, alternator_increased_compatibility);
|
||||
} else {
|
||||
tracing::trace(tr_state, "CDC: No need to split {}", m.decorated_key());
|
||||
process_changes_without_splitting(m, trans, preimage, postimage);
|
||||
process_changes_without_splitting(m, trans, preimage, postimage, alternator_increased_compatibility);
|
||||
}
|
||||
auto [log_mut, touched_parts] = std::move(trans).finish();
|
||||
const int generated_count = log_mut.size();
|
||||
|
||||
126
cdc/split.cc
126
cdc/split.cc
@@ -6,15 +6,26 @@
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
|
||||
*/
|
||||
|
||||
#include "bytes.hh"
|
||||
#include "bytes_fwd.hh"
|
||||
#include "mutation/atomic_cell.hh"
|
||||
#include "mutation/atomic_cell_or_collection.hh"
|
||||
#include "mutation/collection_mutation.hh"
|
||||
#include "mutation/mutation.hh"
|
||||
#include "mutation/tombstone.hh"
|
||||
#include "schema/schema.hh"
|
||||
|
||||
#include <seastar/core/sstring.hh>
|
||||
#include "types/concrete_types.hh"
|
||||
#include "types/types.hh"
|
||||
#include "types/user.hh"
|
||||
|
||||
#include "split.hh"
|
||||
#include "log.hh"
|
||||
#include "change_visitor.hh"
|
||||
#include "utils/managed_bytes.hh"
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
|
||||
extern logging::logger cdc_log;
|
||||
|
||||
@@ -599,8 +610,109 @@ bool should_split(const mutation& m, const per_request_options& options) {
|
||||
|| v._ts == api::missing_timestamp;
|
||||
}
|
||||
|
||||
// Returns true if the row state and the atomic and nonatomic entries represent
|
||||
// an equivalent item.
|
||||
static bool entries_match_row_state(const schema_ptr& base_schema, const cell_map& row_state, const std::vector<atomic_column_update>& atomic_entries,
|
||||
std::vector<nonatomic_column_update>& nonatomic_entries) {
|
||||
for (const auto& update : atomic_entries) {
|
||||
const column_definition& cdef = base_schema->column_at(column_kind::regular_column, update.id);
|
||||
const auto it = row_state.find(&cdef);
|
||||
if (it == row_state.end()) {
|
||||
return false;
|
||||
}
|
||||
if (to_managed_bytes_opt(update.cell.value().linearize()) != it->second) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (nonatomic_entries.empty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
for (const auto& update : nonatomic_entries) {
|
||||
const column_definition& cdef = base_schema->column_at(column_kind::regular_column, update.id);
|
||||
const auto it = row_state.find(&cdef);
|
||||
if (it == row_state.end()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// The only collection used by Alternator is a non-frozen map.
|
||||
auto current_raw_map = cdef.type->deserialize(*it->second);
|
||||
map_type_impl::native_type current_values = value_cast<map_type_impl::native_type>(current_raw_map);
|
||||
|
||||
if (current_values.size() != update.cells.size()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::unordered_map<sstring_view, bytes> current_values_map;
|
||||
for (const auto& entry : current_values) {
|
||||
const auto attr_name = std::string_view(value_cast<sstring>(entry.first));
|
||||
current_values_map[attr_name] = value_cast<bytes>(entry.second);
|
||||
}
|
||||
|
||||
for (const auto& [key, value] : update.cells) {
|
||||
const auto key_str = to_string_view(key);
|
||||
if (!value.is_live()) {
|
||||
if (current_values_map.contains(key_str)) {
|
||||
return false;
|
||||
}
|
||||
} else if (current_values_map[key_str] != value.value().linearize()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool should_skip(batch& changes, const mutation& base_mutation, change_processor& processor) {
|
||||
const schema_ptr& base_schema = base_mutation.schema();
|
||||
// Alternator doesn't use static updates and clustered range deletions.
|
||||
if (!changes.static_updates.empty() || !changes.clustered_range_deletions.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (clustered_row_insert& u : changes.clustered_inserts) {
|
||||
const cell_map* row_state = get_row_state(processor.clustering_row_states(), u.key);
|
||||
if (!row_state) {
|
||||
return false;
|
||||
}
|
||||
if (!entries_match_row_state(base_schema, *row_state, u.atomic_entries, u.nonatomic_entries)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
for (clustered_row_update& u : changes.clustered_updates) {
|
||||
const cell_map* row_state = get_row_state(processor.clustering_row_states(), u.key);
|
||||
if (!row_state) {
|
||||
return false;
|
||||
}
|
||||
if (!entries_match_row_state(base_schema, *row_state, u.atomic_entries, u.nonatomic_entries)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Skip only if the row being deleted does not exist (i.e. the deletion is a no-op).
|
||||
for (const auto& row_deletion : changes.clustered_row_deletions) {
|
||||
if (processor.clustering_row_states().contains(row_deletion.key)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Don't skip if the item exists.
|
||||
//
|
||||
// Increased DynamoDB Streams compatibility guarantees that single-item
|
||||
// operations will read the item and store it in the clustering row states.
|
||||
// If it is not found there, we may skip CDC. This is safe as long as the
|
||||
// assumptions of this operation's write isolation are not violated.
|
||||
if (changes.partition_deletions && processor.clustering_row_states().contains(clustering_key::make_empty())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
cdc_log.trace("Skipping CDC log for mutation {}", base_mutation);
|
||||
return true;
|
||||
}
|
||||
|
||||
void process_changes_with_splitting(const mutation& base_mutation, change_processor& processor,
|
||||
bool enable_preimage, bool enable_postimage) {
|
||||
bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility) {
|
||||
const auto base_schema = base_mutation.schema();
|
||||
auto changes = extract_changes(base_mutation);
|
||||
auto pk = base_mutation.key();
|
||||
@@ -620,6 +732,10 @@ void process_changes_with_splitting(const mutation& base_mutation, change_proces
|
||||
affected_clustered_columns_per_row = btch.get_affected_clustered_columns_per_row(*base_mutation.schema());
|
||||
}
|
||||
|
||||
if (alternator_strict_compatibility && should_skip(btch, base_mutation, processor)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const bool is_last = change_ts == last_timestamp;
|
||||
processor.begin_timestamp(change_ts, is_last);
|
||||
if (enable_preimage) {
|
||||
@@ -709,7 +825,13 @@ void process_changes_with_splitting(const mutation& base_mutation, change_proces
|
||||
}
|
||||
|
||||
void process_changes_without_splitting(const mutation& base_mutation, change_processor& processor,
|
||||
bool enable_preimage, bool enable_postimage) {
|
||||
bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility) {
|
||||
if (alternator_strict_compatibility) {
|
||||
auto changes = extract_changes(base_mutation);
|
||||
if (should_skip(changes.begin()->second, base_mutation, processor)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
auto ts = find_timestamp(base_mutation);
|
||||
processor.begin_timestamp(ts, true);
|
||||
|
||||
|
||||
@@ -66,12 +66,14 @@ public:
|
||||
// Tells processor we have reached end of record - last part
|
||||
// of a given timestamp batch
|
||||
virtual void end_record() = 0;
|
||||
|
||||
virtual const row_states_map& clustering_row_states() const = 0;
|
||||
};
|
||||
|
||||
bool should_split(const mutation& base_mutation, const per_request_options& options);
|
||||
void process_changes_with_splitting(const mutation& base_mutation, change_processor& processor,
|
||||
bool enable_preimage, bool enable_postimage);
|
||||
bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility);
|
||||
void process_changes_without_splitting(const mutation& base_mutation, change_processor& processor,
|
||||
bool enable_preimage, bool enable_postimage);
|
||||
bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility);
|
||||
|
||||
}
|
||||
|
||||
@@ -1355,35 +1355,6 @@ private:
|
||||
_sstables.erase(exhausted, _sstables.end());
|
||||
dynamic_cast<compaction_read_monitor_generator&>(unwrap_monitor_generator()).remove_exhausted_sstables(exhausted_ssts);
|
||||
}
|
||||
|
||||
// Release exhausted garbage collected sstables.
|
||||
// A GC sstable is exhausted when it doesn't overlap with any remaining input sstable.
|
||||
// GC sstables serve as safeguards against data resurrection: their tombstones may shadow
|
||||
// data in not-yet-exhausted input sstables. So a GC sstable can only be released once
|
||||
// all overlapping input sstables have been exhausted.
|
||||
auto gc_not_exhausted = [this] (const sstables::shared_sstable& gc_sst) {
|
||||
auto gc_range = ::wrapping_interval<dht::token>::make(
|
||||
gc_sst->get_first_decorated_key()._token,
|
||||
gc_sst->get_last_decorated_key()._token);
|
||||
for (const auto& input_sst : _sstables) {
|
||||
auto input_range = ::wrapping_interval<dht::token>::make(
|
||||
input_sst->get_first_decorated_key()._token,
|
||||
input_sst->get_last_decorated_key()._token);
|
||||
if (gc_range.overlaps(input_range, dht::token_comparator())) {
|
||||
return true; // overlaps with a remaining input sstable, not exhausted yet
|
||||
}
|
||||
}
|
||||
return false; // no overlap with any remaining input sstable, can be released
|
||||
};
|
||||
exhausted = std::partition(_used_garbage_collected_sstables.begin(), _used_garbage_collected_sstables.end(), gc_not_exhausted);
|
||||
if (exhausted != _used_garbage_collected_sstables.end()) {
|
||||
auto exhausted_gc_ssts = std::vector<sstables::shared_sstable>(exhausted, _used_garbage_collected_sstables.end());
|
||||
log_debug("Releasing {} exhausted GC sstable(s) earlier: [{}]",
|
||||
exhausted_gc_ssts.size(),
|
||||
fmt::join(exhausted_gc_ssts | std::views::transform([] (auto sst) { return to_string(sst, true); }), ","));
|
||||
_replacer(get_compaction_completion_desc(std::move(exhausted_gc_ssts), {}));
|
||||
_used_garbage_collected_sstables.erase(exhausted, _used_garbage_collected_sstables.end());
|
||||
}
|
||||
}
|
||||
|
||||
void replace_remaining_exhausted_sstables() {
|
||||
|
||||
@@ -1106,8 +1106,7 @@ void compaction_manager::enable() {
|
||||
|
||||
_compaction_submission_timer.cancel();
|
||||
_compaction_submission_timer.arm_periodic(periodic_compaction_submission_interval());
|
||||
throwing_assert(!_waiting_reevaluation);
|
||||
_waiting_reevaluation.emplace(postponed_compactions_reevaluation());
|
||||
_waiting_reevalution = postponed_compactions_reevaluation();
|
||||
cmlog.info("Enabled");
|
||||
}
|
||||
|
||||
@@ -1155,16 +1154,6 @@ void compaction_manager::reevaluate_postponed_compactions() noexcept {
|
||||
_postponed_reevaluation.signal();
|
||||
}
|
||||
|
||||
future<> compaction_manager::stop_postponed_compactions() noexcept {
|
||||
auto waiting_reevaluation = std::exchange(_waiting_reevaluation, std::nullopt);
|
||||
if (!waiting_reevaluation) {
|
||||
return make_ready_future();
|
||||
}
|
||||
// Trigger a signal to properly exit from postponed_compactions_reevaluation() fiber
|
||||
reevaluate_postponed_compactions();
|
||||
return std::move(*waiting_reevaluation);
|
||||
}
|
||||
|
||||
void compaction_manager::postpone_compaction_for_table(compaction_group_view* t) {
|
||||
_postponed.insert(t);
|
||||
}
|
||||
@@ -1248,7 +1237,8 @@ future<> compaction_manager::drain() {
|
||||
_compaction_submission_timer.cancel();
|
||||
// Stop ongoing compactions, if the request has not been sent already and wait for them to stop.
|
||||
co_await stop_ongoing_compactions("drain");
|
||||
co_await stop_postponed_compactions();
|
||||
// Trigger a signal to properly exit from postponed_compactions_reevaluation() fiber
|
||||
reevaluate_postponed_compactions();
|
||||
cmlog.info("Drained");
|
||||
}
|
||||
|
||||
@@ -1292,7 +1282,8 @@ future<> compaction_manager::really_do_stop() noexcept {
|
||||
if (!_tasks.empty()) {
|
||||
on_fatal_internal_error(cmlog, format("{} tasks still exist after being stopped", _tasks.size()));
|
||||
}
|
||||
co_await stop_postponed_compactions();
|
||||
reevaluate_postponed_compactions();
|
||||
co_await std::move(_waiting_reevalution);
|
||||
co_await _sys_ks.close();
|
||||
_weight_tracker.clear();
|
||||
_compaction_submission_timer.cancel();
|
||||
|
||||
@@ -128,7 +128,7 @@ private:
|
||||
// a sstable from being compacted twice.
|
||||
std::unordered_set<sstables::shared_sstable> _compacting_sstables;
|
||||
|
||||
std::optional<future<>> _waiting_reevaluation;
|
||||
future<> _waiting_reevalution = make_ready_future<>();
|
||||
condition_variable _postponed_reevaluation;
|
||||
// tables that wait for compaction but had its submission postponed due to ongoing compaction.
|
||||
std::unordered_set<compaction::compaction_group_view*> _postponed;
|
||||
@@ -231,7 +231,6 @@ private:
|
||||
|
||||
future<> postponed_compactions_reevaluation();
|
||||
void reevaluate_postponed_compactions() noexcept;
|
||||
future<> stop_postponed_compactions() noexcept;
|
||||
// Postpone compaction for a table that couldn't be executed due to ongoing
|
||||
// similar-sized compaction.
|
||||
void postpone_compaction_for_table(compaction::compaction_group_view* t);
|
||||
|
||||
@@ -698,13 +698,12 @@ public:
|
||||
table_resharding_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||
std::string keyspace,
|
||||
std::string table,
|
||||
tasks::task_id parent_id,
|
||||
sharded<sstables::sstable_directory>& dir,
|
||||
sharded<replica::database>& db,
|
||||
compaction_sstable_creator_fn creator,
|
||||
compaction::owned_ranges_ptr owned_ranges_ptr,
|
||||
bool vnodes_resharding) noexcept
|
||||
: resharding_compaction_task_impl(module, tasks::task_id::create_random_id(), parent_id ? 0 : module->new_sequence_number(), "table", std::move(keyspace), std::move(table), "", parent_id)
|
||||
: resharding_compaction_task_impl(module, tasks::task_id::create_random_id(), module->new_sequence_number(), "table", std::move(keyspace), std::move(table), "", tasks::task_id::create_null_id())
|
||||
, _dir(dir)
|
||||
, _db(db)
|
||||
, _creator(std::move(creator))
|
||||
|
||||
@@ -406,11 +406,7 @@ commitlog_total_space_in_mb: -1
|
||||
# In short, `ms` needs more CPU during sstable writes,
|
||||
# but should behave better during reads,
|
||||
# although it might behave worse for very long clustering keys.
|
||||
#
|
||||
# `ms` sstable format works even better with `column_index_size_in_kb` set to 1,
|
||||
# so keep those two settings in sync (either both set, or both unset).
|
||||
sstable_format: ms
|
||||
column_index_size_in_kb: 1
|
||||
|
||||
# Auto-scaling of the promoted index prevents running out of memory
|
||||
# when the promoted index grows too large (due to partitions with many rows
|
||||
|
||||
@@ -1438,8 +1438,6 @@ alternator = [
|
||||
'alternator/controller.cc',
|
||||
'alternator/server.cc',
|
||||
'alternator/executor.cc',
|
||||
'alternator/executor_read.cc',
|
||||
'alternator/executor_util.cc',
|
||||
'alternator/stats.cc',
|
||||
'alternator/serialization.cc',
|
||||
'alternator/expressions.cc',
|
||||
@@ -1725,7 +1723,6 @@ deps['test/boost/combined_tests'] += [
|
||||
'test/boost/view_schema_test.cc',
|
||||
'test/boost/virtual_reader_test.cc',
|
||||
'test/boost/virtual_table_test.cc',
|
||||
'test/boost/vnodes_to_tablets_migration_test.cc',
|
||||
'tools/schema_loader.cc',
|
||||
'tools/read_mutation.cc',
|
||||
'test/lib/expr_test_utils.cc',
|
||||
|
||||
@@ -23,7 +23,7 @@ set_property(
|
||||
$<$<CONFIG:${unoptimized_modes}>:-O1>
|
||||
# use-after-scope sanitizer also uses large amount of stack space
|
||||
# and overflows the stack of CqlParser
|
||||
$<$<CONFIG:${sanitized_modes}>:-fno-sanitize-address-use-after-scope>)
|
||||
$<$<CONFIG:${sanitized_modes}>:-fsanitize-address-use-after-scope>)
|
||||
|
||||
add_library(cql3 STATIC)
|
||||
target_sources(cql3
|
||||
|
||||
12
cql3/Cql.g
12
cql3/Cql.g
@@ -429,10 +429,10 @@ unaliasedSelector returns [uexpression tmp]
|
||||
: ( c=cident { tmp = unresolved_identifier{std::move(c)}; }
|
||||
| v=value { tmp = std::move(v); }
|
||||
| K_COUNT '(' countArgument ')' { tmp = make_count_rows_function_expression(); }
|
||||
| K_WRITETIME '(' a=subscriptExpr ')' { tmp = column_mutation_attribute{column_mutation_attribute::attribute_kind::writetime,
|
||||
std::move(a)}; }
|
||||
| K_TTL '(' a=subscriptExpr ')' { tmp = column_mutation_attribute{column_mutation_attribute::attribute_kind::ttl,
|
||||
std::move(a)}; }
|
||||
| K_WRITETIME '(' c=cident ')' { tmp = column_mutation_attribute{column_mutation_attribute::attribute_kind::writetime,
|
||||
unresolved_identifier{std::move(c)}}; }
|
||||
| K_TTL '(' c=cident ')' { tmp = column_mutation_attribute{column_mutation_attribute::attribute_kind::ttl,
|
||||
unresolved_identifier{std::move(c)}}; }
|
||||
| f=functionName args=selectionFunctionArgs { tmp = function_call{std::move(f), std::move(args)}; }
|
||||
| K_CAST '(' arg=unaliasedSelector K_AS t=native_type ')' { tmp = cast{.style = cast::cast_style::sql, .arg = std::move(arg), .type = std::move(t)}; }
|
||||
)
|
||||
@@ -1794,9 +1794,7 @@ columnRefExpr returns [uexpression e]
|
||||
|
||||
subscriptExpr returns [uexpression e]
|
||||
: col=columnRefExpr { e = std::move(col); }
|
||||
( '[' sub=term ']' { e = subscript{std::move(e), std::move(sub)}; }
|
||||
| '.' fi=cident { e = field_selection{std::move(e), std::move(fi)}; }
|
||||
)?
|
||||
( '[' sub=term ']' { e = subscript{std::move(e), std::move(sub)}; } )?
|
||||
;
|
||||
|
||||
singleColumnInValuesOrMarkerExpr returns [uexpression e]
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
|
||||
#include "utils/assert.hh"
|
||||
#include "cql3/column_specification.hh"
|
||||
#include "cql3/column_identifier.hh"
|
||||
|
||||
namespace cql3 {
|
||||
|
||||
@@ -32,12 +31,4 @@ bool column_specification::all_in_same_table(const std::vector<lw_shared_ptr<col
|
||||
});
|
||||
}
|
||||
|
||||
lw_shared_ptr<column_specification> make_column_spec(std::string_view ks_name, std::string_view cf_name, sstring name, data_type type) {
|
||||
return make_lw_shared<column_specification>(
|
||||
ks_name,
|
||||
cf_name,
|
||||
::make_shared<column_identifier>(std::move(name), true),
|
||||
std::move(type));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -42,6 +42,4 @@ public:
|
||||
static bool all_in_same_table(const std::vector<lw_shared_ptr<column_specification>>& names);
|
||||
};
|
||||
|
||||
lw_shared_ptr<column_specification> make_column_spec(std::string_view ks_name, std::string_view cf_name, sstring name, data_type type);
|
||||
|
||||
}
|
||||
|
||||
@@ -11,11 +11,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "restrictions/restrictions_config.hh"
|
||||
#include "cql3/restrictions/replication_restrictions.hh"
|
||||
#include "cql3/restrictions/twcs_restrictions.hh"
|
||||
#include "cql3/restrictions/view_restrictions.hh"
|
||||
#include "db/tri_mode_restriction.hh"
|
||||
#include "utils/updateable_value.hh"
|
||||
|
||||
namespace db { class config; }
|
||||
|
||||
@@ -23,44 +18,9 @@ namespace cql3 {
|
||||
|
||||
struct cql_config {
|
||||
restrictions::restrictions_config restrictions;
|
||||
replication_restrictions replication_restrictions;
|
||||
twcs_restrictions twcs_restrictions;
|
||||
view_restrictions view_restrictions;
|
||||
utils::updateable_value<uint32_t> select_internal_page_size;
|
||||
utils::updateable_value<db::tri_mode_restriction> strict_allow_filtering;
|
||||
utils::updateable_value<bool> enable_parallelized_aggregation;
|
||||
utils::updateable_value<uint32_t> batch_size_warn_threshold_in_kb;
|
||||
utils::updateable_value<uint32_t> batch_size_fail_threshold_in_kb;
|
||||
utils::updateable_value<bool> restrict_future_timestamp;
|
||||
utils::updateable_value<bool> enable_create_table_with_compact_storage;
|
||||
|
||||
explicit cql_config(const db::config& cfg)
|
||||
: restrictions(cfg)
|
||||
, replication_restrictions(cfg)
|
||||
, twcs_restrictions(cfg)
|
||||
, view_restrictions(cfg)
|
||||
, select_internal_page_size(cfg.select_internal_page_size)
|
||||
, strict_allow_filtering(cfg.strict_allow_filtering)
|
||||
, enable_parallelized_aggregation(cfg.enable_parallelized_aggregation)
|
||||
, batch_size_warn_threshold_in_kb(cfg.batch_size_warn_threshold_in_kb)
|
||||
, batch_size_fail_threshold_in_kb(cfg.batch_size_fail_threshold_in_kb)
|
||||
, restrict_future_timestamp(cfg.restrict_future_timestamp)
|
||||
, enable_create_table_with_compact_storage(cfg.enable_create_table_with_compact_storage)
|
||||
{}
|
||||
explicit cql_config(const db::config& cfg) : restrictions(cfg) {}
|
||||
struct default_tag{};
|
||||
cql_config(default_tag)
|
||||
: restrictions(restrictions::restrictions_config::default_tag{})
|
||||
, replication_restrictions(replication_restrictions::default_tag{})
|
||||
, twcs_restrictions(twcs_restrictions::default_tag{})
|
||||
, view_restrictions(view_restrictions::default_tag{})
|
||||
, select_internal_page_size(10000)
|
||||
, strict_allow_filtering(db::tri_mode_restriction(db::tri_mode_restriction_t::mode::WARN))
|
||||
, enable_parallelized_aggregation(true)
|
||||
, batch_size_warn_threshold_in_kb(128)
|
||||
, batch_size_fail_threshold_in_kb(1024)
|
||||
, restrict_future_timestamp(true)
|
||||
, enable_create_table_with_compact_storage(false)
|
||||
{}
|
||||
cql_config(default_tag) : restrictions(restrictions::restrictions_config::default_tag{}) {}
|
||||
};
|
||||
|
||||
extern const cql_config default_cql_config;
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
// Copyright (C) 2026-present ScyllaDB
|
||||
// SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "bytes.hh"
|
||||
#include "mutation/timestamp.hh"
|
||||
|
||||
namespace cql3::expr {
|
||||
|
||||
// Per-element timestamps and TTLs for a cell of a map, set or UDT (populated
|
||||
// when a WRITETIME() or TTL() of col[key] or col.field are in the query.
|
||||
// Keys are the raw serialized keys or serialized field index.
|
||||
struct collection_cell_metadata {
|
||||
std::map<bytes, api::timestamp_type> timestamps;
|
||||
std::map<bytes, int32_t> ttls; // remaining TTL in seconds (-1 if no TTL)
|
||||
};
|
||||
|
||||
} // namespace cql3::expr
|
||||
@@ -3,7 +3,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "collection_cell_metadata.hh"
|
||||
#include "expression.hh"
|
||||
|
||||
#include "bytes.hh"
|
||||
@@ -28,7 +27,6 @@ struct evaluation_inputs {
|
||||
std::span<const api::timestamp_type> static_and_regular_timestamps; // indexes match `selection` member
|
||||
std::span<const int32_t> static_and_regular_ttls; // indexes match `selection` member
|
||||
std::span<const cql3::raw_value> temporaries; // indexes match temporary::index
|
||||
std::span<const collection_cell_metadata> collection_element_metadata; // indexes match `selection` member
|
||||
};
|
||||
|
||||
// Takes a prepared expression and calculates its value.
|
||||
|
||||
@@ -1031,7 +1031,7 @@ expression search_and_replace(const expression& e,
|
||||
return cast{c.style, recurse(c.arg), c.type};
|
||||
},
|
||||
[&] (const field_selection& fs) -> expression {
|
||||
return field_selection{recurse(fs.structure), fs.field, fs.field_idx, fs.type};
|
||||
return field_selection{recurse(fs.structure), fs.field};
|
||||
},
|
||||
[&] (const subscript& s) -> expression {
|
||||
return subscript {
|
||||
@@ -1206,58 +1206,6 @@ cql3::raw_value do_evaluate(const field_selection& field_select, const evaluatio
|
||||
static
|
||||
cql3::raw_value
|
||||
do_evaluate(const column_mutation_attribute& cma, const evaluation_inputs& inputs) {
|
||||
// Helper for WRITETIME/TTL on a collection element or UDT field: given the
|
||||
// inner column and the serialized element key, validate the index and look
|
||||
// up the per-element timestamp or TTL in collection_element_metadata.
|
||||
auto lookup_element_attribute = [&](const column_value* inner_col, std::string_view context, bytes key) -> cql3::raw_value {
|
||||
int32_t index = inputs.selection->index_of(*inner_col->col);
|
||||
if (inputs.collection_element_metadata.empty() || index < 0 || size_t(index) >= inputs.collection_element_metadata.size()) {
|
||||
on_internal_error(expr_logger, fmt::format("evaluating column_mutation_attribute {}: column {} is not in selection",
|
||||
context, inner_col->col->name_as_text()));
|
||||
}
|
||||
const auto& meta = inputs.collection_element_metadata[index];
|
||||
switch (cma.kind) {
|
||||
case column_mutation_attribute::attribute_kind::writetime: {
|
||||
const auto it = meta.timestamps.find(key);
|
||||
if (it == meta.timestamps.end()) {
|
||||
return cql3::raw_value::make_null();
|
||||
}
|
||||
return raw_value::make_value(data_value(it->second).serialize());
|
||||
}
|
||||
case column_mutation_attribute::attribute_kind::ttl: {
|
||||
const auto it = meta.ttls.find(key);
|
||||
// The test it->second <= 0 (rather than < 0) matches the
|
||||
// single-TTL check ttl_v <= 0 below.
|
||||
if (it == meta.ttls.end() || it->second <= 0) {
|
||||
return cql3::raw_value::make_null();
|
||||
}
|
||||
return raw_value::make_value(data_value(it->second).serialize());
|
||||
}
|
||||
}
|
||||
on_internal_error(expr_logger, fmt::format("evaluating column_mutation_attribute {} with unexpected kind", context));
|
||||
};
|
||||
// Handle WRITETIME(x.field) / TTL(x.field) on a UDT field
|
||||
if (auto fs = expr::as_if<field_selection>(&cma.column)) {
|
||||
auto inner_col = expr::as_if<column_value>(&fs->structure);
|
||||
if (!inner_col) {
|
||||
on_internal_error(expr_logger, fmt::format("evaluating column_mutation_attribute field_selection: inner expression is not a column: {}", fs->structure));
|
||||
}
|
||||
return lookup_element_attribute(inner_col, "field_selection", serialize_field_index(fs->field_idx));
|
||||
}
|
||||
// Handle WRITETIME(m[key]) / TTL(m[key]) on a map element
|
||||
if (auto sub = expr::as_if<subscript>(&cma.column)) {
|
||||
auto inner_col = expr::as_if<column_value>(&sub->val);
|
||||
if (!inner_col) {
|
||||
on_internal_error(expr_logger, fmt::format("evaluating column_mutation_attribute subscript: inner expression is not a column: {}", sub->val));
|
||||
}
|
||||
auto evaluated_key = evaluate(sub->sub, inputs);
|
||||
if (evaluated_key.is_null()) {
|
||||
return cql3::raw_value::make_null();
|
||||
}
|
||||
return evaluated_key.view().with_linearized([&] (bytes_view key_bv) {
|
||||
return lookup_element_attribute(inner_col, "subscript", bytes(key_bv));
|
||||
});
|
||||
}
|
||||
auto col = expr::as_if<column_value>(&cma.column);
|
||||
if (!col) {
|
||||
on_internal_error(expr_logger, fmt::format("evaluating column_mutation_attribute of non-column {}", cma.column));
|
||||
|
||||
@@ -1259,40 +1259,6 @@ prepare_column_mutation_attribute(
|
||||
receiver->type->name(), receiver->name->text()));
|
||||
}
|
||||
auto column = prepare_expression(cma.column, db, keyspace, schema_opt, nullptr);
|
||||
// Helper for the subscript and field-selection cases below: validates that
|
||||
// inner_expr is a column, not a primary key column, that its type satisfies
|
||||
// type_allowed, and that the cluster feature flag is on.
|
||||
auto validate_and_return =
|
||||
[&](const expression& inner_expr, std::string_view context,
|
||||
auto type_allowed, std::string_view type_allowed_str) -> std::optional<expression> {
|
||||
auto inner_cval = expr::as_if<column_value>(&inner_expr);
|
||||
if (!inner_cval) {
|
||||
throw exceptions::invalid_request_exception(fmt::format("{} on a {} expects a column, got {}", cma.kind, context, inner_expr));
|
||||
}
|
||||
if (inner_cval->col->is_primary_key()) {
|
||||
throw exceptions::invalid_request_exception(fmt::format("{} is not legal on primary key component {}", cma.kind, inner_cval->col->name_as_text()));
|
||||
}
|
||||
if (!type_allowed(inner_cval->col->type)) {
|
||||
throw exceptions::invalid_request_exception(fmt::format("{} on a {} is only valid for {}", cma.kind, context, type_allowed_str));
|
||||
}
|
||||
if (!db.features().writetime_ttl_individual_element) {
|
||||
throw exceptions::invalid_request_exception(fmt::format(
|
||||
"{} on a {} is not supported until all nodes in the cluster are upgraded", cma.kind, context));
|
||||
}
|
||||
return column_mutation_attribute{.kind = cma.kind, .column = std::move(column)};
|
||||
};
|
||||
// Handle WRITETIME(m[key]) / TTL(m[key]) - a subscript into a non-frozen map or set column
|
||||
if (auto sub = expr::as_if<subscript>(&column)) {
|
||||
return validate_and_return(sub->val, "subscript",
|
||||
[](const data_type& t) { return (t->is_map() || t->is_set()) && t->is_multi_cell(); },
|
||||
"non-frozen map or set columns");
|
||||
}
|
||||
// Handle WRITETIME(x.field) / TTL(x.field) - a field selection into a non-frozen UDT column
|
||||
if (auto fs = expr::as_if<field_selection>(&column)) {
|
||||
return validate_and_return(fs->structure, "field selection",
|
||||
[](const data_type& t) { return t->is_user_type() && t->is_multi_cell(); },
|
||||
"non-frozen UDT columns");
|
||||
}
|
||||
auto cval = expr::as_if<column_value>(&column);
|
||||
if (!cval) {
|
||||
throw exceptions::invalid_request_exception(fmt::format("{} expects a column, but {} is a general expression", cma.kind, column));
|
||||
@@ -1688,12 +1654,6 @@ static lw_shared_ptr<column_specification> get_lhs_receiver(const expression& pr
|
||||
return list_value_spec_of(*sub_col.col->column_specification);
|
||||
}
|
||||
},
|
||||
[&](const field_selection& fs) -> lw_shared_ptr<column_specification> {
|
||||
return make_lw_shared<column_specification>(
|
||||
schema.ks_name(), schema.cf_name(),
|
||||
::make_shared<column_identifier>(fs.field->text(), true),
|
||||
fs.type);
|
||||
},
|
||||
[&](const tuple_constructor& tup) -> lw_shared_ptr<column_specification> {
|
||||
std::ostringstream tuple_name;
|
||||
tuple_name << "(";
|
||||
|
||||
@@ -560,11 +560,6 @@ query_processor::acquire_strongly_consistent_coordinator() {
|
||||
return {remote_.get().sc_coordinator, std::move(holder)};
|
||||
}
|
||||
|
||||
service::storage_service& query_processor::storage_service() {
|
||||
auto [remote_, holder] = remote();
|
||||
return remote_.get().ss;
|
||||
}
|
||||
|
||||
void query_processor::start_remote(service::migration_manager& mm, service::mapreduce_service& mapreducer,
|
||||
service::storage_service& ss, service::raft_group0_client& group0_client,
|
||||
service::strong_consistency::coordinator& sc_coordinator) {
|
||||
@@ -791,7 +786,7 @@ query_processor::get_statement(const std::string_view& query, const service::cli
|
||||
cf_stmt->prepare_keyspace(client_state);
|
||||
}
|
||||
++_stats.prepare_invocations;
|
||||
auto p = statement->prepare(_db, _cql_stats, _cql_config);
|
||||
auto p = statement->prepare(_db, _cql_stats);
|
||||
p->statement->raw_cql_statement = sstring(query);
|
||||
auto audit_info = p->statement->get_audit_info();
|
||||
if (audit_info) {
|
||||
@@ -906,7 +901,7 @@ query_options query_processor::make_internal_options(
|
||||
statements::prepared_statement::checked_weak_ptr query_processor::prepare_internal(const sstring& query_string) {
|
||||
auto& p = _internal_statements[query_string];
|
||||
if (p == nullptr) {
|
||||
auto np = parse_statement(query_string, internal_dialect())->prepare(_db, _cql_stats, _cql_config);
|
||||
auto np = parse_statement(query_string, internal_dialect())->prepare(_db, _cql_stats);
|
||||
np->statement->raw_cql_statement = query_string;
|
||||
p = std::move(np); // inserts it into map
|
||||
}
|
||||
@@ -1017,7 +1012,7 @@ query_processor::execute_internal(
|
||||
return execute_with_params(std::move(p), cl, query_state, values);
|
||||
} else {
|
||||
// For internal queries, we want the default dialect, not the user provided one
|
||||
auto p = parse_statement(query_string, dialect{})->prepare(_db, _cql_stats, _cql_config);
|
||||
auto p = parse_statement(query_string, dialect{})->prepare(_db, _cql_stats);
|
||||
p->statement->raw_cql_statement = query_string;
|
||||
auto checked_weak_ptr = p->checked_weak_from_this();
|
||||
return execute_with_params(std::move(checked_weak_ptr), cl, query_state, values).finally([p = std::move(p)] {});
|
||||
@@ -1076,11 +1071,6 @@ query_processor::execute_batch_without_checking_exception_message(
|
||||
query_options& options,
|
||||
std::unordered_map<prepared_cache_key_type, authorized_prepared_statements_cache::value_type> pending_authorization_entries) {
|
||||
auto access_future = co_await coroutine::as_future(batch->check_access(*this, query_state.get_client_state()));
|
||||
bool failed = access_future.failed();
|
||||
co_await audit::inspect(batch, query_state, options, failed);
|
||||
if (failed) {
|
||||
std::rethrow_exception(access_future.get_exception());
|
||||
}
|
||||
co_await coroutine::parallel_for_each(pending_authorization_entries, [this, &query_state] (auto& e) -> future<> {
|
||||
try {
|
||||
co_await _authorized_prepared_cache.insert(*query_state.get_client_state().user(), e.first, std::move(e.second));
|
||||
@@ -1088,6 +1078,11 @@ query_processor::execute_batch_without_checking_exception_message(
|
||||
log.error("failed to cache the entry: {}", std::current_exception());
|
||||
}
|
||||
});
|
||||
bool failed = access_future.failed();
|
||||
co_await audit::inspect(batch, query_state, options, failed);
|
||||
if (access_future.failed()) {
|
||||
std::rethrow_exception(access_future.get_exception());
|
||||
}
|
||||
batch->validate();
|
||||
batch->validate(*this, query_state.get_client_state());
|
||||
_stats.queries_by_cl[size_t(options.get_consistency())] += batch->get_statements().size();
|
||||
|
||||
@@ -209,8 +209,6 @@ public:
|
||||
return _proxy;
|
||||
}
|
||||
|
||||
service::storage_service& storage_service();
|
||||
|
||||
std::pair<std::reference_wrapper<service::strong_consistency::coordinator>, gate::holder>
|
||||
acquire_strongly_consistent_coordinator();
|
||||
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2026-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "db/config.hh"
|
||||
#include "utils/updateable_value.hh"
|
||||
|
||||
namespace cql3 {
|
||||
|
||||
struct replication_restrictions {
|
||||
utils::updateable_value<db::tri_mode_restriction> restrict_replication_simplestrategy;
|
||||
utils::updateable_value<std::vector<enum_option<db::replication_strategy_restriction_t>>> replication_strategy_warn_list;
|
||||
utils::updateable_value<std::vector<enum_option<db::replication_strategy_restriction_t>>> replication_strategy_fail_list;
|
||||
utils::updateable_value<int> minimum_replication_factor_fail_threshold;
|
||||
utils::updateable_value<int> minimum_replication_factor_warn_threshold;
|
||||
utils::updateable_value<int> maximum_replication_factor_fail_threshold;
|
||||
utils::updateable_value<int> maximum_replication_factor_warn_threshold;
|
||||
|
||||
explicit replication_restrictions(const db::config& cfg)
|
||||
: restrict_replication_simplestrategy(cfg.restrict_replication_simplestrategy)
|
||||
, replication_strategy_warn_list(cfg.replication_strategy_warn_list)
|
||||
, replication_strategy_fail_list(cfg.replication_strategy_fail_list)
|
||||
, minimum_replication_factor_fail_threshold(cfg.minimum_replication_factor_fail_threshold)
|
||||
, minimum_replication_factor_warn_threshold(cfg.minimum_replication_factor_warn_threshold)
|
||||
, maximum_replication_factor_fail_threshold(cfg.maximum_replication_factor_fail_threshold)
|
||||
, maximum_replication_factor_warn_threshold(cfg.maximum_replication_factor_warn_threshold)
|
||||
{}
|
||||
|
||||
struct default_tag{};
|
||||
replication_restrictions(default_tag)
|
||||
: restrict_replication_simplestrategy(db::tri_mode_restriction(db::tri_mode_restriction_t::mode::FALSE))
|
||||
, replication_strategy_warn_list(std::vector<enum_option<db::replication_strategy_restriction_t>>{})
|
||||
, replication_strategy_fail_list(std::vector<enum_option<db::replication_strategy_restriction_t>>{})
|
||||
, minimum_replication_factor_fail_threshold(-1)
|
||||
, minimum_replication_factor_warn_threshold(3)
|
||||
, maximum_replication_factor_fail_threshold(-1)
|
||||
, maximum_replication_factor_warn_threshold(-1)
|
||||
{}
|
||||
};
|
||||
|
||||
} // namespace cql3
|
||||
@@ -1,32 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2026-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "db/config.hh"
|
||||
#include "utils/updateable_value.hh"
|
||||
|
||||
namespace cql3 {
|
||||
|
||||
struct twcs_restrictions {
|
||||
utils::updateable_value<uint32_t> twcs_max_window_count;
|
||||
utils::updateable_value<db::tri_mode_restriction> restrict_twcs_without_default_ttl;
|
||||
|
||||
explicit twcs_restrictions(const db::config& cfg)
|
||||
: twcs_max_window_count(cfg.twcs_max_window_count)
|
||||
, restrict_twcs_without_default_ttl(cfg.restrict_twcs_without_default_ttl)
|
||||
{}
|
||||
|
||||
struct default_tag{};
|
||||
twcs_restrictions(default_tag)
|
||||
: twcs_max_window_count(10000)
|
||||
, restrict_twcs_without_default_ttl(db::tri_mode_restriction(db::tri_mode_restriction_t::mode::WARN))
|
||||
{}
|
||||
};
|
||||
|
||||
} // namespace cql3
|
||||
@@ -1,32 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2019-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "db/config.hh"
|
||||
#include "db/tri_mode_restriction.hh"
|
||||
#include "utils/updateable_value.hh"
|
||||
|
||||
namespace db { class config; }
|
||||
|
||||
namespace cql3 {
|
||||
|
||||
struct view_restrictions {
|
||||
utils::updateable_value<db::tri_mode_restriction> strict_is_not_null_in_views;
|
||||
|
||||
explicit view_restrictions(const db::config& cfg)
|
||||
: strict_is_not_null_in_views(cfg.strict_is_not_null_in_views)
|
||||
{}
|
||||
|
||||
struct default_tag{};
|
||||
view_restrictions(default_tag)
|
||||
: strict_is_not_null_in_views(db::tri_mode_restriction(db::tri_mode_restriction_t::mode::WARN))
|
||||
{}
|
||||
};
|
||||
|
||||
}
|
||||
@@ -17,7 +17,6 @@
|
||||
#include "cql3/expr/expr-utils.hh"
|
||||
#include "cql3/functions/first_function.hh"
|
||||
#include "cql3/functions/aggregate_fcts.hh"
|
||||
#include "types/types.hh"
|
||||
|
||||
#include <ranges>
|
||||
|
||||
@@ -32,14 +31,12 @@ selection::selection(schema_ptr schema,
|
||||
std::vector<lw_shared_ptr<column_specification>> metadata_,
|
||||
bool collect_timestamps,
|
||||
bool collect_TTLs,
|
||||
bool collect_collection_timestamps,
|
||||
trivial is_trivial)
|
||||
: _schema(std::move(schema))
|
||||
, _columns(std::move(columns))
|
||||
, _metadata(::make_shared<metadata>(std::move(metadata_)))
|
||||
, _collect_timestamps(collect_timestamps)
|
||||
, _collect_TTLs(collect_TTLs)
|
||||
, _collect_collection_timestamps(collect_collection_timestamps)
|
||||
, _contains_static_columns(std::any_of(_columns.begin(), _columns.end(), std::mem_fn(&column_definition::is_static)))
|
||||
, _is_trivial(is_trivial)
|
||||
{ }
|
||||
@@ -49,7 +46,6 @@ query::partition_slice::option_set selection::get_query_options() {
|
||||
|
||||
opts.set_if<query::partition_slice::option::send_timestamp>(_collect_timestamps);
|
||||
opts.set_if<query::partition_slice::option::send_expiry>(_collect_TTLs);
|
||||
opts.set_if<query::partition_slice::option::send_collection_timestamps>(_collect_collection_timestamps);
|
||||
|
||||
opts.set_if<query::partition_slice::option::send_partition_key>(
|
||||
std::any_of(_columns.begin(), _columns.end(),
|
||||
@@ -118,7 +114,7 @@ public:
|
||||
*/
|
||||
simple_selection(schema_ptr schema, std::vector<const column_definition*> columns,
|
||||
std::vector<lw_shared_ptr<column_specification>> metadata, bool is_wildcard)
|
||||
: selection(schema, std::move(columns), std::move(metadata), false, false, false, trivial::yes)
|
||||
: selection(schema, std::move(columns), std::move(metadata), false, false, trivial::yes)
|
||||
, _is_wildcard(is_wildcard)
|
||||
{ }
|
||||
|
||||
@@ -182,12 +178,6 @@ contains_column_mutation_attribute(expr::column_mutation_attribute::attribute_ki
|
||||
});
|
||||
}
|
||||
|
||||
static bool contains_collection_mutation_attribute(const expr::expression& e) {
|
||||
return expr::find_in_expression<expr::column_mutation_attribute>(e, [](const expr::column_mutation_attribute& cma) {
|
||||
return expr::is<expr::subscript>(cma.column) || expr::is<expr::field_selection>(cma.column);
|
||||
});
|
||||
}
|
||||
|
||||
static
|
||||
bool
|
||||
contains_writetime(const expr::expression& e) {
|
||||
@@ -212,8 +202,7 @@ public:
|
||||
std::vector<expr::expression> selectors)
|
||||
: selection(schema, std::move(columns), std::move(metadata),
|
||||
contains_writetime(expr::tuple_constructor{selectors}),
|
||||
contains_ttl(expr::tuple_constructor{selectors}),
|
||||
contains_collection_mutation_attribute(expr::tuple_constructor{selectors}))
|
||||
contains_ttl(expr::tuple_constructor{selectors}))
|
||||
, _selectors(std::move(selectors))
|
||||
{
|
||||
auto agg_split = expr::split_aggregation(_selectors);
|
||||
@@ -402,7 +391,6 @@ protected:
|
||||
.static_and_regular_timestamps = rs._timestamps,
|
||||
.static_and_regular_ttls = rs._ttls,
|
||||
.temporaries = {},
|
||||
.collection_element_metadata = rs._collection_element_metadata,
|
||||
};
|
||||
for (auto&& e : _sel._selectors) {
|
||||
auto out = expr::evaluate(e, inputs);
|
||||
@@ -441,7 +429,6 @@ protected:
|
||||
.static_and_regular_timestamps = rs._timestamps,
|
||||
.static_and_regular_ttls = rs._ttls,
|
||||
.temporaries = _temporaries,
|
||||
.collection_element_metadata = rs._collection_element_metadata,
|
||||
};
|
||||
for (size_t i = 0; i != _sel._inner_loop.size(); ++i) {
|
||||
_temporaries[i] = expr::evaluate(_sel._inner_loop[i], inputs);
|
||||
@@ -566,9 +553,6 @@ result_set_builder::result_set_builder(const selection& s, gc_clock::time_point
|
||||
if (s._collect_TTLs) {
|
||||
_ttls.resize(s._columns.size(), 0);
|
||||
}
|
||||
if (s._collect_collection_timestamps) {
|
||||
_collection_element_metadata.resize(s._columns.size());
|
||||
}
|
||||
}
|
||||
|
||||
void result_set_builder::add_empty() {
|
||||
@@ -579,9 +563,6 @@ void result_set_builder::add_empty() {
|
||||
if (!_ttls.empty()) {
|
||||
_ttls[current.size() - 1] = -1;
|
||||
}
|
||||
if (!_collection_element_metadata.empty()) {
|
||||
_collection_element_metadata[current.size() - 1] = {};
|
||||
}
|
||||
}
|
||||
|
||||
void result_set_builder::add(bytes_opt value) {
|
||||
@@ -604,45 +585,8 @@ void result_set_builder::add(const column_definition& def, const query::result_a
|
||||
}
|
||||
|
||||
void result_set_builder::add_collection(const column_definition& def, bytes_view c) {
|
||||
size_t col_idx = current.size();
|
||||
if (!_collection_element_metadata.empty()) {
|
||||
// Extended format produced by serialize_for_cql_with_timestamps()
|
||||
// [uint32 cql_len][cql bytes][int32 entry_count]
|
||||
// followed by entry_count entries, each:
|
||||
// [int32 key_len][key bytes][int64 timestamp][int64 expiry_raw]
|
||||
// where expiry_raw is -1 if the element does not expire, otherwise
|
||||
// it is the serialized gc_clock time used to derive the remaining
|
||||
// TTL. The flag _collect_collection_timestamps = true determines
|
||||
// whether this extended format is used (instead of a plain CQL
|
||||
// collection blob), and it is only enabled when a feature flag
|
||||
// guarantees both reader and writer support it.
|
||||
uint32_t cql_len = read_simple<uint32_t>(c);
|
||||
bytes_view cql_bytes = read_simple_bytes(c, cql_len);
|
||||
current.emplace_back(to_bytes(cql_bytes));
|
||||
|
||||
auto& meta = _collection_element_metadata[col_idx];
|
||||
meta = {}; // clear stale data from previous row
|
||||
int32_t entry_count = read_simple<int32_t>(c);
|
||||
for (int32_t i = 0; i < entry_count; ++i) {
|
||||
int32_t key_len = read_simple<int32_t>(c);
|
||||
bytes key = to_bytes(read_simple_bytes(c, key_len));
|
||||
int64_t ts = read_simple<int64_t>(c);
|
||||
int64_t expiry_raw = read_simple<int64_t>(c);
|
||||
meta.timestamps[key] = ts;
|
||||
if (expiry_raw != -1) {
|
||||
auto expiry = gc_clock::time_point(gc_clock::duration(expiry_raw));
|
||||
auto ttl_left = expiry - _now;
|
||||
int32_t ttl = int32_t(ttl_left.count());
|
||||
if (ttl > 0) {
|
||||
meta.ttls[key] = ttl;
|
||||
}
|
||||
// otherwise, expired or no TTL; We can omit this key from
|
||||
// map - missing key is treated as null by the evaluator.
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
current.emplace_back(to_bytes(c));
|
||||
// timestamps, ttls meaningless for collections
|
||||
}
|
||||
|
||||
void result_set_builder::update_last_group() {
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
|
||||
#include "utils/assert.hh"
|
||||
#include "bytes.hh"
|
||||
#include "cql3/expr/collection_cell_metadata.hh"
|
||||
#include "schema/schema_fwd.hh"
|
||||
#include "query/query-result-reader.hh"
|
||||
#include "selector.hh"
|
||||
@@ -70,7 +69,6 @@ private:
|
||||
::shared_ptr<metadata> _metadata;
|
||||
const bool _collect_timestamps;
|
||||
const bool _collect_TTLs;
|
||||
const bool _collect_collection_timestamps;
|
||||
const bool _contains_static_columns;
|
||||
bool _is_trivial;
|
||||
protected:
|
||||
@@ -80,9 +78,7 @@ protected:
|
||||
std::vector<const column_definition*> columns,
|
||||
std::vector<lw_shared_ptr<column_specification>> metadata_,
|
||||
bool collect_timestamps,
|
||||
bool collect_TTLs,
|
||||
bool collect_collection_timestamps,
|
||||
trivial is_trivial = trivial::no);
|
||||
bool collect_TTLs, trivial is_trivial = trivial::no);
|
||||
|
||||
virtual ~selection() {}
|
||||
public:
|
||||
@@ -201,7 +197,6 @@ public:
|
||||
std::vector<bytes> current_clustering_key;
|
||||
std::vector<api::timestamp_type> _timestamps;
|
||||
std::vector<int32_t> _ttls;
|
||||
std::vector<cql3::expr::collection_cell_metadata> _collection_element_metadata;
|
||||
const query_options* _options;
|
||||
private:
|
||||
const gc_clock::time_point _now;
|
||||
|
||||
@@ -27,7 +27,6 @@
|
||||
#include "data_dictionary/data_dictionary.hh"
|
||||
#include "data_dictionary/keyspace_metadata.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "cql3/cql_config.hh"
|
||||
#include "cql3/statements/ks_prop_defs.hh"
|
||||
#include "create_keyspace_statement.hh"
|
||||
#include "gms/feature_service.hh"
|
||||
@@ -261,14 +260,14 @@ cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_proce
|
||||
}
|
||||
|
||||
std::unique_ptr<cql3::statements::prepared_statement>
|
||||
cql3::statements::alter_keyspace_statement::prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) {
|
||||
cql3::statements::alter_keyspace_statement::prepare(data_dictionary::database db, cql_stats& stats) {
|
||||
return std::make_unique<prepared_statement>(audit_info(), make_shared<alter_keyspace_statement>(*this));
|
||||
}
|
||||
|
||||
|
||||
future<::shared_ptr<cql_transport::messages::result_message>>
|
||||
cql3::statements::alter_keyspace_statement::execute(query_processor& qp, service::query_state& state, const query_options& options, std::optional<service::group0_guard> guard) const {
|
||||
std::vector<sstring> warnings = check_against_restricted_replication_strategies(qp, keyspace(), *_attrs, qp.get_cql_stats(), qp.get_cql_config().replication_restrictions);
|
||||
std::vector<sstring> warnings = check_against_restricted_replication_strategies(qp, keyspace(), *_attrs, qp.get_cql_stats());
|
||||
return schema_altering_statement::execute(qp, state, options, std::move(guard)).then([warnings = std::move(warnings)] (::shared_ptr<messages::result_message> msg) {
|
||||
for (const auto& warning : warnings) {
|
||||
msg->add_warning(warning);
|
||||
|
||||
@@ -37,7 +37,7 @@ public:
|
||||
future<> check_access(query_processor& qp, const service::client_state& state) const override;
|
||||
void validate(query_processor& qp, const service::client_state& state) const override;
|
||||
virtual future<std::tuple<::shared_ptr<event_t>, cql3::cql_warnings_vec>> prepare_schema_mutations(query_processor& qp, service::query_state& state, const query_options& options, service::group0_batch& mc) const override;
|
||||
virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) override;
|
||||
virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;
|
||||
virtual future<::shared_ptr<messages::result_message>> execute(query_processor& qp, service::query_state& state, const query_options& options, std::optional<service::group0_guard> guard) const override;
|
||||
bool changes_tablets(query_processor& qp) const;
|
||||
};
|
||||
|
||||
@@ -33,7 +33,7 @@ public:
|
||||
, _options(std::move(options)) {
|
||||
}
|
||||
|
||||
std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) override;
|
||||
std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;
|
||||
|
||||
virtual future<> check_access(query_processor& qp, const service::client_state&) const override;
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ alter_service_level_statement::alter_service_level_statement(sstring service_lev
|
||||
|
||||
std::unique_ptr<cql3::statements::prepared_statement>
|
||||
cql3::statements::alter_service_level_statement::prepare(
|
||||
data_dictionary::database db, cql_stats &stats, const cql_config& cfg) {
|
||||
data_dictionary::database db, cql_stats &stats) {
|
||||
return std::make_unique<prepared_statement>(audit_info(), ::make_shared<alter_service_level_statement>(*this));
|
||||
}
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ class alter_service_level_statement final : public service_level_statement {
|
||||
|
||||
public:
|
||||
alter_service_level_statement(sstring service_level, shared_ptr<sl_prop_defs> attrs);
|
||||
std::unique_ptr<cql3::statements::prepared_statement> prepare(data_dictionary::database db, cql_stats &stats, const cql_config& cfg) override;
|
||||
std::unique_ptr<cql3::statements::prepared_statement> prepare(data_dictionary::database db, cql_stats &stats) override;
|
||||
virtual future<> check_access(query_processor& qp, const service::client_state&) const override;
|
||||
virtual future<::shared_ptr<cql_transport::messages::result_message>>
|
||||
execute(query_processor&, service::query_state&, const query_options&, std::optional<service::group0_guard> guard) const override;
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
#include "utils/assert.hh"
|
||||
#include <seastar/core/coroutine.hh>
|
||||
#include "cql3/query_options.hh"
|
||||
#include "cql3/cql_config.hh"
|
||||
#include "cql3/statements/alter_table_statement.hh"
|
||||
#include "cql3/statements/alter_type_statement.hh"
|
||||
#include "exceptions/exceptions.hh"
|
||||
@@ -561,7 +560,7 @@ alter_table_statement::prepare_schema_mutations(query_processor& qp, const query
|
||||
}
|
||||
|
||||
std::unique_ptr<cql3::statements::prepared_statement>
|
||||
cql3::statements::alter_table_statement::prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) {
|
||||
cql3::statements::alter_table_statement::prepare(data_dictionary::database db, cql_stats& stats) {
|
||||
// Cannot happen; alter_table_statement is never instantiated as a raw statement
|
||||
// (instead we instantiate alter_table_statement::raw_statement)
|
||||
utils::on_internal_error("alter_table_statement cannot be prepared. Use alter_table_statement::raw_statement instead");
|
||||
@@ -590,10 +589,10 @@ alter_table_statement::raw_statement::raw_statement(cf_name name,
|
||||
{}
|
||||
|
||||
std::unique_ptr<cql3::statements::prepared_statement>
|
||||
alter_table_statement::raw_statement::prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) {
|
||||
alter_table_statement::raw_statement::prepare(data_dictionary::database db, cql_stats& stats) {
|
||||
auto t = db.try_find_table(keyspace(), column_family());
|
||||
std::optional<schema_ptr> s = t ? std::make_optional(t->schema()) : std::nullopt;
|
||||
std::optional<sstring> warning = check_restricted_table_properties(s, keyspace(), column_family(), *_properties, cfg.twcs_restrictions);
|
||||
std::optional<sstring> warning = check_restricted_table_properties(db, s, keyspace(), column_family(), *_properties);
|
||||
if (warning) {
|
||||
// FIXME: should this warning be returned to the caller?
|
||||
// See https://github.com/scylladb/scylladb/issues/20945
|
||||
|
||||
@@ -64,7 +64,7 @@ public:
|
||||
|
||||
virtual uint32_t get_bound_terms() const override;
|
||||
virtual future<> check_access(query_processor& qp, const service::client_state& state) const override;
|
||||
virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) override;
|
||||
virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;
|
||||
virtual future<::shared_ptr<messages::result_message>> execute(query_processor& qp, service::query_state& state, const query_options& options, std::optional<service::group0_guard> guard) const override;
|
||||
|
||||
future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, utils::chunked_vector<mutation>, cql3::cql_warnings_vec>> prepare_schema_mutations(query_processor& qp, const query_options& options, api::timestamp_type) const override;
|
||||
@@ -92,7 +92,7 @@ public:
|
||||
std::unique_ptr<attributes::raw> attrs,
|
||||
shared_ptr<column_identifier::raw> ttl_change);
|
||||
|
||||
virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) override;
|
||||
virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;
|
||||
|
||||
virtual audit::statement_category category() const override { return audit::statement_category::DDL; }
|
||||
};
|
||||
|
||||
@@ -209,12 +209,12 @@ user_type alter_type_statement::renames::make_updated_type(data_dictionary::data
|
||||
}
|
||||
|
||||
std::unique_ptr<cql3::statements::prepared_statement>
|
||||
alter_type_statement::add_or_alter::prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) {
|
||||
alter_type_statement::add_or_alter::prepare(data_dictionary::database db, cql_stats& stats) {
|
||||
return std::make_unique<prepared_statement>(audit_info(), make_shared<alter_type_statement::add_or_alter>(*this));
|
||||
}
|
||||
|
||||
std::unique_ptr<cql3::statements::prepared_statement>
|
||||
alter_type_statement::renames::prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) {
|
||||
alter_type_statement::renames::prepare(data_dictionary::database db, cql_stats& stats) {
|
||||
return std::make_unique<prepared_statement>(audit_info(), make_shared<alter_type_statement::renames>(*this));
|
||||
}
|
||||
|
||||
|
||||
@@ -54,7 +54,7 @@ public:
|
||||
const shared_ptr<column_identifier> field_name,
|
||||
const shared_ptr<cql3_type::raw> field_type);
|
||||
virtual user_type make_updated_type(data_dictionary::database db, user_type to_update) const override;
|
||||
virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) override;
|
||||
virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;
|
||||
private:
|
||||
user_type do_add(data_dictionary::database db, user_type to_update) const;
|
||||
user_type do_alter(data_dictionary::database db, user_type to_update) const;
|
||||
@@ -71,7 +71,7 @@ public:
|
||||
void add_rename(shared_ptr<column_identifier> previous_name, shared_ptr<column_identifier> new_name);
|
||||
|
||||
virtual user_type make_updated_type(data_dictionary::database db, user_type to_update) const override;
|
||||
virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) override;
|
||||
virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -98,7 +98,7 @@ future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, utils::chun
|
||||
}
|
||||
|
||||
std::unique_ptr<cql3::statements::prepared_statement>
|
||||
alter_view_statement::prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) {
|
||||
alter_view_statement::prepare(data_dictionary::database db, cql_stats& stats) {
|
||||
return std::make_unique<prepared_statement>(audit_info(), make_shared<alter_view_statement>(*this));
|
||||
}
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ public:
|
||||
|
||||
future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, utils::chunked_vector<mutation>, cql3::cql_warnings_vec>> prepare_schema_mutations(query_processor& qp, const query_options& options, api::timestamp_type) const override;
|
||||
|
||||
virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg) override;
|
||||
virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -30,7 +30,7 @@ bool attach_service_level_statement::needs_guard(query_processor& qp, service::q
|
||||
|
||||
std::unique_ptr<cql3::statements::prepared_statement>
|
||||
cql3::statements::attach_service_level_statement::prepare(
|
||||
data_dictionary::database db, cql_stats &stats, const cql_config& cfg) {
|
||||
data_dictionary::database db, cql_stats &stats) {
|
||||
return std::make_unique<prepared_statement>(audit_info(), ::make_shared<attach_service_level_statement>(*this));
|
||||
}
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ class attach_service_level_statement final : public service_level_statement {
|
||||
public:
|
||||
attach_service_level_statement(sstring service_level, sstring role_name);
|
||||
virtual bool needs_guard(query_processor& qp, service::query_state&) const override;
|
||||
std::unique_ptr<cql3::statements::prepared_statement> prepare(data_dictionary::database db, cql_stats &stats, const cql_config& cfg) override;
|
||||
std::unique_ptr<cql3::statements::prepared_statement> prepare(data_dictionary::database db, cql_stats &stats) override;
|
||||
virtual future<> check_access(query_processor& qp, const service::client_state&) const override;
|
||||
virtual future<::shared_ptr<cql_transport::messages::result_message>>
|
||||
execute(query_processor&, service::query_state&, const query_options&, std::optional<service::group0_guard> guard) const override;
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user