mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-30 13:17:01 +00:00
Compare commits
350 Commits
debug_form
...
next-5.4
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ec9e5b82a0 | ||
|
|
4640b3efd3 | ||
|
|
136ccff353 | ||
|
|
29c352d9c8 | ||
|
|
888d5fe1a3 | ||
|
|
6e8911ed51 | ||
|
|
9c4fa2652c | ||
|
|
58377036b0 | ||
|
|
5b29da123f | ||
|
|
92ee525f22 | ||
|
|
bc1c6275a4 | ||
|
|
79629a80cd | ||
|
|
9f0b75bcd2 | ||
|
|
0fbec200e9 | ||
|
|
972b799773 | ||
|
|
68c581314a | ||
|
|
380ce9a6d8 | ||
|
|
2c01dfe12b | ||
|
|
ab22cb7253 | ||
|
|
0e02128d28 | ||
|
|
1e548770cf | ||
|
|
3e879c1bfa | ||
|
|
5e9a2193db | ||
|
|
c2e5d9e726 | ||
|
|
80ff0688b1 | ||
|
|
a319085870 | ||
|
|
b24bd4d176 | ||
|
|
f628e7439c | ||
|
|
42da43b5b4 | ||
|
|
679fa0f72a | ||
|
|
89733a1f18 | ||
|
|
f185a227a2 | ||
|
|
89fd08b955 | ||
|
|
f29d51d9c3 | ||
|
|
01d5169593 | ||
|
|
3116ea7d8e | ||
|
|
36ccf67bee | ||
|
|
bae47ca197 | ||
|
|
fdcbbb85ad | ||
|
|
65daae0fbe | ||
|
|
f07fbcf929 | ||
|
|
1d0a6672d6 | ||
|
|
e16327034c | ||
|
|
3510ff3179 | ||
|
|
e9b8d08b74 | ||
|
|
d67af8da1c | ||
|
|
66fc7c0494 | ||
|
|
67be26ff7d | ||
|
|
97893a4f6d | ||
|
|
ab9683d182 | ||
|
|
892ffa966d | ||
|
|
4b6e462266 | ||
|
|
ce22d0071b | ||
|
|
a5d34b62ac | ||
|
|
f121720898 | ||
|
|
d217ab9cc7 | ||
|
|
6eac67628e | ||
|
|
a28b38d0a9 | ||
|
|
614cabf9cd | ||
|
|
193fda6dfb | ||
|
|
50f3f3d1a3 | ||
|
|
9ce5c2e6ce | ||
|
|
2f2bc18376 | ||
|
|
c795275675 | ||
|
|
b13cee4c7c | ||
|
|
bea1f4891d | ||
|
|
9535abf552 | ||
|
|
abb4751e00 | ||
|
|
39ec136a09 | ||
|
|
b7ef9652fb | ||
|
|
d3b2702be1 | ||
|
|
4ef4893f7e | ||
|
|
19999554e7 | ||
|
|
43f77c71c7 | ||
|
|
4aa0b84ba7 | ||
|
|
427127de57 | ||
|
|
d7b1116170 | ||
|
|
72155312e5 | ||
|
|
f8dcbc6037 | ||
|
|
dc1968cb9e | ||
|
|
7e40b658c8 | ||
|
|
58671274d8 | ||
|
|
c18f14cd78 | ||
|
|
c19f980802 | ||
|
|
0abccd212d | ||
|
|
b8d0df24ed | ||
|
|
b3de65a8fb | ||
|
|
3eb15e841a | ||
|
|
017524c7d8 | ||
|
|
1680bc2902 | ||
|
|
2e836fa077 | ||
|
|
98139a8716 | ||
|
|
ee942874de | ||
|
|
4099833587 | ||
|
|
27802511c0 | ||
|
|
30ffce4c79 | ||
|
|
9869276192 | ||
|
|
53a9dfba3a | ||
|
|
0d4e22ef55 | ||
|
|
ae6c8753e6 | ||
|
|
36c66d5a8f | ||
|
|
9413afce41 | ||
|
|
8e20379305 | ||
|
|
d32550f953 | ||
|
|
4e9ed69a75 | ||
|
|
7552c4b187 | ||
|
|
87dcd29ec3 | ||
|
|
9e7cd767dd | ||
|
|
63d1c763fc | ||
|
|
29c892ea5a | ||
|
|
9bb175852d | ||
|
|
daf4ffb9b4 | ||
|
|
03a54a4c07 | ||
|
|
4b0c60cdc3 | ||
|
|
28d0fc1b6b | ||
|
|
393880f355 | ||
|
|
e30a2af700 | ||
|
|
e0b4483bb8 | ||
|
|
8a6300be4c | ||
|
|
d8e0cba45d | ||
|
|
3933fc25de | ||
|
|
a741202ef0 | ||
|
|
0529055b1e | ||
|
|
81c0829993 | ||
|
|
1979cde07a | ||
|
|
7e2e8ddda6 | ||
|
|
986516fa4e | ||
|
|
020c7b662b | ||
|
|
e008060f39 | ||
|
|
7589981898 | ||
|
|
ed89deab40 | ||
|
|
905b8f59bd | ||
|
|
862e2affe0 | ||
|
|
d68d765247 | ||
|
|
d8df02f490 | ||
|
|
d85d37921a | ||
|
|
2d4825835c | ||
|
|
201d990072 | ||
|
|
678948e671 | ||
|
|
8acedb9255 | ||
|
|
3ed0826292 | ||
|
|
154fcffbc7 | ||
|
|
7e946925c3 | ||
|
|
d09e2a2311 | ||
|
|
6462a2f391 | ||
|
|
2e19e7cb6d | ||
|
|
fef7498da2 | ||
|
|
f45c878149 | ||
|
|
7225410af7 | ||
|
|
564b01fda9 | ||
|
|
1d80427888 | ||
|
|
ff17ec81e4 | ||
|
|
121f2a530e | ||
|
|
b77581c84c | ||
|
|
ea176bf4ce | ||
|
|
3835fd681d | ||
|
|
14da273c4c | ||
|
|
33d5f27244 | ||
|
|
c4515a9b99 | ||
|
|
10f137e367 | ||
|
|
53e1ed0ebb | ||
|
|
1aedc7372d | ||
|
|
28781ca37e | ||
|
|
9218bbb9b9 | ||
|
|
8faef135cb | ||
|
|
75962d3e94 | ||
|
|
034304127c | ||
|
|
95068d3c00 | ||
|
|
3dca49c524 | ||
|
|
505a0714a6 | ||
|
|
b8bd650e32 | ||
|
|
fd0b083414 | ||
|
|
1609b77b45 | ||
|
|
aec4d157da | ||
|
|
80d7bf7366 | ||
|
|
0dc50ac449 | ||
|
|
e9d38b3c9e | ||
|
|
bfc4104eb9 | ||
|
|
c1c1fde90f | ||
|
|
94a551e671 | ||
|
|
d6c7a26419 | ||
|
|
7e05a54b9c | ||
|
|
2fa581d8fb | ||
|
|
892c97295b | ||
|
|
5f6b1dc5b9 | ||
|
|
f1d547e74e | ||
|
|
3df5de60a9 | ||
|
|
fd7d57b9fa | ||
|
|
8a6c543033 | ||
|
|
27beb0fe60 | ||
|
|
e868ade258 | ||
|
|
57fa61e2ca | ||
|
|
6fc3c62223 | ||
|
|
8414aa292a | ||
|
|
881ac7a9af | ||
|
|
24db04dbe4 | ||
|
|
9fcb7baa9e | ||
|
|
8a1f01ad88 | ||
|
|
db1c8e8754 | ||
|
|
6e01e821d7 | ||
|
|
02182caff4 | ||
|
|
42d9e36454 | ||
|
|
05d2078911 | ||
|
|
7908f69b7c | ||
|
|
a762ab6283 | ||
|
|
9aa39850fd | ||
|
|
b51abf1853 | ||
|
|
111264e3b1 | ||
|
|
58a1be93b2 | ||
|
|
6a6450a82d | ||
|
|
75805a7f23 | ||
|
|
f843e7181b | ||
|
|
c76fa47cc4 | ||
|
|
2caef424fe | ||
|
|
5e665cd7fb | ||
|
|
b770be8f78 | ||
|
|
b5ff9a2bf8 | ||
|
|
0da3772d50 | ||
|
|
72e804306c | ||
|
|
384a0628b0 | ||
|
|
435000ee70 | ||
|
|
e691604823 | ||
|
|
46098c5a0e | ||
|
|
b2fe98bfc6 | ||
|
|
e4526449a1 | ||
|
|
c44bb1544d | ||
|
|
fcfcd6d35a | ||
|
|
cf42ca0c2a | ||
|
|
62d8c7274a | ||
|
|
8080c15d7a | ||
|
|
8398f361cd | ||
|
|
dba6070794 | ||
|
|
0a6a52e08c | ||
|
|
25c0510015 | ||
|
|
311e31b36f | ||
|
|
6a6a4fde79 | ||
|
|
390414c99e | ||
|
|
26b812067b | ||
|
|
e83c4cc75c | ||
|
|
df1843311a | ||
|
|
fcaae2ea78 | ||
|
|
a1b6edd5d3 | ||
|
|
6c625e8cd3 | ||
|
|
10df72ed04 | ||
|
|
d4788406d4 | ||
|
|
081a36e34f | ||
|
|
c0c7de8fd1 | ||
|
|
aee9947f6c | ||
|
|
6fdfec5282 | ||
|
|
50a5c5379a | ||
|
|
938b993331 | ||
|
|
7971abb8e3 | ||
|
|
65fb562ae3 | ||
|
|
97a9f1dc7b | ||
|
|
7f629df6fd | ||
|
|
3ff8051532 | ||
|
|
e5dcef32ef | ||
|
|
199cfd0784 | ||
|
|
5d88e997ef | ||
|
|
7bb6386c14 | ||
|
|
993e6997c0 | ||
|
|
8b487be054 | ||
|
|
346e883dfc | ||
|
|
ceffbdf832 | ||
|
|
da6a87057f | ||
|
|
ffb580df71 | ||
|
|
a983c009cb | ||
|
|
00f04e0f94 | ||
|
|
0ebcc21193 | ||
|
|
ff596f9d9d | ||
|
|
e3153dd5b0 | ||
|
|
f126ccb2e9 | ||
|
|
d8586fd101 | ||
|
|
a228d09017 | ||
|
|
c5f2095f6e | ||
|
|
3d22f42cf9 | ||
|
|
8ca5794756 | ||
|
|
abeeefb427 | ||
|
|
9c482ff262 | ||
|
|
bfc98d1909 | ||
|
|
2cef52aeaa | ||
|
|
a55561fc64 | ||
|
|
7288bdfe09 | ||
|
|
ac7ed6857a | ||
|
|
bc8ff68cf6 | ||
|
|
0974ef893e | ||
|
|
9fc4c265a5 | ||
|
|
0518e47daf | ||
|
|
1e8eb6172a | ||
|
|
14814c972e | ||
|
|
7a67db594a | ||
|
|
5434fcb5a8 | ||
|
|
b4ef2248cc | ||
|
|
21996e12ae | ||
|
|
df7b96a092 | ||
|
|
5df85094d9 | ||
|
|
a0ca8900e1 | ||
|
|
98bd287177 | ||
|
|
c4a249022f | ||
|
|
58a89e7a42 | ||
|
|
1a0424db01 | ||
|
|
6d7919041b | ||
|
|
17c15f6222 | ||
|
|
91d1c9153b | ||
|
|
95364e2454 | ||
|
|
6d779f58a9 | ||
|
|
b956646ba2 | ||
|
|
62b93018ac | ||
|
|
b0410c9391 | ||
|
|
6f073dfa54 | ||
|
|
a24b53e6bb | ||
|
|
219adcea71 | ||
|
|
6d01d01deb | ||
|
|
b1f54efc2d | ||
|
|
bc1202aab2 | ||
|
|
2cb709461c | ||
|
|
44c72f6e56 | ||
|
|
6943447c6a | ||
|
|
b259bb43c6 | ||
|
|
88e96def63 | ||
|
|
187e275147 | ||
|
|
7926e4e7eb | ||
|
|
23e4762baa | ||
|
|
1dad9cdfdf | ||
|
|
1bee785734 | ||
|
|
df61c2c2ce | ||
|
|
20b5896b7a | ||
|
|
eff8157cea | ||
|
|
043dd5cc12 | ||
|
|
3f66f18f85 | ||
|
|
9c7454993f | ||
|
|
7c38cd9359 | ||
|
|
66898b2144 | ||
|
|
9e33771e1b | ||
|
|
b25859d6de | ||
|
|
38a3fd4708 | ||
|
|
66be0fc1eb | ||
|
|
4345b26eb2 | ||
|
|
c4e8557afa | ||
|
|
6d91d560ec | ||
|
|
9c37f5e02f | ||
|
|
ed7b3e2325 | ||
|
|
1100a0b176 | ||
|
|
2aa29763af | ||
|
|
24efacf90d | ||
|
|
1639a468df | ||
|
|
a0766ac236 | ||
|
|
fa0f382a82 | ||
|
|
37fd8a4c36 | ||
|
|
83f7d0073a |
87
.github/scripts/label_promoted_commits.py
vendored
Executable file
87
.github/scripts/label_promoted_commits.py
vendored
Executable file
@@ -0,0 +1,87 @@
|
|||||||
|
from github import Github
|
||||||
|
import argparse
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Fail fast at import time if the token used to talk to the GitHub API
# is not configured in the environment.
github_token = os.environ.get("GITHUB_TOKEN")
if github_token is None:
    print("Please set the 'GITHUB_TOKEN' environment variable")
    sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def parser():
    """Parse the command-line arguments of the promotion-labeling script.

    Returns:
        argparse.Namespace with ``repository``, ``commit_before_merge``,
        ``commit_after_merge``, ``update_issue`` and ``ref`` attributes.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--repository', type=str, required=True,
                            help='Github repository name (e.g., scylladb/scylladb)')
    arg_parser.add_argument('--commit_before_merge', type=str, required=True,
                            help='Git commit ID to start labeling from (newest commit).')
    arg_parser.add_argument('--commit_after_merge', type=str, required=True,
                            help='Git commit ID to end labeling at (oldest commit, exclusive).')
    # NOTE: the original used type=bool, but argparse applies bool() to the
    # raw string, so any non-empty value — including "False" — parsed as
    # True.  Parse the accepted truthy spellings explicitly instead; the
    # default (False) is unaffected because argparse does not apply `type`
    # to defaults.
    arg_parser.add_argument('--update_issue',
                            type=lambda v: v.lower() in ('true', '1', 'yes'),
                            default=False,
                            help='Set True to update issues when backport was done')
    arg_parser.add_argument('--ref', type=str, required=True, help='PR target branch')
    return arg_parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def add_comment_and_close_pr(pr, comment):
    """Leave *comment* on an open pull request and then close it.

    A pull request that is not in the 'open' state is left untouched.
    """
    if pr.state != 'open':
        return
    pr.create_issue_comment(comment)
    pr.edit(state="closed")
|
||||||
|
|
||||||
|
|
||||||
|
def mark_backport_done(repo, ref_pr_number, branch):
    """Replace the parent PR's ``backport/<branch>`` label with
    ``backport/<branch>-done`` to record that the backport was merged.
    """
    parent_pr = repo.get_pull(int(ref_pr_number))
    pending_label = f'backport/{branch}'
    done_label = f'{pending_label}-done'
    existing = {label.name for label in parent_pr.get_labels()}
    if pending_label in existing:
        parent_pr.remove_from_labels(pending_label)
    if done_label not in existing:
        parent_pr.add_to_labels(done_label)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Label PRs whose commits were just pushed to master or to a release branch.

    This script is triggered by a push event to either the master branch or a
    branch named branch-x.y (where x and y represent version numbers).  Based
    on the pushed branch it performs the following actions:

    - When the ref branch is ``master``, it adds the ``promoted-to-master``
      label, which is needed later by the auto-backport process.
    - When the ref branch is ``branch-x.y`` (i.e. a backport was merged), it
      replaces the original PR's ``backport/x.y`` label with
      ``backport/x.y-done`` and closes the backport PR (GitHub auto-closes
      only PRs targeting the default branch).
    """
    args = parser()
    pr_pattern = re.compile(r'Closes .*#([0-9]+)')
    target_branch = re.search(r'branch-(\d+\.\d+)', args.ref)
    g = Github(github_token)
    repo = g.get_repo(args.repository, lazy=False)
    # Every commit pushed between the two SHAs was just promoted.
    commits = repo.compare(head=args.commit_after_merge, base=args.commit_before_merge)
    processed_prs = set()
    for commit in commits.commits:
        print(f'Commit sha is: {commit.sha}')
        match = pr_pattern.search(commit.commit.message)
        if not match:
            continue
        pr_number = int(match.group(1))
        # A PR merged with several commits shows up once per commit;
        # only label it the first time.
        if pr_number in processed_prs:
            continue
        if target_branch:
            pr = repo.get_pull(pr_number)
            branch_name = target_branch[1]
            # `pr.body` is None when the PR has no description; guard
            # against it so re.findall() doesn't raise TypeError.
            refs_pr = re.findall(r'Refs (?:#|https.*?)(\d+)', pr.body or '')
            if refs_pr:
                print(f'branch-{target_branch.group(1)}, pr number is: {pr_number}')
                # 1. change the backport label of the parent PR to note that
                #    we've merged the corresponding backport PR
                # 2. close the backport PR and leave a comment on it to note
                #    that it has been merged with a certain git commit
                ref_pr_number = refs_pr[0]
                mark_backport_done(repo, ref_pr_number, branch_name)
                comment = f'Closed via {commit.sha}'
                add_comment_and_close_pr(pr, comment)
        else:
            print(f'master branch, pr number is: {pr_number}')
            pr = repo.get_pull(pr_number)
            pr.add_to_labels('promoted-to-master')
        processed_prs.add(pr_number)
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point (invoked by the add-label-when-promoted workflow).
if __name__ == "__main__":
    main()
|
||||||
36
.github/workflows/add-label-when-promoted.yaml
vendored
Normal file
36
.github/workflows/add-label-when-promoted.yaml
vendored
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
name: Check if commits are promoted
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
- branch-*.*
|
||||||
|
|
||||||
|
env:
|
||||||
|
DEFAULT_BRANCH: 'master'
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
check-commit:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
permissions:
|
||||||
|
pull-requests: write
|
||||||
|
issues: write
|
||||||
|
steps:
|
||||||
|
- name: Dump GitHub context
|
||||||
|
env:
|
||||||
|
GITHUB_CONTEXT: ${{ toJson(github) }}
|
||||||
|
run: echo "$GITHUB_CONTEXT"
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
repository: ${{ github.repository }}
|
||||||
|
ref: ${{ env.DEFAULT_BRANCH }}
|
||||||
|
fetch-depth: 0 # Fetch all history for all tags and branches
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: sudo apt-get install -y python3-github
|
||||||
|
|
||||||
|
- name: Run python script
|
||||||
|
env:
|
||||||
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
run: python .github/scripts/label_promoted_commits.py --commit_before_merge ${{ github.event.before }} --commit_after_merge ${{ github.event.after }} --repository ${{ github.repository }} --ref ${{ github.ref }}
|
||||||
2
.gitmodules
vendored
2
.gitmodules
vendored
@@ -1,6 +1,6 @@
|
|||||||
[submodule "seastar"]
|
[submodule "seastar"]
|
||||||
path = seastar
|
path = seastar
|
||||||
url = ../seastar
|
url = ../scylla-seastar
|
||||||
ignore = dirty
|
ignore = dirty
|
||||||
[submodule "swagger-ui"]
|
[submodule "swagger-ui"]
|
||||||
path = swagger-ui
|
path = swagger-ui
|
||||||
|
|||||||
@@ -78,7 +78,7 @@ fi
|
|||||||
|
|
||||||
# Default scylla product/version tags
|
# Default scylla product/version tags
|
||||||
PRODUCT=scylla
|
PRODUCT=scylla
|
||||||
VERSION=5.4.0-dev
|
VERSION=5.4.10
|
||||||
|
|
||||||
if test -f version
|
if test -f version
|
||||||
then
|
then
|
||||||
|
|||||||
@@ -208,7 +208,10 @@ protected:
|
|||||||
sstring local_dc = topology.get_datacenter();
|
sstring local_dc = topology.get_datacenter();
|
||||||
std::unordered_set<gms::inet_address> local_dc_nodes = topology.get_datacenter_endpoints().at(local_dc);
|
std::unordered_set<gms::inet_address> local_dc_nodes = topology.get_datacenter_endpoints().at(local_dc);
|
||||||
for (auto& ip : local_dc_nodes) {
|
for (auto& ip : local_dc_nodes) {
|
||||||
if (_gossiper.is_alive(ip)) {
|
// Note that it's not enough for the node to be is_alive() - a
|
||||||
|
// node joining the cluster is also "alive" but not responsive to
|
||||||
|
// requests. We need the node to be in normal state. See #19694.
|
||||||
|
if (_gossiper.is_normal(ip)) {
|
||||||
rjson::push_back(results, rjson::from_string(ip.to_sstring()));
|
rjson::push_back(results, rjson::from_string(ip.to_sstring()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -84,6 +84,14 @@
|
|||||||
"type":"string",
|
"type":"string",
|
||||||
"paramType":"path"
|
"paramType":"path"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name":"flush_memtables",
|
||||||
|
"description":"Controls flushing of memtables before compaction (true by default). Set to \"false\" to skip automatic flushing of memtables before compaction, e.g. when the table is flushed explicitly before invoking the compaction api.",
|
||||||
|
"required":false,
|
||||||
|
"allowMultiple":false,
|
||||||
|
"type":"boolean",
|
||||||
|
"paramType":"query"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name":"split_output",
|
"name":"split_output",
|
||||||
"description":"true if the output of the major compaction should be split in several sstables",
|
"description":"true if the output of the major compaction should be split in several sstables",
|
||||||
|
|||||||
43
api/api-doc/raft.json
Normal file
43
api/api-doc/raft.json
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
{
|
||||||
|
"apiVersion":"0.0.1",
|
||||||
|
"swaggerVersion":"1.2",
|
||||||
|
"basePath":"{{Protocol}}://{{Host}}",
|
||||||
|
"resourcePath":"/raft",
|
||||||
|
"produces":[
|
||||||
|
"application/json"
|
||||||
|
],
|
||||||
|
"apis":[
|
||||||
|
{
|
||||||
|
"path":"/raft/trigger_snapshot/{group_id}",
|
||||||
|
"operations":[
|
||||||
|
{
|
||||||
|
"method":"POST",
|
||||||
|
"summary":"Triggers snapshot creation and log truncation for the given Raft group",
|
||||||
|
"type":"string",
|
||||||
|
"nickname":"trigger_snapshot",
|
||||||
|
"produces":[
|
||||||
|
"application/json"
|
||||||
|
],
|
||||||
|
"parameters":[
|
||||||
|
{
|
||||||
|
"name":"group_id",
|
||||||
|
"description":"The ID of the group which should get snapshotted",
|
||||||
|
"required":true,
|
||||||
|
"allowMultiple":false,
|
||||||
|
"type":"string",
|
||||||
|
"paramType":"path"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"timeout",
|
||||||
|
"description":"Timeout in seconds after which the endpoint returns a failure. If not provided, 60s is used.",
|
||||||
|
"required":false,
|
||||||
|
"allowMultiple":false,
|
||||||
|
"type":"long",
|
||||||
|
"paramType":"query"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -701,6 +701,30 @@
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"path":"/storage_service/compact",
|
||||||
|
"operations":[
|
||||||
|
{
|
||||||
|
"method":"POST",
|
||||||
|
"summary":"Forces major compaction in all keyspaces",
|
||||||
|
"type":"void",
|
||||||
|
"nickname":"force_compaction",
|
||||||
|
"produces":[
|
||||||
|
"application/json"
|
||||||
|
],
|
||||||
|
"parameters":[
|
||||||
|
{
|
||||||
|
"name":"flush_memtables",
|
||||||
|
"description":"Controls flushing of memtables before compaction (true by default). Set to \"false\" to skip automatic flushing of memtables before compaction, e.g. when tables were flushed explicitly before invoking the compaction api.",
|
||||||
|
"required":false,
|
||||||
|
"allowMultiple":false,
|
||||||
|
"type":"boolean",
|
||||||
|
"paramType":"query"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"path":"/storage_service/keyspace_compaction/{keyspace}",
|
"path":"/storage_service/keyspace_compaction/{keyspace}",
|
||||||
"operations":[
|
"operations":[
|
||||||
@@ -728,6 +752,14 @@
|
|||||||
"allowMultiple":false,
|
"allowMultiple":false,
|
||||||
"type":"string",
|
"type":"string",
|
||||||
"paramType":"query"
|
"paramType":"query"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name":"flush_memtables",
|
||||||
|
"description":"Controls flushing of memtables before compaction (true by default). Set to \"false\" to skip automatic flushing of memtables before compaction, e.g. when tables were flushed explicitly before invoking the compaction api.",
|
||||||
|
"required":false,
|
||||||
|
"allowMultiple":false,
|
||||||
|
"type":"boolean",
|
||||||
|
"paramType":"query"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -912,6 +944,21 @@
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"path":"/storage_service/flush",
|
||||||
|
"operations":[
|
||||||
|
{
|
||||||
|
"method":"POST",
|
||||||
|
"summary":"Flush all memtables in all keyspaces.",
|
||||||
|
"type":"void",
|
||||||
|
"nickname":"force_flush",
|
||||||
|
"produces":[
|
||||||
|
"application/json"
|
||||||
|
],
|
||||||
|
"parameters":[]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"path":"/storage_service/keyspace_flush/{keyspace}",
|
"path":"/storage_service/keyspace_flush/{keyspace}",
|
||||||
"operations":[
|
"operations":[
|
||||||
|
|||||||
15
api/api.cc
15
api/api.cc
@@ -31,6 +31,7 @@
|
|||||||
#include "api/config.hh"
|
#include "api/config.hh"
|
||||||
#include "task_manager.hh"
|
#include "task_manager.hh"
|
||||||
#include "task_manager_test.hh"
|
#include "task_manager_test.hh"
|
||||||
|
#include "raft.hh"
|
||||||
|
|
||||||
logging::logger apilog("api");
|
logging::logger apilog("api");
|
||||||
|
|
||||||
@@ -294,6 +295,18 @@ future<> set_server_task_manager_test(http_context& ctx) {
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
future<> set_server_raft(http_context& ctx, sharded<service::raft_group_registry>& raft_gr) {
|
||||||
|
auto rb = std::make_shared<api_registry_builder>(ctx.api_doc);
|
||||||
|
return ctx.http_server.set_routes([rb, &ctx, &raft_gr] (routes& r) {
|
||||||
|
rb->register_function(r, "raft", "The Raft API");
|
||||||
|
set_raft(ctx, r, raft_gr);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
future<> unset_server_raft(http_context& ctx) {
|
||||||
|
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_raft(ctx, r); });
|
||||||
|
}
|
||||||
|
|
||||||
void req_params::process(const request& req) {
|
void req_params::process(const request& req) {
|
||||||
// Process mandatory parameters
|
// Process mandatory parameters
|
||||||
for (auto& [name, ent] : params) {
|
for (auto& [name, ent] : params) {
|
||||||
@@ -301,7 +314,7 @@ void req_params::process(const request& req) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
ent.value = req.param[name];
|
ent.value = req.get_path_param(name);
|
||||||
} catch (std::out_of_range&) {
|
} catch (std::out_of_range&) {
|
||||||
throw httpd::bad_param_exception(fmt::format("Mandatory parameter '{}' was not provided", name));
|
throw httpd::bad_param_exception(fmt::format("Mandatory parameter '{}' was not provided", name));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ class load_meter;
|
|||||||
class storage_proxy;
|
class storage_proxy;
|
||||||
class storage_service;
|
class storage_service;
|
||||||
class raft_group0_client;
|
class raft_group0_client;
|
||||||
|
class raft_group_registry;
|
||||||
|
|
||||||
} // namespace service
|
} // namespace service
|
||||||
|
|
||||||
@@ -117,5 +118,7 @@ future<> set_server_compaction_manager(http_context& ctx);
|
|||||||
future<> set_server_done(http_context& ctx);
|
future<> set_server_done(http_context& ctx);
|
||||||
future<> set_server_task_manager(http_context& ctx, lw_shared_ptr<db::config> cfg);
|
future<> set_server_task_manager(http_context& ctx, lw_shared_ptr<db::config> cfg);
|
||||||
future<> set_server_task_manager_test(http_context& ctx);
|
future<> set_server_task_manager_test(http_context& ctx);
|
||||||
|
future<> set_server_raft(http_context&, sharded<service::raft_group_registry>&);
|
||||||
|
future<> unset_server_raft(http_context&);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -54,7 +54,7 @@ static const char* str_to_regex(const sstring& v) {
|
|||||||
void set_collectd(http_context& ctx, routes& r) {
|
void set_collectd(http_context& ctx, routes& r) {
|
||||||
cd::get_collectd.set(r, [](std::unique_ptr<request> req) {
|
cd::get_collectd.set(r, [](std::unique_ptr<request> req) {
|
||||||
|
|
||||||
auto id = ::make_shared<scollectd::type_instance_id>(req->param["pluginid"],
|
auto id = ::make_shared<scollectd::type_instance_id>(req->get_path_param("pluginid"),
|
||||||
req->get_query_param("instance"), req->get_query_param("type"),
|
req->get_query_param("instance"), req->get_query_param("type"),
|
||||||
req->get_query_param("type_instance"));
|
req->get_query_param("type_instance"));
|
||||||
|
|
||||||
@@ -91,7 +91,7 @@ void set_collectd(http_context& ctx, routes& r) {
|
|||||||
});
|
});
|
||||||
|
|
||||||
cd::enable_collectd.set(r, [](std::unique_ptr<request> req) -> future<json::json_return_type> {
|
cd::enable_collectd.set(r, [](std::unique_ptr<request> req) -> future<json::json_return_type> {
|
||||||
std::regex plugin(req->param["pluginid"].c_str());
|
std::regex plugin(req->get_path_param("pluginid").c_str());
|
||||||
std::regex instance(str_to_regex(req->get_query_param("instance")));
|
std::regex instance(str_to_regex(req->get_query_param("instance")));
|
||||||
std::regex type(str_to_regex(req->get_query_param("type")));
|
std::regex type(str_to_regex(req->get_query_param("type")));
|
||||||
std::regex type_instance(str_to_regex(req->get_query_param("type_instance")));
|
std::regex type_instance(str_to_regex(req->get_query_param("type_instance")));
|
||||||
|
|||||||
@@ -333,7 +333,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_memtable_columns_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_memtable_columns_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf(ctx, req->param["name"], uint64_t{0}, [](replica::column_family& cf) {
|
return map_reduce_cf(ctx, req->get_path_param("name"), uint64_t{0}, [](replica::column_family& cf) {
|
||||||
return boost::accumulate(cf.active_memtables() | boost::adaptors::transformed(std::mem_fn(&replica::memtable::partition_count)), uint64_t(0));
|
return boost::accumulate(cf.active_memtables() | boost::adaptors::transformed(std::mem_fn(&replica::memtable::partition_count)), uint64_t(0));
|
||||||
}, std::plus<>());
|
}, std::plus<>());
|
||||||
});
|
});
|
||||||
@@ -353,7 +353,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_memtable_off_heap_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_memtable_off_heap_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](replica::column_family& cf) {
|
return map_reduce_cf(ctx, req->get_path_param("name"), int64_t(0), [](replica::column_family& cf) {
|
||||||
return boost::accumulate(cf.active_memtables() | boost::adaptors::transformed([] (replica::memtable* active_memtable) {
|
return boost::accumulate(cf.active_memtables() | boost::adaptors::transformed([] (replica::memtable* active_memtable) {
|
||||||
return active_memtable->region().occupancy().total_space();
|
return active_memtable->region().occupancy().total_space();
|
||||||
}), uint64_t(0));
|
}), uint64_t(0));
|
||||||
@@ -369,7 +369,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_memtable_live_data_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_memtable_live_data_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](replica::column_family& cf) {
|
return map_reduce_cf(ctx, req->get_path_param("name"), int64_t(0), [](replica::column_family& cf) {
|
||||||
return boost::accumulate(cf.active_memtables() | boost::adaptors::transformed([] (replica::memtable* active_memtable) {
|
return boost::accumulate(cf.active_memtables() | boost::adaptors::transformed([] (replica::memtable* active_memtable) {
|
||||||
return active_memtable->region().occupancy().used_space();
|
return active_memtable->region().occupancy().used_space();
|
||||||
}), uint64_t(0));
|
}), uint64_t(0));
|
||||||
@@ -394,7 +394,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
|
|
||||||
cf::get_cf_all_memtables_off_heap_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_cf_all_memtables_off_heap_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
warn(unimplemented::cause::INDEXES);
|
warn(unimplemented::cause::INDEXES);
|
||||||
return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](replica::column_family& cf) {
|
return map_reduce_cf(ctx, req->get_path_param("name"), int64_t(0), [](replica::column_family& cf) {
|
||||||
return cf.occupancy().total_space();
|
return cf.occupancy().total_space();
|
||||||
}, std::plus<int64_t>());
|
}, std::plus<int64_t>());
|
||||||
});
|
});
|
||||||
@@ -410,7 +410,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
|
|
||||||
cf::get_cf_all_memtables_live_data_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_cf_all_memtables_live_data_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
warn(unimplemented::cause::INDEXES);
|
warn(unimplemented::cause::INDEXES);
|
||||||
return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](replica::column_family& cf) {
|
return map_reduce_cf(ctx, req->get_path_param("name"), int64_t(0), [](replica::column_family& cf) {
|
||||||
return cf.occupancy().used_space();
|
return cf.occupancy().used_space();
|
||||||
}, std::plus<int64_t>());
|
}, std::plus<int64_t>());
|
||||||
});
|
});
|
||||||
@@ -425,7 +425,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_memtable_switch_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_memtable_switch_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return get_cf_stats(ctx,req->param["name"] ,&replica::column_family_stats::memtable_switch_count);
|
return get_cf_stats(ctx,req->get_path_param("name") ,&replica::column_family_stats::memtable_switch_count);
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_all_memtable_switch_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_all_memtable_switch_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
@@ -434,7 +434,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
|
|
||||||
// FIXME: this refers to partitions, not rows.
|
// FIXME: this refers to partitions, not rows.
|
||||||
cf::get_estimated_row_size_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_estimated_row_size_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](replica::column_family& cf) {
|
return map_reduce_cf(ctx, req->get_path_param("name"), utils::estimated_histogram(0), [](replica::column_family& cf) {
|
||||||
utils::estimated_histogram res(0);
|
utils::estimated_histogram res(0);
|
||||||
for (auto sstables = cf.get_sstables(); auto& i : *sstables) {
|
for (auto sstables = cf.get_sstables(); auto& i : *sstables) {
|
||||||
res.merge(i->get_stats_metadata().estimated_partition_size);
|
res.merge(i->get_stats_metadata().estimated_partition_size);
|
||||||
@@ -446,7 +446,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
|
|
||||||
// FIXME: this refers to partitions, not rows.
|
// FIXME: this refers to partitions, not rows.
|
||||||
cf::get_estimated_row_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_estimated_row_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](replica::column_family& cf) {
|
return map_reduce_cf(ctx, req->get_path_param("name"), int64_t(0), [](replica::column_family& cf) {
|
||||||
uint64_t res = 0;
|
uint64_t res = 0;
|
||||||
for (auto sstables = cf.get_sstables(); auto& i : *sstables) {
|
for (auto sstables = cf.get_sstables(); auto& i : *sstables) {
|
||||||
res += i->get_stats_metadata().estimated_partition_size.count();
|
res += i->get_stats_metadata().estimated_partition_size.count();
|
||||||
@@ -457,7 +457,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_estimated_column_count_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_estimated_column_count_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](replica::column_family& cf) {
|
return map_reduce_cf(ctx, req->get_path_param("name"), utils::estimated_histogram(0), [](replica::column_family& cf) {
|
||||||
utils::estimated_histogram res(0);
|
utils::estimated_histogram res(0);
|
||||||
for (auto sstables = cf.get_sstables(); auto& i : *sstables) {
|
for (auto sstables = cf.get_sstables(); auto& i : *sstables) {
|
||||||
res.merge(i->get_stats_metadata().estimated_cells_count);
|
res.merge(i->get_stats_metadata().estimated_cells_count);
|
||||||
@@ -474,7 +474,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_pending_flushes.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_pending_flushes.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return get_cf_stats(ctx,req->param["name"] ,&replica::column_family_stats::pending_flushes);
|
return get_cf_stats(ctx,req->get_path_param("name") ,&replica::column_family_stats::pending_flushes);
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_all_pending_flushes.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_all_pending_flushes.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
@@ -482,7 +482,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_read.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_read.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return get_cf_stats_count(ctx,req->param["name"] ,&replica::column_family_stats::reads);
|
return get_cf_stats_count(ctx,req->get_path_param("name") ,&replica::column_family_stats::reads);
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_all_read.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_all_read.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
@@ -490,7 +490,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_write.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_write.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return get_cf_stats_count(ctx, req->param["name"] ,&replica::column_family_stats::writes);
|
return get_cf_stats_count(ctx, req->get_path_param("name") ,&replica::column_family_stats::writes);
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_all_write.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_all_write.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
@@ -498,19 +498,19 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_read_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_read_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return get_cf_histogram(ctx, req->param["name"], &replica::column_family_stats::reads);
|
return get_cf_histogram(ctx, req->get_path_param("name"), &replica::column_family_stats::reads);
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_read_latency_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_read_latency_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return get_cf_rate_and_histogram(ctx, req->param["name"], &replica::column_family_stats::reads);
|
return get_cf_rate_and_histogram(ctx, req->get_path_param("name"), &replica::column_family_stats::reads);
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_read_latency.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_read_latency.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return get_cf_stats_sum(ctx,req->param["name"] ,&replica::column_family_stats::reads);
|
return get_cf_stats_sum(ctx,req->get_path_param("name") ,&replica::column_family_stats::reads);
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_write_latency.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_write_latency.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return get_cf_stats_sum(ctx, req->param["name"] ,&replica::column_family_stats::writes);
|
return get_cf_stats_sum(ctx, req->get_path_param("name") ,&replica::column_family_stats::writes);
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_all_read_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_all_read_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
@@ -522,11 +522,11 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_write_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_write_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return get_cf_histogram(ctx, req->param["name"], &replica::column_family_stats::writes);
|
return get_cf_histogram(ctx, req->get_path_param("name"), &replica::column_family_stats::writes);
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_write_latency_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_write_latency_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return get_cf_rate_and_histogram(ctx, req->param["name"], &replica::column_family_stats::writes);
|
return get_cf_rate_and_histogram(ctx, req->get_path_param("name"), &replica::column_family_stats::writes);
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_all_write_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_all_write_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
@@ -538,7 +538,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_pending_compactions.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_pending_compactions.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](replica::column_family& cf) {
|
return map_reduce_cf(ctx, req->get_path_param("name"), int64_t(0), [](replica::column_family& cf) {
|
||||||
return cf.estimate_pending_compactions();
|
return cf.estimate_pending_compactions();
|
||||||
}, std::plus<int64_t>());
|
}, std::plus<int64_t>());
|
||||||
});
|
});
|
||||||
@@ -550,7 +550,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_live_ss_table_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_live_ss_table_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return get_cf_stats(ctx, req->param["name"], &replica::column_family_stats::live_sstable_count);
|
return get_cf_stats(ctx, req->get_path_param("name"), &replica::column_family_stats::live_sstable_count);
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_all_live_ss_table_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_all_live_ss_table_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
@@ -558,11 +558,11 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_unleveled_sstables.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_unleveled_sstables.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return get_cf_unleveled_sstables(ctx, req->param["name"]);
|
return get_cf_unleveled_sstables(ctx, req->get_path_param("name"));
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_live_disk_space_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_live_disk_space_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return sum_sstable(ctx, req->param["name"], false);
|
return sum_sstable(ctx, req->get_path_param("name"), false);
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_all_live_disk_space_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_all_live_disk_space_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
@@ -570,7 +570,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_total_disk_space_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_total_disk_space_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return sum_sstable(ctx, req->param["name"], true);
|
return sum_sstable(ctx, req->get_path_param("name"), true);
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_all_total_disk_space_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_all_total_disk_space_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
@@ -579,7 +579,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
|
|
||||||
// FIXME: this refers to partitions, not rows.
|
// FIXME: this refers to partitions, not rows.
|
||||||
cf::get_min_row_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_min_row_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf(ctx, req->param["name"], INT64_MAX, min_partition_size, min_int64);
|
return map_reduce_cf(ctx, req->get_path_param("name"), INT64_MAX, min_partition_size, min_int64);
|
||||||
});
|
});
|
||||||
|
|
||||||
// FIXME: this refers to partitions, not rows.
|
// FIXME: this refers to partitions, not rows.
|
||||||
@@ -589,7 +589,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
|
|
||||||
// FIXME: this refers to partitions, not rows.
|
// FIXME: this refers to partitions, not rows.
|
||||||
cf::get_max_row_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_max_row_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf(ctx, req->param["name"], int64_t(0), max_partition_size, max_int64);
|
return map_reduce_cf(ctx, req->get_path_param("name"), int64_t(0), max_partition_size, max_int64);
|
||||||
});
|
});
|
||||||
|
|
||||||
// FIXME: this refers to partitions, not rows.
|
// FIXME: this refers to partitions, not rows.
|
||||||
@@ -600,7 +600,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
// FIXME: this refers to partitions, not rows.
|
// FIXME: this refers to partitions, not rows.
|
||||||
cf::get_mean_row_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_mean_row_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
// Cassandra 3.x mean values are truncated as integrals.
|
// Cassandra 3.x mean values are truncated as integrals.
|
||||||
return map_reduce_cf(ctx, req->param["name"], integral_ratio_holder(), mean_partition_size, std::plus<integral_ratio_holder>());
|
return map_reduce_cf(ctx, req->get_path_param("name"), integral_ratio_holder(), mean_partition_size, std::plus<integral_ratio_holder>());
|
||||||
});
|
});
|
||||||
|
|
||||||
// FIXME: this refers to partitions, not rows.
|
// FIXME: this refers to partitions, not rows.
|
||||||
@@ -610,7 +610,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_bloom_filter_false_positives.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_bloom_filter_false_positives.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (replica::column_family& cf) {
|
return map_reduce_cf(ctx, req->get_path_param("name"), uint64_t(0), [] (replica::column_family& cf) {
|
||||||
auto sstables = cf.get_sstables();
|
auto sstables = cf.get_sstables();
|
||||||
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||||
return s + sst->filter_get_false_positive();
|
return s + sst->filter_get_false_positive();
|
||||||
@@ -628,7 +628,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_recent_bloom_filter_false_positives.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_recent_bloom_filter_false_positives.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (replica::column_family& cf) {
|
return map_reduce_cf(ctx, req->get_path_param("name"), uint64_t(0), [] (replica::column_family& cf) {
|
||||||
auto sstables = cf.get_sstables();
|
auto sstables = cf.get_sstables();
|
||||||
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||||
return s + sst->filter_get_recent_false_positive();
|
return s + sst->filter_get_recent_false_positive();
|
||||||
@@ -646,7 +646,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_bloom_filter_false_ratio.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_bloom_filter_false_ratio.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf(ctx, req->param["name"], ratio_holder(), [] (replica::column_family& cf) {
|
return map_reduce_cf(ctx, req->get_path_param("name"), ratio_holder(), [] (replica::column_family& cf) {
|
||||||
return boost::accumulate(*cf.get_sstables() | boost::adaptors::transformed(filter_false_positive_as_ratio_holder), ratio_holder());
|
return boost::accumulate(*cf.get_sstables() | boost::adaptors::transformed(filter_false_positive_as_ratio_holder), ratio_holder());
|
||||||
}, std::plus<>());
|
}, std::plus<>());
|
||||||
});
|
});
|
||||||
@@ -658,7 +658,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_recent_bloom_filter_false_ratio.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_recent_bloom_filter_false_ratio.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf(ctx, req->param["name"], ratio_holder(), [] (replica::column_family& cf) {
|
return map_reduce_cf(ctx, req->get_path_param("name"), ratio_holder(), [] (replica::column_family& cf) {
|
||||||
return boost::accumulate(*cf.get_sstables() | boost::adaptors::transformed(filter_recent_false_positive_as_ratio_holder), ratio_holder());
|
return boost::accumulate(*cf.get_sstables() | boost::adaptors::transformed(filter_recent_false_positive_as_ratio_holder), ratio_holder());
|
||||||
}, std::plus<>());
|
}, std::plus<>());
|
||||||
});
|
});
|
||||||
@@ -670,7 +670,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (replica::column_family& cf) {
|
return map_reduce_cf(ctx, req->get_path_param("name"), uint64_t(0), [] (replica::column_family& cf) {
|
||||||
auto sstables = cf.get_sstables();
|
auto sstables = cf.get_sstables();
|
||||||
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||||
return s + sst->filter_size();
|
return s + sst->filter_size();
|
||||||
@@ -688,7 +688,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (replica::column_family& cf) {
|
return map_reduce_cf(ctx, req->get_path_param("name"), uint64_t(0), [] (replica::column_family& cf) {
|
||||||
auto sstables = cf.get_sstables();
|
auto sstables = cf.get_sstables();
|
||||||
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||||
return s + sst->filter_memory_size();
|
return s + sst->filter_memory_size();
|
||||||
@@ -706,7 +706,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (replica::column_family& cf) {
|
return map_reduce_cf(ctx, req->get_path_param("name"), uint64_t(0), [] (replica::column_family& cf) {
|
||||||
auto sstables = cf.get_sstables();
|
auto sstables = cf.get_sstables();
|
||||||
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||||
return s + sst->get_summary().memory_footprint();
|
return s + sst->get_summary().memory_footprint();
|
||||||
@@ -729,7 +729,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
// We are missing the off heap memory calculation
|
// We are missing the off heap memory calculation
|
||||||
// Return 0 is the wrong value. It's a work around
|
// Return 0 is the wrong value. It's a work around
|
||||||
// until the memory calculation will be available
|
// until the memory calculation will be available
|
||||||
//auto id = get_uuid(req->param["name"], ctx.db.local());
|
//auto id = get_uuid(req->get_path_param("name"), ctx.db.local());
|
||||||
return make_ready_future<json::json_return_type>(0);
|
return make_ready_future<json::json_return_type>(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -742,7 +742,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
cf::get_speculative_retries.set(r, [] (std::unique_ptr<http::request> req) {
|
cf::get_speculative_retries.set(r, [] (std::unique_ptr<http::request> req) {
|
||||||
//TBD
|
//TBD
|
||||||
unimplemented();
|
unimplemented();
|
||||||
//auto id = get_uuid(req->param["name"], ctx.db.local());
|
//auto id = get_uuid(req->get_path_param("name"), ctx.db.local());
|
||||||
return make_ready_future<json::json_return_type>(0);
|
return make_ready_future<json::json_return_type>(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -755,7 +755,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
cf::get_key_cache_hit_rate.set(r, [] (std::unique_ptr<http::request> req) {
|
cf::get_key_cache_hit_rate.set(r, [] (std::unique_ptr<http::request> req) {
|
||||||
//TBD
|
//TBD
|
||||||
unimplemented();
|
unimplemented();
|
||||||
//auto id = get_uuid(req->param["name"], ctx.db.local());
|
//auto id = get_uuid(req->get_path_param("name"), ctx.db.local());
|
||||||
return make_ready_future<json::json_return_type>(0);
|
return make_ready_future<json::json_return_type>(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -780,7 +780,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
cf::get_row_cache_hit_out_of_range.set(r, [] (std::unique_ptr<http::request> req) {
|
cf::get_row_cache_hit_out_of_range.set(r, [] (std::unique_ptr<http::request> req) {
|
||||||
//TBD
|
//TBD
|
||||||
unimplemented();
|
unimplemented();
|
||||||
//auto id = get_uuid(req->param["name"], ctx.db.local());
|
//auto id = get_uuid(req->get_path_param("name"), ctx.db.local());
|
||||||
return make_ready_future<json::json_return_type>(0);
|
return make_ready_future<json::json_return_type>(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -791,7 +791,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_row_cache_hit.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_row_cache_hit.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf_raw(ctx, req->param["name"], utils::rate_moving_average(), [](const replica::column_family& cf) {
|
return map_reduce_cf_raw(ctx, req->get_path_param("name"), utils::rate_moving_average(), [](const replica::column_family& cf) {
|
||||||
return cf.get_row_cache().stats().hits.rate();
|
return cf.get_row_cache().stats().hits.rate();
|
||||||
}, std::plus<utils::rate_moving_average>()).then([](const utils::rate_moving_average& m) {
|
}, std::plus<utils::rate_moving_average>()).then([](const utils::rate_moving_average& m) {
|
||||||
return make_ready_future<json::json_return_type>(meter_to_json(m));
|
return make_ready_future<json::json_return_type>(meter_to_json(m));
|
||||||
@@ -807,7 +807,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_row_cache_miss.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_row_cache_miss.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf_raw(ctx, req->param["name"], utils::rate_moving_average(), [](const replica::column_family& cf) {
|
return map_reduce_cf_raw(ctx, req->get_path_param("name"), utils::rate_moving_average(), [](const replica::column_family& cf) {
|
||||||
return cf.get_row_cache().stats().misses.rate();
|
return cf.get_row_cache().stats().misses.rate();
|
||||||
}, std::plus<utils::rate_moving_average>()).then([](const utils::rate_moving_average& m) {
|
}, std::plus<utils::rate_moving_average>()).then([](const utils::rate_moving_average& m) {
|
||||||
return make_ready_future<json::json_return_type>(meter_to_json(m));
|
return make_ready_future<json::json_return_type>(meter_to_json(m));
|
||||||
@@ -824,57 +824,57 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_cas_prepare.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_cas_prepare.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
|
return map_reduce_cf_time_histogram(ctx, req->get_path_param("name"), [](const replica::column_family& cf) {
|
||||||
return cf.get_stats().cas_prepare.histogram();
|
return cf.get_stats().cas_prepare.histogram();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_cas_propose.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_cas_propose.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
|
return map_reduce_cf_time_histogram(ctx, req->get_path_param("name"), [](const replica::column_family& cf) {
|
||||||
return cf.get_stats().cas_accept.histogram();
|
return cf.get_stats().cas_accept.histogram();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_cas_commit.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_cas_commit.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
|
return map_reduce_cf_time_histogram(ctx, req->get_path_param("name"), [](const replica::column_family& cf) {
|
||||||
return cf.get_stats().cas_learn.histogram();
|
return cf.get_stats().cas_learn.histogram();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_sstables_per_read_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_sstables_per_read_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](replica::column_family& cf) {
|
return map_reduce_cf(ctx, req->get_path_param("name"), utils::estimated_histogram(0), [](replica::column_family& cf) {
|
||||||
return cf.get_stats().estimated_sstable_per_read;
|
return cf.get_stats().estimated_sstable_per_read;
|
||||||
},
|
},
|
||||||
utils::estimated_histogram_merge, utils_json::estimated_histogram());
|
utils::estimated_histogram_merge, utils_json::estimated_histogram());
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_tombstone_scanned_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_tombstone_scanned_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return get_cf_histogram(ctx, req->param["name"], &replica::column_family_stats::tombstone_scanned);
|
return get_cf_histogram(ctx, req->get_path_param("name"), &replica::column_family_stats::tombstone_scanned);
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_live_scanned_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::get_live_scanned_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return get_cf_histogram(ctx, req->param["name"], &replica::column_family_stats::live_scanned);
|
return get_cf_histogram(ctx, req->get_path_param("name"), &replica::column_family_stats::live_scanned);
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_col_update_time_delta_histogram.set(r, [] (std::unique_ptr<http::request> req) {
|
cf::get_col_update_time_delta_histogram.set(r, [] (std::unique_ptr<http::request> req) {
|
||||||
//TBD
|
//TBD
|
||||||
unimplemented();
|
unimplemented();
|
||||||
//auto id = get_uuid(req->param["name"], ctx.db.local());
|
//auto id = get_uuid(req->get_path_param("name"), ctx.db.local());
|
||||||
std::vector<double> res;
|
std::vector<double> res;
|
||||||
return make_ready_future<json::json_return_type>(res);
|
return make_ready_future<json::json_return_type>(res);
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_auto_compaction.set(r, [&ctx] (const_req req) {
|
cf::get_auto_compaction.set(r, [&ctx] (const_req req) {
|
||||||
auto uuid = get_uuid(req.param["name"], ctx.db.local());
|
auto uuid = get_uuid(req.get_path_param("name"), ctx.db.local());
|
||||||
replica::column_family& cf = ctx.db.local().find_column_family(uuid);
|
replica::column_family& cf = ctx.db.local().find_column_family(uuid);
|
||||||
return !cf.is_auto_compaction_disabled_by_user();
|
return !cf.is_auto_compaction_disabled_by_user();
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::enable_auto_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
cf::enable_auto_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||||
apilog.info("column_family/enable_auto_compaction: name={}", req->param["name"]);
|
apilog.info("column_family/enable_auto_compaction: name={}", req->get_path_param("name"));
|
||||||
return ctx.db.invoke_on(0, [&ctx, req = std::move(req)] (replica::database& db) {
|
return ctx.db.invoke_on(0, [&ctx, req = std::move(req)] (replica::database& db) {
|
||||||
auto g = replica::database::autocompaction_toggle_guard(db);
|
auto g = replica::database::autocompaction_toggle_guard(db);
|
||||||
return foreach_column_family(ctx, req->param["name"], [](replica::column_family &cf) {
|
return foreach_column_family(ctx, req->get_path_param("name"), [](replica::column_family &cf) {
|
||||||
cf.enable_auto_compaction();
|
cf.enable_auto_compaction();
|
||||||
}).then([g = std::move(g)] {
|
}).then([g = std::move(g)] {
|
||||||
return make_ready_future<json::json_return_type>(json_void());
|
return make_ready_future<json::json_return_type>(json_void());
|
||||||
@@ -883,10 +883,10 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::disable_auto_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
cf::disable_auto_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||||
apilog.info("column_family/disable_auto_compaction: name={}", req->param["name"]);
|
apilog.info("column_family/disable_auto_compaction: name={}", req->get_path_param("name"));
|
||||||
return ctx.db.invoke_on(0, [&ctx, req = std::move(req)] (replica::database& db) {
|
return ctx.db.invoke_on(0, [&ctx, req = std::move(req)] (replica::database& db) {
|
||||||
auto g = replica::database::autocompaction_toggle_guard(db);
|
auto g = replica::database::autocompaction_toggle_guard(db);
|
||||||
return foreach_column_family(ctx, req->param["name"], [](replica::column_family &cf) {
|
return foreach_column_family(ctx, req->get_path_param("name"), [](replica::column_family &cf) {
|
||||||
return cf.disable_auto_compaction();
|
return cf.disable_auto_compaction();
|
||||||
}).then([g = std::move(g)] {
|
}).then([g = std::move(g)] {
|
||||||
return make_ready_future<json::json_return_type>(json_void());
|
return make_ready_future<json::json_return_type>(json_void());
|
||||||
@@ -895,14 +895,14 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_tombstone_gc.set(r, [&ctx] (const_req req) {
|
cf::get_tombstone_gc.set(r, [&ctx] (const_req req) {
|
||||||
auto uuid = get_uuid(req.param["name"], ctx.db.local());
|
auto uuid = get_uuid(req.get_path_param("name"), ctx.db.local());
|
||||||
replica::table& t = ctx.db.local().find_column_family(uuid);
|
replica::table& t = ctx.db.local().find_column_family(uuid);
|
||||||
return t.tombstone_gc_enabled();
|
return t.tombstone_gc_enabled();
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::enable_tombstone_gc.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
cf::enable_tombstone_gc.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||||
apilog.info("column_family/enable_tombstone_gc: name={}", req->param["name"]);
|
apilog.info("column_family/enable_tombstone_gc: name={}", req->get_path_param("name"));
|
||||||
return foreach_column_family(ctx, req->param["name"], [](replica::table& t) {
|
return foreach_column_family(ctx, req->get_path_param("name"), [](replica::table& t) {
|
||||||
t.set_tombstone_gc_enabled(true);
|
t.set_tombstone_gc_enabled(true);
|
||||||
}).then([] {
|
}).then([] {
|
||||||
return make_ready_future<json::json_return_type>(json_void());
|
return make_ready_future<json::json_return_type>(json_void());
|
||||||
@@ -910,8 +910,8 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::disable_tombstone_gc.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
cf::disable_tombstone_gc.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||||
apilog.info("column_family/disable_tombstone_gc: name={}", req->param["name"]);
|
apilog.info("column_family/disable_tombstone_gc: name={}", req->get_path_param("name"));
|
||||||
return foreach_column_family(ctx, req->param["name"], [](replica::table& t) {
|
return foreach_column_family(ctx, req->get_path_param("name"), [](replica::table& t) {
|
||||||
t.set_tombstone_gc_enabled(false);
|
t.set_tombstone_gc_enabled(false);
|
||||||
}).then([] {
|
}).then([] {
|
||||||
return make_ready_future<json::json_return_type>(json_void());
|
return make_ready_future<json::json_return_type>(json_void());
|
||||||
@@ -919,7 +919,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_built_indexes.set(r, [&ctx, &sys_ks](std::unique_ptr<http::request> req) {
|
cf::get_built_indexes.set(r, [&ctx, &sys_ks](std::unique_ptr<http::request> req) {
|
||||||
auto ks_cf = parse_fully_qualified_cf_name(req->param["name"]);
|
auto ks_cf = parse_fully_qualified_cf_name(req->get_path_param("name"));
|
||||||
auto&& ks = std::get<0>(ks_cf);
|
auto&& ks = std::get<0>(ks_cf);
|
||||||
auto&& cf_name = std::get<1>(ks_cf);
|
auto&& cf_name = std::get<1>(ks_cf);
|
||||||
return sys_ks.local().load_view_build_progress().then([ks, cf_name, &ctx](const std::vector<db::system_keyspace_view_build_progress>& vb) mutable {
|
return sys_ks.local().load_view_build_progress().then([ks, cf_name, &ctx](const std::vector<db::system_keyspace_view_build_progress>& vb) mutable {
|
||||||
@@ -957,7 +957,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_compression_ratio.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
cf::get_compression_ratio.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||||
auto uuid = get_uuid(req->param["name"], ctx.db.local());
|
auto uuid = get_uuid(req->get_path_param("name"), ctx.db.local());
|
||||||
|
|
||||||
return ctx.db.map_reduce(sum_ratio<double>(), [uuid](replica::database& db) {
|
return ctx.db.map_reduce(sum_ratio<double>(), [uuid](replica::database& db) {
|
||||||
replica::column_family& cf = db.find_column_family(uuid);
|
replica::column_family& cf = db.find_column_family(uuid);
|
||||||
@@ -968,21 +968,21 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_read_latency_estimated_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
cf::get_read_latency_estimated_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
|
return map_reduce_cf_time_histogram(ctx, req->get_path_param("name"), [](const replica::column_family& cf) {
|
||||||
return cf.get_stats().reads.histogram();
|
return cf.get_stats().reads.histogram();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_write_latency_estimated_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
cf::get_write_latency_estimated_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
|
return map_reduce_cf_time_histogram(ctx, req->get_path_param("name"), [](const replica::column_family& cf) {
|
||||||
return cf.get_stats().writes.histogram();
|
return cf.get_stats().writes.histogram();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::set_compaction_strategy_class.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
cf::set_compaction_strategy_class.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||||
sstring strategy = req->get_query_param("class_name");
|
sstring strategy = req->get_query_param("class_name");
|
||||||
apilog.info("column_family/set_compaction_strategy_class: name={} strategy={}", req->param["name"], strategy);
|
apilog.info("column_family/set_compaction_strategy_class: name={} strategy={}", req->get_path_param("name"), strategy);
|
||||||
return foreach_column_family(ctx, req->param["name"], [strategy](replica::column_family& cf) {
|
return foreach_column_family(ctx, req->get_path_param("name"), [strategy](replica::column_family& cf) {
|
||||||
cf.set_compaction_strategy(sstables::compaction_strategy::type(strategy));
|
cf.set_compaction_strategy(sstables::compaction_strategy::type(strategy));
|
||||||
}).then([] {
|
}).then([] {
|
||||||
return make_ready_future<json::json_return_type>(json_void());
|
return make_ready_future<json::json_return_type>(json_void());
|
||||||
@@ -990,7 +990,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_compaction_strategy_class.set(r, [&ctx](const_req req) {
|
cf::get_compaction_strategy_class.set(r, [&ctx](const_req req) {
|
||||||
return ctx.db.local().find_column_family(get_uuid(req.param["name"], ctx.db.local())).get_compaction_strategy().name();
|
return ctx.db.local().find_column_family(get_uuid(req.get_path_param("name"), ctx.db.local())).get_compaction_strategy().name();
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::set_compression_parameters.set(r, [](std::unique_ptr<http::request> req) {
|
cf::set_compression_parameters.set(r, [](std::unique_ptr<http::request> req) {
|
||||||
@@ -1006,7 +1006,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::get_sstable_count_per_level.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
cf::get_sstable_count_per_level.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||||
return map_reduce_cf_raw(ctx, req->param["name"], std::vector<uint64_t>(), [](const replica::column_family& cf) {
|
return map_reduce_cf_raw(ctx, req->get_path_param("name"), std::vector<uint64_t>(), [](const replica::column_family& cf) {
|
||||||
return cf.sstable_count_per_level();
|
return cf.sstable_count_per_level();
|
||||||
}, concat_sstable_count_per_level).then([](const std::vector<uint64_t>& res) {
|
}, concat_sstable_count_per_level).then([](const std::vector<uint64_t>& res) {
|
||||||
return make_ready_future<json::json_return_type>(res);
|
return make_ready_future<json::json_return_type>(res);
|
||||||
@@ -1015,7 +1015,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
|
|
||||||
cf::get_sstables_for_key.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
cf::get_sstables_for_key.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||||
auto key = req->get_query_param("key");
|
auto key = req->get_query_param("key");
|
||||||
auto uuid = get_uuid(req->param["name"], ctx.db.local());
|
auto uuid = get_uuid(req->get_path_param("name"), ctx.db.local());
|
||||||
|
|
||||||
return ctx.db.map_reduce0([key, uuid] (replica::database& db) -> future<std::unordered_set<sstring>> {
|
return ctx.db.map_reduce0([key, uuid] (replica::database& db) -> future<std::unordered_set<sstring>> {
|
||||||
auto sstables = co_await db.find_column_family(uuid).get_sstables_by_partition_key(key);
|
auto sstables = co_await db.find_column_family(uuid).get_sstables_by_partition_key(key);
|
||||||
@@ -1031,7 +1031,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
|
|
||||||
|
|
||||||
cf::toppartitions.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cf::toppartitions.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
auto name = req->param["name"];
|
auto name = req->get_path_param("name");
|
||||||
auto [ks, cf] = parse_fully_qualified_cf_name(name);
|
auto [ks, cf] = parse_fully_qualified_cf_name(name);
|
||||||
|
|
||||||
api::req_param<std::chrono::milliseconds, unsigned> duration{*req, "duration", 1000ms};
|
api::req_param<std::chrono::milliseconds, unsigned> duration{*req, "duration", 1000ms};
|
||||||
@@ -1047,12 +1047,19 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
});
|
});
|
||||||
|
|
||||||
cf::force_major_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
cf::force_major_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||||
if (req->get_query_param("split_output") != "") {
|
auto params = req_params({
|
||||||
|
std::pair("name", mandatory::yes),
|
||||||
|
std::pair("flush_memtables", mandatory::no),
|
||||||
|
std::pair("split_output", mandatory::no),
|
||||||
|
});
|
||||||
|
params.process(*req);
|
||||||
|
if (params.get("split_output")) {
|
||||||
fail(unimplemented::cause::API);
|
fail(unimplemented::cause::API);
|
||||||
}
|
}
|
||||||
|
auto [ks, cf] = parse_fully_qualified_cf_name(*params.get("name"));
|
||||||
|
auto flush = params.get_as<bool>("flush_memtables").value_or(true);
|
||||||
|
apilog.info("column_family/force_major_compaction: name={} flush={}", req->get_path_param("name"), flush);
|
||||||
|
|
||||||
apilog.info("column_family/force_major_compaction: name={}", req->param["name"]);
|
|
||||||
auto [ks, cf] = parse_fully_qualified_cf_name(req->param["name"]);
|
|
||||||
auto keyspace = validate_keyspace(ctx, ks);
|
auto keyspace = validate_keyspace(ctx, ks);
|
||||||
std::vector<table_info> table_infos = {table_info{
|
std::vector<table_info> table_infos = {table_info{
|
||||||
.name = cf,
|
.name = cf,
|
||||||
@@ -1060,7 +1067,11 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
|||||||
}};
|
}};
|
||||||
|
|
||||||
auto& compaction_module = ctx.db.local().get_compaction_manager().get_task_manager_module();
|
auto& compaction_module = ctx.db.local().get_compaction_manager().get_task_manager_module();
|
||||||
auto task = co_await compaction_module.make_and_start_task<major_keyspace_compaction_task_impl>({}, std::move(keyspace), ctx.db, std::move(table_infos));
|
std::optional<major_compaction_task_impl::flush_mode> fmopt;
|
||||||
|
if (!flush) {
|
||||||
|
fmopt = major_compaction_task_impl::flush_mode::skip;
|
||||||
|
}
|
||||||
|
auto task = co_await compaction_module.make_and_start_task<major_keyspace_compaction_task_impl>({}, std::move(keyspace), tasks::task_id::create_null_id(), ctx.db, std::move(table_infos), fmopt);
|
||||||
co_await task->done();
|
co_await task->done();
|
||||||
co_return json_void();
|
co_return json_void();
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -7,6 +7,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <seastar/core/coroutine.hh>
|
#include <seastar/core/coroutine.hh>
|
||||||
|
#include <seastar/coroutine/exception.hh>
|
||||||
|
|
||||||
#include "compaction_manager.hh"
|
#include "compaction_manager.hh"
|
||||||
#include "compaction/compaction_manager.hh"
|
#include "compaction/compaction_manager.hh"
|
||||||
@@ -109,7 +110,7 @@ void set_compaction_manager(http_context& ctx, routes& r) {
|
|||||||
});
|
});
|
||||||
|
|
||||||
cm::stop_keyspace_compaction.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
cm::stop_keyspace_compaction.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||||
auto ks_name = validate_keyspace(ctx, req->param);
|
auto ks_name = validate_keyspace(ctx, req);
|
||||||
auto table_names = parse_tables(ks_name, ctx, req->query_parameters, "tables");
|
auto table_names = parse_tables(ks_name, ctx, req->query_parameters, "tables");
|
||||||
if (table_names.empty()) {
|
if (table_names.empty()) {
|
||||||
table_names = map_keys(ctx.db.local().find_keyspace(ks_name).metadata().get()->cf_meta_data());
|
table_names = map_keys(ctx.db.local().find_keyspace(ks_name).metadata().get()->cf_meta_data());
|
||||||
@@ -152,10 +153,13 @@ void set_compaction_manager(http_context& ctx, routes& r) {
|
|||||||
});
|
});
|
||||||
|
|
||||||
cm::get_compaction_history.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cm::get_compaction_history.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
std::function<future<>(output_stream<char>&&)> f = [&ctx](output_stream<char>&& s) {
|
std::function<future<>(output_stream<char>&&)> f = [&ctx] (output_stream<char>&& out) -> future<> {
|
||||||
return do_with(output_stream<char>(std::move(s)), true, [&ctx] (output_stream<char>& s, bool& first){
|
auto s = std::move(out);
|
||||||
return s.write("[").then([&ctx, &s, &first] {
|
bool first = true;
|
||||||
return ctx.db.local().get_compaction_manager().get_compaction_history([&s, &first](const db::compaction_history_entry& entry) mutable {
|
std::exception_ptr ex;
|
||||||
|
try {
|
||||||
|
co_await s.write("[");
|
||||||
|
co_await ctx.db.local().get_compaction_manager().get_compaction_history([&s, &first](const db::compaction_history_entry& entry) mutable -> future<> {
|
||||||
cm::history h;
|
cm::history h;
|
||||||
h.id = entry.id.to_sstring();
|
h.id = entry.id.to_sstring();
|
||||||
h.ks = std::move(entry.ks);
|
h.ks = std::move(entry.ks);
|
||||||
@@ -169,18 +173,21 @@ void set_compaction_manager(http_context& ctx, routes& r) {
|
|||||||
e.value = it.second;
|
e.value = it.second;
|
||||||
h.rows_merged.push(std::move(e));
|
h.rows_merged.push(std::move(e));
|
||||||
}
|
}
|
||||||
auto fut = first ? make_ready_future<>() : s.write(", ");
|
if (!first) {
|
||||||
|
co_await s.write(", ");
|
||||||
|
}
|
||||||
first = false;
|
first = false;
|
||||||
return fut.then([&s, h = std::move(h)] {
|
co_await formatter::write(s, h);
|
||||||
return formatter::write(s, h);
|
|
||||||
});
|
|
||||||
}).then([&s] {
|
|
||||||
return s.write("]").then([&s] {
|
|
||||||
return s.close();
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
});
|
co_await s.write("]");
|
||||||
});
|
co_await s.flush();
|
||||||
|
} catch (...) {
|
||||||
|
ex = std::current_exception();
|
||||||
|
}
|
||||||
|
co_await s.close();
|
||||||
|
if (ex) {
|
||||||
|
co_await coroutine::return_exception_ptr(std::move(ex));
|
||||||
|
}
|
||||||
};
|
};
|
||||||
return make_ready_future<json::json_return_type>(std::move(f));
|
return make_ready_future<json::json_return_type>(std::move(f));
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -91,7 +91,7 @@ void set_config(std::shared_ptr < api_registry_builder20 > rb, http_context& ctx
|
|||||||
});
|
});
|
||||||
|
|
||||||
cs::find_config_id.set(r, [&cfg] (const_req r) {
|
cs::find_config_id.set(r, [&cfg] (const_req r) {
|
||||||
auto id = r.param["id"];
|
auto id = r.get_path_param("id");
|
||||||
for (auto&& cfg_ref : cfg.values()) {
|
for (auto&& cfg_ref : cfg.values()) {
|
||||||
auto&& cfg = cfg_ref.get();
|
auto&& cfg = cfg_ref.get();
|
||||||
if (id == cfg.name()) {
|
if (id == cfg.name()) {
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ namespace hf = httpd::error_injection_json;
|
|||||||
void set_error_injection(http_context& ctx, routes& r) {
|
void set_error_injection(http_context& ctx, routes& r) {
|
||||||
|
|
||||||
hf::enable_injection.set(r, [](std::unique_ptr<request> req) {
|
hf::enable_injection.set(r, [](std::unique_ptr<request> req) {
|
||||||
sstring injection = req->param["injection"];
|
sstring injection = req->get_path_param("injection");
|
||||||
bool one_shot = req->get_query_param("one_shot") == "True";
|
bool one_shot = req->get_query_param("one_shot") == "True";
|
||||||
auto params = req->content;
|
auto params = req->content;
|
||||||
|
|
||||||
@@ -56,7 +56,7 @@ void set_error_injection(http_context& ctx, routes& r) {
|
|||||||
});
|
});
|
||||||
|
|
||||||
hf::disable_injection.set(r, [](std::unique_ptr<request> req) {
|
hf::disable_injection.set(r, [](std::unique_ptr<request> req) {
|
||||||
sstring injection = req->param["injection"];
|
sstring injection = req->get_path_param("injection");
|
||||||
|
|
||||||
auto& errinj = utils::get_local_injector();
|
auto& errinj = utils::get_local_injector();
|
||||||
return errinj.disable_on_all(injection).then([] {
|
return errinj.disable_on_all(injection).then([] {
|
||||||
@@ -72,7 +72,7 @@ void set_error_injection(http_context& ctx, routes& r) {
|
|||||||
});
|
});
|
||||||
|
|
||||||
hf::message_injection.set(r, [](std::unique_ptr<request> req) {
|
hf::message_injection.set(r, [](std::unique_ptr<request> req) {
|
||||||
sstring injection = req->param["injection"];
|
sstring injection = req->get_path_param("injection");
|
||||||
auto& errinj = utils::get_local_injector();
|
auto& errinj = utils::get_local_injector();
|
||||||
return errinj.receive_message_on_all(injection).then([] {
|
return errinj.receive_message_on_all(injection).then([] {
|
||||||
return make_ready_future<json::json_return_type>(json::json_void());
|
return make_ready_future<json::json_return_type>(json::json_void());
|
||||||
|
|||||||
@@ -18,37 +18,43 @@ namespace fd = httpd::failure_detector_json;
|
|||||||
|
|
||||||
void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {
|
void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {
|
||||||
fd::get_all_endpoint_states.set(r, [&g](std::unique_ptr<request> req) {
|
fd::get_all_endpoint_states.set(r, [&g](std::unique_ptr<request> req) {
|
||||||
std::vector<fd::endpoint_state> res;
|
return g.container().invoke_on(0, [] (gms::gossiper& g) {
|
||||||
res.reserve(g.num_endpoints());
|
std::vector<fd::endpoint_state> res;
|
||||||
g.for_each_endpoint_state([&] (const gms::inet_address& addr, const gms::endpoint_state& eps) {
|
res.reserve(g.num_endpoints());
|
||||||
fd::endpoint_state val;
|
g.for_each_endpoint_state([&] (const gms::inet_address& addr, const gms::endpoint_state& eps) {
|
||||||
val.addrs = fmt::to_string(addr);
|
fd::endpoint_state val;
|
||||||
val.is_alive = g.is_alive(addr);
|
val.addrs = fmt::to_string(addr);
|
||||||
val.generation = eps.get_heart_beat_state().get_generation().value();
|
val.is_alive = g.is_alive(addr);
|
||||||
val.version = eps.get_heart_beat_state().get_heart_beat_version().value();
|
val.generation = eps.get_heart_beat_state().get_generation().value();
|
||||||
val.update_time = eps.get_update_timestamp().time_since_epoch().count();
|
val.version = eps.get_heart_beat_state().get_heart_beat_version().value();
|
||||||
for (const auto& [as_type, app_state] : eps.get_application_state_map()) {
|
val.update_time = eps.get_update_timestamp().time_since_epoch().count();
|
||||||
fd::version_value version_val;
|
for (const auto& [as_type, app_state] : eps.get_application_state_map()) {
|
||||||
// We return the enum index and not it's name to stay compatible to origin
|
fd::version_value version_val;
|
||||||
// method that the state index are static but the name can be changed.
|
// We return the enum index and not it's name to stay compatible to origin
|
||||||
version_val.application_state = static_cast<std::underlying_type<gms::application_state>::type>(as_type);
|
// method that the state index are static but the name can be changed.
|
||||||
version_val.value = app_state.value();
|
version_val.application_state = static_cast<std::underlying_type<gms::application_state>::type>(as_type);
|
||||||
version_val.version = app_state.version().value();
|
version_val.value = app_state.value();
|
||||||
val.application_state.push(version_val);
|
version_val.version = app_state.version().value();
|
||||||
}
|
val.application_state.push(version_val);
|
||||||
res.emplace_back(std::move(val));
|
}
|
||||||
|
res.emplace_back(std::move(val));
|
||||||
|
});
|
||||||
|
return make_ready_future<json::json_return_type>(res);
|
||||||
});
|
});
|
||||||
return make_ready_future<json::json_return_type>(res);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
fd::get_up_endpoint_count.set(r, [&g](std::unique_ptr<request> req) {
|
fd::get_up_endpoint_count.set(r, [&g](std::unique_ptr<request> req) {
|
||||||
int res = g.get_up_endpoint_count();
|
return g.container().invoke_on(0, [] (gms::gossiper& g) {
|
||||||
return make_ready_future<json::json_return_type>(res);
|
int res = g.get_up_endpoint_count();
|
||||||
|
return make_ready_future<json::json_return_type>(res);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
fd::get_down_endpoint_count.set(r, [&g](std::unique_ptr<request> req) {
|
fd::get_down_endpoint_count.set(r, [&g](std::unique_ptr<request> req) {
|
||||||
int res = g.get_down_endpoint_count();
|
return g.container().invoke_on(0, [] (gms::gossiper& g) {
|
||||||
return make_ready_future<json::json_return_type>(res);
|
int res = g.get_down_endpoint_count();
|
||||||
|
return make_ready_future<json::json_return_type>(res);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
fd::get_phi_convict_threshold.set(r, [] (std::unique_ptr<request> req) {
|
fd::get_phi_convict_threshold.set(r, [] (std::unique_ptr<request> req) {
|
||||||
@@ -56,11 +62,13 @@ void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {
|
|||||||
});
|
});
|
||||||
|
|
||||||
fd::get_simple_states.set(r, [&g] (std::unique_ptr<request> req) {
|
fd::get_simple_states.set(r, [&g] (std::unique_ptr<request> req) {
|
||||||
std::map<sstring, sstring> nodes_status;
|
return g.container().invoke_on(0, [] (gms::gossiper& g) {
|
||||||
g.for_each_endpoint_state([&] (const gms::inet_address& node, const gms::endpoint_state&) {
|
std::map<sstring, sstring> nodes_status;
|
||||||
nodes_status.emplace(node.to_sstring(), g.is_alive(node) ? "UP" : "DOWN");
|
g.for_each_endpoint_state([&] (const gms::inet_address& node, const gms::endpoint_state&) {
|
||||||
|
nodes_status.emplace(node.to_sstring(), g.is_alive(node) ? "UP" : "DOWN");
|
||||||
|
});
|
||||||
|
return make_ready_future<json::json_return_type>(map_to_key_value<fd::mapper>(nodes_status));
|
||||||
});
|
});
|
||||||
return make_ready_future<json::json_return_type>(map_to_key_value<fd::mapper>(nodes_status));
|
|
||||||
});
|
});
|
||||||
|
|
||||||
fd::set_phi_convict_threshold.set(r, [](std::unique_ptr<request> req) {
|
fd::set_phi_convict_threshold.set(r, [](std::unique_ptr<request> req) {
|
||||||
@@ -71,13 +79,15 @@ void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {
|
|||||||
});
|
});
|
||||||
|
|
||||||
fd::get_endpoint_state.set(r, [&g] (std::unique_ptr<request> req) {
|
fd::get_endpoint_state.set(r, [&g] (std::unique_ptr<request> req) {
|
||||||
auto state = g.get_endpoint_state_ptr(gms::inet_address(req->param["addr"]));
|
return g.container().invoke_on(0, [req = std::move(req)] (gms::gossiper& g) {
|
||||||
if (!state) {
|
auto state = g.get_endpoint_state_ptr(gms::inet_address(req->get_path_param("addr")));
|
||||||
return make_ready_future<json::json_return_type>(format("unknown endpoint {}", req->param["addr"]));
|
if (!state) {
|
||||||
}
|
return make_ready_future<json::json_return_type>(format("unknown endpoint {}", req->get_path_param("addr")));
|
||||||
std::stringstream ss;
|
}
|
||||||
g.append_endpoint_state(ss, *state);
|
std::stringstream ss;
|
||||||
return make_ready_future<json::json_return_type>(sstring(ss.str()));
|
g.append_endpoint_state(ss, *state);
|
||||||
|
return make_ready_future<json::json_return_type>(sstring(ss.str()));
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
fd::get_endpoint_phi_values.set(r, [](std::unique_ptr<request> req) {
|
fd::get_endpoint_phi_values.set(r, [](std::unique_ptr<request> req) {
|
||||||
|
|||||||
@@ -31,21 +31,21 @@ void set_gossiper(http_context& ctx, routes& r, gms::gossiper& g) {
|
|||||||
});
|
});
|
||||||
|
|
||||||
httpd::gossiper_json::get_endpoint_downtime.set(r, [&g] (std::unique_ptr<request> req) -> future<json::json_return_type> {
|
httpd::gossiper_json::get_endpoint_downtime.set(r, [&g] (std::unique_ptr<request> req) -> future<json::json_return_type> {
|
||||||
gms::inet_address ep(req->param["addr"]);
|
gms::inet_address ep(req->get_path_param("addr"));
|
||||||
// synchronize unreachable_members on all shards
|
// synchronize unreachable_members on all shards
|
||||||
co_await g.get_unreachable_members_synchronized();
|
co_await g.get_unreachable_members_synchronized();
|
||||||
co_return g.get_endpoint_downtime(ep);
|
co_return g.get_endpoint_downtime(ep);
|
||||||
});
|
});
|
||||||
|
|
||||||
httpd::gossiper_json::get_current_generation_number.set(r, [&g] (std::unique_ptr<http::request> req) {
|
httpd::gossiper_json::get_current_generation_number.set(r, [&g] (std::unique_ptr<http::request> req) {
|
||||||
gms::inet_address ep(req->param["addr"]);
|
gms::inet_address ep(req->get_path_param("addr"));
|
||||||
return g.get_current_generation_number(ep).then([] (gms::generation_type res) {
|
return g.get_current_generation_number(ep).then([] (gms::generation_type res) {
|
||||||
return make_ready_future<json::json_return_type>(res.value());
|
return make_ready_future<json::json_return_type>(res.value());
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
httpd::gossiper_json::get_current_heart_beat_version.set(r, [&g] (std::unique_ptr<http::request> req) {
|
httpd::gossiper_json::get_current_heart_beat_version.set(r, [&g] (std::unique_ptr<http::request> req) {
|
||||||
gms::inet_address ep(req->param["addr"]);
|
gms::inet_address ep(req->get_path_param("addr"));
|
||||||
return g.get_current_heart_beat_version(ep).then([] (gms::version_type res) {
|
return g.get_current_heart_beat_version(ep).then([] (gms::version_type res) {
|
||||||
return make_ready_future<json::json_return_type>(res.value());
|
return make_ready_future<json::json_return_type>(res.value());
|
||||||
});
|
});
|
||||||
@@ -53,17 +53,17 @@ void set_gossiper(http_context& ctx, routes& r, gms::gossiper& g) {
|
|||||||
|
|
||||||
httpd::gossiper_json::assassinate_endpoint.set(r, [&g](std::unique_ptr<http::request> req) {
|
httpd::gossiper_json::assassinate_endpoint.set(r, [&g](std::unique_ptr<http::request> req) {
|
||||||
if (req->get_query_param("unsafe") != "True") {
|
if (req->get_query_param("unsafe") != "True") {
|
||||||
return g.assassinate_endpoint(req->param["addr"]).then([] {
|
return g.assassinate_endpoint(req->get_path_param("addr")).then([] {
|
||||||
return make_ready_future<json::json_return_type>(json_void());
|
return make_ready_future<json::json_return_type>(json_void());
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
return g.unsafe_assassinate_endpoint(req->param["addr"]).then([] {
|
return g.unsafe_assassinate_endpoint(req->get_path_param("addr")).then([] {
|
||||||
return make_ready_future<json::json_return_type>(json_void());
|
return make_ready_future<json::json_return_type>(json_void());
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
httpd::gossiper_json::force_remove_endpoint.set(r, [&g](std::unique_ptr<http::request> req) {
|
httpd::gossiper_json::force_remove_endpoint.set(r, [&g](std::unique_ptr<http::request> req) {
|
||||||
gms::inet_address ep(req->param["addr"]);
|
gms::inet_address ep(req->get_path_param("addr"));
|
||||||
return g.force_remove_endpoint(ep, gms::null_permit_id).then([] {
|
return g.force_remove_endpoint(ep, gms::null_permit_id).then([] {
|
||||||
return make_ready_future<json::json_return_type>(json_void());
|
return make_ready_future<json::json_return_type>(json_void());
|
||||||
});
|
});
|
||||||
|
|||||||
70
api/raft.cc
Normal file
70
api/raft.cc
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2024-present ScyllaDB
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <seastar/core/coroutine.hh>
|
||||||
|
|
||||||
|
#include "api/api.hh"
|
||||||
|
#include "api/api-doc/raft.json.hh"
|
||||||
|
|
||||||
|
#include "service/raft/raft_group_registry.hh"
|
||||||
|
|
||||||
|
using namespace seastar::httpd;
|
||||||
|
|
||||||
|
extern logging::logger apilog;
|
||||||
|
|
||||||
|
namespace api {
|
||||||
|
|
||||||
|
namespace r = httpd::raft_json;
|
||||||
|
using namespace json;
|
||||||
|
|
||||||
|
void set_raft(http_context&, httpd::routes& r, sharded<service::raft_group_registry>& raft_gr) {
|
||||||
|
r::trigger_snapshot.set(r, [&raft_gr] (std::unique_ptr<http::request> req) -> future<json_return_type> {
|
||||||
|
raft::group_id gid{utils::UUID{req->get_path_param("group_id")}};
|
||||||
|
auto timeout_dur = std::invoke([timeout_str = req->get_query_param("timeout")] {
|
||||||
|
if (timeout_str.empty()) {
|
||||||
|
return std::chrono::seconds{60};
|
||||||
|
}
|
||||||
|
auto dur = std::stoll(timeout_str);
|
||||||
|
if (dur <= 0) {
|
||||||
|
throw std::runtime_error{"Timeout must be a positive number."};
|
||||||
|
}
|
||||||
|
return std::chrono::seconds{dur};
|
||||||
|
});
|
||||||
|
|
||||||
|
std::atomic<bool> found_srv{false};
|
||||||
|
co_await raft_gr.invoke_on_all([gid, timeout_dur, &found_srv] (service::raft_group_registry& raft_gr) -> future<> {
|
||||||
|
auto* srv = raft_gr.find_server(gid);
|
||||||
|
if (!srv) {
|
||||||
|
co_return;
|
||||||
|
}
|
||||||
|
|
||||||
|
found_srv = true;
|
||||||
|
abort_on_expiry aoe(lowres_clock::now() + timeout_dur);
|
||||||
|
apilog.info("Triggering Raft group {} snapshot", gid);
|
||||||
|
auto result = co_await srv->trigger_snapshot(&aoe.abort_source());
|
||||||
|
if (result) {
|
||||||
|
apilog.info("New snapshot for Raft group {} created", gid);
|
||||||
|
} else {
|
||||||
|
apilog.info("Could not create new snapshot for Raft group {}, no new entries applied", gid);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!found_srv) {
|
||||||
|
throw std::runtime_error{fmt::format("Server for group ID {} not found", gid)};
|
||||||
|
}
|
||||||
|
|
||||||
|
co_return json_void{};
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
void unset_raft(http_context&, httpd::routes& r) {
|
||||||
|
r::trigger_snapshot.unset(r);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
18
api/raft.hh
Normal file
18
api/raft.hh
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2023-present ScyllaDB
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "api_init.hh"
|
||||||
|
|
||||||
|
namespace api {
|
||||||
|
|
||||||
|
void set_raft(http_context& ctx, httpd::routes& r, sharded<service::raft_group_registry>& raft_gr);
|
||||||
|
void unset_raft(http_context& ctx, httpd::routes& r);
|
||||||
|
|
||||||
|
}
|
||||||
@@ -58,15 +58,19 @@ namespace ss = httpd::storage_service_json;
|
|||||||
namespace sp = httpd::storage_proxy_json;
|
namespace sp = httpd::storage_proxy_json;
|
||||||
using namespace json;
|
using namespace json;
|
||||||
|
|
||||||
sstring validate_keyspace(http_context& ctx, sstring ks_name) {
|
sstring validate_keyspace(const http_context& ctx, sstring ks_name) {
|
||||||
if (ctx.db.local().has_keyspace(ks_name)) {
|
if (ctx.db.local().has_keyspace(ks_name)) {
|
||||||
return ks_name;
|
return ks_name;
|
||||||
}
|
}
|
||||||
throw bad_param_exception(replica::no_such_keyspace(ks_name).what());
|
throw bad_param_exception(replica::no_such_keyspace(ks_name).what());
|
||||||
}
|
}
|
||||||
|
|
||||||
sstring validate_keyspace(http_context& ctx, const parameters& param) {
|
sstring validate_keyspace(const http_context& ctx, const std::unique_ptr<http::request>& req) {
|
||||||
return validate_keyspace(ctx, param["keyspace"]);
|
return validate_keyspace(ctx, req->get_path_param("keyspace"));
|
||||||
|
}
|
||||||
|
|
||||||
|
sstring validate_keyspace(const http_context& ctx, const http::request& req) {
|
||||||
|
return validate_keyspace(ctx, req.get_path_param("keyspace"));
|
||||||
}
|
}
|
||||||
|
|
||||||
locator::host_id validate_host_id(const sstring& param) {
|
locator::host_id validate_host_id(const sstring& param) {
|
||||||
@@ -171,7 +175,7 @@ using ks_cf_func = std::function<future<json::json_return_type>(http_context&, s
|
|||||||
|
|
||||||
static auto wrap_ks_cf(http_context &ctx, ks_cf_func f) {
|
static auto wrap_ks_cf(http_context &ctx, ks_cf_func f) {
|
||||||
return [&ctx, f = std::move(f)](std::unique_ptr<http::request> req) {
|
return [&ctx, f = std::move(f)](std::unique_ptr<http::request> req) {
|
||||||
auto keyspace = validate_keyspace(ctx, req->param);
|
auto keyspace = validate_keyspace(ctx, req);
|
||||||
auto table_infos = parse_table_infos(keyspace, ctx, req->query_parameters, "cf");
|
auto table_infos = parse_table_infos(keyspace, ctx, req->query_parameters, "cf");
|
||||||
return f(ctx, std::move(req), std::move(keyspace), std::move(table_infos));
|
return f(ctx, std::move(req), std::move(keyspace), std::move(table_infos));
|
||||||
};
|
};
|
||||||
@@ -250,17 +254,21 @@ future<json::json_return_type> set_tables_tombstone_gc(http_context& ctx, const
|
|||||||
}
|
}
|
||||||
|
|
||||||
void set_transport_controller(http_context& ctx, routes& r, cql_transport::controller& ctl) {
|
void set_transport_controller(http_context& ctx, routes& r, cql_transport::controller& ctl) {
|
||||||
ss::start_native_transport.set(r, [&ctl](std::unique_ptr<http::request> req) {
|
ss::start_native_transport.set(r, [&ctx, &ctl](std::unique_ptr<http::request> req) {
|
||||||
return smp::submit_to(0, [&] {
|
return smp::submit_to(0, [&] {
|
||||||
return ctl.start_server();
|
return with_scheduling_group(ctx.db.local().get_statement_scheduling_group(), [&ctl] {
|
||||||
|
return ctl.start_server();
|
||||||
|
});
|
||||||
}).then([] {
|
}).then([] {
|
||||||
return make_ready_future<json::json_return_type>(json_void());
|
return make_ready_future<json::json_return_type>(json_void());
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
ss::stop_native_transport.set(r, [&ctl](std::unique_ptr<http::request> req) {
|
ss::stop_native_transport.set(r, [&ctx, &ctl](std::unique_ptr<http::request> req) {
|
||||||
return smp::submit_to(0, [&] {
|
return smp::submit_to(0, [&] {
|
||||||
return ctl.request_stop_server();
|
return with_scheduling_group(ctx.db.local().get_statement_scheduling_group(), [&ctl] {
|
||||||
|
return ctl.request_stop_server();
|
||||||
|
});
|
||||||
}).then([] {
|
}).then([] {
|
||||||
return make_ready_future<json::json_return_type>(json_void());
|
return make_ready_future<json::json_return_type>(json_void());
|
||||||
});
|
});
|
||||||
@@ -282,17 +290,21 @@ void unset_transport_controller(http_context& ctx, routes& r) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void set_rpc_controller(http_context& ctx, routes& r, thrift_controller& ctl) {
|
void set_rpc_controller(http_context& ctx, routes& r, thrift_controller& ctl) {
|
||||||
ss::stop_rpc_server.set(r, [&ctl](std::unique_ptr<http::request> req) {
|
ss::stop_rpc_server.set(r, [&ctx, &ctl](std::unique_ptr<http::request> req) {
|
||||||
return smp::submit_to(0, [&] {
|
return smp::submit_to(0, [&] {
|
||||||
return ctl.request_stop_server();
|
return with_scheduling_group(ctx.db.local().get_statement_scheduling_group(), [&ctl] {
|
||||||
|
return ctl.request_stop_server();
|
||||||
|
});
|
||||||
}).then([] {
|
}).then([] {
|
||||||
return make_ready_future<json::json_return_type>(json_void());
|
return make_ready_future<json::json_return_type>(json_void());
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
ss::start_rpc_server.set(r, [&ctl](std::unique_ptr<http::request> req) {
|
ss::start_rpc_server.set(r, [&ctx, &ctl](std::unique_ptr<http::request> req) {
|
||||||
return smp::submit_to(0, [&] {
|
return smp::submit_to(0, [&] {
|
||||||
return ctl.start_server();
|
return with_scheduling_group(ctx.db.local().get_statement_scheduling_group(), [&ctl] {
|
||||||
|
return ctl.start_server();
|
||||||
|
});
|
||||||
}).then([] {
|
}).then([] {
|
||||||
return make_ready_future<json::json_return_type>(json_void());
|
return make_ready_future<json::json_return_type>(json_void());
|
||||||
});
|
});
|
||||||
@@ -330,7 +342,7 @@ void set_repair(http_context& ctx, routes& r, sharded<repair_service>& repair) {
|
|||||||
// returns immediately, not waiting for the repair to finish. The user
|
// returns immediately, not waiting for the repair to finish. The user
|
||||||
// then has other mechanisms to track the ongoing repair's progress,
|
// then has other mechanisms to track the ongoing repair's progress,
|
||||||
// or stop it.
|
// or stop it.
|
||||||
return repair_start(repair, validate_keyspace(ctx, req->param),
|
return repair_start(repair, validate_keyspace(ctx, req),
|
||||||
options_map).then([] (int i) {
|
options_map).then([] (int i) {
|
||||||
return make_ready_future<json::json_return_type>(i);
|
return make_ready_future<json::json_return_type>(i);
|
||||||
});
|
});
|
||||||
@@ -413,7 +425,7 @@ void unset_repair(http_context& ctx, routes& r) {
|
|||||||
|
|
||||||
void set_sstables_loader(http_context& ctx, routes& r, sharded<sstables_loader>& sst_loader) {
|
void set_sstables_loader(http_context& ctx, routes& r, sharded<sstables_loader>& sst_loader) {
|
||||||
ss::load_new_ss_tables.set(r, [&ctx, &sst_loader](std::unique_ptr<http::request> req) {
|
ss::load_new_ss_tables.set(r, [&ctx, &sst_loader](std::unique_ptr<http::request> req) {
|
||||||
auto ks = validate_keyspace(ctx, req->param);
|
auto ks = validate_keyspace(ctx, req);
|
||||||
auto cf = req->get_query_param("cf");
|
auto cf = req->get_query_param("cf");
|
||||||
auto stream = req->get_query_param("load_and_stream");
|
auto stream = req->get_query_param("load_and_stream");
|
||||||
auto primary_replica = req->get_query_param("primary_replica_only");
|
auto primary_replica = req->get_query_param("primary_replica_only");
|
||||||
@@ -444,8 +456,8 @@ void unset_sstables_loader(http_context& ctx, routes& r) {
|
|||||||
|
|
||||||
void set_view_builder(http_context& ctx, routes& r, sharded<db::view::view_builder>& vb) {
|
void set_view_builder(http_context& ctx, routes& r, sharded<db::view::view_builder>& vb) {
|
||||||
ss::view_build_statuses.set(r, [&ctx, &vb] (std::unique_ptr<http::request> req) {
|
ss::view_build_statuses.set(r, [&ctx, &vb] (std::unique_ptr<http::request> req) {
|
||||||
auto keyspace = validate_keyspace(ctx, req->param);
|
auto keyspace = validate_keyspace(ctx, req);
|
||||||
auto view = req->param["view"];
|
auto view = req->get_path_param("view");
|
||||||
return vb.local().view_build_statuses(std::move(keyspace), std::move(view)).then([] (std::unordered_map<sstring, sstring> status) {
|
return vb.local().view_build_statuses(std::move(keyspace), std::move(view)).then([] (std::unordered_map<sstring, sstring> status) {
|
||||||
std::vector<storage_service_json::mapper> res;
|
std::vector<storage_service_json::mapper> res;
|
||||||
return make_ready_future<json::json_return_type>(map_to_key_value(std::move(status), res));
|
return make_ready_future<json::json_return_type>(map_to_key_value(std::move(status), res));
|
||||||
@@ -582,7 +594,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
|||||||
});
|
});
|
||||||
|
|
||||||
ss::get_range_to_endpoint_map.set(r, [&ctx, &ss](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
ss::get_range_to_endpoint_map.set(r, [&ctx, &ss](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||||
auto keyspace = validate_keyspace(ctx, req->param);
|
auto keyspace = validate_keyspace(ctx, req);
|
||||||
std::vector<ss::maplist_mapper> res;
|
std::vector<ss::maplist_mapper> res;
|
||||||
co_return stream_range_as_array(co_await ss.local().get_range_to_address_map(keyspace),
|
co_return stream_range_as_array(co_await ss.local().get_range_to_address_map(keyspace),
|
||||||
[](const std::pair<dht::token_range, inet_address_vector_replica_set>& entry){
|
[](const std::pair<dht::token_range, inet_address_vector_replica_set>& entry){
|
||||||
@@ -607,7 +619,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
|||||||
ss::get_pending_range_to_endpoint_map.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
ss::get_pending_range_to_endpoint_map.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||||
//TBD
|
//TBD
|
||||||
unimplemented();
|
unimplemented();
|
||||||
auto keyspace = validate_keyspace(ctx, req->param);
|
auto keyspace = validate_keyspace(ctx, req);
|
||||||
std::vector<ss::maplist_mapper> res;
|
std::vector<ss::maplist_mapper> res;
|
||||||
return make_ready_future<json::json_return_type>(res);
|
return make_ready_future<json::json_return_type>(res);
|
||||||
});
|
});
|
||||||
@@ -623,7 +635,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
|||||||
});
|
});
|
||||||
|
|
||||||
ss::describe_ring.set(r, [&ctx, &ss](std::unique_ptr<http::request> req) {
|
ss::describe_ring.set(r, [&ctx, &ss](std::unique_ptr<http::request> req) {
|
||||||
return describe_ring_as_json(ss, validate_keyspace(ctx, req->param));
|
return describe_ring_as_json(ss, validate_keyspace(ctx, req));
|
||||||
});
|
});
|
||||||
|
|
||||||
ss::get_host_id_map.set(r, [&ss](const_req req) {
|
ss::get_host_id_map.set(r, [&ss](const_req req) {
|
||||||
@@ -656,7 +668,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
|||||||
});
|
});
|
||||||
|
|
||||||
ss::get_natural_endpoints.set(r, [&ctx, &ss](const_req req) {
|
ss::get_natural_endpoints.set(r, [&ctx, &ss](const_req req) {
|
||||||
auto keyspace = validate_keyspace(ctx, req.param);
|
auto keyspace = validate_keyspace(ctx, req);
|
||||||
return container_to_vec(ss.local().get_natural_endpoints(keyspace, req.get_query_param("cf"),
|
return container_to_vec(ss.local().get_natural_endpoints(keyspace, req.get_query_param("cf"),
|
||||||
req.get_query_param("key")));
|
req.get_query_param("key")));
|
||||||
});
|
});
|
||||||
@@ -669,14 +681,50 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
ss::force_keyspace_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
ss::force_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||||
auto& db = ctx.db;
|
auto& db = ctx.db;
|
||||||
auto keyspace = validate_keyspace(ctx, req->param);
|
auto params = req_params({
|
||||||
auto table_infos = parse_table_infos(keyspace, ctx, req->query_parameters, "cf");
|
std::pair("flush_memtables", mandatory::no),
|
||||||
apilog.debug("force_keyspace_compaction: keyspace={} tables={}", keyspace, table_infos);
|
});
|
||||||
|
params.process(*req);
|
||||||
|
auto flush = params.get_as<bool>("flush_memtables").value_or(true);
|
||||||
|
apilog.info("force_compaction: flush={}", flush);
|
||||||
|
|
||||||
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
|
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
|
||||||
auto task = co_await compaction_module.make_and_start_task<major_keyspace_compaction_task_impl>({}, std::move(keyspace), db, table_infos);
|
std::optional<major_compaction_task_impl::flush_mode> fmopt;
|
||||||
|
if (!flush) {
|
||||||
|
fmopt = major_compaction_task_impl::flush_mode::skip;
|
||||||
|
}
|
||||||
|
auto task = co_await compaction_module.make_and_start_task<global_major_compaction_task_impl>({}, db, fmopt);
|
||||||
|
try {
|
||||||
|
co_await task->done();
|
||||||
|
} catch (...) {
|
||||||
|
apilog.error("force_compaction failed: {}", std::current_exception());
|
||||||
|
throw;
|
||||||
|
}
|
||||||
|
|
||||||
|
co_return json_void();
|
||||||
|
});
|
||||||
|
|
||||||
|
ss::force_keyspace_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||||
|
auto& db = ctx.db;
|
||||||
|
auto params = req_params({
|
||||||
|
std::pair("keyspace", mandatory::yes),
|
||||||
|
std::pair("cf", mandatory::no),
|
||||||
|
std::pair("flush_memtables", mandatory::no),
|
||||||
|
});
|
||||||
|
params.process(*req);
|
||||||
|
auto keyspace = validate_keyspace(ctx, *params.get("keyspace"));
|
||||||
|
auto table_infos = parse_table_infos(keyspace, ctx, params.get("cf").value_or(""));
|
||||||
|
auto flush = params.get_as<bool>("flush_memtables").value_or(true);
|
||||||
|
apilog.debug("force_keyspace_compaction: keyspace={} tables={}, flush={}", keyspace, table_infos, flush);
|
||||||
|
|
||||||
|
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
|
||||||
|
std::optional<major_compaction_task_impl::flush_mode> fmopt;
|
||||||
|
if (!flush) {
|
||||||
|
fmopt = major_compaction_task_impl::flush_mode::skip;
|
||||||
|
}
|
||||||
|
auto task = co_await compaction_module.make_and_start_task<major_keyspace_compaction_task_impl>({}, std::move(keyspace), tasks::task_id::create_null_id(), db, table_infos, fmopt);
|
||||||
try {
|
try {
|
||||||
co_await task->done();
|
co_await task->done();
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
@@ -689,7 +737,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
|||||||
|
|
||||||
ss::force_keyspace_cleanup.set(r, [&ctx, &ss](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
ss::force_keyspace_cleanup.set(r, [&ctx, &ss](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||||
auto& db = ctx.db;
|
auto& db = ctx.db;
|
||||||
auto keyspace = validate_keyspace(ctx, req->param);
|
auto keyspace = validate_keyspace(ctx, req);
|
||||||
auto table_infos = parse_table_infos(keyspace, ctx, req->query_parameters, "cf");
|
auto table_infos = parse_table_infos(keyspace, ctx, req->query_parameters, "cf");
|
||||||
apilog.info("force_keyspace_cleanup: keyspace={} tables={}", keyspace, table_infos);
|
apilog.info("force_keyspace_cleanup: keyspace={} tables={}", keyspace, table_infos);
|
||||||
if (!co_await ss.local().is_cleanup_allowed(keyspace)) {
|
if (!co_await ss.local().is_cleanup_allowed(keyspace)) {
|
||||||
@@ -743,8 +791,16 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
|||||||
co_return json::json_return_type(0);
|
co_return json::json_return_type(0);
|
||||||
}));
|
}));
|
||||||
|
|
||||||
|
ss::force_flush.set(r, [&ctx](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||||
|
apilog.info("flush all tables");
|
||||||
|
co_await ctx.db.invoke_on_all([] (replica::database& db) {
|
||||||
|
return db.flush_all_tables();
|
||||||
|
});
|
||||||
|
co_return json_void();
|
||||||
|
});
|
||||||
|
|
||||||
ss::force_keyspace_flush.set(r, [&ctx](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
ss::force_keyspace_flush.set(r, [&ctx](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||||
auto keyspace = validate_keyspace(ctx, req->param);
|
auto keyspace = validate_keyspace(ctx, req);
|
||||||
auto column_families = parse_tables(keyspace, ctx, req->query_parameters, "cf");
|
auto column_families = parse_tables(keyspace, ctx, req->query_parameters, "cf");
|
||||||
apilog.info("perform_keyspace_flush: keyspace={} tables={}", keyspace, column_families);
|
apilog.info("perform_keyspace_flush: keyspace={} tables={}", keyspace, column_families);
|
||||||
auto& db = ctx.db;
|
auto& db = ctx.db;
|
||||||
@@ -853,7 +909,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
|||||||
ss::truncate.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
ss::truncate.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||||
//TBD
|
//TBD
|
||||||
unimplemented();
|
unimplemented();
|
||||||
auto keyspace = validate_keyspace(ctx, req->param);
|
auto keyspace = validate_keyspace(ctx, req);
|
||||||
auto column_family = req->get_query_param("cf");
|
auto column_family = req->get_query_param("cf");
|
||||||
return make_ready_future<json::json_return_type>(json_void());
|
return make_ready_future<json::json_return_type>(json_void());
|
||||||
});
|
});
|
||||||
@@ -987,14 +1043,14 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
|||||||
ss::bulk_load.set(r, [](std::unique_ptr<http::request> req) {
|
ss::bulk_load.set(r, [](std::unique_ptr<http::request> req) {
|
||||||
//TBD
|
//TBD
|
||||||
unimplemented();
|
unimplemented();
|
||||||
auto path = req->param["path"];
|
auto path = req->get_path_param("path");
|
||||||
return make_ready_future<json::json_return_type>(json_void());
|
return make_ready_future<json::json_return_type>(json_void());
|
||||||
});
|
});
|
||||||
|
|
||||||
ss::bulk_load_async.set(r, [](std::unique_ptr<http::request> req) {
|
ss::bulk_load_async.set(r, [](std::unique_ptr<http::request> req) {
|
||||||
//TBD
|
//TBD
|
||||||
unimplemented();
|
unimplemented();
|
||||||
auto path = req->param["path"];
|
auto path = req->get_path_param("path");
|
||||||
return make_ready_future<json::json_return_type>(json_void());
|
return make_ready_future<json::json_return_type>(json_void());
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -1082,7 +1138,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
|||||||
});
|
});
|
||||||
|
|
||||||
ss::enable_auto_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
ss::enable_auto_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||||
auto keyspace = validate_keyspace(ctx, req->param);
|
auto keyspace = validate_keyspace(ctx, req);
|
||||||
auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");
|
auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");
|
||||||
|
|
||||||
apilog.info("enable_auto_compaction: keyspace={} tables={}", keyspace, tables);
|
apilog.info("enable_auto_compaction: keyspace={} tables={}", keyspace, tables);
|
||||||
@@ -1090,7 +1146,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
|||||||
});
|
});
|
||||||
|
|
||||||
ss::disable_auto_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
ss::disable_auto_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||||
auto keyspace = validate_keyspace(ctx, req->param);
|
auto keyspace = validate_keyspace(ctx, req);
|
||||||
auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");
|
auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");
|
||||||
|
|
||||||
apilog.info("disable_auto_compaction: keyspace={} tables={}", keyspace, tables);
|
apilog.info("disable_auto_compaction: keyspace={} tables={}", keyspace, tables);
|
||||||
@@ -1098,7 +1154,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
|||||||
});
|
});
|
||||||
|
|
||||||
ss::enable_tombstone_gc.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
ss::enable_tombstone_gc.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||||
auto keyspace = validate_keyspace(ctx, req->param);
|
auto keyspace = validate_keyspace(ctx, req);
|
||||||
auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");
|
auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");
|
||||||
|
|
||||||
apilog.info("enable_tombstone_gc: keyspace={} tables={}", keyspace, tables);
|
apilog.info("enable_tombstone_gc: keyspace={} tables={}", keyspace, tables);
|
||||||
@@ -1106,7 +1162,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
|||||||
});
|
});
|
||||||
|
|
||||||
ss::disable_tombstone_gc.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
ss::disable_tombstone_gc.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||||
auto keyspace = validate_keyspace(ctx, req->param);
|
auto keyspace = validate_keyspace(ctx, req);
|
||||||
auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");
|
auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");
|
||||||
|
|
||||||
apilog.info("disable_tombstone_gc: keyspace={} tables={}", keyspace, tables);
|
apilog.info("disable_tombstone_gc: keyspace={} tables={}", keyspace, tables);
|
||||||
@@ -1202,7 +1258,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
|||||||
});
|
});
|
||||||
|
|
||||||
ss::get_effective_ownership.set(r, [&ctx, &ss] (std::unique_ptr<http::request> req) {
|
ss::get_effective_ownership.set(r, [&ctx, &ss] (std::unique_ptr<http::request> req) {
|
||||||
auto keyspace_name = req->param["keyspace"] == "null" ? "" : validate_keyspace(ctx, req->param);
|
auto keyspace_name = req->get_path_param("keyspace") == "null" ? "" : validate_keyspace(ctx, req);
|
||||||
return ss.local().effective_ownership(keyspace_name).then([] (auto&& ownership) {
|
return ss.local().effective_ownership(keyspace_name).then([] (auto&& ownership) {
|
||||||
std::vector<storage_service_json::mapper> res;
|
std::vector<storage_service_json::mapper> res;
|
||||||
return make_ready_future<json::json_return_type>(map_to_key_value(ownership, res));
|
return make_ready_future<json::json_return_type>(map_to_key_value(ownership, res));
|
||||||
@@ -1387,10 +1443,12 @@ void unset_storage_service(http_context& ctx, routes& r) {
|
|||||||
ss::get_current_generation_number.unset(r);
|
ss::get_current_generation_number.unset(r);
|
||||||
ss::get_natural_endpoints.unset(r);
|
ss::get_natural_endpoints.unset(r);
|
||||||
ss::cdc_streams_check_and_repair.unset(r);
|
ss::cdc_streams_check_and_repair.unset(r);
|
||||||
|
ss::force_compaction.unset(r);
|
||||||
ss::force_keyspace_compaction.unset(r);
|
ss::force_keyspace_compaction.unset(r);
|
||||||
ss::force_keyspace_cleanup.unset(r);
|
ss::force_keyspace_cleanup.unset(r);
|
||||||
ss::perform_keyspace_offstrategy_compaction.unset(r);
|
ss::perform_keyspace_offstrategy_compaction.unset(r);
|
||||||
ss::upgrade_sstables.unset(r);
|
ss::upgrade_sstables.unset(r);
|
||||||
|
ss::force_flush.unset(r);
|
||||||
ss::force_keyspace_flush.unset(r);
|
ss::force_keyspace_flush.unset(r);
|
||||||
ss::decommission.unset(r);
|
ss::decommission.unset(r);
|
||||||
ss::move.unset(r);
|
ss::move.unset(r);
|
||||||
@@ -1488,8 +1546,10 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
|
|||||||
});
|
});
|
||||||
}).then([&s] {
|
}).then([&s] {
|
||||||
return s.write("]").then([&s] {
|
return s.write("]").then([&s] {
|
||||||
return s.close();
|
return s.flush();
|
||||||
});
|
});
|
||||||
|
}).finally([&s] {
|
||||||
|
return s.close();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -37,11 +37,11 @@ namespace api {
|
|||||||
|
|
||||||
// verify that the keyspace is found, otherwise a bad_param_exception exception is thrown
|
// verify that the keyspace is found, otherwise a bad_param_exception exception is thrown
|
||||||
// containing the description of the respective keyspace error.
|
// containing the description of the respective keyspace error.
|
||||||
sstring validate_keyspace(http_context& ctx, sstring ks_name);
|
sstring validate_keyspace(const http_context& ctx, sstring ks_name);
|
||||||
|
|
||||||
// verify that the keyspace parameter is found, otherwise a bad_param_exception exception is thrown
|
// verify that the keyspace parameter is found, otherwise a bad_param_exception exception is thrown
|
||||||
// containing the description of the respective keyspace error.
|
// containing the description of the respective keyspace error.
|
||||||
sstring validate_keyspace(http_context& ctx, const httpd::parameters& param);
|
sstring validate_keyspace(const http_context& ctx, const std::unique_ptr<http::request>& req);
|
||||||
|
|
||||||
// splits a request parameter assumed to hold a comma-separated list of table names
|
// splits a request parameter assumed to hold a comma-separated list of table names
|
||||||
// verify that the tables are found, otherwise a bad_param_exception exception is thrown
|
// verify that the tables are found, otherwise a bad_param_exception exception is thrown
|
||||||
|
|||||||
@@ -106,7 +106,7 @@ void set_stream_manager(http_context& ctx, routes& r, sharded<streaming::stream_
|
|||||||
});
|
});
|
||||||
|
|
||||||
hs::get_total_incoming_bytes.set(r, [&sm](std::unique_ptr<request> req) {
|
hs::get_total_incoming_bytes.set(r, [&sm](std::unique_ptr<request> req) {
|
||||||
gms::inet_address peer(req->param["peer"]);
|
gms::inet_address peer(req->get_path_param("peer"));
|
||||||
return sm.map_reduce0([peer](streaming::stream_manager& sm) {
|
return sm.map_reduce0([peer](streaming::stream_manager& sm) {
|
||||||
return sm.get_progress_on_all_shards(peer).then([] (auto sbytes) {
|
return sm.get_progress_on_all_shards(peer).then([] (auto sbytes) {
|
||||||
return sbytes.bytes_received;
|
return sbytes.bytes_received;
|
||||||
@@ -127,7 +127,7 @@ void set_stream_manager(http_context& ctx, routes& r, sharded<streaming::stream_
|
|||||||
});
|
});
|
||||||
|
|
||||||
hs::get_total_outgoing_bytes.set(r, [&sm](std::unique_ptr<request> req) {
|
hs::get_total_outgoing_bytes.set(r, [&sm](std::unique_ptr<request> req) {
|
||||||
gms::inet_address peer(req->param["peer"]);
|
gms::inet_address peer(req->get_path_param("peer"));
|
||||||
return sm.map_reduce0([peer] (streaming::stream_manager& sm) {
|
return sm.map_reduce0([peer] (streaming::stream_manager& sm) {
|
||||||
return sm.get_progress_on_all_shards(peer).then([] (auto sbytes) {
|
return sm.get_progress_on_all_shards(peer).then([] (auto sbytes) {
|
||||||
return sbytes.bytes_sent;
|
return sbytes.bytes_sent;
|
||||||
|
|||||||
@@ -119,9 +119,9 @@ void set_system(http_context& ctx, routes& r) {
|
|||||||
|
|
||||||
hs::get_logger_level.set(r, [](const_req req) {
|
hs::get_logger_level.set(r, [](const_req req) {
|
||||||
try {
|
try {
|
||||||
return logging::level_name(logging::logger_registry().get_logger_level(req.param["name"]));
|
return logging::level_name(logging::logger_registry().get_logger_level(req.get_path_param("name")));
|
||||||
} catch (std::out_of_range& e) {
|
} catch (std::out_of_range& e) {
|
||||||
throw bad_param_exception("Unknown logger name " + req.param["name"]);
|
throw bad_param_exception("Unknown logger name " + req.get_path_param("name"));
|
||||||
}
|
}
|
||||||
// just to keep the compiler happy
|
// just to keep the compiler happy
|
||||||
return sstring();
|
return sstring();
|
||||||
@@ -130,9 +130,9 @@ void set_system(http_context& ctx, routes& r) {
|
|||||||
hs::set_logger_level.set(r, [](const_req req) {
|
hs::set_logger_level.set(r, [](const_req req) {
|
||||||
try {
|
try {
|
||||||
logging::log_level level = boost::lexical_cast<logging::log_level>(std::string(req.get_query_param("level")));
|
logging::log_level level = boost::lexical_cast<logging::log_level>(std::string(req.get_query_param("level")));
|
||||||
logging::logger_registry().set_logger_level(req.param["name"], level);
|
logging::logger_registry().set_logger_level(req.get_path_param("name"), level);
|
||||||
} catch (std::out_of_range& e) {
|
} catch (std::out_of_range& e) {
|
||||||
throw bad_param_exception("Unknown logger name " + req.param["name"]);
|
throw bad_param_exception("Unknown logger name " + req.get_path_param("name"));
|
||||||
} catch (boost::bad_lexical_cast& e) {
|
} catch (boost::bad_lexical_cast& e) {
|
||||||
throw bad_param_exception("Unknown logging level " + req.get_query_param("level"));
|
throw bad_param_exception("Unknown logging level " + req.get_query_param("level"));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,6 +7,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include <seastar/core/coroutine.hh>
|
#include <seastar/core/coroutine.hh>
|
||||||
|
#include <seastar/coroutine/exception.hh>
|
||||||
|
|
||||||
#include "task_manager.hh"
|
#include "task_manager.hh"
|
||||||
#include "api/api-doc/task_manager.json.hh"
|
#include "api/api-doc/task_manager.json.hh"
|
||||||
@@ -124,7 +125,7 @@ void set_task_manager(http_context& ctx, routes& r, db::config& cfg) {
|
|||||||
chunked_stats local_res;
|
chunked_stats local_res;
|
||||||
tasks::task_manager::module_ptr module;
|
tasks::task_manager::module_ptr module;
|
||||||
try {
|
try {
|
||||||
module = tm.find_module(req->param["module"]);
|
module = tm.find_module(req->get_path_param("module"));
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
throw bad_param_exception(fmt::format("{}", std::current_exception()));
|
throw bad_param_exception(fmt::format("{}", std::current_exception()));
|
||||||
}
|
}
|
||||||
@@ -139,25 +140,34 @@ void set_task_manager(http_context& ctx, routes& r, db::config& cfg) {
|
|||||||
|
|
||||||
std::function<future<>(output_stream<char>&&)> f = [r = std::move(res)] (output_stream<char>&& os) -> future<> {
|
std::function<future<>(output_stream<char>&&)> f = [r = std::move(res)] (output_stream<char>&& os) -> future<> {
|
||||||
auto s = std::move(os);
|
auto s = std::move(os);
|
||||||
auto res = std::move(r);
|
std::exception_ptr ex;
|
||||||
co_await s.write("[");
|
try {
|
||||||
std::string delim = "";
|
auto res = std::move(r);
|
||||||
for (auto& v: res) {
|
co_await s.write("[");
|
||||||
for (auto& stats: v) {
|
std::string delim = "";
|
||||||
co_await s.write(std::exchange(delim, ", "));
|
for (auto& v: res) {
|
||||||
tm::task_stats ts;
|
for (auto& stats: v) {
|
||||||
ts = stats;
|
co_await s.write(std::exchange(delim, ", "));
|
||||||
co_await formatter::write(s, ts);
|
tm::task_stats ts;
|
||||||
|
ts = stats;
|
||||||
|
co_await formatter::write(s, ts);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
co_await s.write("]");
|
||||||
|
co_await s.flush();
|
||||||
|
} catch (...) {
|
||||||
|
ex = std::current_exception();
|
||||||
}
|
}
|
||||||
co_await s.write("]");
|
|
||||||
co_await s.close();
|
co_await s.close();
|
||||||
|
if (ex) {
|
||||||
|
co_await coroutine::return_exception_ptr(std::move(ex));
|
||||||
|
}
|
||||||
};
|
};
|
||||||
co_return std::move(f);
|
co_return std::move(f);
|
||||||
});
|
});
|
||||||
|
|
||||||
tm::get_task_status.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
tm::get_task_status.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||||
auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
|
auto id = tasks::task_id{utils::UUID{req->get_path_param("task_id")}};
|
||||||
tasks::task_manager::foreign_task_ptr task;
|
tasks::task_manager::foreign_task_ptr task;
|
||||||
try {
|
try {
|
||||||
task = co_await tasks::task_manager::invoke_on_task(ctx.tm, id, std::function([] (tasks::task_manager::task_ptr task) -> future<tasks::task_manager::foreign_task_ptr> {
|
task = co_await tasks::task_manager::invoke_on_task(ctx.tm, id, std::function([] (tasks::task_manager::task_ptr task) -> future<tasks::task_manager::foreign_task_ptr> {
|
||||||
@@ -174,7 +184,7 @@ void set_task_manager(http_context& ctx, routes& r, db::config& cfg) {
|
|||||||
});
|
});
|
||||||
|
|
||||||
tm::abort_task.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
tm::abort_task.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||||
auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
|
auto id = tasks::task_id{utils::UUID{req->get_path_param("task_id")}};
|
||||||
try {
|
try {
|
||||||
co_await tasks::task_manager::invoke_on_task(ctx.tm, id, [] (tasks::task_manager::task_ptr task) -> future<> {
|
co_await tasks::task_manager::invoke_on_task(ctx.tm, id, [] (tasks::task_manager::task_ptr task) -> future<> {
|
||||||
if (!task->is_abortable()) {
|
if (!task->is_abortable()) {
|
||||||
@@ -189,7 +199,7 @@ void set_task_manager(http_context& ctx, routes& r, db::config& cfg) {
|
|||||||
});
|
});
|
||||||
|
|
||||||
tm::wait_task.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
tm::wait_task.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||||
auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
|
auto id = tasks::task_id{utils::UUID{req->get_path_param("task_id")}};
|
||||||
tasks::task_manager::foreign_task_ptr task;
|
tasks::task_manager::foreign_task_ptr task;
|
||||||
try {
|
try {
|
||||||
task = co_await tasks::task_manager::invoke_on_task(ctx.tm, id, std::function([] (tasks::task_manager::task_ptr task) {
|
task = co_await tasks::task_manager::invoke_on_task(ctx.tm, id, std::function([] (tasks::task_manager::task_ptr task) {
|
||||||
@@ -210,7 +220,7 @@ void set_task_manager(http_context& ctx, routes& r, db::config& cfg) {
|
|||||||
|
|
||||||
tm::get_task_status_recursively.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
tm::get_task_status_recursively.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||||
auto& _ctx = ctx;
|
auto& _ctx = ctx;
|
||||||
auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
|
auto id = tasks::task_id{utils::UUID{req->get_path_param("task_id")}};
|
||||||
std::queue<tasks::task_manager::foreign_task_ptr> q;
|
std::queue<tasks::task_manager::foreign_task_ptr> q;
|
||||||
utils::chunked_vector<full_task_status> res;
|
utils::chunked_vector<full_task_status> res;
|
||||||
|
|
||||||
@@ -232,8 +242,8 @@ void set_task_manager(http_context& ctx, routes& r, db::config& cfg) {
|
|||||||
while (!q.empty()) {
|
while (!q.empty()) {
|
||||||
auto& current = q.front();
|
auto& current = q.front();
|
||||||
res.push_back(co_await retrieve_status(current));
|
res.push_back(co_await retrieve_status(current));
|
||||||
for (size_t i = 0; i < current->get_children().size(); ++i) {
|
for (auto& child: current->get_children()) {
|
||||||
q.push(co_await current->get_children()[i].copy());
|
q.push(co_await child.copy());
|
||||||
}
|
}
|
||||||
q.pop();
|
q.pop();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -83,7 +83,7 @@ void set_task_manager_test(http_context& ctx, routes& r) {
|
|||||||
});
|
});
|
||||||
|
|
||||||
tmt::finish_test_task.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
tmt::finish_test_task.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||||
auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
|
auto id = tasks::task_id{utils::UUID{req->get_path_param("task_id")}};
|
||||||
auto it = req->query_parameters.find("error");
|
auto it = req->query_parameters.find("error");
|
||||||
bool fail = it != req->query_parameters.end();
|
bool fail = it != req->query_parameters.end();
|
||||||
std::string error = fail ? it->second : "";
|
std::string error = fail ? it->second : "";
|
||||||
|
|||||||
@@ -245,6 +245,8 @@ future<authenticated_user> password_authenticator::authenticate(
|
|||||||
std::throw_with_nested(exceptions::authentication_exception(e.what()));
|
std::throw_with_nested(exceptions::authentication_exception(e.what()));
|
||||||
} catch (exceptions::authentication_exception& e) {
|
} catch (exceptions::authentication_exception& e) {
|
||||||
std::throw_with_nested(e);
|
std::throw_with_nested(e);
|
||||||
|
} catch (exceptions::unavailable_exception& e) {
|
||||||
|
std::throw_with_nested(exceptions::authentication_exception(e.get_message()));
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
std::throw_with_nested(exceptions::authentication_exception("authentication failed"));
|
std::throw_with_nested(exceptions::authentication_exception("authentication failed"));
|
||||||
}
|
}
|
||||||
|
|||||||
2
bytes.hh
2
bytes.hh
@@ -89,7 +89,7 @@ public:
|
|||||||
// get the delimeter if any
|
// get the delimeter if any
|
||||||
auto it = ctx.begin();
|
auto it = ctx.begin();
|
||||||
auto end = ctx.end();
|
auto end = ctx.end();
|
||||||
if (it != end) {
|
if (it != end && *it != '}') {
|
||||||
int group_size = *it++ - '0';
|
int group_size = *it++ - '0';
|
||||||
if (group_size < 0 ||
|
if (group_size < 0 ||
|
||||||
static_cast<size_t>(group_size) > sizeof(uint64_t)) {
|
static_cast<size_t>(group_size) > sizeof(uint64_t)) {
|
||||||
|
|||||||
@@ -98,7 +98,16 @@ class cache_flat_mutation_reader final : public flat_mutation_reader_v2::impl {
|
|||||||
bool _next_row_in_range = false;
|
bool _next_row_in_range = false;
|
||||||
bool _has_rt = false;
|
bool _has_rt = false;
|
||||||
|
|
||||||
// True iff current population interval, since the previous clustering row, starts before all clustered rows.
|
// True iff current population interval starts at before_all_clustered_rows
|
||||||
|
// and _last_row is unset. (And the read isn't reverse).
|
||||||
|
//
|
||||||
|
// Rationale: in the "most general" step of cache population,
|
||||||
|
// we mark the `(_last_row, ...] `range as continuous, which can involve doing something to `_last_row`.
|
||||||
|
// But when populating the range `(before_all_clustered_rows, ...)`,
|
||||||
|
// a rows_entry at `before_all_clustered_rows` needn't exist.
|
||||||
|
// Thus this case needs a special treatment which doesn't involve `_last_row`.
|
||||||
|
// And for that, this case it has to be recognized (via this flag).
|
||||||
|
//
|
||||||
// We cannot just look at _lower_bound, because emission of range tombstones changes _lower_bound and
|
// We cannot just look at _lower_bound, because emission of range tombstones changes _lower_bound and
|
||||||
// because we mark clustering intervals as continuous when consuming a clustering_row, it would prevent
|
// because we mark clustering intervals as continuous when consuming a clustering_row, it would prevent
|
||||||
// us from marking the interval as continuous.
|
// us from marking the interval as continuous.
|
||||||
@@ -147,6 +156,8 @@ class cache_flat_mutation_reader final : public flat_mutation_reader_v2::impl {
|
|||||||
bool maybe_add_to_cache(const range_tombstone_change& rtc);
|
bool maybe_add_to_cache(const range_tombstone_change& rtc);
|
||||||
void maybe_add_to_cache(const static_row& sr);
|
void maybe_add_to_cache(const static_row& sr);
|
||||||
void maybe_set_static_row_continuous();
|
void maybe_set_static_row_continuous();
|
||||||
|
void set_rows_entry_continuous(rows_entry& e);
|
||||||
|
void restore_continuity_after_insertion(const mutation_partition::rows_type::iterator&);
|
||||||
void finish_reader() {
|
void finish_reader() {
|
||||||
push_mutation_fragment(*_schema, _permit, partition_end());
|
push_mutation_fragment(*_schema, _permit, partition_end());
|
||||||
_end_of_stream = true;
|
_end_of_stream = true;
|
||||||
@@ -341,7 +352,7 @@ future<> cache_flat_mutation_reader::do_fill_buffer() {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
_state = state::reading_from_underlying;
|
_state = state::reading_from_underlying;
|
||||||
_population_range_starts_before_all_rows = _lower_bound.is_before_all_clustered_rows(*_schema) && !_read_context.is_reversed();
|
_population_range_starts_before_all_rows = _lower_bound.is_before_all_clustered_rows(*_schema) && !_read_context.is_reversed() && !_last_row;
|
||||||
_underlying_upper_bound = _next_row_in_range ? position_in_partition::before_key(_next_row.position())
|
_underlying_upper_bound = _next_row_in_range ? position_in_partition::before_key(_next_row.position())
|
||||||
: position_in_partition(_upper_bound);
|
: position_in_partition(_upper_bound);
|
||||||
if (!_read_context.partition_exists()) {
|
if (!_read_context.partition_exists()) {
|
||||||
@@ -442,7 +453,10 @@ future<> cache_flat_mutation_reader::read_from_underlying() {
|
|||||||
auto e = alloc_strategy_unique_ptr<rows_entry>(
|
auto e = alloc_strategy_unique_ptr<rows_entry>(
|
||||||
current_allocator().construct<rows_entry>(_ck_ranges_curr->start()->value()));
|
current_allocator().construct<rows_entry>(_ck_ranges_curr->start()->value()));
|
||||||
// Use _next_row iterator only as a hint, because there could be insertions after _upper_bound.
|
// Use _next_row iterator only as a hint, because there could be insertions after _upper_bound.
|
||||||
auto insert_result = rows.insert_before_hint(_next_row.get_iterator_in_latest_version(), std::move(e), cmp);
|
auto insert_result = rows.insert_before_hint(
|
||||||
|
_next_row.at_a_row() ? _next_row.get_iterator_in_latest_version() : rows.begin(),
|
||||||
|
std::move(e),
|
||||||
|
cmp);
|
||||||
if (insert_result.second) {
|
if (insert_result.second) {
|
||||||
auto it = insert_result.first;
|
auto it = insert_result.first;
|
||||||
_snp->tracker()->insert(*it);
|
_snp->tracker()->insert(*it);
|
||||||
@@ -459,18 +473,22 @@ future<> cache_flat_mutation_reader::read_from_underlying() {
|
|||||||
auto e = alloc_strategy_unique_ptr<rows_entry>(
|
auto e = alloc_strategy_unique_ptr<rows_entry>(
|
||||||
current_allocator().construct<rows_entry>(table_s, to_table_domain(_upper_bound), is_dummy::yes, is_continuous::no));
|
current_allocator().construct<rows_entry>(table_s, to_table_domain(_upper_bound), is_dummy::yes, is_continuous::no));
|
||||||
// Use _next_row iterator only as a hint, because there could be insertions after _upper_bound.
|
// Use _next_row iterator only as a hint, because there could be insertions after _upper_bound.
|
||||||
auto insert_result = rows.insert_before_hint(_next_row.get_iterator_in_latest_version(), std::move(e), cmp);
|
auto insert_result = rows.insert_before_hint(
|
||||||
|
_next_row.at_a_row() ? _next_row.get_iterator_in_latest_version() : rows.begin(),
|
||||||
|
std::move(e),
|
||||||
|
cmp);
|
||||||
if (insert_result.second) {
|
if (insert_result.second) {
|
||||||
clogger.trace("csm {}: L{}: inserted dummy at {}", fmt::ptr(this), __LINE__, _upper_bound);
|
clogger.trace("csm {}: L{}: inserted dummy at {}", fmt::ptr(this), __LINE__, _upper_bound);
|
||||||
_snp->tracker()->insert(*insert_result.first);
|
_snp->tracker()->insert(*insert_result.first);
|
||||||
|
restore_continuity_after_insertion(insert_result.first);
|
||||||
}
|
}
|
||||||
if (_read_context.is_reversed()) [[unlikely]] {
|
if (_read_context.is_reversed()) [[unlikely]] {
|
||||||
clogger.trace("csm {}: set_continuous({}), prev={}, rt={}", fmt::ptr(this), _last_row.position(), insert_result.first->position(), _current_tombstone);
|
clogger.trace("csm {}: set_continuous({}), prev={}, rt={}", fmt::ptr(this), _last_row.position(), insert_result.first->position(), _current_tombstone);
|
||||||
_last_row->set_continuous(true);
|
set_rows_entry_continuous(*_last_row);
|
||||||
_last_row->set_range_tombstone(_current_tombstone);
|
_last_row->set_range_tombstone(_current_tombstone);
|
||||||
} else {
|
} else {
|
||||||
clogger.trace("csm {}: set_continuous({}), prev={}, rt={}", fmt::ptr(this), insert_result.first->position(), _last_row.position(), _current_tombstone);
|
clogger.trace("csm {}: set_continuous({}), prev={}, rt={}", fmt::ptr(this), insert_result.first->position(), _last_row.position(), _current_tombstone);
|
||||||
insert_result.first->set_continuous(true);
|
set_rows_entry_continuous(*insert_result.first);
|
||||||
insert_result.first->set_range_tombstone(_current_tombstone);
|
insert_result.first->set_range_tombstone(_current_tombstone);
|
||||||
}
|
}
|
||||||
maybe_drop_last_entry(_current_tombstone);
|
maybe_drop_last_entry(_current_tombstone);
|
||||||
@@ -505,11 +523,11 @@ bool cache_flat_mutation_reader::ensure_population_lower_bound() {
|
|||||||
rows_entry::tri_compare cmp(*_schema);
|
rows_entry::tri_compare cmp(*_schema);
|
||||||
partition_snapshot_row_cursor cur(*_schema, *_snp, false, _read_context.is_reversed());
|
partition_snapshot_row_cursor cur(*_schema, *_snp, false, _read_context.is_reversed());
|
||||||
|
|
||||||
if (!cur.advance_to(_last_row.position())) {
|
if (!cur.advance_to(to_query_domain(_last_row.position()))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cmp(cur.position(), _last_row.position()) != 0) {
|
if (cmp(cur.table_position(), _last_row.position()) != 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -531,7 +549,7 @@ void cache_flat_mutation_reader::maybe_update_continuity() {
|
|||||||
position_in_partition::equal_compare eq(*_schema);
|
position_in_partition::equal_compare eq(*_schema);
|
||||||
if (can_populate()
|
if (can_populate()
|
||||||
&& ensure_population_lower_bound()
|
&& ensure_population_lower_bound()
|
||||||
&& !eq(_last_row.position(), _next_row.position())) {
|
&& !eq(_last_row.position(), _next_row.table_position())) {
|
||||||
with_allocator(_snp->region().allocator(), [&] {
|
with_allocator(_snp->region().allocator(), [&] {
|
||||||
rows_entry& e = _next_row.ensure_entry_in_latest().row;
|
rows_entry& e = _next_row.ensure_entry_in_latest().row;
|
||||||
auto& rows = _snp->version()->partition().mutable_clustered_rows();
|
auto& rows = _snp->version()->partition().mutable_clustered_rows();
|
||||||
@@ -553,14 +571,14 @@ void cache_flat_mutation_reader::maybe_update_continuity() {
|
|||||||
}
|
}
|
||||||
clogger.trace("csm {}: set_continuous({}), prev={}, rt={}", fmt::ptr(this), insert_result.first->position(),
|
clogger.trace("csm {}: set_continuous({}), prev={}, rt={}", fmt::ptr(this), insert_result.first->position(),
|
||||||
_last_row.position(), _current_tombstone);
|
_last_row.position(), _current_tombstone);
|
||||||
insert_result.first->set_continuous(true);
|
set_rows_entry_continuous(*insert_result.first);
|
||||||
insert_result.first->set_range_tombstone(_current_tombstone);
|
insert_result.first->set_range_tombstone(_current_tombstone);
|
||||||
clogger.trace("csm {}: set_continuous({})", fmt::ptr(this), _last_row.position());
|
clogger.trace("csm {}: set_continuous({})", fmt::ptr(this), _last_row.position());
|
||||||
_last_row->set_continuous(true);
|
set_rows_entry_continuous(*_last_row);
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
clogger.trace("csm {}: set_continuous({}), rt={}", fmt::ptr(this), _last_row.position(), _current_tombstone);
|
clogger.trace("csm {}: set_continuous({}), rt={}", fmt::ptr(this), _last_row.position(), _current_tombstone);
|
||||||
_last_row->set_continuous(true);
|
set_rows_entry_continuous(*_last_row);
|
||||||
_last_row->set_range_tombstone(_current_tombstone);
|
_last_row->set_range_tombstone(_current_tombstone);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -578,18 +596,18 @@ void cache_flat_mutation_reader::maybe_update_continuity() {
|
|||||||
if (insert_result.second) {
|
if (insert_result.second) {
|
||||||
clogger.trace("csm {}: L{}: inserted dummy at {}", fmt::ptr(this), __LINE__, insert_result.first->position());
|
clogger.trace("csm {}: L{}: inserted dummy at {}", fmt::ptr(this), __LINE__, insert_result.first->position());
|
||||||
_snp->tracker()->insert(*insert_result.first);
|
_snp->tracker()->insert(*insert_result.first);
|
||||||
|
clogger.trace("csm {}: set_continuous({}), prev={}, rt={}", fmt::ptr(this), insert_result.first->position(),
|
||||||
|
_last_row.position(), _current_tombstone);
|
||||||
|
set_rows_entry_continuous(*insert_result.first);
|
||||||
|
insert_result.first->set_range_tombstone(_current_tombstone);
|
||||||
}
|
}
|
||||||
clogger.trace("csm {}: set_continuous({}), prev={}, rt={}", fmt::ptr(this), insert_result.first->position(),
|
|
||||||
_last_row.position(), _current_tombstone);
|
|
||||||
insert_result.first->set_continuous(true);
|
|
||||||
insert_result.first->set_range_tombstone(_current_tombstone);
|
|
||||||
clogger.trace("csm {}: set_continuous({})", fmt::ptr(this), e.position());
|
clogger.trace("csm {}: set_continuous({})", fmt::ptr(this), e.position());
|
||||||
e.set_continuous(true);
|
set_rows_entry_continuous(e);
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
clogger.trace("csm {}: set_continuous({}), rt={}", fmt::ptr(this), e.position(), _current_tombstone);
|
clogger.trace("csm {}: set_continuous({}), rt={}", fmt::ptr(this), e.position(), _current_tombstone);
|
||||||
e.set_range_tombstone(_current_tombstone);
|
e.set_range_tombstone(_current_tombstone);
|
||||||
e.set_continuous(true);
|
set_rows_entry_continuous(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
maybe_drop_last_entry(_current_tombstone);
|
maybe_drop_last_entry(_current_tombstone);
|
||||||
@@ -619,26 +637,27 @@ void cache_flat_mutation_reader::maybe_add_to_cache(const clustering_row& cr) {
|
|||||||
current_allocator().construct<rows_entry>(table_schema(), cr.key(), cr.as_deletable_row()));
|
current_allocator().construct<rows_entry>(table_schema(), cr.key(), cr.as_deletable_row()));
|
||||||
new_entry->set_continuous(false);
|
new_entry->set_continuous(false);
|
||||||
new_entry->set_range_tombstone(_current_tombstone);
|
new_entry->set_range_tombstone(_current_tombstone);
|
||||||
auto it = _next_row.iterators_valid() ? _next_row.get_iterator_in_latest_version()
|
auto it = _next_row.iterators_valid() && _next_row.at_a_row() ? _next_row.get_iterator_in_latest_version()
|
||||||
: mp.clustered_rows().lower_bound(cr.key(), cmp);
|
: mp.clustered_rows().lower_bound(cr.key(), cmp);
|
||||||
auto insert_result = mp.mutable_clustered_rows().insert_before_hint(it, std::move(new_entry), cmp);
|
auto insert_result = mp.mutable_clustered_rows().insert_before_hint(it, std::move(new_entry), cmp);
|
||||||
it = insert_result.first;
|
it = insert_result.first;
|
||||||
if (insert_result.second) {
|
if (insert_result.second) {
|
||||||
_snp->tracker()->insert(*it);
|
_snp->tracker()->insert(*it);
|
||||||
|
restore_continuity_after_insertion(it);
|
||||||
}
|
}
|
||||||
|
|
||||||
rows_entry& e = *it;
|
rows_entry& e = *it;
|
||||||
if (ensure_population_lower_bound()) {
|
if (ensure_population_lower_bound()) {
|
||||||
if (_read_context.is_reversed()) [[unlikely]] {
|
if (_read_context.is_reversed()) [[unlikely]] {
|
||||||
clogger.trace("csm {}: set_continuous({})", fmt::ptr(this), _last_row.position());
|
clogger.trace("csm {}: set_continuous({})", fmt::ptr(this), _last_row.position());
|
||||||
_last_row->set_continuous(true);
|
set_rows_entry_continuous(*_last_row);
|
||||||
// _current_tombstone must also apply to _last_row itself (if it's non-dummy)
|
// _current_tombstone must also apply to _last_row itself (if it's non-dummy)
|
||||||
// because otherwise there would be a rtc after it, either creating a different entry,
|
// because otherwise there would be a rtc after it, either creating a different entry,
|
||||||
// or clearing _last_row if population did not happen.
|
// or clearing _last_row if population did not happen.
|
||||||
_last_row->set_range_tombstone(_current_tombstone);
|
_last_row->set_range_tombstone(_current_tombstone);
|
||||||
} else {
|
} else {
|
||||||
clogger.trace("csm {}: set_continuous({})", fmt::ptr(this), e.position());
|
clogger.trace("csm {}: set_continuous({})", fmt::ptr(this), e.position());
|
||||||
e.set_continuous(true);
|
set_rows_entry_continuous(e);
|
||||||
e.set_range_tombstone(_current_tombstone);
|
e.set_range_tombstone(_current_tombstone);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@@ -683,26 +702,31 @@ bool cache_flat_mutation_reader::maybe_add_to_cache(const range_tombstone_change
|
|||||||
|
|
||||||
auto new_entry = alloc_strategy_unique_ptr<rows_entry>(
|
auto new_entry = alloc_strategy_unique_ptr<rows_entry>(
|
||||||
current_allocator().construct<rows_entry>(table_schema(), to_table_domain(rtc.position()), is_dummy::yes, is_continuous::no));
|
current_allocator().construct<rows_entry>(table_schema(), to_table_domain(rtc.position()), is_dummy::yes, is_continuous::no));
|
||||||
auto it = _next_row.iterators_valid() ? _next_row.get_iterator_in_latest_version()
|
auto it = _next_row.iterators_valid() && _next_row.at_a_row() ? _next_row.get_iterator_in_latest_version()
|
||||||
: mp.clustered_rows().lower_bound(to_table_domain(rtc.position()), cmp);
|
: mp.clustered_rows().lower_bound(to_table_domain(rtc.position()), cmp);
|
||||||
auto insert_result = mp.mutable_clustered_rows().insert_before_hint(it, std::move(new_entry), cmp);
|
auto insert_result = mp.mutable_clustered_rows().insert_before_hint(it, std::move(new_entry), cmp);
|
||||||
it = insert_result.first;
|
it = insert_result.first;
|
||||||
if (insert_result.second) {
|
if (insert_result.second) {
|
||||||
_snp->tracker()->insert(*it);
|
_snp->tracker()->insert(*it);
|
||||||
|
restore_continuity_after_insertion(it);
|
||||||
}
|
}
|
||||||
|
|
||||||
rows_entry& e = *it;
|
rows_entry& e = *it;
|
||||||
if (ensure_population_lower_bound()) {
|
if (ensure_population_lower_bound()) {
|
||||||
// underlying may emit range_tombstone_change fragments with the same position.
|
// underlying may emit range_tombstone_change fragments with the same position.
|
||||||
// In such case, the range to which the tombstone from the first fragment applies is empty and should be ignored.
|
// In such case, the range to which the tombstone from the first fragment applies is empty and should be ignored.
|
||||||
if (q_cmp(_last_row.position(), it->position()) < 0) {
|
//
|
||||||
|
// Note: we are using a query schema comparator to compare table schema positions here,
|
||||||
|
// but this is okay because we are only checking for equality,
|
||||||
|
// which is preserved by schema reversals.
|
||||||
|
if (q_cmp(_last_row.position(), it->position()) != 0) {
|
||||||
if (_read_context.is_reversed()) [[unlikely]] {
|
if (_read_context.is_reversed()) [[unlikely]] {
|
||||||
clogger.trace("csm {}: set_continuous({}), rt={}", fmt::ptr(this), _last_row.position(), prev);
|
clogger.trace("csm {}: set_continuous({}), rt={}", fmt::ptr(this), _last_row.position(), prev);
|
||||||
_last_row->set_continuous(true);
|
set_rows_entry_continuous(*_last_row);
|
||||||
_last_row->set_range_tombstone(prev);
|
_last_row->set_range_tombstone(prev);
|
||||||
} else {
|
} else {
|
||||||
clogger.trace("csm {}: set_continuous({}), rt={}", fmt::ptr(this), e.position(), prev);
|
clogger.trace("csm {}: set_continuous({}), rt={}", fmt::ptr(this), e.position(), prev);
|
||||||
e.set_continuous(true);
|
set_rows_entry_continuous(e);
|
||||||
e.set_range_tombstone(prev);
|
e.set_range_tombstone(prev);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -881,7 +905,10 @@ void cache_flat_mutation_reader::move_to_range(query::clustering_row_ranges::con
|
|||||||
auto& rows = _snp->version()->partition().mutable_clustered_rows();
|
auto& rows = _snp->version()->partition().mutable_clustered_rows();
|
||||||
auto new_entry = alloc_strategy_unique_ptr<rows_entry>(current_allocator().construct<rows_entry>(table_schema(),
|
auto new_entry = alloc_strategy_unique_ptr<rows_entry>(current_allocator().construct<rows_entry>(table_schema(),
|
||||||
to_table_domain(_lower_bound), is_dummy::yes, is_continuous::no));
|
to_table_domain(_lower_bound), is_dummy::yes, is_continuous::no));
|
||||||
return rows.insert_before_hint(_next_row.get_iterator_in_latest_version(), std::move(new_entry), cmp);
|
return rows.insert_before_hint(
|
||||||
|
_next_row.at_a_row() ? _next_row.get_iterator_in_latest_version() : rows.begin(),
|
||||||
|
std::move(new_entry),
|
||||||
|
cmp);
|
||||||
});
|
});
|
||||||
auto it = insert_result.first;
|
auto it = insert_result.first;
|
||||||
if (insert_result.second) {
|
if (insert_result.second) {
|
||||||
@@ -1041,6 +1068,28 @@ void cache_flat_mutation_reader::maybe_set_static_row_continuous() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Last dummies can exist in a quasi-evicted state, where they are unlinked from LRU,
|
||||||
|
// but still alive.
|
||||||
|
// But while in this state, they mustn't carry any information (i.e. continuity),
|
||||||
|
// due to the "older versions are evicted first" rule of MVCC.
|
||||||
|
// Thus, when we make an entry continuous, we must ensure that it isn't an
|
||||||
|
// unlinked last dummy.
|
||||||
|
inline
|
||||||
|
void cache_flat_mutation_reader::set_rows_entry_continuous(rows_entry& e) {
|
||||||
|
e.set_continuous(true);
|
||||||
|
if (!e.is_linked()) [[unlikely]] {
|
||||||
|
_snp->tracker()->touch(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
void cache_flat_mutation_reader::restore_continuity_after_insertion(const mutation_partition::rows_type::iterator& it) {
|
||||||
|
if (auto x = std::next(it); x->continuous()) {
|
||||||
|
it->set_continuous(true);
|
||||||
|
it->set_range_tombstone(x->range_tombstone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
inline
|
inline
|
||||||
bool cache_flat_mutation_reader::can_populate() const {
|
bool cache_flat_mutation_reader::can_populate() const {
|
||||||
return _snp->at_latest_version() && _read_context.cache().phase_of(_read_context.key()) == _read_context.phase();
|
return _snp->at_latest_version() && _read_context.cache().phase_of(_read_context.key()) == _read_context.phase();
|
||||||
|
|||||||
@@ -51,8 +51,16 @@ namespace db {
|
|||||||
|
|
||||||
namespace cdc {
|
namespace cdc {
|
||||||
|
|
||||||
extern const api::timestamp_clock::duration generation_leeway =
|
api::timestamp_clock::duration get_generation_leeway() {
|
||||||
std::chrono::duration_cast<api::timestamp_clock::duration>(std::chrono::seconds(5));
|
static thread_local auto generation_leeway =
|
||||||
|
std::chrono::duration_cast<api::timestamp_clock::duration>(std::chrono::seconds(5));
|
||||||
|
|
||||||
|
utils::get_local_injector().inject("increase_cdc_generation_leeway", [&] {
|
||||||
|
generation_leeway = std::chrono::duration_cast<api::timestamp_clock::duration>(std::chrono::minutes(5));
|
||||||
|
});
|
||||||
|
|
||||||
|
return generation_leeway;
|
||||||
|
}
|
||||||
|
|
||||||
static void copy_int_to_bytes(int64_t i, size_t offset, bytes& b) {
|
static void copy_int_to_bytes(int64_t i, size_t offset, bytes& b) {
|
||||||
i = net::hton(i);
|
i = net::hton(i);
|
||||||
@@ -372,7 +380,7 @@ db_clock::time_point new_generation_timestamp(bool add_delay, std::chrono::milli
|
|||||||
|
|
||||||
auto ts = db_clock::now();
|
auto ts = db_clock::now();
|
||||||
if (add_delay && ring_delay != 0ms) {
|
if (add_delay && ring_delay != 0ms) {
|
||||||
ts += 2 * ring_delay + duration_cast<milliseconds>(generation_leeway);
|
ts += 2 * ring_delay + duration_cast<milliseconds>(get_generation_leeway());
|
||||||
}
|
}
|
||||||
return ts;
|
return ts;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -46,6 +46,8 @@ namespace gms {
|
|||||||
|
|
||||||
namespace cdc {
|
namespace cdc {
|
||||||
|
|
||||||
|
api::timestamp_clock::duration get_generation_leeway();
|
||||||
|
|
||||||
class stream_id final {
|
class stream_id final {
|
||||||
bytes _value;
|
bytes _value;
|
||||||
public:
|
public:
|
||||||
|
|||||||
@@ -15,10 +15,6 @@
|
|||||||
|
|
||||||
extern logging::logger cdc_log;
|
extern logging::logger cdc_log;
|
||||||
|
|
||||||
namespace cdc {
|
|
||||||
extern const api::timestamp_clock::duration generation_leeway;
|
|
||||||
} // namespace cdc
|
|
||||||
|
|
||||||
static api::timestamp_type to_ts(db_clock::time_point tp) {
|
static api::timestamp_type to_ts(db_clock::time_point tp) {
|
||||||
// This assumes that timestamp_clock and db_clock have the same epochs.
|
// This assumes that timestamp_clock and db_clock have the same epochs.
|
||||||
return std::chrono::duration_cast<api::timestamp_clock::duration>(tp.time_since_epoch()).count();
|
return std::chrono::duration_cast<api::timestamp_clock::duration>(tp.time_since_epoch()).count();
|
||||||
@@ -73,7 +69,7 @@ bool cdc::metadata::streams_available() const {
|
|||||||
|
|
||||||
cdc::stream_id cdc::metadata::get_stream(api::timestamp_type ts, dht::token tok) {
|
cdc::stream_id cdc::metadata::get_stream(api::timestamp_type ts, dht::token tok) {
|
||||||
auto now = api::new_timestamp();
|
auto now = api::new_timestamp();
|
||||||
if (ts > now + generation_leeway.count()) {
|
if (ts > now + get_generation_leeway().count()) {
|
||||||
throw exceptions::invalid_request_exception(format(
|
throw exceptions::invalid_request_exception(format(
|
||||||
"cdc: attempted to get a stream \"from the future\" ({}; current server time: {})."
|
"cdc: attempted to get a stream \"from the future\" ({}; current server time: {})."
|
||||||
" With CDC you cannot send writes with timestamps arbitrarily into the future, because we don't"
|
" With CDC you cannot send writes with timestamps arbitrarily into the future, because we don't"
|
||||||
@@ -86,27 +82,43 @@ cdc::stream_id cdc::metadata::get_stream(api::timestamp_type ts, dht::token tok)
|
|||||||
// Nothing protects us from that until we start using transactions for generation switching.
|
// Nothing protects us from that until we start using transactions for generation switching.
|
||||||
}
|
}
|
||||||
|
|
||||||
auto it = gen_used_at(now);
|
auto it = gen_used_at(now - get_generation_leeway().count());
|
||||||
if (it == _gens.end()) {
|
|
||||||
|
if (it != _gens.end()) {
|
||||||
|
// Garbage-collect generations that will no longer be used.
|
||||||
|
it = _gens.erase(_gens.begin(), it);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ts <= now - get_generation_leeway().count()) {
|
||||||
|
// We reject the write if `ts <= now - generation_leeway` and the write is not to the current generation, which
|
||||||
|
// happens iff one of the following is true:
|
||||||
|
// - the write is to no generation,
|
||||||
|
// - the write is to a generation older than the generation under `it`,
|
||||||
|
// - the write is to the generation under `it` and that generation is not the current generation.
|
||||||
|
// Note that we cannot distinguish the first and second cases because we garbage-collect obsolete generations,
|
||||||
|
// but we can check if one of them takes place (`it == _gens.end() || ts < it->first`). These three conditions
|
||||||
|
// are sufficient. The write with `ts <= now - generation_leeway` cannot be to one of the generations following
|
||||||
|
// the generation under `it` because that generation was operating at `now - generation_leeway`.
|
||||||
|
bool is_previous_gen = it != _gens.end() && std::next(it) != _gens.end() && std::next(it)->first <= now;
|
||||||
|
if (it == _gens.end() || ts < it->first || is_previous_gen) {
|
||||||
|
throw exceptions::invalid_request_exception(format(
|
||||||
|
"cdc: attempted to get a stream \"from the past\" ({}; current server time: {})."
|
||||||
|
" With CDC you cannot send writes with timestamps too far into the past, because that would break"
|
||||||
|
" consistency properties.\n"
|
||||||
|
"We *do* allow sending writes into the near past, but our ability to do that is limited."
|
||||||
|
" Are you using client-side timestamps? Make sure your clocks are well-synchronized"
|
||||||
|
" with the database's clocks.", format_timestamp(ts), format_timestamp(now)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
it = _gens.begin();
|
||||||
|
if (it == _gens.end() || ts < it->first) {
|
||||||
throw std::runtime_error(format(
|
throw std::runtime_error(format(
|
||||||
"cdc::metadata::get_stream: could not find any CDC stream (current time: {})."
|
"cdc::metadata::get_stream: could not find any CDC stream for timestamp {}."
|
||||||
" Are we in the middle of a cluster upgrade?", format_timestamp(now)));
|
" Are we in the middle of a cluster upgrade?", format_timestamp(ts)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Garbage-collect generations that will no longer be used.
|
// Find the generation operating at `ts`.
|
||||||
it = _gens.erase(_gens.begin(), it);
|
|
||||||
|
|
||||||
if (it->first > ts) {
|
|
||||||
throw exceptions::invalid_request_exception(format(
|
|
||||||
"cdc: attempted to get a stream from an earlier generation than the currently used one."
|
|
||||||
" With CDC you cannot send writes with timestamps too far into the past, because that would break"
|
|
||||||
" consistency properties (write timestamp: {}, current generation started at: {})",
|
|
||||||
format_timestamp(ts), format_timestamp(it->first)));
|
|
||||||
}
|
|
||||||
|
|
||||||
// With `generation_leeway` we allow sending writes to the near future. It might happen
|
|
||||||
// that `ts` doesn't belong to the current generation ("current" according to our clock),
|
|
||||||
// but to the next generation. Adjust for this case:
|
|
||||||
{
|
{
|
||||||
auto next_it = std::next(it);
|
auto next_it = std::next(it);
|
||||||
while (next_it != _gens.end() && next_it->first <= ts) {
|
while (next_it != _gens.end() && next_it->first <= ts) {
|
||||||
@@ -147,8 +159,8 @@ bool cdc::metadata::known_or_obsolete(db_clock::time_point tp) const {
|
|||||||
++it;
|
++it;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if some new generation has already superseded this one.
|
// Check if the generation is obsolete.
|
||||||
return it != _gens.end() && it->first <= api::new_timestamp();
|
return it != _gens.end() && it->first <= api::new_timestamp() - get_generation_leeway().count();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool cdc::metadata::insert(db_clock::time_point tp, topology_description&& gen) {
|
bool cdc::metadata::insert(db_clock::time_point tp, topology_description&& gen) {
|
||||||
@@ -157,7 +169,7 @@ bool cdc::metadata::insert(db_clock::time_point tp, topology_description&& gen)
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto now = api::new_timestamp();
|
auto now = api::new_timestamp();
|
||||||
auto it = gen_used_at(now);
|
auto it = gen_used_at(now - get_generation_leeway().count());
|
||||||
|
|
||||||
if (it != _gens.end()) {
|
if (it != _gens.end()) {
|
||||||
// Garbage-collect generations that will no longer be used.
|
// Garbage-collect generations that will no longer be used.
|
||||||
|
|||||||
@@ -42,7 +42,9 @@ class metadata final {
|
|||||||
|
|
||||||
container_t::const_iterator gen_used_at(api::timestamp_type ts) const;
|
container_t::const_iterator gen_used_at(api::timestamp_type ts) const;
|
||||||
public:
|
public:
|
||||||
/* Is a generation with the given timestamp already known or superseded by a newer generation? */
|
/* Is a generation with the given timestamp already known or obsolete? It is obsolete if and only if
|
||||||
|
* it is older than the generation operating at `now - get_generation_leeway()`.
|
||||||
|
*/
|
||||||
bool known_or_obsolete(db_clock::time_point) const;
|
bool known_or_obsolete(db_clock::time_point) const;
|
||||||
|
|
||||||
/* Are there streams available. I.e. valid for time == now. If this is false, any writes to
|
/* Are there streams available. I.e. valid for time == now. If this is false, any writes to
|
||||||
@@ -54,8 +56,9 @@ public:
|
|||||||
*
|
*
|
||||||
* If the provided timestamp is too far away "into the future" (where "now" is defined according to our local clock),
|
* If the provided timestamp is too far away "into the future" (where "now" is defined according to our local clock),
|
||||||
* we reject the get_stream query. This is because the resulting stream might belong to a generation which we don't
|
* we reject the get_stream query. This is because the resulting stream might belong to a generation which we don't
|
||||||
* yet know about. The amount of leeway (how much "into the future" we allow `ts` to be) is defined
|
* yet know about. Similarly, we reject queries to the previous generations if the timestamp is too far away "into
|
||||||
* by the `cdc::generation_leeway` constant.
|
* the past". The amount of leeway (how much "into the future" or "into the past" we allow `ts` to be) is defined by
|
||||||
|
* `get_generation_leeway()`.
|
||||||
*/
|
*/
|
||||||
stream_id get_stream(api::timestamp_type ts, dht::token tok);
|
stream_id get_stream(api::timestamp_type ts, dht::token tok);
|
||||||
|
|
||||||
|
|||||||
@@ -144,12 +144,21 @@ std::ostream& operator<<(std::ostream& os, compaction_type_options::scrub::quara
|
|||||||
}
|
}
|
||||||
|
|
||||||
static api::timestamp_type get_max_purgeable_timestamp(const table_state& table_s, sstable_set::incremental_selector& selector,
|
static api::timestamp_type get_max_purgeable_timestamp(const table_state& table_s, sstable_set::incremental_selector& selector,
|
||||||
const std::unordered_set<shared_sstable>& compacting_set, const dht::decorated_key& dk, uint64_t& bloom_filter_checks) {
|
const std::unordered_set<shared_sstable>& compacting_set, const dht::decorated_key& dk, uint64_t& bloom_filter_checks,
|
||||||
|
const api::timestamp_type compacting_max_timestamp) {
|
||||||
if (!table_s.tombstone_gc_enabled()) [[unlikely]] {
|
if (!table_s.tombstone_gc_enabled()) [[unlikely]] {
|
||||||
return api::min_timestamp;
|
return api::min_timestamp;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto timestamp = table_s.min_memtable_timestamp();
|
auto timestamp = api::max_timestamp;
|
||||||
|
auto memtable_min_timestamp = table_s.min_memtable_timestamp();
|
||||||
|
// Use memtable timestamp if it contains data older than the sstables being compacted,
|
||||||
|
// and if the memtable also contains the key we're calculating max purgeable timestamp for.
|
||||||
|
// First condition helps to not penalize the common scenario where memtable only contains
|
||||||
|
// newer data.
|
||||||
|
if (memtable_min_timestamp <= compacting_max_timestamp && table_s.memtable_has_key(dk)) {
|
||||||
|
timestamp = memtable_min_timestamp;
|
||||||
|
}
|
||||||
std::optional<utils::hashed_key> hk;
|
std::optional<utils::hashed_key> hk;
|
||||||
for (auto&& sst : boost::range::join(selector.select(dk).sstables, table_s.compacted_undeleted_sstables())) {
|
for (auto&& sst : boost::range::join(selector.select(dk).sstables, table_s.compacted_undeleted_sstables())) {
|
||||||
if (compacting_set.contains(sst)) {
|
if (compacting_set.contains(sst)) {
|
||||||
@@ -441,7 +450,9 @@ protected:
|
|||||||
uint64_t _end_size = 0;
|
uint64_t _end_size = 0;
|
||||||
// fully expired files, which are skipped, aren't taken into account.
|
// fully expired files, which are skipped, aren't taken into account.
|
||||||
uint64_t _compacting_data_file_size = 0;
|
uint64_t _compacting_data_file_size = 0;
|
||||||
|
api::timestamp_type _compacting_max_timestamp = api::min_timestamp;
|
||||||
uint64_t _estimated_partitions = 0;
|
uint64_t _estimated_partitions = 0;
|
||||||
|
double _estimated_droppable_tombstone_ratio = 0;
|
||||||
uint64_t _bloom_filter_checks = 0;
|
uint64_t _bloom_filter_checks = 0;
|
||||||
db::replay_position _rp;
|
db::replay_position _rp;
|
||||||
encoding_stats_collector _stats_collector;
|
encoding_stats_collector _stats_collector;
|
||||||
@@ -470,6 +481,26 @@ private:
|
|||||||
cdata.compaction_fan_in = descriptor.fan_in();
|
cdata.compaction_fan_in = descriptor.fan_in();
|
||||||
return cdata;
|
return cdata;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Called in a seastar thread
|
||||||
|
dht::partition_range_vector
|
||||||
|
get_ranges_for_invalidation(const std::vector<shared_sstable>& sstables) {
|
||||||
|
// If owned ranges is disengaged, it means no cleanup work was done and
|
||||||
|
// so nothing needs to be invalidated.
|
||||||
|
if (!_owned_ranges) {
|
||||||
|
return dht::partition_range_vector{};
|
||||||
|
}
|
||||||
|
auto owned_ranges = dht::to_partition_ranges(*_owned_ranges, utils::can_yield::yes);
|
||||||
|
|
||||||
|
auto non_owned_ranges = boost::copy_range<dht::partition_range_vector>(sstables
|
||||||
|
| boost::adaptors::transformed([] (const shared_sstable& sst) {
|
||||||
|
seastar::thread::maybe_yield();
|
||||||
|
return dht::partition_range::make({sst->get_first_decorated_key(), true},
|
||||||
|
{sst->get_last_decorated_key(), true});
|
||||||
|
}));
|
||||||
|
|
||||||
|
return dht::subtract_ranges(*_schema, non_owned_ranges, std::move(owned_ranges)).get();
|
||||||
|
}
|
||||||
protected:
|
protected:
|
||||||
compaction(table_state& table_s, compaction_descriptor descriptor, compaction_data& cdata)
|
compaction(table_state& table_s, compaction_descriptor descriptor, compaction_data& cdata)
|
||||||
: _cdata(init_compaction_data(cdata, descriptor))
|
: _cdata(init_compaction_data(cdata, descriptor))
|
||||||
@@ -505,7 +536,7 @@ protected:
|
|||||||
auto max_sstable_size = std::max<uint64_t>(_max_sstable_size, 1);
|
auto max_sstable_size = std::max<uint64_t>(_max_sstable_size, 1);
|
||||||
uint64_t estimated_sstables = std::max(1UL, uint64_t(ceil(double(_compacting_data_file_size) / max_sstable_size)));
|
uint64_t estimated_sstables = std::max(1UL, uint64_t(ceil(double(_compacting_data_file_size) / max_sstable_size)));
|
||||||
return std::min(uint64_t(ceil(double(_estimated_partitions) / estimated_sstables)),
|
return std::min(uint64_t(ceil(double(_estimated_partitions) / estimated_sstables)),
|
||||||
_table_s.get_compaction_strategy().adjust_partition_estimate(_ms_metadata, _estimated_partitions));
|
_table_s.get_compaction_strategy().adjust_partition_estimate(_ms_metadata, _estimated_partitions, _schema));
|
||||||
}
|
}
|
||||||
|
|
||||||
void setup_new_sstable(shared_sstable& sst) {
|
void setup_new_sstable(shared_sstable& sst) {
|
||||||
@@ -549,9 +580,10 @@ protected:
|
|||||||
return _stats_collector.get();
|
return _stats_collector.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual compaction_completion_desc
|
compaction_completion_desc
|
||||||
get_compaction_completion_desc(std::vector<shared_sstable> input_sstables, std::vector<shared_sstable> output_sstables) {
|
get_compaction_completion_desc(std::vector<shared_sstable> input_sstables, std::vector<shared_sstable> output_sstables) {
|
||||||
return compaction_completion_desc{std::move(input_sstables), std::move(output_sstables)};
|
auto ranges_for_for_invalidation = get_ranges_for_invalidation(input_sstables);
|
||||||
|
return compaction_completion_desc{std::move(input_sstables), std::move(output_sstables), std::move(ranges_for_for_invalidation)};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tombstone expiration is enabled based on the presence of sstable set.
|
// Tombstone expiration is enabled based on the presence of sstable set.
|
||||||
@@ -567,7 +599,8 @@ protected:
|
|||||||
sstable_writer_config cfg = _table_s.configure_writer("garbage_collection");
|
sstable_writer_config cfg = _table_s.configure_writer("garbage_collection");
|
||||||
cfg.run_identifier = gc_run;
|
cfg.run_identifier = gc_run;
|
||||||
cfg.monitor = monitor.get();
|
cfg.monitor = monitor.get();
|
||||||
auto writer = sst->get_writer(*schema(), partitions_per_sstable(), cfg, get_encoding_stats());
|
uint64_t estimated_partitions = std::max(1UL, uint64_t(ceil(partitions_per_sstable() * _estimated_droppable_tombstone_ratio)));
|
||||||
|
auto writer = sst->get_writer(*schema(), estimated_partitions, cfg, get_encoding_stats());
|
||||||
return compaction_writer(std::move(monitor), std::move(writer), std::move(sst));
|
return compaction_writer(std::move(monitor), std::move(writer), std::move(sst));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -686,6 +719,7 @@ private:
|
|||||||
auto fully_expired = _table_s.fully_expired_sstables(_sstables, gc_clock::now());
|
auto fully_expired = _table_s.fully_expired_sstables(_sstables, gc_clock::now());
|
||||||
min_max_tracker<api::timestamp_type> timestamp_tracker;
|
min_max_tracker<api::timestamp_type> timestamp_tracker;
|
||||||
|
|
||||||
|
double sum_of_estimated_droppable_tombstone_ratio = 0;
|
||||||
_input_sstable_generations.reserve(_sstables.size());
|
_input_sstable_generations.reserve(_sstables.size());
|
||||||
for (auto& sst : _sstables) {
|
for (auto& sst : _sstables) {
|
||||||
co_await coroutine::maybe_yield();
|
co_await coroutine::maybe_yield();
|
||||||
@@ -712,7 +746,10 @@ private:
|
|||||||
// for a better estimate for the number of partitions in the merged
|
// for a better estimate for the number of partitions in the merged
|
||||||
// sstable than just adding up the lengths of individual sstables.
|
// sstable than just adding up the lengths of individual sstables.
|
||||||
_estimated_partitions += sst->get_estimated_key_count();
|
_estimated_partitions += sst->get_estimated_key_count();
|
||||||
|
auto gc_before = sst->get_gc_before_for_drop_estimation(gc_clock::now(), _table_s.get_tombstone_gc_state(), _schema);
|
||||||
|
sum_of_estimated_droppable_tombstone_ratio += sst->estimate_droppable_tombstone_ratio(gc_before);
|
||||||
_compacting_data_file_size += sst->ondisk_data_size();
|
_compacting_data_file_size += sst->ondisk_data_size();
|
||||||
|
|
||||||
// TODO:
|
// TODO:
|
||||||
// Note that this is not fully correct. Since we might be merging sstables that originated on
|
// Note that this is not fully correct. Since we might be merging sstables that originated on
|
||||||
// another shard (#cpu changed), we might be comparing RP:s with differing shard ids,
|
// another shard (#cpu changed), we might be comparing RP:s with differing shard ids,
|
||||||
@@ -721,12 +758,16 @@ private:
|
|||||||
// this is kind of ok, esp. since we will hopefully not be trying to recover based on
|
// this is kind of ok, esp. since we will hopefully not be trying to recover based on
|
||||||
// compacted sstables anyway (CL should be clean by then).
|
// compacted sstables anyway (CL should be clean by then).
|
||||||
_rp = std::max(_rp, sst_stats.position);
|
_rp = std::max(_rp, sst_stats.position);
|
||||||
|
|
||||||
|
_compacting_max_timestamp = std::max(_compacting_max_timestamp, sst->get_stats_metadata().max_timestamp);
|
||||||
}
|
}
|
||||||
log_info("{} {}", report_start_desc(), formatted_msg);
|
log_info("{} {}", report_start_desc(), formatted_msg);
|
||||||
if (ssts->size() < _sstables.size()) {
|
if (ssts->size() < _sstables.size()) {
|
||||||
log_debug("{} out of {} input sstables are fully expired sstables that will not be actually compacted",
|
log_debug("{} out of {} input sstables are fully expired sstables that will not be actually compacted",
|
||||||
_sstables.size() - ssts->size(), _sstables.size());
|
_sstables.size() - ssts->size(), _sstables.size());
|
||||||
}
|
}
|
||||||
|
// _estimated_droppable_tombstone_ratio could exceed 1.0 in certain cases, so limit it to 1.0.
|
||||||
|
_estimated_droppable_tombstone_ratio = std::min(1.0, sum_of_estimated_droppable_tombstone_ratio / ssts->size());
|
||||||
|
|
||||||
_compacting = std::move(ssts);
|
_compacting = std::move(ssts);
|
||||||
|
|
||||||
@@ -841,7 +882,7 @@ private:
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
return [this] (const dht::decorated_key& dk) {
|
return [this] (const dht::decorated_key& dk) {
|
||||||
return get_max_purgeable_timestamp(_table_s, *_selector, _compacting_for_max_purgeable_func, dk, _bloom_filter_checks);
|
return get_max_purgeable_timestamp(_table_s, *_selector, _compacting_for_max_purgeable_func, dk, _bloom_filter_checks, _compacting_max_timestamp);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1248,28 +1289,6 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
class cleanup_compaction final : public regular_compaction {
|
class cleanup_compaction final : public regular_compaction {
|
||||||
private:
|
|
||||||
// Called in a seastar thread
|
|
||||||
dht::partition_range_vector
|
|
||||||
get_ranges_for_invalidation(const std::vector<shared_sstable>& sstables) {
|
|
||||||
auto owned_ranges = dht::to_partition_ranges(*_owned_ranges, utils::can_yield::yes);
|
|
||||||
|
|
||||||
auto non_owned_ranges = boost::copy_range<dht::partition_range_vector>(sstables
|
|
||||||
| boost::adaptors::transformed([] (const shared_sstable& sst) {
|
|
||||||
seastar::thread::maybe_yield();
|
|
||||||
return dht::partition_range::make({sst->get_first_decorated_key(), true},
|
|
||||||
{sst->get_last_decorated_key(), true});
|
|
||||||
}));
|
|
||||||
|
|
||||||
return dht::subtract_ranges(*_schema, non_owned_ranges, std::move(owned_ranges)).get();
|
|
||||||
}
|
|
||||||
protected:
|
|
||||||
virtual compaction_completion_desc
|
|
||||||
get_compaction_completion_desc(std::vector<shared_sstable> input_sstables, std::vector<shared_sstable> output_sstables) override {
|
|
||||||
auto ranges_for_for_invalidation = get_ranges_for_invalidation(input_sstables);
|
|
||||||
return compaction_completion_desc{std::move(input_sstables), std::move(output_sstables), std::move(ranges_for_for_invalidation)};
|
|
||||||
}
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
cleanup_compaction(table_state& table_s, compaction_descriptor descriptor, compaction_data& cdata)
|
cleanup_compaction(table_state& table_s, compaction_descriptor descriptor, compaction_data& cdata)
|
||||||
: regular_compaction(table_s, std::move(descriptor), cdata)
|
: regular_compaction(table_s, std::move(descriptor), cdata)
|
||||||
@@ -1595,7 +1614,7 @@ private:
|
|||||||
uint64_t partitions_per_sstable(shard_id s) const {
|
uint64_t partitions_per_sstable(shard_id s) const {
|
||||||
uint64_t estimated_sstables = std::max(uint64_t(1), uint64_t(ceil(double(_estimation_per_shard[s].estimated_size) / _max_sstable_size)));
|
uint64_t estimated_sstables = std::max(uint64_t(1), uint64_t(ceil(double(_estimation_per_shard[s].estimated_size) / _max_sstable_size)));
|
||||||
return std::min(uint64_t(ceil(double(_estimation_per_shard[s].estimated_partitions) / estimated_sstables)),
|
return std::min(uint64_t(ceil(double(_estimation_per_shard[s].estimated_partitions) / estimated_sstables)),
|
||||||
_table_s.get_compaction_strategy().adjust_partition_estimate(_ms_metadata, _estimation_per_shard[s].estimated_partitions));
|
_table_s.get_compaction_strategy().adjust_partition_estimate(_ms_metadata, _estimation_per_shard[s].estimated_partitions, _schema));
|
||||||
}
|
}
|
||||||
public:
|
public:
|
||||||
resharding_compaction(table_state& table_s, sstables::compaction_descriptor descriptor, compaction_data& cdata)
|
resharding_compaction(table_state& table_s, sstables::compaction_descriptor descriptor, compaction_data& cdata)
|
||||||
@@ -1800,7 +1819,7 @@ get_fully_expired_sstables(const table_state& table_s, const std::vector<sstable
|
|||||||
int64_t min_timestamp = std::numeric_limits<int64_t>::max();
|
int64_t min_timestamp = std::numeric_limits<int64_t>::max();
|
||||||
|
|
||||||
for (auto& sstable : overlapping) {
|
for (auto& sstable : overlapping) {
|
||||||
auto gc_before = sstable->get_gc_before_for_fully_expire(compaction_time, table_s.get_tombstone_gc_state());
|
auto gc_before = sstable->get_gc_before_for_fully_expire(compaction_time, table_s.get_tombstone_gc_state(), table_s.schema());
|
||||||
if (sstable->get_max_local_deletion_time() >= gc_before) {
|
if (sstable->get_max_local_deletion_time() >= gc_before) {
|
||||||
min_timestamp = std::min(min_timestamp, sstable->get_stats_metadata().min_timestamp);
|
min_timestamp = std::min(min_timestamp, sstable->get_stats_metadata().min_timestamp);
|
||||||
}
|
}
|
||||||
@@ -1819,7 +1838,7 @@ get_fully_expired_sstables(const table_state& table_s, const std::vector<sstable
|
|||||||
|
|
||||||
// SStables that do not contain live data is added to list of possibly expired sstables.
|
// SStables that do not contain live data is added to list of possibly expired sstables.
|
||||||
for (auto& candidate : compacting) {
|
for (auto& candidate : compacting) {
|
||||||
auto gc_before = candidate->get_gc_before_for_fully_expire(compaction_time, table_s.get_tombstone_gc_state());
|
auto gc_before = candidate->get_gc_before_for_fully_expire(compaction_time, table_s.get_tombstone_gc_state(), table_s.schema());
|
||||||
clogger.debug("Checking if candidate of generation {} and max_deletion_time {} is expired, gc_before is {}",
|
clogger.debug("Checking if candidate of generation {} and max_deletion_time {} is expired, gc_before is {}",
|
||||||
candidate->generation(), candidate->get_stats_metadata().max_local_deletion_time, gc_before);
|
candidate->generation(), candidate->get_stats_metadata().max_local_deletion_time, gc_before);
|
||||||
// A fully expired sstable which has an ancestor undeleted shouldn't be compacted because
|
// A fully expired sstable which has an ancestor undeleted shouldn't be compacted because
|
||||||
|
|||||||
@@ -22,6 +22,7 @@
|
|||||||
#include "sstables/exceptions.hh"
|
#include "sstables/exceptions.hh"
|
||||||
#include "sstables/sstable_directory.hh"
|
#include "sstables/sstable_directory.hh"
|
||||||
#include "locator/abstract_replication_strategy.hh"
|
#include "locator/abstract_replication_strategy.hh"
|
||||||
|
#include "utils/error_injection.hh"
|
||||||
#include "utils/fb_utilities.hh"
|
#include "utils/fb_utilities.hh"
|
||||||
#include "utils/UUID_gen.hh"
|
#include "utils/UUID_gen.hh"
|
||||||
#include "db/system_keyspace.hh"
|
#include "db/system_keyspace.hh"
|
||||||
@@ -1147,6 +1148,11 @@ protected:
|
|||||||
}
|
}
|
||||||
|
|
||||||
virtual future<compaction_manager::compaction_stats_opt> do_run() override {
|
virtual future<compaction_manager::compaction_stats_opt> do_run() override {
|
||||||
|
if (!is_system_keyspace(_status.keyspace)) {
|
||||||
|
co_await utils::get_local_injector().inject_with_handler("compaction_regular_compaction_task_executor_do_run",
|
||||||
|
[] (auto& handler) { return handler.wait_for_message(db::timeout_clock::now() + 10s); });
|
||||||
|
}
|
||||||
|
|
||||||
co_await coroutine::switch_to(_cm.compaction_sg());
|
co_await coroutine::switch_to(_cm.compaction_sg());
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
@@ -1321,13 +1327,20 @@ private:
|
|||||||
}));
|
}));
|
||||||
};
|
};
|
||||||
|
|
||||||
auto get_next_job = [&] () -> std::optional<sstables::compaction_descriptor> {
|
auto get_next_job = [&] () -> future<std::optional<sstables::compaction_descriptor>> {
|
||||||
auto desc = t.get_compaction_strategy().get_reshaping_job(get_reshape_candidates(), t.schema(), sstables::reshape_mode::strict);
|
auto candidates = get_reshape_candidates();
|
||||||
return desc.sstables.size() ? std::make_optional(std::move(desc)) : std::nullopt;
|
if (candidates.empty()) {
|
||||||
|
co_return std::nullopt;
|
||||||
|
}
|
||||||
|
// all sstables added to maintenance set share the same underlying storage.
|
||||||
|
auto& storage = candidates.front()->get_storage();
|
||||||
|
sstables::reshape_config cfg = co_await sstables::make_reshape_config(storage, sstables::reshape_mode::strict);
|
||||||
|
auto desc = t.get_compaction_strategy().get_reshaping_job(get_reshape_candidates(), t.schema(), cfg);
|
||||||
|
co_return desc.sstables.size() ? std::make_optional(std::move(desc)) : std::nullopt;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::exception_ptr err;
|
std::exception_ptr err;
|
||||||
while (auto desc = get_next_job()) {
|
while (auto desc = co_await get_next_job()) {
|
||||||
auto compacting = compacting_sstable_registration(_cm, _cm.get_compaction_state(&t), desc->sstables);
|
auto compacting = compacting_sstable_registration(_cm, _cm.get_compaction_state(&t), desc->sstables);
|
||||||
auto on_replace = compacting.update_on_sstable_replacement();
|
auto on_replace = compacting.update_on_sstable_replacement();
|
||||||
|
|
||||||
@@ -1789,7 +1802,11 @@ future<> compaction_manager::perform_cleanup(owned_ranges_ptr sorted_owned_range
|
|||||||
};
|
};
|
||||||
|
|
||||||
cmlog.debug("perform_cleanup: waiting for sstables to become eligible for cleanup");
|
cmlog.debug("perform_cleanup: waiting for sstables to become eligible for cleanup");
|
||||||
co_await t.get_staging_done_condition().when(sleep_duration, [&] { return has_sstables_eligible_for_compaction(); });
|
try {
|
||||||
|
co_await t.get_staging_done_condition().when(sleep_duration, [&] { return has_sstables_eligible_for_compaction(); });
|
||||||
|
} catch (const seastar::condition_variable_timed_out&) {
|
||||||
|
// Ignored. Keep retrying for max_idle_duration
|
||||||
|
}
|
||||||
|
|
||||||
if (!has_sstables_eligible_for_compaction()) {
|
if (!has_sstables_eligible_for_compaction()) {
|
||||||
continue;
|
continue;
|
||||||
@@ -1841,6 +1858,9 @@ future<> compaction_manager::try_perform_cleanup(owned_ranges_ptr sorted_owned_r
|
|||||||
if (found_maintenance_sstables) {
|
if (found_maintenance_sstables) {
|
||||||
co_await perform_offstrategy(t, info);
|
co_await perform_offstrategy(t, info);
|
||||||
}
|
}
|
||||||
|
if (utils::get_local_injector().enter("major_compaction_before_cleanup")) {
|
||||||
|
co_await perform_major_compaction(t, info);
|
||||||
|
}
|
||||||
|
|
||||||
// Called with compaction_disabled
|
// Called with compaction_disabled
|
||||||
auto get_sstables = [this, &t] () -> future<std::vector<sstables::shared_sstable>> {
|
auto get_sstables = [this, &t] () -> future<std::vector<sstables::shared_sstable>> {
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ std::vector<compaction_descriptor> compaction_strategy_impl::get_cleanup_compact
|
|||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool compaction_strategy_impl::worth_dropping_tombstones(const shared_sstable& sst, gc_clock::time_point compaction_time, const tombstone_gc_state& gc_state) {
|
bool compaction_strategy_impl::worth_dropping_tombstones(const shared_sstable& sst, gc_clock::time_point compaction_time, const table_state& t) {
|
||||||
if (_disable_tombstone_compaction) {
|
if (_disable_tombstone_compaction) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -62,11 +62,11 @@ bool compaction_strategy_impl::worth_dropping_tombstones(const shared_sstable& s
|
|||||||
if (db_clock::now()-_tombstone_compaction_interval < sst->data_file_write_time()) {
|
if (db_clock::now()-_tombstone_compaction_interval < sst->data_file_write_time()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
auto gc_before = sst->get_gc_before_for_drop_estimation(compaction_time, gc_state);
|
auto gc_before = sst->get_gc_before_for_drop_estimation(compaction_time, t.get_tombstone_gc_state(), t.schema());
|
||||||
return sst->estimate_droppable_tombstone_ratio(gc_before) >= _tombstone_threshold;
|
return sst->estimate_droppable_tombstone_ratio(gc_before) >= _tombstone_threshold;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t compaction_strategy_impl::adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate) const {
|
uint64_t compaction_strategy_impl::adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate, schema_ptr schema) const {
|
||||||
return partition_estimate;
|
return partition_estimate;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -75,7 +75,7 @@ reader_consumer_v2 compaction_strategy_impl::make_interposer_consumer(const muta
|
|||||||
}
|
}
|
||||||
|
|
||||||
compaction_descriptor
|
compaction_descriptor
|
||||||
compaction_strategy_impl::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const {
|
compaction_strategy_impl::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
|
||||||
return compaction_descriptor();
|
return compaction_descriptor();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -700,12 +700,12 @@ compaction_backlog_tracker compaction_strategy::make_backlog_tracker() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
sstables::compaction_descriptor
|
sstables::compaction_descriptor
|
||||||
compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const {
|
compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
|
||||||
return _compaction_strategy_impl->get_reshaping_job(std::move(input), schema, mode);
|
return _compaction_strategy_impl->get_reshaping_job(std::move(input), schema, cfg);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t compaction_strategy::adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate) const {
|
uint64_t compaction_strategy::adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate, schema_ptr schema) const {
|
||||||
return _compaction_strategy_impl->adjust_partition_estimate(ms_meta, partition_estimate);
|
return _compaction_strategy_impl->adjust_partition_estimate(ms_meta, partition_estimate, std::move(schema));
|
||||||
}
|
}
|
||||||
|
|
||||||
reader_consumer_v2 compaction_strategy::make_interposer_consumer(const mutation_source_metadata& ms_meta, reader_consumer_v2 end_consumer) const {
|
reader_consumer_v2 compaction_strategy::make_interposer_consumer(const mutation_source_metadata& ms_meta, reader_consumer_v2 end_consumer) const {
|
||||||
@@ -739,6 +739,13 @@ compaction_strategy make_compaction_strategy(compaction_strategy_type strategy,
|
|||||||
return compaction_strategy(std::move(impl));
|
return compaction_strategy(std::move(impl));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
future<reshape_config> make_reshape_config(const sstables::storage& storage, reshape_mode mode) {
|
||||||
|
co_return sstables::reshape_config{
|
||||||
|
.mode = mode,
|
||||||
|
.free_storage_space = co_await storage.free_space() / smp::count,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace compaction {
|
namespace compaction {
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ class sstable;
|
|||||||
class sstable_set;
|
class sstable_set;
|
||||||
struct compaction_descriptor;
|
struct compaction_descriptor;
|
||||||
struct resharding_descriptor;
|
struct resharding_descriptor;
|
||||||
|
class storage;
|
||||||
|
|
||||||
class compaction_strategy {
|
class compaction_strategy {
|
||||||
::shared_ptr<compaction_strategy_impl> _compaction_strategy_impl;
|
::shared_ptr<compaction_strategy_impl> _compaction_strategy_impl;
|
||||||
@@ -104,7 +105,7 @@ public:
|
|||||||
|
|
||||||
compaction_backlog_tracker make_backlog_tracker() const;
|
compaction_backlog_tracker make_backlog_tracker() const;
|
||||||
|
|
||||||
uint64_t adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate) const;
|
uint64_t adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate, schema_ptr) const;
|
||||||
|
|
||||||
reader_consumer_v2 make_interposer_consumer(const mutation_source_metadata& ms_meta, reader_consumer_v2 end_consumer) const;
|
reader_consumer_v2 make_interposer_consumer(const mutation_source_metadata& ms_meta, reader_consumer_v2 end_consumer) const;
|
||||||
|
|
||||||
@@ -122,11 +123,13 @@ public:
|
|||||||
//
|
//
|
||||||
// The caller should also pass a maximum number of SSTables which is the maximum amount of
|
// The caller should also pass a maximum number of SSTables which is the maximum amount of
|
||||||
// SSTables that can be added into a single job.
|
// SSTables that can be added into a single job.
|
||||||
compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const;
|
compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Creates a compaction_strategy object from one of the strategies available.
|
// Creates a compaction_strategy object from one of the strategies available.
|
||||||
compaction_strategy make_compaction_strategy(compaction_strategy_type strategy, const std::map<sstring, sstring>& options);
|
compaction_strategy make_compaction_strategy(compaction_strategy_type strategy, const std::map<sstring, sstring>& options);
|
||||||
|
|
||||||
|
future<reshape_config> make_reshape_config(const sstables::storage& storage, reshape_mode mode);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -64,11 +64,11 @@ public:
|
|||||||
|
|
||||||
// Check if a given sstable is entitled for tombstone compaction based on its
|
// Check if a given sstable is entitled for tombstone compaction based on its
|
||||||
// droppable tombstone histogram and gc_before.
|
// droppable tombstone histogram and gc_before.
|
||||||
bool worth_dropping_tombstones(const shared_sstable& sst, gc_clock::time_point compaction_time, const tombstone_gc_state& gc_state);
|
bool worth_dropping_tombstones(const shared_sstable& sst, gc_clock::time_point compaction_time, const table_state& t);
|
||||||
|
|
||||||
virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() const = 0;
|
virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() const = 0;
|
||||||
|
|
||||||
virtual uint64_t adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate) const;
|
virtual uint64_t adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate, schema_ptr schema) const;
|
||||||
|
|
||||||
virtual reader_consumer_v2 make_interposer_consumer(const mutation_source_metadata& ms_meta, reader_consumer_v2 end_consumer) const;
|
virtual reader_consumer_v2 make_interposer_consumer(const mutation_source_metadata& ms_meta, reader_consumer_v2 end_consumer) const;
|
||||||
|
|
||||||
@@ -76,6 +76,6 @@ public:
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const;
|
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,6 +8,8 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
|
||||||
namespace sstables {
|
namespace sstables {
|
||||||
|
|
||||||
enum class compaction_strategy_type {
|
enum class compaction_strategy_type {
|
||||||
@@ -18,4 +20,10 @@ enum class compaction_strategy_type {
|
|||||||
};
|
};
|
||||||
|
|
||||||
enum class reshape_mode { strict, relaxed };
|
enum class reshape_mode { strict, relaxed };
|
||||||
|
|
||||||
|
struct reshape_config {
|
||||||
|
reshape_mode mode;
|
||||||
|
const uint64_t free_storage_space;
|
||||||
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -51,15 +51,15 @@ compaction_descriptor leveled_compaction_strategy::get_sstables_for_compaction(t
|
|||||||
auto& sstables = manifest.get_level(level);
|
auto& sstables = manifest.get_level(level);
|
||||||
// filter out sstables which droppable tombstone ratio isn't greater than the defined threshold.
|
// filter out sstables which droppable tombstone ratio isn't greater than the defined threshold.
|
||||||
auto e = boost::range::remove_if(sstables, [this, compaction_time, &table_s] (const sstables::shared_sstable& sst) -> bool {
|
auto e = boost::range::remove_if(sstables, [this, compaction_time, &table_s] (const sstables::shared_sstable& sst) -> bool {
|
||||||
return !worth_dropping_tombstones(sst, compaction_time, table_s.get_tombstone_gc_state());
|
return !worth_dropping_tombstones(sst, compaction_time, table_s);
|
||||||
});
|
});
|
||||||
sstables.erase(e, sstables.end());
|
sstables.erase(e, sstables.end());
|
||||||
if (sstables.empty()) {
|
if (sstables.empty()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
auto& sst = *std::max_element(sstables.begin(), sstables.end(), [&] (auto& i, auto& j) {
|
auto& sst = *std::max_element(sstables.begin(), sstables.end(), [&] (auto& i, auto& j) {
|
||||||
auto gc_before1 = i->get_gc_before_for_drop_estimation(compaction_time, table_s.get_tombstone_gc_state());
|
auto gc_before1 = i->get_gc_before_for_drop_estimation(compaction_time, table_s.get_tombstone_gc_state(), table_s.schema());
|
||||||
auto gc_before2 = j->get_gc_before_for_drop_estimation(compaction_time, table_s.get_tombstone_gc_state());
|
auto gc_before2 = j->get_gc_before_for_drop_estimation(compaction_time, table_s.get_tombstone_gc_state(), table_s.schema());
|
||||||
return i->estimate_droppable_tombstone_ratio(gc_before1) < j->estimate_droppable_tombstone_ratio(gc_before2);
|
return i->estimate_droppable_tombstone_ratio(gc_before1) < j->estimate_droppable_tombstone_ratio(gc_before2);
|
||||||
});
|
});
|
||||||
return sstables::compaction_descriptor({ sst }, sst->get_sstable_level());
|
return sstables::compaction_descriptor({ sst }, sst->get_sstable_level());
|
||||||
@@ -146,7 +146,8 @@ int64_t leveled_compaction_strategy::estimated_pending_compactions(table_state&
|
|||||||
}
|
}
|
||||||
|
|
||||||
compaction_descriptor
|
compaction_descriptor
|
||||||
leveled_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const {
|
leveled_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
|
||||||
|
auto mode = cfg.mode;
|
||||||
std::array<std::vector<shared_sstable>, leveled_manifest::MAX_LEVELS> level_info;
|
std::array<std::vector<shared_sstable>, leveled_manifest::MAX_LEVELS> level_info;
|
||||||
|
|
||||||
auto is_disjoint = [schema] (const std::vector<shared_sstable>& sstables, unsigned tolerance) -> std::tuple<bool, unsigned> {
|
auto is_disjoint = [schema] (const std::vector<shared_sstable>& sstables, unsigned tolerance) -> std::tuple<bool, unsigned> {
|
||||||
@@ -203,7 +204,7 @@ leveled_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input
|
|||||||
|
|
||||||
if (level_info[0].size() > offstrategy_threshold) {
|
if (level_info[0].size() > offstrategy_threshold) {
|
||||||
size_tiered_compaction_strategy stcs(_stcs_options);
|
size_tiered_compaction_strategy stcs(_stcs_options);
|
||||||
return stcs.get_reshaping_job(std::move(level_info[0]), schema, mode);
|
return stcs.get_reshaping_job(std::move(level_info[0]), schema, cfg);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned level = leveled_manifest::MAX_LEVELS - 1; level > 0; --level) {
|
for (unsigned level = leveled_manifest::MAX_LEVELS - 1; level > 0; --level) {
|
||||||
|
|||||||
@@ -74,7 +74,7 @@ public:
|
|||||||
|
|
||||||
virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() const override;
|
virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() const override;
|
||||||
|
|
||||||
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const override;
|
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -243,7 +243,7 @@ size_tiered_compaction_strategy::get_sstables_for_compaction(table_state& table_
|
|||||||
for (auto&& sstables : buckets | boost::adaptors::reversed) {
|
for (auto&& sstables : buckets | boost::adaptors::reversed) {
|
||||||
// filter out sstables which droppable tombstone ratio isn't greater than the defined threshold.
|
// filter out sstables which droppable tombstone ratio isn't greater than the defined threshold.
|
||||||
auto e = boost::range::remove_if(sstables, [this, compaction_time, &table_s] (const sstables::shared_sstable& sst) -> bool {
|
auto e = boost::range::remove_if(sstables, [this, compaction_time, &table_s] (const sstables::shared_sstable& sst) -> bool {
|
||||||
return !worth_dropping_tombstones(sst, compaction_time, table_s.get_tombstone_gc_state());
|
return !worth_dropping_tombstones(sst, compaction_time, table_s);
|
||||||
});
|
});
|
||||||
sstables.erase(e, sstables.end());
|
sstables.erase(e, sstables.end());
|
||||||
if (sstables.empty()) {
|
if (sstables.empty()) {
|
||||||
@@ -297,8 +297,9 @@ size_tiered_compaction_strategy::most_interesting_bucket(const std::vector<sstab
|
|||||||
}
|
}
|
||||||
|
|
||||||
compaction_descriptor
|
compaction_descriptor
|
||||||
size_tiered_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const
|
size_tiered_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const
|
||||||
{
|
{
|
||||||
|
auto mode = cfg.mode;
|
||||||
size_t offstrategy_threshold = std::max(schema->min_compaction_threshold(), 4);
|
size_t offstrategy_threshold = std::max(schema->min_compaction_threshold(), 4);
|
||||||
size_t max_sstables = std::max(schema->max_compaction_threshold(), int(offstrategy_threshold));
|
size_t max_sstables = std::max(schema->max_compaction_threshold(), int(offstrategy_threshold));
|
||||||
|
|
||||||
|
|||||||
@@ -96,7 +96,7 @@ public:
|
|||||||
|
|
||||||
virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() const override;
|
virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() const override;
|
||||||
|
|
||||||
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const override;
|
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const override;
|
||||||
|
|
||||||
friend class ::size_tiered_backlog_tracker;
|
friend class ::size_tiered_backlog_tracker;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -48,6 +48,7 @@ public:
|
|||||||
virtual sstables::shared_sstable make_sstable() const = 0;
|
virtual sstables::shared_sstable make_sstable() const = 0;
|
||||||
virtual sstables::sstable_writer_config configure_writer(sstring origin) const = 0;
|
virtual sstables::sstable_writer_config configure_writer(sstring origin) const = 0;
|
||||||
virtual api::timestamp_type min_memtable_timestamp() const = 0;
|
virtual api::timestamp_type min_memtable_timestamp() const = 0;
|
||||||
|
virtual bool memtable_has_key(const dht::decorated_key& key) const = 0;
|
||||||
virtual future<> on_compaction_completion(sstables::compaction_completion_desc desc, sstables::offstrategy offstrategy) = 0;
|
virtual future<> on_compaction_completion(sstables::compaction_completion_desc desc, sstables::offstrategy offstrategy) = 0;
|
||||||
virtual bool is_auto_compaction_disabled_by_user() const noexcept = 0;
|
virtual bool is_auto_compaction_disabled_by_user() const noexcept = 0;
|
||||||
virtual bool tombstone_gc_enabled() const noexcept = 0;
|
virtual bool tombstone_gc_enabled() const noexcept = 0;
|
||||||
|
|||||||
@@ -14,6 +14,9 @@
|
|||||||
#include "sstables/sstables.hh"
|
#include "sstables/sstables.hh"
|
||||||
#include "sstables/sstable_directory.hh"
|
#include "sstables/sstable_directory.hh"
|
||||||
#include "utils/pretty_printers.hh"
|
#include "utils/pretty_printers.hh"
|
||||||
|
#include "db/config.hh"
|
||||||
|
|
||||||
|
using namespace std::chrono_literals;
|
||||||
|
|
||||||
namespace replica {
|
namespace replica {
|
||||||
|
|
||||||
@@ -254,11 +257,129 @@ future<> run_table_tasks(replica::database& db, std::vector<table_tasks_info> ta
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct keyspace_tasks_info {
|
||||||
|
tasks::task_manager::task_ptr task;
|
||||||
|
sstring keyspace;
|
||||||
|
std::vector<table_info> table_infos;
|
||||||
|
|
||||||
|
keyspace_tasks_info(tasks::task_manager::task_ptr t, sstring ks_name, std::vector<table_info> t_infos)
|
||||||
|
: task(t)
|
||||||
|
, keyspace(std::move(ks_name))
|
||||||
|
, table_infos(std::move(t_infos))
|
||||||
|
{}
|
||||||
|
};
|
||||||
|
|
||||||
|
future<> run_keyspace_tasks(replica::database& db, std::vector<keyspace_tasks_info> keyspace_tasks, seastar::condition_variable& cv, tasks::task_manager::task_ptr& current_task, bool sort) {
|
||||||
|
std::exception_ptr ex;
|
||||||
|
|
||||||
|
// While compaction is run on one table, the size of tables may significantly change.
|
||||||
|
// Thus, they are sorted before each invidual compaction and the smallest keyspace is chosen.
|
||||||
|
while (!keyspace_tasks.empty()) {
|
||||||
|
try {
|
||||||
|
if (sort) {
|
||||||
|
// Major compact smaller tables first, to increase chances of success if low on space.
|
||||||
|
// Tables will be kept in descending order.
|
||||||
|
std::ranges::sort(keyspace_tasks, std::greater<>(), [&] (const keyspace_tasks_info& kti) {
|
||||||
|
try {
|
||||||
|
return std::accumulate(kti.table_infos.begin(), kti.table_infos.end(), int64_t(0), [&] (int64_t sum, const table_info& t) {
|
||||||
|
try {
|
||||||
|
sum += db.find_column_family(t.id).get_stats().live_disk_space_used;
|
||||||
|
} catch (const replica::no_such_column_family&) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
return sum;
|
||||||
|
});
|
||||||
|
} catch (const replica::no_such_keyspace&) {
|
||||||
|
return int64_t(-1);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
// Task responsible for the smallest keyspace.
|
||||||
|
current_task = keyspace_tasks.back().task;
|
||||||
|
keyspace_tasks.pop_back();
|
||||||
|
cv.broadcast();
|
||||||
|
co_await current_task->done();
|
||||||
|
} catch (...) {
|
||||||
|
ex = std::current_exception();
|
||||||
|
current_task = nullptr;
|
||||||
|
cv.broken(ex);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ex) {
|
||||||
|
// Wait for all tasks even on failure.
|
||||||
|
for (auto& kti: keyspace_tasks) {
|
||||||
|
co_await kti.task->done();
|
||||||
|
}
|
||||||
|
co_await coroutine::return_exception_ptr(std::move(ex));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sstring major_compaction_task_impl::to_string(flush_mode fm) {
|
||||||
|
switch (fm) {
|
||||||
|
case flush_mode::skip: return "skip";
|
||||||
|
case flush_mode::compacted_tables: return "compacted_tables";
|
||||||
|
case flush_mode::all_tables: return "all_tables";
|
||||||
|
}
|
||||||
|
__builtin_unreachable();
|
||||||
|
}
|
||||||
|
|
||||||
|
static future<bool> maybe_flush_all_tables(sharded<replica::database>& db) {
|
||||||
|
auto interval = db.local().get_config().compaction_flush_all_tables_before_major_seconds();
|
||||||
|
if (interval) {
|
||||||
|
auto when = db_clock::now() - interval * 1s;
|
||||||
|
if (co_await replica::database::get_all_tables_flushed_at(db) <= when) {
|
||||||
|
co_await db.invoke_on_all([&] (replica::database& db) -> future<> {
|
||||||
|
co_await db.flush_all_tables();
|
||||||
|
});
|
||||||
|
co_return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
co_return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
future<> global_major_compaction_task_impl::run() {
|
||||||
|
bool flushed_all_tables = false;
|
||||||
|
if (_flush_mode == flush_mode::all_tables) {
|
||||||
|
flushed_all_tables = co_await maybe_flush_all_tables(_db);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unordered_map<sstring, std::vector<table_info>> tables_by_keyspace;
|
||||||
|
auto tables_meta = _db.local().get_tables_metadata().get_column_families_copy();
|
||||||
|
for (const auto& [table_id, t] : tables_meta) {
|
||||||
|
const auto& ks_name = t->schema()->ks_name();
|
||||||
|
const auto& table_name = t->schema()->cf_name();
|
||||||
|
tables_by_keyspace[ks_name].emplace_back(table_name, table_id);
|
||||||
|
}
|
||||||
|
seastar::condition_variable cv;
|
||||||
|
tasks::task_manager::task_ptr current_task;
|
||||||
|
tasks::task_info parent_info{_status.id, _status.shard};
|
||||||
|
std::vector<keyspace_tasks_info> keyspace_tasks;
|
||||||
|
flush_mode fm = flushed_all_tables ? flush_mode::skip : _flush_mode;
|
||||||
|
for (auto& [ks, table_infos] : tables_by_keyspace) {
|
||||||
|
auto task = co_await _module->make_and_start_task<major_keyspace_compaction_task_impl>(parent_info, ks, parent_info.id, _db, table_infos, fm,
|
||||||
|
&cv, ¤t_task);
|
||||||
|
keyspace_tasks.emplace_back(std::move(task), ks, std::move(table_infos));
|
||||||
|
}
|
||||||
|
co_await run_keyspace_tasks(_db.local(), keyspace_tasks, cv, current_task, false);
|
||||||
|
}
|
||||||
|
|
||||||
future<> major_keyspace_compaction_task_impl::run() {
|
future<> major_keyspace_compaction_task_impl::run() {
|
||||||
|
if (_cv) {
|
||||||
|
co_await wait_for_your_turn(*_cv, *_current_task, _status.id);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool flushed_all_tables = false;
|
||||||
|
if (_flush_mode == flush_mode::all_tables) {
|
||||||
|
flushed_all_tables = co_await maybe_flush_all_tables(_db);
|
||||||
|
}
|
||||||
|
|
||||||
|
flush_mode fm = flushed_all_tables ? flush_mode::skip : _flush_mode;
|
||||||
co_await _db.invoke_on_all([&] (replica::database& db) -> future<> {
|
co_await _db.invoke_on_all([&] (replica::database& db) -> future<> {
|
||||||
tasks::task_info parent_info{_status.id, _status.shard};
|
tasks::task_info parent_info{_status.id, _status.shard};
|
||||||
auto& module = db.get_compaction_manager().get_task_manager_module();
|
auto& module = db.get_compaction_manager().get_task_manager_module();
|
||||||
auto task = co_await module.make_and_start_task<shard_major_keyspace_compaction_task_impl>(parent_info, _status.keyspace, _status.id, db, _table_infos);
|
auto task = co_await module.make_and_start_task<shard_major_keyspace_compaction_task_impl>(parent_info, _status.keyspace, _status.id, db, _table_infos, fm);
|
||||||
co_await task->done();
|
co_await task->done();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -269,7 +390,7 @@ future<> shard_major_keyspace_compaction_task_impl::run() {
|
|||||||
tasks::task_info parent_info{_status.id, _status.shard};
|
tasks::task_info parent_info{_status.id, _status.shard};
|
||||||
std::vector<table_tasks_info> table_tasks;
|
std::vector<table_tasks_info> table_tasks;
|
||||||
for (auto& ti : _local_tables) {
|
for (auto& ti : _local_tables) {
|
||||||
table_tasks.emplace_back(co_await _module->make_and_start_task<table_major_keyspace_compaction_task_impl>(parent_info, _status.keyspace, ti.name, _status.id, _db, ti, cv, current_task), ti);
|
table_tasks.emplace_back(co_await _module->make_and_start_task<table_major_keyspace_compaction_task_impl>(parent_info, _status.keyspace, ti.name, _status.id, _db, ti, cv, current_task, _flush_mode), ti);
|
||||||
}
|
}
|
||||||
|
|
||||||
co_await run_table_tasks(_db, std::move(table_tasks), cv, current_task, true);
|
co_await run_table_tasks(_db, std::move(table_tasks), cv, current_task, true);
|
||||||
@@ -278,8 +399,9 @@ future<> shard_major_keyspace_compaction_task_impl::run() {
|
|||||||
future<> table_major_keyspace_compaction_task_impl::run() {
|
future<> table_major_keyspace_compaction_task_impl::run() {
|
||||||
co_await wait_for_your_turn(_cv, _current_task, _status.id);
|
co_await wait_for_your_turn(_cv, _current_task, _status.id);
|
||||||
tasks::task_info info{_status.id, _status.shard};
|
tasks::task_info info{_status.id, _status.shard};
|
||||||
co_await run_on_table("force_keyspace_compaction", _db, _status.keyspace, _ti, [info] (replica::table& t) {
|
replica::table::do_flush do_flush(_flush_mode != flush_mode::skip);
|
||||||
return t.compact_all_sstables(info);
|
co_await run_on_table("force_keyspace_compaction", _db, _status.keyspace, _ti, [info, do_flush] (replica::table& t) {
|
||||||
|
return t.compact_all_sstables(info, do_flush);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -433,7 +555,13 @@ future<> shard_reshaping_compaction_task_impl::run() {
|
|||||||
| boost::adaptors::filtered([&filter = _filter] (const auto& sst) {
|
| boost::adaptors::filtered([&filter = _filter] (const auto& sst) {
|
||||||
return filter(sst);
|
return filter(sst);
|
||||||
}));
|
}));
|
||||||
auto desc = table.get_compaction_strategy().get_reshaping_job(std::move(reshape_candidates), table.schema(), _mode);
|
if (reshape_candidates.empty()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// all sstables were found in the same sstable_directory instance, so they share the same underlying storage.
|
||||||
|
auto& storage = reshape_candidates.front()->get_storage();
|
||||||
|
auto cfg = co_await sstables::make_reshape_config(storage, _mode);
|
||||||
|
auto desc = table.get_compaction_strategy().get_reshaping_job(std::move(reshape_candidates), table.schema(), cfg);
|
||||||
if (desc.sstables.empty()) {
|
if (desc.sstables.empty()) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,6 +8,8 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <fmt/format.h>
|
||||||
|
|
||||||
#include "compaction/compaction.hh"
|
#include "compaction/compaction.hh"
|
||||||
#include "replica/database_fwd.hh"
|
#include "replica/database_fwd.hh"
|
||||||
#include "schema/schema_fwd.hh"
|
#include "schema/schema_fwd.hh"
|
||||||
@@ -45,6 +47,12 @@ protected:
|
|||||||
|
|
||||||
class major_compaction_task_impl : public compaction_task_impl {
|
class major_compaction_task_impl : public compaction_task_impl {
|
||||||
public:
|
public:
|
||||||
|
enum class flush_mode {
|
||||||
|
skip, // Skip flushing. Useful when application explicitly flushes all tables prior to compaction
|
||||||
|
compacted_tables, // Flush only the compacted keyspace/tables
|
||||||
|
all_tables // Flush all tables in the database prior to compaction
|
||||||
|
};
|
||||||
|
|
||||||
major_compaction_task_impl(tasks::task_manager::module_ptr module,
|
major_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||||
tasks::task_id id,
|
tasks::task_id id,
|
||||||
unsigned sequence_number,
|
unsigned sequence_number,
|
||||||
@@ -52,8 +60,10 @@ public:
|
|||||||
std::string keyspace,
|
std::string keyspace,
|
||||||
std::string table,
|
std::string table,
|
||||||
std::string entity,
|
std::string entity,
|
||||||
tasks::task_id parent_id) noexcept
|
tasks::task_id parent_id,
|
||||||
|
flush_mode fm = flush_mode::compacted_tables) noexcept
|
||||||
: compaction_task_impl(module, id, sequence_number, std::move(scope), std::move(keyspace), std::move(table), std::move(entity), parent_id)
|
: compaction_task_impl(module, id, sequence_number, std::move(scope), std::move(keyspace), std::move(table), std::move(entity), parent_id)
|
||||||
|
, _flush_mode(fm)
|
||||||
{
|
{
|
||||||
// FIXME: add progress units
|
// FIXME: add progress units
|
||||||
}
|
}
|
||||||
@@ -61,22 +71,54 @@ public:
|
|||||||
virtual std::string type() const override {
|
virtual std::string type() const override {
|
||||||
return "major compaction";
|
return "major compaction";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static sstring to_string(flush_mode);
|
||||||
protected:
|
protected:
|
||||||
|
flush_mode _flush_mode;
|
||||||
|
|
||||||
virtual future<> run() override = 0;
|
virtual future<> run() override = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class global_major_compaction_task_impl : public major_compaction_task_impl {
|
||||||
|
private:
|
||||||
|
sharded<replica::database>& _db;
|
||||||
|
public:
|
||||||
|
global_major_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||||
|
sharded<replica::database>& db,
|
||||||
|
std::optional<flush_mode> fm = std::nullopt) noexcept
|
||||||
|
: major_compaction_task_impl(module, tasks::task_id::create_random_id(), module->new_sequence_number(), "global", "", "", "", tasks::task_id::create_null_id(),
|
||||||
|
fm.value_or(flush_mode::all_tables))
|
||||||
|
, _db(db)
|
||||||
|
{}
|
||||||
|
protected:
|
||||||
|
virtual future<> run() override;
|
||||||
|
};
|
||||||
|
|
||||||
class major_keyspace_compaction_task_impl : public major_compaction_task_impl {
|
class major_keyspace_compaction_task_impl : public major_compaction_task_impl {
|
||||||
private:
|
private:
|
||||||
sharded<replica::database>& _db;
|
sharded<replica::database>& _db;
|
||||||
std::vector<table_info> _table_infos;
|
std::vector<table_info> _table_infos;
|
||||||
|
// _cvp and _current_task are engaged when the task is invoked from
|
||||||
|
// global_major_compaction_task_impl
|
||||||
|
seastar::condition_variable* _cv;
|
||||||
|
tasks::task_manager::task_ptr* _current_task;
|
||||||
public:
|
public:
|
||||||
major_keyspace_compaction_task_impl(tasks::task_manager::module_ptr module,
|
major_keyspace_compaction_task_impl(tasks::task_manager::module_ptr module,
|
||||||
std::string keyspace,
|
std::string keyspace,
|
||||||
|
tasks::task_id parent_id,
|
||||||
sharded<replica::database>& db,
|
sharded<replica::database>& db,
|
||||||
std::vector<table_info> table_infos) noexcept
|
std::vector<table_info> table_infos,
|
||||||
: major_compaction_task_impl(module, tasks::task_id::create_random_id(), module->new_sequence_number(), "keyspace", std::move(keyspace), "", "", tasks::task_id::create_null_id())
|
std::optional<flush_mode> fm = std::nullopt,
|
||||||
|
seastar::condition_variable* cv = nullptr,
|
||||||
|
tasks::task_manager::task_ptr* current_task = nullptr) noexcept
|
||||||
|
: major_compaction_task_impl(module, tasks::task_id::create_random_id(),
|
||||||
|
parent_id ? 0 : module->new_sequence_number(),
|
||||||
|
"keyspace", std::move(keyspace), "", "", parent_id,
|
||||||
|
fm.value_or(flush_mode::all_tables))
|
||||||
, _db(db)
|
, _db(db)
|
||||||
, _table_infos(std::move(table_infos))
|
, _table_infos(std::move(table_infos))
|
||||||
|
, _cv(cv)
|
||||||
|
, _current_task(current_task)
|
||||||
{}
|
{}
|
||||||
protected:
|
protected:
|
||||||
virtual future<> run() override;
|
virtual future<> run() override;
|
||||||
@@ -91,8 +133,9 @@ public:
|
|||||||
std::string keyspace,
|
std::string keyspace,
|
||||||
tasks::task_id parent_id,
|
tasks::task_id parent_id,
|
||||||
replica::database& db,
|
replica::database& db,
|
||||||
std::vector<table_info> local_tables) noexcept
|
std::vector<table_info> local_tables,
|
||||||
: major_compaction_task_impl(module, tasks::task_id::create_random_id(), 0, "shard", std::move(keyspace), "", "", parent_id)
|
flush_mode fm) noexcept
|
||||||
|
: major_compaction_task_impl(module, tasks::task_id::create_random_id(), 0, "shard", std::move(keyspace), "", "", parent_id, fm)
|
||||||
, _db(db)
|
, _db(db)
|
||||||
, _local_tables(std::move(local_tables))
|
, _local_tables(std::move(local_tables))
|
||||||
{}
|
{}
|
||||||
@@ -114,8 +157,9 @@ public:
|
|||||||
replica::database& db,
|
replica::database& db,
|
||||||
table_info ti,
|
table_info ti,
|
||||||
seastar::condition_variable& cv,
|
seastar::condition_variable& cv,
|
||||||
tasks::task_manager::task_ptr& current_task) noexcept
|
tasks::task_manager::task_ptr& current_task,
|
||||||
: major_compaction_task_impl(module, tasks::task_id::create_random_id(), 0, "table", std::move(keyspace), std::move(table), "", parent_id)
|
flush_mode fm) noexcept
|
||||||
|
: major_compaction_task_impl(module, tasks::task_id::create_random_id(), 0, "table", std::move(keyspace), std::move(table), "", parent_id, fm)
|
||||||
, _db(db)
|
, _db(db)
|
||||||
, _ti(std::move(ti))
|
, _ti(std::move(ti))
|
||||||
, _cv(cv)
|
, _cv(cv)
|
||||||
@@ -660,8 +704,21 @@ public:
|
|||||||
virtual std::string type() const override {
|
virtual std::string type() const override {
|
||||||
return "regular compaction";
|
return "regular compaction";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual tasks::is_internal is_internal() const noexcept override {
|
||||||
|
return tasks::is_internal::yes;
|
||||||
|
}
|
||||||
protected:
|
protected:
|
||||||
virtual future<> run() override = 0;
|
virtual future<> run() override = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
} // namespace compaction
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct fmt::formatter<major_compaction_task_impl::flush_mode> {
|
||||||
|
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
|
||||||
|
template <typename FormatContext>
|
||||||
|
auto format(const major_compaction_task_impl::flush_mode& fm, FormatContext& ctx) const {
|
||||||
|
return fmt::format_to(ctx.out(), "{}", major_compaction_task_impl::to_string(fm));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|||||||
@@ -184,16 +184,27 @@ public:
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
uint64_t time_window_compaction_strategy::adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate) const {
|
uint64_t time_window_compaction_strategy::adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate, schema_ptr s) const {
|
||||||
if (!ms_meta.min_timestamp || !ms_meta.max_timestamp) {
|
// If not enough information, we assume the worst
|
||||||
// Not enough information, we assume the worst
|
auto estimated_window_count = max_data_segregation_window_count;
|
||||||
return partition_estimate / max_data_segregation_window_count;
|
auto default_ttl = std::chrono::duration_cast<std::chrono::microseconds>(s->default_time_to_live());
|
||||||
}
|
bool min_and_max_ts_available = ms_meta.min_timestamp && ms_meta.max_timestamp;
|
||||||
const auto min_window = get_window_for(_options, *ms_meta.min_timestamp);
|
auto estimate_window_count = [this] (timestamp_type min_window, timestamp_type max_window) {
|
||||||
const auto max_window = get_window_for(_options, *ms_meta.max_timestamp);
|
const auto window_size = get_window_size(_options);
|
||||||
const auto window_size = get_window_size(_options);
|
return (max_window + (window_size - 1) - min_window) / window_size;
|
||||||
|
};
|
||||||
|
|
||||||
auto estimated_window_count = (max_window + (window_size - 1) - min_window) / window_size;
|
if (!min_and_max_ts_available && default_ttl.count()) {
|
||||||
|
auto min_window = get_window_for(_options, timestamp_type(0));
|
||||||
|
auto max_window = get_window_for(_options, timestamp_type(default_ttl.count()));
|
||||||
|
|
||||||
|
estimated_window_count = estimate_window_count(min_window, max_window);
|
||||||
|
} else if (min_and_max_ts_available) {
|
||||||
|
auto min_window = get_window_for(_options, *ms_meta.min_timestamp);
|
||||||
|
auto max_window = get_window_for(_options, *ms_meta.max_timestamp);
|
||||||
|
|
||||||
|
estimated_window_count = estimate_window_count(min_window, max_window);
|
||||||
|
}
|
||||||
|
|
||||||
return partition_estimate / std::max(1UL, uint64_t(estimated_window_count));
|
return partition_estimate / std::max(1UL, uint64_t(estimated_window_count));
|
||||||
}
|
}
|
||||||
@@ -212,12 +223,14 @@ reader_consumer_v2 time_window_compaction_strategy::make_interposer_consumer(con
|
|||||||
}
|
}
|
||||||
|
|
||||||
compaction_descriptor
|
compaction_descriptor
|
||||||
time_window_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const {
|
time_window_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
|
||||||
|
auto mode = cfg.mode;
|
||||||
std::vector<shared_sstable> single_window;
|
std::vector<shared_sstable> single_window;
|
||||||
std::vector<shared_sstable> multi_window;
|
std::vector<shared_sstable> multi_window;
|
||||||
|
|
||||||
size_t offstrategy_threshold = std::max(schema->min_compaction_threshold(), 4);
|
size_t offstrategy_threshold = std::max(schema->min_compaction_threshold(), 4);
|
||||||
size_t max_sstables = std::max(schema->max_compaction_threshold(), int(offstrategy_threshold));
|
size_t max_sstables = std::max(schema->max_compaction_threshold(), int(offstrategy_threshold));
|
||||||
|
const uint64_t target_job_size = cfg.free_storage_space * reshape_target_space_overhead;
|
||||||
|
|
||||||
if (mode == reshape_mode::relaxed) {
|
if (mode == reshape_mode::relaxed) {
|
||||||
offstrategy_threshold = max_sstables;
|
offstrategy_threshold = max_sstables;
|
||||||
@@ -249,22 +262,40 @@ time_window_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> i
|
|||||||
multi_window.size(), !multi_window.empty() && sstable_set_overlapping_count(schema, multi_window) == 0,
|
multi_window.size(), !multi_window.empty() && sstable_set_overlapping_count(schema, multi_window) == 0,
|
||||||
single_window.size(), !single_window.empty() && sstable_set_overlapping_count(schema, single_window) == 0);
|
single_window.size(), !single_window.empty() && sstable_set_overlapping_count(schema, single_window) == 0);
|
||||||
|
|
||||||
auto need_trimming = [max_sstables, schema, &is_disjoint] (const std::vector<shared_sstable>& ssts) {
|
auto get_job_size = [] (const std::vector<shared_sstable>& ssts) {
|
||||||
// All sstables can be compacted at once if they're disjoint, given that partitioned set
|
return boost::accumulate(ssts | boost::adaptors::transformed(std::mem_fn(&sstable::bytes_on_disk)), uint64_t(0));
|
||||||
// will incrementally open sstables which translates into bounded memory usage.
|
};
|
||||||
return ssts.size() > max_sstables && !is_disjoint(ssts);
|
|
||||||
|
// Targets a space overhead of 10%. All disjoint sstables can be compacted together as long as they won't
|
||||||
|
// cause an overhead above target. Otherwise, the job targets a maximum of #max_threshold sstables.
|
||||||
|
auto need_trimming = [&] (const std::vector<shared_sstable>& ssts, const uint64_t job_size, bool is_disjoint) {
|
||||||
|
const size_t min_sstables = 2;
|
||||||
|
auto is_above_target_size = job_size > target_job_size;
|
||||||
|
|
||||||
|
return (ssts.size() > max_sstables && !is_disjoint) ||
|
||||||
|
(ssts.size() > min_sstables && is_above_target_size);
|
||||||
|
};
|
||||||
|
|
||||||
|
auto maybe_trim_job = [&need_trimming] (std::vector<shared_sstable>& ssts, uint64_t job_size, bool is_disjoint) {
|
||||||
|
while (need_trimming(ssts, job_size, is_disjoint)) {
|
||||||
|
auto sst = ssts.back();
|
||||||
|
ssts.pop_back();
|
||||||
|
job_size -= sst->bytes_on_disk();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if (!multi_window.empty()) {
|
if (!multi_window.empty()) {
|
||||||
|
auto disjoint = is_disjoint(multi_window);
|
||||||
|
auto job_size = get_job_size(multi_window);
|
||||||
// Everything that spans multiple windows will need reshaping
|
// Everything that spans multiple windows will need reshaping
|
||||||
if (need_trimming(multi_window)) {
|
if (need_trimming(multi_window, job_size, disjoint)) {
|
||||||
// When trimming, let's keep sstables with overlapping time window, so as to reduce write amplification.
|
// When trimming, let's keep sstables with overlapping time window, so as to reduce write amplification.
|
||||||
// For example, if there are N sstables spanning window W, where N <= 32, then we can produce all data for W
|
// For example, if there are N sstables spanning window W, where N <= 32, then we can produce all data for W
|
||||||
// in a single compaction round, removing the need to later compact W to reduce its number of files.
|
// in a single compaction round, removing the need to later compact W to reduce its number of files.
|
||||||
boost::partial_sort(multi_window, multi_window.begin() + max_sstables, [](const shared_sstable &a, const shared_sstable &b) {
|
boost::partial_sort(multi_window, multi_window.begin() + max_sstables, [](const shared_sstable &a, const shared_sstable &b) {
|
||||||
return a->get_stats_metadata().max_timestamp < b->get_stats_metadata().max_timestamp;
|
return a->get_stats_metadata().max_timestamp < b->get_stats_metadata().max_timestamp;
|
||||||
});
|
});
|
||||||
multi_window.resize(max_sstables);
|
maybe_trim_job(multi_window, job_size, disjoint);
|
||||||
}
|
}
|
||||||
compaction_descriptor desc(std::move(multi_window));
|
compaction_descriptor desc(std::move(multi_window));
|
||||||
desc.options = compaction_type_options::make_reshape();
|
desc.options = compaction_type_options::make_reshape();
|
||||||
@@ -283,15 +314,17 @@ time_window_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> i
|
|||||||
std::copy(ssts.begin(), ssts.end(), std::back_inserter(single_window));
|
std::copy(ssts.begin(), ssts.end(), std::back_inserter(single_window));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// reuse STCS reshape logic which will only compact similar-sized files, to increase overall efficiency
|
// reuse STCS reshape logic which will only compact similar-sized files, to increase overall efficiency
|
||||||
// when reshaping time buckets containing a huge amount of files
|
// when reshaping time buckets containing a huge amount of files
|
||||||
auto desc = size_tiered_compaction_strategy(_stcs_options).get_reshaping_job(std::move(ssts), schema, mode);
|
auto desc = size_tiered_compaction_strategy(_stcs_options).get_reshaping_job(std::move(ssts), schema, cfg);
|
||||||
if (!desc.sstables.empty()) {
|
if (!desc.sstables.empty()) {
|
||||||
return desc;
|
return desc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!single_window.empty()) {
|
if (!single_window.empty()) {
|
||||||
|
maybe_trim_job(single_window, get_job_size(single_window), all_disjoint);
|
||||||
compaction_descriptor desc(std::move(single_window));
|
compaction_descriptor desc(std::move(single_window));
|
||||||
desc.options = compaction_type_options::make_reshape();
|
desc.options = compaction_type_options::make_reshape();
|
||||||
return desc;
|
return desc;
|
||||||
@@ -366,7 +399,7 @@ time_window_compaction_strategy::get_next_non_expired_sstables(table_state& tabl
|
|||||||
// if there is no sstable to compact in standard way, try compacting single sstable whose droppable tombstone
|
// if there is no sstable to compact in standard way, try compacting single sstable whose droppable tombstone
|
||||||
// ratio is greater than threshold.
|
// ratio is greater than threshold.
|
||||||
auto e = boost::range::remove_if(non_expiring_sstables, [this, compaction_time, &table_s] (const shared_sstable& sst) -> bool {
|
auto e = boost::range::remove_if(non_expiring_sstables, [this, compaction_time, &table_s] (const shared_sstable& sst) -> bool {
|
||||||
return !worth_dropping_tombstones(sst, compaction_time, table_s.get_tombstone_gc_state());
|
return !worth_dropping_tombstones(sst, compaction_time, table_s);
|
||||||
});
|
});
|
||||||
non_expiring_sstables.erase(e, non_expiring_sstables.end());
|
non_expiring_sstables.erase(e, non_expiring_sstables.end());
|
||||||
if (non_expiring_sstables.empty()) {
|
if (non_expiring_sstables.empty()) {
|
||||||
|
|||||||
@@ -78,6 +78,7 @@ public:
|
|||||||
// To prevent an explosion in the number of sstables we cap it.
|
// To prevent an explosion in the number of sstables we cap it.
|
||||||
// Better co-locate some windows into the same sstables than OOM.
|
// Better co-locate some windows into the same sstables than OOM.
|
||||||
static constexpr uint64_t max_data_segregation_window_count = 100;
|
static constexpr uint64_t max_data_segregation_window_count = 100;
|
||||||
|
static constexpr float reshape_target_space_overhead = 0.1f;
|
||||||
|
|
||||||
using bucket_t = std::vector<shared_sstable>;
|
using bucket_t = std::vector<shared_sstable>;
|
||||||
enum class bucket_compaction_mode { none, size_tiered, major };
|
enum class bucket_compaction_mode { none, size_tiered, major };
|
||||||
@@ -162,7 +163,7 @@ public:
|
|||||||
|
|
||||||
virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() const override;
|
virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() const override;
|
||||||
|
|
||||||
virtual uint64_t adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate) const override;
|
virtual uint64_t adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate, schema_ptr s) const override;
|
||||||
|
|
||||||
virtual reader_consumer_v2 make_interposer_consumer(const mutation_source_metadata& ms_meta, reader_consumer_v2 end_consumer) const override;
|
virtual reader_consumer_v2 make_interposer_consumer(const mutation_source_metadata& ms_meta, reader_consumer_v2 end_consumer) const override;
|
||||||
|
|
||||||
@@ -170,7 +171,7 @@ public:
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const override;
|
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -572,7 +572,7 @@ murmur3_partitioner_ignore_msb_bits: 12
|
|||||||
force_schema_commit_log: true
|
force_schema_commit_log: true
|
||||||
|
|
||||||
# Time for which task manager task is kept in memory after it completes.
|
# Time for which task manager task is kept in memory after it completes.
|
||||||
task_ttl_in_seconds: 10
|
# task_ttl_in_seconds: 0
|
||||||
|
|
||||||
# Use Raft to consistently manage schema information in the cluster.
|
# Use Raft to consistently manage schema information in the cluster.
|
||||||
# Refer to https://docs.scylladb.com/master/architecture/raft.html for more details.
|
# Refer to https://docs.scylladb.com/master/architecture/raft.html for more details.
|
||||||
|
|||||||
@@ -852,6 +852,7 @@ scylla_core = (['message/messaging_service.cc',
|
|||||||
'utils/rjson.cc',
|
'utils/rjson.cc',
|
||||||
'utils/human_readable.cc',
|
'utils/human_readable.cc',
|
||||||
'utils/histogram_metrics_helper.cc',
|
'utils/histogram_metrics_helper.cc',
|
||||||
|
'utils/on_internal_error.cc',
|
||||||
'utils/pretty_printers.cc',
|
'utils/pretty_printers.cc',
|
||||||
'converting_mutation_partition_applier.cc',
|
'converting_mutation_partition_applier.cc',
|
||||||
'readers/combined.cc',
|
'readers/combined.cc',
|
||||||
@@ -1126,6 +1127,7 @@ scylla_core = (['message/messaging_service.cc',
|
|||||||
'utils/lister.cc',
|
'utils/lister.cc',
|
||||||
'repair/repair.cc',
|
'repair/repair.cc',
|
||||||
'repair/row_level.cc',
|
'repair/row_level.cc',
|
||||||
|
'repair/table_check.cc',
|
||||||
'exceptions/exceptions.cc',
|
'exceptions/exceptions.cc',
|
||||||
'auth/allow_all_authenticator.cc',
|
'auth/allow_all_authenticator.cc',
|
||||||
'auth/allow_all_authorizer.cc',
|
'auth/allow_all_authorizer.cc',
|
||||||
@@ -1240,6 +1242,8 @@ api = ['api/api.cc',
|
|||||||
Json2Code('api/api-doc/error_injection.json'),
|
Json2Code('api/api-doc/error_injection.json'),
|
||||||
'api/authorization_cache.cc',
|
'api/authorization_cache.cc',
|
||||||
Json2Code('api/api-doc/authorization_cache.json'),
|
Json2Code('api/api-doc/authorization_cache.json'),
|
||||||
|
'api/raft.cc',
|
||||||
|
Json2Code('api/api-doc/raft.json'),
|
||||||
]
|
]
|
||||||
|
|
||||||
alternator = [
|
alternator = [
|
||||||
@@ -1451,7 +1455,7 @@ deps['test/boost/bytes_ostream_test'] = [
|
|||||||
"test/lib/log.cc",
|
"test/lib/log.cc",
|
||||||
]
|
]
|
||||||
deps['test/boost/input_stream_test'] = ['test/boost/input_stream_test.cc']
|
deps['test/boost/input_stream_test'] = ['test/boost/input_stream_test.cc']
|
||||||
deps['test/boost/UUID_test'] = ['utils/UUID_gen.cc', 'test/boost/UUID_test.cc', 'utils/uuid.cc', 'utils/dynamic_bitset.cc', 'utils/hashers.cc']
|
deps['test/boost/UUID_test'] = ['utils/UUID_gen.cc', 'test/boost/UUID_test.cc', 'utils/uuid.cc', 'utils/dynamic_bitset.cc', 'utils/hashers.cc', 'utils/on_internal_error.cc']
|
||||||
deps['test/boost/murmur_hash_test'] = ['bytes.cc', 'utils/murmur_hash.cc', 'test/boost/murmur_hash_test.cc']
|
deps['test/boost/murmur_hash_test'] = ['bytes.cc', 'utils/murmur_hash.cc', 'test/boost/murmur_hash_test.cc']
|
||||||
deps['test/boost/allocation_strategy_test'] = ['test/boost/allocation_strategy_test.cc', 'utils/logalloc.cc', 'utils/dynamic_bitset.cc']
|
deps['test/boost/allocation_strategy_test'] = ['test/boost/allocation_strategy_test.cc', 'utils/logalloc.cc', 'utils/dynamic_bitset.cc']
|
||||||
deps['test/boost/log_heap_test'] = ['test/boost/log_heap_test.cc']
|
deps['test/boost/log_heap_test'] = ['test/boost/log_heap_test.cc']
|
||||||
|
|||||||
@@ -338,6 +338,9 @@ functions::get(data_dictionary::database db,
|
|||||||
if (!receiver_cf.has_value()) {
|
if (!receiver_cf.has_value()) {
|
||||||
throw exceptions::invalid_request_exception("functions::get for token doesn't have a known column family");
|
throw exceptions::invalid_request_exception("functions::get for token doesn't have a known column family");
|
||||||
}
|
}
|
||||||
|
if (schema == nullptr) {
|
||||||
|
throw exceptions::invalid_request_exception(format("functions::get for token cannot find {} table", *receiver_cf));
|
||||||
|
}
|
||||||
auto fun = ::make_shared<token_fct>(schema);
|
auto fun = ::make_shared<token_fct>(schema);
|
||||||
validate_types(db, keyspace, schema.get(), fun, provided_args, receiver_ks, receiver_cf);
|
validate_types(db, keyspace, schema.get(), fun, provided_args, receiver_ks, receiver_cf);
|
||||||
return fun;
|
return fun;
|
||||||
|
|||||||
@@ -815,7 +815,7 @@ bool query_processor::has_more_results(cql3::internal_query_state& state) const
|
|||||||
|
|
||||||
future<> query_processor::for_each_cql_result(
|
future<> query_processor::for_each_cql_result(
|
||||||
cql3::internal_query_state& state,
|
cql3::internal_query_state& state,
|
||||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set::row&)>&& f) {
|
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set::row&)> f) {
|
||||||
do {
|
do {
|
||||||
auto msg = co_await execute_paged_internal(state);
|
auto msg = co_await execute_paged_internal(state);
|
||||||
for (auto& row : *msg) {
|
for (auto& row : *msg) {
|
||||||
@@ -1065,6 +1065,9 @@ void query_processor::migration_subscriber::on_update_aggregate(const sstring& k
|
|||||||
void query_processor::migration_subscriber::on_update_view(
|
void query_processor::migration_subscriber::on_update_view(
|
||||||
const sstring& ks_name,
|
const sstring& ks_name,
|
||||||
const sstring& view_name, bool columns_changed) {
|
const sstring& view_name, bool columns_changed) {
|
||||||
|
// scylladb/scylladb#16392 - Materialized views are also tables so we need at least handle
|
||||||
|
// them as such when changed.
|
||||||
|
on_update_column_family(ks_name, view_name, columns_changed);
|
||||||
}
|
}
|
||||||
|
|
||||||
void query_processor::migration_subscriber::on_update_tablet_metadata() {
|
void query_processor::migration_subscriber::on_update_tablet_metadata() {
|
||||||
@@ -1113,14 +1116,14 @@ future<> query_processor::query_internal(
|
|||||||
db::consistency_level cl,
|
db::consistency_level cl,
|
||||||
const std::initializer_list<data_value>& values,
|
const std::initializer_list<data_value>& values,
|
||||||
int32_t page_size,
|
int32_t page_size,
|
||||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)>&& f) {
|
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f) {
|
||||||
auto query_state = create_paged_state(query_string, cl, values, page_size);
|
auto query_state = create_paged_state(query_string, cl, values, page_size);
|
||||||
co_return co_await for_each_cql_result(query_state, std::move(f));
|
co_return co_await for_each_cql_result(query_state, std::move(f));
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> query_processor::query_internal(
|
future<> query_processor::query_internal(
|
||||||
const sstring& query_string,
|
const sstring& query_string,
|
||||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)>&& f) {
|
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f) {
|
||||||
return query_internal(query_string, db::consistency_level::ONE, {}, 1000, std::move(f));
|
return query_internal(query_string, db::consistency_level::ONE, {}, 1000, std::move(f));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -307,7 +307,7 @@ public:
|
|||||||
db::consistency_level cl,
|
db::consistency_level cl,
|
||||||
const std::initializer_list<data_value>& values,
|
const std::initializer_list<data_value>& values,
|
||||||
int32_t page_size,
|
int32_t page_size,
|
||||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)>&& f);
|
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* \brief iterate over all cql results using paging
|
* \brief iterate over all cql results using paging
|
||||||
@@ -322,7 +322,7 @@ public:
|
|||||||
*/
|
*/
|
||||||
future<> query_internal(
|
future<> query_internal(
|
||||||
const sstring& query_string,
|
const sstring& query_string,
|
||||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)>&& f);
|
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f);
|
||||||
|
|
||||||
class cache_internal_tag;
|
class cache_internal_tag;
|
||||||
using cache_internal = bool_class<cache_internal_tag>;
|
using cache_internal = bool_class<cache_internal_tag>;
|
||||||
@@ -479,7 +479,7 @@ private:
|
|||||||
*/
|
*/
|
||||||
future<> for_each_cql_result(
|
future<> for_each_cql_result(
|
||||||
cql3::internal_query_state& state,
|
cql3::internal_query_state& state,
|
||||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)>&& f);
|
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f);
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
* \brief check, based on the state if there are additional results
|
* \brief check, based on the state if there are additional results
|
||||||
|
|||||||
@@ -541,22 +541,32 @@ std::pair<std::optional<secondary_index::index>, expr::expression> statement_res
|
|||||||
int chosen_index_score = 0;
|
int chosen_index_score = 0;
|
||||||
expr::expression chosen_index_restrictions = expr::conjunction({});
|
expr::expression chosen_index_restrictions = expr::conjunction({});
|
||||||
|
|
||||||
for (const auto& index : sim.list_indexes()) {
|
// Several indexes may be usable for this query. When their score is tied,
|
||||||
auto cdef = _schema->get_column_definition(to_bytes(index.target_column()));
|
// let's pick one by order of the columns mentioned in the restriction
|
||||||
for (const expr::expression& restriction : index_restrictions()) {
|
// expression. This specific order isn't important (and maybe in the
|
||||||
if (has_partition_token(restriction, *_schema) || contains_multi_column_restriction(restriction)) {
|
// future we could plan a better order based on the specificity of each
|
||||||
continue;
|
// index), but it is critical that two coordinators - or the same
|
||||||
}
|
// coordinator over time - must choose the same index for the same query.
|
||||||
|
// Otherwise, paging can break (see issue #7969).
|
||||||
expr::single_column_restrictions_map rmap = expr::get_single_column_restrictions_map(restriction);
|
for (const expr::expression& restriction : index_restrictions()) {
|
||||||
const auto found = rmap.find(cdef);
|
if (has_partition_token(restriction, *_schema) || contains_multi_column_restriction(restriction)) {
|
||||||
if (found != rmap.end() && is_supported_by(found->second, index)
|
continue;
|
||||||
&& score(index) > chosen_index_score) {
|
|
||||||
chosen_index = index;
|
|
||||||
chosen_index_score = score(index);
|
|
||||||
chosen_index_restrictions = restriction;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
expr::for_each_expression<expr::column_value>(restriction, [&](const expr::column_value& cval) {
|
||||||
|
auto& cdef = cval.col;
|
||||||
|
expr::expression col_restrictions = expr::conjunction {
|
||||||
|
.children = expr::extract_single_column_restrictions_for_column(restriction, *cdef)
|
||||||
|
};
|
||||||
|
for (const auto& index : sim.list_indexes()) {
|
||||||
|
if (cdef->name_as_text() == index.target_column() &&
|
||||||
|
expr::is_supported_by(col_restrictions, index) &&
|
||||||
|
score(index) > chosen_index_score) {
|
||||||
|
chosen_index = index;
|
||||||
|
chosen_index_score = score(index);
|
||||||
|
chosen_index_restrictions = restriction;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
return {chosen_index, chosen_index_restrictions};
|
return {chosen_index, chosen_index_restrictions};
|
||||||
}
|
}
|
||||||
@@ -1132,13 +1142,14 @@ bool starts_before_start(
|
|||||||
const auto len1 = r1.start()->value().representation().size();
|
const auto len1 = r1.start()->value().representation().size();
|
||||||
const auto len2 = r2.start()->value().representation().size();
|
const auto len2 = r2.start()->value().representation().size();
|
||||||
if (len1 == len2) { // The values truly are equal.
|
if (len1 == len2) { // The values truly are equal.
|
||||||
|
// (a)>=(1) starts before (a)>(1)
|
||||||
return r1.start()->is_inclusive() && !r2.start()->is_inclusive();
|
return r1.start()->is_inclusive() && !r2.start()->is_inclusive();
|
||||||
} else if (len1 < len2) { // r1 start is a prefix of r2 start.
|
} else if (len1 < len2) { // r1 start is a prefix of r2 start.
|
||||||
// (a)>=(1) starts before (a,b)>=(1,1), but (a)>(1) doesn't.
|
// (a)>=(1) starts before (a,b)>=(1,1), but (a)>(1) doesn't.
|
||||||
return r1.start()->is_inclusive();
|
return r1.start()->is_inclusive();
|
||||||
} else { // r2 start is a prefix of r1 start.
|
} else { // r2 start is a prefix of r1 start.
|
||||||
// (a,b)>=(1,1) starts before (a)>(1) but after (a)>=(1).
|
// (a,b)>=(1,1) starts before (a)>(1) but after (a)>=(1).
|
||||||
return r2.start()->is_inclusive();
|
return !r2.start()->is_inclusive();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1163,6 +1174,7 @@ bool starts_before_or_at_end(
|
|||||||
const auto len1 = r1.start()->value().representation().size();
|
const auto len1 = r1.start()->value().representation().size();
|
||||||
const auto len2 = r2.end()->value().representation().size();
|
const auto len2 = r2.end()->value().representation().size();
|
||||||
if (len1 == len2) { // The values truly are equal.
|
if (len1 == len2) { // The values truly are equal.
|
||||||
|
// (a)>=(1) starts at end of (a)<=(1)
|
||||||
return r1.start()->is_inclusive() && r2.end()->is_inclusive();
|
return r1.start()->is_inclusive() && r2.end()->is_inclusive();
|
||||||
} else if (len1 < len2) { // r1 start is a prefix of r2 end.
|
} else if (len1 < len2) { // r1 start is a prefix of r2 end.
|
||||||
// a>=(1) starts before (a,b)<=(1,1) ends, but (a)>(1) doesn't.
|
// a>=(1) starts before (a,b)<=(1,1) ends, but (a)>(1) doesn't.
|
||||||
@@ -1194,6 +1206,7 @@ bool ends_before_end(
|
|||||||
const auto len1 = r1.end()->value().representation().size();
|
const auto len1 = r1.end()->value().representation().size();
|
||||||
const auto len2 = r2.end()->value().representation().size();
|
const auto len2 = r2.end()->value().representation().size();
|
||||||
if (len1 == len2) { // The values truly are equal.
|
if (len1 == len2) { // The values truly are equal.
|
||||||
|
// (a)<(1) ends before (a)<=(1) ends
|
||||||
return !r1.end()->is_inclusive() && r2.end()->is_inclusive();
|
return !r1.end()->is_inclusive() && r2.end()->is_inclusive();
|
||||||
} else if (len1 < len2) { // r1 end is a prefix of r2 end.
|
} else if (len1 < len2) { // r1 end is a prefix of r2 end.
|
||||||
// (a)<(1) ends before (a,b)<=(1,1), but (a)<=(1) doesn't.
|
// (a)<(1) ends before (a,b)<=(1,1), but (a)<=(1) doesn't.
|
||||||
@@ -1209,7 +1222,10 @@ std::optional<query::clustering_range> intersection(
|
|||||||
const query::clustering_range& r1,
|
const query::clustering_range& r1,
|
||||||
const query::clustering_range& r2,
|
const query::clustering_range& r2,
|
||||||
const clustering_key_prefix::prefix_equal_tri_compare& cmp) {
|
const clustering_key_prefix::prefix_equal_tri_compare& cmp) {
|
||||||
// Assume r1's start is to the left of r2's start.
|
// If needed, swap r1 and r2 so that r1's start is to the left of r2's
|
||||||
|
// start. Note that to avoid infinite recursion (#18688) the function
|
||||||
|
// starts_before_start() must never return true for both (r1,r2) and
|
||||||
|
// (r2,r1) - in other words, it must be a *strict* partial order.
|
||||||
if (starts_before_start(r2, r1, cmp)) {
|
if (starts_before_start(r2, r1, cmp)) {
|
||||||
return intersection(r2, r1, cmp);
|
return intersection(r2, r1, cmp);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -433,12 +433,17 @@ protected:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
::shared_ptr<selection> selection::wildcard(schema_ptr schema) {
|
// Return a list of columns that "SELECT *" should show - these are all
|
||||||
|
// columns except potentially some that are is_hidden_from_cql() (currently,
|
||||||
|
// those can be the "virtual columns" used in materialized views).
|
||||||
|
// The list points to column_definition objects in the given schema_ptr,
|
||||||
|
// which can be used only as long as the caller keeps the schema_ptr alive.
|
||||||
|
std::vector<const column_definition*> selection::wildcard_columns(schema_ptr schema) {
|
||||||
auto columns = schema->all_columns_in_select_order();
|
auto columns = schema->all_columns_in_select_order();
|
||||||
// filter out hidden columns, which should not be seen by the
|
// filter out hidden columns, which should not be seen by the
|
||||||
// user when doing "SELECT *". We also disallow selecting them
|
// user when doing "SELECT *". We also disallow selecting them
|
||||||
// individually (see column_identifier::new_selector_factory()).
|
// individually (see column_identifier::new_selector_factory()).
|
||||||
auto cds = boost::copy_range<std::vector<const column_definition*>>(
|
return boost::copy_range<std::vector<const column_definition*>>(
|
||||||
columns |
|
columns |
|
||||||
boost::adaptors::filtered([](const column_definition& c) {
|
boost::adaptors::filtered([](const column_definition& c) {
|
||||||
return !c.is_hidden_from_cql();
|
return !c.is_hidden_from_cql();
|
||||||
@@ -446,7 +451,10 @@ protected:
|
|||||||
boost::adaptors::transformed([](const column_definition& c) {
|
boost::adaptors::transformed([](const column_definition& c) {
|
||||||
return &c;
|
return &c;
|
||||||
}));
|
}));
|
||||||
return simple_selection::make(schema, std::move(cds), true);
|
}
|
||||||
|
|
||||||
|
::shared_ptr<selection> selection::wildcard(schema_ptr schema) {
|
||||||
|
return simple_selection::make(schema, wildcard_columns(schema), true);
|
||||||
}
|
}
|
||||||
|
|
||||||
::shared_ptr<selection> selection::for_columns(schema_ptr schema, std::vector<const column_definition*> columns) {
|
::shared_ptr<selection> selection::for_columns(schema_ptr schema, std::vector<const column_definition*> columns) {
|
||||||
|
|||||||
@@ -118,6 +118,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
static ::shared_ptr<selection> wildcard(schema_ptr schema);
|
static ::shared_ptr<selection> wildcard(schema_ptr schema);
|
||||||
|
static std::vector<const column_definition*> wildcard_columns(schema_ptr schema);
|
||||||
static ::shared_ptr<selection> for_columns(schema_ptr schema, std::vector<const column_definition*> columns);
|
static ::shared_ptr<selection> for_columns(schema_ptr schema, std::vector<const column_definition*> columns);
|
||||||
|
|
||||||
// Adds a column to the selection and result set. Returns an index within the result set row.
|
// Adds a column to the selection and result set. Returns an index within the result set row.
|
||||||
|
|||||||
@@ -135,6 +135,18 @@ user_type alter_type_statement::add_or_alter::do_add(data_dictionary::database d
|
|||||||
throw exceptions::invalid_request_exception(format("Cannot add new field to type {}: maximum number of fields reached", _name));
|
throw exceptions::invalid_request_exception(format("Cannot add new field to type {}: maximum number of fields reached", _name));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (_field_type->is_duration()) {
|
||||||
|
auto&& ks = db.find_keyspace(keyspace());
|
||||||
|
for (auto&& schema : ks.metadata()->cf_meta_data() | boost::adaptors::map_values) {
|
||||||
|
for (auto&& column : schema->clustering_key_columns()) {
|
||||||
|
if (column.type->references_user_type(_name.get_keyspace(), _name.get_user_type_name())) {
|
||||||
|
throw exceptions::invalid_request_exception(format("Cannot add new field to type {} because it is used in the clustering key column {} of table {}.{} where durations are not allowed",
|
||||||
|
_name.to_cql_string(), column.name_as_text(), schema->ks_name(), schema->cf_name()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<bytes> new_names(to_update->field_names());
|
std::vector<bytes> new_names(to_update->field_names());
|
||||||
new_names.push_back(_field_name->name());
|
new_names.push_back(_field_name->name());
|
||||||
std::vector<data_type> new_types(to_update->field_types());
|
std::vector<data_type> new_types(to_update->field_types());
|
||||||
|
|||||||
@@ -226,7 +226,8 @@ future<> select_statement::check_access(query_processor& qp, const service::clie
|
|||||||
}
|
}
|
||||||
if (!_selection->is_trivial()) {
|
if (!_selection->is_trivial()) {
|
||||||
std::vector<::shared_ptr<functions::function>> used_functions = _selection->used_functions();
|
std::vector<::shared_ptr<functions::function>> used_functions = _selection->used_functions();
|
||||||
for (const auto& used_function : used_functions) {
|
auto not_native = [] (::shared_ptr<functions::function> func) { return !func->is_native(); };
|
||||||
|
for (const auto& used_function : used_functions | std::ranges::views::filter(not_native)) {
|
||||||
sstring encoded_signature = auth::encode_signature(used_function->name().name, used_function->arg_types());
|
sstring encoded_signature = auth::encode_signature(used_function->name().name, used_function->arg_types());
|
||||||
co_await state.has_function_access(used_function->name().keyspace, encoded_signature, auth::permission::EXECUTE);
|
co_await state.has_function_access(used_function->name().keyspace, encoded_signature, auth::permission::EXECUTE);
|
||||||
}
|
}
|
||||||
@@ -1660,7 +1661,7 @@ schema_ptr mutation_fragments_select_statement::generate_output_schema(schema_pt
|
|||||||
|
|
||||||
future<exceptions::coordinator_result<service::storage_proxy_coordinator_query_result>>
|
future<exceptions::coordinator_result<service::storage_proxy_coordinator_query_result>>
|
||||||
mutation_fragments_select_statement::do_query(
|
mutation_fragments_select_statement::do_query(
|
||||||
const locator::node* this_node,
|
locator::host_id this_node,
|
||||||
service::storage_proxy& sp,
|
service::storage_proxy& sp,
|
||||||
schema_ptr schema,
|
schema_ptr schema,
|
||||||
lw_shared_ptr<query::read_command> cmd,
|
lw_shared_ptr<query::read_command> cmd,
|
||||||
@@ -1670,7 +1671,7 @@ mutation_fragments_select_statement::do_query(
|
|||||||
auto res = co_await replica::mutation_dump::dump_mutations(sp.get_db(), schema, _underlying_schema, partition_ranges, *cmd, optional_params.timeout(sp));
|
auto res = co_await replica::mutation_dump::dump_mutations(sp.get_db(), schema, _underlying_schema, partition_ranges, *cmd, optional_params.timeout(sp));
|
||||||
service::replicas_per_token_range last_replicas;
|
service::replicas_per_token_range last_replicas;
|
||||||
if (this_node) {
|
if (this_node) {
|
||||||
last_replicas.emplace(dht::token_range::make_open_ended_both_sides(), std::vector<locator::host_id>{this_node->host_id()});
|
last_replicas.emplace(dht::token_range::make_open_ended_both_sides(), std::vector<locator::host_id>{this_node});
|
||||||
}
|
}
|
||||||
co_return service::storage_proxy_coordinator_query_result{std::move(res), std::move(last_replicas), {}};
|
co_return service::storage_proxy_coordinator_query_result{std::move(res), std::move(last_replicas), {}};
|
||||||
}
|
}
|
||||||
@@ -1731,12 +1732,17 @@ mutation_fragments_select_statement::do_execute(query_processor& qp, service::qu
|
|||||||
auto timeout_duration = get_timeout(state.get_client_state(), options);
|
auto timeout_duration = get_timeout(state.get_client_state(), options);
|
||||||
auto timeout = db::timeout_clock::now() + timeout_duration;
|
auto timeout = db::timeout_clock::now() + timeout_duration;
|
||||||
|
|
||||||
|
auto& tbl = qp.proxy().local_db().find_column_family(_underlying_schema);
|
||||||
|
|
||||||
|
// Since this query doesn't go through storage-proxy, we have to take care of pinning erm here.
|
||||||
|
auto erm_keepalive = tbl.get_effective_replication_map();
|
||||||
|
|
||||||
if (!aggregate && !_restrictions_need_filtering && (page_size <= 0
|
if (!aggregate && !_restrictions_need_filtering && (page_size <= 0
|
||||||
|| !service::pager::query_pagers::may_need_paging(*_schema, page_size,
|
|| !service::pager::query_pagers::may_need_paging(*_schema, page_size,
|
||||||
*command, key_ranges))) {
|
*command, key_ranges))) {
|
||||||
return do_query({}, qp.proxy(), _schema, command, std::move(key_ranges), cl,
|
return do_query({}, qp.proxy(), _schema, command, std::move(key_ranges), cl,
|
||||||
{timeout, state.get_permit(), state.get_client_state(), state.get_trace_state(), {}, {}})
|
{timeout, state.get_permit(), state.get_client_state(), state.get_trace_state(), {}, {}})
|
||||||
.then(wrap_result_to_error_message([&, this] (service::storage_proxy_coordinator_query_result&& qr) {
|
.then(wrap_result_to_error_message([&, this, erm_keepalive] (service::storage_proxy_coordinator_query_result&& qr) {
|
||||||
cql3::selection::result_set_builder builder(*_selection, now);
|
cql3::selection::result_set_builder builder(*_selection, now);
|
||||||
query::result_view::consume(*qr.query_result, std::move(slice),
|
query::result_view::consume(*qr.query_result, std::move(slice),
|
||||||
cql3::selection::result_set_builder::visitor(builder, *_schema, *_selection));
|
cql3::selection::result_set_builder::visitor(builder, *_schema, *_selection));
|
||||||
@@ -1745,16 +1751,14 @@ mutation_fragments_select_statement::do_execute(query_processor& qp, service::qu
|
|||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
const locator::node* this_node = nullptr;
|
locator::host_id this_node;
|
||||||
{
|
{
|
||||||
auto& tbl = qp.proxy().local_db().find_column_family(_underlying_schema);
|
auto& topo = erm_keepalive->get_topology();
|
||||||
auto& erm = tbl.get_effective_replication_map();
|
this_node = topo.this_node()->host_id();
|
||||||
auto& topo = erm->get_topology();
|
|
||||||
this_node = topo.this_node();
|
|
||||||
auto state = options.get_paging_state();
|
auto state = options.get_paging_state();
|
||||||
if (state && !state->get_last_replicas().empty()) {
|
if (state && !state->get_last_replicas().empty()) {
|
||||||
auto last_host = state->get_last_replicas().begin()->second.front();
|
auto last_host = state->get_last_replicas().begin()->second.front();
|
||||||
if (last_host != this_node->host_id()) {
|
if (last_host != this_node) {
|
||||||
const auto last_node = topo.find_node(last_host);
|
const auto last_node = topo.find_node(last_host);
|
||||||
throw exceptions::invalid_request_exception(format(
|
throw exceptions::invalid_request_exception(format(
|
||||||
"Moving between coordinators is not allowed in SELECT FROM MUTATION_FRAGMENTS() statements, last page's coordinator was {}{}",
|
"Moving between coordinators is not allowed in SELECT FROM MUTATION_FRAGMENTS() statements, last page's coordinator was {}{}",
|
||||||
@@ -1774,7 +1778,10 @@ mutation_fragments_select_statement::do_execute(query_processor& qp, service::qu
|
|||||||
command,
|
command,
|
||||||
std::move(key_ranges),
|
std::move(key_ranges),
|
||||||
_restrictions_need_filtering ? _restrictions : nullptr,
|
_restrictions_need_filtering ? _restrictions : nullptr,
|
||||||
std::bind_front(&mutation_fragments_select_statement::do_query, this, this_node));
|
[this, erm_keepalive, this_node] (service::storage_proxy& sp, schema_ptr schema, lw_shared_ptr<query::read_command> cmd, dht::partition_range_vector partition_ranges,
|
||||||
|
db::consistency_level cl, service::storage_proxy_coordinator_query_options optional_params) {
|
||||||
|
return do_query(this_node, sp, std::move(schema), std::move(cmd), std::move(partition_ranges), cl, std::move(optional_params));
|
||||||
|
});
|
||||||
|
|
||||||
if (_selection->is_trivial() && !_restrictions_need_filtering && !_per_partition_limit) {
|
if (_selection->is_trivial() && !_restrictions_need_filtering && !_per_partition_limit) {
|
||||||
return p->fetch_page_generator_result(page_size, now, timeout, _stats).then(wrap_result_to_error_message([this, p = std::move(p)] (result_generator&& generator) {
|
return p->fetch_page_generator_result(page_size, now, timeout, _stats).then(wrap_result_to_error_message([this, p = std::move(p)] (result_generator&& generator) {
|
||||||
@@ -1901,6 +1908,21 @@ std::unique_ptr<prepared_statement> select_statement::prepare(data_dictionary::d
|
|||||||
// Force aggregation if GROUP BY is used. This will wrap every column x as first(x).
|
// Force aggregation if GROUP BY is used. This will wrap every column x as first(x).
|
||||||
if (!_group_by_columns.empty()) {
|
if (!_group_by_columns.empty()) {
|
||||||
aggregation_depth = std::max(aggregation_depth, 1u);
|
aggregation_depth = std::max(aggregation_depth, 1u);
|
||||||
|
if (prepared_selectors.empty()) {
|
||||||
|
// We have a "SELECT * GROUP BY". If we leave prepared_selectors
|
||||||
|
// empty, below we choose selection::wildcard() for SELECT *, and
|
||||||
|
// forget to do the "levellize" trick needed for the GROUP BY.
|
||||||
|
// So we need to set prepared_selectors. See #16531.
|
||||||
|
auto all_columns = selection::selection::wildcard_columns(schema);
|
||||||
|
std::vector<::shared_ptr<selection::raw_selector>> select_all;
|
||||||
|
select_all.reserve(all_columns.size());
|
||||||
|
for (const column_definition *cdef : all_columns) {
|
||||||
|
auto name = ::make_shared<cql3::column_identifier::raw>(cdef->name_as_text(), true);
|
||||||
|
select_all.push_back(::make_shared<selection::raw_selector>(
|
||||||
|
expr::unresolved_identifier(std::move(name)), nullptr));
|
||||||
|
}
|
||||||
|
prepared_selectors = selection::raw_selector::to_prepared_selectors(select_all, *schema, db, keyspace());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto& ps : prepared_selectors) {
|
for (auto& ps : prepared_selectors) {
|
||||||
@@ -1982,7 +2004,10 @@ std::unique_ptr<prepared_statement> select_statement::prepare(data_dictionary::d
|
|||||||
)
|
)
|
||||||
&& !restrictions->need_filtering() // No filtering
|
&& !restrictions->need_filtering() // No filtering
|
||||||
&& group_by_cell_indices->empty() // No GROUP BY
|
&& group_by_cell_indices->empty() // No GROUP BY
|
||||||
&& db.get_config().enable_parallelized_aggregation();
|
&& db.get_config().enable_parallelized_aggregation()
|
||||||
|
&& !( // Do not parallelize the request if it's single partition read
|
||||||
|
restrictions->partition_key_restrictions_is_all_eq()
|
||||||
|
&& restrictions->partition_key_restrictions_size() == schema->partition_key_size());
|
||||||
};
|
};
|
||||||
|
|
||||||
if (_parameters->is_prune_materialized_view()) {
|
if (_parameters->is_prune_materialized_view()) {
|
||||||
|
|||||||
@@ -19,10 +19,7 @@
|
|||||||
#include "index/secondary_index_manager.hh"
|
#include "index/secondary_index_manager.hh"
|
||||||
#include "exceptions/exceptions.hh"
|
#include "exceptions/exceptions.hh"
|
||||||
#include "exceptions/coordinator_result.hh"
|
#include "exceptions/coordinator_result.hh"
|
||||||
|
#include "locator/host_id.hh"
|
||||||
namespace locator {
|
|
||||||
class node;
|
|
||||||
} // namespace locator
|
|
||||||
|
|
||||||
namespace service {
|
namespace service {
|
||||||
class client_state;
|
class client_state;
|
||||||
@@ -341,7 +338,7 @@ public:
|
|||||||
private:
|
private:
|
||||||
future<exceptions::coordinator_result<service::storage_proxy_coordinator_query_result>>
|
future<exceptions::coordinator_result<service::storage_proxy_coordinator_query_result>>
|
||||||
do_query(
|
do_query(
|
||||||
const locator::node* this_node,
|
locator::host_id this_node,
|
||||||
service::storage_proxy& sp,
|
service::storage_proxy& sp,
|
||||||
schema_ptr schema,
|
schema_ptr schema,
|
||||||
lw_shared_ptr<query::read_command> cmd,
|
lw_shared_ptr<query::read_command> cmd,
|
||||||
|
|||||||
@@ -56,7 +56,11 @@ future<> use_statement::check_access(query_processor& qp, const service::client_
|
|||||||
|
|
||||||
future<::shared_ptr<cql_transport::messages::result_message>>
|
future<::shared_ptr<cql_transport::messages::result_message>>
|
||||||
use_statement::execute(query_processor& qp, service::query_state& state, const query_options& options, std::optional<service::group0_guard> guard) const {
|
use_statement::execute(query_processor& qp, service::query_state& state, const query_options& options, std::optional<service::group0_guard> guard) const {
|
||||||
state.get_client_state().set_keyspace(qp.db().real_database(), _keyspace);
|
try {
|
||||||
|
state.get_client_state().set_keyspace(qp.db().real_database(), _keyspace);
|
||||||
|
} catch(...) {
|
||||||
|
return make_exception_future<::shared_ptr<cql_transport::messages::result_message>>(std::current_exception());
|
||||||
|
}
|
||||||
auto result =::make_shared<cql_transport::messages::result_message::set_keyspace>(_keyspace);
|
auto result =::make_shared<cql_transport::messages::result_message::set_keyspace>(_keyspace);
|
||||||
return make_ready_future<::shared_ptr<cql_transport::messages::result_message>>(result);
|
return make_ready_future<::shared_ptr<cql_transport::messages::result_message>>(result);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -151,13 +151,15 @@ static bytes from_json_object_aux(const map_type_impl& t, const rjson::value& va
|
|||||||
std::map<bytes, bytes, serialized_compare> raw_map(t.get_keys_type()->as_less_comparator());
|
std::map<bytes, bytes, serialized_compare> raw_map(t.get_keys_type()->as_less_comparator());
|
||||||
for (auto it = value.MemberBegin(); it != value.MemberEnd(); ++it) {
|
for (auto it = value.MemberBegin(); it != value.MemberEnd(); ++it) {
|
||||||
bytes value = from_json_object(*t.get_values_type(), it->value);
|
bytes value = from_json_object(*t.get_values_type(), it->value);
|
||||||
if (t.get_keys_type()->underlying_type() == ascii_type ||
|
// For all native (non-collection, non-tuple) key types, they are
|
||||||
t.get_keys_type()->underlying_type() == utf8_type) {
|
// represented as a string in JSON. For more elaborate types, they
|
||||||
|
// can also be a string representation of another JSON type, which
|
||||||
|
// needs to be reparsed as JSON. For example,
|
||||||
|
// map<frozen<list<int>>, int> will be represented as:
|
||||||
|
// { "[1, 3, 6]": 3, "[]": 0, "[1, 2]": 2 }
|
||||||
|
if (t.get_keys_type()->underlying_type()->is_native()) {
|
||||||
raw_map.emplace(from_json_object(*t.get_keys_type(), it->name), std::move(value));
|
raw_map.emplace(from_json_object(*t.get_keys_type(), it->name), std::move(value));
|
||||||
} else {
|
} else {
|
||||||
// Keys in maps can only be strings in JSON, but they can also be a string representation
|
|
||||||
// of another JSON type, which needs to be reparsed. Example - map<frozen<list<int>>, int>
|
|
||||||
// will be represented like this: { "[1, 3, 6]": 3, "[]": 0, "[1, 2]": 2 }
|
|
||||||
try {
|
try {
|
||||||
rjson::value map_key = rjson::parse(rjson::to_string_view(it->name));
|
rjson::value map_key = rjson::parse(rjson::to_string_view(it->name));
|
||||||
raw_map.emplace(from_json_object(*t.get_keys_type(), map_key), std::move(value));
|
raw_map.emplace(from_json_object(*t.get_keys_type(), map_key), std::move(value));
|
||||||
@@ -502,7 +504,7 @@ struct to_json_string_visitor {
|
|||||||
sstring operator()(const tuple_type_impl& t) { return to_json_string_aux(t, bv); }
|
sstring operator()(const tuple_type_impl& t) { return to_json_string_aux(t, bv); }
|
||||||
sstring operator()(const user_type_impl& t) { return to_json_string_aux(t, bv); }
|
sstring operator()(const user_type_impl& t) { return to_json_string_aux(t, bv); }
|
||||||
sstring operator()(const simple_date_type_impl& t) { return quote_json_string(t.to_string(bv)); }
|
sstring operator()(const simple_date_type_impl& t) { return quote_json_string(t.to_string(bv)); }
|
||||||
sstring operator()(const time_type_impl& t) { return t.to_string(bv); }
|
sstring operator()(const time_type_impl& t) { return quote_json_string(t.to_string(bv)); }
|
||||||
sstring operator()(const empty_type_impl& t) { return "null"; }
|
sstring operator()(const empty_type_impl& t) { return "null"; }
|
||||||
sstring operator()(const duration_type_impl& t) {
|
sstring operator()(const duration_type_impl& t) {
|
||||||
auto v = t.deserialize(bv);
|
auto v = t.deserialize(bv);
|
||||||
|
|||||||
@@ -135,7 +135,7 @@ future<> db::batchlog_manager::stop() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
future<size_t> db::batchlog_manager::count_all_batches() const {
|
future<size_t> db::batchlog_manager::count_all_batches() const {
|
||||||
sstring query = format("SELECT count(*) FROM {}.{}", system_keyspace::NAME, system_keyspace::BATCHLOG);
|
sstring query = format("SELECT count(*) FROM {}.{} BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG);
|
||||||
return _qp.execute_internal(query, cql3::query_processor::cache_internal::yes).then([](::shared_ptr<cql3::untyped_result_set> rs) {
|
return _qp.execute_internal(query, cql3::query_processor::cache_internal::yes).then([](::shared_ptr<cql3::untyped_result_set> rs) {
|
||||||
return size_t(rs->one().get_as<int64_t>("count"));
|
return size_t(rs->one().get_as<int64_t>("count"));
|
||||||
});
|
});
|
||||||
@@ -154,26 +154,26 @@ future<> db::batchlog_manager::replay_all_failed_batches() {
|
|||||||
auto throttle = _replay_rate / _qp.proxy().get_token_metadata_ptr()->count_normal_token_owners();
|
auto throttle = _replay_rate / _qp.proxy().get_token_metadata_ptr()->count_normal_token_owners();
|
||||||
auto limiter = make_lw_shared<utils::rate_limiter>(throttle);
|
auto limiter = make_lw_shared<utils::rate_limiter>(throttle);
|
||||||
|
|
||||||
auto batch = [this, limiter](const cql3::untyped_result_set::row& row) {
|
auto batch = [this, limiter](const cql3::untyped_result_set::row& row) -> future<stop_iteration> {
|
||||||
auto written_at = row.get_as<db_clock::time_point>("written_at");
|
auto written_at = row.get_as<db_clock::time_point>("written_at");
|
||||||
auto id = row.get_as<utils::UUID>("id");
|
auto id = row.get_as<utils::UUID>("id");
|
||||||
// enough time for the actual write + batchlog entry mutation delivery (two separate requests).
|
// enough time for the actual write + batchlog entry mutation delivery (two separate requests).
|
||||||
auto timeout = get_batch_log_timeout();
|
auto timeout = get_batch_log_timeout();
|
||||||
if (db_clock::now() < written_at + timeout) {
|
if (db_clock::now() < written_at + timeout) {
|
||||||
blogger.debug("Skipping replay of {}, too fresh", id);
|
blogger.debug("Skipping replay of {}, too fresh", id);
|
||||||
return make_ready_future<>();
|
return make_ready_future<stop_iteration>(stop_iteration::no);
|
||||||
}
|
}
|
||||||
|
|
||||||
// check version of serialization format
|
// check version of serialization format
|
||||||
if (!row.has("version")) {
|
if (!row.has("version")) {
|
||||||
blogger.warn("Skipping logged batch because of unknown version");
|
blogger.warn("Skipping logged batch because of unknown version");
|
||||||
return make_ready_future<>();
|
return make_ready_future<stop_iteration>(stop_iteration::no);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto version = row.get_as<int32_t>("version");
|
auto version = row.get_as<int32_t>("version");
|
||||||
if (version != netw::messaging_service::current_version) {
|
if (version != netw::messaging_service::current_version) {
|
||||||
blogger.warn("Skipping logged batch because of incorrect version");
|
blogger.warn("Skipping logged batch because of incorrect version");
|
||||||
return make_ready_future<>();
|
return make_ready_future<stop_iteration>(stop_iteration::no);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto data = row.get_blob("data");
|
auto data = row.get_blob("data");
|
||||||
@@ -255,49 +255,20 @@ future<> db::batchlog_manager::replay_all_failed_batches() {
|
|||||||
auto now = service::client_state(service::client_state::internal_tag()).get_timestamp();
|
auto now = service::client_state(service::client_state::internal_tag()).get_timestamp();
|
||||||
m.partition().apply_delete(*schema, clustering_key_prefix::make_empty(), tombstone(now, gc_clock::now()));
|
m.partition().apply_delete(*schema, clustering_key_prefix::make_empty(), tombstone(now, gc_clock::now()));
|
||||||
return _qp.proxy().mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
|
return _qp.proxy().mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
|
||||||
});
|
}).then([] { return make_ready_future<stop_iteration>(stop_iteration::no); });
|
||||||
};
|
};
|
||||||
|
|
||||||
return seastar::with_gate(_gate, [this, batch = std::move(batch)] {
|
return seastar::with_gate(_gate, [this, batch = std::move(batch)] () mutable {
|
||||||
blogger.debug("Started replayAllFailedBatches (cpu {})", this_shard_id());
|
blogger.debug("Started replayAllFailedBatches (cpu {})", this_shard_id());
|
||||||
|
return _qp.query_internal(
|
||||||
typedef ::shared_ptr<cql3::untyped_result_set> page_ptr;
|
format("SELECT id, data, written_at, version FROM {}.{} BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG),
|
||||||
sstring query = format("SELECT id, data, written_at, version FROM {}.{} LIMIT {:d}", system_keyspace::NAME, system_keyspace::BATCHLOG, page_size);
|
db::consistency_level::ONE,
|
||||||
return _qp.execute_internal(query, cql3::query_processor::cache_internal::yes).then([this, batch = std::move(batch)](page_ptr page) {
|
{},
|
||||||
return do_with(std::move(page), [this, batch = std::move(batch)](page_ptr & page) mutable {
|
page_size,
|
||||||
return repeat([this, &page, batch = std::move(batch)]() mutable {
|
std::move(batch)).then([this] {
|
||||||
if (page->empty()) {
|
// Replaying batches could have generated tombstones, flush to disk,
|
||||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
// where they can be compacted away.
|
||||||
}
|
return replica::database::flush_table_on_all_shards(_qp.proxy().get_db(), system_keyspace::NAME, system_keyspace::BATCHLOG);
|
||||||
auto id = page->back().get_as<utils::UUID>("id");
|
|
||||||
return parallel_for_each(*page, batch).then([this, &page, id]() {
|
|
||||||
if (page->size() < page_size) {
|
|
||||||
return make_ready_future<stop_iteration>(stop_iteration::yes); // we've exhausted the batchlog, next query would be empty.
|
|
||||||
}
|
|
||||||
sstring query = format("SELECT id, data, written_at, version FROM {}.{} WHERE token(id) > token(?) LIMIT {:d}",
|
|
||||||
system_keyspace::NAME,
|
|
||||||
system_keyspace::BATCHLOG,
|
|
||||||
page_size);
|
|
||||||
return _qp.execute_internal(query, {id}, cql3::query_processor::cache_internal::yes).then([&page](auto res) {
|
|
||||||
page = std::move(res);
|
|
||||||
return make_ready_future<stop_iteration>(stop_iteration::no);
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}).then([] {
|
|
||||||
// TODO FIXME : cleanup()
|
|
||||||
#if 0
|
|
||||||
ColumnFamilyStore cfs = Keyspace.open(SystemKeyspace.NAME).getColumnFamilyStore(SystemKeyspace.BATCHLOG);
|
|
||||||
cfs.forceBlockingFlush();
|
|
||||||
Collection<Descriptor> descriptors = new ArrayList<>();
|
|
||||||
for (SSTableReader sstr : cfs.getSSTables())
|
|
||||||
descriptors.add(sstr.descriptor);
|
|
||||||
if (!descriptors.isEmpty()) // don't pollute the logs if there is nothing to compact.
|
|
||||||
CompactionManager.instance.submitUserDefined(cfs, descriptors, Integer.MAX_VALUE).get();
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
}).then([] {
|
}).then([] {
|
||||||
blogger.debug("Finished replayAllFailedBatches");
|
blogger.debug("Finished replayAllFailedBatches");
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -2628,12 +2628,20 @@ db::commitlog::read_log_file(sstring filename, sstring pfx, commit_load_reader_f
|
|||||||
return eof || next == pos;
|
return eof || next == pos;
|
||||||
}
|
}
|
||||||
future<> skip(size_t bytes) {
|
future<> skip(size_t bytes) {
|
||||||
pos += bytes;
|
auto n = std::min(file_size - pos, bytes);
|
||||||
if (pos > file_size) {
|
pos += n;
|
||||||
|
if (pos == file_size) {
|
||||||
eof = true;
|
eof = true;
|
||||||
pos = file_size;
|
|
||||||
}
|
}
|
||||||
return fin.skip(bytes);
|
if (n < bytes) {
|
||||||
|
// if we are trying to skip past end, we have at least
|
||||||
|
// the bytes skipped or the source from where we read
|
||||||
|
// this corrupt. So add at least four bytes. This is
|
||||||
|
// inexact, but adding the full "bytes" is equally wrong
|
||||||
|
// since it could be complete garbled junk.
|
||||||
|
corrupt_size += std::max(n, sizeof(uint32_t));
|
||||||
|
}
|
||||||
|
return fin.skip(n);
|
||||||
}
|
}
|
||||||
void stop() {
|
void stop() {
|
||||||
eof = true;
|
eof = true;
|
||||||
|
|||||||
14
db/config.cc
14
db/config.cc
@@ -341,6 +341,10 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
|||||||
"If set to higher than 0, ignore the controller's output and set the compaction shares statically. Do not set this unless you know what you are doing and suspect a problem in the controller. This option will be retired when the controller reaches more maturity")
|
"If set to higher than 0, ignore the controller's output and set the compaction shares statically. Do not set this unless you know what you are doing and suspect a problem in the controller. This option will be retired when the controller reaches more maturity")
|
||||||
, compaction_enforce_min_threshold(this, "compaction_enforce_min_threshold", liveness::LiveUpdate, value_status::Used, false,
|
, compaction_enforce_min_threshold(this, "compaction_enforce_min_threshold", liveness::LiveUpdate, value_status::Used, false,
|
||||||
"If set to true, enforce the min_threshold option for compactions strictly. If false (default), Scylla may decide to compact even if below min_threshold")
|
"If set to true, enforce the min_threshold option for compactions strictly. If false (default), Scylla may decide to compact even if below min_threshold")
|
||||||
|
, compaction_flush_all_tables_before_major_seconds(this, "compaction_flush_all_tables_before_major_seconds", value_status::Used, 86400,
|
||||||
|
"Set the minimum interval in seconds between flushing all tables before each major compaction (default is 86400). "
|
||||||
|
"This option is useful for maximizing tombstone garbage collection by releasing all active commitlog segments. "
|
||||||
|
"Set to 0 to disable automatic flushing all tables before major compaction")
|
||||||
/**
|
/**
|
||||||
* @Group Initialization properties
|
* @Group Initialization properties
|
||||||
* @GroupDescription The minimal properties needed for configuring a cluster.
|
* @GroupDescription The minimal properties needed for configuring a cluster.
|
||||||
@@ -485,6 +489,8 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
|||||||
"Adjusts the sensitivity of the failure detector on an exponential scale. Generally this setting never needs adjusting.\n"
|
"Adjusts the sensitivity of the failure detector on an exponential scale. Generally this setting never needs adjusting.\n"
|
||||||
"Related information: Failure detection and recovery")
|
"Related information: Failure detection and recovery")
|
||||||
, failure_detector_timeout_in_ms(this, "failure_detector_timeout_in_ms", liveness::LiveUpdate, value_status::Used, 20 * 1000, "Maximum time between two successful echo message before gossip mark a node down in milliseconds.\n")
|
, failure_detector_timeout_in_ms(this, "failure_detector_timeout_in_ms", liveness::LiveUpdate, value_status::Used, 20 * 1000, "Maximum time between two successful echo message before gossip mark a node down in milliseconds.\n")
|
||||||
|
, direct_failure_detector_ping_timeout_in_ms(this, "direct_failure_detector_ping_timeout_in_ms", value_status::Used, 600, "Duration after which the direct failure detector aborts a ping message, so the next ping can start.\n"
|
||||||
|
"Note: this failure detector is used by Raft, and is different from gossiper's failure detector (configured by `failure_detector_timeout_in_ms`).\n")
|
||||||
/**
|
/**
|
||||||
* @Group Performance tuning properties
|
* @Group Performance tuning properties
|
||||||
* @GroupDescription Tuning performance and system resource utilization, including commit log, compaction, memory, disk I/O, CPU, reads, and writes.
|
* @GroupDescription Tuning performance and system resource utilization, including commit log, compaction, memory, disk I/O, CPU, reads, and writes.
|
||||||
@@ -674,6 +680,9 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
|||||||
"The maximum number of tombstones a query can scan before aborting.")
|
"The maximum number of tombstones a query can scan before aborting.")
|
||||||
, query_tombstone_page_limit(this, "query_tombstone_page_limit", liveness::LiveUpdate, value_status::Used, 10000,
|
, query_tombstone_page_limit(this, "query_tombstone_page_limit", liveness::LiveUpdate, value_status::Used, 10000,
|
||||||
"The number of tombstones after which a query cuts a page, even if not full or even empty.")
|
"The number of tombstones after which a query cuts a page, even if not full or even empty.")
|
||||||
|
, query_page_size_in_bytes(this, "query_page_size_in_bytes", liveness::LiveUpdate, value_status::Used, 1 << 20,
|
||||||
|
"The size of pages in bytes, after a page accumulates this much data, the page is cut and sent to the client."
|
||||||
|
" Setting a too large value increases the risk of OOM.")
|
||||||
/**
|
/**
|
||||||
* @Group Network timeout settings
|
* @Group Network timeout settings
|
||||||
*/
|
*/
|
||||||
@@ -922,6 +931,8 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
|||||||
, enable_repair_based_node_ops(this, "enable_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, true, "Set true to use enable repair based node operations instead of streaming based")
|
, enable_repair_based_node_ops(this, "enable_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, true, "Set true to use enable repair based node operations instead of streaming based")
|
||||||
, allowed_repair_based_node_ops(this, "allowed_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, "replace,removenode,rebuild,bootstrap,decommission", "A comma separated list of node operations which are allowed to enable repair based node operations. The operations can be bootstrap, replace, removenode, decommission and rebuild")
|
, allowed_repair_based_node_ops(this, "allowed_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, "replace,removenode,rebuild,bootstrap,decommission", "A comma separated list of node operations which are allowed to enable repair based node operations. The operations can be bootstrap, replace, removenode, decommission and rebuild")
|
||||||
, enable_compacting_data_for_streaming_and_repair(this, "enable_compacting_data_for_streaming_and_repair", liveness::LiveUpdate, value_status::Used, true, "Enable the compacting reader, which compacts the data for streaming and repair (load'n'stream included) before sending it to, or synchronizing it with peers. Can reduce the amount of data to be processed by removing dead data, but adds CPU overhead.")
|
, enable_compacting_data_for_streaming_and_repair(this, "enable_compacting_data_for_streaming_and_repair", liveness::LiveUpdate, value_status::Used, true, "Enable the compacting reader, which compacts the data for streaming and repair (load'n'stream included) before sending it to, or synchronizing it with peers. Can reduce the amount of data to be processed by removing dead data, but adds CPU overhead.")
|
||||||
|
, repair_partition_count_estimation_ratio(this, "repair_partition_count_estimation_ratio", liveness::LiveUpdate, value_status::Used, 0.1,
|
||||||
|
"Specify the fraction of partitions written by repair out of the total partitions. The value is currently only used for bloom filter estimation. Value is between 0 and 1.")
|
||||||
, ring_delay_ms(this, "ring_delay_ms", value_status::Used, 30 * 1000, "Time a node waits to hear from other nodes before joining the ring in milliseconds. Same as -Dcassandra.ring_delay_ms in cassandra.")
|
, ring_delay_ms(this, "ring_delay_ms", value_status::Used, 30 * 1000, "Time a node waits to hear from other nodes before joining the ring in milliseconds. Same as -Dcassandra.ring_delay_ms in cassandra.")
|
||||||
, shadow_round_ms(this, "shadow_round_ms", value_status::Used, 300 * 1000, "The maximum gossip shadow round time. Can be used to reduce the gossip feature check time during node boot up.")
|
, shadow_round_ms(this, "shadow_round_ms", value_status::Used, 300 * 1000, "The maximum gossip shadow round time. Can be used to reduce the gossip feature check time during node boot up.")
|
||||||
, fd_max_interval_ms(this, "fd_max_interval_ms", value_status::Used, 2 * 1000, "The maximum failure_detector interval time in milliseconds. Interval larger than the maximum will be ignored. Larger cluster may need to increase the default.")
|
, fd_max_interval_ms(this, "fd_max_interval_ms", value_status::Used, 2 * 1000, "The maximum failure_detector interval time in milliseconds. Interval larger than the maximum will be ignored. Larger cluster may need to increase the default.")
|
||||||
@@ -940,6 +951,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
|||||||
, unspooled_dirty_soft_limit(this, "unspooled_dirty_soft_limit", value_status::Used, 0.6, "Soft limit of unspooled dirty memory expressed as a portion of the hard limit")
|
, unspooled_dirty_soft_limit(this, "unspooled_dirty_soft_limit", value_status::Used, 0.6, "Soft limit of unspooled dirty memory expressed as a portion of the hard limit")
|
||||||
, sstable_summary_ratio(this, "sstable_summary_ratio", value_status::Used, 0.0005, "Enforces that 1 byte of summary is written for every N (2000 by default) "
|
, sstable_summary_ratio(this, "sstable_summary_ratio", value_status::Used, 0.0005, "Enforces that 1 byte of summary is written for every N (2000 by default) "
|
||||||
"bytes written to data file. Value must be between 0 and 1.")
|
"bytes written to data file. Value must be between 0 and 1.")
|
||||||
|
, components_memory_reclaim_threshold(this, "components_memory_reclaim_threshold", liveness::LiveUpdate, value_status::Used, .2, "Ratio of available memory for all in-memory components of SSTables in a shard beyond which the memory will be reclaimed from components until it falls back under the threshold. Currently, this limit is only enforced for bloom filters.")
|
||||||
, large_memory_allocation_warning_threshold(this, "large_memory_allocation_warning_threshold", value_status::Used, size_t(1) << 20, "Warn about memory allocations above this size; set to zero to disable")
|
, large_memory_allocation_warning_threshold(this, "large_memory_allocation_warning_threshold", value_status::Used, size_t(1) << 20, "Warn about memory allocations above this size; set to zero to disable")
|
||||||
, enable_deprecated_partitioners(this, "enable_deprecated_partitioners", value_status::Used, false, "Enable the byteordered and random partitioners. These partitioners are deprecated and will be removed in a future version.")
|
, enable_deprecated_partitioners(this, "enable_deprecated_partitioners", value_status::Used, false, "Enable the byteordered and random partitioners. These partitioners are deprecated and will be removed in a future version.")
|
||||||
, enable_keyspace_column_family_metrics(this, "enable_keyspace_column_family_metrics", value_status::Used, false, "Enable per keyspace and per column family metrics reporting")
|
, enable_keyspace_column_family_metrics(this, "enable_keyspace_column_family_metrics", value_status::Used, false, "Enable per keyspace and per column family metrics reporting")
|
||||||
@@ -979,6 +991,8 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
|||||||
"Start serializing reads after their collective memory consumption goes above $normal_limit * $multiplier.")
|
"Start serializing reads after their collective memory consumption goes above $normal_limit * $multiplier.")
|
||||||
, reader_concurrency_semaphore_kill_limit_multiplier(this, "reader_concurrency_semaphore_kill_limit_multiplier", liveness::LiveUpdate, value_status::Used, 4,
|
, reader_concurrency_semaphore_kill_limit_multiplier(this, "reader_concurrency_semaphore_kill_limit_multiplier", liveness::LiveUpdate, value_status::Used, 4,
|
||||||
"Start killing reads after their collective memory consumption goes above $normal_limit * $multiplier.")
|
"Start killing reads after their collective memory consumption goes above $normal_limit * $multiplier.")
|
||||||
|
, reader_concurrency_semaphore_cpu_concurrency(this, "reader_concurrency_semaphore_cpu_concurrency", liveness::LiveUpdate, value_status::Used, 1,
|
||||||
|
"Admit new reads while there are less than this number of requests that need CPU.")
|
||||||
, twcs_max_window_count(this, "twcs_max_window_count", liveness::LiveUpdate, value_status::Used, 50,
|
, twcs_max_window_count(this, "twcs_max_window_count", liveness::LiveUpdate, value_status::Used, 50,
|
||||||
"The maximum number of compaction windows allowed when making use of TimeWindowCompactionStrategy. A setting of 0 effectively disables the restriction.")
|
"The maximum number of compaction windows allowed when making use of TimeWindowCompactionStrategy. A setting of 0 effectively disables the restriction.")
|
||||||
, initial_sstable_loading_concurrency(this, "initial_sstable_loading_concurrency", value_status::Used, 4u,
|
, initial_sstable_loading_concurrency(this, "initial_sstable_loading_concurrency", value_status::Used, 4u,
|
||||||
|
|||||||
@@ -163,6 +163,7 @@ public:
|
|||||||
named_value<float> memtable_flush_static_shares;
|
named_value<float> memtable_flush_static_shares;
|
||||||
named_value<float> compaction_static_shares;
|
named_value<float> compaction_static_shares;
|
||||||
named_value<bool> compaction_enforce_min_threshold;
|
named_value<bool> compaction_enforce_min_threshold;
|
||||||
|
named_value<uint32_t> compaction_flush_all_tables_before_major_seconds;
|
||||||
named_value<sstring> cluster_name;
|
named_value<sstring> cluster_name;
|
||||||
named_value<sstring> listen_address;
|
named_value<sstring> listen_address;
|
||||||
named_value<sstring> listen_interface;
|
named_value<sstring> listen_interface;
|
||||||
@@ -195,6 +196,7 @@ public:
|
|||||||
named_value<bool> snapshot_before_compaction;
|
named_value<bool> snapshot_before_compaction;
|
||||||
named_value<uint32_t> phi_convict_threshold;
|
named_value<uint32_t> phi_convict_threshold;
|
||||||
named_value<uint32_t> failure_detector_timeout_in_ms;
|
named_value<uint32_t> failure_detector_timeout_in_ms;
|
||||||
|
named_value<uint32_t> direct_failure_detector_ping_timeout_in_ms;
|
||||||
named_value<sstring> commitlog_sync;
|
named_value<sstring> commitlog_sync;
|
||||||
named_value<uint32_t> commitlog_segment_size_in_mb;
|
named_value<uint32_t> commitlog_segment_size_in_mb;
|
||||||
named_value<uint32_t> schema_commitlog_segment_size_in_mb;
|
named_value<uint32_t> schema_commitlog_segment_size_in_mb;
|
||||||
@@ -253,6 +255,7 @@ public:
|
|||||||
named_value<uint32_t> tombstone_warn_threshold;
|
named_value<uint32_t> tombstone_warn_threshold;
|
||||||
named_value<uint32_t> tombstone_failure_threshold;
|
named_value<uint32_t> tombstone_failure_threshold;
|
||||||
named_value<uint64_t> query_tombstone_page_limit;
|
named_value<uint64_t> query_tombstone_page_limit;
|
||||||
|
named_value<uint64_t> query_page_size_in_bytes;
|
||||||
named_value<uint32_t> range_request_timeout_in_ms;
|
named_value<uint32_t> range_request_timeout_in_ms;
|
||||||
named_value<uint32_t> read_request_timeout_in_ms;
|
named_value<uint32_t> read_request_timeout_in_ms;
|
||||||
named_value<uint32_t> counter_write_request_timeout_in_ms;
|
named_value<uint32_t> counter_write_request_timeout_in_ms;
|
||||||
@@ -328,6 +331,7 @@ public:
|
|||||||
named_value<bool> enable_repair_based_node_ops;
|
named_value<bool> enable_repair_based_node_ops;
|
||||||
named_value<sstring> allowed_repair_based_node_ops;
|
named_value<sstring> allowed_repair_based_node_ops;
|
||||||
named_value<bool> enable_compacting_data_for_streaming_and_repair;
|
named_value<bool> enable_compacting_data_for_streaming_and_repair;
|
||||||
|
named_value<double> repair_partition_count_estimation_ratio;
|
||||||
named_value<uint32_t> ring_delay_ms;
|
named_value<uint32_t> ring_delay_ms;
|
||||||
named_value<uint32_t> shadow_round_ms;
|
named_value<uint32_t> shadow_round_ms;
|
||||||
named_value<uint32_t> fd_max_interval_ms;
|
named_value<uint32_t> fd_max_interval_ms;
|
||||||
@@ -345,6 +349,7 @@ public:
|
|||||||
named_value<unsigned> murmur3_partitioner_ignore_msb_bits;
|
named_value<unsigned> murmur3_partitioner_ignore_msb_bits;
|
||||||
named_value<double> unspooled_dirty_soft_limit;
|
named_value<double> unspooled_dirty_soft_limit;
|
||||||
named_value<double> sstable_summary_ratio;
|
named_value<double> sstable_summary_ratio;
|
||||||
|
named_value<double> components_memory_reclaim_threshold;
|
||||||
named_value<size_t> large_memory_allocation_warning_threshold;
|
named_value<size_t> large_memory_allocation_warning_threshold;
|
||||||
named_value<bool> enable_deprecated_partitioners;
|
named_value<bool> enable_deprecated_partitioners;
|
||||||
named_value<bool> enable_keyspace_column_family_metrics;
|
named_value<bool> enable_keyspace_column_family_metrics;
|
||||||
@@ -368,6 +373,7 @@ public:
|
|||||||
named_value<uint64_t> max_memory_for_unlimited_query_hard_limit;
|
named_value<uint64_t> max_memory_for_unlimited_query_hard_limit;
|
||||||
named_value<uint32_t> reader_concurrency_semaphore_serialize_limit_multiplier;
|
named_value<uint32_t> reader_concurrency_semaphore_serialize_limit_multiplier;
|
||||||
named_value<uint32_t> reader_concurrency_semaphore_kill_limit_multiplier;
|
named_value<uint32_t> reader_concurrency_semaphore_kill_limit_multiplier;
|
||||||
|
named_value<uint32_t> reader_concurrency_semaphore_cpu_concurrency;
|
||||||
named_value<uint32_t> twcs_max_window_count;
|
named_value<uint32_t> twcs_max_window_count;
|
||||||
named_value<unsigned> initial_sstable_loading_concurrency;
|
named_value<unsigned> initial_sstable_loading_concurrency;
|
||||||
named_value<bool> enable_3_1_0_compatibility_mode;
|
named_value<bool> enable_3_1_0_compatibility_mode;
|
||||||
|
|||||||
@@ -155,7 +155,7 @@ future<> cql_table_large_data_handler::try_record(std::string_view large_table,
|
|||||||
const auto sstable_name = large_data_handler::sst_filename(sst);
|
const auto sstable_name = large_data_handler::sst_filename(sst);
|
||||||
std::string pk_str = key_to_str(partition_key.to_partition_key(s), s);
|
std::string pk_str = key_to_str(partition_key.to_partition_key(s), s);
|
||||||
auto timestamp = db_clock::now();
|
auto timestamp = db_clock::now();
|
||||||
large_data_logger.warn("Writing large {} {}/{}: {}{} ({} bytes) to {}", desc, ks_name, cf_name, pk_str, extra_path, size, sstable_name);
|
large_data_logger.warn("Writing large {} {}/{}: {} ({} bytes) to {}", desc, ks_name, cf_name, extra_path, size, sstable_name);
|
||||||
return _sys_ks->execute_cql(req, ks_name, cf_name, sstable_name, size, pk_str, timestamp, args...)
|
return _sys_ks->execute_cql(req, ks_name, cf_name, sstable_name, size, pk_str, timestamp, args...)
|
||||||
.discard_result()
|
.discard_result()
|
||||||
.handle_exception([ks_name, cf_name, large_table, sstable_name] (std::exception_ptr ep) {
|
.handle_exception([ks_name, cf_name, large_table, sstable_name] (std::exception_ptr ep) {
|
||||||
@@ -182,10 +182,10 @@ future<> cql_table_large_data_handler::internal_record_large_cells(const sstable
|
|||||||
if (clustering_key) {
|
if (clustering_key) {
|
||||||
const schema &s = *sst.get_schema();
|
const schema &s = *sst.get_schema();
|
||||||
auto ck_str = key_to_str(*clustering_key, s);
|
auto ck_str = key_to_str(*clustering_key, s);
|
||||||
return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, format("/{}/{}", ck_str, column_name), extra_fields, ck_str, column_name);
|
return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, ck_str, column_name);
|
||||||
} else {
|
} else {
|
||||||
auto desc = format("static {}", cell_type);
|
auto desc = format("static {}", cell_type);
|
||||||
return try_record("cell", sst, partition_key, int64_t(cell_size), desc, format("//{}", column_name), extra_fields, data_value::make_null(utf8_type), column_name);
|
return try_record("cell", sst, partition_key, int64_t(cell_size), desc, column_name, extra_fields, data_value::make_null(utf8_type), column_name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -197,10 +197,10 @@ future<> cql_table_large_data_handler::internal_record_large_cells_and_collectio
|
|||||||
if (clustering_key) {
|
if (clustering_key) {
|
||||||
const schema &s = *sst.get_schema();
|
const schema &s = *sst.get_schema();
|
||||||
auto ck_str = key_to_str(*clustering_key, s);
|
auto ck_str = key_to_str(*clustering_key, s);
|
||||||
return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, format("/{}/{}", ck_str, column_name), extra_fields, ck_str, column_name, data_value((int64_t)collection_elements));
|
return try_record("cell", sst, partition_key, int64_t(cell_size), cell_type, column_name, extra_fields, ck_str, column_name, data_value((int64_t)collection_elements));
|
||||||
} else {
|
} else {
|
||||||
auto desc = format("static {}", cell_type);
|
auto desc = format("static {}", cell_type);
|
||||||
return try_record("cell", sst, partition_key, int64_t(cell_size), desc, format("//{}", column_name), extra_fields, data_value::make_null(utf8_type), column_name, data_value((int64_t)collection_elements));
|
return try_record("cell", sst, partition_key, int64_t(cell_size), desc, column_name, extra_fields, data_value::make_null(utf8_type), column_name, data_value((int64_t)collection_elements));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -210,7 +210,7 @@ future<> cql_table_large_data_handler::record_large_rows(const sstables::sstable
|
|||||||
if (clustering_key) {
|
if (clustering_key) {
|
||||||
const schema &s = *sst.get_schema();
|
const schema &s = *sst.get_schema();
|
||||||
std::string ck_str = key_to_str(*clustering_key, s);
|
std::string ck_str = key_to_str(*clustering_key, s);
|
||||||
return try_record("row", sst, partition_key, int64_t(row_size), "row", format("/{}", ck_str), extra_fields, ck_str);
|
return try_record("row", sst, partition_key, int64_t(row_size), "row", "", extra_fields, ck_str);
|
||||||
} else {
|
} else {
|
||||||
return try_record("row", sst, partition_key, int64_t(row_size), "static row", "", extra_fields, data_value::make_null(utf8_type));
|
return try_record("row", sst, partition_key, int64_t(row_size), "static row", "", extra_fields, data_value::make_null(utf8_type));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -55,6 +55,10 @@ public:
|
|||||||
return ser::serialize_to_buffer<bytes>(_paxos_gc_sec);
|
return ser::serialize_to_buffer<bytes>(_paxos_gc_sec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string options_to_string() const override {
|
||||||
|
return std::to_string(_paxos_gc_sec);
|
||||||
|
}
|
||||||
|
|
||||||
static int32_t deserialize(const bytes_view& buffer) {
|
static int32_t deserialize(const bytes_view& buffer) {
|
||||||
return ser::deserialize_from_buffer(buffer, boost::type<int32_t>());
|
return ser::deserialize_from_buffer(buffer, boost::type<int32_t>());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -973,7 +973,7 @@ future<> merge_schema(sharded<db::system_keyspace>& sys_ks, distributed<service:
|
|||||||
if (this_shard_id() != 0) {
|
if (this_shard_id() != 0) {
|
||||||
// mutations must be applied on the owning shard (0).
|
// mutations must be applied on the owning shard (0).
|
||||||
co_await smp::submit_to(0, [&, fmuts = freeze(mutations)] () mutable -> future<> {
|
co_await smp::submit_to(0, [&, fmuts = freeze(mutations)] () mutable -> future<> {
|
||||||
return merge_schema(sys_ks, proxy, feat, unfreeze(fmuts));
|
return merge_schema(sys_ks, proxy, feat, unfreeze(fmuts), reload);
|
||||||
});
|
});
|
||||||
co_return;
|
co_return;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1879,8 +1879,7 @@ std::vector<schema_ptr> system_keyspace::all_tables(const db::config& cfg) {
|
|||||||
|
|
||||||
static bool maybe_write_in_user_memory(schema_ptr s) {
|
static bool maybe_write_in_user_memory(schema_ptr s) {
|
||||||
return (s.get() == system_keyspace::batchlog().get()) || (s.get() == system_keyspace::paxos().get())
|
return (s.get() == system_keyspace::batchlog().get()) || (s.get() == system_keyspace::paxos().get())
|
||||||
|| s == system_keyspace::v3::scylla_views_builds_in_progress()
|
|| s == system_keyspace::v3::scylla_views_builds_in_progress();
|
||||||
|| s == system_keyspace::raft();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> system_keyspace::make(
|
future<> system_keyspace::make(
|
||||||
@@ -1888,6 +1887,7 @@ future<> system_keyspace::make(
|
|||||||
replica::database& db) {
|
replica::database& db) {
|
||||||
for (auto&& table : system_keyspace::all_tables(db.get_config())) {
|
for (auto&& table : system_keyspace::all_tables(db.get_config())) {
|
||||||
co_await db.create_local_system_table(table, maybe_write_in_user_memory(table), erm_factory);
|
co_await db.create_local_system_table(table, maybe_write_in_user_memory(table), erm_factory);
|
||||||
|
co_await db.find_column_family(table).init_storage();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
139
db/view/view.cc
139
db/view/view.cc
@@ -493,37 +493,56 @@ mutation_partition& view_updates::partition_for(partition_key&& key) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
size_t view_updates::op_count() const {
|
size_t view_updates::op_count() const {
|
||||||
return _op_count++;;
|
return _op_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
row_marker view_updates::compute_row_marker(const clustering_or_static_row& base_row) const {
|
row_marker view_updates::compute_row_marker(const clustering_or_static_row& base_row) const {
|
||||||
/*
|
/*
|
||||||
* We need to compute both the timestamp and expiration.
|
* We need to compute both the timestamp and expiration for view rows.
|
||||||
*
|
*
|
||||||
* There are 3 cases:
|
* Below there are several distinct cases depending on how many new key
|
||||||
* 1) There is a column that is not in the base PK but is in the view PK. In that case, as long as that column
|
* columns the view has - i.e., how many of the view's key columns were
|
||||||
* lives, the view entry does too, but as soon as it expires (or is deleted for that matter) the entry also
|
* regular columns in the base. base_regular_columns_in_view_pk.size():
|
||||||
* should expire. So the expiration for the view is the one of that column, regardless of any other expiration.
|
*
|
||||||
* To take an example of that case, if you have:
|
* Zero new key columns:
|
||||||
* CREATE TABLE t (a int, b int, c int, PRIMARY KEY (a, b))
|
* The view rows key is composed only from base key columns, and those
|
||||||
* CREATE MATERIALIZED VIEW mv AS SELECT * FROM t WHERE c IS NOT NULL AND a IS NOT NULL AND b IS NOT NULL PRIMARY KEY (c, a, b)
|
* cannot be changed in an update, so the view row remains alive as
|
||||||
* INSERT INTO t(a, b) VALUES (0, 0) USING TTL 3;
|
* long as the base row is alive. We need to return the same row
|
||||||
* UPDATE t SET c = 0 WHERE a = 0 AND b = 0;
|
* marker as the base for the view - to keep an empty view row alive
|
||||||
* then even after 3 seconds elapsed, the row will still exist (it just won't have a "row marker" anymore) and so
|
* for as long as an empty base row exists.
|
||||||
* the MV should still have a corresponding entry.
|
* Note that in this case, if there are *unselected* base columns, we
|
||||||
* This cell determines the liveness of the view row.
|
* may need to keep an empty view row alive even without a row marker
|
||||||
* 2) The columns for the base and view PKs are exactly the same, and all base columns are selected by the view.
|
* because the base row (which has additional columns) is still alive.
|
||||||
* In that case, all components (marker, deletion and cells) are the same and trivially mapped.
|
* For that we have the "virtual columns" feature: In the zero new
|
||||||
* 3) The columns for the base and view PKs are exactly the same, but some base columns are not selected in the view.
|
* key columns case, we put unselected columns in the view as empty
|
||||||
* Use the max timestamp out of the base row marker and all the unselected columns - this ensures we can keep the
|
* columns, to keep the view row alive.
|
||||||
* view row alive. Do the same thing for the expiration, if the marker is dead or will expire, and so
|
*
|
||||||
* will all unselected columns.
|
* One new key column:
|
||||||
|
* In this case, there is a regular base column that is part of the
|
||||||
|
* view key. This regular column can be added or deleted in an update,
|
||||||
|
* or its expiration be set, and those can cause the view row -
|
||||||
|
* including its row marker - to need to appear or disappear as well.
|
||||||
|
* So the liveness of cell of this one column determines the liveness
|
||||||
|
* of the view row and the row marker that we return.
|
||||||
|
*
|
||||||
|
* Two or more new key columns:
|
||||||
|
* This case is explicitly NOT supported in CQL - one cannot create a
|
||||||
|
* view with more than one base-regular columns in its key. In general
|
||||||
|
* picking one liveness (timestamp and expiration) is not possible
|
||||||
|
* if there are multiple regular base columns in the view key, as
|
||||||
|
* those can have different liveness.
|
||||||
|
* However, we do allow this case for Alternator - we need to allow
|
||||||
|
* the case of two (but not more) because the DynamoDB API allows
|
||||||
|
* creating a GSI whose two key columns (hash and range key) were
|
||||||
|
* regular columns.
|
||||||
|
* We can support this case in Alternator because it doesn't use
|
||||||
|
* expiration (the "TTL" it does support is different), and doesn't
|
||||||
|
* support user-defined timestamps. But, the two columns can still
|
||||||
|
* have different timestamps - this happens if an update modifies
|
||||||
|
* just one of them. In this case the timestamp of the view update
|
||||||
|
* (and that of the row marker we return) is the later of these two
|
||||||
|
* updated columns.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// WARNING: The code assumes that if multiple regular base columns are present in the view key,
|
|
||||||
// they share liveness information. It's true especially in the only case currently allowed by CQL,
|
|
||||||
// which assumes there's up to one non-pk column in the view key. It's also true in alternator,
|
|
||||||
// which does not carry TTL information.
|
|
||||||
const auto& col_ids = base_row.is_clustering_row()
|
const auto& col_ids = base_row.is_clustering_row()
|
||||||
? _base_info->base_regular_columns_in_view_pk()
|
? _base_info->base_regular_columns_in_view_pk()
|
||||||
: _base_info->base_static_columns_in_view_pk();
|
: _base_info->base_static_columns_in_view_pk();
|
||||||
@@ -531,7 +550,20 @@ row_marker view_updates::compute_row_marker(const clustering_or_static_row& base
|
|||||||
auto& def = _base->column_at(base_row.column_kind(), col_ids[0]);
|
auto& def = _base->column_at(base_row.column_kind(), col_ids[0]);
|
||||||
// Note: multi-cell columns can't be part of the primary key.
|
// Note: multi-cell columns can't be part of the primary key.
|
||||||
auto cell = base_row.cells().cell_at(col_ids[0]).as_atomic_cell(def);
|
auto cell = base_row.cells().cell_at(col_ids[0]).as_atomic_cell(def);
|
||||||
return cell.is_live_and_has_ttl() ? row_marker(cell.timestamp(), cell.ttl(), cell.expiry()) : row_marker(cell.timestamp());
|
auto ts = cell.timestamp();
|
||||||
|
if (col_ids.size() > 1){
|
||||||
|
// As explained above, this case only happens in Alternator,
|
||||||
|
// and we may need to pick a higher ts:
|
||||||
|
auto& second_def = _base->column_at(base_row.column_kind(), col_ids[1]);
|
||||||
|
auto second_cell = base_row.cells().cell_at(col_ids[1]).as_atomic_cell(second_def);
|
||||||
|
auto second_ts = second_cell.timestamp();
|
||||||
|
ts = std::max(ts, second_ts);
|
||||||
|
// Alternator isn't supposed to have TTL or more than two col_ids!
|
||||||
|
if (col_ids.size() != 2 || cell.is_live_and_has_ttl() || second_cell.is_live_and_has_ttl()) [[unlikely]] {
|
||||||
|
utils::on_internal_error(format("Unexpected col_ids length {} or has TTL", col_ids.size()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return cell.is_live_and_has_ttl() ? row_marker(ts, cell.ttl(), cell.expiry()) : row_marker(ts);
|
||||||
}
|
}
|
||||||
|
|
||||||
return base_row.marker();
|
return base_row.marker();
|
||||||
@@ -930,8 +962,22 @@ void view_updates::do_delete_old_entry(const partition_key& base_key, const clus
|
|||||||
// Note: multi-cell columns can't be part of the primary key.
|
// Note: multi-cell columns can't be part of the primary key.
|
||||||
auto& def = _base->column_at(kind, col_ids[0]);
|
auto& def = _base->column_at(kind, col_ids[0]);
|
||||||
auto cell = existing.cells().cell_at(col_ids[0]).as_atomic_cell(def);
|
auto cell = existing.cells().cell_at(col_ids[0]).as_atomic_cell(def);
|
||||||
|
auto ts = cell.timestamp();
|
||||||
|
if (col_ids.size() > 1) {
|
||||||
|
// This is the Alternator-only support for two regular base
|
||||||
|
// columns that become view key columns. See explanation in
|
||||||
|
// view_updates::compute_row_marker().
|
||||||
|
auto& second_def = _base->column_at(kind, col_ids[1]);
|
||||||
|
auto second_cell = existing.cells().cell_at(col_ids[1]).as_atomic_cell(second_def);
|
||||||
|
auto second_ts = second_cell.timestamp();
|
||||||
|
ts = std::max(ts, second_ts);
|
||||||
|
// Alternator isn't supposed to have more than two col_ids!
|
||||||
|
if (col_ids.size() != 2) [[unlikely]] {
|
||||||
|
utils::on_internal_error(format("Unexpected col_ids length {}", col_ids.size()));
|
||||||
|
}
|
||||||
|
}
|
||||||
if (cell.is_live()) {
|
if (cell.is_live()) {
|
||||||
r->apply(shadowable_tombstone(cell.timestamp(), now));
|
r->apply(shadowable_tombstone(ts, now));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// "update" caused the base row to have been deleted, and !col_id
|
// "update" caused the base row to have been deleted, and !col_id
|
||||||
@@ -1316,11 +1362,12 @@ void view_update_builder::generate_update(static_row&& update, const tombstone&
|
|||||||
|
|
||||||
future<stop_iteration> view_update_builder::on_results() {
|
future<stop_iteration> view_update_builder::on_results() {
|
||||||
constexpr size_t max_rows_for_view_updates = 100;
|
constexpr size_t max_rows_for_view_updates = 100;
|
||||||
size_t rows_for_view_updates = std::accumulate(_view_updates.begin(), _view_updates.end(), 0, [] (size_t acc, const view_updates& vu) {
|
auto should_stop_updates = [this] () -> bool {
|
||||||
return acc + vu.op_count();
|
size_t rows_for_view_updates = std::accumulate(_view_updates.begin(), _view_updates.end(), 0, [] (size_t acc, const view_updates& vu) {
|
||||||
});
|
return acc + vu.op_count();
|
||||||
const bool stop_updates = rows_for_view_updates >= max_rows_for_view_updates;
|
});
|
||||||
|
return rows_for_view_updates >= max_rows_for_view_updates;
|
||||||
|
};
|
||||||
if (_update && !_update->is_end_of_partition() && _existing && !_existing->is_end_of_partition()) {
|
if (_update && !_update->is_end_of_partition() && _existing && !_existing->is_end_of_partition()) {
|
||||||
auto cmp = position_in_partition::tri_compare(*_schema)(_update->position(), _existing->position());
|
auto cmp = position_in_partition::tri_compare(*_schema)(_update->position(), _existing->position());
|
||||||
if (cmp < 0) {
|
if (cmp < 0) {
|
||||||
@@ -1343,7 +1390,7 @@ future<stop_iteration> view_update_builder::on_results() {
|
|||||||
: std::nullopt;
|
: std::nullopt;
|
||||||
generate_update(std::move(update), _update_partition_tombstone, std::move(existing), _existing_partition_tombstone);
|
generate_update(std::move(update), _update_partition_tombstone, std::move(existing), _existing_partition_tombstone);
|
||||||
}
|
}
|
||||||
return stop_updates ? stop() : advance_updates();
|
return should_stop_updates() ? stop() : advance_updates();
|
||||||
}
|
}
|
||||||
if (cmp > 0) {
|
if (cmp > 0) {
|
||||||
// We have something existing but no update (which will happen either because it's a range tombstone marker in
|
// We have something existing but no update (which will happen either because it's a range tombstone marker in
|
||||||
@@ -1379,7 +1426,7 @@ future<stop_iteration> view_update_builder::on_results() {
|
|||||||
generate_update(std::move(update), _update_partition_tombstone, { std::move(existing) }, _existing_partition_tombstone);
|
generate_update(std::move(update), _update_partition_tombstone, { std::move(existing) }, _existing_partition_tombstone);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return stop_updates ? stop () : advance_existings();
|
return should_stop_updates() ? stop () : advance_existings();
|
||||||
}
|
}
|
||||||
// We're updating a row that had pre-existing data
|
// We're updating a row that had pre-existing data
|
||||||
if (_update->is_range_tombstone_change()) {
|
if (_update->is_range_tombstone_change()) {
|
||||||
@@ -1401,8 +1448,9 @@ future<stop_iteration> view_update_builder::on_results() {
|
|||||||
mutation_fragment_v2::printer(*_schema, *_update), mutation_fragment_v2::printer(*_schema, *_existing)));
|
mutation_fragment_v2::printer(*_schema, *_update), mutation_fragment_v2::printer(*_schema, *_existing)));
|
||||||
}
|
}
|
||||||
generate_update(std::move(*_update).as_static_row(), _update_partition_tombstone, { std::move(*_existing).as_static_row() }, _existing_partition_tombstone);
|
generate_update(std::move(*_update).as_static_row(), _update_partition_tombstone, { std::move(*_existing).as_static_row() }, _existing_partition_tombstone);
|
||||||
|
|
||||||
}
|
}
|
||||||
return stop_updates ? stop() : advance_all();
|
return should_stop_updates() ? stop() : advance_all();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto tombstone = std::max(_update_partition_tombstone, _update_current_tombstone);
|
auto tombstone = std::max(_update_partition_tombstone, _update_current_tombstone);
|
||||||
@@ -1417,7 +1465,7 @@ future<stop_iteration> view_update_builder::on_results() {
|
|||||||
auto update = static_row();
|
auto update = static_row();
|
||||||
generate_update(std::move(update), _update_partition_tombstone, { std::move(existing) }, _existing_partition_tombstone);
|
generate_update(std::move(update), _update_partition_tombstone, { std::move(existing) }, _existing_partition_tombstone);
|
||||||
}
|
}
|
||||||
return stop_updates ? stop() : advance_existings();
|
return should_stop_updates() ? stop() : advance_existings();
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we have updates and it's a range tombstone, it removes nothing pre-exisiting, so we can ignore it
|
// If we have updates and it's a range tombstone, it removes nothing pre-exisiting, so we can ignore it
|
||||||
@@ -1438,7 +1486,7 @@ future<stop_iteration> view_update_builder::on_results() {
|
|||||||
: std::nullopt;
|
: std::nullopt;
|
||||||
generate_update(std::move(*_update).as_static_row(), _update_partition_tombstone, std::move(existing), _existing_partition_tombstone);
|
generate_update(std::move(*_update).as_static_row(), _update_partition_tombstone, std::move(existing), _existing_partition_tombstone);
|
||||||
}
|
}
|
||||||
return stop_updates ? stop() : advance_updates();
|
return should_stop_updates() ? stop() : advance_updates();
|
||||||
}
|
}
|
||||||
|
|
||||||
return stop();
|
return stop();
|
||||||
@@ -1619,6 +1667,13 @@ static bool should_update_synchronously(const schema& s) {
|
|||||||
return *tag_opt == "true";
|
return *tag_opt == "true";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t memory_usage_of(const frozen_mutation_and_schema& mut) {
|
||||||
|
// Overhead of sending a view mutation, in terms of data structures used by the storage_proxy, as well as possible background tasks
|
||||||
|
// allocated for a remote view update.
|
||||||
|
constexpr size_t base_overhead_bytes = 2288;
|
||||||
|
return base_overhead_bytes + mut.fm.representation().size();
|
||||||
|
}
|
||||||
|
|
||||||
// Take the view mutations generated by generate_view_updates(), which pertain
|
// Take the view mutations generated by generate_view_updates(), which pertain
|
||||||
// to a modification of a single base partition, and apply them to the
|
// to a modification of a single base partition, and apply them to the
|
||||||
// appropriate paired replicas. This is done asynchronously - we do not wait
|
// appropriate paired replicas. This is done asynchronously - we do not wait
|
||||||
@@ -1643,7 +1698,7 @@ future<> view_update_generator::mutate_MV(
|
|||||||
bool network_topology = dynamic_cast<const locator::network_topology_strategy*>(&ks.get_replication_strategy());
|
bool network_topology = dynamic_cast<const locator::network_topology_strategy*>(&ks.get_replication_strategy());
|
||||||
auto target_endpoint = get_view_natural_endpoint(ermp, network_topology, base_token, view_token);
|
auto target_endpoint = get_view_natural_endpoint(ermp, network_topology, base_token, view_token);
|
||||||
auto remote_endpoints = ermp->get_pending_endpoints(view_token);
|
auto remote_endpoints = ermp->get_pending_endpoints(view_token);
|
||||||
auto sem_units = pending_view_updates.split(mut.fm.representation().size());
|
auto sem_units = seastar::make_lw_shared<db::timeout_semaphore_units>(pending_view_updates.split(memory_usage_of(mut)));
|
||||||
|
|
||||||
const bool update_synchronously = should_update_synchronously(*mut.s);
|
const bool update_synchronously = should_update_synchronously(*mut.s);
|
||||||
if (update_synchronously) {
|
if (update_synchronously) {
|
||||||
@@ -1689,9 +1744,9 @@ future<> view_update_generator::mutate_MV(
|
|||||||
auto mut_ptr = remote_endpoints.empty() ? std::make_unique<frozen_mutation>(std::move(mut.fm)) : std::make_unique<frozen_mutation>(mut.fm);
|
auto mut_ptr = remote_endpoints.empty() ? std::make_unique<frozen_mutation>(std::move(mut.fm)) : std::make_unique<frozen_mutation>(mut.fm);
|
||||||
tracing::trace(tr_state, "Locally applying view update for {}.{}; base token = {}; view token = {}",
|
tracing::trace(tr_state, "Locally applying view update for {}.{}; base token = {}; view token = {}",
|
||||||
mut.s->ks_name(), mut.s->cf_name(), base_token, view_token);
|
mut.s->ks_name(), mut.s->cf_name(), base_token, view_token);
|
||||||
local_view_update = _proxy.local().mutate_locally(mut.s, *mut_ptr, tr_state, db::commitlog::force_sync::no).then_wrapped(
|
local_view_update = _proxy.local().mutate_mv_locally(mut.s, *mut_ptr, tr_state, db::commitlog::force_sync::no).then_wrapped(
|
||||||
[s = mut.s, &stats, &cf_stats, tr_state, base_token, view_token, my_address, mut_ptr = std::move(mut_ptr),
|
[s = mut.s, &stats, &cf_stats, tr_state, base_token, view_token, my_address, mut_ptr = std::move(mut_ptr),
|
||||||
units = sem_units.split(sem_units.count())] (future<>&& f) {
|
sem_units] (future<>&& f) {
|
||||||
--stats.writes;
|
--stats.writes;
|
||||||
if (f.failed()) {
|
if (f.failed()) {
|
||||||
++stats.view_updates_failed_local;
|
++stats.view_updates_failed_local;
|
||||||
@@ -1728,7 +1783,7 @@ future<> view_update_generator::mutate_MV(
|
|||||||
schema_ptr s = mut.s;
|
schema_ptr s = mut.s;
|
||||||
future<> view_update = apply_to_remote_endpoints(_proxy.local(), std::move(ermp), *target_endpoint, std::move(remote_endpoints), std::move(mut), base_token, view_token, allow_hints, tr_state).then_wrapped(
|
future<> view_update = apply_to_remote_endpoints(_proxy.local(), std::move(ermp), *target_endpoint, std::move(remote_endpoints), std::move(mut), base_token, view_token, allow_hints, tr_state).then_wrapped(
|
||||||
[s = std::move(s), &stats, &cf_stats, tr_state, base_token, view_token, target_endpoint, updates_pushed_remote,
|
[s = std::move(s), &stats, &cf_stats, tr_state, base_token, view_token, target_endpoint, updates_pushed_remote,
|
||||||
units = sem_units.split(sem_units.count()), apply_update_synchronously] (future<>&& f) mutable {
|
sem_units, apply_update_synchronously] (future<>&& f) mutable {
|
||||||
if (f.failed()) {
|
if (f.failed()) {
|
||||||
stats.view_updates_failed_remote += updates_pushed_remote;
|
stats.view_updates_failed_remote += updates_pushed_remote;
|
||||||
cf_stats.total_view_updates_failed_remote += updates_pushed_remote;
|
cf_stats.total_view_updates_failed_remote += updates_pushed_remote;
|
||||||
@@ -2255,7 +2310,7 @@ future<> view_builder::do_build_step() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}).handle_exception([] (std::exception_ptr ex) {
|
}).handle_exception([] (std::exception_ptr ex) {
|
||||||
vlogger.warn("Unexcepted error executing build step: {}. Ignored.", std::current_exception());
|
vlogger.warn("Unexcepted error executing build step: {}. Ignored.", ex);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -209,7 +209,7 @@ class view_updates final {
|
|||||||
schema_ptr _base;
|
schema_ptr _base;
|
||||||
base_info_ptr _base_info;
|
base_info_ptr _base_info;
|
||||||
std::unordered_map<partition_key, mutation_partition, partition_key::hashing, partition_key::equality> _updates;
|
std::unordered_map<partition_key, mutation_partition, partition_key::hashing, partition_key::equality> _updates;
|
||||||
mutable size_t _op_count = 0;
|
size_t _op_count = 0;
|
||||||
const bool _backing_secondary_index;
|
const bool _backing_secondary_index;
|
||||||
public:
|
public:
|
||||||
explicit view_updates(view_and_base vab, bool backing_secondary_index)
|
explicit view_updates(view_and_base vab, bool backing_secondary_index)
|
||||||
@@ -318,6 +318,8 @@ future<query::clustering_row_ranges> calculate_affected_clustering_ranges(
|
|||||||
|
|
||||||
bool needs_static_row(const mutation_partition& mp, const std::vector<view_and_base>& views);
|
bool needs_static_row(const mutation_partition& mp, const std::vector<view_and_base>& views);
|
||||||
|
|
||||||
|
size_t memory_usage_of(const frozen_mutation_and_schema& mut);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create_virtual_column() adds a "virtual column" to a schema builder.
|
* create_virtual_column() adds a "virtual column" to a schema builder.
|
||||||
* The definition of a "virtual column" is based on the given definition
|
* The definition of a "virtual column" is based on the given definition
|
||||||
|
|||||||
@@ -234,12 +234,12 @@ void view_update_generator::do_abort() noexcept {
|
|||||||
}
|
}
|
||||||
|
|
||||||
vug_logger.info("Terminating background fiber");
|
vug_logger.info("Terminating background fiber");
|
||||||
_db.unplug_view_update_generator();
|
|
||||||
_as.request_abort();
|
_as.request_abort();
|
||||||
_pending_sstables.signal();
|
_pending_sstables.signal();
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> view_update_generator::stop() {
|
future<> view_update_generator::stop() {
|
||||||
|
_db.unplug_view_update_generator();
|
||||||
do_abort();
|
do_abort();
|
||||||
return std::move(_started).then([this] {
|
return std::move(_started).then([this] {
|
||||||
_registration_sem.broken();
|
_registration_sem.broken();
|
||||||
|
|||||||
@@ -96,6 +96,7 @@ struct failure_detector::impl {
|
|||||||
clock& _clock;
|
clock& _clock;
|
||||||
|
|
||||||
clock::interval_t _ping_period;
|
clock::interval_t _ping_period;
|
||||||
|
clock::interval_t _ping_timeout;
|
||||||
|
|
||||||
// Number of workers on each shard.
|
// Number of workers on each shard.
|
||||||
// We use this to decide where to create new workers (we pick a shard with the smallest number of workers).
|
// We use this to decide where to create new workers (we pick a shard with the smallest number of workers).
|
||||||
@@ -138,7 +139,7 @@ struct failure_detector::impl {
|
|||||||
// The unregistering process requires cross-shard operations which we perform on this fiber.
|
// The unregistering process requires cross-shard operations which we perform on this fiber.
|
||||||
future<> _destroy_subscriptions = make_ready_future<>();
|
future<> _destroy_subscriptions = make_ready_future<>();
|
||||||
|
|
||||||
impl(failure_detector& parent, pinger&, clock&, clock::interval_t ping_period);
|
impl(failure_detector& parent, pinger&, clock&, clock::interval_t ping_period, clock::interval_t ping_timeout);
|
||||||
~impl();
|
~impl();
|
||||||
|
|
||||||
// Inform update_endpoint_fiber() about an added/removed endpoint.
|
// Inform update_endpoint_fiber() about an added/removed endpoint.
|
||||||
@@ -174,12 +175,14 @@ struct failure_detector::impl {
|
|||||||
future<> mark(listener* l, pinger::endpoint_id ep, bool alive);
|
future<> mark(listener* l, pinger::endpoint_id ep, bool alive);
|
||||||
};
|
};
|
||||||
|
|
||||||
failure_detector::failure_detector(pinger& pinger, clock& clock, clock::interval_t ping_period)
|
failure_detector::failure_detector(
|
||||||
: _impl(std::make_unique<impl>(*this, pinger, clock, ping_period))
|
pinger& pinger, clock& clock, clock::interval_t ping_period, clock::interval_t ping_timeout)
|
||||||
|
: _impl(std::make_unique<impl>(*this, pinger, clock, ping_period, ping_timeout))
|
||||||
{}
|
{}
|
||||||
|
|
||||||
failure_detector::impl::impl(failure_detector& parent, pinger& pinger, clock& clock, clock::interval_t ping_period)
|
failure_detector::impl::impl(
|
||||||
: _parent(parent), _pinger(pinger), _clock(clock), _ping_period(ping_period) {
|
failure_detector& parent, pinger& pinger, clock& clock, clock::interval_t ping_period, clock::interval_t ping_timeout)
|
||||||
|
: _parent(parent), _pinger(pinger), _clock(clock), _ping_period(ping_period), _ping_timeout(ping_timeout) {
|
||||||
if (this_shard_id() != 0) {
|
if (this_shard_id() != 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -536,11 +539,9 @@ future<> endpoint_worker::ping_fiber() noexcept {
|
|||||||
auto start = clock.now();
|
auto start = clock.now();
|
||||||
auto next_ping_start = start + _fd._ping_period;
|
auto next_ping_start = start + _fd._ping_period;
|
||||||
|
|
||||||
// A ping should take significantly less time than _ping_period, but we give it a multiple of ping_period before it times out
|
auto timeout = start + _fd._ping_timeout;
|
||||||
// just in case of transient network partitions.
|
// If there's a listener that's going to timeout soon (before the ping returns), we abort the ping in order to handle
|
||||||
// However, if there's a listener that's going to timeout soon (before the ping returns), we abort the ping in order to handle
|
|
||||||
// the listener (mark it as dead).
|
// the listener (mark it as dead).
|
||||||
auto timeout = start + 3 * _fd._ping_period;
|
|
||||||
for (auto& [threshold, l]: _fd._listeners_liveness) {
|
for (auto& [threshold, l]: _fd._listeners_liveness) {
|
||||||
if (l.endpoint_liveness[_id].alive && last_response + threshold < timeout) {
|
if (l.endpoint_liveness[_id].alive && last_response + threshold < timeout) {
|
||||||
timeout = last_response + threshold;
|
timeout = last_response + threshold;
|
||||||
|
|||||||
@@ -120,14 +120,14 @@ public:
|
|||||||
|
|
||||||
// Every endpoint in the detected set will be periodically pinged every `ping_period`,
|
// Every endpoint in the detected set will be periodically pinged every `ping_period`,
|
||||||
// assuming that the pings return in a timely manner. A ping may take longer than `ping_period`
|
// assuming that the pings return in a timely manner. A ping may take longer than `ping_period`
|
||||||
// before it's aborted (up to a certain multiple of `ping_period`), in which case the next ping
|
// before it's aborted (up to `ping_timeout`), in which case the next ping will start immediately.
|
||||||
// will start immediately.
|
|
||||||
//
|
|
||||||
// `ping_period` should be chosen so that during normal operation, a ping takes significantly
|
|
||||||
// less time than `ping_period` (preferably at least an order of magnitude less).
|
|
||||||
//
|
//
|
||||||
// The passed-in value must be the same on every shard.
|
// The passed-in value must be the same on every shard.
|
||||||
clock::interval_t ping_period
|
clock::interval_t ping_period,
|
||||||
|
|
||||||
|
// Duration after which a ping is aborted, so that next ping can be started
|
||||||
|
// (pings are sent sequentially).
|
||||||
|
clock::interval_t ping_timeout
|
||||||
);
|
);
|
||||||
|
|
||||||
~failure_detector();
|
~failure_detector();
|
||||||
@@ -147,7 +147,7 @@ public:
|
|||||||
// The listener stops being called when the returned subscription is destroyed.
|
// The listener stops being called when the returned subscription is destroyed.
|
||||||
// The subscription must be destroyed before service is stopped.
|
// The subscription must be destroyed before service is stopped.
|
||||||
//
|
//
|
||||||
// `threshold` should be significantly larger than `ping_period`, preferably at least an order of magnitude larger.
|
// `threshold` should be significantly larger than `ping_timeout`, preferably at least an order of magnitude larger.
|
||||||
//
|
//
|
||||||
// Different listeners may use different thresholds, depending on the use case:
|
// Different listeners may use different thresholds, depending on the use case:
|
||||||
// some listeners may want to mark endpoints as dead more aggressively if fast reaction times are important
|
// some listeners may want to mark endpoints as dead more aggressively if fast reaction times are important
|
||||||
|
|||||||
5
dist/common/scripts/scylla_coredump_setup
vendored
5
dist/common/scripts/scylla_coredump_setup
vendored
@@ -62,8 +62,7 @@ ExternalSizeMax=1024G
|
|||||||
[Unit]
|
[Unit]
|
||||||
Description=Save coredump to scylla data directory
|
Description=Save coredump to scylla data directory
|
||||||
Conflicts=umount.target
|
Conflicts=umount.target
|
||||||
Before=scylla-server.service
|
Before=local-fs.target scylla-server.service
|
||||||
After=local-fs.target
|
|
||||||
DefaultDependencies=no
|
DefaultDependencies=no
|
||||||
|
|
||||||
[Mount]
|
[Mount]
|
||||||
@@ -73,7 +72,7 @@ Type=none
|
|||||||
Options=bind
|
Options=bind
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=local-fs.target
|
||||||
'''[1:-1]
|
'''[1:-1]
|
||||||
with open('/etc/systemd/system/var-lib-systemd-coredump.mount', 'w') as f:
|
with open('/etc/systemd/system/var-lib-systemd-coredump.mount', 'w') as f:
|
||||||
f.write(dot_mount)
|
f.write(dot_mount)
|
||||||
|
|||||||
31
dist/common/scripts/scylla_io_setup
vendored
31
dist/common/scripts/scylla_io_setup
vendored
@@ -10,6 +10,7 @@
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
from scylla_util import *
|
from scylla_util import *
|
||||||
|
import resource
|
||||||
import subprocess
|
import subprocess
|
||||||
import argparse
|
import argparse
|
||||||
import yaml
|
import yaml
|
||||||
@@ -102,6 +103,34 @@ class scylla_cpuinfo:
|
|||||||
else:
|
else:
|
||||||
return len(self._cpu_data["system"])
|
return len(self._cpu_data["system"])
|
||||||
|
|
||||||
|
def configure_iotune_open_fd_limit(shards_count):
|
||||||
|
try:
|
||||||
|
fd_limits = resource.getrlimit(resource.RLIMIT_NOFILE)
|
||||||
|
except (OSError, ValueError) as e:
|
||||||
|
logging.warning("Could not get the limit of count of open file descriptors!")
|
||||||
|
logging.warning("iotune will proceed with the default limit. This may cause problems.")
|
||||||
|
return
|
||||||
|
|
||||||
|
precalculated_fds_count = (10 * shards_count) + 500
|
||||||
|
soft_limit, hard_limit = fd_limits
|
||||||
|
|
||||||
|
if hard_limit == resource.RLIM_INFINITY:
|
||||||
|
# If there is no hard limit, then ensure that soft limit allows enough FDs.
|
||||||
|
soft_limit = max(soft_limit, precalculated_fds_count)
|
||||||
|
else:
|
||||||
|
# If hard_limit is greater than precalculated_fds_count, then set it as soft and as hard limit.
|
||||||
|
required_fds_count = max(hard_limit, precalculated_fds_count)
|
||||||
|
soft_limit = max(soft_limit, required_fds_count)
|
||||||
|
hard_limit = max(hard_limit, required_fds_count)
|
||||||
|
|
||||||
|
try:
|
||||||
|
resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))
|
||||||
|
except (OSError, ValueError) as e:
|
||||||
|
logging.error(e)
|
||||||
|
logging.error("Could not set the limit of open file descriptors for iotune!")
|
||||||
|
logging.error(f"Required FDs count: {precalculated_fds_count}, default limit: {fd_limits}!")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
def run_iotune():
|
def run_iotune():
|
||||||
if "SCYLLA_CONF" in os.environ:
|
if "SCYLLA_CONF" in os.environ:
|
||||||
conf_dir = os.environ["SCYLLA_CONF"]
|
conf_dir = os.environ["SCYLLA_CONF"]
|
||||||
@@ -142,6 +171,8 @@ def run_iotune():
|
|||||||
elif cpudata.smp():
|
elif cpudata.smp():
|
||||||
iotune_args += [ "--smp", str(cpudata.smp()) ]
|
iotune_args += [ "--smp", str(cpudata.smp()) ]
|
||||||
|
|
||||||
|
configure_iotune_open_fd_limit(cpudata.nr_shards())
|
||||||
|
|
||||||
try:
|
try:
|
||||||
subprocess.check_call([bindir() + "/iotune",
|
subprocess.check_call([bindir() + "/iotune",
|
||||||
"--format", "envfile",
|
"--format", "envfile",
|
||||||
|
|||||||
12
dist/common/scripts/scylla_raid_setup
vendored
12
dist/common/scripts/scylla_raid_setup
vendored
@@ -257,19 +257,19 @@ if __name__ == '__main__':
|
|||||||
dev_type = 'realpath'
|
dev_type = 'realpath'
|
||||||
LOGGER.error(f'Failed to detect uuid, using {dev_type}: {mount_dev}')
|
LOGGER.error(f'Failed to detect uuid, using {dev_type}: {mount_dev}')
|
||||||
|
|
||||||
after = 'local-fs.target'
|
after = ''
|
||||||
wants = ''
|
wants = ''
|
||||||
if raid and args.raid_level != '0':
|
if raid and args.raid_level != '0':
|
||||||
after += f' {md_service}'
|
after = wants = 'md_service'
|
||||||
wants = f'\nWants={md_service}'
|
|
||||||
opt_discard = ''
|
opt_discard = ''
|
||||||
if args.online_discard:
|
if args.online_discard:
|
||||||
opt_discard = ',discard'
|
opt_discard = ',discard'
|
||||||
unit_data = f'''
|
unit_data = f'''
|
||||||
[Unit]
|
[Unit]
|
||||||
Description=Scylla data directory
|
Description=Scylla data directory
|
||||||
Before=scylla-server.service
|
Before=local-fs.target scylla-server.service
|
||||||
After={after}{wants}
|
After={after}
|
||||||
|
Wants={wants}
|
||||||
DefaultDependencies=no
|
DefaultDependencies=no
|
||||||
|
|
||||||
[Mount]
|
[Mount]
|
||||||
@@ -279,7 +279,7 @@ Type=xfs
|
|||||||
Options=noatime{opt_discard}
|
Options=noatime{opt_discard}
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=local-fs.target
|
||||||
'''[1:-1]
|
'''[1:-1]
|
||||||
with open(f'/etc/systemd/system/{mntunit_bn}', 'w') as f:
|
with open(f'/etc/systemd/system/{mntunit_bn}', 'w') as f:
|
||||||
f.write(unit_data)
|
f.write(unit_data)
|
||||||
|
|||||||
4
dist/docker/debian/build_docker.sh
vendored
4
dist/docker/debian/build_docker.sh
vendored
@@ -64,7 +64,6 @@ bcp "${packages[@]}" packages/
|
|||||||
|
|
||||||
bcp dist/docker/etc etc/
|
bcp dist/docker/etc etc/
|
||||||
bcp dist/docker/scylla-housekeeping-service.sh /scylla-housekeeping-service.sh
|
bcp dist/docker/scylla-housekeeping-service.sh /scylla-housekeeping-service.sh
|
||||||
bcp dist/docker/sshd-service.sh /sshd-service.sh
|
|
||||||
|
|
||||||
bcp dist/docker/scyllasetup.py /scyllasetup.py
|
bcp dist/docker/scyllasetup.py /scyllasetup.py
|
||||||
bcp dist/docker/commandlineparser.py /commandlineparser.py
|
bcp dist/docker/commandlineparser.py /commandlineparser.py
|
||||||
@@ -74,10 +73,11 @@ bcp dist/docker/scylla_bashrc /scylla_bashrc
|
|||||||
|
|
||||||
run apt-get -y clean expire-cache
|
run apt-get -y clean expire-cache
|
||||||
run apt-get -y update
|
run apt-get -y update
|
||||||
|
run apt-get -y upgrade
|
||||||
run apt-get -y install dialog apt-utils
|
run apt-get -y install dialog apt-utils
|
||||||
run bash -ec "echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections"
|
run bash -ec "echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections"
|
||||||
run bash -ec "rm -rf /etc/rsyslog.conf"
|
run bash -ec "rm -rf /etc/rsyslog.conf"
|
||||||
run apt-get -y install hostname supervisor openssh-server openssh-client openjdk-11-jre-headless python2 python3 python3-yaml curl rsyslog sudo
|
run apt-get -y install hostname supervisor openjdk-11-jre-headless python2 python3 python3-yaml curl rsyslog sudo systemd
|
||||||
run bash -ec "echo LANG=C.UTF-8 > /etc/default/locale"
|
run bash -ec "echo LANG=C.UTF-8 > /etc/default/locale"
|
||||||
run bash -ec "dpkg -i packages/*.deb"
|
run bash -ec "dpkg -i packages/*.deb"
|
||||||
run apt-get -y clean all
|
run apt-get -y clean all
|
||||||
|
|||||||
@@ -1,6 +0,0 @@
|
|||||||
[program:sshd]
|
|
||||||
command=/sshd-service.sh
|
|
||||||
stdout_logfile=/dev/stdout
|
|
||||||
stdout_logfile_maxbytes=0
|
|
||||||
stderr_logfile=/dev/stderr
|
|
||||||
stderr_logfile_maxbytes=0
|
|
||||||
3
dist/docker/scyllasetup.py
vendored
3
dist/docker/scyllasetup.py
vendored
@@ -75,7 +75,8 @@ class ScyllaSetup:
|
|||||||
hostname = self._listenAddress
|
hostname = self._listenAddress
|
||||||
else:
|
else:
|
||||||
hostname = subprocess.check_output(['hostname', '-i']).decode('ascii').strip()
|
hostname = subprocess.check_output(['hostname', '-i']).decode('ascii').strip()
|
||||||
with open("%s/.cqlshrc" % home, "w") as cqlshrc:
|
self._run(["mkdir", "-p", "%s/.cassandra" % home])
|
||||||
|
with open("%s/.cassandra/cqlshrc" % home, "w") as cqlshrc:
|
||||||
cqlshrc.write("[connection]\nhostname = %s\n" % hostname)
|
cqlshrc.write("[connection]\nhostname = %s\n" % hostname)
|
||||||
|
|
||||||
def set_housekeeping(self):
|
def set_housekeeping(self):
|
||||||
|
|||||||
15
dist/docker/sshd-service.sh
vendored
15
dist/docker/sshd-service.sh
vendored
@@ -1,15 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
if [ ! -f /run/sshd ]; then
|
|
||||||
mkdir -p /run/sshd
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ! -f /etc/ssh/ssh_host_ed25519_key ]; then
|
|
||||||
ssh-keygen -t ed25519 -f /etc/ssh/ssh_host_ed25519_key -N ''
|
|
||||||
fi
|
|
||||||
if [ ! -f /etc/ssh/ssh_host_rsa_key ]; then
|
|
||||||
ssh-keygen -t rsa -b 4096 -f /etc/ssh/ssh_host_rsa_key -N ''
|
|
||||||
fi
|
|
||||||
|
|
||||||
/usr/sbin/sshd -D
|
|
||||||
|
|
||||||
@@ -91,7 +91,7 @@ redirects: setup
|
|||||||
# Preview commands
|
# Preview commands
|
||||||
.PHONY: preview
|
.PHONY: preview
|
||||||
preview: setup
|
preview: setup
|
||||||
$(POETRY) run sphinx-autobuild -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml --host $(PREVIEW_HOST) --port 5500 --ignore '_data/*'
|
$(POETRY) run sphinx-autobuild -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml --host $(PREVIEW_HOST) --port 5500 --ignore *.csv --ignore *.yaml
|
||||||
|
|
||||||
.PHONY: multiversionpreview
|
.PHONY: multiversionpreview
|
||||||
multiversionpreview: multiversion
|
multiversionpreview: multiversion
|
||||||
|
|||||||
@@ -118,6 +118,7 @@ class AMIVersionsTemplateDirective(Directive):
|
|||||||
option_spec = {
|
option_spec = {
|
||||||
"version": directives.unchanged,
|
"version": directives.unchanged,
|
||||||
"exclude": directives.unchanged,
|
"exclude": directives.unchanged,
|
||||||
|
"only_latest": directives.flag,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _extract_version_from_filename(self, filename):
|
def _extract_version_from_filename(self, filename):
|
||||||
@@ -144,10 +145,28 @@ class AMIVersionsTemplateDirective(Directive):
|
|||||||
version = self._extract_version_from_filename(filename)
|
version = self._extract_version_from_filename(filename)
|
||||||
return tuple(map(int, version.split("."))) if version else (0,)
|
return tuple(map(int, version.split("."))) if version else (0,)
|
||||||
|
|
||||||
|
def _get_current_version(self, current_version, stable_version):
|
||||||
|
prefix = 'branch-'
|
||||||
|
version = current_version
|
||||||
|
|
||||||
|
if current_version.startswith(prefix):
|
||||||
|
version = current_version
|
||||||
|
elif not stable_version.startswith(prefix):
|
||||||
|
LOGGER.error("Invalid stable_version format in conf.py. It should start with 'branch-'")
|
||||||
|
else:
|
||||||
|
version = stable_version
|
||||||
|
|
||||||
|
return version.replace(prefix, '')
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
app = self.state.document.settings.env.app
|
app = self.state.document.settings.env.app
|
||||||
version_pattern = self.options.get("version", "")
|
current_version = os.environ.get('SPHINX_MULTIVERSION_NAME', '')
|
||||||
|
stable_version = app.config.smv_latest_version
|
||||||
|
|
||||||
|
version_pattern = self._get_current_version(current_version, stable_version)
|
||||||
|
version_options = self.options.get("version", "")
|
||||||
|
if version_options:
|
||||||
|
version_pattern = version_options
|
||||||
exclude_patterns = self.options.get("exclude", "").split(",")
|
exclude_patterns = self.options.get("exclude", "").split(",")
|
||||||
|
|
||||||
download_directory = os.path.join(
|
download_directory = os.path.join(
|
||||||
@@ -169,6 +188,8 @@ class AMIVersionsTemplateDirective(Directive):
|
|||||||
LOGGER.warning(
|
LOGGER.warning(
|
||||||
f"No files match in directory '{download_directory}' with version pattern '{version_pattern}'."
|
f"No files match in directory '{download_directory}' with version pattern '{version_pattern}'."
|
||||||
)
|
)
|
||||||
|
elif "only_latest" in self.options:
|
||||||
|
files = [files[0]]
|
||||||
|
|
||||||
output = []
|
output = []
|
||||||
for file in files:
|
for file in files:
|
||||||
|
|||||||
@@ -1,14 +1,23 @@
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import yaml
|
import yaml
|
||||||
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
|
import jinja2
|
||||||
|
|
||||||
|
from sphinx import addnodes
|
||||||
from sphinx.application import Sphinx
|
from sphinx.application import Sphinx
|
||||||
from sphinxcontrib.datatemplates.directive import DataTemplateYAML
|
from sphinx.directives import ObjectDescription
|
||||||
|
from sphinx.util import logging, status_iterator, ws_re
|
||||||
|
from sphinx.util.docfields import Field
|
||||||
|
from sphinx.util.docutils import switch_source_input, SphinxDirective
|
||||||
|
from sphinx.util.nodes import make_id, nested_parse_with_titles
|
||||||
|
from sphinx.jinja2glue import BuiltinTemplateLoader
|
||||||
|
from docutils import nodes
|
||||||
from docutils.parsers.rst import directives
|
from docutils.parsers.rst import directives
|
||||||
|
from docutils.statemachine import StringList
|
||||||
|
|
||||||
CONFIG_FILE_PATH = "../db/config.cc"
|
logger = logging.getLogger(__name__)
|
||||||
CONFIG_HEADER_FILE_PATH = "../db/config.hh"
|
|
||||||
DESTINATION_PATH = "_data/db_config.yaml"
|
|
||||||
|
|
||||||
|
|
||||||
class DBConfigParser:
|
class DBConfigParser:
|
||||||
|
|
||||||
@@ -47,42 +56,18 @@ class DBConfigParser:
|
|||||||
"""
|
"""
|
||||||
COMMENT_PATTERN = r"/\*.*?\*/|//.*?$"
|
COMMENT_PATTERN = r"/\*.*?\*/|//.*?$"
|
||||||
|
|
||||||
def __init__(self, config_file_path, config_header_file_path, destination_path):
|
all_properties = {}
|
||||||
|
|
||||||
|
def __init__(self, config_file_path, config_header_file_path):
|
||||||
self.config_file_path = config_file_path
|
self.config_file_path = config_file_path
|
||||||
self.config_header_file_path = config_header_file_path
|
self.config_header_file_path = config_header_file_path
|
||||||
self.destination_path = destination_path
|
|
||||||
|
|
||||||
def _create_yaml_file(self, destination, data):
|
|
||||||
current_data = None
|
|
||||||
|
|
||||||
try:
|
|
||||||
with open(destination, "r") as file:
|
|
||||||
current_data = yaml.safe_load(file)
|
|
||||||
except FileNotFoundError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
if current_data != data:
|
|
||||||
os.makedirs(os.path.dirname(destination), exist_ok=True)
|
|
||||||
with open(destination, "w") as file:
|
|
||||||
yaml.dump(data, file)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _clean_description(description):
|
|
||||||
return (
|
|
||||||
description.replace("\\n", "")
|
|
||||||
.replace('<', '<')
|
|
||||||
.replace('>', '>')
|
|
||||||
.replace("\n", "<br>")
|
|
||||||
.replace("\\t", "- ")
|
|
||||||
.replace('"', "")
|
|
||||||
)
|
|
||||||
|
|
||||||
def _clean_comments(self, content):
|
def _clean_comments(self, content):
|
||||||
return re.sub(self.COMMENT_PATTERN, "", content, flags=re.DOTALL | re.MULTILINE)
|
return re.sub(self.COMMENT_PATTERN, "", content, flags=re.DOTALL | re.MULTILINE)
|
||||||
|
|
||||||
def _parse_group(self, group_match, config_group_content):
|
def _parse_group(self, group_match, config_group_content):
|
||||||
group_name = group_match.group(1).strip()
|
group_name = group_match.group(1).strip()
|
||||||
group_description = self._clean_description(group_match.group(2).strip()) if group_match.group(2) else ""
|
group_description = group_match.group(2).strip() if group_match.group(2) else ""
|
||||||
|
|
||||||
current_group = {
|
current_group = {
|
||||||
"name": group_name,
|
"name": group_name,
|
||||||
@@ -111,14 +96,16 @@ class DBConfigParser:
|
|||||||
config_matches = re.findall(self.CONFIG_CC_REGEX_PATTERN, content, re.DOTALL)
|
config_matches = re.findall(self.CONFIG_CC_REGEX_PATTERN, content, re.DOTALL)
|
||||||
|
|
||||||
for match in config_matches:
|
for match in config_matches:
|
||||||
|
name = match[1].strip()
|
||||||
property_data = {
|
property_data = {
|
||||||
"name": match[1].strip(),
|
"name": name,
|
||||||
"value_status": match[4].strip(),
|
"value_status": match[4].strip(),
|
||||||
"default": match[5].strip(),
|
"default": match[5].strip(),
|
||||||
"liveness": "True" if match[3] else "False",
|
"liveness": "True" if match[3] else "False",
|
||||||
"description": self._clean_description(match[6].strip()),
|
"description": match[6].strip(),
|
||||||
}
|
}
|
||||||
properties.append(property_data)
|
properties.append(property_data)
|
||||||
|
DBConfigParser.all_properties[name] = property_data
|
||||||
|
|
||||||
return properties
|
return properties
|
||||||
|
|
||||||
@@ -135,7 +122,7 @@ class DBConfigParser:
|
|||||||
if property_data["name"] == property_key:
|
if property_data["name"] == property_key:
|
||||||
property_data["type"] = match[0].strip()
|
property_data["type"] = match[0].strip()
|
||||||
|
|
||||||
def _parse_db_properties(self):
|
def parse(self):
|
||||||
groups = []
|
groups = []
|
||||||
|
|
||||||
with open(self.config_file_path, "r", encoding='utf-8') as file:
|
with open(self.config_file_path, "r", encoding='utf-8') as file:
|
||||||
@@ -158,26 +145,170 @@ class DBConfigParser:
|
|||||||
|
|
||||||
return groups
|
return groups
|
||||||
|
|
||||||
def run(self, app: Sphinx):
|
@classmethod
|
||||||
dest_path = os.path.join(app.builder.srcdir, self.destination_path)
|
def get(cls, name: str):
|
||||||
parsed_properties = self._parse_db_properties()
|
return DBConfigParser.all_properties[name]
|
||||||
self._create_yaml_file(dest_path, parsed_properties)
|
|
||||||
|
|
||||||
|
|
||||||
class DBConfigTemplateDirective(DataTemplateYAML):
|
def readable_desc(description: str) -> str:
|
||||||
|
return (
|
||||||
option_spec = DataTemplateYAML.option_spec.copy()
|
description.replace("\\n", "")
|
||||||
option_spec["value_status"] = directives.unchanged_required
|
.replace('<', '<')
|
||||||
|
.replace('>', '>')
|
||||||
def _make_context(self, data, config, env):
|
.replace("\n", "<br>")
|
||||||
context = super()._make_context(data, config, env)
|
.replace("\\t", "- ")
|
||||||
context["value_status"] = self.options.get("value_status")
|
.replace('"', "")
|
||||||
return context
|
|
||||||
|
|
||||||
|
|
||||||
def setup(app: Sphinx):
|
|
||||||
db_parser = DBConfigParser(
|
|
||||||
CONFIG_FILE_PATH, CONFIG_HEADER_FILE_PATH, DESTINATION_PATH
|
|
||||||
)
|
)
|
||||||
app.connect("builder-inited", db_parser.run)
|
|
||||||
app.add_directive("scylladb_config_template", DBConfigTemplateDirective)
|
|
||||||
|
def maybe_add_filters(builder):
|
||||||
|
env = builder.templates.environment
|
||||||
|
if 'readable_desc' not in env.filters:
|
||||||
|
env.filters['readable_desc'] = readable_desc
|
||||||
|
|
||||||
|
|
||||||
|
class ConfigOption(ObjectDescription):
|
||||||
|
has_content = True
|
||||||
|
required_arguments = 1
|
||||||
|
optional_arguments = 0
|
||||||
|
final_argument_whitespace = False
|
||||||
|
|
||||||
|
# TODO: instead of overriding transform_content(), render option properties
|
||||||
|
# as a field list.
|
||||||
|
doc_field_types = [
|
||||||
|
Field('type',
|
||||||
|
label='Type',
|
||||||
|
has_arg=False,
|
||||||
|
names=('type',)),
|
||||||
|
Field('default',
|
||||||
|
label='Default value',
|
||||||
|
has_arg=False,
|
||||||
|
names=('default',)),
|
||||||
|
Field('liveness',
|
||||||
|
label='Liveness',
|
||||||
|
has_arg=False,
|
||||||
|
names=('liveness',)),
|
||||||
|
]
|
||||||
|
|
||||||
|
def handle_signature(self,
|
||||||
|
sig: str,
|
||||||
|
signode: addnodes.desc_signature) -> str:
|
||||||
|
signode.clear()
|
||||||
|
signode += addnodes.desc_name(sig, sig)
|
||||||
|
# normalize whitespace like XRefRole does
|
||||||
|
return ws_re.sub(' ', sig)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def env(self):
|
||||||
|
document = self.state.document
|
||||||
|
return document.settings.env
|
||||||
|
|
||||||
|
def before_content(self) -> None:
|
||||||
|
maybe_add_filters(self.env.app.builder)
|
||||||
|
|
||||||
|
def _render(self, name) -> str:
|
||||||
|
item = DBConfigParser.get(name)
|
||||||
|
if item is None:
|
||||||
|
raise self.error(f'Option "{name}" not found!')
|
||||||
|
builder = self.env.app.builder
|
||||||
|
template = self.config.scylladb_cc_properties_option_tmpl
|
||||||
|
return builder.templates.render(template, item)
|
||||||
|
|
||||||
|
def transform_content(self,
|
||||||
|
contentnode: addnodes.desc_content) -> None:
|
||||||
|
name = self.arguments[0]
|
||||||
|
# the source is always None here
|
||||||
|
_, lineno = self.get_source_info()
|
||||||
|
source = f'scylla_config:{lineno}:<{name}>'
|
||||||
|
fields = StringList(self._render(name).splitlines(),
|
||||||
|
source=source, parent_offset=lineno)
|
||||||
|
with switch_source_input(self.state, fields):
|
||||||
|
self.state.nested_parse(fields, 0, contentnode)
|
||||||
|
|
||||||
|
def add_target_and_index(self,
|
||||||
|
name: str,
|
||||||
|
sig: str,
|
||||||
|
signode: addnodes.desc_signature) -> None:
|
||||||
|
node_id = make_id(self.env, self.state.document, self.objtype, name)
|
||||||
|
signode['ids'].append(node_id)
|
||||||
|
self.state.document.note_explicit_target(signode)
|
||||||
|
entry = f'{name}; configuration option'
|
||||||
|
self.indexnode['entries'].append(('pair', entry, node_id, '', None))
|
||||||
|
std = self.env.get_domain('std')
|
||||||
|
std.note_object(self.objtype, name, node_id, location=signode)
|
||||||
|
|
||||||
|
|
||||||
|
class ConfigOptionList(SphinxDirective):
|
||||||
|
has_content = False
|
||||||
|
required_arguments = 2
|
||||||
|
optional_arguments = 0
|
||||||
|
final_argument_whitespace = True
|
||||||
|
option_spec = {
|
||||||
|
'template': directives.path,
|
||||||
|
'value_status': directives.unchanged_required,
|
||||||
|
}
|
||||||
|
|
||||||
|
@property
|
||||||
|
def env(self):
|
||||||
|
document = self.state.document
|
||||||
|
return document.settings.env
|
||||||
|
|
||||||
|
def _resolve_src_path(self, path: str) -> str:
|
||||||
|
rel_filename, filename = self.env.relfn2path(path)
|
||||||
|
self.env.note_dependency(filename)
|
||||||
|
return filename
|
||||||
|
|
||||||
|
def _render(self, context: Dict[str, Any]) -> str:
|
||||||
|
builder = self.env.app.builder
|
||||||
|
template = self.options.get('template')
|
||||||
|
if template is None:
|
||||||
|
self.error(f'Option "template" not specified!')
|
||||||
|
return builder.templates.render(template, context)
|
||||||
|
|
||||||
|
def _make_context(self) -> Dict[str, Any]:
|
||||||
|
header = self._resolve_src_path(self.arguments[0])
|
||||||
|
source = self._resolve_src_path(self.arguments[1])
|
||||||
|
db_parser = DBConfigParser(source, header)
|
||||||
|
value_status = self.options.get("value_status")
|
||||||
|
return dict(data=db_parser.parse(),
|
||||||
|
value_status=value_status)
|
||||||
|
|
||||||
|
def run(self) -> List[nodes.Node]:
|
||||||
|
maybe_add_filters(self.env.app.builder)
|
||||||
|
rendered = self._render(self._make_context())
|
||||||
|
contents = StringList(rendered.splitlines())
|
||||||
|
node = nodes.section()
|
||||||
|
node.document = self.state.document
|
||||||
|
nested_parse_with_titles(self.state, contents, node)
|
||||||
|
return node.children
|
||||||
|
|
||||||
|
|
||||||
|
def setup(app: Sphinx) -> Dict[str, Any]:
|
||||||
|
app.add_config_value(
|
||||||
|
'scylladb_cc_properties_option_tmpl',
|
||||||
|
default='db_option.tmpl',
|
||||||
|
rebuild='html',
|
||||||
|
types=[str])
|
||||||
|
|
||||||
|
app.add_object_type(
|
||||||
|
'confgroup',
|
||||||
|
'confgroup',
|
||||||
|
objname='configuration group',
|
||||||
|
indextemplate='pair: %s; configuration group',
|
||||||
|
doc_field_types=[
|
||||||
|
Field('example',
|
||||||
|
label='Example',
|
||||||
|
has_arg=False)
|
||||||
|
])
|
||||||
|
app.add_object_type(
|
||||||
|
'confval',
|
||||||
|
'confval',
|
||||||
|
objname='configuration option')
|
||||||
|
app.add_directive_to_domain('std', 'confval', ConfigOption, override=True)
|
||||||
|
app.add_directive('scylladb_config_list', ConfigOptionList)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"version": "0.1",
|
||||||
|
"parallel_read_safe": True,
|
||||||
|
"parallel_write_safe": True,
|
||||||
|
}
|
||||||
|
|||||||
25
docs/_ext/scylladb_include_flag.py
Normal file
25
docs/_ext/scylladb_include_flag.py
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
from sphinx.directives.other import Include
|
||||||
|
from docutils.parsers.rst import directives
|
||||||
|
|
||||||
|
class IncludeFlagDirective(Include):
|
||||||
|
option_spec = Include.option_spec.copy()
|
||||||
|
option_spec['base_path'] = directives.unchanged
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
env = self.state.document.settings.env
|
||||||
|
base_path = self.options.get('base_path', '_common')
|
||||||
|
|
||||||
|
if env.app.tags.has('enterprise'):
|
||||||
|
self.arguments[0] = base_path + "_enterprise/" + self.arguments[0]
|
||||||
|
else:
|
||||||
|
self.arguments[0] = base_path + "/" + self.arguments[0]
|
||||||
|
return super().run()
|
||||||
|
|
||||||
|
def setup(app):
|
||||||
|
app.add_directive('scylladb_include_flag', IncludeFlagDirective, override=True)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"version": "0.1",
|
||||||
|
"parallel_read_safe": True,
|
||||||
|
"parallel_write_safe": True,
|
||||||
|
}
|
||||||
14
docs/_static/css/custom.css
vendored
14
docs/_static/css/custom.css
vendored
@@ -17,10 +17,22 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
.content blockquote li p {
|
.content blockquote li p {
|
||||||
margin-bottom: 10px;
|
margin-bottom: 5px;
|
||||||
}
|
}
|
||||||
|
|
||||||
h3 .pre {
|
h3 .pre {
|
||||||
font-size: 16px;
|
font-size: 16px;
|
||||||
font-weight: bold;
|
font-weight: bold;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
hr {
|
||||||
|
max-width: 100%;
|
||||||
|
}
|
||||||
|
|
||||||
|
dl dt:hover > a.headerlink {
|
||||||
|
visibility: visible;
|
||||||
|
}
|
||||||
|
|
||||||
|
dl.confval {
|
||||||
|
border-bottom: 1px solid #cacaca;
|
||||||
|
}
|
||||||
|
|||||||
14
docs/_templates/db_config.tmpl
vendored
14
docs/_templates/db_config.tmpl
vendored
@@ -8,22 +8,12 @@
|
|||||||
{% if group.description %}
|
{% if group.description %}
|
||||||
.. raw:: html
|
.. raw:: html
|
||||||
|
|
||||||
<p>{{ group.description }}</p>
|
<p>{{ group.description | readable_desc }}</p>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
{% for item in group.properties %}
|
{% for item in group.properties %}
|
||||||
{% if item.value_status == value_status %}
|
{% if item.value_status == value_status %}
|
||||||
``{{ item.name }}``
|
.. confval:: {{ item.name }}
|
||||||
{{ '=' * (item.name|length + 4) }}
|
|
||||||
|
|
||||||
.. raw:: html
|
|
||||||
|
|
||||||
<p>{{ item.description }}</p>
|
|
||||||
|
|
||||||
{% if item.type %}* **Type:** ``{{ item.type }}``{% endif %}
|
|
||||||
{% if item.default %}* **Default value:** ``{{ item.default }}``{% endif %}
|
|
||||||
{% if item.liveness %}* **Liveness** :term:`* <Liveness>` **:** ``{{ item.liveness }}``{% endif %}
|
|
||||||
|
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|||||||
7
docs/_templates/db_option.tmpl
vendored
Normal file
7
docs/_templates/db_option.tmpl
vendored
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
.. raw:: html
|
||||||
|
|
||||||
|
<p>{{ description | readable_desc }}</p>
|
||||||
|
|
||||||
|
{% if type %}* **Type:** ``{{ type }}``{% endif %}
|
||||||
|
{% if default %}* **Default value:** ``{{ default }}``{% endif %}
|
||||||
|
{% if liveness %}* **Liveness** :term:`* <Liveness>` **:** ``{{ liveness }}``{% endif %}
|
||||||
@@ -1,2 +1,115 @@
|
|||||||
### a dictionary of redirections
|
### a dictionary of redirections
|
||||||
#old path: new path
|
#old path: new path
|
||||||
|
|
||||||
|
# Moving pages from the install-scylla folder
|
||||||
|
|
||||||
|
/stable/getting-started/install-scylla/scylla-web-installer.html: /stable/getting-started/installation-common/scylla-web-installer.html
|
||||||
|
/stable/getting-started/install-scylla/unified-installer.html: /stable/getting-started/installation-common/unified-installer.html
|
||||||
|
/stable/getting-started/install-scylla/air-gapped-install.html: /stable/getting-started/installation-common/air-gapped-install.html
|
||||||
|
/stable/getting-started/install-scylla/disable-housekeeping.html: /stable/getting-started/installation-common/disable-housekeeping.html
|
||||||
|
/stable/getting-started/install-scylla/dev-mod.html: /stable/getting-started/installation-common/dev-mod.html
|
||||||
|
/stable/getting-started/install-scylla/config-commands.html: /stable/getting-started/config-commands.html
|
||||||
|
|
||||||
|
# Removed the outdated upgrade guides
|
||||||
|
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-ubuntu-14-to-16.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-2.x.y-to-2.x.z/index.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-2.x.y-to-2.x.z/upgrade-guide-from-2.x.y-to-2.x.z-rpm.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-2.x.y-to-2.x.z/upgrade-guide-from-2.x.y-to-2.x.z-ubuntu.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-2.x.y-to-2.x.z/upgrade-guide-from-2.x.y-to-2.x.z-debian.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-2.1-to-2.2/index.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-2.1-to-2.2/upgrade-guide-from-2.1-to-2.2-rpm.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-2.1-to-2.2/upgrade-guide-from-2.1-to-2.2-ubuntu.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-2.1-to-2.2/upgrade-guide-from-2.1-to-2.2-debian.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-2.1-to-2.2/metric-update-2.1-to-2.2.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-2.2-to-2.3/index.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-2.2-to-2.3/upgrade-guide-from-2.2-to-2.3-rpm.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-2.2-to-2.3/upgrade-guide-from-2.2-to-2.3-ubuntu.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-2.2-to-2.3/upgrade-guide-from-2.2-to-2.3-ubuntu-16-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-2.2-to-2.3/upgrade-guide-from-2.2-to-2.3-debian.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-2.2-to-2.3/metric-update-2.2-to-2.3.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-2.3-to-3.0/index.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-2.3-to-3.0/upgrade-guide-from-2.3-to-3.0-rpm.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-2.3-to-3.0/upgrade-guide-from-2.3-to-3.0-ubuntu.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-2.3-to-3.0/upgrade-guide-from-2.3-to-3.0-ubuntu-16-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-2.3-to-3.0/upgrade-guide-from-2.3-to-3.0-ubuntu-18-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-2.3-to-3.0/upgrade-guide-from-2.3-to-3.0-debian.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-2.3-to-3.0/metric-update-2.3-to-3.0.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.0-to-3.1/index.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.0-to-3.1/upgrade-guide-from-3.0-to-3.1-rpm.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.0-to-3.1/upgrade-guide-from-3.0-to-3.1-ubuntu-16-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.0-to-3.1/upgrade-guide-from-3.0-to-3.1-ubuntu-18-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.0-to-3.1/upgrade-guide-from-3.0-to-3.1-debian.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.0-to-3.1/metric-update-3.0-to-3.1.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.1-to-3.2/index.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.1-to-3.2/upgrade-guide-from-3.1-to-3.2-rpm.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.1-to-3.2/upgrade-guide-from-3.1-to-3.2-ubuntu-16-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.1-to-3.2/upgrade-guide-from-3.1-to-3.2-ubuntu-18-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.1-to-3.2/upgrade-guide-from-3.1-to-3.2-debian.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.1-to-3.2/metric-update-3.1-to-3.2.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.2-to-3.3/index.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.2-to-3.3/upgrade-guide-from-3.2-to-3.3-rpm.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.2-to-3.3/upgrade-guide-from-3.2-to-3.3-ubuntu-16-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.2-to-3.3/upgrade-guide-from-3.2-to-3.3-ubuntu-18-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.2-to-3.3/upgrade-guide-from-3.2-to-3.3-debian.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.2-to-3.3/metric-update-3.2-to-3.3.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.3-to-4.0/index.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.3-to-4.0/upgrade-guide-from-3.3-to-4.0-rpm.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.3-to-4.0/upgrade-guide-from-3.3-to-4.0-ubuntu-16-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.3-to-4.0/upgrade-guide-from-3.3-to-4.0-ubuntu-18-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.3-to-4.0/upgrade-guide-from-3.3-to-4.0-debian.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.3-to-4.0/metric-update-3.3-to-4.0.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.x.y-to-3.x.z/index.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.x.y-to-3.x.z/upgrade-guide-from-3.x.y-to-3.x.z-rpm.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.x.y-to-3.x.z/upgrade-guide-from-3.x.y-to-3.x.z-ubuntu.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-3.x.y-to-3.x.z/upgrade-guide-from-3.x.y-to-3.x.z-debian.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.0-to-4.1/index.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.0-to-4.1/upgrade-guide-from-4.0-to-4.1-rpm.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.0-to-4.1/upgrade-guide-from-4.0-to-4.1-ubuntu-16-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.0-to-4.1/upgrade-guide-from-4.0-to-4.1-ubuntu-18-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.0-to-4.1/upgrade-guide-from-4.0-to-4.1-debian.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.0-to-4.1/alternator.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.0-to-4.1/metric-update-4.0-to-4.1.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.x.y-to-4.x.z/index.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.x.y-to-4.x.z/upgrade-guide-from-4.x.y-to-4.x.z-rpm.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.x.y-to-4.x.z/upgrade-guide-from-4.x.y-to-4.x.z-ubuntu.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.x.y-to-4.x.z/upgrade-guide-from-4.x.y-to-4.x.z-debian.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.1-to-4.2/index.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.1-to-4.2/upgrade-guide-from-4.1-to-4.2-rpm.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.1-to-4.2/upgrade-guide-from-4.1-to-4.2-ubuntu-16-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.1-to-4.2/upgrade-guide-from-4.1-to-4.2-ubuntu-18-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.1-to-4.2/upgrade-guide-from-4.1-to-4.2-debian-9.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.1-to-4.2/upgrade-guide-from-4.1-to-4.2-debian-10.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.1-to-4.2/metric-update-4.1-to-4.2.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.2-to-4.3/index.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.2-to-4.3/upgrade-guide-from-4.2-to-4.3-rpm.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.2-to-4.3/upgrade-guide-from-4.2-to-4.3-ubuntu-16-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.2-to-4.3/upgrade-guide-from-4.2-to-4.3-ubuntu-18-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.2-to-4.3/upgrade-guide-from-4.2-to-4.3-debian-9.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.2-to-4.3/upgrade-guide-from-4.2-to-4.3-debian-10.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.2-to-4.3/metric-update-4.2-to-4.3.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.3-to-4.4/index.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.3-to-4.4/upgrade-guide-from-4.3-to-4.4-rpm.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.3-to-4.4/upgrade-guide-from-4.3-to-4.4-ubuntu-16-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.3-to-4.4/upgrade-guide-from-4.3-to-4.4-ubuntu-18-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.3-to-4.4/upgrade-guide-from-4.3-to-4.4-ubuntu-20-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.3-to-4.4/upgrade-guide-from-4.3-to-4.4-debian-9.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.3-to-4.4/upgrade-guide-from-4.3-to-4.4-debian-10.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.3-to-4.4/metric-update-4.3-to-4.4.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.4-to-4.5/index.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.4-to-4.5/upgrade-guide-from-4.4-to-4.5-rpm.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.4-to-4.5/upgrade-guide-from-4.4-to-4.5-ubuntu-16-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.4-to-4.5/upgrade-guide-from-4.4-to-4.5-ubuntu-18-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.4-to-4.5/upgrade-guide-from-4.4-to-4.5-ubuntu-20-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.4-to-4.5/upgrade-guide-from-4.4-to-4.5-debian-9.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.4-to-4.5/upgrade-guide-from-4.4-to-4.5-debian-10.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.4-to-4.5/metric-update-4.4-to-4.5.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.5-to-4.6/index.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.5-to-4.6/upgrade-guide-from-4.5-to-4.6-rpm.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.5-to-4.6/upgrade-guide-from-4.5-to-4.6-ubuntu-16-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.5-to-4.6/upgrade-guide-from-4.5-to-4.6-ubuntu-18-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.5-to-4.6/upgrade-guide-from-4.5-to-4.6-ubuntu-20-04.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.5-to-4.6/upgrade-guide-from-4.5-to-4.6-debian-9.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.5-to-4.6/upgrade-guide-from-4.5-to-4.6-debian-10.html: /stable/upgrade/index.html
|
||||||
|
/stable/upgrade/upgrade-opensource/upgrade-guide-from-4.5-to-4.6/metric-update-4.5-to-4.6.html: /stable/upgrade/index.html
|
||||||
|
|
||||||
|
|||||||
@@ -8,8 +8,7 @@ Scylla implements the following compaction strategies in order to reduce :term:`
|
|||||||
* `Size-tiered compaction strategy (STCS)`_ - triggered when the system has enough (four by default) similarly sized SSTables.
|
* `Size-tiered compaction strategy (STCS)`_ - triggered when the system has enough (four by default) similarly sized SSTables.
|
||||||
* `Leveled compaction strategy (LCS)`_ - the system uses small, fixed-size (by default 160 MB) SSTables distributed across different levels.
|
* `Leveled compaction strategy (LCS)`_ - the system uses small, fixed-size (by default 160 MB) SSTables distributed across different levels.
|
||||||
* `Incremental Compaction Strategy (ICS)`_ - shares the same read and write amplification factors as STCS, but it fixes its 2x temporary space amplification issue by breaking huge sstables into SSTable runs, which are comprised of a sorted set of smaller (1 GB by default), non-overlapping SSTables.
|
* `Incremental Compaction Strategy (ICS)`_ - shares the same read and write amplification factors as STCS, but it fixes its 2x temporary space amplification issue by breaking huge sstables into SSTable runs, which are comprised of a sorted set of smaller (1 GB by default), non-overlapping SSTables.
|
||||||
* `Time-window compaction strategy (TWCS)`_ - designed for time series data; replaced Date-tiered compaction.
|
* `Time-window compaction strategy (TWCS)`_ - designed for time series data.
|
||||||
* `Date-tiered compaction strategy (DTCS)`_ - designed for time series data.
|
|
||||||
|
|
||||||
This document covers how to choose a compaction strategy and presents the benefits and disadvantages of each one. If you want more information on compaction in general or on any of these strategies, refer to the :doc:`Compaction Overview </kb/compaction>`. If you want an explanation of the CQL commands used to create a compaction strategy, refer to :doc:`Compaction CQL Reference </cql/compaction>` .
|
This document covers how to choose a compaction strategy and presents the benefits and disadvantages of each one. If you want more information on compaction in general or on any of these strategies, refer to the :doc:`Compaction Overview </kb/compaction>`. If you want an explanation of the CQL commands used to create a compaction strategy, refer to :doc:`Compaction CQL Reference </cql/compaction>` .
|
||||||
|
|
||||||
@@ -78,7 +77,6 @@ ICS is only available in ScyllaDB Enterprise. See the `ScyllaDB Enetrpise docume
|
|||||||
Time-window Compaction Strategy (TWCS)
|
Time-window Compaction Strategy (TWCS)
|
||||||
======================================
|
======================================
|
||||||
|
|
||||||
Time-window compaction strategy was introduced in Cassandra 3.0.8 for time-series data as a replacement for `Date-tiered Compaction Strategy (DTCS)`_.
|
|
||||||
Time-Window Compaction Strategy compacts SSTables within each time window using `Size-tiered Compaction Strategy (STCS)`_.
|
Time-Window Compaction Strategy compacts SSTables within each time window using `Size-tiered Compaction Strategy (STCS)`_.
|
||||||
SSTables from different time windows are never compacted together. You set the :ref:`TimeWindowCompactionStrategy <time-window-compactionstrategy-twcs>` parameters when you create a table using a CQL command.
|
SSTables from different time windows are never compacted together. You set the :ref:`TimeWindowCompactionStrategy <time-window-compactionstrategy-twcs>` parameters when you create a table using a CQL command.
|
||||||
|
|
||||||
@@ -87,9 +85,8 @@ SSTables from different time windows are never compacted together. You set the :
|
|||||||
Time-window Compaction benefits
|
Time-window Compaction benefits
|
||||||
-------------------------------
|
-------------------------------
|
||||||
|
|
||||||
* Keeps entries according to a time range, making searches for data within a given range easy to do, resulting in better read performance
|
* Keeps entries according to a time range, making searches for data within a given range easy to do, resulting in better read performance.
|
||||||
* Improves over DTCS in that it reduces the number to huge compactions
|
* Allows you to expire an entire SSTable at once (using a TTL) as the data is already organized within a time frame.
|
||||||
* Allows you to expire an entire SSTable at once (using a TTL) as the data is already organized within a time frame
|
|
||||||
|
|
||||||
Time-window Compaction deficits
|
Time-window Compaction deficits
|
||||||
-------------------------------
|
-------------------------------
|
||||||
@@ -102,14 +99,6 @@ Set the parameters for :ref:`Time-window Compaction <time-window-compactionstrat
|
|||||||
|
|
||||||
Use the table in `Which strategy is best`_ to determine if this is the right strategy for your needs.
|
Use the table in `Which strategy is best`_ to determine if this is the right strategy for your needs.
|
||||||
|
|
||||||
.. _DTCS1:
|
|
||||||
|
|
||||||
Date-tiered Compaction Strategy (DTCS)
|
|
||||||
======================================
|
|
||||||
|
|
||||||
Date-Tiered Compaction is designed for time series data. This strategy was introduced with Cassandra 2.1.
|
|
||||||
It is only suitable for time-series data. This strategy is not recommended and has been replaced by :ref:`Time-window Compaction Strategy <TWCS1>`.
|
|
||||||
|
|
||||||
.. _which-strategy-is-best:
|
.. _which-strategy-is-best:
|
||||||
|
|
||||||
Which strategy is best
|
Which strategy is best
|
||||||
|
|||||||
@@ -37,7 +37,12 @@ Enabling Raft
|
|||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
In ScyllaDB 5.2 and ScyllaDB Enterprise 2023.1 Raft is Generally Available and can be safely used for consistent schema management.
|
In ScyllaDB 5.2 and ScyllaDB Enterprise 2023.1 Raft is Generally Available and can be safely used for consistent schema management.
|
||||||
In further versions, it will be mandatory.
|
It will get enabled by default when you upgrade your cluster to ScyllaDB 5.4 or 2024.1.
|
||||||
|
If needed, you can explicitly prevent it from getting enabled upon upgrade.
|
||||||
|
|
||||||
|
.. only:: opensource
|
||||||
|
|
||||||
|
See :doc:`the upgrade guide from 5.2 to 5.4 </upgrade/index>` for details.
|
||||||
|
|
||||||
ScyllaDB Open Source 5.2 and later, and ScyllaDB Enterprise 2023.1 and later come equipped with a procedure that can setup Raft-based consistent cluster management in an existing cluster. We refer to this as the **Raft upgrade procedure** (do not confuse with the :doc:`ScyllaDB version upgrade procedure </upgrade/index/>`).
|
ScyllaDB Open Source 5.2 and later, and ScyllaDB Enterprise 2023.1 and later come equipped with a procedure that can setup Raft-based consistent cluster management in an existing cluster. We refer to this as the **Raft upgrade procedure** (do not confuse with the :doc:`ScyllaDB version upgrade procedure </upgrade/index/>`).
|
||||||
|
|
||||||
@@ -214,6 +219,36 @@ of nodes in the cluster is available. The following examples illustrate how Raft
|
|||||||
|
|
||||||
In summary, Raft makes schema changes safe, but it requires that a quorum of nodes in the cluster is available.
|
In summary, Raft makes schema changes safe, but it requires that a quorum of nodes in the cluster is available.
|
||||||
|
|
||||||
|
.. _raft-topology-changes:
|
||||||
|
|
||||||
|
.. only:: opensource
|
||||||
|
|
||||||
|
Consistent Topology with Raft :label-caution:`Experimental`
|
||||||
|
-----------------------------------------------------------------
|
||||||
|
|
||||||
|
ScyllaDB can use Raft to manage cluster topology. With Raft-managed topology
|
||||||
|
enabled, all topology operations are internally sequenced in a consistent
|
||||||
|
way. A centralized coordination process ensures that topology metadata is
|
||||||
|
synchronized across the nodes on each step of a topology change procedure.
|
||||||
|
This makes topology updates fast and safe, as the cluster administrator can
|
||||||
|
trigger many topology operations concurrently, and the coordination process
|
||||||
|
will safely drive all of them to completion. For example, multiple nodes can
|
||||||
|
be bootstrapped concurrently, which couldn't be done with the old
|
||||||
|
gossip-based topology.
|
||||||
|
|
||||||
|
Support for Raft-managed topology is experimental and must be explicitly
|
||||||
|
enabled in the ``scylla.yaml`` configuration file by specifying
|
||||||
|
the ``consistent-topology-changes`` option:
|
||||||
|
|
||||||
|
.. code::
|
||||||
|
|
||||||
|
experimental_features:
|
||||||
|
- consistent-topology-changes
|
||||||
|
|
||||||
|
As with other experimental features in ScyllaDB, you should not enable this
|
||||||
|
feature in production clusters due to insufficient stability. The feature
|
||||||
|
is undergoing backward-incompatible changes that may prevent upgrading
|
||||||
|
the cluster.
|
||||||
|
|
||||||
.. _raft-handling-failures:
|
.. _raft-handling-failures:
|
||||||
|
|
||||||
|
|||||||
@@ -39,7 +39,8 @@ extensions = [
|
|||||||
"recommonmark", # optional
|
"recommonmark", # optional
|
||||||
"sphinxcontrib.datatemplates",
|
"sphinxcontrib.datatemplates",
|
||||||
"scylladb_cc_properties",
|
"scylladb_cc_properties",
|
||||||
"scylladb_aws_images"
|
"scylladb_aws_images",
|
||||||
|
"scylladb_include_flag"
|
||||||
]
|
]
|
||||||
|
|
||||||
# The suffix(es) of source filenames.
|
# The suffix(es) of source filenames.
|
||||||
|
|||||||
@@ -19,8 +19,6 @@ The following compaction strategies are supported by Scylla:
|
|||||||
|
|
||||||
* Time-window Compaction Strategy (`TWCS`_)
|
* Time-window Compaction Strategy (`TWCS`_)
|
||||||
|
|
||||||
* Date-tiered Compaction Strategy (DTCS) - use `TWCS`_ instead
|
|
||||||
|
|
||||||
This page concentrates on the parameters to use when creating a table with a compaction strategy. If you are unsure which strategy to use or want general information on the compaction strategies which are available to Scylla, refer to :doc:`Compaction Strategies </architecture/compaction/compaction-strategies>`.
|
This page concentrates on the parameters to use when creating a table with a compaction strategy. If you are unsure which strategy to use or want general information on the compaction strategies which are available to Scylla, refer to :doc:`Compaction Strategies </architecture/compaction/compaction-strategies>`.
|
||||||
|
|
||||||
Common options
|
Common options
|
||||||
|
|||||||
@@ -79,27 +79,66 @@ and to the TRUNCATE data definition query.
|
|||||||
|
|
||||||
In addition, the timeout parameter can be applied to SELECT queries as well.
|
In addition, the timeout parameter can be applied to SELECT queries as well.
|
||||||
|
|
||||||
|
```eval_rst
|
||||||
|
.. _keyspace-storage-options:
|
||||||
|
```
|
||||||
|
|
||||||
## Keyspace storage options
|
## Keyspace storage options
|
||||||
|
|
||||||
Storage options allows specifying the storage format assigned to a keyspace.
|
<!---
|
||||||
The default storage format is `LOCAL`, which simply means storing all the sstables
|
This section must be moved to Data Definition> CREATE KEYSPACE
|
||||||
in a local directory.
|
when support for object storage is GA.
|
||||||
Experimental support for `S3` storage format is also added. This option is not fully
|
--->
|
||||||
implemented yet, but it will allow storing sstables in a shared, S3-compatible object store.
|
|
||||||
|
|
||||||
Storage options can be specified via `CREATE KEYSPACE` or `ALTER KEYSPACE` statement
|
By default, SStables of a keyspace are stored in a local directory.
|
||||||
and it's formatted as a map of options - similarly to how replication strategy is handled.
|
As an alternative, you can configure your keyspace to be stored
|
||||||
|
on Amazon S3 or another S3-compatible object store.
|
||||||
|
|
||||||
Examples:
|
Support for object storage is experimental and must be explicitly
|
||||||
```cql
|
enabled in the ``scylla.yaml`` configuration file by specifying
|
||||||
CREATE KEYSPACE ks
|
the ``keyspace-storage-options`` option:
|
||||||
WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 3 }
|
|
||||||
AND STORAGE = { 'type' : 'S3', 'bucket' : '/tmp/b1', 'endpoint' : 'localhost' } ;
|
```
|
||||||
|
experimental_features:
|
||||||
|
- keyspace-storage-options
|
||||||
```
|
```
|
||||||
|
|
||||||
|
With support for object storage enabled, add your endpoint configuration
|
||||||
|
to ``scylla.yaml``:
|
||||||
|
|
||||||
|
1. Create an ``object-storage-config-file.yaml`` file with a description of
|
||||||
|
allowed endpoints, for example:
|
||||||
|
|
||||||
|
```
|
||||||
|
endpoints:
|
||||||
|
- name: $endpoint_address_or_domain_name
|
||||||
|
port: $port_number
|
||||||
|
https: optional True or False
|
||||||
|
aws_region: optional region name, e.g. us-east-1
|
||||||
|
aws_access_key_id: optional AWS access key ID
|
||||||
|
aws_secret_access_key: optional AWS secret access key
|
||||||
|
aws_session_token: optional AWS session token
|
||||||
|
```
|
||||||
|
1. Specify the ``object-storage-config-file`` option in your ``scylla.yaml``,
|
||||||
|
providing ``object-storage-config-file.yaml`` as the value:
|
||||||
|
|
||||||
|
```
|
||||||
|
object-storage-config-file: object-storage-config-file.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
Now you can configure your object storage when creating a keyspace:
|
||||||
|
|
||||||
```cql
|
```cql
|
||||||
ALTER KEYSPACE ks WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 3 }
|
CREATE KEYSPACE with STORAGE = { 'type': 'S3', 'endpoint': '$endpoint_name', 'bucket': '$bucket' }
|
||||||
AND STORAGE = { 'type' : 'S3', 'bucket': '/tmp/b2', 'endpoint' : 'localhost' } ;
|
```
|
||||||
|
|
||||||
|
**Example**
|
||||||
|
|
||||||
|
```cql
|
||||||
|
CREATE KEYSPACE ks
|
||||||
|
WITH REPLICATION = { 'class' : 'NetworkTopologyStrategy', 'replication_factor' : 3 }
|
||||||
|
AND STORAGE = { 'type' : 'S3', 'bucket' : '/tmp/b1', 'endpoint' : 'localhost' } ;
|
||||||
```
|
```
|
||||||
|
|
||||||
Storage options can be inspected by checking the new system schema table: `system_schema.scylla_keyspaces`:
|
Storage options can be inspected by checking the new system schema table: `system_schema.scylla_keyspaces`:
|
||||||
|
|||||||
@@ -6,18 +6,26 @@
|
|||||||
CQLSh: the CQL shell
|
CQLSh: the CQL shell
|
||||||
--------------------
|
--------------------
|
||||||
|
|
||||||
cqlsh is a command line shell for interacting with Cassandra through CQL (the Cassandra Query Language). It is shipped
|
cqlsh is a command line shell for interacting with ScyllaDB through CQL
|
||||||
with every Cassandra package and can be found in the bin/ directory alongside the Cassandra executable. cqlsh utilizes
|
(the Cassandra Query Language). It is shipped with every ScyllaDB package
|
||||||
the Python native protocol driver and connects to the single node specified on the command line.
|
and can be found in the ``bin/`` directory. In addition, it is available on
|
||||||
|
`Docker Hub <https://hub.docker.com/r/scylladb/scylla-cqlsh>`_ and in
|
||||||
|
the `Python Package Index (PyPI) <https://pypi.org/project/scylla-cqlsh/>`_.
|
||||||
|
|
||||||
|
cqlsh utilizes the Python native protocol driver and connects to the single
|
||||||
|
node specified on the command line.
|
||||||
|
|
||||||
|
See the `scylla-cqlsh <https://github.com/scylladb/scylla-cqlsh>`_ repository
|
||||||
|
on GitHub for usage examples.
|
||||||
|
|
||||||
|
|
||||||
Compatibility
|
Compatibility
|
||||||
^^^^^^^^^^^^^
|
^^^^^^^^^^^^^
|
||||||
|
|
||||||
cqlsh is compatible with Python 2.7.
|
cqlsh is compatible with Python 3.8 - Python 3.11.
|
||||||
|
|
||||||
In general, a given version of cqlsh is only guaranteed to work with the version of Cassandra that it was released with.
|
A given version of cqlsh is only guaranteed to work with the version of ScyllaDB that it was released with.
|
||||||
In some cases, cqlsh may work with older or newer versions of Cassandra, but this is not officially supported.
|
cqlsh may work with older or newer versions of ScyllaDB without any guarantees.
|
||||||
|
|
||||||
|
|
||||||
Optional Dependencies
|
Optional Dependencies
|
||||||
@@ -72,13 +80,13 @@ Options:
|
|||||||
``/usr/bin/google-chrome-stable %s``).
|
``/usr/bin/google-chrome-stable %s``).
|
||||||
|
|
||||||
``--ssl``
|
``--ssl``
|
||||||
Use SSL when connecting to Cassandra
|
Use SSL when connecting to ScyllaDB.
|
||||||
|
|
||||||
``-u`` ``--user``
|
``-u`` ``--user``
|
||||||
Username to authenticate against Cassandra with
|
Username to authenticate against ScyllaDB.
|
||||||
|
|
||||||
``-p`` ``--password``
|
``-p`` ``--password``
|
||||||
The password to authenticate against Cassandra with should
|
The password to authenticate against ScyllaDB, which should
|
||||||
be used in conjunction with ``--user``
|
be used in conjunction with ``--user``
|
||||||
|
|
||||||
``-k`` ``--keyspace``
|
``-k`` ``--keyspace``
|
||||||
@@ -162,17 +170,17 @@ consistency ``ALL`` is not guaranteed to be enough).
|
|||||||
|
|
||||||
SHOW VERSION
|
SHOW VERSION
|
||||||
~~~~~~~~~~~~
|
~~~~~~~~~~~~
|
||||||
This command is useful if you want to check which Cassandra version is compatible with your Scylla version.
|
This command is useful if you want to check which Cassandra version is compatible with your ScyllaDB version.
|
||||||
Note that the two standards are not 100% identical and this command is simply a comparison tool.
|
Note that the two standards are not 100% identical and this command is simply a comparison tool.
|
||||||
|
|
||||||
If you want to display your current Scylla Version, refer to :ref:`Check your current version of Scylla <check-your-current-version-of-scylla>`.
|
If you want to display your current ScyllaDB version, refer to :ref:`Check your current version of Scylla <check-your-current-version-of-scylla>`.
|
||||||
|
|
||||||
The display shows:
|
The display shows:
|
||||||
|
|
||||||
* The cqlsh tool version that you're using
|
* The cqlsh tool version that you're using
|
||||||
* The Apache Cassandra version that your version of Scylla is most compatible with
|
* The Apache Cassandra version that your version of ScyllaDB is most compatible with
|
||||||
* The CQL protocol standard that your version of Scylla is most compatible with
|
* The CQL protocol standard that your version of ScyllaDB is most compatible with
|
||||||
* The native protocol standard that your version of Scylla is most compatible with
|
* The native protocol standard that your version of ScyllaDB is most compatible with
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
|
|
||||||
@@ -191,7 +199,7 @@ Returns:
|
|||||||
SHOW HOST
|
SHOW HOST
|
||||||
~~~~~~~~~
|
~~~~~~~~~
|
||||||
|
|
||||||
Prints the IP address and port of the Cassandra node that cqlsh is connected to in addition to the cluster name.
|
Prints the IP address and port of the ScyllaDB node that cqlsh is connected to in addition to the cluster name.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
|
|
||||||
@@ -324,7 +332,7 @@ contents of a single column are large.
|
|||||||
LOGIN
|
LOGIN
|
||||||
~~~~~
|
~~~~~
|
||||||
|
|
||||||
Authenticate as a specified Cassandra user for the current session.
|
Authenticate as a specified ScyllaDB user for the current session.
|
||||||
|
|
||||||
`Usage`::
|
`Usage`::
|
||||||
|
|
||||||
|
|||||||
@@ -198,6 +198,18 @@ An example that excludes a datacenter while using ``replication_factor``::
|
|||||||
DESCRIBE KEYSPACE excalibur
|
DESCRIBE KEYSPACE excalibur
|
||||||
CREATE KEYSPACE excalibur WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1': '3'} AND durable_writes = true;
|
CREATE KEYSPACE excalibur WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1': '3'} AND durable_writes = true;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
.. only:: opensource
|
||||||
|
|
||||||
|
Keyspace storage options :label-caution:`Experimental`
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
By default, SStables of a keyspace are stored locally.
|
||||||
|
As an alternative, you can configure your keyspace to be stored
|
||||||
|
on Amazon S3 or another S3-compatible object store.
|
||||||
|
See :ref:`Keyspace storage options <keyspace-storage-options>` for details.
|
||||||
|
|
||||||
.. _use-statement:
|
.. _use-statement:
|
||||||
|
|
||||||
USE
|
USE
|
||||||
@@ -687,19 +699,12 @@ Compaction options
|
|||||||
|
|
||||||
The ``compaction`` options must at least define the ``'class'`` sub-option, which defines the compaction strategy class
|
The ``compaction`` options must at least define the ``'class'`` sub-option, which defines the compaction strategy class
|
||||||
to use. The default supported classes are ``'SizeTieredCompactionStrategy'``,
|
to use. The default supported classes are ``'SizeTieredCompactionStrategy'``,
|
||||||
``'LeveledCompactionStrategy'``, ``'IncrementalCompactionStrategy'``, and ``'DateTieredCompactionStrategy'``
|
``'LeveledCompactionStrategy'``, and ``'IncrementalCompactionStrategy'``.
|
||||||
Custom strategy can be provided by specifying the full class name as a :ref:`string constant
|
Custom strategy can be provided by specifying the full class name as a :ref:`string constant
|
||||||
<constants>`.
|
<constants>`.
|
||||||
|
|
||||||
All default strategies support a number of common options, as well as options specific to
|
All default strategies support a number of common options, as well as options specific to
|
||||||
the strategy chosen (see the section corresponding to your strategy for details: :ref:`STCS <stcs-options>`, :ref:`LCS <lcs-options>`, and :ref:`TWCS <twcs-options>`). DTCS is not recommended, and TWCS should be used instead.
|
the strategy chosen (see the section corresponding to your strategy for details: :ref:`STCS <stcs-options>`, :ref:`LCS <lcs-options>`, and :ref:`TWCS <twcs-options>`).
|
||||||
|
|
||||||
|
|
||||||
.. ``'Date Tiered Compaction Strategy is not recommended and has been replaced by Time Window Compaction Strategy.'`` (:ref:`TWCS <TWCS>`) (the
|
|
||||||
.. is also supported but is deprecated and ``'TimeWindowCompactionStrategy'`` should be
|
|
||||||
.. preferred instead).
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
.. _cql-compression-options:
|
.. _cql-compression-options:
|
||||||
|
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ sections common to data updating statements.
|
|||||||
Update parameters
|
Update parameters
|
||||||
~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
The ``UPDATE``, ``INSERT`` (and ``DELETE`` and ``BATCH`` for the ``TIMESTAMP``) statements support the following
|
The ``UPDATE``, ``INSERT`` (and ``DELETE`` and ``BATCH`` for the ``TIMESTAMP`` and ``TIMEOUT``) statements support the following
|
||||||
parameters:
|
parameters:
|
||||||
|
|
||||||
- ``TIMESTAMP``: sets the timestamp for the operation. If not specified, the coordinator will use the current time, in
|
- ``TIMESTAMP``: sets the timestamp for the operation. If not specified, the coordinator will use the current time, in
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ Wasm support for user-defined functions
|
|||||||
This document describes the details of Wasm language support in user-defined functions (UDF). The language ``wasm`` is one of the possible languages to use, besides Lua, to implement these functions. To learn more about User-defined functions in ScyllaDB, click :ref:`here <udfs>`.
|
This document describes the details of Wasm language support in user-defined functions (UDF). The language ``wasm`` is one of the possible languages to use, besides Lua, to implement these functions. To learn more about User-defined functions in ScyllaDB, click :ref:`here <udfs>`.
|
||||||
|
|
||||||
|
|
||||||
.. note:: Until ScyllaDB 5.2, the Wasm language was called ``xwasm``. This name is replaced with ``wasm`` in ScyllaDB 5.3.
|
.. note:: Until ScyllaDB 5.2, the Wasm language was called ``xwasm``. This name is replaced with ``wasm`` in ScyllaDB 5.4.
|
||||||
|
|
||||||
How to generate a correct Wasm UDF source code
|
How to generate a correct Wasm UDF source code
|
||||||
----------------------------------------------
|
----------------------------------------------
|
||||||
|
|||||||
@@ -198,11 +198,27 @@ We're not able to prevent a node learning about a new generation too late due to
|
|||||||
|
|
||||||
After committing the generation ID, the topology coordinator publishes the generation data to user-facing description tables (`system_distributed.cdc_streams_descriptions_v2` and `system_distributed.cdc_generation_timestamps`).
|
After committing the generation ID, the topology coordinator publishes the generation data to user-facing description tables (`system_distributed.cdc_streams_descriptions_v2` and `system_distributed.cdc_generation_timestamps`).
|
||||||
|
|
||||||
#### Generation switching: other notes
|
#### Generation switching: accepting writes
|
||||||
|
|
||||||
Due to the need of maintaining colocation we don't allow the client to send writes with arbitrary timestamps.
|
Due to the need of maintaining colocation we don't allow the client to send writes with arbitrary timestamps. We allow:
|
||||||
Suppose that a write is requested and the write coordinator's local clock has time `C` and the generation operating at time `C` has timestamp `T` (`T <= C`). Then we only allow the write if its timestamp is in the interval [`T`, `C + generation_leeway`), where `generation_leeway` is a small time-interval constant (e.g. 5 seconds).
|
- writes to the current and next generations unless they are too far into the future,
|
||||||
Reason: we cannot allow writes before `T`, because they belong to the old generation whose token ranges might no longer refine the current vnodes, so the corresponding log write would not necessarily be colocated with the base write. We also cannot allow writes too far "into the future" because we don't know what generation will be operating at that time (the node which will introduce this generation might not have joined yet). But, as mentioned before, we assume that we'll learn about the next generation in time. Again --- the need for this assumption will be gone in a future patch.
|
- writes to the previous generations unless they are too far into the past.
|
||||||
|
|
||||||
|
##### Writes to the current and next generations
|
||||||
|
|
||||||
|
Suppose that a write with timestamp `W` is requested and the write coordinator's local clock has time `C` and the generation operating at time `C` has timestamp `T` (`T <= C`) such that `T <= W`. Then we only allow the write if `W < C + generation_leeway`, where `generation_leeway` is a small time-interval constant (e.g. 5 seconds).
|
||||||
|
|
||||||
|
We cannot allow writes too far "into the future" because we don't know what generation will be operating at that time (the node which will introduce this generation might not have joined yet). But, as mentioned before, we assume that we'll learn about the next generation in time. Again --- the need for this assumption will be gone in a future patch.
|
||||||
|
|
||||||
|
##### Writes to the previous generations
|
||||||
|
|
||||||
|
This time suppose that `T > W`. Then we only allow the write if `W > C - generation_leeway` and there was a generation operating at `W`.
|
||||||
|
|
||||||
|
We allow writes to previous generations to improve user experience. If a client generates timestamps by itself and clocks are not perfectly synchronized, there may be short periods of time around the moment of switching generations when client's writes are rejected because they fall into one of the previous generations. Usually, this problem is easy to overcome by the client. It can simply repeat a write a few times, but using a higher timestamp. Unfortunately, if a table additionally uses LWT, the client cannot increase the timestamp because LWT makes timestamps permanent. Once Paxos commits an entry with a given timestamp, Scylla will keep trying to apply that entry until it succeeds, with the same timestamp. Applying the entry involves doing a CDC log table write. If it fails, we are stuck. Allowing writes to the previous generations is also a probabilistic fix for this bug.
|
||||||
|
|
||||||
|
Note that writing only to the previous generation might not be enough. With the Raft-based topology and tablets, we can add multiple nodes almost instantly. Then, we can have multiple generations with almost identical timestamps.
|
||||||
|
|
||||||
|
We allow writes only to the recent past to reduce the number of generations that must be stored in memory.
|
||||||
|
|
||||||
### Streams description tables
|
### Streams description tables
|
||||||
|
|
||||||
|
|||||||
21
docs/getting-started/_common/os-support-info.rst
Normal file
21
docs/getting-started/_common/os-support-info.rst
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
You can `build ScyllaDB from source <https://github.com/scylladb/scylladb#build-prerequisites>`_ on other x86_64 or aarch64 platforms, without any guarantees.
|
||||||
|
|
||||||
|
+----------------------------+-------------+---------------+---------+---------------+
|
||||||
|
| Linux Distributions |Ubuntu | Debian | CentOS /| Rocky / |
|
||||||
|
| | | | RHEL | RHEL |
|
||||||
|
+----------------------------+------+------+-------+-------+---------+-------+-------+
|
||||||
|
| ScyllaDB Version / Version |20.04 |22.04 | 10 | 11 | 7 | 8 | 9 |
|
||||||
|
+============================+======+======+=======+=======+=========+=======+=======+
|
||||||
|
| 5.4 | |v| | |v| | |v| | |v| | |x| | |v| | |v| |
|
||||||
|
+----------------------------+------+------+-------+-------+---------+-------+-------+
|
||||||
|
| 5.2 | |v| | |v| | |v| | |v| | |v| | |v| | |x| |
|
||||||
|
+----------------------------+------+------+-------+-------+---------+-------+-------+
|
||||||
|
|
||||||
|
* The recommended OS for ScyllaDB Open Source is Ubuntu 22.04.
|
||||||
|
* All releases are available as a Docker container and EC2 AMI, GCP, and Azure images.
|
||||||
|
|
||||||
|
Supported Architecture
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
ScyllaDB Open Source supports x86_64 for all versions and AArch64 starting from ScyllaDB 4.6 and nightly build.
|
||||||
|
In particular, aarch64 support includes AWS EC2 Graviton.
|
||||||
@@ -1,10 +1,11 @@
|
|||||||
.. |UBUNTU_SCYLLADB_LIST| replace:: scylla-5.2.list
|
.. |UBUNTU_SCYLLADB_LIST| replace:: scylla-5.4.list
|
||||||
.. |CENTOS_SCYLLADB_REPO| replace:: scylla-5.2.repo
|
.. |CENTOS_SCYLLADB_REPO| replace:: scylla-5.4.repo
|
||||||
|
|
||||||
.. The |RHEL_EPEL| variable needs to be adjusted per release, depending on support for RHEL.
|
.. The |RHEL_EPEL| variable needs to be adjusted per release, depending on support for RHEL.
|
||||||
.. 5.2 supports Rocky/RHEL 8
|
.. 5.2 supports Rocky/RHEL 8 only
|
||||||
.. When RHEL 9 is supported, add https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm
|
.. 5.4 supports Rocky/RHEL 8 and 9
|
||||||
.. |RHEL_EPEL| replace:: https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm
|
.. |RHEL_EPEL_8| replace:: https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm
|
||||||
|
.. |RHEL_EPEL_9| replace:: https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm
|
||||||
|
|
||||||
======================================
|
======================================
|
||||||
Install ScyllaDB Linux Packages
|
Install ScyllaDB Linux Packages
|
||||||
@@ -113,14 +114,21 @@ Install ScyllaDB
|
|||||||
sudo yum install epel-release
|
sudo yum install epel-release
|
||||||
|
|
||||||
|
|
||||||
RHEL:
|
Rocky/RHEL 8
|
||||||
|
|
||||||
.. code-block:: console
|
.. code-block:: console
|
||||||
:substitutions:
|
:substitutions:
|
||||||
|
|
||||||
sudo yum -y install |RHEL_EPEL|
|
sudo yum -y install |RHEL_EPEL_8|
|
||||||
|
|
||||||
|
|
||||||
|
Rocky/RHEL 9
|
||||||
|
|
||||||
|
.. code-block:: console
|
||||||
|
:substitutions:
|
||||||
|
|
||||||
|
sudo yum -y install |RHEL_EPEL_9|
|
||||||
|
|
||||||
#. Add the ScyllaDB RPM repository to your system.
|
#. Add the ScyllaDB RPM repository to your system.
|
||||||
|
|
||||||
.. code-block:: console
|
.. code-block:: console
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user