mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-26 19:35:12 +00:00
Compare commits
91 Commits
scylla-5.4
...
next-5.4
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ec9e5b82a0 | ||
|
|
4640b3efd3 | ||
|
|
136ccff353 | ||
|
|
29c352d9c8 | ||
|
|
888d5fe1a3 | ||
|
|
6e8911ed51 | ||
|
|
9c4fa2652c | ||
|
|
58377036b0 | ||
|
|
5b29da123f | ||
|
|
92ee525f22 | ||
|
|
bc1c6275a4 | ||
|
|
79629a80cd | ||
|
|
9f0b75bcd2 | ||
|
|
0fbec200e9 | ||
|
|
972b799773 | ||
|
|
68c581314a | ||
|
|
380ce9a6d8 | ||
|
|
2c01dfe12b | ||
|
|
ab22cb7253 | ||
|
|
0e02128d28 | ||
|
|
1e548770cf | ||
|
|
3e879c1bfa | ||
|
|
5e9a2193db | ||
|
|
c2e5d9e726 | ||
|
|
80ff0688b1 | ||
|
|
a319085870 | ||
|
|
b24bd4d176 | ||
|
|
f628e7439c | ||
|
|
42da43b5b4 | ||
|
|
679fa0f72a | ||
|
|
89733a1f18 | ||
|
|
f185a227a2 | ||
|
|
89fd08b955 | ||
|
|
f29d51d9c3 | ||
|
|
01d5169593 | ||
|
|
3116ea7d8e | ||
|
|
36ccf67bee | ||
|
|
bae47ca197 | ||
|
|
fdcbbb85ad | ||
|
|
65daae0fbe | ||
|
|
f07fbcf929 | ||
|
|
1d0a6672d6 | ||
|
|
e16327034c | ||
|
|
3510ff3179 | ||
|
|
e9b8d08b74 | ||
|
|
d67af8da1c | ||
|
|
66fc7c0494 | ||
|
|
67be26ff7d | ||
|
|
97893a4f6d | ||
|
|
ab9683d182 | ||
|
|
892ffa966d | ||
|
|
4b6e462266 | ||
|
|
ce22d0071b | ||
|
|
a5d34b62ac | ||
|
|
f121720898 | ||
|
|
d217ab9cc7 | ||
|
|
6eac67628e | ||
|
|
a28b38d0a9 | ||
|
|
614cabf9cd | ||
|
|
193fda6dfb | ||
|
|
50f3f3d1a3 | ||
|
|
9ce5c2e6ce | ||
|
|
2f2bc18376 | ||
|
|
c795275675 | ||
|
|
b13cee4c7c | ||
|
|
bea1f4891d | ||
|
|
9535abf552 | ||
|
|
abb4751e00 | ||
|
|
39ec136a09 | ||
|
|
b7ef9652fb | ||
|
|
d3b2702be1 | ||
|
|
4ef4893f7e | ||
|
|
19999554e7 | ||
|
|
43f77c71c7 | ||
|
|
4aa0b84ba7 | ||
|
|
427127de57 | ||
|
|
d7b1116170 | ||
|
|
72155312e5 | ||
|
|
f8dcbc6037 | ||
|
|
dc1968cb9e | ||
|
|
7e40b658c8 | ||
|
|
58671274d8 | ||
|
|
c18f14cd78 | ||
|
|
c19f980802 | ||
|
|
0abccd212d | ||
|
|
b8d0df24ed | ||
|
|
b3de65a8fb | ||
|
|
3eb15e841a | ||
|
|
017524c7d8 | ||
|
|
1680bc2902 | ||
|
|
2e836fa077 |
87
.github/scripts/label_promoted_commits.py
vendored
Executable file
87
.github/scripts/label_promoted_commits.py
vendored
Executable file
@@ -0,0 +1,87 @@
|
||||
from github import Github
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Read the GitHub API token at import time; the workflow passes it in via
# the GITHUB_TOKEN environment variable. Fail fast with a clear message
# when it is missing, since every API call below requires authentication.
try:
    github_token = os.environ["GITHUB_TOKEN"]
except KeyError:
    print("Please set the 'GITHUB_TOKEN' environment variable")
    sys.exit(1)
|
||||
|
||||
|
||||
def parser(argv=None):
    """Parse the script's command-line arguments.

    Args:
        argv: Optional list of argument strings; when None (the default),
              argparse falls back to sys.argv[1:], so existing callers are
              unaffected. Accepting an explicit list makes the function
              testable.

    Returns:
        argparse.Namespace with the parsed options.
    """
    def str2bool(value):
        # argparse's `type=bool` is a well-known trap: bool('False') is True,
        # so any non-empty string would enable the flag. Parse the common
        # textual spellings explicitly instead.
        return str(value).strip().lower() in ('1', 'true', 'yes', 'on')

    parser = argparse.ArgumentParser()
    parser.add_argument('--repository', type=str, required=True,
                        help='Github repository name (e.g., scylladb/scylladb)')
    parser.add_argument('--commit_before_merge', type=str, required=True,
                        help='Git commit ID to start labeling from ('
                             'newest commit).')
    parser.add_argument('--commit_after_merge', type=str, required=True,
                        help='Git commit ID to end labeling at (oldest '
                             'commit, exclusive).')
    parser.add_argument('--update_issue', type=str2bool, default=False,
                        help='Set True to update issues when backport was '
                             'done')
    parser.add_argument('--ref', type=str, required=True, help='PR target branch')
    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def add_comment_and_close_pr(pr, comment):
    """Leave *comment* on an open pull request and close it.

    A pull request that is already closed is left untouched.
    """
    if pr.state != 'open':
        return
    pr.create_issue_comment(comment)
    pr.edit(state="closed")
|
||||
|
||||
|
||||
def mark_backport_done(repo, ref_pr_number, branch):
    """Swap the `backport/<branch>` label on the parent PR for its -done form.

    Looks up PR *ref_pr_number* in *repo*, removes `backport/<branch>` if
    present, and adds `backport/<branch>-done` unless it is already there.
    """
    pull = repo.get_pull(int(ref_pr_number))
    backport_label = f'backport/{branch}'
    done_label = f'{backport_label}-done'
    existing = {label.name for label in pull.get_labels()}
    if backport_label in existing:
        pull.remove_from_labels(backport_label)
    if done_label not in existing:
        pull.add_to_labels(done_label)
|
||||
|
||||
|
||||
def main():
    """Label PRs whose commits were just pushed (promoted).

    This script is triggered by a push event to either the master branch or
    a branch named branch-x.y (where x and y represent version numbers).
    Based on the pushed branch it performs the following actions:
      - master: add the `promoted-to-master` label to each PR closed by the
        pushed commits (needed later by the auto-backport process).
      - branch-x.y (a merged backport): replace in the original PR the
        `backport/x.y` label with `backport/x.y-done` and close the backport
        PR itself (GitHub auto-closes only PRs targeting the default branch).
    """
    args = parser()
    pr_pattern = re.compile(r'Closes .*#([0-9]+)')
    target_branch = re.search(r'branch-(\d+\.\d+)', args.ref)
    g = Github(github_token)
    repo = g.get_repo(args.repository, lazy=False)
    # All commits contained in this push: (commit_after_merge, commit_before_merge].
    commits = repo.compare(head=args.commit_after_merge, base=args.commit_before_merge)
    processed_prs = set()  # handle each PR once even when it has several commits
    for commit in commits.commits:
        print(f'Commit sha is: {commit.sha}')
        match = pr_pattern.search(commit.commit.message)
        if not match:
            continue
        pr_number = int(match.group(1))
        if pr_number in processed_prs:
            continue
        if target_branch:
            branch_name = target_branch.group(1)
            pr = repo.get_pull(pr_number)
            # `pr.body` is None when the PR description is empty; guard it
            # so re.findall() does not raise TypeError.
            refs_pr = re.findall(r'Refs (?:#|https.*?)(\d+)', pr.body or '')
            if refs_pr:
                print(f'branch-{branch_name}, pr number is: {pr_number}')
                # 1. change the backport label of the parent PR to note that
                #    we've merged the corresponding backport PR
                # 2. close the backport PR and leave a comment on it to note
                #    that it has been merged with a certain git commit
                ref_pr_number = refs_pr[0]
                mark_backport_done(repo, ref_pr_number, branch_name)
                comment = f'Closed via {commit.sha}'
                add_comment_and_close_pr(pr, comment)
        else:
            print(f'master branch, pr number is: {pr_number}')
            pr = repo.get_pull(pr_number)
            pr.add_to_labels('promoted-to-master')
        processed_prs.add(pr_number)
|
||||
|
||||
|
||||
# Allow the module to be imported without side effects beyond the token
# check above; run only when executed directly (as the workflow does).
if __name__ == "__main__":
    main()
|
||||
36
.github/workflows/add-label-when-promoted.yaml
vendored
Normal file
36
.github/workflows/add-label-when-promoted.yaml
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
name: Check if commits are promoted

# Runs on every push to the default branch or a release branch; the Python
# script then labels the PRs whose commits were contained in the push.
on:
  push:
    branches:
      - master
      - branch-*.*

env:
  DEFAULT_BRANCH: 'master'

jobs:
  check-commit:
    runs-on: ubuntu-latest
    # The script edits PR labels/comments, so it needs write access.
    permissions:
      pull-requests: write
      issues: write
    steps:
      # Dump the full event payload for debugging failed runs.
      - name: Dump GitHub context
        env:
          GITHUB_CONTEXT: ${{ toJson(github) }}
        run: echo "$GITHUB_CONTEXT"
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          repository: ${{ github.repository }}
          ref: ${{ env.DEFAULT_BRANCH }}
          fetch-depth: 0 # Fetch all history for all tags and branches

      # PyGithub, packaged by Ubuntu as python3-github.
      - name: Install dependencies
        run: sudo apt-get install -y python3-github

      - name: Run python script
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: python .github/scripts/label_promoted_commits.py --commit_before_merge ${{ github.event.before }} --commit_after_merge ${{ github.event.after }} --repository ${{ github.repository }} --ref ${{ github.ref }}
|
||||
@@ -78,7 +78,7 @@ fi
|
||||
|
||||
# Default scylla product/version tags
|
||||
PRODUCT=scylla
|
||||
VERSION=5.4.7
|
||||
VERSION=5.4.10
|
||||
|
||||
if test -f version
|
||||
then
|
||||
|
||||
@@ -208,7 +208,10 @@ protected:
|
||||
sstring local_dc = topology.get_datacenter();
|
||||
std::unordered_set<gms::inet_address> local_dc_nodes = topology.get_datacenter_endpoints().at(local_dc);
|
||||
for (auto& ip : local_dc_nodes) {
|
||||
if (_gossiper.is_alive(ip)) {
|
||||
// Note that it's not enough for the node to be is_alive() - a
|
||||
// node joining the cluster is also "alive" but not responsive to
|
||||
// requests. We need the node to be in normal state. See #19694.
|
||||
if (_gossiper.is_normal(ip)) {
|
||||
rjson::push_back(results, rjson::from_string(ip.to_sstring()));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -314,7 +314,7 @@ void req_params::process(const request& req) {
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
ent.value = req.param[name];
|
||||
ent.value = req.get_path_param(name);
|
||||
} catch (std::out_of_range&) {
|
||||
throw httpd::bad_param_exception(fmt::format("Mandatory parameter '{}' was not provided", name));
|
||||
}
|
||||
|
||||
@@ -54,7 +54,7 @@ static const char* str_to_regex(const sstring& v) {
|
||||
void set_collectd(http_context& ctx, routes& r) {
|
||||
cd::get_collectd.set(r, [](std::unique_ptr<request> req) {
|
||||
|
||||
auto id = ::make_shared<scollectd::type_instance_id>(req->param["pluginid"],
|
||||
auto id = ::make_shared<scollectd::type_instance_id>(req->get_path_param("pluginid"),
|
||||
req->get_query_param("instance"), req->get_query_param("type"),
|
||||
req->get_query_param("type_instance"));
|
||||
|
||||
@@ -91,7 +91,7 @@ void set_collectd(http_context& ctx, routes& r) {
|
||||
});
|
||||
|
||||
cd::enable_collectd.set(r, [](std::unique_ptr<request> req) -> future<json::json_return_type> {
|
||||
std::regex plugin(req->param["pluginid"].c_str());
|
||||
std::regex plugin(req->get_path_param("pluginid").c_str());
|
||||
std::regex instance(str_to_regex(req->get_query_param("instance")));
|
||||
std::regex type(str_to_regex(req->get_query_param("type")));
|
||||
std::regex type_instance(str_to_regex(req->get_query_param("type_instance")));
|
||||
|
||||
@@ -333,7 +333,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_memtable_columns_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], uint64_t{0}, [](replica::column_family& cf) {
|
||||
return map_reduce_cf(ctx, req->get_path_param("name"), uint64_t{0}, [](replica::column_family& cf) {
|
||||
return boost::accumulate(cf.active_memtables() | boost::adaptors::transformed(std::mem_fn(&replica::memtable::partition_count)), uint64_t(0));
|
||||
}, std::plus<>());
|
||||
});
|
||||
@@ -353,7 +353,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_memtable_off_heap_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](replica::column_family& cf) {
|
||||
return map_reduce_cf(ctx, req->get_path_param("name"), int64_t(0), [](replica::column_family& cf) {
|
||||
return boost::accumulate(cf.active_memtables() | boost::adaptors::transformed([] (replica::memtable* active_memtable) {
|
||||
return active_memtable->region().occupancy().total_space();
|
||||
}), uint64_t(0));
|
||||
@@ -369,7 +369,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_memtable_live_data_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](replica::column_family& cf) {
|
||||
return map_reduce_cf(ctx, req->get_path_param("name"), int64_t(0), [](replica::column_family& cf) {
|
||||
return boost::accumulate(cf.active_memtables() | boost::adaptors::transformed([] (replica::memtable* active_memtable) {
|
||||
return active_memtable->region().occupancy().used_space();
|
||||
}), uint64_t(0));
|
||||
@@ -394,7 +394,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
|
||||
cf::get_cf_all_memtables_off_heap_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
warn(unimplemented::cause::INDEXES);
|
||||
return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](replica::column_family& cf) {
|
||||
return map_reduce_cf(ctx, req->get_path_param("name"), int64_t(0), [](replica::column_family& cf) {
|
||||
return cf.occupancy().total_space();
|
||||
}, std::plus<int64_t>());
|
||||
});
|
||||
@@ -410,7 +410,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
|
||||
cf::get_cf_all_memtables_live_data_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
warn(unimplemented::cause::INDEXES);
|
||||
return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](replica::column_family& cf) {
|
||||
return map_reduce_cf(ctx, req->get_path_param("name"), int64_t(0), [](replica::column_family& cf) {
|
||||
return cf.occupancy().used_space();
|
||||
}, std::plus<int64_t>());
|
||||
});
|
||||
@@ -425,7 +425,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_memtable_switch_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return get_cf_stats(ctx,req->param["name"] ,&replica::column_family_stats::memtable_switch_count);
|
||||
return get_cf_stats(ctx,req->get_path_param("name") ,&replica::column_family_stats::memtable_switch_count);
|
||||
});
|
||||
|
||||
cf::get_all_memtable_switch_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
@@ -434,7 +434,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
|
||||
// FIXME: this refers to partitions, not rows.
|
||||
cf::get_estimated_row_size_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](replica::column_family& cf) {
|
||||
return map_reduce_cf(ctx, req->get_path_param("name"), utils::estimated_histogram(0), [](replica::column_family& cf) {
|
||||
utils::estimated_histogram res(0);
|
||||
for (auto sstables = cf.get_sstables(); auto& i : *sstables) {
|
||||
res.merge(i->get_stats_metadata().estimated_partition_size);
|
||||
@@ -446,7 +446,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
|
||||
// FIXME: this refers to partitions, not rows.
|
||||
cf::get_estimated_row_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](replica::column_family& cf) {
|
||||
return map_reduce_cf(ctx, req->get_path_param("name"), int64_t(0), [](replica::column_family& cf) {
|
||||
uint64_t res = 0;
|
||||
for (auto sstables = cf.get_sstables(); auto& i : *sstables) {
|
||||
res += i->get_stats_metadata().estimated_partition_size.count();
|
||||
@@ -457,7 +457,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_estimated_column_count_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](replica::column_family& cf) {
|
||||
return map_reduce_cf(ctx, req->get_path_param("name"), utils::estimated_histogram(0), [](replica::column_family& cf) {
|
||||
utils::estimated_histogram res(0);
|
||||
for (auto sstables = cf.get_sstables(); auto& i : *sstables) {
|
||||
res.merge(i->get_stats_metadata().estimated_cells_count);
|
||||
@@ -474,7 +474,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_pending_flushes.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return get_cf_stats(ctx,req->param["name"] ,&replica::column_family_stats::pending_flushes);
|
||||
return get_cf_stats(ctx,req->get_path_param("name") ,&replica::column_family_stats::pending_flushes);
|
||||
});
|
||||
|
||||
cf::get_all_pending_flushes.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
@@ -482,7 +482,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_read.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return get_cf_stats_count(ctx,req->param["name"] ,&replica::column_family_stats::reads);
|
||||
return get_cf_stats_count(ctx,req->get_path_param("name") ,&replica::column_family_stats::reads);
|
||||
});
|
||||
|
||||
cf::get_all_read.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
@@ -490,7 +490,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_write.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return get_cf_stats_count(ctx, req->param["name"] ,&replica::column_family_stats::writes);
|
||||
return get_cf_stats_count(ctx, req->get_path_param("name") ,&replica::column_family_stats::writes);
|
||||
});
|
||||
|
||||
cf::get_all_write.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
@@ -498,19 +498,19 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_read_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return get_cf_histogram(ctx, req->param["name"], &replica::column_family_stats::reads);
|
||||
return get_cf_histogram(ctx, req->get_path_param("name"), &replica::column_family_stats::reads);
|
||||
});
|
||||
|
||||
cf::get_read_latency_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return get_cf_rate_and_histogram(ctx, req->param["name"], &replica::column_family_stats::reads);
|
||||
return get_cf_rate_and_histogram(ctx, req->get_path_param("name"), &replica::column_family_stats::reads);
|
||||
});
|
||||
|
||||
cf::get_read_latency.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return get_cf_stats_sum(ctx,req->param["name"] ,&replica::column_family_stats::reads);
|
||||
return get_cf_stats_sum(ctx,req->get_path_param("name") ,&replica::column_family_stats::reads);
|
||||
});
|
||||
|
||||
cf::get_write_latency.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return get_cf_stats_sum(ctx, req->param["name"] ,&replica::column_family_stats::writes);
|
||||
return get_cf_stats_sum(ctx, req->get_path_param("name") ,&replica::column_family_stats::writes);
|
||||
});
|
||||
|
||||
cf::get_all_read_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
@@ -522,11 +522,11 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_write_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return get_cf_histogram(ctx, req->param["name"], &replica::column_family_stats::writes);
|
||||
return get_cf_histogram(ctx, req->get_path_param("name"), &replica::column_family_stats::writes);
|
||||
});
|
||||
|
||||
cf::get_write_latency_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return get_cf_rate_and_histogram(ctx, req->param["name"], &replica::column_family_stats::writes);
|
||||
return get_cf_rate_and_histogram(ctx, req->get_path_param("name"), &replica::column_family_stats::writes);
|
||||
});
|
||||
|
||||
cf::get_all_write_latency_histogram_depricated.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
@@ -538,7 +538,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_pending_compactions.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](replica::column_family& cf) {
|
||||
return map_reduce_cf(ctx, req->get_path_param("name"), int64_t(0), [](replica::column_family& cf) {
|
||||
return cf.estimate_pending_compactions();
|
||||
}, std::plus<int64_t>());
|
||||
});
|
||||
@@ -550,7 +550,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_live_ss_table_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return get_cf_stats(ctx, req->param["name"], &replica::column_family_stats::live_sstable_count);
|
||||
return get_cf_stats(ctx, req->get_path_param("name"), &replica::column_family_stats::live_sstable_count);
|
||||
});
|
||||
|
||||
cf::get_all_live_ss_table_count.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
@@ -558,11 +558,11 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_unleveled_sstables.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return get_cf_unleveled_sstables(ctx, req->param["name"]);
|
||||
return get_cf_unleveled_sstables(ctx, req->get_path_param("name"));
|
||||
});
|
||||
|
||||
cf::get_live_disk_space_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return sum_sstable(ctx, req->param["name"], false);
|
||||
return sum_sstable(ctx, req->get_path_param("name"), false);
|
||||
});
|
||||
|
||||
cf::get_all_live_disk_space_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
@@ -570,7 +570,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_total_disk_space_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return sum_sstable(ctx, req->param["name"], true);
|
||||
return sum_sstable(ctx, req->get_path_param("name"), true);
|
||||
});
|
||||
|
||||
cf::get_all_total_disk_space_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
@@ -579,7 +579,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
|
||||
// FIXME: this refers to partitions, not rows.
|
||||
cf::get_min_row_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], INT64_MAX, min_partition_size, min_int64);
|
||||
return map_reduce_cf(ctx, req->get_path_param("name"), INT64_MAX, min_partition_size, min_int64);
|
||||
});
|
||||
|
||||
// FIXME: this refers to partitions, not rows.
|
||||
@@ -589,7 +589,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
|
||||
// FIXME: this refers to partitions, not rows.
|
||||
cf::get_max_row_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], int64_t(0), max_partition_size, max_int64);
|
||||
return map_reduce_cf(ctx, req->get_path_param("name"), int64_t(0), max_partition_size, max_int64);
|
||||
});
|
||||
|
||||
// FIXME: this refers to partitions, not rows.
|
||||
@@ -600,7 +600,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
// FIXME: this refers to partitions, not rows.
|
||||
cf::get_mean_row_size.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
// Cassandra 3.x mean values are truncated as integrals.
|
||||
return map_reduce_cf(ctx, req->param["name"], integral_ratio_holder(), mean_partition_size, std::plus<integral_ratio_holder>());
|
||||
return map_reduce_cf(ctx, req->get_path_param("name"), integral_ratio_holder(), mean_partition_size, std::plus<integral_ratio_holder>());
|
||||
});
|
||||
|
||||
// FIXME: this refers to partitions, not rows.
|
||||
@@ -610,7 +610,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_bloom_filter_false_positives.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (replica::column_family& cf) {
|
||||
return map_reduce_cf(ctx, req->get_path_param("name"), uint64_t(0), [] (replica::column_family& cf) {
|
||||
auto sstables = cf.get_sstables();
|
||||
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return s + sst->filter_get_false_positive();
|
||||
@@ -628,7 +628,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_recent_bloom_filter_false_positives.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (replica::column_family& cf) {
|
||||
return map_reduce_cf(ctx, req->get_path_param("name"), uint64_t(0), [] (replica::column_family& cf) {
|
||||
auto sstables = cf.get_sstables();
|
||||
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return s + sst->filter_get_recent_false_positive();
|
||||
@@ -646,7 +646,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_bloom_filter_false_ratio.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], ratio_holder(), [] (replica::column_family& cf) {
|
||||
return map_reduce_cf(ctx, req->get_path_param("name"), ratio_holder(), [] (replica::column_family& cf) {
|
||||
return boost::accumulate(*cf.get_sstables() | boost::adaptors::transformed(filter_false_positive_as_ratio_holder), ratio_holder());
|
||||
}, std::plus<>());
|
||||
});
|
||||
@@ -658,7 +658,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_recent_bloom_filter_false_ratio.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], ratio_holder(), [] (replica::column_family& cf) {
|
||||
return map_reduce_cf(ctx, req->get_path_param("name"), ratio_holder(), [] (replica::column_family& cf) {
|
||||
return boost::accumulate(*cf.get_sstables() | boost::adaptors::transformed(filter_recent_false_positive_as_ratio_holder), ratio_holder());
|
||||
}, std::plus<>());
|
||||
});
|
||||
@@ -670,7 +670,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_bloom_filter_disk_space_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (replica::column_family& cf) {
|
||||
return map_reduce_cf(ctx, req->get_path_param("name"), uint64_t(0), [] (replica::column_family& cf) {
|
||||
auto sstables = cf.get_sstables();
|
||||
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return s + sst->filter_size();
|
||||
@@ -688,7 +688,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_bloom_filter_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (replica::column_family& cf) {
|
||||
return map_reduce_cf(ctx, req->get_path_param("name"), uint64_t(0), [] (replica::column_family& cf) {
|
||||
auto sstables = cf.get_sstables();
|
||||
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return s + sst->filter_memory_size();
|
||||
@@ -706,7 +706,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_index_summary_off_heap_memory_used.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], uint64_t(0), [] (replica::column_family& cf) {
|
||||
return map_reduce_cf(ctx, req->get_path_param("name"), uint64_t(0), [] (replica::column_family& cf) {
|
||||
auto sstables = cf.get_sstables();
|
||||
return std::accumulate(sstables->begin(), sstables->end(), uint64_t(0), [](uint64_t s, auto& sst) {
|
||||
return s + sst->get_summary().memory_footprint();
|
||||
@@ -729,7 +729,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
// We are missing the off heap memory calculation
|
||||
// Return 0 is the wrong value. It's a work around
|
||||
// until the memory calculation will be available
|
||||
//auto id = get_uuid(req->param["name"], ctx.db.local());
|
||||
//auto id = get_uuid(req->get_path_param("name"), ctx.db.local());
|
||||
return make_ready_future<json::json_return_type>(0);
|
||||
});
|
||||
|
||||
@@ -742,7 +742,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
cf::get_speculative_retries.set(r, [] (std::unique_ptr<http::request> req) {
|
||||
//TBD
|
||||
unimplemented();
|
||||
//auto id = get_uuid(req->param["name"], ctx.db.local());
|
||||
//auto id = get_uuid(req->get_path_param("name"), ctx.db.local());
|
||||
return make_ready_future<json::json_return_type>(0);
|
||||
});
|
||||
|
||||
@@ -755,7 +755,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
cf::get_key_cache_hit_rate.set(r, [] (std::unique_ptr<http::request> req) {
|
||||
//TBD
|
||||
unimplemented();
|
||||
//auto id = get_uuid(req->param["name"], ctx.db.local());
|
||||
//auto id = get_uuid(req->get_path_param("name"), ctx.db.local());
|
||||
return make_ready_future<json::json_return_type>(0);
|
||||
});
|
||||
|
||||
@@ -780,7 +780,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
cf::get_row_cache_hit_out_of_range.set(r, [] (std::unique_ptr<http::request> req) {
|
||||
//TBD
|
||||
unimplemented();
|
||||
//auto id = get_uuid(req->param["name"], ctx.db.local());
|
||||
//auto id = get_uuid(req->get_path_param("name"), ctx.db.local());
|
||||
return make_ready_future<json::json_return_type>(0);
|
||||
});
|
||||
|
||||
@@ -791,7 +791,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_row_cache_hit.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf_raw(ctx, req->param["name"], utils::rate_moving_average(), [](const replica::column_family& cf) {
|
||||
return map_reduce_cf_raw(ctx, req->get_path_param("name"), utils::rate_moving_average(), [](const replica::column_family& cf) {
|
||||
return cf.get_row_cache().stats().hits.rate();
|
||||
}, std::plus<utils::rate_moving_average>()).then([](const utils::rate_moving_average& m) {
|
||||
return make_ready_future<json::json_return_type>(meter_to_json(m));
|
||||
@@ -807,7 +807,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_row_cache_miss.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf_raw(ctx, req->param["name"], utils::rate_moving_average(), [](const replica::column_family& cf) {
|
||||
return map_reduce_cf_raw(ctx, req->get_path_param("name"), utils::rate_moving_average(), [](const replica::column_family& cf) {
|
||||
return cf.get_row_cache().stats().misses.rate();
|
||||
}, std::plus<utils::rate_moving_average>()).then([](const utils::rate_moving_average& m) {
|
||||
return make_ready_future<json::json_return_type>(meter_to_json(m));
|
||||
@@ -824,57 +824,57 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_cas_prepare.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
|
||||
return map_reduce_cf_time_histogram(ctx, req->get_path_param("name"), [](const replica::column_family& cf) {
|
||||
return cf.get_stats().cas_prepare.histogram();
|
||||
});
|
||||
});
|
||||
|
||||
cf::get_cas_propose.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
|
||||
return map_reduce_cf_time_histogram(ctx, req->get_path_param("name"), [](const replica::column_family& cf) {
|
||||
return cf.get_stats().cas_accept.histogram();
|
||||
});
|
||||
});
|
||||
|
||||
cf::get_cas_commit.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
|
||||
return map_reduce_cf_time_histogram(ctx, req->get_path_param("name"), [](const replica::column_family& cf) {
|
||||
return cf.get_stats().cas_learn.histogram();
|
||||
});
|
||||
});
|
||||
|
||||
cf::get_sstables_per_read_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf(ctx, req->param["name"], utils::estimated_histogram(0), [](replica::column_family& cf) {
|
||||
return map_reduce_cf(ctx, req->get_path_param("name"), utils::estimated_histogram(0), [](replica::column_family& cf) {
|
||||
return cf.get_stats().estimated_sstable_per_read;
|
||||
},
|
||||
utils::estimated_histogram_merge, utils_json::estimated_histogram());
|
||||
});
|
||||
|
||||
cf::get_tombstone_scanned_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return get_cf_histogram(ctx, req->param["name"], &replica::column_family_stats::tombstone_scanned);
|
||||
return get_cf_histogram(ctx, req->get_path_param("name"), &replica::column_family_stats::tombstone_scanned);
|
||||
});
|
||||
|
||||
cf::get_live_scanned_histogram.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
return get_cf_histogram(ctx, req->param["name"], &replica::column_family_stats::live_scanned);
|
||||
return get_cf_histogram(ctx, req->get_path_param("name"), &replica::column_family_stats::live_scanned);
|
||||
});
|
||||
|
||||
cf::get_col_update_time_delta_histogram.set(r, [] (std::unique_ptr<http::request> req) {
|
||||
//TBD
|
||||
unimplemented();
|
||||
//auto id = get_uuid(req->param["name"], ctx.db.local());
|
||||
//auto id = get_uuid(req->get_path_param("name"), ctx.db.local());
|
||||
std::vector<double> res;
|
||||
return make_ready_future<json::json_return_type>(res);
|
||||
});
|
||||
|
||||
cf::get_auto_compaction.set(r, [&ctx] (const_req req) {
|
||||
auto uuid = get_uuid(req.param["name"], ctx.db.local());
|
||||
auto uuid = get_uuid(req.get_path_param("name"), ctx.db.local());
|
||||
replica::column_family& cf = ctx.db.local().find_column_family(uuid);
|
||||
return !cf.is_auto_compaction_disabled_by_user();
|
||||
});
|
||||
|
||||
cf::enable_auto_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
apilog.info("column_family/enable_auto_compaction: name={}", req->param["name"]);
|
||||
apilog.info("column_family/enable_auto_compaction: name={}", req->get_path_param("name"));
|
||||
return ctx.db.invoke_on(0, [&ctx, req = std::move(req)] (replica::database& db) {
|
||||
auto g = replica::database::autocompaction_toggle_guard(db);
|
||||
return foreach_column_family(ctx, req->param["name"], [](replica::column_family &cf) {
|
||||
return foreach_column_family(ctx, req->get_path_param("name"), [](replica::column_family &cf) {
|
||||
cf.enable_auto_compaction();
|
||||
}).then([g = std::move(g)] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
@@ -883,10 +883,10 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::disable_auto_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
apilog.info("column_family/disable_auto_compaction: name={}", req->param["name"]);
|
||||
apilog.info("column_family/disable_auto_compaction: name={}", req->get_path_param("name"));
|
||||
return ctx.db.invoke_on(0, [&ctx, req = std::move(req)] (replica::database& db) {
|
||||
auto g = replica::database::autocompaction_toggle_guard(db);
|
||||
return foreach_column_family(ctx, req->param["name"], [](replica::column_family &cf) {
|
||||
return foreach_column_family(ctx, req->get_path_param("name"), [](replica::column_family &cf) {
|
||||
return cf.disable_auto_compaction();
|
||||
}).then([g = std::move(g)] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
@@ -895,14 +895,14 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_tombstone_gc.set(r, [&ctx] (const_req req) {
|
||||
auto uuid = get_uuid(req.param["name"], ctx.db.local());
|
||||
auto uuid = get_uuid(req.get_path_param("name"), ctx.db.local());
|
||||
replica::table& t = ctx.db.local().find_column_family(uuid);
|
||||
return t.tombstone_gc_enabled();
|
||||
});
|
||||
|
||||
cf::enable_tombstone_gc.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
apilog.info("column_family/enable_tombstone_gc: name={}", req->param["name"]);
|
||||
return foreach_column_family(ctx, req->param["name"], [](replica::table& t) {
|
||||
apilog.info("column_family/enable_tombstone_gc: name={}", req->get_path_param("name"));
|
||||
return foreach_column_family(ctx, req->get_path_param("name"), [](replica::table& t) {
|
||||
t.set_tombstone_gc_enabled(true);
|
||||
}).then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
@@ -910,8 +910,8 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::disable_tombstone_gc.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
apilog.info("column_family/disable_tombstone_gc: name={}", req->param["name"]);
|
||||
return foreach_column_family(ctx, req->param["name"], [](replica::table& t) {
|
||||
apilog.info("column_family/disable_tombstone_gc: name={}", req->get_path_param("name"));
|
||||
return foreach_column_family(ctx, req->get_path_param("name"), [](replica::table& t) {
|
||||
t.set_tombstone_gc_enabled(false);
|
||||
}).then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
@@ -919,7 +919,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_built_indexes.set(r, [&ctx, &sys_ks](std::unique_ptr<http::request> req) {
|
||||
auto ks_cf = parse_fully_qualified_cf_name(req->param["name"]);
|
||||
auto ks_cf = parse_fully_qualified_cf_name(req->get_path_param("name"));
|
||||
auto&& ks = std::get<0>(ks_cf);
|
||||
auto&& cf_name = std::get<1>(ks_cf);
|
||||
return sys_ks.local().load_view_build_progress().then([ks, cf_name, &ctx](const std::vector<db::system_keyspace_view_build_progress>& vb) mutable {
|
||||
@@ -957,7 +957,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_compression_ratio.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
auto uuid = get_uuid(req->param["name"], ctx.db.local());
|
||||
auto uuid = get_uuid(req->get_path_param("name"), ctx.db.local());
|
||||
|
||||
return ctx.db.map_reduce(sum_ratio<double>(), [uuid](replica::database& db) {
|
||||
replica::column_family& cf = db.find_column_family(uuid);
|
||||
@@ -968,21 +968,21 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_read_latency_estimated_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
|
||||
return map_reduce_cf_time_histogram(ctx, req->get_path_param("name"), [](const replica::column_family& cf) {
|
||||
return cf.get_stats().reads.histogram();
|
||||
});
|
||||
});
|
||||
|
||||
cf::get_write_latency_estimated_histogram.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
|
||||
return map_reduce_cf_time_histogram(ctx, req->get_path_param("name"), [](const replica::column_family& cf) {
|
||||
return cf.get_stats().writes.histogram();
|
||||
});
|
||||
});
|
||||
|
||||
cf::set_compaction_strategy_class.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
sstring strategy = req->get_query_param("class_name");
|
||||
apilog.info("column_family/set_compaction_strategy_class: name={} strategy={}", req->param["name"], strategy);
|
||||
return foreach_column_family(ctx, req->param["name"], [strategy](replica::column_family& cf) {
|
||||
apilog.info("column_family/set_compaction_strategy_class: name={} strategy={}", req->get_path_param("name"), strategy);
|
||||
return foreach_column_family(ctx, req->get_path_param("name"), [strategy](replica::column_family& cf) {
|
||||
cf.set_compaction_strategy(sstables::compaction_strategy::type(strategy));
|
||||
}).then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
@@ -990,7 +990,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_compaction_strategy_class.set(r, [&ctx](const_req req) {
|
||||
return ctx.db.local().find_column_family(get_uuid(req.param["name"], ctx.db.local())).get_compaction_strategy().name();
|
||||
return ctx.db.local().find_column_family(get_uuid(req.get_path_param("name"), ctx.db.local())).get_compaction_strategy().name();
|
||||
});
|
||||
|
||||
cf::set_compression_parameters.set(r, [](std::unique_ptr<http::request> req) {
|
||||
@@ -1006,7 +1006,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
});
|
||||
|
||||
cf::get_sstable_count_per_level.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
return map_reduce_cf_raw(ctx, req->param["name"], std::vector<uint64_t>(), [](const replica::column_family& cf) {
|
||||
return map_reduce_cf_raw(ctx, req->get_path_param("name"), std::vector<uint64_t>(), [](const replica::column_family& cf) {
|
||||
return cf.sstable_count_per_level();
|
||||
}, concat_sstable_count_per_level).then([](const std::vector<uint64_t>& res) {
|
||||
return make_ready_future<json::json_return_type>(res);
|
||||
@@ -1015,7 +1015,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
|
||||
cf::get_sstables_for_key.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
auto key = req->get_query_param("key");
|
||||
auto uuid = get_uuid(req->param["name"], ctx.db.local());
|
||||
auto uuid = get_uuid(req->get_path_param("name"), ctx.db.local());
|
||||
|
||||
return ctx.db.map_reduce0([key, uuid] (replica::database& db) -> future<std::unordered_set<sstring>> {
|
||||
auto sstables = co_await db.find_column_family(uuid).get_sstables_by_partition_key(key);
|
||||
@@ -1031,7 +1031,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
|
||||
|
||||
cf::toppartitions.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
auto name = req->param["name"];
|
||||
auto name = req->get_path_param("name");
|
||||
auto [ks, cf] = parse_fully_qualified_cf_name(name);
|
||||
|
||||
api::req_param<std::chrono::milliseconds, unsigned> duration{*req, "duration", 1000ms};
|
||||
@@ -1058,7 +1058,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
|
||||
}
|
||||
auto [ks, cf] = parse_fully_qualified_cf_name(*params.get("name"));
|
||||
auto flush = params.get_as<bool>("flush_memtables").value_or(true);
|
||||
apilog.info("column_family/force_major_compaction: name={} flush={}", req->param["name"], flush);
|
||||
apilog.info("column_family/force_major_compaction: name={} flush={}", req->get_path_param("name"), flush);
|
||||
|
||||
auto keyspace = validate_keyspace(ctx, ks);
|
||||
std::vector<table_info> table_infos = {table_info{
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
*/
|
||||
|
||||
#include <seastar/core/coroutine.hh>
|
||||
#include <seastar/coroutine/exception.hh>
|
||||
|
||||
#include "compaction_manager.hh"
|
||||
#include "compaction/compaction_manager.hh"
|
||||
@@ -109,7 +110,7 @@ void set_compaction_manager(http_context& ctx, routes& r) {
|
||||
});
|
||||
|
||||
cm::stop_keyspace_compaction.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
auto ks_name = validate_keyspace(ctx, req->param);
|
||||
auto ks_name = validate_keyspace(ctx, req);
|
||||
auto table_names = parse_tables(ks_name, ctx, req->query_parameters, "tables");
|
||||
if (table_names.empty()) {
|
||||
table_names = map_keys(ctx.db.local().find_keyspace(ks_name).metadata().get()->cf_meta_data());
|
||||
@@ -152,10 +153,13 @@ void set_compaction_manager(http_context& ctx, routes& r) {
|
||||
});
|
||||
|
||||
cm::get_compaction_history.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||
std::function<future<>(output_stream<char>&&)> f = [&ctx](output_stream<char>&& s) {
|
||||
return do_with(output_stream<char>(std::move(s)), true, [&ctx] (output_stream<char>& s, bool& first){
|
||||
return s.write("[").then([&ctx, &s, &first] {
|
||||
return ctx.db.local().get_compaction_manager().get_compaction_history([&s, &first](const db::compaction_history_entry& entry) mutable {
|
||||
std::function<future<>(output_stream<char>&&)> f = [&ctx] (output_stream<char>&& out) -> future<> {
|
||||
auto s = std::move(out);
|
||||
bool first = true;
|
||||
std::exception_ptr ex;
|
||||
try {
|
||||
co_await s.write("[");
|
||||
co_await ctx.db.local().get_compaction_manager().get_compaction_history([&s, &first](const db::compaction_history_entry& entry) mutable -> future<> {
|
||||
cm::history h;
|
||||
h.id = entry.id.to_sstring();
|
||||
h.ks = std::move(entry.ks);
|
||||
@@ -169,18 +173,21 @@ void set_compaction_manager(http_context& ctx, routes& r) {
|
||||
e.value = it.second;
|
||||
h.rows_merged.push(std::move(e));
|
||||
}
|
||||
auto fut = first ? make_ready_future<>() : s.write(", ");
|
||||
if (!first) {
|
||||
co_await s.write(", ");
|
||||
}
|
||||
first = false;
|
||||
return fut.then([&s, h = std::move(h)] {
|
||||
return formatter::write(s, h);
|
||||
});
|
||||
}).then([&s] {
|
||||
return s.write("]").then([&s] {
|
||||
return s.close();
|
||||
});
|
||||
co_await formatter::write(s, h);
|
||||
});
|
||||
});
|
||||
});
|
||||
co_await s.write("]");
|
||||
co_await s.flush();
|
||||
} catch (...) {
|
||||
ex = std::current_exception();
|
||||
}
|
||||
co_await s.close();
|
||||
if (ex) {
|
||||
co_await coroutine::return_exception_ptr(std::move(ex));
|
||||
}
|
||||
};
|
||||
return make_ready_future<json::json_return_type>(std::move(f));
|
||||
});
|
||||
|
||||
@@ -91,7 +91,7 @@ void set_config(std::shared_ptr < api_registry_builder20 > rb, http_context& ctx
|
||||
});
|
||||
|
||||
cs::find_config_id.set(r, [&cfg] (const_req r) {
|
||||
auto id = r.param["id"];
|
||||
auto id = r.get_path_param("id");
|
||||
for (auto&& cfg_ref : cfg.values()) {
|
||||
auto&& cfg = cfg_ref.get();
|
||||
if (id == cfg.name()) {
|
||||
|
||||
@@ -24,7 +24,7 @@ namespace hf = httpd::error_injection_json;
|
||||
void set_error_injection(http_context& ctx, routes& r) {
|
||||
|
||||
hf::enable_injection.set(r, [](std::unique_ptr<request> req) {
|
||||
sstring injection = req->param["injection"];
|
||||
sstring injection = req->get_path_param("injection");
|
||||
bool one_shot = req->get_query_param("one_shot") == "True";
|
||||
auto params = req->content;
|
||||
|
||||
@@ -56,7 +56,7 @@ void set_error_injection(http_context& ctx, routes& r) {
|
||||
});
|
||||
|
||||
hf::disable_injection.set(r, [](std::unique_ptr<request> req) {
|
||||
sstring injection = req->param["injection"];
|
||||
sstring injection = req->get_path_param("injection");
|
||||
|
||||
auto& errinj = utils::get_local_injector();
|
||||
return errinj.disable_on_all(injection).then([] {
|
||||
@@ -72,7 +72,7 @@ void set_error_injection(http_context& ctx, routes& r) {
|
||||
});
|
||||
|
||||
hf::message_injection.set(r, [](std::unique_ptr<request> req) {
|
||||
sstring injection = req->param["injection"];
|
||||
sstring injection = req->get_path_param("injection");
|
||||
auto& errinj = utils::get_local_injector();
|
||||
return errinj.receive_message_on_all(injection).then([] {
|
||||
return make_ready_future<json::json_return_type>(json::json_void());
|
||||
|
||||
@@ -80,9 +80,9 @@ void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {
|
||||
|
||||
fd::get_endpoint_state.set(r, [&g] (std::unique_ptr<request> req) {
|
||||
return g.container().invoke_on(0, [req = std::move(req)] (gms::gossiper& g) {
|
||||
auto state = g.get_endpoint_state_ptr(gms::inet_address(req->param["addr"]));
|
||||
auto state = g.get_endpoint_state_ptr(gms::inet_address(req->get_path_param("addr")));
|
||||
if (!state) {
|
||||
return make_ready_future<json::json_return_type>(format("unknown endpoint {}", req->param["addr"]));
|
||||
return make_ready_future<json::json_return_type>(format("unknown endpoint {}", req->get_path_param("addr")));
|
||||
}
|
||||
std::stringstream ss;
|
||||
g.append_endpoint_state(ss, *state);
|
||||
|
||||
@@ -31,21 +31,21 @@ void set_gossiper(http_context& ctx, routes& r, gms::gossiper& g) {
|
||||
});
|
||||
|
||||
httpd::gossiper_json::get_endpoint_downtime.set(r, [&g] (std::unique_ptr<request> req) -> future<json::json_return_type> {
|
||||
gms::inet_address ep(req->param["addr"]);
|
||||
gms::inet_address ep(req->get_path_param("addr"));
|
||||
// synchronize unreachable_members on all shards
|
||||
co_await g.get_unreachable_members_synchronized();
|
||||
co_return g.get_endpoint_downtime(ep);
|
||||
});
|
||||
|
||||
httpd::gossiper_json::get_current_generation_number.set(r, [&g] (std::unique_ptr<http::request> req) {
|
||||
gms::inet_address ep(req->param["addr"]);
|
||||
gms::inet_address ep(req->get_path_param("addr"));
|
||||
return g.get_current_generation_number(ep).then([] (gms::generation_type res) {
|
||||
return make_ready_future<json::json_return_type>(res.value());
|
||||
});
|
||||
});
|
||||
|
||||
httpd::gossiper_json::get_current_heart_beat_version.set(r, [&g] (std::unique_ptr<http::request> req) {
|
||||
gms::inet_address ep(req->param["addr"]);
|
||||
gms::inet_address ep(req->get_path_param("addr"));
|
||||
return g.get_current_heart_beat_version(ep).then([] (gms::version_type res) {
|
||||
return make_ready_future<json::json_return_type>(res.value());
|
||||
});
|
||||
@@ -53,17 +53,17 @@ void set_gossiper(http_context& ctx, routes& r, gms::gossiper& g) {
|
||||
|
||||
httpd::gossiper_json::assassinate_endpoint.set(r, [&g](std::unique_ptr<http::request> req) {
|
||||
if (req->get_query_param("unsafe") != "True") {
|
||||
return g.assassinate_endpoint(req->param["addr"]).then([] {
|
||||
return g.assassinate_endpoint(req->get_path_param("addr")).then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
}
|
||||
return g.unsafe_assassinate_endpoint(req->param["addr"]).then([] {
|
||||
return g.unsafe_assassinate_endpoint(req->get_path_param("addr")).then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
});
|
||||
|
||||
httpd::gossiper_json::force_remove_endpoint.set(r, [&g](std::unique_ptr<http::request> req) {
|
||||
gms::inet_address ep(req->param["addr"]);
|
||||
gms::inet_address ep(req->get_path_param("addr"));
|
||||
return g.force_remove_endpoint(ep, gms::null_permit_id).then([] {
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
|
||||
@@ -24,7 +24,7 @@ using namespace json;
|
||||
|
||||
void set_raft(http_context&, httpd::routes& r, sharded<service::raft_group_registry>& raft_gr) {
|
||||
r::trigger_snapshot.set(r, [&raft_gr] (std::unique_ptr<http::request> req) -> future<json_return_type> {
|
||||
raft::group_id gid{utils::UUID{req->param["group_id"]}};
|
||||
raft::group_id gid{utils::UUID{req->get_path_param("group_id")}};
|
||||
auto timeout_dur = std::invoke([timeout_str = req->get_query_param("timeout")] {
|
||||
if (timeout_str.empty()) {
|
||||
return std::chrono::seconds{60};
|
||||
|
||||
@@ -58,15 +58,19 @@ namespace ss = httpd::storage_service_json;
|
||||
namespace sp = httpd::storage_proxy_json;
|
||||
using namespace json;
|
||||
|
||||
sstring validate_keyspace(http_context& ctx, sstring ks_name) {
|
||||
sstring validate_keyspace(const http_context& ctx, sstring ks_name) {
|
||||
if (ctx.db.local().has_keyspace(ks_name)) {
|
||||
return ks_name;
|
||||
}
|
||||
throw bad_param_exception(replica::no_such_keyspace(ks_name).what());
|
||||
}
|
||||
|
||||
sstring validate_keyspace(http_context& ctx, const parameters& param) {
|
||||
return validate_keyspace(ctx, param["keyspace"]);
|
||||
sstring validate_keyspace(const http_context& ctx, const std::unique_ptr<http::request>& req) {
|
||||
return validate_keyspace(ctx, req->get_path_param("keyspace"));
|
||||
}
|
||||
|
||||
sstring validate_keyspace(const http_context& ctx, const http::request& req) {
|
||||
return validate_keyspace(ctx, req.get_path_param("keyspace"));
|
||||
}
|
||||
|
||||
locator::host_id validate_host_id(const sstring& param) {
|
||||
@@ -171,7 +175,7 @@ using ks_cf_func = std::function<future<json::json_return_type>(http_context&, s
|
||||
|
||||
static auto wrap_ks_cf(http_context &ctx, ks_cf_func f) {
|
||||
return [&ctx, f = std::move(f)](std::unique_ptr<http::request> req) {
|
||||
auto keyspace = validate_keyspace(ctx, req->param);
|
||||
auto keyspace = validate_keyspace(ctx, req);
|
||||
auto table_infos = parse_table_infos(keyspace, ctx, req->query_parameters, "cf");
|
||||
return f(ctx, std::move(req), std::move(keyspace), std::move(table_infos));
|
||||
};
|
||||
@@ -338,7 +342,7 @@ void set_repair(http_context& ctx, routes& r, sharded<repair_service>& repair) {
|
||||
// returns immediately, not waiting for the repair to finish. The user
|
||||
// then has other mechanisms to track the ongoing repair's progress,
|
||||
// or stop it.
|
||||
return repair_start(repair, validate_keyspace(ctx, req->param),
|
||||
return repair_start(repair, validate_keyspace(ctx, req),
|
||||
options_map).then([] (int i) {
|
||||
return make_ready_future<json::json_return_type>(i);
|
||||
});
|
||||
@@ -421,7 +425,7 @@ void unset_repair(http_context& ctx, routes& r) {
|
||||
|
||||
void set_sstables_loader(http_context& ctx, routes& r, sharded<sstables_loader>& sst_loader) {
|
||||
ss::load_new_ss_tables.set(r, [&ctx, &sst_loader](std::unique_ptr<http::request> req) {
|
||||
auto ks = validate_keyspace(ctx, req->param);
|
||||
auto ks = validate_keyspace(ctx, req);
|
||||
auto cf = req->get_query_param("cf");
|
||||
auto stream = req->get_query_param("load_and_stream");
|
||||
auto primary_replica = req->get_query_param("primary_replica_only");
|
||||
@@ -452,8 +456,8 @@ void unset_sstables_loader(http_context& ctx, routes& r) {
|
||||
|
||||
void set_view_builder(http_context& ctx, routes& r, sharded<db::view::view_builder>& vb) {
|
||||
ss::view_build_statuses.set(r, [&ctx, &vb] (std::unique_ptr<http::request> req) {
|
||||
auto keyspace = validate_keyspace(ctx, req->param);
|
||||
auto view = req->param["view"];
|
||||
auto keyspace = validate_keyspace(ctx, req);
|
||||
auto view = req->get_path_param("view");
|
||||
return vb.local().view_build_statuses(std::move(keyspace), std::move(view)).then([] (std::unordered_map<sstring, sstring> status) {
|
||||
std::vector<storage_service_json::mapper> res;
|
||||
return make_ready_future<json::json_return_type>(map_to_key_value(std::move(status), res));
|
||||
@@ -590,7 +594,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
});
|
||||
|
||||
ss::get_range_to_endpoint_map.set(r, [&ctx, &ss](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
auto keyspace = validate_keyspace(ctx, req->param);
|
||||
auto keyspace = validate_keyspace(ctx, req);
|
||||
std::vector<ss::maplist_mapper> res;
|
||||
co_return stream_range_as_array(co_await ss.local().get_range_to_address_map(keyspace),
|
||||
[](const std::pair<dht::token_range, inet_address_vector_replica_set>& entry){
|
||||
@@ -615,7 +619,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
ss::get_pending_range_to_endpoint_map.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
//TBD
|
||||
unimplemented();
|
||||
auto keyspace = validate_keyspace(ctx, req->param);
|
||||
auto keyspace = validate_keyspace(ctx, req);
|
||||
std::vector<ss::maplist_mapper> res;
|
||||
return make_ready_future<json::json_return_type>(res);
|
||||
});
|
||||
@@ -631,7 +635,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
});
|
||||
|
||||
ss::describe_ring.set(r, [&ctx, &ss](std::unique_ptr<http::request> req) {
|
||||
return describe_ring_as_json(ss, validate_keyspace(ctx, req->param));
|
||||
return describe_ring_as_json(ss, validate_keyspace(ctx, req));
|
||||
});
|
||||
|
||||
ss::get_host_id_map.set(r, [&ss](const_req req) {
|
||||
@@ -664,7 +668,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
});
|
||||
|
||||
ss::get_natural_endpoints.set(r, [&ctx, &ss](const_req req) {
|
||||
auto keyspace = validate_keyspace(ctx, req.param);
|
||||
auto keyspace = validate_keyspace(ctx, req);
|
||||
return container_to_vec(ss.local().get_natural_endpoints(keyspace, req.get_query_param("cf"),
|
||||
req.get_query_param("key")));
|
||||
});
|
||||
@@ -733,7 +737,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
|
||||
ss::force_keyspace_cleanup.set(r, [&ctx, &ss](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
auto& db = ctx.db;
|
||||
auto keyspace = validate_keyspace(ctx, req->param);
|
||||
auto keyspace = validate_keyspace(ctx, req);
|
||||
auto table_infos = parse_table_infos(keyspace, ctx, req->query_parameters, "cf");
|
||||
apilog.info("force_keyspace_cleanup: keyspace={} tables={}", keyspace, table_infos);
|
||||
if (!co_await ss.local().is_cleanup_allowed(keyspace)) {
|
||||
@@ -796,7 +800,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
});
|
||||
|
||||
ss::force_keyspace_flush.set(r, [&ctx](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
auto keyspace = validate_keyspace(ctx, req->param);
|
||||
auto keyspace = validate_keyspace(ctx, req);
|
||||
auto column_families = parse_tables(keyspace, ctx, req->query_parameters, "cf");
|
||||
apilog.info("perform_keyspace_flush: keyspace={} tables={}", keyspace, column_families);
|
||||
auto& db = ctx.db;
|
||||
@@ -905,7 +909,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
ss::truncate.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
//TBD
|
||||
unimplemented();
|
||||
auto keyspace = validate_keyspace(ctx, req->param);
|
||||
auto keyspace = validate_keyspace(ctx, req);
|
||||
auto column_family = req->get_query_param("cf");
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
@@ -1039,14 +1043,14 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
ss::bulk_load.set(r, [](std::unique_ptr<http::request> req) {
|
||||
//TBD
|
||||
unimplemented();
|
||||
auto path = req->param["path"];
|
||||
auto path = req->get_path_param("path");
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
|
||||
ss::bulk_load_async.set(r, [](std::unique_ptr<http::request> req) {
|
||||
//TBD
|
||||
unimplemented();
|
||||
auto path = req->param["path"];
|
||||
auto path = req->get_path_param("path");
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
|
||||
@@ -1134,7 +1138,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
});
|
||||
|
||||
ss::enable_auto_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
auto keyspace = validate_keyspace(ctx, req->param);
|
||||
auto keyspace = validate_keyspace(ctx, req);
|
||||
auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");
|
||||
|
||||
apilog.info("enable_auto_compaction: keyspace={} tables={}", keyspace, tables);
|
||||
@@ -1142,7 +1146,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
});
|
||||
|
||||
ss::disable_auto_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
auto keyspace = validate_keyspace(ctx, req->param);
|
||||
auto keyspace = validate_keyspace(ctx, req);
|
||||
auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");
|
||||
|
||||
apilog.info("disable_auto_compaction: keyspace={} tables={}", keyspace, tables);
|
||||
@@ -1150,7 +1154,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
});
|
||||
|
||||
ss::enable_tombstone_gc.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
auto keyspace = validate_keyspace(ctx, req->param);
|
||||
auto keyspace = validate_keyspace(ctx, req);
|
||||
auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");
|
||||
|
||||
apilog.info("enable_tombstone_gc: keyspace={} tables={}", keyspace, tables);
|
||||
@@ -1158,7 +1162,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
});
|
||||
|
||||
ss::disable_tombstone_gc.set(r, [&ctx](std::unique_ptr<http::request> req) {
|
||||
auto keyspace = validate_keyspace(ctx, req->param);
|
||||
auto keyspace = validate_keyspace(ctx, req);
|
||||
auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");
|
||||
|
||||
apilog.info("disable_tombstone_gc: keyspace={} tables={}", keyspace, tables);
|
||||
@@ -1254,7 +1258,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
||||
});
|
||||
|
||||
ss::get_effective_ownership.set(r, [&ctx, &ss] (std::unique_ptr<http::request> req) {
|
||||
auto keyspace_name = req->param["keyspace"] == "null" ? "" : validate_keyspace(ctx, req->param);
|
||||
auto keyspace_name = req->get_path_param("keyspace") == "null" ? "" : validate_keyspace(ctx, req);
|
||||
return ss.local().effective_ownership(keyspace_name).then([] (auto&& ownership) {
|
||||
std::vector<storage_service_json::mapper> res;
|
||||
return make_ready_future<json::json_return_type>(map_to_key_value(ownership, res));
|
||||
@@ -1542,8 +1546,10 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
|
||||
});
|
||||
}).then([&s] {
|
||||
return s.write("]").then([&s] {
|
||||
return s.close();
|
||||
return s.flush();
|
||||
});
|
||||
}).finally([&s] {
|
||||
return s.close();
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
@@ -37,11 +37,11 @@ namespace api {
|
||||
|
||||
// verify that the keyspace is found, otherwise a bad_param_exception exception is thrown
|
||||
// containing the description of the respective keyspace error.
|
||||
sstring validate_keyspace(http_context& ctx, sstring ks_name);
|
||||
sstring validate_keyspace(const http_context& ctx, sstring ks_name);
|
||||
|
||||
// verify that the keyspace parameter is found, otherwise a bad_param_exception exception is thrown
|
||||
// containing the description of the respective keyspace error.
|
||||
sstring validate_keyspace(http_context& ctx, const httpd::parameters& param);
|
||||
sstring validate_keyspace(const http_context& ctx, const std::unique_ptr<http::request>& req);
|
||||
|
||||
// splits a request parameter assumed to hold a comma-separated list of table names
|
||||
// verify that the tables are found, otherwise a bad_param_exception exception is thrown
|
||||
|
||||
@@ -106,7 +106,7 @@ void set_stream_manager(http_context& ctx, routes& r, sharded<streaming::stream_
|
||||
});
|
||||
|
||||
hs::get_total_incoming_bytes.set(r, [&sm](std::unique_ptr<request> req) {
|
||||
gms::inet_address peer(req->param["peer"]);
|
||||
gms::inet_address peer(req->get_path_param("peer"));
|
||||
return sm.map_reduce0([peer](streaming::stream_manager& sm) {
|
||||
return sm.get_progress_on_all_shards(peer).then([] (auto sbytes) {
|
||||
return sbytes.bytes_received;
|
||||
@@ -127,7 +127,7 @@ void set_stream_manager(http_context& ctx, routes& r, sharded<streaming::stream_
|
||||
});
|
||||
|
||||
hs::get_total_outgoing_bytes.set(r, [&sm](std::unique_ptr<request> req) {
|
||||
gms::inet_address peer(req->param["peer"]);
|
||||
gms::inet_address peer(req->get_path_param("peer"));
|
||||
return sm.map_reduce0([peer] (streaming::stream_manager& sm) {
|
||||
return sm.get_progress_on_all_shards(peer).then([] (auto sbytes) {
|
||||
return sbytes.bytes_sent;
|
||||
|
||||
@@ -119,9 +119,9 @@ void set_system(http_context& ctx, routes& r) {
|
||||
|
||||
hs::get_logger_level.set(r, [](const_req req) {
|
||||
try {
|
||||
return logging::level_name(logging::logger_registry().get_logger_level(req.param["name"]));
|
||||
return logging::level_name(logging::logger_registry().get_logger_level(req.get_path_param("name")));
|
||||
} catch (std::out_of_range& e) {
|
||||
throw bad_param_exception("Unknown logger name " + req.param["name"]);
|
||||
throw bad_param_exception("Unknown logger name " + req.get_path_param("name"));
|
||||
}
|
||||
// just to keep the compiler happy
|
||||
return sstring();
|
||||
@@ -130,9 +130,9 @@ void set_system(http_context& ctx, routes& r) {
|
||||
hs::set_logger_level.set(r, [](const_req req) {
|
||||
try {
|
||||
logging::log_level level = boost::lexical_cast<logging::log_level>(std::string(req.get_query_param("level")));
|
||||
logging::logger_registry().set_logger_level(req.param["name"], level);
|
||||
logging::logger_registry().set_logger_level(req.get_path_param("name"), level);
|
||||
} catch (std::out_of_range& e) {
|
||||
throw bad_param_exception("Unknown logger name " + req.param["name"]);
|
||||
throw bad_param_exception("Unknown logger name " + req.get_path_param("name"));
|
||||
} catch (boost::bad_lexical_cast& e) {
|
||||
throw bad_param_exception("Unknown logging level " + req.get_query_param("level"));
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
*/
|
||||
|
||||
#include <seastar/core/coroutine.hh>
|
||||
#include <seastar/coroutine/exception.hh>
|
||||
|
||||
#include "task_manager.hh"
|
||||
#include "api/api-doc/task_manager.json.hh"
|
||||
@@ -124,7 +125,7 @@ void set_task_manager(http_context& ctx, routes& r, db::config& cfg) {
|
||||
chunked_stats local_res;
|
||||
tasks::task_manager::module_ptr module;
|
||||
try {
|
||||
module = tm.find_module(req->param["module"]);
|
||||
module = tm.find_module(req->get_path_param("module"));
|
||||
} catch (...) {
|
||||
throw bad_param_exception(fmt::format("{}", std::current_exception()));
|
||||
}
|
||||
@@ -139,25 +140,34 @@ void set_task_manager(http_context& ctx, routes& r, db::config& cfg) {
|
||||
|
||||
std::function<future<>(output_stream<char>&&)> f = [r = std::move(res)] (output_stream<char>&& os) -> future<> {
|
||||
auto s = std::move(os);
|
||||
auto res = std::move(r);
|
||||
co_await s.write("[");
|
||||
std::string delim = "";
|
||||
for (auto& v: res) {
|
||||
for (auto& stats: v) {
|
||||
co_await s.write(std::exchange(delim, ", "));
|
||||
tm::task_stats ts;
|
||||
ts = stats;
|
||||
co_await formatter::write(s, ts);
|
||||
std::exception_ptr ex;
|
||||
try {
|
||||
auto res = std::move(r);
|
||||
co_await s.write("[");
|
||||
std::string delim = "";
|
||||
for (auto& v: res) {
|
||||
for (auto& stats: v) {
|
||||
co_await s.write(std::exchange(delim, ", "));
|
||||
tm::task_stats ts;
|
||||
ts = stats;
|
||||
co_await formatter::write(s, ts);
|
||||
}
|
||||
}
|
||||
co_await s.write("]");
|
||||
co_await s.flush();
|
||||
} catch (...) {
|
||||
ex = std::current_exception();
|
||||
}
|
||||
co_await s.write("]");
|
||||
co_await s.close();
|
||||
if (ex) {
|
||||
co_await coroutine::return_exception_ptr(std::move(ex));
|
||||
}
|
||||
};
|
||||
co_return std::move(f);
|
||||
});
|
||||
|
||||
tm::get_task_status.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
|
||||
auto id = tasks::task_id{utils::UUID{req->get_path_param("task_id")}};
|
||||
tasks::task_manager::foreign_task_ptr task;
|
||||
try {
|
||||
task = co_await tasks::task_manager::invoke_on_task(ctx.tm, id, std::function([] (tasks::task_manager::task_ptr task) -> future<tasks::task_manager::foreign_task_ptr> {
|
||||
@@ -174,7 +184,7 @@ void set_task_manager(http_context& ctx, routes& r, db::config& cfg) {
|
||||
});
|
||||
|
||||
tm::abort_task.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
|
||||
auto id = tasks::task_id{utils::UUID{req->get_path_param("task_id")}};
|
||||
try {
|
||||
co_await tasks::task_manager::invoke_on_task(ctx.tm, id, [] (tasks::task_manager::task_ptr task) -> future<> {
|
||||
if (!task->is_abortable()) {
|
||||
@@ -189,7 +199,7 @@ void set_task_manager(http_context& ctx, routes& r, db::config& cfg) {
|
||||
});
|
||||
|
||||
tm::wait_task.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
|
||||
auto id = tasks::task_id{utils::UUID{req->get_path_param("task_id")}};
|
||||
tasks::task_manager::foreign_task_ptr task;
|
||||
try {
|
||||
task = co_await tasks::task_manager::invoke_on_task(ctx.tm, id, std::function([] (tasks::task_manager::task_ptr task) {
|
||||
@@ -210,7 +220,7 @@ void set_task_manager(http_context& ctx, routes& r, db::config& cfg) {
|
||||
|
||||
tm::get_task_status_recursively.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
auto& _ctx = ctx;
|
||||
auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
|
||||
auto id = tasks::task_id{utils::UUID{req->get_path_param("task_id")}};
|
||||
std::queue<tasks::task_manager::foreign_task_ptr> q;
|
||||
utils::chunked_vector<full_task_status> res;
|
||||
|
||||
|
||||
@@ -83,7 +83,7 @@ void set_task_manager_test(http_context& ctx, routes& r) {
|
||||
});
|
||||
|
||||
tmt::finish_test_task.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
|
||||
auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
|
||||
auto id = tasks::task_id{utils::UUID{req->get_path_param("task_id")}};
|
||||
auto it = req->query_parameters.find("error");
|
||||
bool fail = it != req->query_parameters.end();
|
||||
std::string error = fail ? it->second : "";
|
||||
|
||||
@@ -144,12 +144,21 @@ std::ostream& operator<<(std::ostream& os, compaction_type_options::scrub::quara
|
||||
}
|
||||
|
||||
static api::timestamp_type get_max_purgeable_timestamp(const table_state& table_s, sstable_set::incremental_selector& selector,
|
||||
const std::unordered_set<shared_sstable>& compacting_set, const dht::decorated_key& dk, uint64_t& bloom_filter_checks) {
|
||||
const std::unordered_set<shared_sstable>& compacting_set, const dht::decorated_key& dk, uint64_t& bloom_filter_checks,
|
||||
const api::timestamp_type compacting_max_timestamp) {
|
||||
if (!table_s.tombstone_gc_enabled()) [[unlikely]] {
|
||||
return api::min_timestamp;
|
||||
}
|
||||
|
||||
auto timestamp = table_s.min_memtable_timestamp();
|
||||
auto timestamp = api::max_timestamp;
|
||||
auto memtable_min_timestamp = table_s.min_memtable_timestamp();
|
||||
// Use memtable timestamp if it contains data older than the sstables being compacted,
|
||||
// and if the memtable also contains the key we're calculating max purgeable timestamp for.
|
||||
// First condition helps to not penalize the common scenario where memtable only contains
|
||||
// newer data.
|
||||
if (memtable_min_timestamp <= compacting_max_timestamp && table_s.memtable_has_key(dk)) {
|
||||
timestamp = memtable_min_timestamp;
|
||||
}
|
||||
std::optional<utils::hashed_key> hk;
|
||||
for (auto&& sst : boost::range::join(selector.select(dk).sstables, table_s.compacted_undeleted_sstables())) {
|
||||
if (compacting_set.contains(sst)) {
|
||||
@@ -441,6 +450,7 @@ protected:
|
||||
uint64_t _end_size = 0;
|
||||
// fully expired files, which are skipped, aren't taken into account.
|
||||
uint64_t _compacting_data_file_size = 0;
|
||||
api::timestamp_type _compacting_max_timestamp = api::min_timestamp;
|
||||
uint64_t _estimated_partitions = 0;
|
||||
double _estimated_droppable_tombstone_ratio = 0;
|
||||
uint64_t _bloom_filter_checks = 0;
|
||||
@@ -739,6 +749,7 @@ private:
|
||||
auto gc_before = sst->get_gc_before_for_drop_estimation(gc_clock::now(), _table_s.get_tombstone_gc_state(), _schema);
|
||||
sum_of_estimated_droppable_tombstone_ratio += sst->estimate_droppable_tombstone_ratio(gc_before);
|
||||
_compacting_data_file_size += sst->ondisk_data_size();
|
||||
|
||||
// TODO:
|
||||
// Note that this is not fully correct. Since we might be merging sstables that originated on
|
||||
// another shard (#cpu changed), we might be comparing RP:s with differing shard ids,
|
||||
@@ -747,6 +758,8 @@ private:
|
||||
// this is kind of ok, esp. since we will hopefully not be trying to recover based on
|
||||
// compacted sstables anyway (CL should be clean by then).
|
||||
_rp = std::max(_rp, sst_stats.position);
|
||||
|
||||
_compacting_max_timestamp = std::max(_compacting_max_timestamp, sst->get_stats_metadata().max_timestamp);
|
||||
}
|
||||
log_info("{} {}", report_start_desc(), formatted_msg);
|
||||
if (ssts->size() < _sstables.size()) {
|
||||
@@ -869,7 +882,7 @@ private:
|
||||
};
|
||||
}
|
||||
return [this] (const dht::decorated_key& dk) {
|
||||
return get_max_purgeable_timestamp(_table_s, *_selector, _compacting_for_max_purgeable_func, dk, _bloom_filter_checks);
|
||||
return get_max_purgeable_timestamp(_table_s, *_selector, _compacting_for_max_purgeable_func, dk, _bloom_filter_checks, _compacting_max_timestamp);
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -1327,13 +1327,20 @@ private:
|
||||
}));
|
||||
};
|
||||
|
||||
auto get_next_job = [&] () -> std::optional<sstables::compaction_descriptor> {
|
||||
auto desc = t.get_compaction_strategy().get_reshaping_job(get_reshape_candidates(), t.schema(), sstables::reshape_mode::strict);
|
||||
return desc.sstables.size() ? std::make_optional(std::move(desc)) : std::nullopt;
|
||||
auto get_next_job = [&] () -> future<std::optional<sstables::compaction_descriptor>> {
|
||||
auto candidates = get_reshape_candidates();
|
||||
if (candidates.empty()) {
|
||||
co_return std::nullopt;
|
||||
}
|
||||
// all sstables added to maintenance set share the same underlying storage.
|
||||
auto& storage = candidates.front()->get_storage();
|
||||
sstables::reshape_config cfg = co_await sstables::make_reshape_config(storage, sstables::reshape_mode::strict);
|
||||
auto desc = t.get_compaction_strategy().get_reshaping_job(get_reshape_candidates(), t.schema(), cfg);
|
||||
co_return desc.sstables.size() ? std::make_optional(std::move(desc)) : std::nullopt;
|
||||
};
|
||||
|
||||
std::exception_ptr err;
|
||||
while (auto desc = get_next_job()) {
|
||||
while (auto desc = co_await get_next_job()) {
|
||||
auto compacting = compacting_sstable_registration(_cm, _cm.get_compaction_state(&t), desc->sstables);
|
||||
auto on_replace = compacting.update_on_sstable_replacement();
|
||||
|
||||
|
||||
@@ -75,7 +75,7 @@ reader_consumer_v2 compaction_strategy_impl::make_interposer_consumer(const muta
|
||||
}
|
||||
|
||||
compaction_descriptor
|
||||
compaction_strategy_impl::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const {
|
||||
compaction_strategy_impl::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
|
||||
return compaction_descriptor();
|
||||
}
|
||||
|
||||
@@ -700,8 +700,8 @@ compaction_backlog_tracker compaction_strategy::make_backlog_tracker() const {
|
||||
}
|
||||
|
||||
sstables::compaction_descriptor
|
||||
compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const {
|
||||
return _compaction_strategy_impl->get_reshaping_job(std::move(input), schema, mode);
|
||||
compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
|
||||
return _compaction_strategy_impl->get_reshaping_job(std::move(input), schema, cfg);
|
||||
}
|
||||
|
||||
uint64_t compaction_strategy::adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate, schema_ptr schema) const {
|
||||
@@ -739,6 +739,13 @@ compaction_strategy make_compaction_strategy(compaction_strategy_type strategy,
|
||||
return compaction_strategy(std::move(impl));
|
||||
}
|
||||
|
||||
future<reshape_config> make_reshape_config(const sstables::storage& storage, reshape_mode mode) {
|
||||
co_return sstables::reshape_config{
|
||||
.mode = mode,
|
||||
.free_storage_space = co_await storage.free_space() / smp::count,
|
||||
};
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
namespace compaction {
|
||||
|
||||
@@ -31,6 +31,7 @@ class sstable;
|
||||
class sstable_set;
|
||||
struct compaction_descriptor;
|
||||
struct resharding_descriptor;
|
||||
class storage;
|
||||
|
||||
class compaction_strategy {
|
||||
::shared_ptr<compaction_strategy_impl> _compaction_strategy_impl;
|
||||
@@ -122,11 +123,13 @@ public:
|
||||
//
|
||||
// The caller should also pass a maximum number of SSTables which is the maximum amount of
|
||||
// SSTables that can be added into a single job.
|
||||
compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const;
|
||||
compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const;
|
||||
|
||||
};
|
||||
|
||||
// Creates a compaction_strategy object from one of the strategies available.
|
||||
compaction_strategy make_compaction_strategy(compaction_strategy_type strategy, const std::map<sstring, sstring>& options);
|
||||
|
||||
future<reshape_config> make_reshape_config(const sstables::storage& storage, reshape_mode mode);
|
||||
|
||||
}
|
||||
|
||||
@@ -76,6 +76,6 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const;
|
||||
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const;
|
||||
};
|
||||
}
|
||||
|
||||
@@ -8,6 +8,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace sstables {
|
||||
|
||||
enum class compaction_strategy_type {
|
||||
@@ -18,4 +20,10 @@ enum class compaction_strategy_type {
|
||||
};
|
||||
|
||||
enum class reshape_mode { strict, relaxed };
|
||||
|
||||
struct reshape_config {
|
||||
reshape_mode mode;
|
||||
const uint64_t free_storage_space;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -146,7 +146,8 @@ int64_t leveled_compaction_strategy::estimated_pending_compactions(table_state&
|
||||
}
|
||||
|
||||
compaction_descriptor
|
||||
leveled_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const {
|
||||
leveled_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
|
||||
auto mode = cfg.mode;
|
||||
std::array<std::vector<shared_sstable>, leveled_manifest::MAX_LEVELS> level_info;
|
||||
|
||||
auto is_disjoint = [schema] (const std::vector<shared_sstable>& sstables, unsigned tolerance) -> std::tuple<bool, unsigned> {
|
||||
@@ -203,7 +204,7 @@ leveled_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input
|
||||
|
||||
if (level_info[0].size() > offstrategy_threshold) {
|
||||
size_tiered_compaction_strategy stcs(_stcs_options);
|
||||
return stcs.get_reshaping_job(std::move(level_info[0]), schema, mode);
|
||||
return stcs.get_reshaping_job(std::move(level_info[0]), schema, cfg);
|
||||
}
|
||||
|
||||
for (unsigned level = leveled_manifest::MAX_LEVELS - 1; level > 0; --level) {
|
||||
|
||||
@@ -74,7 +74,7 @@ public:
|
||||
|
||||
virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() const override;
|
||||
|
||||
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const override;
|
||||
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const override;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -297,8 +297,9 @@ size_tiered_compaction_strategy::most_interesting_bucket(const std::vector<sstab
|
||||
}
|
||||
|
||||
compaction_descriptor
|
||||
size_tiered_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const
|
||||
size_tiered_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const
|
||||
{
|
||||
auto mode = cfg.mode;
|
||||
size_t offstrategy_threshold = std::max(schema->min_compaction_threshold(), 4);
|
||||
size_t max_sstables = std::max(schema->max_compaction_threshold(), int(offstrategy_threshold));
|
||||
|
||||
|
||||
@@ -96,7 +96,7 @@ public:
|
||||
|
||||
virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() const override;
|
||||
|
||||
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const override;
|
||||
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const override;
|
||||
|
||||
friend class ::size_tiered_backlog_tracker;
|
||||
};
|
||||
|
||||
@@ -48,6 +48,7 @@ public:
|
||||
virtual sstables::shared_sstable make_sstable() const = 0;
|
||||
virtual sstables::sstable_writer_config configure_writer(sstring origin) const = 0;
|
||||
virtual api::timestamp_type min_memtable_timestamp() const = 0;
|
||||
virtual bool memtable_has_key(const dht::decorated_key& key) const = 0;
|
||||
virtual future<> on_compaction_completion(sstables::compaction_completion_desc desc, sstables::offstrategy offstrategy) = 0;
|
||||
virtual bool is_auto_compaction_disabled_by_user() const noexcept = 0;
|
||||
virtual bool tombstone_gc_enabled() const noexcept = 0;
|
||||
|
||||
@@ -555,7 +555,13 @@ future<> shard_reshaping_compaction_task_impl::run() {
|
||||
| boost::adaptors::filtered([&filter = _filter] (const auto& sst) {
|
||||
return filter(sst);
|
||||
}));
|
||||
auto desc = table.get_compaction_strategy().get_reshaping_job(std::move(reshape_candidates), table.schema(), _mode);
|
||||
if (reshape_candidates.empty()) {
|
||||
break;
|
||||
}
|
||||
// all sstables were found in the same sstable_directory instance, so they share the same underlying storage.
|
||||
auto& storage = reshape_candidates.front()->get_storage();
|
||||
auto cfg = co_await sstables::make_reshape_config(storage, _mode);
|
||||
auto desc = table.get_compaction_strategy().get_reshaping_job(std::move(reshape_candidates), table.schema(), cfg);
|
||||
if (desc.sstables.empty()) {
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -223,12 +223,14 @@ reader_consumer_v2 time_window_compaction_strategy::make_interposer_consumer(con
|
||||
}
|
||||
|
||||
compaction_descriptor
|
||||
time_window_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const {
|
||||
time_window_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
|
||||
auto mode = cfg.mode;
|
||||
std::vector<shared_sstable> single_window;
|
||||
std::vector<shared_sstable> multi_window;
|
||||
|
||||
size_t offstrategy_threshold = std::max(schema->min_compaction_threshold(), 4);
|
||||
size_t max_sstables = std::max(schema->max_compaction_threshold(), int(offstrategy_threshold));
|
||||
const uint64_t target_job_size = cfg.free_storage_space * reshape_target_space_overhead;
|
||||
|
||||
if (mode == reshape_mode::relaxed) {
|
||||
offstrategy_threshold = max_sstables;
|
||||
@@ -260,22 +262,40 @@ time_window_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> i
|
||||
multi_window.size(), !multi_window.empty() && sstable_set_overlapping_count(schema, multi_window) == 0,
|
||||
single_window.size(), !single_window.empty() && sstable_set_overlapping_count(schema, single_window) == 0);
|
||||
|
||||
auto need_trimming = [max_sstables, schema, &is_disjoint] (const std::vector<shared_sstable>& ssts) {
|
||||
// All sstables can be compacted at once if they're disjoint, given that partitioned set
|
||||
// will incrementally open sstables which translates into bounded memory usage.
|
||||
return ssts.size() > max_sstables && !is_disjoint(ssts);
|
||||
auto get_job_size = [] (const std::vector<shared_sstable>& ssts) {
|
||||
return boost::accumulate(ssts | boost::adaptors::transformed(std::mem_fn(&sstable::bytes_on_disk)), uint64_t(0));
|
||||
};
|
||||
|
||||
// Targets a space overhead of 10%. All disjoint sstables can be compacted together as long as they won't
|
||||
// cause an overhead above target. Otherwise, the job targets a maximum of #max_threshold sstables.
|
||||
auto need_trimming = [&] (const std::vector<shared_sstable>& ssts, const uint64_t job_size, bool is_disjoint) {
|
||||
const size_t min_sstables = 2;
|
||||
auto is_above_target_size = job_size > target_job_size;
|
||||
|
||||
return (ssts.size() > max_sstables && !is_disjoint) ||
|
||||
(ssts.size() > min_sstables && is_above_target_size);
|
||||
};
|
||||
|
||||
auto maybe_trim_job = [&need_trimming] (std::vector<shared_sstable>& ssts, uint64_t job_size, bool is_disjoint) {
|
||||
while (need_trimming(ssts, job_size, is_disjoint)) {
|
||||
auto sst = ssts.back();
|
||||
ssts.pop_back();
|
||||
job_size -= sst->bytes_on_disk();
|
||||
}
|
||||
};
|
||||
|
||||
if (!multi_window.empty()) {
|
||||
auto disjoint = is_disjoint(multi_window);
|
||||
auto job_size = get_job_size(multi_window);
|
||||
// Everything that spans multiple windows will need reshaping
|
||||
if (need_trimming(multi_window)) {
|
||||
if (need_trimming(multi_window, job_size, disjoint)) {
|
||||
// When trimming, let's keep sstables with overlapping time window, so as to reduce write amplification.
|
||||
// For example, if there are N sstables spanning window W, where N <= 32, then we can produce all data for W
|
||||
// in a single compaction round, removing the need to later compact W to reduce its number of files.
|
||||
boost::partial_sort(multi_window, multi_window.begin() + max_sstables, [](const shared_sstable &a, const shared_sstable &b) {
|
||||
return a->get_stats_metadata().max_timestamp < b->get_stats_metadata().max_timestamp;
|
||||
});
|
||||
multi_window.resize(max_sstables);
|
||||
maybe_trim_job(multi_window, job_size, disjoint);
|
||||
}
|
||||
compaction_descriptor desc(std::move(multi_window));
|
||||
desc.options = compaction_type_options::make_reshape();
|
||||
@@ -294,15 +314,17 @@ time_window_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> i
|
||||
std::copy(ssts.begin(), ssts.end(), std::back_inserter(single_window));
|
||||
continue;
|
||||
}
|
||||
|
||||
// reuse STCS reshape logic which will only compact similar-sized files, to increase overall efficiency
|
||||
// when reshaping time buckets containing a huge amount of files
|
||||
auto desc = size_tiered_compaction_strategy(_stcs_options).get_reshaping_job(std::move(ssts), schema, mode);
|
||||
auto desc = size_tiered_compaction_strategy(_stcs_options).get_reshaping_job(std::move(ssts), schema, cfg);
|
||||
if (!desc.sstables.empty()) {
|
||||
return desc;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!single_window.empty()) {
|
||||
maybe_trim_job(single_window, get_job_size(single_window), all_disjoint);
|
||||
compaction_descriptor desc(std::move(single_window));
|
||||
desc.options = compaction_type_options::make_reshape();
|
||||
return desc;
|
||||
|
||||
@@ -78,6 +78,7 @@ public:
|
||||
// To prevent an explosion in the number of sstables we cap it.
|
||||
// Better co-locate some windows into the same sstables than OOM.
|
||||
static constexpr uint64_t max_data_segregation_window_count = 100;
|
||||
static constexpr float reshape_target_space_overhead = 0.1f;
|
||||
|
||||
using bucket_t = std::vector<shared_sstable>;
|
||||
enum class bucket_compaction_mode { none, size_tiered, major };
|
||||
@@ -170,7 +171,7 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_mode mode) const override;
|
||||
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const override;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -815,7 +815,7 @@ bool query_processor::has_more_results(cql3::internal_query_state& state) const
|
||||
|
||||
future<> query_processor::for_each_cql_result(
|
||||
cql3::internal_query_state& state,
|
||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set::row&)>&& f) {
|
||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set::row&)> f) {
|
||||
do {
|
||||
auto msg = co_await execute_paged_internal(state);
|
||||
for (auto& row : *msg) {
|
||||
@@ -1116,14 +1116,14 @@ future<> query_processor::query_internal(
|
||||
db::consistency_level cl,
|
||||
const std::initializer_list<data_value>& values,
|
||||
int32_t page_size,
|
||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)>&& f) {
|
||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f) {
|
||||
auto query_state = create_paged_state(query_string, cl, values, page_size);
|
||||
co_return co_await for_each_cql_result(query_state, std::move(f));
|
||||
}
|
||||
|
||||
future<> query_processor::query_internal(
|
||||
const sstring& query_string,
|
||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)>&& f) {
|
||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f) {
|
||||
return query_internal(query_string, db::consistency_level::ONE, {}, 1000, std::move(f));
|
||||
}
|
||||
|
||||
|
||||
@@ -307,7 +307,7 @@ public:
|
||||
db::consistency_level cl,
|
||||
const std::initializer_list<data_value>& values,
|
||||
int32_t page_size,
|
||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)>&& f);
|
||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f);
|
||||
|
||||
/*
|
||||
* \brief iterate over all cql results using paging
|
||||
@@ -322,7 +322,7 @@ public:
|
||||
*/
|
||||
future<> query_internal(
|
||||
const sstring& query_string,
|
||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)>&& f);
|
||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f);
|
||||
|
||||
class cache_internal_tag;
|
||||
using cache_internal = bool_class<cache_internal_tag>;
|
||||
@@ -479,7 +479,7 @@ private:
|
||||
*/
|
||||
future<> for_each_cql_result(
|
||||
cql3::internal_query_state& state,
|
||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)>&& f);
|
||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f);
|
||||
|
||||
/*!
|
||||
* \brief check, based on the state if there are additional results
|
||||
|
||||
@@ -2004,7 +2004,10 @@ std::unique_ptr<prepared_statement> select_statement::prepare(data_dictionary::d
|
||||
)
|
||||
&& !restrictions->need_filtering() // No filtering
|
||||
&& group_by_cell_indices->empty() // No GROUP BY
|
||||
&& db.get_config().enable_parallelized_aggregation();
|
||||
&& db.get_config().enable_parallelized_aggregation()
|
||||
&& !( // Do not parallelize the request if it's single partition read
|
||||
restrictions->partition_key_restrictions_is_all_eq()
|
||||
&& restrictions->partition_key_restrictions_size() == schema->partition_key_size());
|
||||
};
|
||||
|
||||
if (_parameters->is_prune_materialized_view()) {
|
||||
|
||||
@@ -135,7 +135,7 @@ future<> db::batchlog_manager::stop() {
|
||||
}
|
||||
|
||||
future<size_t> db::batchlog_manager::count_all_batches() const {
|
||||
sstring query = format("SELECT count(*) FROM {}.{}", system_keyspace::NAME, system_keyspace::BATCHLOG);
|
||||
sstring query = format("SELECT count(*) FROM {}.{} BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG);
|
||||
return _qp.execute_internal(query, cql3::query_processor::cache_internal::yes).then([](::shared_ptr<cql3::untyped_result_set> rs) {
|
||||
return size_t(rs->one().get_as<int64_t>("count"));
|
||||
});
|
||||
@@ -154,26 +154,26 @@ future<> db::batchlog_manager::replay_all_failed_batches() {
|
||||
auto throttle = _replay_rate / _qp.proxy().get_token_metadata_ptr()->count_normal_token_owners();
|
||||
auto limiter = make_lw_shared<utils::rate_limiter>(throttle);
|
||||
|
||||
auto batch = [this, limiter](const cql3::untyped_result_set::row& row) {
|
||||
auto batch = [this, limiter](const cql3::untyped_result_set::row& row) -> future<stop_iteration> {
|
||||
auto written_at = row.get_as<db_clock::time_point>("written_at");
|
||||
auto id = row.get_as<utils::UUID>("id");
|
||||
// enough time for the actual write + batchlog entry mutation delivery (two separate requests).
|
||||
auto timeout = get_batch_log_timeout();
|
||||
if (db_clock::now() < written_at + timeout) {
|
||||
blogger.debug("Skipping replay of {}, too fresh", id);
|
||||
return make_ready_future<>();
|
||||
return make_ready_future<stop_iteration>(stop_iteration::no);
|
||||
}
|
||||
|
||||
// check version of serialization format
|
||||
if (!row.has("version")) {
|
||||
blogger.warn("Skipping logged batch because of unknown version");
|
||||
return make_ready_future<>();
|
||||
return make_ready_future<stop_iteration>(stop_iteration::no);
|
||||
}
|
||||
|
||||
auto version = row.get_as<int32_t>("version");
|
||||
if (version != netw::messaging_service::current_version) {
|
||||
blogger.warn("Skipping logged batch because of incorrect version");
|
||||
return make_ready_future<>();
|
||||
return make_ready_future<stop_iteration>(stop_iteration::no);
|
||||
}
|
||||
|
||||
auto data = row.get_blob("data");
|
||||
@@ -255,49 +255,20 @@ future<> db::batchlog_manager::replay_all_failed_batches() {
|
||||
auto now = service::client_state(service::client_state::internal_tag()).get_timestamp();
|
||||
m.partition().apply_delete(*schema, clustering_key_prefix::make_empty(), tombstone(now, gc_clock::now()));
|
||||
return _qp.proxy().mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
|
||||
});
|
||||
}).then([] { return make_ready_future<stop_iteration>(stop_iteration::no); });
|
||||
};
|
||||
|
||||
return seastar::with_gate(_gate, [this, batch = std::move(batch)] {
|
||||
return seastar::with_gate(_gate, [this, batch = std::move(batch)] () mutable {
|
||||
blogger.debug("Started replayAllFailedBatches (cpu {})", this_shard_id());
|
||||
|
||||
typedef ::shared_ptr<cql3::untyped_result_set> page_ptr;
|
||||
sstring query = format("SELECT id, data, written_at, version FROM {}.{} LIMIT {:d}", system_keyspace::NAME, system_keyspace::BATCHLOG, page_size);
|
||||
return _qp.execute_internal(query, cql3::query_processor::cache_internal::yes).then([this, batch = std::move(batch)](page_ptr page) {
|
||||
return do_with(std::move(page), [this, batch = std::move(batch)](page_ptr & page) mutable {
|
||||
return repeat([this, &page, batch = std::move(batch)]() mutable {
|
||||
if (page->empty()) {
|
||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
||||
}
|
||||
auto id = page->back().get_as<utils::UUID>("id");
|
||||
return parallel_for_each(*page, batch).then([this, &page, id]() {
|
||||
if (page->size() < page_size) {
|
||||
return make_ready_future<stop_iteration>(stop_iteration::yes); // we've exhausted the batchlog, next query would be empty.
|
||||
}
|
||||
sstring query = format("SELECT id, data, written_at, version FROM {}.{} WHERE token(id) > token(?) LIMIT {:d}",
|
||||
system_keyspace::NAME,
|
||||
system_keyspace::BATCHLOG,
|
||||
page_size);
|
||||
return _qp.execute_internal(query, {id}, cql3::query_processor::cache_internal::yes).then([&page](auto res) {
|
||||
page = std::move(res);
|
||||
return make_ready_future<stop_iteration>(stop_iteration::no);
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
}).then([] {
|
||||
// TODO FIXME : cleanup()
|
||||
#if 0
|
||||
ColumnFamilyStore cfs = Keyspace.open(SystemKeyspace.NAME).getColumnFamilyStore(SystemKeyspace.BATCHLOG);
|
||||
cfs.forceBlockingFlush();
|
||||
Collection<Descriptor> descriptors = new ArrayList<>();
|
||||
for (SSTableReader sstr : cfs.getSSTables())
|
||||
descriptors.add(sstr.descriptor);
|
||||
if (!descriptors.isEmpty()) // don't pollute the logs if there is nothing to compact.
|
||||
CompactionManager.instance.submitUserDefined(cfs, descriptors, Integer.MAX_VALUE).get();
|
||||
|
||||
#endif
|
||||
|
||||
return _qp.query_internal(
|
||||
format("SELECT id, data, written_at, version FROM {}.{} BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG),
|
||||
db::consistency_level::ONE,
|
||||
{},
|
||||
page_size,
|
||||
std::move(batch)).then([this] {
|
||||
// Replaying batches could have generated tombstones, flush to disk,
|
||||
// where they can be compacted away.
|
||||
return replica::database::flush_table_on_all_shards(_qp.proxy().get_db(), system_keyspace::NAME, system_keyspace::BATCHLOG);
|
||||
}).then([] {
|
||||
blogger.debug("Finished replayAllFailedBatches");
|
||||
});
|
||||
|
||||
@@ -951,7 +951,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
||||
, unspooled_dirty_soft_limit(this, "unspooled_dirty_soft_limit", value_status::Used, 0.6, "Soft limit of unspooled dirty memory expressed as a portion of the hard limit")
|
||||
, sstable_summary_ratio(this, "sstable_summary_ratio", value_status::Used, 0.0005, "Enforces that 1 byte of summary is written for every N (2000 by default) "
|
||||
"bytes written to data file. Value must be between 0 and 1.")
|
||||
, components_memory_reclaim_threshold(this, "components_memory_reclaim_threshold", liveness::LiveUpdate, value_status::Used, .1, "Ratio of available memory for all in-memory components of SSTables in a shard beyond which the memory will be reclaimed from components until it falls back under the threshold. Currently, this limit is only enforced for bloom filters.")
|
||||
, components_memory_reclaim_threshold(this, "components_memory_reclaim_threshold", liveness::LiveUpdate, value_status::Used, .2, "Ratio of available memory for all in-memory components of SSTables in a shard beyond which the memory will be reclaimed from components until it falls back under the threshold. Currently, this limit is only enforced for bloom filters.")
|
||||
, large_memory_allocation_warning_threshold(this, "large_memory_allocation_warning_threshold", value_status::Used, size_t(1) << 20, "Warn about memory allocations above this size; set to zero to disable")
|
||||
, enable_deprecated_partitioners(this, "enable_deprecated_partitioners", value_status::Used, false, "Enable the byteordered and random partitioners. These partitioners are deprecated and will be removed in a future version.")
|
||||
, enable_keyspace_column_family_metrics(this, "enable_keyspace_column_family_metrics", value_status::Used, false, "Enable per keyspace and per column family metrics reporting")
|
||||
@@ -991,6 +991,8 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
||||
"Start serializing reads after their collective memory consumption goes above $normal_limit * $multiplier.")
|
||||
, reader_concurrency_semaphore_kill_limit_multiplier(this, "reader_concurrency_semaphore_kill_limit_multiplier", liveness::LiveUpdate, value_status::Used, 4,
|
||||
"Start killing reads after their collective memory consumption goes above $normal_limit * $multiplier.")
|
||||
, reader_concurrency_semaphore_cpu_concurrency(this, "reader_concurrency_semaphore_cpu_concurrency", liveness::LiveUpdate, value_status::Used, 1,
|
||||
"Admit new reads while there are less than this number of requests that need CPU.")
|
||||
, twcs_max_window_count(this, "twcs_max_window_count", liveness::LiveUpdate, value_status::Used, 50,
|
||||
"The maximum number of compaction windows allowed when making use of TimeWindowCompactionStrategy. A setting of 0 effectively disables the restriction.")
|
||||
, initial_sstable_loading_concurrency(this, "initial_sstable_loading_concurrency", value_status::Used, 4u,
|
||||
|
||||
@@ -373,6 +373,7 @@ public:
|
||||
named_value<uint64_t> max_memory_for_unlimited_query_hard_limit;
|
||||
named_value<uint32_t> reader_concurrency_semaphore_serialize_limit_multiplier;
|
||||
named_value<uint32_t> reader_concurrency_semaphore_kill_limit_multiplier;
|
||||
named_value<uint32_t> reader_concurrency_semaphore_cpu_concurrency;
|
||||
named_value<uint32_t> twcs_max_window_count;
|
||||
named_value<unsigned> initial_sstable_loading_concurrency;
|
||||
named_value<bool> enable_3_1_0_compatibility_mode;
|
||||
|
||||
@@ -55,6 +55,10 @@ public:
|
||||
return ser::serialize_to_buffer<bytes>(_paxos_gc_sec);
|
||||
}
|
||||
|
||||
std::string options_to_string() const override {
|
||||
return std::to_string(_paxos_gc_sec);
|
||||
}
|
||||
|
||||
static int32_t deserialize(const bytes_view& buffer) {
|
||||
return ser::deserialize_from_buffer(buffer, boost::type<int32_t>());
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@ For example:
|
||||
|
||||
In this scenario, a missing ``TOC`` file will prevent the Scylla node from starting.
|
||||
|
||||
The SSTable corporation problem can be different, for example, other missing or unreadable files. The following solution apply for all of the scenarios.
|
||||
The SSTable corruption problem can be different, for example, other missing or unreadable files. The following solution applies to all scenarios.
|
||||
|
||||
Solution
|
||||
^^^^^^^^
|
||||
|
||||
@@ -167,54 +167,27 @@ Download and install the new release
|
||||
|
||||
.. group-tab:: EC2/GCP/Azure Ubuntu Image
|
||||
|
||||
Before upgrading, check what version you are running now using ``scylla --version``. You should use the same version as this version in case you want to |ROLLBACK|_ the upgrade. If you are not running a |SRC_VERSION|.x version, stop right here! This guide only covers |SRC_VERSION|.x to |NEW_VERSION|.y upgrades.
|
||||
Before upgrading, check what version you are running now using ``scylla --version``. You should use the same version as this version in case you want to |ROLLBACK|_ the upgrade. If you are not running a |SRC_VERSION|.x version, stop right here! This guide only covers |SRC_VERSION|.x to |NEW_VERSION|.y upgrades.
|
||||
|
||||
There are two alternative upgrade procedures: upgrading ScyllaDB and simultaneously updating 3rd party and OS packages - recommended if you
|
||||
are running a ScyllaDB official image (EC2 AMI, GCP, and Azure images), which is based on Ubuntu 20.04, and upgrading ScyllaDB without updating
|
||||
any external packages.
|
||||
If you’re using the ScyllaDB official image (recommended), see
|
||||
the **Debian/Ubuntu** tab for upgrade instructions. If you’re using your
|
||||
own image and have installed ScyllaDB packages for Ubuntu or Debian,
|
||||
you need to apply an extended upgrade procedure:
|
||||
|
||||
#. Update the ScyllaDB deb repo (see above).
|
||||
#. Configure Java 1.8 (see above).
|
||||
#. Install the new ScyllaDB version with the additional
|
||||
``scylla-enterprise-machine-image`` package:
|
||||
|
||||
**To upgrade ScyllaDB and update 3rd party and OS packages (RECOMMENDED):**
|
||||
|
||||
Choosing this upgrade procedure allows you to upgrade your ScyllaDB version and update the 3rd party and OS packages using one command.
|
||||
|
||||
#. Update the |SCYLLA_DEB_NEW_REPO| to |NEW_VERSION|.
|
||||
|
||||
#. Load the new repo:
|
||||
|
||||
.. code:: sh
|
||||
|
||||
sudo apt-get update
|
||||
|
||||
#. Run the following command to update the manifest file:
|
||||
|
||||
.. code:: sh
|
||||
|
||||
cat scylla-enterprise-packages-<version>-<arch>.txt | sudo xargs -n1 apt-get install -y
|
||||
|
||||
Where:
|
||||
|
||||
* ``<version>`` - The ScyllaDB Enterprise version to which you are upgrading ( |NEW_VERSION| ).
|
||||
* ``<arch>`` - Architecture type: ``x86_64`` or ``aarch64``.
|
||||
|
||||
The file is included in the ScyllaDB Enterprise packages downloaded in the previous step. The file location is ``http://downloads.scylladb.com/downloads/scylla/aws/manifest/scylla-packages-<version>-<arch>.txt``
|
||||
|
||||
Example:
|
||||
|
||||
.. code:: sh
|
||||
|
||||
cat scylla-enterprise-packages-2022.2.0-x86_64.txt | sudo xargs -n1 apt-get install -y
|
||||
|
||||
|
||||
.. note::
|
||||
|
||||
Alternatively, you can update the manifest file with the following command:
|
||||
|
||||
``sudo apt-get install $(awk '{print $1'} scylla-enterprise-packages-<version>-<arch>.txt) -y``
|
||||
|
||||
|
||||
|
||||
To upgrade ScyllaDB without updating any external packages, follow the :ref:`download and installation instructions for Debian/Ubuntu <upgrade-debian-ubuntu-5.2-to-enterprise-2023.1>`.
|
||||
.. code::
|
||||
|
||||
sudo apt-get clean all
|
||||
sudo apt-get update
|
||||
sudo apt-get dist-upgrade scylla-enterprise
|
||||
sudo apt-get dist-upgrade scylla-enterprise-machine-image
|
||||
|
||||
#. Run ``scylla_setup`` without running ``io_setup``.
|
||||
#. Run ``sudo /opt/scylladb/scylla-machine-image/scylla_cloud_io_setup``.
|
||||
|
||||
Start the node
|
||||
--------------
|
||||
|
||||
@@ -87,7 +87,7 @@ The following libraries are available:
|
||||
More information
|
||||
----------------
|
||||
|
||||
`Scylla University: Change Data Capture (CDC) lesson <https://university.scylladb.com/courses/scylla-operations/lessons/change-data-capture-cdc/>`_ - Learn how to use CDC. Some of the topics covered are:
|
||||
`Scylla University: Change Data Capture (CDC) lesson <https://university.scylladb.com/courses/data-modeling/lessons/change-data-capture-cdc/>`_ - Learn how to use CDC. Some of the topics covered are:
|
||||
|
||||
* An overview of Change Data Capture, what exactly is it, what are some common use cases, what does it do, and an overview of how it works
|
||||
* How can that data be consumed? Different options for consuming the data changes including normal CQL, a layered approach, and integrators
|
||||
|
||||
@@ -2343,8 +2343,13 @@ bool gossiper::is_alive(inet_address ep) const {
|
||||
}
|
||||
|
||||
future<> gossiper::wait_alive(std::vector<gms::inet_address> nodes, std::chrono::milliseconds timeout) {
|
||||
return wait_alive([nodes = std::move(nodes)] { return nodes; }, timeout);
|
||||
}
|
||||
|
||||
future<> gossiper::wait_alive(noncopyable_function<std::vector<gms::inet_address>()> get_nodes, std::chrono::milliseconds timeout) {
|
||||
auto start_time = std::chrono::steady_clock::now();
|
||||
for (;;) {
|
||||
auto nodes = get_nodes();
|
||||
std::vector<gms::inet_address> live_nodes;
|
||||
for (const auto& node: nodes) {
|
||||
size_t nr_alive = co_await container().map_reduce0([node] (gossiper& g) -> size_t {
|
||||
|
||||
@@ -500,6 +500,7 @@ public:
|
||||
bool is_dead_state(const endpoint_state& eps) const;
|
||||
// Wait for nodes to be alive on all shards
|
||||
future<> wait_alive(std::vector<gms::inet_address> nodes, std::chrono::milliseconds timeout);
|
||||
future<> wait_alive(noncopyable_function<std::vector<gms::inet_address>()> get_nodes, std::chrono::milliseconds timeout);
|
||||
|
||||
// Wait for `n` live nodes to show up in gossip (including ourself).
|
||||
future<> wait_for_live_nodes_to_show_up(size_t n);
|
||||
|
||||
33
main.cc
33
main.cc
@@ -165,12 +165,29 @@ struct convert<::object_storage_endpoint_param> {
|
||||
ep.endpoint = node["name"].as<std::string>();
|
||||
ep.config.port = node["port"].as<unsigned>();
|
||||
ep.config.use_https = node["https"].as<bool>(false);
|
||||
if (node["aws_region"]) {
|
||||
if (node["aws_region"] || std::getenv("AWS_DEFAULT_REGION")) {
|
||||
ep.config.aws.emplace();
|
||||
ep.config.aws->region = node["aws_region"].as<std::string>();
|
||||
ep.config.aws->access_key_id = node["aws_access_key_id"].as<std::string>();
|
||||
ep.config.aws->secret_access_key = node["aws_secret_access_key"].as<std::string>();
|
||||
ep.config.aws->session_token = node["aws_session_token"].as<std::string>("");
|
||||
|
||||
// https://github.com/scylladb/scylla-pkg/issues/3845
|
||||
// Allow picking up aws values via standard env vars as well.
|
||||
// Value in config has prio, but fall back to env.
|
||||
// This has the added benefit of potentially reducing the amount of
|
||||
// sensitive data in config files (i.e. credentials)
|
||||
auto get_node_value_or_env = [&](const char* key, const char* var) {
|
||||
auto child = node[key];
|
||||
if (child) {
|
||||
return child.as<std::string>();
|
||||
}
|
||||
auto val = std::getenv(var);
|
||||
if (val) {
|
||||
return std::string(val);
|
||||
}
|
||||
return std::string{};
|
||||
};
|
||||
ep.config.aws->region = get_node_value_or_env("aws_region", "AWS_DEFAULT_REGION");
|
||||
ep.config.aws->access_key_id = get_node_value_or_env("aws_access_key_id", "AWS_ACCESS_KEY_ID");
|
||||
ep.config.aws->secret_access_key = get_node_value_or_env("aws_secret_access_key", "AWS_SECRET_ACCESS_KEY");
|
||||
ep.config.aws->session_token = get_node_value_or_env("aws_session_token", "AWS_SESSION_TOKEN");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@@ -1242,7 +1259,11 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
|
||||
}
|
||||
|
||||
netw::messaging_service::scheduling_config scfg;
|
||||
scfg.statement_tenants = { {dbcfg.statement_scheduling_group, "$user"}, {default_scheduling_group(), "$system"} };
|
||||
scfg.statement_tenants = {
|
||||
{dbcfg.statement_scheduling_group, "$user"},
|
||||
{default_scheduling_group(), "$system"},
|
||||
{dbcfg.streaming_scheduling_group, "$maintenance"}
|
||||
};
|
||||
scfg.streaming = dbcfg.streaming_scheduling_group;
|
||||
scfg.gossip = dbcfg.gossip_scheduling_group;
|
||||
|
||||
|
||||
@@ -221,6 +221,12 @@ stop_iteration mutation_partition_v2::apply_monotonically(const schema& s, const
|
||||
alloc_strategy_unique_ptr<rows_entry> p_sentinel;
|
||||
alloc_strategy_unique_ptr<rows_entry> this_sentinel;
|
||||
auto insert_sentinel_back = defer([&] {
|
||||
// Note: this lambda will be run by a destructor (of the `defer` guard),
|
||||
// so it mustn't throw, or else it will crash the node.
|
||||
//
|
||||
// To prevent a `bad_alloc` during the tree insertion, we have to preallocate
|
||||
// some memory for the new tree nodes. This is done by the `hold_reserve`
|
||||
// constructed after the lambda.
|
||||
if (this_sentinel) {
|
||||
assert(p_i != p._rows.end());
|
||||
auto rt = this_sentinel->range_tombstone();
|
||||
@@ -254,6 +260,15 @@ stop_iteration mutation_partition_v2::apply_monotonically(const schema& s, const
|
||||
}
|
||||
});
|
||||
|
||||
// This guard will ensure that LSA reserves one free segment more than it
|
||||
// needs for internal reasons.
|
||||
//
|
||||
// It will be destroyed immediately before the sentinel-inserting `defer`
|
||||
// happens, ensuring that the sentinel insertion has at least one free LSA segment
|
||||
// to work with. This should be enough, since we only need to allocate a few
|
||||
// B-tree nodes.
|
||||
auto memory_reserve_for_sentinel_inserts = hold_reserve(logalloc::segment_size);
|
||||
|
||||
while (p_i != p._rows.end()) {
|
||||
rows_entry& src_e = *p_i;
|
||||
|
||||
|
||||
@@ -637,7 +637,9 @@ void fsm::step(server_id from, Message&& msg) {
|
||||
_last_election_time = _clock.now();
|
||||
|
||||
if (current_leader() != from) {
|
||||
on_internal_error_noexcept(logger, "Got append request/install snapshot/read_quorum from an unexpected leader");
|
||||
on_internal_error_noexcept(logger, format(
|
||||
"Got append request/install snapshot/read_quorum from an unexpected leader,"
|
||||
" expected leader: {}, message from: {}", current_leader(), from));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -926,12 +926,15 @@ future<> reader_concurrency_semaphore::execution_loop() noexcept {
|
||||
e.pr.set_exception(std::current_exception());
|
||||
}
|
||||
|
||||
// We now possibly have >= CPU concurrency, so even if the above read
|
||||
// didn't release any resources, just dequeueing it from the
|
||||
// _ready_list could allow us to admit new reads.
|
||||
maybe_admit_waiters();
|
||||
|
||||
if (need_preempt()) {
|
||||
co_await coroutine::maybe_yield();
|
||||
}
|
||||
}
|
||||
|
||||
maybe_admit_waiters();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -968,14 +971,21 @@ void reader_concurrency_semaphore::signal(const resources& r) noexcept {
|
||||
maybe_admit_waiters();
|
||||
}
|
||||
|
||||
reader_concurrency_semaphore::reader_concurrency_semaphore(int count, ssize_t memory, sstring name, size_t max_queue_length,
|
||||
utils::updateable_value<uint32_t> serialize_limit_multiplier, utils::updateable_value<uint32_t> kill_limit_multiplier)
|
||||
reader_concurrency_semaphore::reader_concurrency_semaphore(
|
||||
int count,
|
||||
ssize_t memory,
|
||||
sstring name,
|
||||
size_t max_queue_length,
|
||||
utils::updateable_value<uint32_t> serialize_limit_multiplier,
|
||||
utils::updateable_value<uint32_t> kill_limit_multiplier,
|
||||
utils::updateable_value<uint32_t> cpu_concurrency)
|
||||
: _initial_resources(count, memory)
|
||||
, _resources(count, memory)
|
||||
, _name(std::move(name))
|
||||
, _max_queue_length(max_queue_length)
|
||||
, _serialize_limit_multiplier(std::move(serialize_limit_multiplier))
|
||||
, _kill_limit_multiplier(std::move(kill_limit_multiplier))
|
||||
, _cpu_concurrency(cpu_concurrency)
|
||||
{ }
|
||||
|
||||
reader_concurrency_semaphore::reader_concurrency_semaphore(no_limits, sstring name)
|
||||
@@ -985,7 +995,8 @@ reader_concurrency_semaphore::reader_concurrency_semaphore(no_limits, sstring na
|
||||
std::move(name),
|
||||
std::numeric_limits<size_t>::max(),
|
||||
utils::updateable_value(std::numeric_limits<uint32_t>::max()),
|
||||
utils::updateable_value(std::numeric_limits<uint32_t>::max())) {}
|
||||
utils::updateable_value(std::numeric_limits<uint32_t>::max()),
|
||||
utils::updateable_value(uint32_t(1))) {}
|
||||
|
||||
reader_concurrency_semaphore::~reader_concurrency_semaphore() {
|
||||
assert(!_stats.waiters);
|
||||
@@ -1186,8 +1197,8 @@ bool reader_concurrency_semaphore::has_available_units(const resources& r) const
|
||||
return (_resources.non_zero() && _resources.count >= r.count && _resources.memory >= r.memory) || _resources.count == _initial_resources.count;
|
||||
}
|
||||
|
||||
bool reader_concurrency_semaphore::all_need_cpu_permits_are_awaiting() const {
|
||||
return _stats.need_cpu_permits == _stats.awaits_permits;
|
||||
bool reader_concurrency_semaphore::cpu_concurrency_limit_reached() const {
|
||||
return (_stats.need_cpu_permits - _stats.awaits_permits) >= _cpu_concurrency();
|
||||
}
|
||||
|
||||
std::exception_ptr reader_concurrency_semaphore::check_queue_size(std::string_view queue_name) {
|
||||
@@ -1270,7 +1281,7 @@ reader_concurrency_semaphore::can_admit_read(const reader_permit::impl& permit)
|
||||
return {can_admit::no, reason::ready_list};
|
||||
}
|
||||
|
||||
if (!all_need_cpu_permits_are_awaiting()) {
|
||||
if (cpu_concurrency_limit_reached()) {
|
||||
return {can_admit::no, reason::need_cpu_permits};
|
||||
}
|
||||
|
||||
|
||||
@@ -186,6 +186,7 @@ private:
|
||||
size_t _max_queue_length = std::numeric_limits<size_t>::max();
|
||||
utils::updateable_value<uint32_t> _serialize_limit_multiplier;
|
||||
utils::updateable_value<uint32_t> _kill_limit_multiplier;
|
||||
utils::updateable_value<uint32_t> _cpu_concurrency;
|
||||
stats _stats;
|
||||
bool _stopped = false;
|
||||
bool _evicting = false;
|
||||
@@ -201,7 +202,7 @@ private:
|
||||
|
||||
bool has_available_units(const resources& r) const;
|
||||
|
||||
bool all_need_cpu_permits_are_awaiting() const;
|
||||
bool cpu_concurrency_limit_reached() const;
|
||||
|
||||
[[nodiscard]] std::exception_ptr check_queue_size(std::string_view queue_name);
|
||||
|
||||
@@ -274,7 +275,19 @@ public:
|
||||
sstring name,
|
||||
size_t max_queue_length,
|
||||
utils::updateable_value<uint32_t> serialize_limit_multiplier,
|
||||
utils::updateable_value<uint32_t> kill_limit_multiplier);
|
||||
utils::updateable_value<uint32_t> kill_limit_multiplier,
|
||||
utils::updateable_value<uint32_t> cpu_concurrency);
|
||||
|
||||
reader_concurrency_semaphore(
|
||||
int count,
|
||||
ssize_t memory,
|
||||
sstring name,
|
||||
size_t max_queue_length,
|
||||
utils::updateable_value<uint32_t> serialize_limit_multiplier,
|
||||
utils::updateable_value<uint32_t> kill_limit_multiplier)
|
||||
: reader_concurrency_semaphore(count, memory, std::move(name), max_queue_length,
|
||||
std::move(serialize_limit_multiplier), std::move(kill_limit_multiplier), utils::updateable_value<uint32_t>(1))
|
||||
{ }
|
||||
|
||||
/// Create a semaphore with practically unlimited count and memory.
|
||||
///
|
||||
@@ -291,8 +304,10 @@ public:
|
||||
ssize_t memory = std::numeric_limits<ssize_t>::max(),
|
||||
size_t max_queue_length = std::numeric_limits<size_t>::max(),
|
||||
utils::updateable_value<uint32_t> serialize_limit_multipler = utils::updateable_value(std::numeric_limits<uint32_t>::max()),
|
||||
utils::updateable_value<uint32_t> kill_limit_multipler = utils::updateable_value(std::numeric_limits<uint32_t>::max()))
|
||||
: reader_concurrency_semaphore(count, memory, std::move(name), max_queue_length, std::move(serialize_limit_multipler), std::move(kill_limit_multipler))
|
||||
utils::updateable_value<uint32_t> kill_limit_multipler = utils::updateable_value(std::numeric_limits<uint32_t>::max()),
|
||||
utils::updateable_value<uint32_t> cpu_concurrency = utils::updateable_value<uint32_t>(1))
|
||||
: reader_concurrency_semaphore(count, memory, std::move(name), max_queue_length, std::move(serialize_limit_multipler),
|
||||
std::move(kill_limit_multipler), std::move(cpu_concurrency))
|
||||
{}
|
||||
|
||||
virtual ~reader_concurrency_semaphore();
|
||||
|
||||
@@ -3211,9 +3211,7 @@ repair_service::insert_repair_meta(
|
||||
reason,
|
||||
compaction_time] (schema_ptr s) {
|
||||
auto& db = get_db();
|
||||
auto& cf = db.local().find_column_family(s->id());
|
||||
return db.local().obtain_reader_permit(cf, "repair-meta", db::no_timeout, {}).then([s = std::move(s),
|
||||
&cf,
|
||||
return db.local().obtain_reader_permit(db.local().find_column_family(s->id()), "repair-meta", db::no_timeout, {}).then([s = std::move(s),
|
||||
this,
|
||||
from,
|
||||
repair_meta_id,
|
||||
@@ -3226,7 +3224,7 @@ repair_service::insert_repair_meta(
|
||||
compaction_time] (reader_permit permit) mutable {
|
||||
node_repair_meta_id id{from, repair_meta_id};
|
||||
auto rm = seastar::make_shared<repair_meta>(*this,
|
||||
cf,
|
||||
get_db().local().find_column_family(s->id()),
|
||||
s,
|
||||
std::move(permit),
|
||||
range,
|
||||
|
||||
@@ -14,16 +14,8 @@ namespace repair {
|
||||
|
||||
future<table_dropped> table_sync_and_check(replica::database& db, service::migration_manager& mm, const table_id& uuid) {
|
||||
if (mm.use_raft()) {
|
||||
abort_on_expiry aoe(lowres_clock::now() + std::chrono::seconds{10});
|
||||
auto& as = aoe.abort_source();
|
||||
auto sub = mm.get_abort_source().subscribe([&as] () noexcept {
|
||||
if (!as.abort_requested()) {
|
||||
as.request_abort();
|
||||
}
|
||||
});
|
||||
|
||||
// Trigger read barrier to synchronize schema.
|
||||
co_await mm.get_group0_barrier().trigger(as);
|
||||
co_await mm.get_group0_barrier().trigger(mm.get_abort_source());
|
||||
}
|
||||
|
||||
co_return !db.column_family_exists(uuid);
|
||||
|
||||
@@ -101,6 +101,8 @@ public:
|
||||
size_t memtable_count() const noexcept;
|
||||
// Returns minimum timestamp from memtable list
|
||||
api::timestamp_type min_memtable_timestamp() const;
|
||||
// Returns true if memtable(s) contains key.
|
||||
bool memtable_has_key(const dht::decorated_key& key) const;
|
||||
// Add sstable to main set
|
||||
void add_sstable(sstables::shared_sstable sstable);
|
||||
// Add sstable to maintenance set
|
||||
|
||||
@@ -332,7 +332,8 @@ database::database(const db::config& cfg, database_config dbcfg, service::migrat
|
||||
"_read_concurrency_sem",
|
||||
max_inactive_queue_length(),
|
||||
_cfg.reader_concurrency_semaphore_serialize_limit_multiplier,
|
||||
_cfg.reader_concurrency_semaphore_kill_limit_multiplier)
|
||||
_cfg.reader_concurrency_semaphore_kill_limit_multiplier,
|
||||
_cfg.reader_concurrency_semaphore_cpu_concurrency)
|
||||
// No timeouts or queue length limits - a failure here can kill an entire repair.
|
||||
// Trust the caller to limit concurrency.
|
||||
, _streaming_concurrency_sem(
|
||||
@@ -341,7 +342,8 @@ database::database(const db::config& cfg, database_config dbcfg, service::migrat
|
||||
"_streaming_concurrency_sem",
|
||||
std::numeric_limits<size_t>::max(),
|
||||
utils::updateable_value(std::numeric_limits<uint32_t>::max()),
|
||||
utils::updateable_value(std::numeric_limits<uint32_t>::max()))
|
||||
utils::updateable_value(std::numeric_limits<uint32_t>::max()),
|
||||
utils::updateable_value(uint32_t(1)))
|
||||
// No limits, just for accounting.
|
||||
, _compaction_concurrency_sem(reader_concurrency_semaphore::no_limits{}, "compaction")
|
||||
, _system_read_concurrency_sem(
|
||||
@@ -367,8 +369,8 @@ database::database(const db::config& cfg, database_config dbcfg, service::migrat
|
||||
_cfg.compaction_rows_count_warning_threshold,
|
||||
_cfg.compaction_collection_elements_count_warning_threshold))
|
||||
, _nop_large_data_handler(std::make_unique<db::nop_large_data_handler>())
|
||||
, _user_sstables_manager(std::make_unique<sstables::sstables_manager>(*_large_data_handler, _cfg, feat, _row_cache_tracker, dbcfg.available_memory, sst_dir_sem.local(), [&stm]{ return stm.get()->get_my_id(); }, &sstm))
|
||||
, _system_sstables_manager(std::make_unique<sstables::sstables_manager>(*_nop_large_data_handler, _cfg, feat, _row_cache_tracker, dbcfg.available_memory, sst_dir_sem.local(), [&stm]{ return stm.get()->get_my_id(); }))
|
||||
, _user_sstables_manager(std::make_unique<sstables::sstables_manager>(*_large_data_handler, _cfg, feat, _row_cache_tracker, dbcfg.available_memory, sst_dir_sem.local(), [&stm]{ return stm.get()->get_my_id(); }, dbcfg.streaming_scheduling_group, &sstm))
|
||||
, _system_sstables_manager(std::make_unique<sstables::sstables_manager>(*_nop_large_data_handler, _cfg, feat, _row_cache_tracker, dbcfg.available_memory, sst_dir_sem.local(), [&stm]{ return stm.get()->get_my_id(); }, dbcfg.streaming_scheduling_group))
|
||||
, _result_memory_limiter(dbcfg.available_memory / 10)
|
||||
, _data_listeners(std::make_unique<db::data_listeners>())
|
||||
, _mnotifier(mn)
|
||||
@@ -1388,7 +1390,7 @@ keyspace::make_column_family_config(const schema& s, const database& db) const {
|
||||
cfg.view_update_concurrency_semaphore = _config.view_update_concurrency_semaphore;
|
||||
cfg.view_update_concurrency_semaphore_limit = _config.view_update_concurrency_semaphore_limit;
|
||||
cfg.data_listeners = &db.data_listeners();
|
||||
cfg.enable_compacting_data_for_streaming_and_repair = db_config.enable_compacting_data_for_streaming_and_repair();
|
||||
cfg.enable_compacting_data_for_streaming_and_repair = db_config.enable_compacting_data_for_streaming_and_repair;
|
||||
|
||||
return cfg;
|
||||
}
|
||||
@@ -1904,8 +1906,8 @@ future<mutation> database::do_apply_counter_update(column_family& cf, const froz
|
||||
auto slice = query::partition_slice(std::move(cr_ranges), std::move(static_columns),
|
||||
std::move(regular_columns), { }, { }, query::max_rows);
|
||||
|
||||
return do_with(std::move(slice), std::move(m), std::vector<locked_cell>(),
|
||||
[this, &cf, timeout, trace_state = std::move(trace_state), op = cf.write_in_progress()] (const query::partition_slice& slice, mutation& m, std::vector<locked_cell>& locks) mutable {
|
||||
return do_with(std::move(slice), std::move(m), cf.write_in_progress(), std::vector<locked_cell>(),
|
||||
[this, &cf, timeout, trace_state = std::move(trace_state)] (const query::partition_slice& slice, mutation& m, const utils::phased_barrier::operation& op, std::vector<locked_cell>& locks) mutable {
|
||||
tracing::trace(trace_state, "Acquiring counter locks");
|
||||
return cf.lock_counter_cells(m, timeout).then([&, m_schema = cf.schema(), trace_state = std::move(trace_state), timeout, this] (std::vector<locked_cell> lcs) mutable {
|
||||
locks = std::move(lcs);
|
||||
|
||||
@@ -239,6 +239,11 @@ memtable::find_or_create_partition(const dht::decorated_key& key) {
|
||||
return i->partition();
|
||||
}
|
||||
|
||||
bool
|
||||
memtable::contains_partition(const dht::decorated_key& key) const {
|
||||
return partitions.find(key, dht::ring_position_comparator(*_schema)) != partitions.end();
|
||||
}
|
||||
|
||||
boost::iterator_range<memtable::partitions_type::const_iterator>
|
||||
memtable::slice(const dht::partition_range& range) const {
|
||||
if (query::is_single_partition(range)) {
|
||||
|
||||
@@ -216,6 +216,8 @@ public:
|
||||
mutation_cleaner& cleaner() noexcept {
|
||||
return _cleaner;
|
||||
}
|
||||
|
||||
bool contains_partition(const dht::decorated_key& key) const;
|
||||
public:
|
||||
memtable_list* get_memtable_list() noexcept {
|
||||
return _memtable_list;
|
||||
|
||||
@@ -384,6 +384,14 @@ api::timestamp_type compaction_group::min_memtable_timestamp() const {
|
||||
);
|
||||
}
|
||||
|
||||
bool compaction_group::memtable_has_key(const dht::decorated_key& key) const {
|
||||
if (_memtables->empty()) {
|
||||
return false;
|
||||
}
|
||||
return std::ranges::any_of(*_memtables,
|
||||
std::bind(&memtable::contains_partition, std::placeholders::_1, std::ref(key)));
|
||||
}
|
||||
|
||||
api::timestamp_type table::min_memtable_timestamp() const {
|
||||
return *boost::range::min_element(compaction_groups() | boost::adaptors::transformed(std::mem_fn(&compaction_group::min_memtable_timestamp)));
|
||||
}
|
||||
@@ -1154,9 +1162,15 @@ void table::set_metrics() {
|
||||
ms::make_counter("memtable_row_hits", _stats.memtable_app_stats.row_hits, ms::description("Number of rows overwritten by write operations in memtables"))(cf)(ks)(node_table_metrics).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
ms::make_gauge("total_disk_space", ms::description("Total disk space used"), _stats.total_disk_space_used)(cf)(ks)(node_table_metrics).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
ms::make_gauge("live_sstable", ms::description("Live sstable count"), _stats.live_sstable_count)(cf)(ks)(node_table_metrics).aggregate({seastar::metrics::shard_label}),
|
||||
ms::make_counter("read_latency_count", ms::description("Number of reads"), [this] {return _stats.reads.histogram().count();})(cf)(ks)(node_table_metrics).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
ms::make_counter("write_latency_count", ms::description("Number of writes"), [this] {return _stats.writes.histogram().count();})(cf)(ks)(node_table_metrics).aggregate({seastar::metrics::shard_label}).set_skip_when_empty()
|
||||
ms::make_gauge("live_disk_space", ms::description("Live disk space used"), _stats.live_disk_space_used)(cf)(ks)(node_table_metrics).aggregate({seastar::metrics::shard_label}),
|
||||
ms::make_histogram("read_latency", ms::description("Read latency histogram"), [this] {return to_metrics_histogram(_stats.reads.histogram());})(cf)(ks)(node_table_metrics).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
ms::make_histogram("write_latency", ms::description("Write latency histogram"), [this] {return to_metrics_histogram(_stats.writes.histogram());})(cf)(ks)(node_table_metrics).aggregate({seastar::metrics::shard_label}).set_skip_when_empty()
|
||||
});
|
||||
if (this_shard_id() == 0) {
|
||||
_metrics.add_group("column_family", {
|
||||
ms::make_gauge("cache_hit_rate", ms::description("Cache hit rate"), [this] {return float(_global_cache_hit_rate);})(cf)(ks)(ms::shard_label(""))
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2007,9 +2021,18 @@ future<> table::clear() {
|
||||
// NOTE: does not need to be futurized, but might eventually, depending on
|
||||
// if we implement notifications, whatnot.
|
||||
future<db::replay_position> table::discard_sstables(db_clock::time_point truncated_at) {
|
||||
assert(std::ranges::all_of(compaction_groups(), [this] (const compaction_group_ptr& cg) {
|
||||
return _compaction_manager.compaction_disabled(cg->as_table_state());
|
||||
}));
|
||||
// truncate_table_on_all_shards() disables compaction for the truncated
|
||||
// tables and views, so we normally expect compaction to be disabled on
|
||||
// this table. But as shown in issue #17543, it is possible that a new
|
||||
// materialized view was created right after truncation started, and it
|
||||
// would not have compaction disabled when this function is called on it.
|
||||
if (!schema()->is_view()) {
|
||||
if (!std::ranges::all_of(compaction_groups(), [this] (const compaction_group_ptr& cg) {
|
||||
return _compaction_manager.compaction_disabled(cg->as_table_state()); })) {
|
||||
utils::on_internal_error(fmt::format("compaction not disabled on table {}.{} during TRUNCATE",
|
||||
schema()->ks_name(), schema()->cf_name()));
|
||||
}
|
||||
}
|
||||
|
||||
struct pruner {
|
||||
column_family& cf;
|
||||
@@ -2670,7 +2693,7 @@ table::disable_auto_compaction() {
|
||||
// - there are major compactions that additionally uses constant
|
||||
// size backlog of shares,
|
||||
// - sstables rewrites tasks that do the same.
|
||||
//
|
||||
//
|
||||
// Setting NullCompactionStrategy is not an option due to the
|
||||
// following reasons:
|
||||
// - it will 0 backlog if suspending current compactions is not an
|
||||
@@ -2939,6 +2962,9 @@ public:
|
||||
api::timestamp_type min_memtable_timestamp() const override {
|
||||
return _cg.min_memtable_timestamp();
|
||||
}
|
||||
bool memtable_has_key(const dht::decorated_key& key) const override {
|
||||
return _cg.memtable_has_key(key);
|
||||
}
|
||||
future<> on_compaction_completion(sstables::compaction_completion_desc desc, sstables::offstrategy offstrategy) override {
|
||||
if (offstrategy) {
|
||||
co_await _cg.update_sstable_lists_on_off_strategy_completion(std::move(desc));
|
||||
|
||||
@@ -768,6 +768,16 @@ static std::ostream& map_as_cql_param(std::ostream& os, const std::map<sstring,
|
||||
return os;
|
||||
}
|
||||
|
||||
// default impl assumes options are in a map.
|
||||
// implementations should override if not
|
||||
std::string schema_extension::options_to_string() const {
|
||||
std::ostringstream ss;
|
||||
ss << '{';
|
||||
map_as_cql_param(ss, ser::deserialize_from_buffer(serialize(), boost::type<default_map_type>(), 0));
|
||||
ss << '}';
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
static std::ostream& column_definition_as_cql_key(std::ostream& os, const column_definition & cd) {
|
||||
os << cd.name_as_cql_string();
|
||||
os << " " << cd.type->cql3_type_name();
|
||||
@@ -922,23 +932,19 @@ std::ostream& schema::describe(replica::database& db, std::ostream& os, bool wit
|
||||
os << "}";
|
||||
|
||||
os << "\n AND crc_check_chance = " << crc_check_chance();
|
||||
os << "\n AND dclocal_read_repair_chance = " << dc_local_read_repair_chance();
|
||||
os << "\n AND dclocal_read_repair_chance = " << dc_local_read_repair_chance();
|
||||
os << "\n AND default_time_to_live = " << default_time_to_live().count();
|
||||
os << "\n AND gc_grace_seconds = " << gc_grace_seconds().count();
|
||||
os << "\n AND max_index_interval = " << max_index_interval();
|
||||
os << "\n AND memtable_flush_period_in_ms = " << memtable_flush_period();
|
||||
os << "\n AND min_index_interval = " << min_index_interval();
|
||||
os << "\n AND read_repair_chance = " << read_repair_chance();
|
||||
os << "\n AND read_repair_chance = " << read_repair_chance();
|
||||
os << "\n AND speculative_retry = '" << speculative_retry().to_sstring() << "'";
|
||||
os << "\n AND paxos_grace_seconds = " << paxos_grace_seconds().count();
|
||||
|
||||
auto tombstone_gc_str = tombstone_gc_options().to_sstring();
|
||||
std::replace(tombstone_gc_str.begin(), tombstone_gc_str.end(), '"', '\'');
|
||||
os << "\n AND tombstone_gc = " << tombstone_gc_str;
|
||||
|
||||
if (cdc_options().enabled()) {
|
||||
os << "\n AND cdc = " << cdc_options().to_sstring();
|
||||
for (auto& [type, ext] : extensions()) {
|
||||
os << "\n AND " << type << " = " << ext->options_to_string();
|
||||
}
|
||||
|
||||
if (is_view() && !is_index(db, view_info()->base_id(), *this)) {
|
||||
auto is_sync_update = db::find_tag(*this, db::SYNCHRONOUS_VIEW_UPDATES_TAG_KEY);
|
||||
if (is_sync_update.has_value()) {
|
||||
|
||||
@@ -551,6 +551,10 @@ public:
|
||||
virtual bool is_placeholder() const {
|
||||
return false;
|
||||
}
|
||||
using default_map_type = std::map<sstring, sstring>;
|
||||
// default impl assumes options are in a map.
|
||||
// implementations should override if not
|
||||
virtual std::string options_to_string() const;
|
||||
};
|
||||
|
||||
struct schema_static_props {
|
||||
|
||||
2
seastar
2
seastar
Submodule seastar updated: cfb015d0b4...154a0c0ff2
@@ -948,7 +948,7 @@ future<> migration_manager::announce_with_raft(std::vector<mutation> schema, gro
|
||||
},
|
||||
guard, std::move(description));
|
||||
|
||||
co_return co_await _group0_client.add_entry(std::move(group0_cmd), std::move(guard), &_as);
|
||||
co_return co_await _group0_client.add_entry(std::move(group0_cmd), std::move(guard), _as);
|
||||
}
|
||||
|
||||
future<> migration_manager::announce_without_raft(std::vector<mutation> schema, group0_guard guard) {
|
||||
@@ -983,7 +983,7 @@ future<> migration_manager::announce(std::vector<mutation> schema, group0_guard
|
||||
|
||||
future<group0_guard> migration_manager::start_group0_operation() {
|
||||
assert(this_shard_id() == 0);
|
||||
return _group0_client.start_operation(&_as);
|
||||
return _group0_client.start_operation(_as);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -276,24 +276,29 @@ future<> view_update_backlog_broker::on_change(gms::inet_address endpoint, gms::
|
||||
const char* start_bound = value.value().data();
|
||||
char* end_bound;
|
||||
for (auto* ptr : {¤t, &max}) {
|
||||
errno = 0;
|
||||
*ptr = std::strtoull(start_bound, &end_bound, 10);
|
||||
if (*ptr == ULLONG_MAX) {
|
||||
return make_ready_future();;
|
||||
if (errno == ERANGE) {
|
||||
return make_ready_future();
|
||||
}
|
||||
start_bound = end_bound + 1;
|
||||
}
|
||||
if (max == 0) {
|
||||
return make_ready_future();
|
||||
}
|
||||
errno = 0;
|
||||
ticks = std::strtoll(start_bound, &end_bound, 10);
|
||||
if (ticks == 0 || ticks == LLONG_MAX || end_bound != value.value().data() + value.value().size()) {
|
||||
if (ticks == 0 || errno == ERANGE || end_bound != value.value().data() + value.value().size()) {
|
||||
return make_ready_future();
|
||||
}
|
||||
auto backlog = view_update_backlog_timestamped{db::view::update_backlog{current, max}, ticks};
|
||||
auto[it, inserted] = _sp.local()._view_update_backlogs.try_emplace(endpoint, std::move(backlog));
|
||||
if (!inserted && it->second.ts < backlog.ts) {
|
||||
it->second = std::move(backlog);
|
||||
}
|
||||
return _sp.invoke_on_all([endpoint, backlog] (service::storage_proxy& sp) {
|
||||
auto[it, inserted] = sp._view_update_backlogs.try_emplace(endpoint, backlog);
|
||||
if (!inserted && it->second.ts < backlog.ts) {
|
||||
it->second = backlog;
|
||||
}
|
||||
return make_ready_future();
|
||||
});
|
||||
}
|
||||
return make_ready_future();
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
* SPDX-License-Identifier: (AGPL-3.0-or-later and Apache-2.0)
|
||||
*/
|
||||
#pragma once
|
||||
#include "seastar/core/semaphore.hh"
|
||||
#include "service/paxos/proposal.hh"
|
||||
#include "log.hh"
|
||||
#include "utils/digest_algorithm.hh"
|
||||
@@ -32,6 +33,7 @@ private:
|
||||
|
||||
class key_lock_map {
|
||||
using semaphore = basic_semaphore<semaphore_default_exception_factory, clock_type>;
|
||||
using semaphore_units = semaphore_units<semaphore_default_exception_factory, clock_type>;
|
||||
using map = std::unordered_map<dht::token, semaphore>;
|
||||
|
||||
semaphore& get_semaphore_for_key(const dht::token& key);
|
||||
@@ -82,22 +84,15 @@ public:
|
||||
key_lock_map& _map;
|
||||
dht::token _key;
|
||||
clock_type::time_point _timeout;
|
||||
bool _locked = false;
|
||||
key_lock_map::semaphore_units _units;
|
||||
public:
|
||||
future<> lock() {
|
||||
auto f = _map.get_semaphore_for_key(_key).wait(_timeout, 1);
|
||||
_locked = true;
|
||||
return f;
|
||||
future<> lock () {
|
||||
return get_units(_map.get_semaphore_for_key(_key), 1, _timeout).then([this] (auto&& u) { _units = std::move(u); });
|
||||
}
|
||||
guard(key_lock_map& map, const dht::token& key, clock_type::time_point timeout) : _map(map), _key(key), _timeout(timeout) {};
|
||||
guard(guard&& o) noexcept : _map(o._map), _key(std::move(o._key)), _timeout(o._timeout), _locked(o._locked) {
|
||||
o._locked = false;
|
||||
}
|
||||
guard(guard&& o) = default;
|
||||
~guard() {
|
||||
if (_locked) {
|
||||
_map.get_semaphore_for_key(_key).signal(1);
|
||||
_map.release_semaphore_for_key(_key);
|
||||
}
|
||||
_map.release_semaphore_for_key(_key);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -152,7 +152,7 @@ semaphore& raft_group0_client::operation_mutex() {
|
||||
return _operation_mutex;
|
||||
}
|
||||
|
||||
future<> raft_group0_client::add_entry(group0_command group0_cmd, group0_guard guard, seastar::abort_source* as) {
|
||||
future<> raft_group0_client::add_entry(group0_command group0_cmd, group0_guard guard, seastar::abort_source& as) {
|
||||
if (this_shard_id() != 0) {
|
||||
// This should not happen since all places which construct `group0_guard` also check that they are on shard 0.
|
||||
// Note: `group0_guard::impl` is private to this module, making this easy to verify.
|
||||
@@ -172,7 +172,7 @@ future<> raft_group0_client::add_entry(group0_command group0_cmd, group0_guard g
|
||||
do {
|
||||
retry = false;
|
||||
try {
|
||||
co_await _raft_gr.group0().add_entry(cmd, raft::wait_type::applied, as);
|
||||
co_await _raft_gr.group0().add_entry(cmd, raft::wait_type::applied, &as);
|
||||
} catch (const raft::dropped_entry& e) {
|
||||
logger.warn("add_entry: returned \"{}\". Retrying the command (prev_state_id: {}, new_state_id: {})",
|
||||
e, group0_cmd.prev_state_id, group0_cmd.new_state_id);
|
||||
@@ -234,7 +234,7 @@ static utils::UUID generate_group0_state_id(utils::UUID prev_state_id) {
|
||||
return utils::UUID_gen::get_random_time_UUID_from_micros(std::chrono::microseconds{ts});
|
||||
}
|
||||
|
||||
future<group0_guard> raft_group0_client::start_operation(seastar::abort_source* as) {
|
||||
future<group0_guard> raft_group0_client::start_operation(seastar::abort_source& as) {
|
||||
if (this_shard_id() != 0) {
|
||||
on_internal_error(logger, "start_group0_operation: must run on shard 0");
|
||||
}
|
||||
@@ -242,12 +242,12 @@ future<group0_guard> raft_group0_client::start_operation(seastar::abort_source*
|
||||
auto [upgrade_lock_holder, upgrade_state] = co_await get_group0_upgrade_state();
|
||||
switch (upgrade_state) {
|
||||
case group0_upgrade_state::use_post_raft_procedures: {
|
||||
auto operation_holder = co_await get_units(_operation_mutex, 1);
|
||||
co_await _raft_gr.group0().read_barrier(as);
|
||||
auto operation_holder = co_await get_units(_operation_mutex, 1, as);
|
||||
co_await _raft_gr.group0().read_barrier(&as);
|
||||
|
||||
// Take `_group0_read_apply_mutex` *after* read barrier.
|
||||
// Read barrier may wait for `group0_state_machine::apply` which also takes this mutex.
|
||||
auto read_apply_holder = co_await hold_read_apply_mutex();
|
||||
auto read_apply_holder = co_await hold_read_apply_mutex(as);
|
||||
|
||||
auto observed_group0_state_id = co_await _sys_ks.get_last_group0_state_id();
|
||||
auto new_group0_state_id = generate_group0_state_id(observed_group0_state_id);
|
||||
|
||||
@@ -105,7 +105,7 @@ public:
|
||||
// Call after `system_keyspace` is initialized.
|
||||
future<> init();
|
||||
|
||||
future<> add_entry(group0_command group0_cmd, group0_guard guard, seastar::abort_source* as = nullptr);
|
||||
future<> add_entry(group0_command group0_cmd, group0_guard guard, seastar::abort_source& as);
|
||||
|
||||
future<> add_entry_unguarded(group0_command group0_cmd, seastar::abort_source* as = nullptr);
|
||||
|
||||
@@ -129,7 +129,7 @@ public:
|
||||
// FIXME?: this is kind of annoying for the user.
|
||||
// we could forward the call to shard 0, have group0_guard keep a foreign_ptr to the internal data structures on shard 0,
|
||||
// and add_entry would again forward to shard 0.
|
||||
future<group0_guard> start_operation(seastar::abort_source* as = nullptr);
|
||||
future<group0_guard> start_operation(seastar::abort_source& as);
|
||||
|
||||
template<typename Command>
|
||||
requires std::same_as<Command, broadcast_table_query> || std::same_as<Command, write_mutations>
|
||||
|
||||
@@ -1453,6 +1453,17 @@ public:
|
||||
timeout_cb();
|
||||
}
|
||||
}
|
||||
void no_targets() {
|
||||
// We don't have any live targets and we should complete the handler now.
|
||||
// Either we already stored sufficient hints to achieve CL and the handler
|
||||
// is completed successfully (see hint_to_dead_endpoints), or we don't achieve
|
||||
// CL because we didn't store sufficient hints and we don't have live targets,
|
||||
// so the handler is completed with error.
|
||||
if (!_cl_achieved) {
|
||||
_error = error::FAILURE;
|
||||
}
|
||||
_proxy->remove_response_handler(_id);
|
||||
}
|
||||
void expire_at(storage_proxy::clock_type::time_point timeout) {
|
||||
_expire_timer.arm(timeout);
|
||||
}
|
||||
@@ -2329,6 +2340,21 @@ bool storage_proxy::need_throttle_writes() const {
|
||||
}
|
||||
|
||||
void storage_proxy::unthrottle() {
|
||||
// Here, we garbage-collect (from _throttled_writes) the response IDs which are no longer
|
||||
// relevant, because their handlers are gone.
|
||||
//
|
||||
// need_throttle_writes() may remain true for an indefinite amount of time, so without this piece of code,
|
||||
// _throttled_writes might also grow without any limit. We saw this happen in a throughput test once.
|
||||
//
|
||||
// Note that we only remove the irrelevant entries which are in front of the list.
|
||||
// We don't touch the middle of the list, so an irrelevant ID will still remain in the list if there is some
|
||||
// earlier ID which is still relevant. But since writes should have some reasonable finite timeout,
|
||||
// we assume that it's not a problem.
|
||||
//
|
||||
while (!_throttled_writes.empty() && !_response_handlers.contains(_throttled_writes.front())) {
|
||||
_throttled_writes.pop_front();
|
||||
}
|
||||
|
||||
while(!need_throttle_writes() && !_throttled_writes.empty()) {
|
||||
auto id = _throttled_writes.front();
|
||||
_throttled_writes.pop_front();
|
||||
@@ -3924,6 +3950,16 @@ void storage_proxy::send_to_live_endpoints(storage_proxy::response_id_type respo
|
||||
auto& stats = handler_ptr->stats();
|
||||
auto& handler = *handler_ptr;
|
||||
auto& global_stats = handler._proxy->_global_stats;
|
||||
|
||||
if (handler.get_targets().size() == 0) {
|
||||
// Usually we remove the response handler when receiving responses from all targets.
|
||||
// Here we don't have any live targets to get responses from, so we should complete
|
||||
// the write response handler immediately. Otherwise, it will remain active
|
||||
// until it timeouts.
|
||||
handler.no_targets();
|
||||
return;
|
||||
}
|
||||
|
||||
if (handler.get_targets().size() != 1 || !fbu::is_me(handler.get_targets()[0])) {
|
||||
auto& topology = handler_ptr->_effective_replication_map_ptr->get_topology();
|
||||
auto local_dc = topology.get_datacenter();
|
||||
@@ -6088,7 +6124,7 @@ future<bool> storage_proxy::cas(schema_ptr schema, shared_ptr<cas_request> reque
|
||||
db::consistency_level cl = cl_for_paxos == db::consistency_level::LOCAL_SERIAL ?
|
||||
db::consistency_level::LOCAL_QUORUM : db::consistency_level::QUORUM;
|
||||
|
||||
unsigned contentions;
|
||||
unsigned contentions = 0;
|
||||
|
||||
dht::token token = partition_ranges[0].start()->value().as_decorated_key().token();
|
||||
utils::latency_counter lc;
|
||||
@@ -6107,6 +6143,8 @@ future<bool> storage_proxy::cas(schema_ptr schema, shared_ptr<cas_request> reque
|
||||
|
||||
paxos::paxos_state::guard l = co_await paxos::paxos_state::get_cas_lock(token, write_timeout);
|
||||
|
||||
co_await utils::get_local_injector().inject("cas_timeout_after_lock", write_timeout + std::chrono::milliseconds(100));
|
||||
|
||||
while (true) {
|
||||
// Finish the previous PAXOS round, if any, and, as a side effect, compute
|
||||
// a ballot (round identifier) which is a) unique b) has good chances of being
|
||||
|
||||
@@ -954,7 +954,7 @@ class topology_coordinator {
|
||||
};
|
||||
|
||||
future<group0_guard> start_operation() {
|
||||
auto guard = co_await _group0.client().start_operation(&_as);
|
||||
auto guard = co_await _group0.client().start_operation(_as);
|
||||
|
||||
if (_term != _raft.get_current_term()) {
|
||||
throw term_changed_error{};
|
||||
@@ -996,7 +996,7 @@ class topology_coordinator {
|
||||
slogger.trace("raft topology: do update {} reason {}", updates, reason);
|
||||
topology_change change{std::move(updates)};
|
||||
group0_command g0_cmd = _group0.client().prepare_command(std::move(change), guard, reason);
|
||||
co_await _group0.client().add_entry(std::move(g0_cmd), std::move(guard));
|
||||
co_await _group0.client().add_entry(std::move(g0_cmd), std::move(guard), _as);
|
||||
} catch (group0_concurrent_modification&) {
|
||||
slogger.info("raft topology: race while changing state: {}. Retrying", reason);
|
||||
throw;
|
||||
@@ -2642,7 +2642,7 @@ future<> storage_service::raft_initialize_discovery_leader(raft::server& raft_se
|
||||
}
|
||||
|
||||
slogger.info("raft topology: adding myself as the first node to the topology");
|
||||
auto guard = co_await _group0->client().start_operation(&_abort_source);
|
||||
auto guard = co_await _group0->client().start_operation(_abort_source);
|
||||
|
||||
auto insert_join_request_mutation = build_mutation_from_join_params(params, guard);
|
||||
|
||||
@@ -2656,7 +2656,7 @@ future<> storage_service::raft_initialize_discovery_leader(raft::server& raft_se
|
||||
group0_command g0_cmd = _group0->client().prepare_command(std::move(change), guard,
|
||||
"bootstrap: adding myself as the first node to the topology");
|
||||
try {
|
||||
co_await _group0->client().add_entry(std::move(g0_cmd), std::move(guard), &_abort_source);
|
||||
co_await _group0->client().add_entry(std::move(g0_cmd), std::move(guard), _abort_source);
|
||||
} catch (group0_concurrent_modification&) {
|
||||
slogger.info("raft topology: bootstrap: concurrent operation is detected, retrying.");
|
||||
}
|
||||
@@ -2703,7 +2703,7 @@ future<> storage_service::update_topology_with_local_metadata(raft::server& raft
|
||||
while (true) {
|
||||
slogger.info("raft topology: refreshing topology to check if it's synchronized with local metadata");
|
||||
|
||||
auto guard = co_await _group0->client().start_operation(&_abort_source);
|
||||
auto guard = co_await _group0->client().start_operation(_abort_source);
|
||||
|
||||
if (synchronized()) {
|
||||
break;
|
||||
@@ -2738,7 +2738,7 @@ future<> storage_service::update_topology_with_local_metadata(raft::server& raft
|
||||
std::move(change), guard, ::format("{}: update topology with local metadata", raft_server.id()));
|
||||
|
||||
try {
|
||||
co_await _group0->client().add_entry(std::move(g0_cmd), std::move(guard), &_abort_source);
|
||||
co_await _group0->client().add_entry(std::move(g0_cmd), std::move(guard), _abort_source);
|
||||
} catch (group0_concurrent_modification&) {
|
||||
slogger.info("raft topology: update topology with local metadata:"
|
||||
" concurrent operation is detected, retrying.");
|
||||
@@ -2947,10 +2947,8 @@ future<> storage_service::join_token_ring(sharded<db::system_distributed_keyspac
|
||||
// NORMAL doesn't necessarily mean UP (#14042). Wait for these nodes to be UP as well
|
||||
// to reduce flakiness (we need them to be UP to perform CDC generation write and for repair/streaming).
|
||||
//
|
||||
// This could be done in Raft topology mode as well, but the calculation of nodes to sync with
|
||||
// has to be done based on topology state machine instead of gossiper as it is here;
|
||||
// furthermore, the place in the code where we do this has to be different (it has to be coordinated
|
||||
// by the topology coordinator after it joins the node to the cluster).
|
||||
// We do it in Raft topology mode as well in join_node_response_handler. The calculation of nodes to
|
||||
// sync with is done based on topology state machine instead of gossiper as it is here.
|
||||
//
|
||||
// We calculate nodes to wait for based on token_metadata. Previously we would use gossiper
|
||||
// directly for this, but gossiper may still contain obsolete entries from 1. replaced nodes
|
||||
@@ -2958,23 +2956,29 @@ future<> storage_service::join_token_ring(sharded<db::system_distributed_keyspac
|
||||
// but here they may still be present if we're performing topology changes in quick succession.
|
||||
// `token_metadata` has all host ID / token collisions resolved so in particular it doesn't contain
|
||||
// these obsolete IPs. Refs: #14487, #14468
|
||||
auto& tm = get_token_metadata();
|
||||
auto ignore_nodes = ri
|
||||
? parse_node_list(_db.local().get_config().ignore_dead_nodes_for_replace(), tm)
|
||||
// TODO: specify ignore_nodes for bootstrap
|
||||
: std::unordered_set<gms::inet_address>{};
|
||||
//
|
||||
// We recalculate nodes in every step of the loop in wait_alive. For example, if we booted a new node
|
||||
// just after removing a different node, other nodes could still see the removed node as NORMAL. Then,
|
||||
// the joining node would wait for it to be UP, and wait_alive would time out. Recalculation fixes
|
||||
// this problem. Ref: #17526
|
||||
auto get_sync_nodes = [&] {
|
||||
auto ignore_nodes = ri
|
||||
? parse_node_list(_db.local().get_config().ignore_dead_nodes_for_replace(), get_token_metadata())
|
||||
// TODO: specify ignore_nodes for bootstrap
|
||||
: std::unordered_set<gms::inet_address>{};
|
||||
std::vector<gms::inet_address> sync_nodes;
|
||||
get_token_metadata().get_topology().for_each_node([&] (const locator::node* np) {
|
||||
auto ep = np->endpoint();
|
||||
if (!ignore_nodes.contains(ep) && (!ri || ep != ri->address)) {
|
||||
sync_nodes.push_back(ep);
|
||||
}
|
||||
});
|
||||
return sync_nodes;
|
||||
};
|
||||
|
||||
std::vector<gms::inet_address> sync_nodes;
|
||||
tm.get_topology().for_each_node([&] (const locator::node* np) {
|
||||
auto ep = np->endpoint();
|
||||
if (!ignore_nodes.contains(ep) && (!ri || ep != ri->address)) {
|
||||
sync_nodes.push_back(ep);
|
||||
}
|
||||
});
|
||||
|
||||
slogger.info("Waiting for nodes {} to be alive", sync_nodes);
|
||||
co_await _gossiper.wait_alive(sync_nodes, wait_for_live_nodes_timeout);
|
||||
slogger.info("Nodes {} are alive", sync_nodes);
|
||||
slogger.info("Waiting for other nodes to be alive. Current nodes: {}", get_sync_nodes());
|
||||
co_await _gossiper.wait_alive(get_sync_nodes, wait_for_live_nodes_timeout);
|
||||
slogger.info("Nodes {} are alive", get_sync_nodes());
|
||||
}
|
||||
|
||||
assert(_group0);
|
||||
@@ -4492,7 +4496,7 @@ future<> storage_service::raft_decomission() {
|
||||
});
|
||||
|
||||
while (true) {
|
||||
auto guard = co_await _group0->client().start_operation(&_abort_source);
|
||||
auto guard = co_await _group0->client().start_operation(_abort_source);
|
||||
|
||||
auto it = _topology_state_machine._topology.find(raft_server.id());
|
||||
if (!it) {
|
||||
@@ -4519,7 +4523,7 @@ future<> storage_service::raft_decomission() {
|
||||
group0_command g0_cmd = _group0->client().prepare_command(std::move(change), guard, ::format("decomission: request decomission for {}", raft_server.id()));
|
||||
|
||||
try {
|
||||
co_await _group0->client().add_entry(std::move(g0_cmd), std::move(guard), &_abort_source);
|
||||
co_await _group0->client().add_entry(std::move(g0_cmd), std::move(guard), _abort_source);
|
||||
} catch (group0_concurrent_modification&) {
|
||||
slogger.info("raft topology: decomission: concurrent operation is detected, retrying.");
|
||||
continue;
|
||||
@@ -4814,7 +4818,7 @@ future<> storage_service::raft_removenode(locator::host_id host_id, std::list<lo
|
||||
auto id = raft::server_id{host_id.uuid()};
|
||||
|
||||
while (true) {
|
||||
auto guard = co_await _group0->client().start_operation(&_abort_source);
|
||||
auto guard = co_await _group0->client().start_operation(_abort_source);
|
||||
|
||||
auto it = _topology_state_machine._topology.find(id);
|
||||
|
||||
@@ -4854,7 +4858,7 @@ future<> storage_service::raft_removenode(locator::host_id host_id, std::list<lo
|
||||
group0_command g0_cmd = _group0->client().prepare_command(std::move(change), guard, ::format("removenode: request remove for {}", id));
|
||||
|
||||
try {
|
||||
co_await _group0->client().add_entry(std::move(g0_cmd), std::move(guard), &_abort_source);
|
||||
co_await _group0->client().add_entry(std::move(g0_cmd), std::move(guard), _abort_source);
|
||||
} catch (group0_concurrent_modification&) {
|
||||
slogger.info("raft topology: removenode: concurrent operation is detected, retrying.");
|
||||
continue;
|
||||
@@ -5375,7 +5379,7 @@ future<> storage_service::raft_rebuild(sstring source_dc) {
|
||||
auto& raft_server = _group0->group0_server();
|
||||
|
||||
while (true) {
|
||||
auto guard = co_await _group0->client().start_operation(&_abort_source);
|
||||
auto guard = co_await _group0->client().start_operation(_abort_source);
|
||||
|
||||
auto it = _topology_state_machine._topology.find(raft_server.id());
|
||||
if (!it) {
|
||||
@@ -5401,7 +5405,7 @@ future<> storage_service::raft_rebuild(sstring source_dc) {
|
||||
group0_command g0_cmd = _group0->client().prepare_command(std::move(change), guard, ::format("rebuild: request rebuild for {} ({})", raft_server.id(), source_dc));
|
||||
|
||||
try {
|
||||
co_await _group0->client().add_entry(std::move(g0_cmd), std::move(guard), &_abort_source);
|
||||
co_await _group0->client().add_entry(std::move(g0_cmd), std::move(guard), _abort_source);
|
||||
} catch (group0_concurrent_modification&) {
|
||||
slogger.info("raft topology: rebuild: concurrent operation is detected, retrying.");
|
||||
continue;
|
||||
@@ -5420,7 +5424,7 @@ future<> storage_service::raft_check_and_repair_cdc_streams() {
|
||||
|
||||
while (true) {
|
||||
slogger.info("raft topology: request check_and_repair_cdc_streams, refreshing topology");
|
||||
auto guard = co_await _group0->client().start_operation(&_abort_source);
|
||||
auto guard = co_await _group0->client().start_operation(_abort_source);
|
||||
auto curr_req = _topology_state_machine._topology.global_request;
|
||||
if (curr_req && *curr_req != global_topology_request::new_cdc_generation) {
|
||||
// FIXME: replace this with a queue
|
||||
@@ -5446,7 +5450,7 @@ future<> storage_service::raft_check_and_repair_cdc_streams() {
|
||||
group0_command g0_cmd = _group0->client().prepare_command(std::move(change), guard,
|
||||
::format("request check+repair CDC generation from {}", _group0->group0_server().id()));
|
||||
try {
|
||||
co_await _group0->client().add_entry(std::move(g0_cmd), std::move(guard), &_abort_source);
|
||||
co_await _group0->client().add_entry(std::move(g0_cmd), std::move(guard), _abort_source);
|
||||
} catch (group0_concurrent_modification&) {
|
||||
slogger.info("raft topology: request check+repair CDC: concurrent operation is detected, retrying.");
|
||||
continue;
|
||||
@@ -6495,7 +6499,7 @@ future<join_node_request_result> storage_service::join_node_request_handler(join
|
||||
}
|
||||
|
||||
while (true) {
|
||||
auto guard = co_await _group0->client().start_operation(&_abort_source);
|
||||
auto guard = co_await _group0->client().start_operation(_abort_source);
|
||||
|
||||
if (const auto *p = _topology_state_machine._topology.find(params.host_id)) {
|
||||
const auto& rs = p->second;
|
||||
@@ -6526,7 +6530,7 @@ future<join_node_request_result> storage_service::join_node_request_handler(join
|
||||
group0_command g0_cmd = _group0->client().prepare_command(std::move(change), guard,
|
||||
format("raft topology: placing join request for {}", params.host_id));
|
||||
try {
|
||||
co_await _group0->client().add_entry(std::move(g0_cmd), std::move(guard), &_abort_source);
|
||||
co_await _group0->client().add_entry(std::move(g0_cmd), std::move(guard), _abort_source);
|
||||
break;
|
||||
} catch (group0_concurrent_modification&) {
|
||||
slogger.info("raft topology: join_node_request: concurrent operation is detected, retrying.");
|
||||
|
||||
@@ -796,7 +796,7 @@ public:
|
||||
_sst._shards = { shard };
|
||||
|
||||
_cfg.monitor->on_write_started(_data_writer->offset_tracker());
|
||||
_sst._components->filter = utils::i_filter::get_filter(estimated_partitions, _schema.bloom_filter_fp_chance(), utils::filter_format::m_format);
|
||||
_sst._components->filter = utils::i_filter::get_filter(estimated_partitions, _sst._schema->bloom_filter_fp_chance(), utils::filter_format::m_format);
|
||||
_pi_write_m.promoted_index_block_size = cfg.promoted_index_block_size;
|
||||
_pi_write_m.promoted_index_auto_scale_threshold = cfg.promoted_index_auto_scale_threshold;
|
||||
_index_sampling_state.summary_byte_cost = _cfg.summary_byte_cost;
|
||||
@@ -884,7 +884,7 @@ void writer::init_file_writers() {
|
||||
make_compressed_file_m_format_output_stream(
|
||||
output_stream<char>(std::move(out)),
|
||||
&_sst._components->compression,
|
||||
_schema.get_compressor_params()), _sst.filename(component_type::Data));
|
||||
_sst._schema->get_compressor_params()), _sst.filename(component_type::Data));
|
||||
}
|
||||
|
||||
out = _sst._storage->make_data_or_index_sink(_sst, component_type::Index).get0();
|
||||
@@ -1454,7 +1454,7 @@ void writer::consume_end_of_stream() {
|
||||
|
||||
_sst._components->statistics.contents[metadata_type::Serialization] = std::make_unique<serialization_header>(std::move(_sst_schema.header));
|
||||
seal_statistics(_sst.get_version(), _sst._components->statistics, _collector,
|
||||
_sst._schema->get_partitioner().name(), _schema.bloom_filter_fp_chance(),
|
||||
_sst._schema->get_partitioner().name(), _sst._schema->bloom_filter_fp_chance(),
|
||||
_sst._schema, _sst.get_first_decorated_key(), _sst.get_last_decorated_key(), _enc_stats);
|
||||
close_data_writer();
|
||||
_sst.write_summary();
|
||||
|
||||
@@ -105,6 +105,25 @@ thread_local utils::updateable_value<bool> global_cache_index_pages(true);
|
||||
|
||||
logging::logger sstlog("sstable");
|
||||
|
||||
template <typename T>
|
||||
const char* nullsafe_typename(T* x) noexcept {
|
||||
try {
|
||||
return typeid(*x).name();
|
||||
} catch (const std::bad_typeid&) {
|
||||
return "nullptr";
|
||||
}
|
||||
}
|
||||
|
||||
// dynamic_cast, but calls on_internal_error on failure.
|
||||
template <typename Derived, typename Base>
|
||||
Derived* downcast_ptr(Base* x) {
|
||||
if (auto casted = dynamic_cast<Derived*>(x)) {
|
||||
return casted;
|
||||
} else {
|
||||
on_internal_error(sstlog, fmt::format("Bad downcast: expected {}, but got {}", typeid(Derived*).name(), nullsafe_typename(x)));
|
||||
}
|
||||
}
|
||||
|
||||
// Because this is a noop and won't hold any state, it is better to use a global than a
|
||||
// thread_local. It will be faster, specially on non-x86.
|
||||
struct noop_write_monitor final : public write_monitor {
|
||||
@@ -1396,7 +1415,7 @@ void sstable::write_filter() {
|
||||
return;
|
||||
}
|
||||
|
||||
auto f = static_cast<utils::filter::murmur3_bloom_filter *>(_components->filter.get());
|
||||
auto f = downcast_ptr<utils::filter::murmur3_bloom_filter>(_components->filter.get());
|
||||
|
||||
auto&& bs = f->bits();
|
||||
auto filter_ref = sstables::filter_ref(f->num_hashes(), bs.get_storage());
|
||||
@@ -2872,6 +2891,7 @@ sstable::unlink(storage::sync_dir sync) noexcept {
|
||||
|
||||
co_await std::move(remove_fut);
|
||||
_stats.on_delete();
|
||||
_manager.on_unlink(this);
|
||||
}
|
||||
|
||||
thread_local sstables_stats::stats sstables_stats::_shard_stats;
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
#include <seastar/coroutine/switch_to.hh>
|
||||
#include "log.hh"
|
||||
#include "sstables/sstables_manager.hh"
|
||||
#include "sstables/partition_index_cache.hh"
|
||||
@@ -21,7 +22,7 @@ namespace sstables {
|
||||
logging::logger smlogger("sstables_manager");
|
||||
|
||||
sstables_manager::sstables_manager(
|
||||
db::large_data_handler& large_data_handler, const db::config& dbcfg, gms::feature_service& feat, cache_tracker& ct, size_t available_memory, directory_semaphore& dir_sem, noncopyable_function<locator::host_id()>&& resolve_host_id, storage_manager* shared)
|
||||
db::large_data_handler& large_data_handler, const db::config& dbcfg, gms::feature_service& feat, cache_tracker& ct, size_t available_memory, directory_semaphore& dir_sem, noncopyable_function<locator::host_id()>&& resolve_host_id, scheduling_group maintenance_sg, storage_manager* shared)
|
||||
: _storage(shared)
|
||||
, _available_memory(available_memory)
|
||||
, _large_data_handler(large_data_handler), _db_config(dbcfg), _features(feat), _cache_tracker(ct)
|
||||
@@ -34,6 +35,7 @@ sstables_manager::sstables_manager(
|
||||
utils::updateable_value(std::numeric_limits<uint32_t>::max()))
|
||||
, _dir_semaphore(dir_sem)
|
||||
, _resolve_host_id(std::move(resolve_host_id))
|
||||
, _maintenance_sg(std::move(maintenance_sg))
|
||||
{
|
||||
_components_reloader_status = components_reloader_fiber();
|
||||
}
|
||||
@@ -169,6 +171,8 @@ size_t sstables_manager::get_memory_available_for_reclaimable_components() {
|
||||
}
|
||||
|
||||
future<> sstables_manager::components_reloader_fiber() {
|
||||
co_await coroutine::switch_to(_maintenance_sg);
|
||||
|
||||
sstlog.trace("components_reloader_fiber start");
|
||||
while (true) {
|
||||
co_await _sstable_deleted_event.when();
|
||||
@@ -278,4 +282,9 @@ void sstables_manager::unplug_system_keyspace() noexcept {
|
||||
_sys_ks = nullptr;
|
||||
}
|
||||
|
||||
void sstables_manager::on_unlink(sstable* sst) {
|
||||
// Remove the sst from manager's reclaimed list to prevent any attempts to reload its components.
|
||||
_reclaimed.erase(*sst);
|
||||
}
|
||||
|
||||
} // namespace sstables
|
||||
|
||||
@@ -124,8 +124,10 @@ private:
|
||||
// after system_keyspace initialization.
|
||||
noncopyable_function<locator::host_id()> _resolve_host_id;
|
||||
|
||||
scheduling_group _maintenance_sg;
|
||||
|
||||
public:
|
||||
explicit sstables_manager(db::large_data_handler& large_data_handler, const db::config& dbcfg, gms::feature_service& feat, cache_tracker&, size_t available_memory, directory_semaphore& dir_sem, noncopyable_function<locator::host_id()>&& resolve_host_id, storage_manager* shared = nullptr);
|
||||
explicit sstables_manager(db::large_data_handler& large_data_handler, const db::config& dbcfg, gms::feature_service& feat, cache_tracker&, size_t available_memory, directory_semaphore& dir_sem, noncopyable_function<locator::host_id()>&& resolve_host_id, scheduling_group maintenance_sg = current_scheduling_group(), storage_manager* shared = nullptr);
|
||||
virtual ~sstables_manager();
|
||||
|
||||
shared_sstable make_sstable(schema_ptr schema, sstring table_dir,
|
||||
@@ -177,6 +179,9 @@ public:
|
||||
|
||||
future<> delete_atomically(std::vector<shared_sstable> ssts);
|
||||
|
||||
// To be called by the sstable to signal its unlinking
|
||||
void on_unlink(sstable* sst);
|
||||
|
||||
private:
|
||||
void add(sstable* sst);
|
||||
// Transition the sstable to the "inactive" state. It has no
|
||||
|
||||
@@ -74,6 +74,9 @@ public:
|
||||
return sstable_directory::delete_with_pending_deletion_log;
|
||||
}
|
||||
virtual future<> remove_by_registry_entry(utils::UUID uuid, entry_descriptor desc) override;
|
||||
virtual future<uint64_t> free_space() const override {
|
||||
return seastar::fs_avail(prefix());
|
||||
}
|
||||
|
||||
virtual sstring prefix() const override { return _dir; }
|
||||
};
|
||||
@@ -471,6 +474,10 @@ public:
|
||||
return delete_with_system_keyspace;
|
||||
}
|
||||
virtual future<> remove_by_registry_entry(utils::UUID uuid, entry_descriptor desc) override;
|
||||
virtual future<uint64_t> free_space() const override {
|
||||
// assumes infinite space on s3 (https://aws.amazon.com/s3/faqs/#How_much_data_can_I_store).
|
||||
return make_ready_future<uint64_t>(std::numeric_limits<uint64_t>::max());
|
||||
}
|
||||
|
||||
virtual sstring prefix() const override { return _location; }
|
||||
};
|
||||
|
||||
@@ -64,6 +64,8 @@ public:
|
||||
virtual future<> destroy(const sstable& sst) = 0;
|
||||
virtual noncopyable_function<future<>(std::vector<shared_sstable>)> atomic_deleter() const = 0;
|
||||
virtual future<> remove_by_registry_entry(utils::UUID uuid, entry_descriptor desc) = 0;
|
||||
// Free space available in the underlying storage.
|
||||
virtual future<uint64_t> free_space() const = 0;
|
||||
|
||||
virtual sstring prefix() const = 0;
|
||||
};
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
|
||||
import pytest
|
||||
import time
|
||||
import requests
|
||||
from botocore.exceptions import ClientError
|
||||
from util import create_test_table, new_test_table, random_string, full_scan, full_query, multiset, list_tables
|
||||
|
||||
@@ -538,3 +539,35 @@ def test_lsi_and_gsi_same_name(dynamodb):
|
||||
}
|
||||
])
|
||||
table.delete()
|
||||
|
||||
# Test that the LSI table can be addressed in Scylla's REST API (obviously,
|
||||
# since this test is for the REST API, it is Scylla-only and can't be run on
|
||||
# DynamoDB).
|
||||
# At the time this test was written, the LSI's name has a "!" in it, so this
|
||||
# test reproduces a bug in URL decoding (#5883). But the goal of this test
|
||||
# isn't to insist that a table backing an LSI must have a specific name,
|
||||
# but rather that whatever name it does have - it can be addressed.
|
||||
def test_lsi_name_rest_api(test_table_lsi_1, rest_api):
|
||||
# See that the LSI is listed in list of tables. It will be a table
|
||||
# whose CQL name contains the Alternator table's name, and the
|
||||
# LSI's name ('hello'). As of this writing, it will actually be
|
||||
# alternator_<name>:<name>!:<lsi> - but the test doesn't enshrine this.
|
||||
resp = requests.get(f'{rest_api}/column_family/name')
|
||||
resp.raise_for_status()
|
||||
lsi_rest_name = None
|
||||
for name in resp.json():
|
||||
if test_table_lsi_1.name in name and 'hello' in name:
|
||||
lsi_rest_name = name
|
||||
break
|
||||
assert lsi_rest_name
|
||||
# Attempt to run a request on this LSI's table name "lsi_rest_name".
|
||||
# We'll use the compaction_strategy request here, but if for some
|
||||
# reason in the future we decide to drop that request, any other
|
||||
# request will be fine.
|
||||
resp = requests.get(f'{rest_api}/column_family/compaction_strategy/{lsi_rest_name}')
|
||||
resp.raise_for_status()
|
||||
# Let's make things difficult for the server by URL encoding the
|
||||
# lsi_rest_name - exposing issue #5883.
|
||||
encoded_lsi_rest_name = requests.utils.quote(lsi_rest_name)
|
||||
resp = requests.get(f'{rest_api}/column_family/compaction_strategy/{encoded_lsi_rest_name}')
|
||||
resp.raise_for_status()
|
||||
|
||||
@@ -190,16 +190,18 @@ SEASTAR_TEST_CASE(tests_reserve_partial) {
|
||||
with_allocator(region.allocator(), [&] {
|
||||
as(region, [&] {
|
||||
auto rand = std::default_random_engine();
|
||||
auto size_dist = std::uniform_int_distribution<unsigned>(1, 1 << 12);
|
||||
// use twice the max_chunk_capacity() as upper limit to test if
|
||||
// reserve_partial() can reserve capacity across multiple chunks.
|
||||
auto max_test_size = lsa::chunked_managed_vector<uint8_t>::max_chunk_capacity() * 2;
|
||||
auto size_dist = std::uniform_int_distribution<unsigned>(1, max_test_size);
|
||||
|
||||
for (int i = 0; i < 100; ++i) {
|
||||
lsa::chunked_managed_vector<uint8_t> v;
|
||||
const auto orig_size = size_dist(rand);
|
||||
auto size = orig_size;
|
||||
while (size) {
|
||||
size = v.reserve_partial(size);
|
||||
const auto size = size_dist(rand);
|
||||
while (v.capacity() != size) {
|
||||
v.reserve_partial(size);
|
||||
}
|
||||
BOOST_REQUIRE_EQUAL(v.capacity(), orig_size);
|
||||
BOOST_REQUIRE_EQUAL(v.capacity(), size);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
@@ -180,13 +180,12 @@ BOOST_AUTO_TEST_CASE(tests_reserve_partial) {
|
||||
auto size_dist = std::uniform_int_distribution<unsigned>(1, 1 << 12);
|
||||
|
||||
for (int i = 0; i < 100; ++i) {
|
||||
utils::chunked_vector<uint8_t> v;
|
||||
const auto orig_size = size_dist(rand);
|
||||
auto size = orig_size;
|
||||
while (size) {
|
||||
size = v.reserve_partial(size);
|
||||
utils::chunked_vector<uint8_t, 512> v;
|
||||
const auto size = size_dist(rand);
|
||||
while (v.capacity() != size) {
|
||||
v.reserve_partial(size);
|
||||
}
|
||||
BOOST_REQUIRE_EQUAL(v.capacity(), orig_size);
|
||||
BOOST_REQUIRE_EQUAL(v.capacity(), size);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -304,24 +304,21 @@ SEASTAR_TEST_CASE(test_commitlog_delete_when_over_disk_limit) {
|
||||
cfg.commitlog_sync_period_in_ms = 1;
|
||||
return cl_test(cfg, [](commitlog& log) {
|
||||
auto sem = make_lw_shared<semaphore>(0);
|
||||
auto segments = make_lw_shared<segment_names>();
|
||||
auto segments = make_lw_shared<std::set<sstring>>();
|
||||
|
||||
// add a flush handler that simply says we're done with the range.
|
||||
auto r = log.add_flush_handler([&log, sem, segments](cf_id_type id, replay_position pos) {
|
||||
auto f = make_ready_future<>();
|
||||
// #6195 only get segment list at first callback. We can (not often)
|
||||
// be called again, but reading segment list at that point might (will)
|
||||
// render same list as in the diff check below.
|
||||
if (segments->empty()) {
|
||||
*segments = log.get_active_segment_names();
|
||||
// Verify #5899 - file size should not exceed the config max.
|
||||
f = parallel_for_each(*segments, [](sstring filename) {
|
||||
return file_size(filename).then([](uint64_t size) {
|
||||
BOOST_REQUIRE_LE(size, max_size_mb * 1024 * 1024);
|
||||
});
|
||||
});
|
||||
auto active_segments = log.get_active_segment_names();
|
||||
for (auto&& s : active_segments) {
|
||||
segments->insert(s);
|
||||
}
|
||||
return f.then([&log, sem, id] {
|
||||
|
||||
// Verify #5899 - file size should not exceed the config max.
|
||||
return parallel_for_each(active_segments, [](sstring filename) {
|
||||
return file_size(filename).then([](uint64_t size) {
|
||||
BOOST_REQUIRE_LE(size, max_size_mb * 1024 * 1024);
|
||||
});
|
||||
}).then([&log, sem, id] {
|
||||
log.discard_completed_segments(id);
|
||||
sem->signal();
|
||||
});
|
||||
@@ -339,7 +336,8 @@ SEASTAR_TEST_CASE(test_commitlog_delete_when_over_disk_limit) {
|
||||
set->insert(h.release().id);
|
||||
});
|
||||
}).then([&log, segments]() {
|
||||
auto diff = segment_diff(log, *segments);
|
||||
segment_names names(segments->begin(), segments->end());
|
||||
auto diff = segment_diff(log, names);
|
||||
auto nn = diff.size();
|
||||
auto dn = log.get_num_segments_destroyed();
|
||||
|
||||
|
||||
@@ -26,6 +26,8 @@
|
||||
#include <seastar/core/future-util.hh>
|
||||
#include <seastar/core/sleep.hh>
|
||||
#include "transport/messages/result_message.hh"
|
||||
#include "transport/messages/result_message_base.hh"
|
||||
#include "types/types.hh"
|
||||
#include "utils/big_decimal.hh"
|
||||
#include "types/user.hh"
|
||||
#include "types/map.hh"
|
||||
@@ -4125,7 +4127,7 @@ SEASTAR_TEST_CASE(test_describe_simple_schema) {
|
||||
" AND read_repair_chance = 0\n"
|
||||
" AND speculative_retry = '99.0PERCENTILE'\n"
|
||||
" AND paxos_grace_seconds = 43200\n"
|
||||
" AND tombstone_gc = {'mode':'timeout','propagation_delay_in_seconds':'3600'};\n"
|
||||
" AND tombstone_gc = {'mode': 'timeout','propagation_delay_in_seconds': '3600'};\n"
|
||||
},
|
||||
{"cf1", "CREATE TABLE ks.cf1 (\n"
|
||||
" pk blob,\n"
|
||||
@@ -4149,7 +4151,7 @@ SEASTAR_TEST_CASE(test_describe_simple_schema) {
|
||||
" AND read_repair_chance = 0\n"
|
||||
" AND speculative_retry = '99.0PERCENTILE'\n"
|
||||
" AND paxos_grace_seconds = 43200\n"
|
||||
" AND tombstone_gc = {'mode':'timeout','propagation_delay_in_seconds':'3600'};\n"
|
||||
" AND tombstone_gc = {'mode': 'timeout','propagation_delay_in_seconds': '3600'};\n"
|
||||
},
|
||||
{"CF2", "CREATE TABLE ks.\"CF2\" (\n"
|
||||
" pk blob,\n"
|
||||
@@ -4173,7 +4175,7 @@ SEASTAR_TEST_CASE(test_describe_simple_schema) {
|
||||
" AND read_repair_chance = 0\n"
|
||||
" AND speculative_retry = '99.0PERCENTILE'\n"
|
||||
" AND paxos_grace_seconds = 43200\n"
|
||||
" AND tombstone_gc = {'mode':'timeout','propagation_delay_in_seconds':'3600'};\n"
|
||||
" AND tombstone_gc = {'mode': 'timeout','propagation_delay_in_seconds': '3600'};\n"
|
||||
},
|
||||
{"Cf3", "CREATE TABLE ks.\"Cf3\" (\n"
|
||||
" pk blob,\n"
|
||||
@@ -4198,7 +4200,7 @@ SEASTAR_TEST_CASE(test_describe_simple_schema) {
|
||||
" AND read_repair_chance = 0\n"
|
||||
" AND speculative_retry = '99.0PERCENTILE'\n"
|
||||
" AND paxos_grace_seconds = 43200\n"
|
||||
" AND tombstone_gc = {'mode':'timeout','propagation_delay_in_seconds':'3600'};\n"
|
||||
" AND tombstone_gc = {'mode': 'timeout','propagation_delay_in_seconds': '3600'};\n"
|
||||
},
|
||||
{"cf4", "CREATE TABLE ks.cf4 (\n"
|
||||
" pk blob,\n"
|
||||
@@ -4222,7 +4224,7 @@ SEASTAR_TEST_CASE(test_describe_simple_schema) {
|
||||
" AND read_repair_chance = 0\n"
|
||||
" AND speculative_retry = '99.0PERCENTILE'\n"
|
||||
" AND paxos_grace_seconds = 43200\n"
|
||||
" AND tombstone_gc = {'mode':'timeout','propagation_delay_in_seconds':'3600'};\n"
|
||||
" AND tombstone_gc = {'mode': 'timeout','propagation_delay_in_seconds': '3600'};\n"
|
||||
}
|
||||
|
||||
};
|
||||
@@ -4267,7 +4269,7 @@ SEASTAR_TEST_CASE(test_describe_view_schema) {
|
||||
" AND read_repair_chance = 0\n"
|
||||
" AND speculative_retry = '99.0PERCENTILE'\n"
|
||||
" AND paxos_grace_seconds = 43200\n"
|
||||
" AND tombstone_gc = {'mode':'timeout','propagation_delay_in_seconds':'3600'};\n";
|
||||
" AND tombstone_gc = {'mode': 'timeout','propagation_delay_in_seconds': '3600'};\n";
|
||||
|
||||
std::unordered_map<std::string, std::string> cql_create_tables {
|
||||
{"cf_view", "CREATE MATERIALIZED VIEW \"KS\".cf_view AS\n"
|
||||
@@ -4291,7 +4293,7 @@ SEASTAR_TEST_CASE(test_describe_view_schema) {
|
||||
" AND read_repair_chance = 0\n"
|
||||
" AND speculative_retry = '99.0PERCENTILE'\n"
|
||||
" AND paxos_grace_seconds = 43200\n"
|
||||
" AND tombstone_gc = {'mode':'timeout','propagation_delay_in_seconds':'3600'};\n"},
|
||||
" AND tombstone_gc = {'mode': 'timeout','propagation_delay_in_seconds': '3600'};\n"},
|
||||
{"cf_index_index", "CREATE INDEX cf_index ON \"KS\".\"cF\"(col2);"},
|
||||
{"cf_index1_index", "CREATE INDEX cf_index1 ON \"KS\".\"cF\"(pk);"},
|
||||
{"cf_index2_index", "CREATE INDEX cf_index2 ON \"KS\".\"cF\"(pk1);"},
|
||||
@@ -5361,6 +5363,72 @@ SEASTAR_TEST_CASE(test_parallelized_select_counter_type) {
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_single_partition_aggregation_is_not_parallelized) {
|
||||
// It's pointless from performance pov to parallelize
|
||||
// aggregation queries which reads only single partition.
|
||||
|
||||
return with_parallelized_aggregation_enabled_thread([](cql_test_env& e) {
|
||||
auto& qp = e.local_qp();
|
||||
const auto stat_parallelized = qp.get_cql_stats().select_parallelized;
|
||||
|
||||
e.execute_cql("CREATE TABLE tbl (pk int, ck int, col int, PRIMARY KEY (pk, ck));").get();
|
||||
const int value_count = 10;
|
||||
for (int pk = 0; pk < 2; pk++) {
|
||||
for (int c = 0; c < value_count; c++) {
|
||||
e.execute_cql(format("INSERT INTO tbl (pk, ck, col) VALUES ({:d}, {:d}, {:d});", pk, c, c)).get();
|
||||
}
|
||||
}
|
||||
|
||||
const auto result1 = e.execute_cql("SELECT COUNT(*) FROM tbl WHERE pk = 1;").get();
|
||||
assert_that(result1).is_rows().with_rows({
|
||||
{long_type->decompose(int64_t(value_count))}
|
||||
});
|
||||
BOOST_CHECK_EQUAL(stat_parallelized, qp.get_cql_stats().select_parallelized);
|
||||
|
||||
const auto result2 = e.execute_cql("SELECT COUNT(*) FROM tbl WHERE pk = 1 AND ck = 1;").get();
|
||||
assert_that(result2).is_rows().with_rows({
|
||||
{long_type->decompose(int64_t(1))}
|
||||
});
|
||||
BOOST_CHECK_EQUAL(stat_parallelized, qp.get_cql_stats().select_parallelized);
|
||||
|
||||
const auto result3 = e.execute_cql("SELECT COUNT(*) FROM tbl WHERE token(pk) = 1;").get();
|
||||
// We don't check value of count(*) here but only if it wasn't parallelized
|
||||
BOOST_CHECK_EQUAL(stat_parallelized, qp.get_cql_stats().select_parallelized);
|
||||
|
||||
const auto result4 = e.execute_cql("SELECT COUNT(*) FROM tbl WHERE pk = 1 AND pk = 2;").get();
|
||||
assert_that(result4).is_rows().with_rows({
|
||||
{long_type->decompose(int64_t(0))}
|
||||
});
|
||||
BOOST_CHECK_EQUAL(stat_parallelized, qp.get_cql_stats().select_parallelized);
|
||||
|
||||
|
||||
e.execute_cql("CREATE TABLE tbl2 (pk1 int, pk2 int, ck int, col int, PRIMARY KEY((pk1, pk2), ck));").get();
|
||||
for (int pk1 = 0; pk1 < 2; pk1++) {
|
||||
for (int pk2 = 0; pk2 < 2; pk2++) {
|
||||
for (int c = 0; c < value_count; c++) {
|
||||
e.execute_cql(format("INSERT INTO tbl2 (pk1, pk2, ck, col) VALUES ({:d}, {:d}, {:d}, {:d});", pk1, pk2, c, c)).get();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const auto result_pk12 = e.execute_cql("SELECT COUNT(*) FROM tbl2 WHERE pk1 = 1 AND pk2 = 0;").get();
|
||||
assert_that(result_pk12).is_rows().with_rows({
|
||||
{long_type->decompose(int64_t(value_count))}
|
||||
});
|
||||
BOOST_CHECK_EQUAL(stat_parallelized, qp.get_cql_stats().select_parallelized);
|
||||
|
||||
// Query with only partly restricted partition key requires `ALLOW FILTERING` clause
|
||||
// and we doesn't parallelize queries which need filtering.
|
||||
// See issue #19369.
|
||||
const auto result_pk1 = e.execute_cql("SELECT COUNT(*) FROM tbl2 WHERE pk1 = 1 ALLOW FILTERING;").get();
|
||||
// This query contains also column for pk1
|
||||
assert_that(result_pk1).is_rows().with_rows({
|
||||
{long_type->decompose(int64_t(value_count * 2)), int32_type->decompose(int32_t(1))}
|
||||
});
|
||||
BOOST_CHECK_EQUAL(stat_parallelized, qp.get_cql_stats().select_parallelized);
|
||||
});
|
||||
}
|
||||
|
||||
static future<> with_udf_and_parallel_aggregation_enabled_thread(std::function<void(cql_test_env&)>&& func) {
|
||||
auto db_cfg_ptr = make_shared<db::config>();
|
||||
auto& db_cfg = *db_cfg_ptr;
|
||||
|
||||
@@ -520,6 +520,73 @@ SEASTAR_TEST_CASE(test_zone_reclaiming_preserves_free_size) {
|
||||
});
|
||||
}
|
||||
|
||||
// Tests the intended usage of hold_reserve.
|
||||
//
|
||||
// Sets up a reserve, exhausts memory, opens the reserve,
|
||||
// checks that this allows us to do multiple additional allocations
|
||||
// without failing.
|
||||
SEASTAR_THREAD_TEST_CASE(test_hold_reserve) {
|
||||
logalloc::region region;
|
||||
logalloc::allocating_section as;
|
||||
|
||||
// We will fill LSA with an intrusive list of small entries.
|
||||
// We make it intrusive to avoid any containers which do std allocations,
|
||||
// since it could make the test imprecise.
|
||||
struct entry {
|
||||
using link = boost::intrusive::list_member_hook<boost::intrusive::link_mode<boost::intrusive::auto_unlink>>;
|
||||
link _link;
|
||||
// We are going to fill the entire memory with this.
|
||||
// Padding makes the entries bigger to speed up the test.
|
||||
std::array<char, 8192> _padding;
|
||||
};
|
||||
using list = boost::intrusive::list<entry,
|
||||
boost::intrusive::member_hook<entry, entry::link, &entry::_link>,
|
||||
boost::intrusive::constant_time_size<false>>;
|
||||
|
||||
as.with_reserve(region, [&] {
|
||||
with_allocator(region.allocator(), [&] {
|
||||
assert(sizeof(entry) + 128 < current_allocator().preferred_max_contiguous_allocation());
|
||||
logalloc::reclaim_lock rl(region);
|
||||
|
||||
// Reserve a segment.
|
||||
auto guard = std::make_optional<hold_reserve>(128*1024);
|
||||
|
||||
// Fill the entire available memory with LSA objects.
|
||||
list entries;
|
||||
auto clean_up = defer([&entries] {
|
||||
entries.clear_and_dispose([] (entry *e) {current_allocator().destroy(e);});
|
||||
});
|
||||
auto alloc_entry = [] () {
|
||||
return current_allocator().construct<entry>();
|
||||
};
|
||||
try {
|
||||
while (true) {
|
||||
entries.push_back(*alloc_entry());
|
||||
}
|
||||
} catch (const std::bad_alloc&) {
|
||||
// expected
|
||||
}
|
||||
|
||||
// Sanity check. We should be OOM at this point.
|
||||
BOOST_REQUIRE_THROW(hold_reserve(128*1024), std::bad_alloc);
|
||||
BOOST_REQUIRE_THROW(alloc_entry(), std::bad_alloc);
|
||||
|
||||
// Release the reserve.
|
||||
guard.reset();
|
||||
|
||||
// Sanity check.
|
||||
BOOST_REQUIRE_NO_THROW(hold_reserve(128*1024));
|
||||
BOOST_REQUIRE_NO_THROW(hold_reserve(128*1024));
|
||||
BOOST_REQUIRE_NO_THROW(hold_reserve(128*1024));
|
||||
|
||||
// Freeing up a segment should be enough to allocate multiple small entries;
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
entries.push_back(*alloc_entry());
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// No point in testing contiguous memory allocation in debug mode
|
||||
#ifndef SEASTAR_DEFAULT_ALLOCATOR
|
||||
SEASTAR_THREAD_TEST_CASE(test_can_reclaim_contiguous_memory_with_mixed_allocations) {
|
||||
|
||||
@@ -1058,6 +1058,9 @@ SEASTAR_TEST_CASE(failed_flush_prevents_writes) {
|
||||
std::cerr << "Skipping test as it depends on error injection. Please run in mode where it's enabled (debug,dev).\n";
|
||||
return make_ready_future<>();
|
||||
#else
|
||||
auto db_config = make_shared<db::config>();
|
||||
db_config->unspooled_dirty_soft_limit.set(1.0);
|
||||
|
||||
return do_with_cql_env_thread([](cql_test_env& env) {
|
||||
replica::database& db = env.local_db();
|
||||
service::migration_manager& mm = env.migration_manager().local();
|
||||
@@ -1090,22 +1093,22 @@ SEASTAR_TEST_CASE(failed_flush_prevents_writes) {
|
||||
// Trigger flush
|
||||
auto f = t.flush();
|
||||
|
||||
BOOST_ASSERT(eventually_true([&] {
|
||||
BOOST_REQUIRE(eventually_true([&] {
|
||||
return db.cf_stats()->failed_memtables_flushes_count - failed_memtables_flushes_count >= 4;
|
||||
}));
|
||||
|
||||
// The flush failed, make sure there is still data in memtable.
|
||||
BOOST_ASSERT(t.min_memtable_timestamp() < api::max_timestamp);
|
||||
BOOST_REQUIRE_LT(t.min_memtable_timestamp(), api::max_timestamp);
|
||||
utils::get_local_injector().disable("table_seal_active_memtable_reacquire_write_permit");
|
||||
|
||||
BOOST_ASSERT(eventually_true([&] {
|
||||
BOOST_REQUIRE(eventually_true([&] {
|
||||
// The error above is no longer being injected, so
|
||||
// seal_active_memtable retry loop should eventually succeed
|
||||
return t.min_memtable_timestamp() == api::max_timestamp;
|
||||
}));
|
||||
|
||||
std::move(f).get();
|
||||
});
|
||||
}, db_config);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -611,6 +611,35 @@ SEASTAR_THREAD_TEST_CASE(test_reader_concurrency_semaphore_stop_waits_on_permits
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void require_can_admit(schema_ptr schema, reader_concurrency_semaphore& semaphore, bool expected_can_admit, const char* description,
|
||||
seastar::compat::source_location sl = seastar::compat::source_location::current()) {
|
||||
testlog.trace("Running admission scenario {}, with exepcted_can_admit={}", description, expected_can_admit);
|
||||
const auto stats_before = semaphore.get_stats();
|
||||
|
||||
auto admit_fut = semaphore.obtain_permit(schema, "require_can_admit", 1024, db::timeout_clock::now(), {});
|
||||
admit_fut.wait();
|
||||
const bool can_admit = !admit_fut.failed();
|
||||
if (can_admit) {
|
||||
admit_fut.ignore_ready_future();
|
||||
} else {
|
||||
// Make sure we have a timeout exception, not something else
|
||||
BOOST_REQUIRE_THROW(std::rethrow_exception(admit_fut.get_exception()), semaphore_timed_out);
|
||||
}
|
||||
|
||||
const auto stats_after = semaphore.get_stats();
|
||||
BOOST_REQUIRE_EQUAL(stats_after.reads_admitted, stats_before.reads_admitted + uint64_t(can_admit));
|
||||
// Deliberately not checking `reads_enqueued_for_admission`, a read can be enqueued temporarily during the admission process.
|
||||
|
||||
if (can_admit == expected_can_admit) {
|
||||
testlog.trace("admission scenario '{}' with expected_can_admit={} passed at {}:{}", description, expected_can_admit, sl.file_name(),
|
||||
sl.line());
|
||||
} else {
|
||||
BOOST_FAIL(fmt::format("admission scenario '{}' with expected_can_admit={} failed at {}:{}\ndiagnostics: {}", description,
|
||||
expected_can_admit, sl.file_name(), sl.line(), semaphore.dump_diagnostics()));
|
||||
}
|
||||
};
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_reader_concurrency_semaphore_admission) {
|
||||
simple_schema s;
|
||||
const auto schema = s.schema();
|
||||
@@ -620,30 +649,7 @@ SEASTAR_THREAD_TEST_CASE(test_reader_concurrency_semaphore_admission) {
|
||||
|
||||
auto require_can_admit = [&] (bool expected_can_admit, const char* description,
|
||||
seastar::compat::source_location sl = seastar::compat::source_location::current()) {
|
||||
testlog.trace("Running admission scenario {}, with exepcted_can_admit={}", description, expected_can_admit);
|
||||
const auto stats_before = semaphore.get_stats();
|
||||
|
||||
auto admit_fut = semaphore.obtain_permit(schema, get_name(), 1024, db::timeout_clock::now(), {});
|
||||
admit_fut.wait();
|
||||
const bool can_admit = !admit_fut.failed();
|
||||
if (can_admit) {
|
||||
admit_fut.ignore_ready_future();
|
||||
} else {
|
||||
// Make sure we have a timeout exception, not something else
|
||||
BOOST_REQUIRE_THROW(std::rethrow_exception(admit_fut.get_exception()), semaphore_timed_out);
|
||||
}
|
||||
|
||||
const auto stats_after = semaphore.get_stats();
|
||||
BOOST_REQUIRE_EQUAL(stats_after.reads_admitted, stats_before.reads_admitted + uint64_t(can_admit));
|
||||
// Deliberately not checking `reads_enqueued_for_admission`, a read can be enqueued temporarily during the admission process.
|
||||
|
||||
if (can_admit == expected_can_admit) {
|
||||
testlog.trace("admission scenario '{}' with expected_can_admit={} passed at {}:{}", description, expected_can_admit, sl.file_name(),
|
||||
sl.line());
|
||||
} else {
|
||||
BOOST_FAIL(fmt::format("admission scenario '{}' with expected_can_admit={} failed at {}:{}\ndiagnostics: {}", description,
|
||||
expected_can_admit, sl.file_name(), sl.line(), semaphore.dump_diagnostics()));
|
||||
}
|
||||
::require_can_admit(schema, semaphore, expected_can_admit, description, sl);
|
||||
};
|
||||
|
||||
require_can_admit(true, "semaphore in initial state");
|
||||
@@ -1944,3 +1950,57 @@ SEASTAR_THREAD_TEST_CASE(test_reader_concurrency_semaphore_execution_stage_wakeu
|
||||
|
||||
permit2_fut.get();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_reader_concurrency_semaphore_live_update_cpu_concurrency) {
|
||||
simple_schema s;
|
||||
const auto schema = s.schema();
|
||||
|
||||
utils::updateable_value_source<uint32_t> cpu_concurrency{2};
|
||||
const int32_t initial_count = 4;
|
||||
const uint32_t initial_memory = 4 * 1024;
|
||||
const auto serialize_multiplier = std::numeric_limits<uint32_t>::max();
|
||||
const auto kill_multiplier = std::numeric_limits<uint32_t>::max();
|
||||
|
||||
reader_concurrency_semaphore semaphore(
|
||||
utils::updateable_value<int>(initial_count),
|
||||
initial_memory,
|
||||
get_name(),
|
||||
100,
|
||||
utils::updateable_value<uint32_t>(serialize_multiplier),
|
||||
utils::updateable_value<uint32_t>(kill_multiplier),
|
||||
utils::updateable_value(cpu_concurrency));
|
||||
auto stop_sem = deferred_stop(semaphore);
|
||||
|
||||
auto require_can_admit = [&] (bool expected_can_admit, const char* description,
|
||||
seastar::compat::source_location sl = seastar::compat::source_location::current()) {
|
||||
::require_can_admit(schema, semaphore, expected_can_admit, description, sl);
|
||||
};
|
||||
|
||||
auto permit1 = semaphore.obtain_permit(schema, get_name(), 1024, db::timeout_clock::now(), {}).get();
|
||||
|
||||
require_can_admit(true, "!need_cpu");
|
||||
{
|
||||
reader_permit::need_cpu_guard ncpu_guard{permit1};
|
||||
|
||||
require_can_admit(true, "need_cpu < cpu_concurrency");
|
||||
|
||||
auto permit2 = semaphore.obtain_permit(schema, get_name(), 1024, db::timeout_clock::now(), {}).get();
|
||||
|
||||
// no change
|
||||
require_can_admit(true, "need_cpu < cpu_concurrency");
|
||||
{
|
||||
reader_permit::need_cpu_guard ncpu_guard{permit2};
|
||||
require_can_admit(false, "need_cpu == cpu_concurrency");
|
||||
|
||||
cpu_concurrency.set(3);
|
||||
|
||||
require_can_admit(true, "after set(3): need_cpu < cpu_concurrency");
|
||||
|
||||
cpu_concurrency.set(2);
|
||||
|
||||
require_can_admit(false, "after set(2): need_cpu == cpu_concurrency");
|
||||
}
|
||||
require_can_admit(true, "need_cpu < cpu_concurrency");
|
||||
}
|
||||
require_can_admit(true, "!need_cpu");
|
||||
}
|
||||
|
||||
@@ -5504,3 +5504,80 @@ SEASTAR_TEST_CASE(test_compression_premature_eof) {
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// A reproducer for scylladb/scylladb#16065.
|
||||
// Creates an sstable with a newer schema, and populates
|
||||
// it with a reader created with an older schema.
|
||||
//
|
||||
// Before the fixes, it would have resulted in an assert violation.
|
||||
SEASTAR_TEST_CASE(test_alter_bloom_fp_chance_during_write) {
|
||||
return test_env::do_with_async([] (test_env& env) {
|
||||
auto s1 = schema_builder("ks", "t")
|
||||
.with_column("pk", bytes_type, column_kind::partition_key)
|
||||
.with_column("v", utf8_type, column_kind::regular_column)
|
||||
.set_bloom_filter_fp_chance(1.0)
|
||||
.build();
|
||||
auto s2 = schema_builder(s1)
|
||||
.set_bloom_filter_fp_chance(0.01)
|
||||
.build();
|
||||
|
||||
auto ts = api::new_timestamp();
|
||||
|
||||
auto m = mutation(s1, partition_key::from_single_value(*s1, serialized(0)));
|
||||
auto val = std::string(1000, '0');
|
||||
m.set_clustered_cell(clustering_key::make_empty(), "v", val, ts);
|
||||
|
||||
auto mt = make_lw_shared<replica::memtable>(s1);
|
||||
mt->apply(m);
|
||||
|
||||
auto sst = env.make_sstable(s2, sstable_version_types::me);
|
||||
sst->write_components(mt->make_flat_reader(s1, env.make_reader_permit()), 1, s1, env.manager().configure_writer(), mt->get_encoding_stats()).get();
|
||||
|
||||
sstable_assertions sa(env, sst);
|
||||
sa.load();
|
||||
m.upgrade(s2);
|
||||
auto assertions = assert_that(sa.make_reader());
|
||||
assertions.produces(m);
|
||||
assertions.produces_end_of_stream();
|
||||
});
|
||||
}
|
||||
|
||||
// Reproducer for scylladb/scylladb#16065.
|
||||
// Creates an sstable with a newer schema, and populates
|
||||
// it with a reader created with an older schema.
|
||||
//
|
||||
// Before the fixes, it would result in a "compress is not supported" error.
|
||||
SEASTAR_TEST_CASE(test_alter_compression_during_write) {
|
||||
return test_env::do_with_async([] (test_env& env) {
|
||||
auto s1 = schema_builder("ks", "t")
|
||||
.with_column("pk", bytes_type, column_kind::partition_key)
|
||||
.with_column("v", utf8_type, column_kind::regular_column)
|
||||
.set_compressor_params(std::map<sstring, sstring>{
|
||||
})
|
||||
.build();
|
||||
auto s2 = schema_builder(s1)
|
||||
.set_compressor_params(std::map<sstring, sstring>{
|
||||
{"sstable_compression", "org.apache.cassandra.io.compress.ZstdCompressor"}
|
||||
})
|
||||
.build();
|
||||
|
||||
auto ts = api::new_timestamp();
|
||||
|
||||
auto m = mutation(s1, partition_key::from_single_value(*s1, serialized(0)));
|
||||
auto val = std::string(1000, '0');
|
||||
m.set_clustered_cell(clustering_key::make_empty(), "v", val, ts);
|
||||
|
||||
auto mt = make_lw_shared<replica::memtable>(s1);
|
||||
mt->apply(m);
|
||||
|
||||
auto sst = env.make_sstable(s2, sstable_version_types::me);
|
||||
sst->write_components(mt->make_flat_reader(s1, env.make_reader_permit()), 1, s1, env.manager().configure_writer(), mt->get_encoding_stats()).get();
|
||||
|
||||
sstable_assertions sa(env, sst);
|
||||
sa.load();
|
||||
m.upgrade(s2);
|
||||
auto assertions = assert_that(sa.make_reader());
|
||||
assertions.produces(m);
|
||||
assertions.produces_end_of_stream();
|
||||
});
|
||||
}
|
||||
|
||||
@@ -3503,6 +3503,15 @@ SEASTAR_TEST_CASE(test_twcs_partition_estimate) {
|
||||
});
|
||||
}
|
||||
|
||||
static compaction_descriptor get_reshaping_job(sstables::compaction_strategy& cs, const std::vector<shared_sstable>& input,
|
||||
const schema_ptr& s, reshape_mode mode, uint64_t free_storage_space = std::numeric_limits<uint64_t>::max()) {
|
||||
reshape_config cfg {
|
||||
.mode = mode,
|
||||
.free_storage_space = free_storage_space,
|
||||
};
|
||||
return cs.get_reshaping_job(input, s, cfg);
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(stcs_reshape_test) {
|
||||
return test_env::do_with_async([] (test_env& env) {
|
||||
simple_schema ss;
|
||||
@@ -3520,8 +3529,8 @@ SEASTAR_TEST_CASE(stcs_reshape_test) {
|
||||
auto cs = sstables::make_compaction_strategy(sstables::compaction_strategy_type::size_tiered,
|
||||
s->compaction_strategy_options());
|
||||
|
||||
BOOST_REQUIRE(cs.get_reshaping_job(sstables, s, reshape_mode::strict).sstables.size());
|
||||
BOOST_REQUIRE(cs.get_reshaping_job(sstables, s, reshape_mode::relaxed).sstables.size());
|
||||
BOOST_REQUIRE(get_reshaping_job(cs, sstables, s, reshape_mode::strict).sstables.size());
|
||||
BOOST_REQUIRE(get_reshaping_job(cs, sstables, s, reshape_mode::relaxed).sstables.size());
|
||||
});
|
||||
}
|
||||
|
||||
@@ -3543,7 +3552,7 @@ SEASTAR_TEST_CASE(lcs_reshape_test) {
|
||||
sstables.push_back(std::move(sst));
|
||||
}
|
||||
|
||||
BOOST_REQUIRE(cs.get_reshaping_job(sstables, s, reshape_mode::strict).sstables.size() == 256);
|
||||
BOOST_REQUIRE(get_reshaping_job(cs, sstables, s, reshape_mode::strict).sstables.size() == 256);
|
||||
}
|
||||
// all overlapping
|
||||
{
|
||||
@@ -3555,7 +3564,7 @@ SEASTAR_TEST_CASE(lcs_reshape_test) {
|
||||
sstables.push_back(std::move(sst));
|
||||
}
|
||||
|
||||
BOOST_REQUIRE(cs.get_reshaping_job(sstables, s, reshape_mode::strict).sstables.size() == uint64_t(s->max_compaction_threshold()));
|
||||
BOOST_REQUIRE(get_reshaping_job(cs, sstables, s, reshape_mode::strict).sstables.size() == uint64_t(s->max_compaction_threshold()));
|
||||
}
|
||||
// single sstable
|
||||
{
|
||||
@@ -3563,7 +3572,7 @@ SEASTAR_TEST_CASE(lcs_reshape_test) {
|
||||
auto key = keys[0].key();
|
||||
sstables::test(sst).set_values_for_leveled_strategy(1 /* size */, 0 /* level */, 0 /* max ts */, key, key);
|
||||
|
||||
BOOST_REQUIRE(cs.get_reshaping_job({ sst }, s, reshape_mode::strict).sstables.size() == 0);
|
||||
BOOST_REQUIRE(get_reshaping_job(cs, { sst }, s, reshape_mode::strict).sstables.size() == 0);
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -3780,7 +3789,7 @@ SEASTAR_TEST_CASE(twcs_reshape_with_disjoint_set_test) {
|
||||
sstables.push_back(std::move(sst));
|
||||
}
|
||||
|
||||
BOOST_REQUIRE_EQUAL(cs.get_reshaping_job(sstables, s, reshape_mode::strict).sstables.size(), disjoint_sstable_count);
|
||||
BOOST_REQUIRE_EQUAL(get_reshaping_job(cs, sstables, s, reshape_mode::strict).sstables.size(), disjoint_sstable_count);
|
||||
}
|
||||
|
||||
{
|
||||
@@ -3793,7 +3802,7 @@ SEASTAR_TEST_CASE(twcs_reshape_with_disjoint_set_test) {
|
||||
sstables.push_back(std::move(sst));
|
||||
}
|
||||
|
||||
auto reshaping_count = cs.get_reshaping_job(sstables, s, reshape_mode::strict).sstables.size();
|
||||
auto reshaping_count = get_reshaping_job(cs, sstables, s, reshape_mode::strict).sstables.size();
|
||||
BOOST_REQUIRE_GE(reshaping_count, disjoint_sstable_count - min_threshold + 1);
|
||||
BOOST_REQUIRE_LE(reshaping_count, disjoint_sstable_count);
|
||||
}
|
||||
@@ -3811,7 +3820,7 @@ SEASTAR_TEST_CASE(twcs_reshape_with_disjoint_set_test) {
|
||||
sstables.push_back(std::move(sst));
|
||||
}
|
||||
|
||||
BOOST_REQUIRE_EQUAL(cs.get_reshaping_job(sstables, s, reshape_mode::strict).sstables.size(), 0);
|
||||
BOOST_REQUIRE_EQUAL(get_reshaping_job(cs, sstables, s, reshape_mode::strict).sstables.size(), 0);
|
||||
}
|
||||
|
||||
{
|
||||
@@ -3824,7 +3833,7 @@ SEASTAR_TEST_CASE(twcs_reshape_with_disjoint_set_test) {
|
||||
sstables.push_back(std::move(sst));
|
||||
}
|
||||
|
||||
BOOST_REQUIRE_EQUAL(cs.get_reshaping_job(sstables, s, reshape_mode::strict).sstables.size(), uint64_t(s->max_compaction_threshold()));
|
||||
BOOST_REQUIRE_EQUAL(get_reshaping_job(cs, sstables, s, reshape_mode::strict).sstables.size(), uint64_t(s->max_compaction_threshold()));
|
||||
}
|
||||
|
||||
{
|
||||
@@ -3859,7 +3868,7 @@ SEASTAR_TEST_CASE(twcs_reshape_with_disjoint_set_test) {
|
||||
}
|
||||
|
||||
auto check_mode_correctness = [&] (reshape_mode mode) {
|
||||
auto ret = cs.get_reshaping_job(sstables, s, mode);
|
||||
auto ret = get_reshaping_job(cs, sstables, s, mode);
|
||||
BOOST_REQUIRE_EQUAL(ret.sstables.size(), uint64_t(s->max_compaction_threshold()));
|
||||
// fail if any file doesn't belong to set of small files
|
||||
bool has_big_sized_files = boost::algorithm::any_of(ret.sstables, [&] (const sstables::shared_sstable& sst) {
|
||||
@@ -3871,6 +3880,45 @@ SEASTAR_TEST_CASE(twcs_reshape_with_disjoint_set_test) {
|
||||
check_mode_correctness(reshape_mode::strict);
|
||||
check_mode_correctness(reshape_mode::relaxed);
|
||||
}
|
||||
|
||||
{
|
||||
// create set of 256 disjoint ssts that spans multiple windows (essentially what happens in off-strategy during node op)
|
||||
|
||||
std::vector<sstables::shared_sstable> sstables;
|
||||
sstables.reserve(disjoint_sstable_count);
|
||||
for (auto i = 0U; i < disjoint_sstable_count; i++) {
|
||||
std::vector<mutation> muts;
|
||||
muts.reserve(5);
|
||||
for (auto j = 0; j < 5; j++) {
|
||||
muts.push_back(make_row(i, std::chrono::hours(j * 8)));
|
||||
}
|
||||
auto sst = make_sstable_containing(sst_gen, std::move(muts));
|
||||
sstables.push_back(std::move(sst));
|
||||
}
|
||||
|
||||
auto job_size = [] (auto&& sst_range) {
|
||||
return boost::accumulate(sst_range | boost::adaptors::transformed(std::mem_fn(&sstable::bytes_on_disk)), uint64_t(0));
|
||||
};
|
||||
auto free_space_for_reshaping_sstables = [&job_size] (auto&& sst_range) {
|
||||
return job_size(std::move(sst_range)) * (time_window_compaction_strategy::reshape_target_space_overhead * 100);
|
||||
};
|
||||
|
||||
// all sstables can be reshaped in a single round if there's enough space
|
||||
{
|
||||
uint64_t free_space = free_space_for_reshaping_sstables(boost::make_iterator_range(sstables));
|
||||
BOOST_REQUIRE(get_reshaping_job(cs, sstables, s, reshape_mode::strict, free_space).sstables.size() == sstables.size());
|
||||
}
|
||||
|
||||
// only a subset can be reshaped in a single round to respect the 10% space overhead
|
||||
{
|
||||
const size_t sstables_that_fit_in_target_overhead = 10;
|
||||
uint64_t free_space = free_space_for_reshaping_sstables(boost::make_iterator_range(sstables.begin(), sstables.begin() + sstables_that_fit_in_target_overhead));
|
||||
auto target_space_overhead = free_space * time_window_compaction_strategy::reshape_target_space_overhead;
|
||||
auto job = get_reshaping_job(cs, sstables, s, reshape_mode::strict, free_space);
|
||||
BOOST_REQUIRE(job.sstables.size() < sstables.size());
|
||||
BOOST_REQUIRE(job_size(boost::make_iterator_range(job.sstables)) <= target_space_overhead);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -3913,7 +3961,7 @@ SEASTAR_TEST_CASE(stcs_reshape_overlapping_test) {
|
||||
sstables.push_back(std::move(sst));
|
||||
}
|
||||
|
||||
BOOST_REQUIRE(cs.get_reshaping_job(sstables, s, reshape_mode::strict).sstables.size() == disjoint_sstable_count);
|
||||
BOOST_REQUIRE(get_reshaping_job(cs, sstables, s, reshape_mode::strict).sstables.size() == disjoint_sstable_count);
|
||||
}
|
||||
|
||||
{
|
||||
@@ -3926,7 +3974,7 @@ SEASTAR_TEST_CASE(stcs_reshape_overlapping_test) {
|
||||
sstables.push_back(std::move(sst));
|
||||
}
|
||||
|
||||
BOOST_REQUIRE(cs.get_reshaping_job(sstables, s, reshape_mode::strict).sstables.size() == uint64_t(s->max_compaction_threshold()));
|
||||
BOOST_REQUIRE(get_reshaping_job(cs, sstables, s, reshape_mode::strict).sstables.size() == uint64_t(s->max_compaction_threshold()));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -3085,6 +3085,10 @@ future<> test_sstable_bytes_correctness(sstring tname, test_env_config cfg) {
|
||||
|
||||
auto sst = make_sstable_containing(env.make_sstable(schema), muts);
|
||||
|
||||
auto free_space = sst->get_storage().free_space().get();
|
||||
BOOST_REQUIRE(free_space > 0);
|
||||
testlog.info("prefix: {}, free space: {}", sst->get_storage().prefix(), free_space);
|
||||
|
||||
auto get_bytes_on_disk_from_storage = [&] (const sstables::shared_sstable& sst) {
|
||||
uint64_t bytes_on_disk = 0;
|
||||
auto& underlying_storage = const_cast<sstables::storage&>(sst->get_storage());
|
||||
@@ -3231,6 +3235,11 @@ std::pair<shared_sstable, size_t> create_sstable_with_bloom_filter(test_env& env
|
||||
return {sst, sst_bf_memory};
|
||||
}
|
||||
|
||||
void dispose_and_stop_tracking_bf_memory(shared_sstable&& sst, test_env_sstables_manager& mgr) {
|
||||
mgr.remove_sst_from_reclaimed(sst.get());
|
||||
shared_sstable::dispose(sst.release().release());
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_sstable_manager_auto_reclaim_and_reload_of_bloom_filter) {
|
||||
return test_env::do_with_async([] (test_env& env) {
|
||||
simple_schema ss;
|
||||
@@ -3268,7 +3277,7 @@ SEASTAR_TEST_CASE(test_sstable_manager_auto_reclaim_and_reload_of_bloom_filter)
|
||||
|
||||
// Test auto reload - disposing sst3 should trigger reload of the
|
||||
// smallest filter in the reclaimed list, which is sst1's bloom filter.
|
||||
shared_sstable::dispose(sst3.release().release());
|
||||
dispose_and_stop_tracking_bf_memory(std::move(sst3), sst_mgr);
|
||||
REQUIRE_EVENTUALLY_EQUAL(sst1->filter_memory_size(), sst1_bf_memory);
|
||||
// only sst4's bloom filter memory should be reported as reclaimed
|
||||
REQUIRE_EVENTUALLY_EQUAL(sst_mgr.get_total_memory_reclaimed(), sst4_bf_memory);
|
||||
@@ -3278,7 +3287,7 @@ SEASTAR_TEST_CASE(test_sstable_manager_auto_reclaim_and_reload_of_bloom_filter)
|
||||
}, {
|
||||
// limit available memory to the sstables_manager to test reclaiming.
|
||||
// this will set the reclaim threshold to 100 bytes.
|
||||
.available_memory = 1000
|
||||
.available_memory = 500
|
||||
});
|
||||
}
|
||||
|
||||
@@ -3333,7 +3342,7 @@ SEASTAR_TEST_CASE(test_bloom_filter_reclaim_during_reload) {
|
||||
utils::get_local_injector().enable("reload_reclaimed_components/pause", true);
|
||||
|
||||
// dispose sst2 to trigger reload of sst1's bloom filter
|
||||
shared_sstable::dispose(sst2.release().release());
|
||||
dispose_and_stop_tracking_bf_memory(std::move(sst2), sst_mgr);
|
||||
// _total_reclaimable_memory will be updated when the reload begins; wait for it.
|
||||
REQUIRE_EVENTUALLY_EQUAL(sst_mgr.get_total_reclaimable_memory(), sst1_bf_memory);
|
||||
|
||||
@@ -3356,6 +3365,60 @@ SEASTAR_TEST_CASE(test_bloom_filter_reclaim_during_reload) {
|
||||
}, {
|
||||
// limit available memory to the sstables_manager to test reclaiming.
|
||||
// this will set the reclaim threshold to 100 bytes.
|
||||
.available_memory = 1000
|
||||
.available_memory = 500
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_bloom_filter_reload_after_unlink) {
|
||||
return test_env::do_with_async([] (test_env& env) {
|
||||
#ifndef SCYLLA_ENABLE_ERROR_INJECTION
|
||||
fmt::print("Skipping test as it depends on error injection. Please run in mode where it's enabled (debug,dev).\n");
|
||||
return;
|
||||
#endif
|
||||
simple_schema ss;
|
||||
auto schema = ss.schema();
|
||||
|
||||
auto mut = mutation(schema, ss.make_pkey(1));
|
||||
mut.partition().apply_insert(*schema, ss.make_ckey(1), ss.new_timestamp());
|
||||
|
||||
// bloom filter will be reclaimed automatically due to low memory
|
||||
auto sst = make_sstable_containing(env.make_sstable(schema), {mut});
|
||||
auto& sst_mgr = env.manager();
|
||||
BOOST_REQUIRE_EQUAL(sst->filter_memory_size(), 0);
|
||||
auto memory_reclaimed = sst_mgr.get_total_memory_reclaimed();
|
||||
|
||||
// manager's reclaimed set has the sst now
|
||||
auto& reclaimed_set = sst_mgr.get_reclaimed_set();
|
||||
BOOST_REQUIRE_EQUAL(reclaimed_set.size(), 1);
|
||||
BOOST_REQUIRE_EQUAL(reclaimed_set.begin()->get_filename(), sst->get_filename());
|
||||
|
||||
// hold a copy of shared sst object in async thread to test reload after unlink
|
||||
utils::get_local_injector().enable("test_bloom_filter_reload_after_unlink");
|
||||
auto async_sst_holder = seastar::async([sst] {
|
||||
// do nothing just hold a copy of sst and wait for message signalling test completion
|
||||
utils::get_local_injector().inject_with_handler("test_bloom_filter_reload_after_unlink", [] (auto& handler) {
|
||||
auto ret = handler.wait_for_message(std::chrono::steady_clock::now() + std::chrono::seconds{5});
|
||||
return ret;
|
||||
}).get();
|
||||
});
|
||||
|
||||
// unlink the sst and release the object
|
||||
sst->unlink().get();
|
||||
sst.release();
|
||||
|
||||
// reclaimed set should be now empty but the total memory reclaimed should
|
||||
// be still the same as the sst object is not deactivated yet due to a copy
|
||||
// being alive in the async thread.
|
||||
BOOST_REQUIRE_EQUAL(sst_mgr.get_reclaimed_set().size(), 0);
|
||||
BOOST_REQUIRE_EQUAL(sst_mgr.get_total_memory_reclaimed(), memory_reclaimed);
|
||||
|
||||
// message async thread to complete waiting and thus release its copy of sst, triggering deactivation
|
||||
utils::get_local_injector().receive_message("test_bloom_filter_reload_after_unlink");
|
||||
async_sst_holder.get();
|
||||
|
||||
REQUIRE_EVENTUALLY_EQUAL(sst_mgr.get_total_memory_reclaimed(), 0);
|
||||
}, {
|
||||
// set available memory = 0 to force reclaim the bloom filter
|
||||
.available_memory = 0
|
||||
});
|
||||
};
|
||||
|
||||
@@ -615,7 +615,10 @@ private:
|
||||
_sl_controller.invoke_on_all(&qos::service_level_controller::start).get();
|
||||
|
||||
_sys_ks.start(std::ref(_qp), std::ref(_db)).get();
|
||||
auto stop_sys_kd = defer([this] { _sys_ks.stop().get(); });
|
||||
auto stop_sys_kd = defer([this] {
|
||||
_sys_ks.invoke_on_all(&db::system_keyspace::shutdown).get();
|
||||
_sys_ks.stop().get();
|
||||
});
|
||||
|
||||
replica::distributed_loader::init_system_keyspace(_sys_ks, _erm_factory, _db).get();
|
||||
_db.local().maybe_init_schema_commitlog();
|
||||
@@ -785,9 +788,6 @@ private:
|
||||
}
|
||||
|
||||
group0_client.init().get();
|
||||
auto stop_system_keyspace = defer([this] {
|
||||
_sys_ks.invoke_on_all(&db::system_keyspace::shutdown).get();
|
||||
});
|
||||
|
||||
auto shutdown_db = defer([this] {
|
||||
_db.invoke_on_all(&replica::database::shutdown).get();
|
||||
|
||||
@@ -54,6 +54,14 @@ public:
|
||||
size_t get_total_reclaimable_memory() {
|
||||
return _total_reclaimable_memory;
|
||||
}
|
||||
|
||||
void remove_sst_from_reclaimed(sstable* sst) {
|
||||
_reclaimed.erase(*sst);
|
||||
}
|
||||
|
||||
auto& get_reclaimed_set() {
|
||||
return _reclaimed;
|
||||
}
|
||||
};
|
||||
|
||||
struct test_env_config {
|
||||
|
||||
@@ -110,6 +110,7 @@ public:
|
||||
api::timestamp_type min_memtable_timestamp() const override {
|
||||
return table().min_memtable_timestamp();
|
||||
}
|
||||
bool memtable_has_key(const dht::decorated_key& key) const override { return false; }
|
||||
future<> on_compaction_completion(sstables::compaction_completion_desc desc, sstables::offstrategy offstrategy) override {
|
||||
return table().as_table_state().on_compaction_completion(std::move(desc), offstrategy);
|
||||
}
|
||||
@@ -201,7 +202,7 @@ test_env::impl::impl(test_env_config cfg, sstables::storage_manager* sstm)
|
||||
, feature_service(gms::feature_config_from_db_config(*db_config))
|
||||
, mgr(cfg.large_data_handler == nullptr ? nop_ld_handler : *cfg.large_data_handler, *db_config,
|
||||
feature_service, cache_tracker, cfg.available_memory, dir_sem,
|
||||
[host_id = locator::host_id::create_random_id()]{ return host_id; }, sstm)
|
||||
[host_id = locator::host_id::create_random_id()]{ return host_id; }, current_scheduling_group(), sstm)
|
||||
, semaphore(reader_concurrency_semaphore::no_limits{}, "sstables::test_env")
|
||||
, storage(std::move(cfg.storage))
|
||||
{ }
|
||||
|
||||
@@ -77,7 +77,7 @@ def _wrap_future(driver_response_future: ResponseFuture, all_pages: bool = False
|
||||
|
||||
|
||||
# TODO: paged result query handling (iterable?)
|
||||
def run_async(self, *args, all_pages = False, **kwargs) -> asyncio.Future:
|
||||
def run_async(self, *args, all_pages = True, **kwargs) -> asyncio.Future:
|
||||
"""Execute a CQL query asynchronously by wrapping the driver's future"""
|
||||
# The default timeouts should have been more than enough, but in some
|
||||
# extreme cases with a very slow debug build running on a slow or very busy
|
||||
|
||||
@@ -52,9 +52,10 @@ class MinioServer:
|
||||
self.default_user = 'minioadmin'
|
||||
self.default_pass = 'minioadmin'
|
||||
self.bucket_name = 'testbucket'
|
||||
self.access_key = ''.join(random.choice(string.hexdigits) for i in range(16))
|
||||
self.secret_key = ''.join(random.choice(string.hexdigits) for i in range(32))
|
||||
self.access_key = os.environ.get(self.ENV_ACCESS_KEY, ''.join(random.choice(string.hexdigits) for i in range(16)))
|
||||
self.secret_key = os.environ.get(self.ENV_SECRET_KEY, ''.join(random.choice(string.hexdigits) for i in range(32)))
|
||||
self.log_filename = (self.tempdir / 'minio').with_suffix(".log")
|
||||
self.old_env = dict()
|
||||
|
||||
def check_server(self, port):
|
||||
s = socket.socket()
|
||||
@@ -154,8 +155,11 @@ class MinioServer:
|
||||
with open(path, 'w', encoding='ascii') as config_file:
|
||||
endpoint = {'name': address,
|
||||
'port': port,
|
||||
'aws_access_key_id': acc_key,
|
||||
'aws_secret_access_key': secret_key,
|
||||
# don't put credentials here. We're exporing env vars, which should
|
||||
# be picked up properly by scylla.
|
||||
# https://github.com/scylladb/scylla-pkg/issues/3845
|
||||
#'aws_access_key_id': acc_key,
|
||||
#'aws_secret_access_key': secret_key,
|
||||
'aws_region': region,
|
||||
}
|
||||
yaml.dump({'endpoints': [endpoint]}, config_file)
|
||||
@@ -184,6 +188,37 @@ class MinioServer:
|
||||
|
||||
return cmd
|
||||
|
||||
def _set_environ(self):
|
||||
self.old_env = dict(os.environ)
|
||||
os.environ[self.ENV_CONFFILE] = f'{self.config_file}'
|
||||
os.environ[self.ENV_ADDRESS] = f'{self.address}'
|
||||
os.environ[self.ENV_PORT] = f'{self.port}'
|
||||
os.environ[self.ENV_BUCKET] = f'{self.bucket_name}'
|
||||
os.environ[self.ENV_ACCESS_KEY] = f'{self.access_key}'
|
||||
os.environ[self.ENV_SECRET_KEY] = f'{self.secret_key}'
|
||||
|
||||
def _get_environs(self):
|
||||
return [self.ENV_CONFFILE,
|
||||
self.ENV_ADDRESS,
|
||||
self.ENV_PORT,
|
||||
self.ENV_BUCKET,
|
||||
self.ENV_ACCESS_KEY,
|
||||
self.ENV_SECRET_KEY]
|
||||
|
||||
def _unset_environ(self):
|
||||
for env in self._get_environs():
|
||||
if self.old_env[env] is not None:
|
||||
os.environ[env] = self.old_env[env]
|
||||
else:
|
||||
del os.environ[env]
|
||||
|
||||
def print_environ(self):
|
||||
msgs = []
|
||||
for key in self._get_environs():
|
||||
value = os.environ[key]
|
||||
msgs.append(f'export {key}={value}')
|
||||
print('\n'.join(msgs))
|
||||
|
||||
async def start(self):
|
||||
if self.srv_exe is None:
|
||||
self.logger.info("Minio not installed, get it from https://dl.minio.io/server/minio/release/linux-amd64/minio and put into PATH")
|
||||
@@ -206,13 +241,7 @@ class MinioServer:
|
||||
return
|
||||
|
||||
self.create_conf_file(self.address, self.port, self.access_key, self.secret_key, self.DEFAULT_REGION, self.config_file)
|
||||
os.environ[self.ENV_CONFFILE] = f'{self.config_file}'
|
||||
os.environ[self.ENV_ADDRESS] = f'{self.address}'
|
||||
os.environ[self.ENV_PORT] = f'{self.port}'
|
||||
os.environ[self.ENV_BUCKET] = f'{self.bucket_name}'
|
||||
os.environ[self.ENV_ACCESS_KEY] = f'{self.access_key}'
|
||||
os.environ[self.ENV_SECRET_KEY] = f'{self.secret_key}'
|
||||
|
||||
self._set_environ()
|
||||
try:
|
||||
alias = 'local'
|
||||
self.log_to_file(f'Configuring access to {self.address}:{self.port}')
|
||||
@@ -238,6 +267,7 @@ class MinioServer:
|
||||
if not self.cmd:
|
||||
return
|
||||
|
||||
self._unset_environ()
|
||||
try:
|
||||
self.cmd.kill()
|
||||
except ProcessLookupError:
|
||||
|
||||
@@ -75,8 +75,7 @@ def make_scylla_conf(workdir: pathlib.Path, host_addr: str, seed_addrs: List[str
|
||||
'alternator-streams',
|
||||
'consistent-topology-changes',
|
||||
'broadcast-tables',
|
||||
'keyspace-storage-options',
|
||||
'tablets'],
|
||||
'keyspace-storage-options'],
|
||||
|
||||
'consistent_cluster_management': True,
|
||||
|
||||
@@ -396,7 +395,9 @@ class ScyllaServer:
|
||||
"""Start an installed server. May be used for restarts."""
|
||||
|
||||
env = os.environ.copy()
|
||||
env.clear() # pass empty env to make user user's SCYLLA_HOME has no impact
|
||||
# remove from env to make sure user's SCYLLA_HOME has no impact
|
||||
env.pop('SCYLLA_HOME', None)
|
||||
|
||||
self.cmd = await asyncio.create_subprocess_exec(
|
||||
self.exe,
|
||||
*self.cmdline_options,
|
||||
|
||||
@@ -115,7 +115,7 @@ async def test_paged_result(manager, random_tables):
|
||||
|
||||
# Check only 1 page
|
||||
stmt = SimpleStatement(f"SELECT * FROM {table} ALLOW FILTERING", fetch_size = fetch_size)
|
||||
res = await cql.run_async(stmt)
|
||||
res = await cql.run_async(stmt, all_pages=False)
|
||||
assert len(res) == fetch_size
|
||||
|
||||
# Check all pages
|
||||
|
||||
182
test/topology_custom/test_alternator.py
Normal file
182
test/topology_custom/test_alternator.py
Normal file
@@ -0,0 +1,182 @@
|
||||
#
|
||||
# Copyright (C) 2024-present ScyllaDB
|
||||
#
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
#
|
||||
|
||||
# Multi-node tests for Alternator.
|
||||
#
|
||||
# Please note that most tests for Alternator are single-node tests and can
|
||||
# be found in the test/alternator directory. Most functional testing of the
|
||||
# many different syntax features that Alternator provides don't need more
|
||||
# than a single node to be tested, and should be able to run also on DynamoDB
|
||||
# - not just on Alternator, which the test/alternator framework allows to do.
|
||||
# So only the minority of tests that do need a bigger cluster should be here.
|
||||
|
||||
import pytest
|
||||
import asyncio
|
||||
import logging
|
||||
import time
|
||||
import boto3
|
||||
import botocore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Convenience function to open a connection to Alternator usable by the
|
||||
# AWS SDK.
|
||||
alternator_config = {
|
||||
'alternator_port': 8000,
|
||||
'alternator_write_isolation': 'only_rmw_uses_lwt',
|
||||
'alternator_ttl_period_in_seconds': '0.5',
|
||||
}
|
||||
def get_alternator(ip):
|
||||
url = f"http://{ip}:{alternator_config['alternator_port']}"
|
||||
return boto3.resource('dynamodb', endpoint_url=url,
|
||||
region_name='us-east-1',
|
||||
aws_access_key_id='alternator',
|
||||
aws_secret_access_key='secret_pass',
|
||||
config=botocore.client.Config(
|
||||
retries={"max_attempts": 0},
|
||||
read_timeout=300)
|
||||
)
|
||||
|
||||
# Alternator convenience function for fetching the entire result set of a
|
||||
# query into an array of items.
|
||||
def full_query(table, ConsistentRead=True, **kwargs):
|
||||
response = table.query(ConsistentRead=ConsistentRead, **kwargs)
|
||||
items = response['Items']
|
||||
while 'LastEvaluatedKey' in response:
|
||||
response = table.query(ExclusiveStartKey=response['LastEvaluatedKey'],
|
||||
ConsistentRead=ConsistentRead, **kwargs)
|
||||
items.extend(response['Items'])
|
||||
return items
|
||||
|
||||
# FIXME: boto3 is NOT async. So all tests that use it are not really async.
|
||||
# We could use the aioboto3 library to write a really asynchronous test, or
|
||||
# implement an async wrapper to the boto3 functions ourselves (e.g., run them
|
||||
# in a separate thread) ourselves.
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
async def alternator3(manager_internal):
|
||||
"""A fixture with a 3-node Alternator cluster that can be shared between
|
||||
multiple tests. These test should not modify the cluster's topology,
|
||||
and should each use unique table names and/or unique keys to avoid
|
||||
being confused by other tests.
|
||||
Returns the manager object and 3 boto3 resource objects for making
|
||||
DynamoDB API requests to each of the nodes in the Alternator cluster.
|
||||
"""
|
||||
manager = manager_internal
|
||||
servers = [await manager.server_add(config=alternator_config) for _ in range(3)]
|
||||
yield [manager] + [get_alternator(server.ip_addr) for server in servers]
|
||||
await manager.stop()
|
||||
|
||||
test_table_prefix = 'alternator_Test_'
|
||||
def unique_table_name():
|
||||
current_ms = int(round(time.time() * 1000))
|
||||
# If unique_table_name() is called twice in the same millisecond...
|
||||
if unique_table_name.last_ms >= current_ms:
|
||||
current_ms = unique_table_name.last_ms + 1
|
||||
unique_table_name.last_ms = current_ms
|
||||
return test_table_prefix + str(current_ms)
|
||||
unique_table_name.last_ms = 0
|
||||
|
||||
|
||||
async def test_alternator_ttl_scheduling_group(alternator3):
|
||||
"""A reproducer for issue #18719: The expiration scans and deletions
|
||||
initiated by the Alternator TTL feature are supposed to run entirely in
|
||||
the "streaming" scheduling group. But because of a bug in inheritence
|
||||
of scheduling groups through RPC, some of the work ended up being done
|
||||
on the "statement" scheduling group.
|
||||
This test verifies that Alternator TTL work is done on the right
|
||||
scheduling group.
|
||||
This test assumes that the cluster is not concurrently busy with
|
||||
running any other workload - so we won't see any work appearing
|
||||
in the wrong scheduling group. We can assume this because we don't
|
||||
run multiple tests in parallel on the same cluster.
|
||||
"""
|
||||
manager, alternator, *_ = alternator3
|
||||
table = alternator.create_table(TableName=unique_table_name(),
|
||||
BillingMode='PAY_PER_REQUEST',
|
||||
KeySchema=[
|
||||
{'AttributeName': 'p', 'KeyType': 'HASH' },
|
||||
],
|
||||
AttributeDefinitions=[
|
||||
{'AttributeName': 'p', 'AttributeType': 'N' },
|
||||
])
|
||||
# Enable expiration (TTL) on attribute "expiration"
|
||||
table.meta.client.update_time_to_live(TableName=table.name, TimeToLiveSpecification={'AttributeName': 'expiration', 'Enabled': True})
|
||||
|
||||
# Insert N rows, setting them all to expire 3 seconds from now.
|
||||
N = 100
|
||||
expiration = int(time.time())+3
|
||||
with table.batch_writer() as batch:
|
||||
for p in range(N):
|
||||
batch.put_item(Item={'p': p, 'expiration': expiration})
|
||||
|
||||
|
||||
# Unfortunately, Alternator has no way of doing the writes above with
|
||||
# CL=ALL, only CL=QUORUM. So at this point we're not sure all the writes
|
||||
# above have completed. We want to wait until they are over, so that we
|
||||
# won't measure any of those writes in the statement scheduling group.
|
||||
# Let's do it by checking the metrics of background writes and wait for
|
||||
# them to drop to zero.
|
||||
ips = [server.ip_addr for server in await manager.running_servers()]
|
||||
timeout = time.time() + 60
|
||||
while True:
|
||||
if time.time() > timeout:
|
||||
pytest.fail("timed out waiting for background writes to complete")
|
||||
bg_writes = 0
|
||||
for ip in ips:
|
||||
metrics = await manager.metrics.query(ip)
|
||||
bg_writes += metrics.get('scylla_storage_proxy_coordinator_background_writes')
|
||||
if bg_writes == 0:
|
||||
break # done waiting for the background writes to finish
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
# Get the current amount of work (in CPU ms) done across all nodes and
|
||||
# shards in different scheduling groups. We expect this to increase
|
||||
# considerably for the streaming group while expiration scanning is
|
||||
# proceeding, but not increase at all for the statement group because
|
||||
# there are no requests being executed.
|
||||
async def get_cpu_metrics():
|
||||
ms_streaming = 0
|
||||
ms_statement = 0
|
||||
for ip in ips:
|
||||
metrics = await manager.metrics.query(ip)
|
||||
ms_streaming += metrics.get('scylla_scheduler_runtime_ms', {'group': 'streaming'})
|
||||
ms_statement += metrics.get('scylla_scheduler_runtime_ms', {'group': 'statement'})
|
||||
return (ms_streaming, ms_statement)
|
||||
|
||||
ms_streaming_before, ms_statement_before = await get_cpu_metrics()
|
||||
|
||||
# Wait until all rows expire, and get the CPU metrics again. All items
|
||||
# were set to expire in 3 seconds, and the expiration thread is set up
|
||||
# in alternator_config to scan the whole table in 0.5 seconds, and the
|
||||
# whole table is just 100 rows, so we expect all the data to be gone in
|
||||
# 4 seconds. Let's wait 5 seconds just in case. Even if not all the data
|
||||
# will have been deleted by then, we do expect some deletions to have
|
||||
# happened, and certainly several scans, all taking CPU which we expect
|
||||
# to be in the right scheduling group.
|
||||
await asyncio.sleep(5)
|
||||
ms_streaming_after, ms_statement_after = await get_cpu_metrics()
|
||||
|
||||
# As a sanity check, verify some of the data really expired, so there
|
||||
# was some TTL work actually done. We actually expect all of the data
|
||||
# to have been expired by now, but in some extremely slow builds and
|
||||
# test machines, this may not be the case.
|
||||
assert N > table.scan(ConsistentRead=True, Select='COUNT')['Count']
|
||||
|
||||
# Between the calls to get_cpu_metrics() above, several expiration scans
|
||||
# took place (we configured scans to happen every 0.5 seconds), and also
|
||||
# a lot of deletes when the expiration time was reached. We expect all
|
||||
# that work to have happened in the streaming group, not statement group,
|
||||
# so "ratio" calculate below should be tiny, even exactly zero. Before
|
||||
# issue #18719 was fixed, it was not tiny at all - 0.58.
|
||||
# Just in case there are other unknown things happening, let's assert it
|
||||
# is <0.1 instead of zero.
|
||||
ms_streaming = ms_streaming_after - ms_streaming_before
|
||||
ms_statement = ms_statement_after - ms_statement_before
|
||||
ratio = ms_statement / ms_streaming
|
||||
assert ratio < 0.1
|
||||
|
||||
table.delete()
|
||||
51
test/topology_custom/test_hints.py
Normal file
51
test/topology_custom/test_hints.py
Normal file
@@ -0,0 +1,51 @@
|
||||
#
|
||||
# Copyright (C) 2024-present ScyllaDB
|
||||
#
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
#
|
||||
import asyncio
|
||||
import pytest
|
||||
import time
|
||||
import logging
|
||||
import requests
|
||||
import re
|
||||
|
||||
from cassandra.cluster import ConnectionException, NoHostAvailable # type: ignore
|
||||
from cassandra.query import SimpleStatement, ConsistencyLevel
|
||||
|
||||
from test.pylib.manager_client import ManagerClient
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Write with RF=1 and CL=ANY to a dead node should write hints and succeed
|
||||
@pytest.mark.asyncio
|
||||
async def test_write_cl_any_to_dead_node_generates_hints(manager: ManagerClient):
|
||||
node_count = 2
|
||||
servers = [await manager.server_add() for _ in range(node_count)]
|
||||
|
||||
cql = manager.get_cql()
|
||||
await cql.run_async("CREATE KEYSPACE ks WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1}")
|
||||
await cql.run_async("CREATE TABLE ks.t (pk int primary key, v int)")
|
||||
|
||||
await manager.server_stop_gracefully(servers[1].server_id)
|
||||
|
||||
def get_hints_written_count(server):
|
||||
c = 0
|
||||
metrics = requests.get(f"http://{server.ip_addr}:9180/metrics").text
|
||||
pattern = re.compile("^scylla_hints_manager_written")
|
||||
for metric in metrics.split('\n'):
|
||||
if pattern.match(metric) is not None:
|
||||
c += int(float(metric.split()[1]))
|
||||
return c
|
||||
|
||||
hints_before = get_hints_written_count(servers[0])
|
||||
|
||||
# Some of the inserts will be targeted to the dead node.
|
||||
# The coordinator doesn't have live targets to send the write to, but it should write a hint.
|
||||
for i in range(100):
|
||||
await cql.run_async(SimpleStatement(f"INSERT INTO ks.t (pk, v) VALUES ({i}, {i+1})", consistency_level=ConsistencyLevel.ANY))
|
||||
|
||||
# Verify hints are written
|
||||
hints_after = get_hints_written_count(servers[0])
|
||||
assert hints_after > hints_before
|
||||
37
test/topology_custom/test_lwt_semaphore.py
Normal file
37
test/topology_custom/test_lwt_semaphore.py
Normal file
@@ -0,0 +1,37 @@
|
||||
#
|
||||
# Copyright (C) 2024-present ScyllaDB
|
||||
#
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from test.pylib.rest_client import inject_error
|
||||
from test.pylib.util import wait_for_cql_and_get_hosts
|
||||
import pytest
|
||||
from cassandra.protocol import WriteTimeout
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_cas_semaphore(manager):
|
||||
""" This is a regression test for scylladb/scylladb#19698 """
|
||||
servers = [await manager.server_add(cmdline=['--smp', '1', '--write-request-timeout-in-ms', '500'])]
|
||||
|
||||
host = await wait_for_cql_and_get_hosts(manager.cql, {servers[0]}, time.time() + 60)
|
||||
|
||||
await manager.cql.run_async("CREATE KEYSPACE test WITH REPLICATION = {'class': 'SimpleStrategy', 'replication_factor': 1}")
|
||||
await manager.cql.run_async("CREATE TABLE test.test (a int PRIMARY KEY, b int)")
|
||||
|
||||
async with inject_error(manager.api, servers[0].ip_addr, 'cas_timeout_after_lock'):
|
||||
res = [manager.cql.run_async(f"INSERT INTO test.test (a) VALUES (0) IF NOT EXISTS", host=host[0]) for r in range(10)]
|
||||
try:
|
||||
await asyncio.gather(*res)
|
||||
except WriteTimeout:
|
||||
pass
|
||||
|
||||
res = [manager.cql.run_async(f"INSERT INTO test.test (a) VALUES (0) IF NOT EXISTS", host=host[0]) for r in range(10)]
|
||||
await asyncio.gather(*res)
|
||||
|
||||
metrics = await manager.metrics.query(servers[0].ip_addr)
|
||||
contention = metrics.get(name="scylla_storage_proxy_coordinator_cas_write_contention_count")
|
||||
|
||||
assert contention == None
|
||||
49
test/topology_custom/test_mv_topology_change.py
Normal file
49
test/topology_custom/test_mv_topology_change.py
Normal file
@@ -0,0 +1,49 @@
|
||||
#
|
||||
# Copyright (C) 2024-present ScyllaDB
|
||||
#
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
#
|
||||
import asyncio
|
||||
import pytest
|
||||
import time
|
||||
import logging
|
||||
import requests
|
||||
import re
|
||||
|
||||
from cassandra.cluster import ConnectionException, NoHostAvailable # type: ignore
|
||||
|
||||
from test.pylib.manager_client import ManagerClient
|
||||
from test.topology.conftest import skip_mode
|
||||
from test.pylib.util import wait_for
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Reproduces issue #19529
|
||||
# Write to a table with MV while one node is stopped, and verify
|
||||
# it doesn't cause MV write timeouts or preventing topology changes.
|
||||
# The writes that are targeted to the stopped node are with CL=ANY so
|
||||
# they should store a hint and then complete successfuly.
|
||||
# If the MV write handler is not completed after storing the hint, as in
|
||||
# issue #19529, it remains active until it timeouts, preventing topology changes
|
||||
# during this time.
|
||||
@pytest.mark.asyncio
|
||||
async def test_mv_write_to_dead_node(manager: ManagerClient):
|
||||
servers = [await manager.server_add() for _ in range(4)]
|
||||
|
||||
cql = manager.get_cql()
|
||||
await cql.run_async("CREATE KEYSPACE ks WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 3}")
|
||||
await cql.run_async("CREATE TABLE ks.t (pk int primary key, v int)")
|
||||
await cql.run_async("CREATE materialized view ks.t_view AS select pk, v from ks.t where v is not null primary key (v, pk)")
|
||||
|
||||
await manager.server_stop_gracefully(servers[-1].server_id)
|
||||
|
||||
# Do inserts. some should generate MV writes to the stopped node
|
||||
for i in range(100):
|
||||
await cql.run_async(f"insert into ks.t (pk, v) values ({i}, {i+1})")
|
||||
|
||||
# Remove the node to trigger a topology change.
|
||||
# If the MV write is not completed, as in issue #19529, the topology change
|
||||
# will be held for long time until the write timeouts.
|
||||
# Otherwise, it is expected to complete in short time.
|
||||
await manager.remove_node(servers[0].server_id, servers[-1].server_id)
|
||||
@@ -892,6 +892,7 @@ public:
|
||||
virtual sstables::shared_sstable make_sstable() const override { return do_make_sstable(); }
|
||||
virtual sstables::sstable_writer_config configure_writer(sstring origin) const override { return do_configure_writer(std::move(origin)); }
|
||||
virtual api::timestamp_type min_memtable_timestamp() const override { return api::min_timestamp; }
|
||||
virtual bool memtable_has_key(const dht::decorated_key& key) const override { return false; }
|
||||
virtual future<> on_compaction_completion(sstables::compaction_completion_desc desc, sstables::offstrategy offstrategy) override { return make_ready_future<>(); }
|
||||
virtual bool is_auto_compaction_disabled_by_user() const noexcept override { return false; }
|
||||
virtual bool tombstone_gc_enabled() const noexcept override { return false; }
|
||||
|
||||
@@ -188,6 +188,24 @@ public:
|
||||
void invalidate_references() noexcept {
|
||||
++_invalidate_counter;
|
||||
}
|
||||
|
||||
// Asks the allocator to set aside some free memory,
|
||||
// preventing it from being allocated until the matching
|
||||
// unreserve() call. Can be used to preallocate some memory
|
||||
// for a critical section where allocations can't fail.
|
||||
//
|
||||
// This is hack designed with the implementation details of the
|
||||
// log-structured allocator in mind. In other allocators,
|
||||
// it doesn't do anything useful.
|
||||
//
|
||||
// Don't use this unless you understand exactly what you are doing.
|
||||
virtual uintptr_t reserve(size_t memory) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// As the argument to this function, you must pass the *return value* of the matching reserve().
|
||||
virtual void unreserve(uintptr_t opaque) noexcept {
|
||||
}
|
||||
};
|
||||
|
||||
class standard_allocation_strategy : public allocation_strategy {
|
||||
@@ -257,6 +275,16 @@ struct alloc_strategy_deleter {
|
||||
}
|
||||
};
|
||||
|
||||
// RAII for allocation_strategy::reserve().
|
||||
class hold_reserve {
|
||||
uintptr_t _opaque;
|
||||
public:
|
||||
hold_reserve(size_t memory) : _opaque(current_allocator().reserve(memory)) {}
|
||||
~hold_reserve() { current_allocator().unreserve(_opaque); }
|
||||
// Disallow copying and moving. They *could* be implemented, but I just didn't bother.
|
||||
hold_reserve(hold_reserve&&) = delete;
|
||||
};
|
||||
|
||||
// std::unique_ptr which can be used for owning an object allocated using allocation_strategy.
|
||||
// Must be destroyed before the pointer is invalidated. For compacting allocators, that
|
||||
// means it must not escape outside allocating_section or reclaim lock.
|
||||
|
||||
@@ -81,7 +81,7 @@ private:
|
||||
}
|
||||
}
|
||||
void do_reserve_for_push_back();
|
||||
size_t make_room(size_t n, bool stop_after_one);
|
||||
void make_room(size_t n, bool stop_after_one);
|
||||
chunk_ptr new_chunk(size_t n);
|
||||
T* addr(size_t i) const {
|
||||
return &_chunks[i / max_chunk_capacity()][i % max_chunk_capacity()];
|
||||
@@ -177,22 +177,19 @@ public:
|
||||
///
|
||||
/// Allows reserving the memory chunk-by-chunk, avoiding stalls when a lot of
|
||||
/// chunks are needed. To drive the reservation to completion, call this
|
||||
/// repeatedly with the value returned from the previous call until it
|
||||
/// returns 0, yielding between calls when necessary. Example usage:
|
||||
/// repeatedly until the vector's capacity reaches the expected size, yielding
|
||||
/// between calls when necessary. Example usage:
|
||||
///
|
||||
/// return do_until([&size] { return !size; }, [&my_vector, &size] () mutable {
|
||||
/// size = my_vector.reserve_partial(size);
|
||||
/// return do_until([&my_vector, size] { return my_vector.capacity() == size; }, [&my_vector, size] () mutable {
|
||||
/// my_vector.reserve_partial(size);
|
||||
/// });
|
||||
///
|
||||
/// Here, `do_until()` takes care of yielding between iterations when
|
||||
/// necessary.
|
||||
///
|
||||
/// \returns the memory that remains to be reserved
|
||||
size_t reserve_partial(size_t n) {
|
||||
void reserve_partial(size_t n) {
|
||||
if (n > _capacity) {
|
||||
return make_room(n, true);
|
||||
make_room(n, true);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t memory_size() const {
|
||||
@@ -402,7 +399,7 @@ chunked_vector<T, max_contiguous_allocation>::migrate(T* begin, T* end, T* resul
|
||||
}
|
||||
|
||||
template <typename T, size_t max_contiguous_allocation>
|
||||
size_t
|
||||
void
|
||||
chunked_vector<T, max_contiguous_allocation>::make_room(size_t n, bool stop_after_one) {
|
||||
// First, if the last chunk is below max_chunk_capacity(), enlarge it
|
||||
|
||||
@@ -434,7 +431,6 @@ chunked_vector<T, max_contiguous_allocation>::make_room(size_t n, bool stop_afte
|
||||
_capacity += now;
|
||||
stop = stop_after_one;
|
||||
}
|
||||
return (n - _capacity);
|
||||
}
|
||||
|
||||
template <typename T, size_t max_contiguous_allocation>
|
||||
|
||||
@@ -7,31 +7,23 @@
|
||||
*/
|
||||
|
||||
#include "large_bitset.hh"
|
||||
#include <algorithm>
|
||||
#include <seastar/core/align.hh>
|
||||
#include <seastar/core/thread.hh>
|
||||
#include "seastarx.hh"
|
||||
|
||||
using namespace seastar;
|
||||
|
||||
large_bitset::large_bitset(size_t nr_bits) : _nr_bits(nr_bits) {
|
||||
assert(thread::running_in_thread());
|
||||
|
||||
const size_t orig_nr_ints = align_up(nr_bits, bits_per_int()) / bits_per_int();
|
||||
auto nr_ints = orig_nr_ints;
|
||||
while (nr_ints) {
|
||||
nr_ints = _storage.reserve_partial(nr_ints);
|
||||
if (need_preempt()) {
|
||||
thread::yield();
|
||||
}
|
||||
size_t nr_ints = align_up(nr_bits, bits_per_int()) / bits_per_int();
|
||||
while (_storage.capacity() != nr_ints) {
|
||||
_storage.reserve_partial(nr_ints);
|
||||
thread::maybe_yield();
|
||||
}
|
||||
nr_ints = orig_nr_ints;
|
||||
while (nr_ints) {
|
||||
_storage.push_back(0);
|
||||
--nr_ints;
|
||||
if (need_preempt()) {
|
||||
thread::yield();
|
||||
}
|
||||
thread::maybe_yield();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,8 +32,6 @@ large_bitset::clear() {
|
||||
assert(thread::running_in_thread());
|
||||
for (auto&& pos: _storage) {
|
||||
pos = 0;
|
||||
if (need_preempt()) {
|
||||
thread::yield();
|
||||
}
|
||||
thread::maybe_yield();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1007,7 +1007,17 @@ class segment_pool {
|
||||
utils::dynamic_bitset _lsa_owned_segments_bitmap; // owned by this
|
||||
utils::dynamic_bitset _lsa_free_segments_bitmap; // owned by this, but not in use
|
||||
size_t _free_segments = 0;
|
||||
|
||||
// Invariant: _free_segments > _current_emergency_reserve_goal.
|
||||
// Used to ensure that some critical allocations won't fail.
|
||||
// (We grow _current_emergency_reserve_goal in advance and shrink it right
|
||||
// before the critical allocations, which allows them to utilize the pre-reserved
|
||||
// segments).
|
||||
size_t _current_emergency_reserve_goal = 1;
|
||||
// Used by allocating_section to request a certain number of free segments
|
||||
// to be prepared for usage when the section is entered.
|
||||
// This is more of a side-channel argument to refill_emergency_reserve() than a real piece of state.
|
||||
// Passing it via a variable makes it easier to debug.
|
||||
size_t _emergency_reserve_max = 30;
|
||||
bool _allocation_failure_flag = false;
|
||||
bool _allocation_enabled = true;
|
||||
@@ -1088,6 +1098,7 @@ public:
|
||||
void clear_allocation_failure_flag() noexcept { _allocation_failure_flag = false; }
|
||||
bool allocation_failure_flag() const noexcept { return _allocation_failure_flag; }
|
||||
void refill_emergency_reserve();
|
||||
void ensure_free_segments(size_t n_segments);
|
||||
void add_non_lsa_memory_in_use(size_t n) noexcept {
|
||||
_non_lsa_memory_in_use += n;
|
||||
}
|
||||
@@ -1330,10 +1341,18 @@ void segment_pool::deallocate_segment(segment* seg) noexcept
|
||||
}
|
||||
|
||||
void segment_pool::refill_emergency_reserve() {
|
||||
while (_free_segments < _emergency_reserve_max) {
|
||||
auto seg = allocate_segment(_emergency_reserve_max);
|
||||
try {
|
||||
ensure_free_segments(_emergency_reserve_max);
|
||||
} catch (const std::bad_alloc&) {
|
||||
throw bad_alloc(format("failed to refill emergency reserve of {} (have {} free segments)", _emergency_reserve_max, _free_segments));
|
||||
}
|
||||
}
|
||||
|
||||
void segment_pool::ensure_free_segments(size_t n_segments) {
|
||||
while (_free_segments < n_segments) {
|
||||
auto seg = allocate_segment(n_segments);
|
||||
if (!seg) {
|
||||
throw bad_alloc(format("failed to refill emergency reserve of {} (have {} free segments)", _emergency_reserve_max, _free_segments));
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
++_segments_in_use;
|
||||
free_segment(seg);
|
||||
@@ -2337,6 +2356,44 @@ public:
|
||||
return _eviction_fn;
|
||||
}
|
||||
|
||||
// LSA holds an internal "emergency reserve" of free segments that
|
||||
// is only "opened" for usage before some critical allocations
|
||||
// (in particular: the ones performed during memory compaction)
|
||||
// to ensure that they won't fail.
|
||||
//
|
||||
// Here we hijack this mechanism to let the rest of the application implement
|
||||
// some critical sections with infallible LSA allocations.
|
||||
//
|
||||
// reserve() increments the size of the internal emergency reserve,
|
||||
// unreserve() decrements it.
|
||||
//
|
||||
// When you want to have some critical section that has to do some LSA
|
||||
// allocations infallibly (e.g. to restore some invariants
|
||||
// of a LSA-managed data structure in a destructor), you can call reserve()
|
||||
// beforehand to ensure that some extra memory will be held unused,
|
||||
// and then call unreserve() (with reserve()'s return value as the argument)
|
||||
// to make the reserved free segments available to the critical section.
|
||||
//
|
||||
uintptr_t reserve(size_t memory) override {
|
||||
// We round up the requested reserve to full segments.
|
||||
size_t n_segments = (memory + segment::size - 1) >> segment::size_shift;
|
||||
|
||||
auto& pool = segment_pool();
|
||||
size_t new_goal = pool.current_emergency_reserve_goal() + n_segments;
|
||||
pool.ensure_free_segments(new_goal);
|
||||
pool.set_current_emergency_reserve_goal(new_goal);
|
||||
|
||||
static_assert(sizeof(uintptr_t) >= sizeof(size_t));
|
||||
return n_segments;
|
||||
}
|
||||
|
||||
void unreserve(uintptr_t n_segments) noexcept override {
|
||||
auto& pool = segment_pool();
|
||||
assert(pool.current_emergency_reserve_goal() >= n_segments);
|
||||
size_t new_goal = pool.current_emergency_reserve_goal() - n_segments;
|
||||
pool.set_current_emergency_reserve_goal(new_goal);
|
||||
}
|
||||
|
||||
friend class region;
|
||||
friend class lsa_buffer;
|
||||
friend class region_evictable_occupancy_ascending_less_comparator;
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user