Compare commits

..

1 Commits

Author SHA1 Message Date
Yaron Kaikov
dcbc6c839d ./github/scripts/auto-backport.py: don't remove backport label when backport process has an error
Today, when the `Fixes` prefix is missing or the developer is not a collaborator with `scylladbbot` we remove the backport labels to prevent the process from starting and notifying the developers.

Developers are worried that removing these backport labels will cause us to forget we need to do these backports. @nyh suggested to add a `scylladbbot/backport_error` label instead

Applied those changes, so when a `Fixes` prefix is missing we will add a `scylladbbot/backport_error` label and stop the process

When a user doesn't accept the invite we will still open the PR but he will not be assigned and will not be able to edit the branch when we have conflicts

Fixes: https://github.com/scylladb/scylla-pkg/issues/4898
Fixes: https://github.com/scylladb/scylla-pkg/issues/4897
2025-03-18 13:58:59 +02:00
836 changed files with 9694 additions and 29034 deletions

14
.github/CODEOWNERS vendored
View File

@@ -1,5 +1,5 @@
# AUTH
auth/* @nuivall @ptrsmrn
auth/* @nuivall @ptrsmrn @KrzaQ
# CACHE
row_cache* @tgrabiec
@@ -25,15 +25,15 @@ compaction/* @raphaelsc
transport/*
# CQL QUERY LANGUAGE
cql3/* @tgrabiec @nuivall @ptrsmrn
cql3/* @tgrabiec @nuivall @ptrsmrn @KrzaQ
# COUNTERS
counters* @nuivall @ptrsmrn
tests/counter_test* @nuivall @ptrsmrn
counters* @nuivall @ptrsmrn @KrzaQ
tests/counter_test* @nuivall @ptrsmrn @KrzaQ
# DOCS
docs/* @annastuchlik @tzach
docs/alternator @annastuchlik @tzach @nyh
docs/alternator @annastuchlik @tzach @nyh @nuivall @ptrsmrn @KrzaQ
# GOSSIP
gms/* @tgrabiec @asias @kbr-scylla
@@ -74,8 +74,8 @@ streaming/* @tgrabiec @asias
service/storage_service.* @tgrabiec @asias
# ALTERNATOR
alternator/* @nyh
test/alternator/* @nyh
alternator/* @nyh @nuivall @ptrsmrn @KrzaQ
test/alternator/* @nyh @nuivall @ptrsmrn @KrzaQ
# HINTED HANDOFF
db/hints/* @piodul @vladzcloudius @eliransin

View File

@@ -1,86 +1,15 @@
name: "Report a bug"
description: "File a bug report."
title: "[Bug]: "
type: "bug"
labels: bug
body:
- type: checkboxes
id: terms
attributes:
label: Code of Conduct
description: "This is Scylla's bug tracker, to be used for reporting bugs only.
This is Scylla's bug tracker, to be used for reporting bugs only.
If you have a question about Scylla, and not a bug, please ask it in
our forum at https://forum.scylladb.com/ or in our slack channel https://slack.scylladb.com/ "
options:
- label: I have read the disclaimer above and am reporting a suspected malfunction in Scylla.
required: true
our mailing-list at scylladb-dev@googlegroups.com or in our slack channel.
- type: input
id: product-version
attributes:
label: product version
description: Scylla version (or git commit hash)
placeholder: ex. scylla-6.1.1
validations:
required: true
- type: input
id: cluster-size
attributes:
label: Cluster Size
validations:
required: true
- type: input
id: os
attributes:
label: OS
placeholder: RHEL/CentOS/Ubuntu/AWS AMI
validations:
required: true
- type: textarea
id: additional-data
attributes:
label: Additional Environmental Data
#description:
placeholder: Add additional data
value: "Platform (physical/VM/cloud instance type/docker):\n
Hardware: sockets= cores= hyperthreading= memory=\n
Disks: (SSD/HDD, count)"
validations:
required: false
- type: textarea
id: reproducer-steps
attributes:
label: Reproduction Steps
placeholder: Describe how to reproduce the problem
value: "The steps to reproduce the problem are:"
validations:
required: true
- type: textarea
id: the-problem
attributes:
label: What is the problem?
placeholder: Describe the problem you found
value: "The problem is that"
validations:
required: true
- type: textarea
id: what-happened
attributes:
label: Expected behavior?
placeholder: Describe what should have happened
value: "I expected that "
validations:
required: true
- type: textarea
id: logs
attributes:
label: Relevant log output
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
render: shell
- [] I have read the disclaimer above, and I am reporting a suspected malfunction in Scylla.
*Installation details*
Scylla version (or git commit hash):
Cluster size:
OS (RHEL/CentOS/Ubuntu/AWS AMI):
*Hardware details (for performance issues)* Delete if unneeded
Platform (physical/VM/cloud instance type/docker):
Hardware: sockets= cores= hyperthreading= memory=
Disks: (SSD/HDD, count)

View File

@@ -112,15 +112,10 @@ def backport(repo, pr, version, commits, backport_base_branch, is_collaborator):
is_draft = True
repo_local.git.add(A=True)
repo_local.git.cherry_pick('--continue')
# Check if the branch already exists in the remote fork
remote_refs = repo_local.git.ls_remote('--heads', fork_repo, new_branch_name)
if not remote_refs:
# Branch does not exist, create it with a regular push
repo_local.git.push(fork_repo, new_branch_name)
create_pull_request(repo, new_branch_name, backport_base_branch, pr, backport_pr_title, commits,
is_draft, is_collaborator)
else:
logging.info(f"Remote branch {new_branch_name} already exists in fork. Skipping push.")
repo_local.git.push(fork_repo, new_branch_name, force=True)
create_pull_request(repo, new_branch_name, backport_base_branch, pr, backport_pr_title, commits,
is_draft, is_collaborator)
except GitCommandError as e:
logging.warning(f"GitCommandError: {e}")

View File

@@ -1,16 +0,0 @@
{
"problemMatcher": [
{
"owner": "seastar-bad-include",
"severity": "error",
"pattern": [
{
"regexp": "^(.+):(\\d+):(.+)$",
"file": 1,
"line": 2,
"message": 3
}
]
}
]
}

View File

@@ -11,8 +11,7 @@ env:
CLEANER_OUTPUT_PATH: build/clang-include-cleaner.log
# the "idl" subdirectory does not contain C++ source code. the .hh files in it are
# supposed to be processed by idl-compiler.py, so we don't check them using the cleaner
CLEANER_DIRS: test/unit exceptions alternator api auth cdc compaction db dht gms index lang message mutation mutation_writer node_ops raft redis replica service
SEASTAR_BAD_INCLUDE_OUTPUT_PATH: build/seastar-bad-include.log
CLEANER_DIRS: test/unit exceptions alternator api auth cdc compaction db dht gms index lang message mutation mutation_writer node_ops redis replica
permissions: {}
@@ -81,24 +80,7 @@ jobs:
done
- run: |
echo "::remove-matcher owner=clang-include-cleaner::"
- run: |
echo "::add-matcher::.github/seastar-bad-include.json"
- name: check for seastar includes
run: |
git -c safe.directory="$PWD" \
grep -nE '#include +"seastar/' \
| tee "$SEASTAR_BAD_INCLUDE_OUTPUT_PATH"
- run: |
echo "::remove-matcher owner=seastar-bad-include::"
- uses: actions/upload-artifact@v4
with:
name: Logs
path: |
${{ env.CLEANER_OUTPUT_PATH }}
${{ env.SEASTAR_BAD_INCLUDE_OUTPUT_PATH }}
- name: fail if seastar headers are included as an internal library
run: |
if [ -s "$SEASTAR_BAD_INCLUDE_OUTPUT_PATH" ]; then
echo "::error::Found #include \"seastar/ in the source code. Use angle brackets instead."
exit 1
fi
name: Logs (clang-include-cleaner)
path: "./${{ env.CLEANER_OUTPUT_PATH }}"

View File

@@ -16,13 +16,6 @@ jobs:
pull-requests: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
repository: ${{ github.repository }}
ref: ${{ env.DEFAULT_BRANCH }}
token: ${{ secrets.AUTO_BACKPORT_TOKEN }}
fetch-depth: 1
- name: Mark pull request as ready for review
run: gh pr ready "${{ github.event.pull_request.number }}"
env:

View File

@@ -13,8 +13,6 @@ jobs:
issues: write
pull-requests: write
steps:
- name: Wait for label to be added
run: sleep 1m
- uses: mheap/github-action-required-labels@v5
with:
mode: minimum

5
.gitmodules vendored
View File

@@ -1,6 +1,6 @@
[submodule "seastar"]
path = seastar
url = ../scylla-seastar
url = ../seastar
ignore = dirty
[submodule "swagger-ui"]
path = swagger-ui
@@ -9,6 +9,9 @@
[submodule "abseil"]
path = abseil
url = ../abseil-cpp
[submodule "scylla-tools"]
path = tools/java
url = ../scylla-tools-java
[submodule "scylla-python3"]
path = tools/python3
url = ../scylla-python3

View File

@@ -163,6 +163,14 @@ file(MAKE_DIRECTORY "${scylla_gen_build_dir}")
include(add_version_library)
generate_scylla_version()
add_library(scylla-zstd STATIC
zstd.cc)
target_link_libraries(scylla-zstd
PRIVATE
db
Seastar::seastar
zstd::libzstd)
add_library(scylla-main STATIC)
target_sources(scylla-main
PRIVATE
@@ -174,7 +182,7 @@ target_sources(scylla-main
compress.cc
converting_mutation_partition_applier.cc
counters.cc
sstable_dict_autotrainer.cc
direct_failure_detector/failure_detector.cc
duration.cc
exceptions/exceptions.cc
frozen_schema.cc
@@ -196,7 +204,6 @@ target_sources(scylla-main
reader_concurrency_semaphore_group.cc
schema_mutations.cc
serializer.cc
service/direct_failure_detector/failure_detector.cc
sstables_loader.cc
table_helper.cc
tasks/task_handler.cc
@@ -207,6 +214,7 @@ target_sources(scylla-main
vint-serialization.cc)
target_link_libraries(scylla-main
PRIVATE
"$<LINK_LIBRARY:WHOLE_ARCHIVE,scylla-zstd>"
db
absl::headers
absl::btree

View File

@@ -220,9 +220,28 @@ On a development machine, one might run Scylla as
$ SCYLLA_HOME=$HOME/scylla build/release/scylla --overprovisioned --developer-mode=yes
```
To interact with scylla it is recommended to build our version of
cqlsh. It is available at
https://github.com/scylladb/scylla-cqlsh and is available as a submodule.
To interact with scylla it is recommended to build our versions of
cqlsh and nodetool. They are available at
https://github.com/scylladb/scylla-tools-java and can be built with
```bash
$ sudo ./install-dependencies.sh
$ ant jar
```
cqlsh should work out of the box, but nodetool depends on a running
scylla-jmx (https://github.com/scylladb/scylla-jmx). It can be build
with
```bash
$ mvn package
```
and must be started with
```bash
$ ./scripts/scylla-jmx
```
### Branches and tags

View File

@@ -78,7 +78,7 @@ fi
# Default scylla product/version tags
PRODUCT=scylla
VERSION=2025.2.2
VERSION=2025.2.0-dev
if test -f version
then

View File

@@ -24,7 +24,7 @@ static constexpr uint64_t KB = 1024ULL;
static constexpr uint64_t RCU_BLOCK_SIZE_LENGTH = 4*KB;
static constexpr uint64_t WCU_BLOCK_SIZE_LENGTH = 1*KB;
bool consumed_capacity_counter::should_add_capacity(const rjson::value& request) {
static bool should_add_capacity(const rjson::value& request) {
const rjson::value* return_consumed = rjson::find(request, "ReturnConsumedCapacity");
if (!return_consumed) {
return false;
@@ -62,22 +62,15 @@ static uint64_t calculate_half_units(uint64_t unit_block_size, uint64_t total_by
rcu_consumed_capacity_counter::rcu_consumed_capacity_counter(const rjson::value& request, bool is_quorum) :
consumed_capacity_counter(should_add_capacity(request)),_is_quorum(is_quorum) {
}
uint64_t rcu_consumed_capacity_counter::get_half_units(uint64_t total_bytes, bool is_quorum) noexcept {
return calculate_half_units(RCU_BLOCK_SIZE_LENGTH, total_bytes, is_quorum);
}
uint64_t rcu_consumed_capacity_counter::get_half_units() const noexcept {
return get_half_units(_total_bytes, _is_quorum);
return calculate_half_units(RCU_BLOCK_SIZE_LENGTH, _total_bytes, _is_quorum);
}
uint64_t wcu_consumed_capacity_counter::get_half_units() const noexcept {
return calculate_half_units(WCU_BLOCK_SIZE_LENGTH, _total_bytes, true);
}
uint64_t wcu_consumed_capacity_counter::get_units(uint64_t total_bytes) noexcept {
return calculate_half_units(WCU_BLOCK_SIZE_LENGTH, total_bytes, true) * HALF_UNIT_MULTIPLIER;
}
wcu_consumed_capacity_counter::wcu_consumed_capacity_counter(const rjson::value& request) :
consumed_capacity_counter(should_add_capacity(request)) {
}

View File

@@ -42,25 +42,21 @@ public:
*/
virtual uint64_t get_half_units() const noexcept = 0;
uint64_t _total_bytes = 0;
static bool should_add_capacity(const rjson::value& request);
protected:
bool _should_add_to_reponse = false;
};
class rcu_consumed_capacity_counter : public consumed_capacity_counter {
virtual uint64_t get_half_units() const noexcept;
bool _is_quorum = false;
public:
rcu_consumed_capacity_counter(const rjson::value& request, bool is_quorum);
rcu_consumed_capacity_counter(): consumed_capacity_counter(false), _is_quorum(false){}
virtual uint64_t get_half_units() const noexcept;
static uint64_t get_half_units(uint64_t total_bytes, bool is_quorum) noexcept;
};
class wcu_consumed_capacity_counter : public consumed_capacity_counter {
virtual uint64_t get_half_units() const noexcept;
public:
wcu_consumed_capacity_counter(const rjson::value& request);
static uint64_t get_units(uint64_t total_bytes) noexcept;
};
}

File diff suppressed because it is too large Load Diff

View File

@@ -241,8 +241,7 @@ public:
const query::partition_slice&& slice,
shared_ptr<cql3::selection::selection> selection,
foreign_ptr<lw_shared_ptr<query::result>> query_result,
shared_ptr<const std::optional<attrs_to_get>> attrs_to_get,
uint64_t& rcu_half_units);
shared_ptr<const std::optional<attrs_to_get>> attrs_to_get);
static void describe_single_item(const cql3::selection::selection&,
const std::vector<managed_bytes_opt>&,

View File

@@ -165,9 +165,7 @@ static std::optional<std::string> resolve_path_component(const std::string& colu
fmt::format("ExpressionAttributeNames missing entry '{}' required by expression", column_name));
}
used_attribute_names.emplace(column_name);
auto result = std::string(rjson::to_string_view(*value));
validate_attr_name_length("", result.size(), false, "ExpressionAttributeNames contains invalid value: ");
return result;
return std::string(rjson::to_string_view(*value));
}
return std::nullopt;
}
@@ -739,26 +737,6 @@ rjson::value calculate_value(const parsed::set_rhs& rhs,
return rjson::null_value();
}
void validate_attr_name_length(std::string_view supplementary_context, size_t attr_name_length, bool is_key, std::string_view error_msg_prefix) {
constexpr const size_t DYNAMODB_KEY_ATTR_NAME_SIZE_MAX = 255;
constexpr const size_t DYNAMODB_NONKEY_ATTR_NAME_SIZE_MAX = 65535;
const size_t max_length = is_key ? DYNAMODB_KEY_ATTR_NAME_SIZE_MAX : DYNAMODB_NONKEY_ATTR_NAME_SIZE_MAX;
if (attr_name_length > max_length) {
std::string error_msg;
if (!error_msg_prefix.empty()) {
error_msg += error_msg_prefix;
}
if (!supplementary_context.empty()) {
error_msg += "in ";
error_msg += supplementary_context;
error_msg += " - ";
}
error_msg += fmt::format("Attribute name is too large, must be less than {} bytes", std::to_string(max_length + 1));
throw api_error::validation(error_msg);
}
}
} // namespace alternator
auto fmt::formatter<alternator::parsed::path>::format(const alternator::parsed::path& p, fmt::format_context& ctx) const

View File

@@ -91,7 +91,5 @@ rjson::value calculate_value(const parsed::value& v,
rjson::value calculate_value(const parsed::set_rhs& rhs,
const rjson::value* previous_item);
void validate_attr_name_length(std::string_view supplementary_context, size_t attr_name_length, bool is_key, std::string_view error_msg_prefix = {});
} /* namespace alternator */

View File

@@ -228,8 +228,9 @@ protected:
// If the rack does not exist, we return an empty list - not an error.
sstring query_rack = req->get_query_param("rack");
for (auto& id : local_dc_nodes) {
auto ip = _gossiper.get_address_map().get(id);
if (!query_rack.empty()) {
auto rack = _gossiper.get_application_state_value(id, gms::application_state::RACK);
auto rack = _gossiper.get_application_state_value(ip, gms::application_state::RACK);
if (rack != query_rack) {
continue;
}
@@ -237,10 +238,10 @@ protected:
// Note that it's not enough for the node to be is_alive() - a
// node joining the cluster is also "alive" but not responsive to
// requests. We alive *and* normal. See #19694, #21538.
if (_gossiper.is_alive(id) && _gossiper.is_normal(id)) {
if (_gossiper.is_alive(ip) && _gossiper.is_normal(ip)) {
// Use the gossiped broadcast_rpc_address if available instead
// of the internal IP address "ip". See discussion in #18711.
rjson::push_back(results, rjson::from_string(_gossiper.get_rpc_address(id)));
rjson::push_back(results, rjson::from_string(_gossiper.get_rpc_address(ip)));
}
}
rep->set_status(reply::status_type::ok);
@@ -504,7 +505,7 @@ server::server(executor& exec, service::storage_proxy& proxy, gms::gossiper& gos
, _key_cache(1024, 1min, slogger)
, _enforce_authorization(false)
, _enabled_servers{}
, _pending_requests("alternator::server::pending_requests")
, _pending_requests{}
, _timeout_config(_proxy.data_dictionary().get_config())
, _callbacks{
{"CreateTable", [] (executor& e, executor::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit, rjson::value json_request, std::unique_ptr<request> req) {

View File

@@ -41,7 +41,7 @@ class server : public peering_sharded_service<server> {
key_cache _key_cache;
utils::updateable_value<bool> _enforce_authorization;
utils::small_vector<std::reference_wrapper<seastar::httpd::http_server>, 2> _enabled_servers;
named_gate _pending_requests;
gate _pending_requests;
// In some places we will need a CQL updateable_timeout_config object even
// though it isn't really relevant for Alternator which defines its own
// timeouts separately. We can create this object only once.

View File

@@ -14,20 +14,7 @@
namespace alternator {
const char* ALTERNATOR_METRICS = "alternator";
static seastar::metrics::histogram estimated_histogram_to_metrics(const utils::estimated_histogram& histogram) {
seastar::metrics::histogram res;
res.buckets.resize(histogram.bucket_offsets.size());
uint64_t cumulative_count = 0;
res.sample_count = histogram._count;
res.sample_sum = histogram._sample_sum;
for (size_t i = 0; i < res.buckets.size(); i++) {
auto& v = res.buckets[i];
v.upper_bound = histogram.bucket_offsets[i];
cumulative_count += histogram.buckets[i];
v.count = cumulative_count;
}
return res;
}
stats::stats() : api_operations{} {
// Register the
seastar::metrics::label op("op");
@@ -108,26 +95,22 @@ stats::stats() : api_operations{} {
seastar::metrics::description("number of rows read during filtering operations"))(alternator_label).set_skip_when_empty(),
seastar::metrics::make_total_operations("filtered_rows_matched_total", cql_stats.filtered_rows_matched_total,
seastar::metrics::description("number of rows read and matched during filtering operations")),
seastar::metrics::make_counter("rcu_total", [this]{return 0.5 * rcu_half_units_total;},
seastar::metrics::description("total number of consumed read units"))(alternator_label).set_skip_when_empty(),
seastar::metrics::make_counter("rcu_total", rcu_total,
seastar::metrics::description("total number of consumed read units, counted as half units"))(alternator_label).set_skip_when_empty(),
seastar::metrics::make_counter("wcu_total", wcu_total[wcu_types::PUT_ITEM],
seastar::metrics::description("total number of consumed write units"),{op("PutItem")})(alternator_label).set_skip_when_empty(),
seastar::metrics::description("total number of consumed write units, counted as half units"),{op("PutItem")})(alternator_label).set_skip_when_empty(),
seastar::metrics::make_counter("wcu_total", wcu_total[wcu_types::DELETE_ITEM],
seastar::metrics::description("total number of consumed write units"),{op("DeleteItem")})(alternator_label).set_skip_when_empty(),
seastar::metrics::description("total number of consumed write units, counted as half units"),{op("DeleteItem")})(alternator_label).set_skip_when_empty(),
seastar::metrics::make_counter("wcu_total", wcu_total[wcu_types::UPDATE_ITEM],
seastar::metrics::description("total number of consumed write units"),{op("UpdateItem")})(alternator_label).set_skip_when_empty(),
seastar::metrics::description("total number of consumed write units, counted as half units"),{op("UpdateItem")})(alternator_label).set_skip_when_empty(),
seastar::metrics::make_counter("wcu_total", wcu_total[wcu_types::INDEX],
seastar::metrics::description("total number of consumed write units"),{op("Index")})(alternator_label).set_skip_when_empty(),
seastar::metrics::description("total number of consumed write units, counted as half units"),{op("Index")})(alternator_label).set_skip_when_empty(),
seastar::metrics::make_total_operations("filtered_rows_dropped_total", [this] { return cql_stats.filtered_rows_read_total - cql_stats.filtered_rows_matched_total; },
seastar::metrics::description("number of rows read and dropped during filtering operations"))(alternator_label).set_skip_when_empty(),
seastar::metrics::make_counter("batch_item_count", seastar::metrics::description("The total number of items processed across all batches"),{op("BatchWriteItem")},
api_operations.batch_write_item_batch_total)(alternator_label).set_skip_when_empty(),
seastar::metrics::make_counter("batch_item_count", seastar::metrics::description("The total number of items processed across all batches"),{op("BatchGetItem")},
api_operations.batch_get_item_batch_total)(alternator_label).set_skip_when_empty(),
seastar::metrics::make_histogram("batch_item_count_histogram", seastar::metrics::description("Histogram of the number of items in a batch request"),{op("BatchGetItem")},
[this]{ return estimated_histogram_to_metrics(api_operations.batch_get_item_histogram);})(alternator_label).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
seastar::metrics::make_histogram("batch_item_count_histogram", seastar::metrics::description("Histogram of the number of items in a batch request"),{op("BatchWriteItem")},
[this]{ return estimated_histogram_to_metrics(api_operations.batch_write_item_histogram);})(alternator_label).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
});
}

View File

@@ -12,7 +12,6 @@
#include <seastar/core/metrics_registration.hh>
#include "utils/histogram.hh"
#include "utils/estimated_histogram.hh"
#include "cql3/stats.hh"
namespace alternator {
@@ -76,9 +75,6 @@ public:
utils::timed_rate_moving_average_summary_and_histogram batch_write_item_latency;
utils::timed_rate_moving_average_summary_and_histogram batch_get_item_latency;
utils::timed_rate_moving_average_summary_and_histogram get_records_latency;
utils::estimated_histogram batch_get_item_histogram{22}; // a histogram that covers the range 1 - 100
utils::estimated_histogram batch_write_item_histogram{22}; // a histogram that covers the range 1 - 100
} api_operations;
// Miscellaneous event counters
uint64_t total_operations = 0;
@@ -88,7 +84,7 @@ public:
uint64_t shard_bounce_for_lwt = 0;
uint64_t requests_blocked_memory = 0;
uint64_t requests_shed = 0;
uint64_t rcu_half_units_total = 0;
uint64_t rcu_total = 0;
// wcu can results from put, update, delete and index
// Index related will be done on top of the operation it comes with
enum wcu_types {

View File

@@ -808,9 +808,6 @@ future<executor::request_return_type> executor::get_records(client_state& client
if (limit < 1) {
throw api_error::validation("Limit must be 1 or more");
}
if (limit > 1000) {
throw api_error::validation("Limit must be less than or equal to 1000");
}
auto db = _proxy.data_dictionary();
schema_ptr schema, base;

View File

@@ -136,6 +136,14 @@
"allowMultiple":false,
"type":"string",
"paramType":"path"
},
{
"name":"unsafe",
"description":"Set to True to perform an unsafe assassination",
"required":false,
"allowMultiple":false,
"type":"boolean",
"paramType":"query"
}
]
}

View File

@@ -2144,31 +2144,6 @@
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"skip_cleanup",
"description":"Don't cleanup keys from loaded sstables. Invalid if load_and_stream is true",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"skip_reshape",
"description":"Don't reshape the loaded sstables. Invalid if load_and_stream is true",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"scope",
"description":"Defines the set of nodes to which mutations can be streamed",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query",
"enum": ["all", "dc", "rack", "node"]
}
]
}
@@ -3052,73 +3027,6 @@
}
]
},
{
"path":"/storage_service/retrain_dict",
"operations":[
{
"method":"POST",
"summary":"Retrain the SSTable compression dictionary for the target table.",
"type":"void",
"nickname":"retrain_dict",
"produces":[
"application/json"
],
"parameters":[
{
"name":"keyspace",
"description":"Name of the keyspace containing the target table.",
"required":true,
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"cf",
"description":"Name of the target table.",
"required":true,
"allowMultiple":false,
"type":"string",
"paramType":"query"
}
]
}
]
},
{
"path":"/storage_service/estimate_compression_ratios",
"operations":[
{
"method":"GET",
"summary":"Compute an estimated compression ratio for SSTables of the given table, for various compression configurations.",
"type":"array",
"items":{
"type":"compression_config_result"
},
"nickname":"estimate_compression_ratios",
"produces":[
"application/json"
],
"parameters":[
{
"name":"keyspace",
"description":"Name of the keyspace containing the target table.",
"required":true,
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"cf",
"description":"Name of the target table.",
"required":true,
"allowMultiple":false,
"type":"string",
"paramType":"query"
}
]
}
]
},
{
"path":"/storage_service/raft_topology/reload",
"operations":[
@@ -3161,22 +3069,6 @@
]
}
]
},
{
"path":"/storage_service/raft_topology/cmd_rpc_status",
"operations":[
{
"method":"GET",
"summary":"Get information about currently running topology cmd rpc",
"type":"string",
"nickname":"raft_topology_get_cmd_status",
"produces":[
"application/json"
],
"parameters":[
]
}
]
}
],
"models":{
@@ -3436,32 +3328,6 @@
"type":"string"
}
}
},
"compression_config_result":{
"id":"compression_config_result",
"description":"Compression ratio estimation result for one config",
"properties":{
"level":{
"type":"long",
"description":"The used value of `compression_level`"
},
"chunk_length_in_kb":{
"type":"long",
"description":"The used value of `chunk_length_in_kb`"
},
"dict":{
"type":"string",
"description":"The used dictionary: `none`, `past` (== current), or `future`"
},
"sstable_compression":{
"type":"string",
"description":"The used compressor name (aka `sstable_compression`)"
},
"ratio":{
"type":"float",
"description":"The resulting compression ratio (estimated on a random sample of files)"
}
}
}
}
}

View File

@@ -902,13 +902,17 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
});
ss::enable_auto_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) {
auto [keyspace, tables] = parse_table_infos(ctx, *req);
auto keyspace = validate_keyspace(ctx, req);
auto tables = parse_table_infos(keyspace, ctx, req->query_parameters, "cf");
apilog.info("enable_auto_compaction: keyspace={} tables={}", keyspace, tables);
return set_tables_autocompaction(ctx, std::move(tables), true);
});
ss::disable_auto_compaction.set(r, [&ctx](std::unique_ptr<http::request> req) {
auto [keyspace, tables] = parse_table_infos(ctx, *req);
auto keyspace = validate_keyspace(ctx, req);
auto tables = parse_table_infos(keyspace, ctx, req->query_parameters, "cf");
apilog.info("disable_auto_compaction: keyspace={} tables={}", keyspace, tables);
return set_tables_autocompaction(ctx, std::move(tables), false);
});
@@ -932,13 +936,17 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
});
ss::enable_tombstone_gc.set(r, [&ctx](std::unique_ptr<http::request> req) {
auto [keyspace, tables] = parse_table_infos(ctx, *req);
auto keyspace = validate_keyspace(ctx, req);
auto tables = parse_table_infos(keyspace, ctx, req->query_parameters, "cf");
apilog.info("enable_tombstone_gc: keyspace={} tables={}", keyspace, tables);
return set_tables_tombstone_gc(ctx, std::move(tables), true);
});
ss::disable_tombstone_gc.set(r, [&ctx](std::unique_ptr<http::request> req) {
auto [keyspace, tables] = parse_table_infos(ctx, *req);
auto keyspace = validate_keyspace(ctx, req);
auto tables = parse_table_infos(keyspace, ctx, req->query_parameters, "cf");
apilog.info("disable_tombstone_gc: keyspace={} tables={}", keyspace, tables);
return set_tables_tombstone_gc(ctx, std::move(tables), false);
});
@@ -1046,7 +1054,7 @@ void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace
return ctx.db.map_reduce0([key, uuid] (replica::database& db) -> future<std::unordered_set<sstring>> {
auto sstables = co_await db.find_column_family(uuid).get_sstables_by_partition_key(key);
co_return sstables | std::views::transform([] (auto s) -> sstring { return fmt::to_string(s->get_filename()); }) | std::ranges::to<std::unordered_set>();
co_return sstables | std::views::transform([] (auto s) { return s->get_filename(); }) | std::ranges::to<std::unordered_set>();
}, std::unordered_set<sstring>(),
[](std::unordered_set<sstring> a, std::unordered_set<sstring>&& b) mutable {
a.merge(b);

View File

@@ -111,7 +111,8 @@ void set_compaction_manager(http_context& ctx, routes& r, sharded<compaction_man
});
cm::stop_keyspace_compaction.set(r, [&ctx] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
auto [ks_name, tables] = parse_table_infos(ctx, *req, "tables");
auto ks_name = validate_keyspace(ctx, req);
auto tables = parse_table_infos(ks_name, ctx, req->query_parameters, "tables");
auto type = req->get_query_param("type");
co_await ctx.db.invoke_on_all([&] (replica::database& db) {
auto& cm = db.get_compaction_manager();

View File

@@ -22,10 +22,10 @@ void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {
return g.container().invoke_on(0, [] (gms::gossiper& g) {
std::vector<fd::endpoint_state> res;
res.reserve(g.num_endpoints());
g.for_each_endpoint_state([&] (const gms::endpoint_state& eps) {
g.for_each_endpoint_state([&] (const gms::inet_address& addr, const gms::endpoint_state& eps) {
fd::endpoint_state val;
val.addrs = fmt::to_string(eps.get_ip());
val.is_alive = g.is_alive(eps.get_host_id());
val.addrs = fmt::to_string(addr);
val.is_alive = g.is_alive(addr);
val.generation = eps.get_heart_beat_state().get_generation().value();
val.version = eps.get_heart_beat_state().get_heart_beat_version().value();
val.update_time = eps.get_update_timestamp().time_since_epoch().count();
@@ -65,8 +65,8 @@ void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {
fd::get_simple_states.set(r, [&g] (std::unique_ptr<request> req) {
return g.container().invoke_on(0, [] (gms::gossiper& g) {
std::map<sstring, sstring> nodes_status;
g.for_each_endpoint_state([&] (const gms::endpoint_state& es) {
nodes_status.emplace(fmt::to_string(es.get_ip()), g.is_alive(es.get_host_id()) ? "UP" : "DOWN");
g.for_each_endpoint_state([&] (const gms::inet_address& node, const gms::endpoint_state&) {
nodes_status.emplace(fmt::to_string(node), g.is_alive(node) ? "UP" : "DOWN");
});
return make_ready_future<json::json_return_type>(map_to_key_value<fd::mapper>(nodes_status));
});
@@ -81,7 +81,7 @@ void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {
fd::get_endpoint_state.set(r, [&g] (std::unique_ptr<request> req) {
return g.container().invoke_on(0, [req = std::move(req)] (gms::gossiper& g) {
auto state = g.get_endpoint_state_ptr(g.get_host_id(gms::inet_address(req->get_path_param("addr"))));
auto state = g.get_endpoint_state_ptr(gms::inet_address(req->get_path_param("addr")));
if (!state) {
return make_ready_future<json::json_return_type>(format("unknown endpoint {}", req->get_path_param("addr")));
}

View File

@@ -35,32 +35,37 @@ void set_gossiper(http_context& ctx, routes& r, gms::gossiper& g) {
gms::inet_address ep(req->get_path_param("addr"));
// synchronize unreachable_members on all shards
co_await g.get_unreachable_members_synchronized();
co_return g.get_endpoint_downtime(g.get_host_id(ep));
co_return g.get_endpoint_downtime(ep);
});
httpd::gossiper_json::get_current_generation_number.set(r, [&g] (std::unique_ptr<http::request> req) {
gms::inet_address ep(req->get_path_param("addr"));
return g.get_current_generation_number(g.get_host_id(ep)).then([] (gms::generation_type res) {
return g.get_current_generation_number(ep).then([] (gms::generation_type res) {
return make_ready_future<json::json_return_type>(res.value());
});
});
httpd::gossiper_json::get_current_heart_beat_version.set(r, [&g] (std::unique_ptr<http::request> req) {
gms::inet_address ep(req->get_path_param("addr"));
return g.get_current_heart_beat_version(g.get_host_id(ep)).then([] (gms::version_type res) {
return g.get_current_heart_beat_version(ep).then([] (gms::version_type res) {
return make_ready_future<json::json_return_type>(res.value());
});
});
httpd::gossiper_json::assassinate_endpoint.set(r, [&g](std::unique_ptr<http::request> req) {
return g.assassinate_endpoint(req->get_path_param("addr")).then([] {
if (req->get_query_param("unsafe") != "True") {
return g.assassinate_endpoint(req->get_path_param("addr")).then([] {
return make_ready_future<json::json_return_type>(json_void());
});
}
return g.unsafe_assassinate_endpoint(req->get_path_param("addr")).then([] {
return make_ready_future<json::json_return_type>(json_void());
});
});
httpd::gossiper_json::force_remove_endpoint.set(r, [&g](std::unique_ptr<http::request> req) {
gms::inet_address ep(req->get_path_param("addr"));
return g.force_remove_endpoint(g.get_host_id(ep), gms::null_permit_id).then([] () {
return g.force_remove_endpoint(ep, gms::null_permit_id).then([] {
return make_ready_future<json::json_return_type>(json_void());
});
});

View File

@@ -148,7 +148,7 @@ void set_messaging_service(http_context& ctx, routes& r, sharded<netw::messaging
hf::inject_disconnect.set(r, [&ms] (std::unique_ptr<request> req) -> future<json::json_return_type> {
auto ip = msg_addr(req->get_path_param("ip"));
co_await ms.invoke_on_all([ip] (netw::messaging_service& ms) {
ms.remove_rpc_client(ip, std::nullopt);
ms.remove_rpc_client(ip);
});
co_return json::json_void();
});

View File

@@ -11,7 +11,7 @@
#include "cql3/query_processor.hh"
#include "cql3/untyped_result_set.hh"
#include "db/consistency_level_type.hh"
#include <seastar/json/json_elements.hh>
#include "seastar/json/json_elements.hh"
#include "transport/controller.hh"
#include <unordered_map>

View File

@@ -14,9 +14,6 @@
#include "api/scrub_status.hh"
#include "db/config.hh"
#include "db/schema_tables.hh"
#include "gms/feature_service.hh"
#include "schema/schema_builder.hh"
#include "sstables/sstables_manager.hh"
#include "utils/hash.hh"
#include <optional>
#include <sstream>
@@ -32,7 +29,6 @@
#include "service/raft/raft_group0_client.hh"
#include "service/storage_service.hh"
#include "service/load_meter.hh"
#include "gms/feature_service.hh"
#include "gms/gossiper.hh"
#include "db/system_keyspace.hh"
#include <seastar/http/exception.hh>
@@ -59,7 +55,6 @@
#include "db/view/view_builder.hh"
#include "utils/rjson.hh"
#include "utils/user_provided_param.hh"
#include "sstable_dict_autotrainer.hh"
using namespace seastar::httpd;
using namespace std::chrono_literals;
@@ -132,6 +127,32 @@ int64_t validate_int(const sstring& param) {
return std::atoll(param.c_str());
}
// splits a request parameter assumed to hold a comma-separated list of table names
// verify that the tables are found, otherwise a bad_param_exception exception is thrown
// containing the description of the respective no_such_column_family error.
static std::vector<sstring> parse_tables(const sstring& ks_name, const http_context& ctx, sstring value) {
if (value.empty()) {
return map_keys(ctx.db.local().find_keyspace(ks_name).metadata().get()->cf_meta_data());
}
std::vector<sstring> names = split(value, ",");
try {
for (const auto& table_name : names) {
ctx.db.local().find_column_family(ks_name, table_name);
}
} catch (const replica::no_such_column_family& e) {
throw bad_param_exception(e.what());
}
return names;
}
static std::vector<sstring> parse_tables(const sstring& ks_name, const http_context& ctx, const std::unordered_map<sstring, sstring>& query_params, sstring param_name) {
auto it = query_params.find(param_name);
if (it == query_params.end()) {
return {};
}
return parse_tables(ks_name, ctx, it->second);
}
std::vector<table_info> parse_table_infos(const sstring& ks_name, const http_context& ctx, sstring value) {
std::vector<table_info> res;
try {
@@ -157,12 +178,9 @@ std::vector<table_info> parse_table_infos(const sstring& ks_name, const http_con
return res;
}
std::pair<sstring, std::vector<table_info>> parse_table_infos(const http_context& ctx, const http::request& req, sstring cf_param_name) {
auto keyspace = validate_keyspace(ctx, req);
const auto& query_params = req.query_parameters;
auto it = query_params.find(cf_param_name);
auto tis = parse_table_infos(keyspace, ctx, it != query_params.end() ? it->second : "");
return std::make_pair(std::move(keyspace), std::move(tis));
std::vector<table_info> parse_table_infos(const sstring& ks_name, const http_context& ctx, const std::unordered_map<sstring, sstring>& query_params, sstring param_name) {
auto it = query_params.find(param_name);
return parse_table_infos(ks_name, ctx, it != query_params.end() ? it->second : "");
}
static ss::token_range token_range_endpoints_to_json(const dht::token_range_endpoints& d) {
@@ -183,6 +201,16 @@ static ss::token_range token_range_endpoints_to_json(const dht::token_range_endp
return r;
}
using ks_cf_func = std::function<future<json::json_return_type>(http_context&, std::unique_ptr<http::request>, sstring, std::vector<table_info>)>;
static auto wrap_ks_cf(http_context &ctx, ks_cf_func f) {
return [&ctx, f = std::move(f)](std::unique_ptr<http::request> req) {
auto keyspace = validate_keyspace(ctx, req);
auto table_infos = parse_table_infos(keyspace, ctx, req->query_parameters, "cf");
return f(ctx, std::move(req), std::move(keyspace), std::move(table_infos));
};
}
seastar::future<json::json_return_type> run_toppartitions_query(db::toppartitions_query& q, http_context &ctx, bool legacy_request) {
return q.scatter().then([&q, legacy_request] {
return sleep(q.duration()).then([&q, legacy_request] {
@@ -225,7 +253,7 @@ future<scrub_info> parse_scrub_options(const http_context& ctx, sharded<db::snap
});
rp.process(*req);
info.keyspace = validate_keyspace(ctx, *rp.get("keyspace"));
info.column_families = parse_table_infos(info.keyspace, ctx, *rp.get("cf")) | std::views::transform([] (auto ti) { return ti.name; }) | std::ranges::to<std::vector>();
info.column_families = parse_tables(info.keyspace, ctx, *rp.get("cf"));
auto scrub_mode_opt = rp.get("scrub_mode");
auto scrub_mode = sstables::compaction_type_options::scrub::mode::abort;
@@ -250,9 +278,11 @@ future<scrub_info> parse_scrub_options(const http_context& ctx, sharded<db::snap
}
}
if (!req_param<bool>(*req, "disable_snapshot", false) && !info.column_families.empty()) {
if (!req_param<bool>(*req, "disable_snapshot", false)) {
auto tag = format("pre-scrub-{:d}", db_clock::now().time_since_epoch().count());
co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, tag, db::snapshot_ctl::skip_flush::no);
co_await coroutine::parallel_for_each(info.column_families, [&snap_ctl, keyspace = info.keyspace, tag](sstring cf) {
return snap_ctl.local().take_column_family_snapshot(keyspace, cf, tag, db::snapshot_ctl::skip_flush::no);
});
}
info.opts = {
@@ -453,26 +483,17 @@ void set_sstables_loader(http_context& ctx, routes& r, sharded<sstables_loader>&
auto cf = req->get_query_param("cf");
auto stream = req->get_query_param("load_and_stream");
auto primary_replica = req->get_query_param("primary_replica_only");
auto skip_cleanup_p = req->get_query_param("skip_cleanup");
boost::algorithm::to_lower(stream);
boost::algorithm::to_lower(primary_replica);
bool load_and_stream = stream == "true" || stream == "1";
bool primary_replica_only = primary_replica == "true" || primary_replica == "1";
bool skip_cleanup = skip_cleanup_p == "true" || skip_cleanup_p == "1";
auto scope = parse_stream_scope(req->get_query_param("scope"));
auto skip_reshape_p = req->get_query_param("skip_reshape");
auto skip_reshape = skip_reshape_p == "true" || skip_reshape_p == "1";
if (scope != sstables_loader::stream_scope::all && !load_and_stream) {
throw httpd::bad_param_exception("scope takes no effect without load-and-stream");
}
// No need to add the keyspace, since all we want is to avoid always sending this to the same
// CPU. Even then I am being overzealous here. This is not something that happens all the time.
auto coordinator = std::hash<sstring>()(cf) % smp::count;
return sst_loader.invoke_on(coordinator,
[ks = std::move(ks), cf = std::move(cf),
load_and_stream, primary_replica_only, skip_cleanup, skip_reshape, scope] (sstables_loader& loader) {
return loader.load_new_sstables(ks, cf, load_and_stream, primary_replica_only, skip_cleanup, skip_reshape, scope);
load_and_stream, primary_replica_only] (sstables_loader& loader) {
return loader.load_new_sstables(ks, cf, load_and_stream, primary_replica_only, sstables_loader::stream_scope::all);
}).then_wrapped([] (auto&& f) {
if (f.failed()) {
auto msg = fmt::format("Failed to load new sstables: {}", f.get_exception());
@@ -704,7 +725,7 @@ rest_get_load(http_context& ctx, std::unique_ptr<http::request> req) {
static
future<json::json_return_type>
rest_get_current_generation_number(sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
auto ep = ss.local().get_token_metadata().get_topology().my_host_id();
auto ep = ss.local().get_token_metadata().get_topology().my_address();
return ss.local().gossiper().get_current_generation_number(ep).then([](gms::generation_type res) {
return make_ready_future<json::json_return_type>(res.value());
});
@@ -747,7 +768,13 @@ rest_force_compaction(http_context& ctx, std::unique_ptr<http::request> req) {
fmopt = flush_mode::skip;
}
auto task = co_await compaction_module.make_and_start_task<global_major_compaction_task_impl>({}, db, fmopt, consider_only_existing_data);
co_await task->done();
try {
co_await task->done();
} catch (...) {
apilog.error("force_compaction failed: {}", std::current_exception());
throw;
}
co_return json_void();
}
@@ -774,7 +801,13 @@ rest_force_keyspace_compaction(http_context& ctx, std::unique_ptr<http::request>
fmopt = flush_mode::skip;
}
auto task = co_await compaction_module.make_and_start_task<major_keyspace_compaction_task_impl>({}, std::move(keyspace), tasks::task_id::create_null_id(), db, table_infos, fmopt, consider_only_existing_data);
co_await task->done();
try {
co_await task->done();
} catch (...) {
apilog.error("force_keyspace_compaction: keyspace={} tables={} failed: {}", task->get_status().keyspace, table_infos, std::current_exception());
throw;
}
co_return json_void();
}
@@ -782,7 +815,8 @@ static
future<json::json_return_type>
rest_force_keyspace_cleanup(http_context& ctx, sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
auto& db = ctx.db;
auto [keyspace, table_infos] = parse_table_infos(ctx, *req);
auto keyspace = validate_keyspace(ctx, req);
auto table_infos = parse_table_infos(keyspace, ctx, req->query_parameters, "cf");
const auto& rs = db.local().find_keyspace(keyspace).get_replication_strategy();
if (rs.get_type() == locator::replication_strategy_type::local || !rs.is_vnode_based()) {
auto reason = rs.get_type() == locator::replication_strategy_type::local ? "require" : "support";
@@ -799,7 +833,13 @@ rest_force_keyspace_cleanup(http_context& ctx, sharded<service::storage_service>
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
auto task = co_await compaction_module.make_and_start_task<cleanup_keyspace_compaction_task_impl>(
{}, std::move(keyspace), db, table_infos, flush_mode::all_tables, tasks::is_user_task::yes);
co_await task->done();
try {
co_await task->done();
} catch (...) {
apilog.error("force_keyspace_cleanup: keyspace={} tables={} failed: {}", task->get_status().keyspace, table_infos, std::current_exception());
throw;
}
co_return json::json_return_type(0);
}
@@ -821,34 +861,49 @@ rest_cleanup_all(http_context& ctx, sharded<service::storage_service>& ss, std::
auto& db = ctx.db;
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
auto task = co_await compaction_module.make_and_start_task<global_cleanup_compaction_task_impl>({}, db);
co_await task->done();
try {
co_await task->done();
} catch (...) {
apilog.error("cleanup_all failed: {}", std::current_exception());
throw;
}
co_return json::json_return_type(0);
}
static
future<json::json_return_type>
rest_perform_keyspace_offstrategy_compaction(http_context& ctx, std::unique_ptr<http::request> req) {
auto [keyspace, table_infos] = parse_table_infos(ctx, *req);
rest_perform_keyspace_offstrategy_compaction(http_context& ctx, std::unique_ptr<http::request> req, sstring keyspace, std::vector<table_info> table_infos) {
apilog.info("perform_keyspace_offstrategy_compaction: keyspace={} tables={}", keyspace, table_infos);
bool res = false;
auto& compaction_module = ctx.db.local().get_compaction_manager().get_task_manager_module();
auto task = co_await compaction_module.make_and_start_task<offstrategy_keyspace_compaction_task_impl>({}, std::move(keyspace), ctx.db, table_infos, &res);
co_await task->done();
try {
co_await task->done();
} catch (...) {
apilog.error("perform_keyspace_offstrategy_compaction: keyspace={} tables={} failed: {}", task->get_status().keyspace, table_infos, std::current_exception());
throw;
}
co_return json::json_return_type(res);
}
static
future<json::json_return_type>
rest_upgrade_sstables(http_context& ctx, std::unique_ptr<http::request> req) {
rest_upgrade_sstables(http_context& ctx, std::unique_ptr<http::request> req, sstring keyspace, std::vector<table_info> table_infos) {
auto& db = ctx.db;
auto [keyspace, table_infos] = parse_table_infos(ctx, *req);
bool exclude_current_version = req_param<bool>(*req, "exclude_current_version", false);
apilog.info("upgrade_sstables: keyspace={} tables={} exclude_current_version={}", keyspace, table_infos, exclude_current_version);
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
auto task = co_await compaction_module.make_and_start_task<upgrade_sstables_compaction_task_impl>({}, std::move(keyspace), db, table_infos, exclude_current_version);
co_await task->done();
try {
co_await task->done();
} catch (...) {
apilog.error("upgrade_sstables: keyspace={} tables={} failed: {}", keyspace, table_infos, std::current_exception());
throw;
}
co_return json::json_return_type(0);
}
@@ -865,10 +920,15 @@ rest_force_flush(http_context& ctx, std::unique_ptr<http::request> req) {
static
future<json::json_return_type>
rest_force_keyspace_flush(http_context& ctx, std::unique_ptr<http::request> req) {
auto [keyspace, table_infos] = parse_table_infos(ctx, *req);
apilog.info("perform_keyspace_flush: keyspace={} tables={}", keyspace, table_infos);
auto keyspace = validate_keyspace(ctx, req);
auto column_families = parse_tables(keyspace, ctx, req->query_parameters, "cf");
apilog.info("perform_keyspace_flush: keyspace={} tables={}", keyspace, column_families);
auto& db = ctx.db;
co_await replica::database::flush_tables_on_all_shards(db, std::move(table_infos));
if (column_families.empty()) {
co_await replica::database::flush_keyspace_on_all_shards(db, keyspace);
} else {
co_await replica::database::flush_tables_on_all_shards(db, keyspace, std::move(column_families));
}
co_return json_void();
}
@@ -1388,95 +1448,6 @@ rest_get_effective_ownership(http_context& ctx, sharded<service::storage_service
});
}
static
future<json::json_return_type>
rest_estimate_compression_ratios(http_context& ctx, sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
if (!ss.local().get_feature_service().sstable_compression_dicts) {
apilog.warn("estimate_compression_ratios: called before the cluster feature was enabled");
throw std::runtime_error("estimate_compression_ratios requires all nodes to support the SSTABLE_COMPRESSION_DICTS cluster feature");
}
auto ticket = get_units(ss.local().get_do_sample_sstables_concurrency_limiter(), 1);
auto ks = api::req_param<sstring>(*req, "keyspace", {}).value;
auto cf = api::req_param<sstring>(*req, "cf", {}).value;
apilog.debug("estimate_compression_ratios: called with ks={} cf={}", ks, cf);
auto s = ctx.db.local().find_column_family(ks, cf).schema();
auto training_sample = co_await ss.local().do_sample_sstables(s->id(), 4096, 4096);
auto validation_sample = co_await ss.local().do_sample_sstables(s->id(), 16*1024, 1024);
apilog.debug("estimate_compression_ratios: got training sample with {} blocks and validation sample with {}", training_sample.size(), validation_sample.size());
auto dict = co_await ss.local().train_dict(std::move(training_sample));
apilog.debug("estimate_compression_ratios: got dict of size {}", dict.size());
std::vector<ss::compression_config_result> res;
auto make_result = [](std::string_view name, int chunk_length_kb, std::string_view dict, int level, float ratio) -> ss::compression_config_result {
ss::compression_config_result x;
x.sstable_compression = sstring(name);
x.chunk_length_in_kb = chunk_length_kb;
x.dict = sstring(dict);
x.level = level;
x.ratio = ratio;
return x;
};
using algorithm = compression_parameters::algorithm;
for (const auto& algo : {algorithm::lz4_with_dicts, algorithm::zstd_with_dicts}) {
for (const auto& chunk_size_kb : {1, 4, 16}) {
std::vector<int> levels;
if (algo == compressor::algorithm::zstd_with_dicts) {
for (int i = 1; i <= 5; ++i) {
levels.push_back(i);
}
} else {
levels.push_back(1);
}
for (auto level : levels) {
auto algo_name = compression_parameters::algorithm_to_name(algo);
auto m = std::map<sstring, sstring>{
{compression_parameters::CHUNK_LENGTH_KB, std::to_string(chunk_size_kb)},
{compression_parameters::SSTABLE_COMPRESSION, sstring(algo_name)},
};
if (algo == compressor::algorithm::zstd_with_dicts) {
m.insert(decltype(m)::value_type{sstring("compression_level"), sstring(std::to_string(level))});
}
auto params = compression_parameters(std::move(m));
auto ratio_with_no_dict = co_await try_one_compression_config({}, s, params, validation_sample);
auto ratio_with_past_dict = co_await try_one_compression_config(ctx.db.local().get_user_sstables_manager().get_compressor_factory(), s, params, validation_sample);
auto ratio_with_future_dict = co_await try_one_compression_config(dict, s, params, validation_sample);
res.push_back(make_result(algo_name, chunk_size_kb, "none", level, ratio_with_no_dict));
res.push_back(make_result(algo_name, chunk_size_kb, "past", level, ratio_with_past_dict));
res.push_back(make_result(algo_name, chunk_size_kb, "future", level, ratio_with_future_dict));
}
}
}
co_return res;
}
static
future<json::json_return_type>
rest_retrain_dict(http_context& ctx, sharded<service::storage_service>& ss, service::raft_group0_client& group0_client, std::unique_ptr<http::request> req) {
if (!ss.local().get_feature_service().sstable_compression_dicts) {
apilog.warn("retrain_dict: called before the cluster feature was enabled");
throw std::runtime_error("retrain_dict requires all nodes to support the SSTABLE_COMPRESSION_DICTS cluster feature");
}
auto ticket = get_units(ss.local().get_do_sample_sstables_concurrency_limiter(), 1);
auto ks = api::req_param<sstring>(*req, "keyspace", {}).value;
auto cf = api::req_param<sstring>(*req, "cf", {}).value;
apilog.debug("retrain_dict: called with ks={} cf={}", ks, cf);
const auto t_id = ctx.db.local().find_column_family(ks, cf).schema()->id();
constexpr uint64_t chunk_size = 4096;
constexpr uint64_t n_chunks = 4096;
auto sample = co_await ss.local().do_sample_sstables(t_id, chunk_size, n_chunks);
apilog.debug("retrain_dict: got sample with {} blocks", sample.size());
auto dict = co_await ss.local().train_dict(std::move(sample));
apilog.debug("retrain_dict: got dict of size {}", dict.size());
co_await ss.local().publish_new_sstable_dict(t_id, dict, group0_client);
apilog.debug("retrain_dict: published new dict");
co_return json_void();
}
static
future<json::json_return_type>
rest_sstable_info(http_context& ctx, std::unique_ptr<http::request> req) {
@@ -1538,23 +1509,21 @@ rest_sstable_info(http_context& ctx, std::unique_ptr<http::request> req) {
info.version = sstable->get_version();
if (sstable->has_component(sstables::component_type::CompressionInfo)) {
const auto& cp = sstable->get_compression().get_compressor();
auto& c = sstable->get_compression();
auto cp = sstables::get_sstable_compressor(c);
ss::named_maps nm;
nm.group = "compression_parameters";
for (auto& p : cp.options()) {
if (compressor::is_hidden_option_name(p.first)) {
continue;
}
for (auto& p : cp->options()) {
ss::mapper e;
e.key = p.first;
e.value = p.second;
nm.attributes.push(std::move(e));
}
if (!cp.options().contains(compression_parameters::SSTABLE_COMPRESSION)) {
if (!cp->options().contains(compression_parameters::SSTABLE_COMPRESSION)) {
ss::mapper e;
e.key = compression_parameters::SSTABLE_COMPRESSION;
e.value = sstring(cp.name());
e.value = cp->name();
nm.attributes.push(std::move(e));
}
info.extended_properties.push(std::move(nm));
@@ -1641,18 +1610,6 @@ rest_raft_topology_upgrade_status(sharded<service::storage_service>& ss, std::un
co_return sstring(format("{}", ustate));
}
static
future<json::json_return_type>
rest_raft_topology_get_cmd_status(sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
const auto status = co_await ss.invoke_on(0, [] (auto& ss) {
return ss.get_topology_cmd_status();
});
if (status.active_dst.empty()) {
co_return sstring("none");
}
co_return sstring(fmt::format("{}[{}]: {}", status.current, status.index, fmt::join(status.active_dst, ",")));
}
static
future<json::json_return_type>
rest_move_tablet(http_context& ctx, sharded<service::storage_service>& ss, std::unique_ptr<http::request> req) {
@@ -1683,7 +1640,7 @@ rest_add_tablet_replica(http_context& ctx, sharded<service::storage_service>& ss
auto token = dht::token::from_int64(validate_int(req->get_query_param("token")));
auto ks = req->get_query_param("ks");
auto table = req->get_query_param("table");
auto table_id = validate_table(ctx.db.local(), ks, table);
auto table_id = ctx.db.local().find_column_family(ks, table).schema()->id();
auto force_str = req->get_query_param("force");
auto force = service::loosen_constraints(force_str == "" ? false : validate_bool(force_str));
@@ -1702,7 +1659,7 @@ rest_del_tablet_replica(http_context& ctx, sharded<service::storage_service>& ss
auto token = dht::token::from_int64(validate_int(req->get_query_param("token")));
auto ks = req->get_query_param("ks");
auto table = req->get_query_param("table");
auto table_id = validate_table(ctx.db.local(), ks, table);
auto table_id = ctx.db.local().find_column_family(ks, table).schema()->id();
auto force_str = req->get_query_param("force");
auto force = service::loosen_constraints(force_str == "" ? false : validate_bool(force_str));
@@ -1810,6 +1767,12 @@ rest_bind(FuncType func, BindArgs&... args) {
return std::bind_front(func, std::ref(args)...);
}
static
seastar::httpd::future_json_function
rest_bind(ks_cf_func func, http_context& ctx) {
return wrap_ks_cf(ctx, func);
}
void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_service>& ss, service::raft_group0_client& group0_client) {
ss::get_token_endpoint.set(r, rest_bind(rest_get_token_endpoint, ctx, ss));
ss::toppartitions_generic.set(r, rest_bind(rest_toppartitions_generic, ctx));
@@ -1878,13 +1841,10 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
ss::get_total_hints.set(r, rest_bind(rest_get_total_hints));
ss::get_ownership.set(r, rest_bind(rest_get_ownership, ctx, ss));
ss::get_effective_ownership.set(r, rest_bind(rest_get_effective_ownership, ctx, ss));
ss::retrain_dict.set(r, rest_bind(rest_retrain_dict, ctx, ss, group0_client));
ss::estimate_compression_ratios.set(r, rest_bind(rest_estimate_compression_ratios, ctx, ss));
ss::sstable_info.set(r, rest_bind(rest_sstable_info, ctx));
ss::reload_raft_topology_state.set(r, rest_bind(rest_reload_raft_topology_state, ss, group0_client));
ss::upgrade_to_raft_topology.set(r, rest_bind(rest_upgrade_to_raft_topology, ss));
ss::raft_topology_upgrade_status.set(r, rest_bind(rest_raft_topology_upgrade_status, ss));
ss::raft_topology_get_cmd_status.set(r, rest_bind(rest_raft_topology_get_cmd_status, ss));
ss::move_tablet.set(r, rest_bind(rest_move_tablet, ctx, ss));
ss::add_tablet_replica.set(r, rest_bind(rest_add_tablet_replica, ctx, ss));
ss::del_tablet_replica.set(r, rest_bind(rest_del_tablet_replica, ctx, ss));
@@ -1966,7 +1926,6 @@ void unset_storage_service(http_context& ctx, routes& r) {
ss::reload_raft_topology_state.unset(r);
ss::upgrade_to_raft_topology.unset(r);
ss::raft_topology_upgrade_status.unset(r);
ss::raft_topology_get_cmd_status.unset(r);
ss::move_tablet.unset(r);
ss::add_tablet_replica.unset(r);
ss::del_tablet_replica.unset(r);

View File

@@ -52,11 +52,10 @@ table_id validate_table(const replica::database& db, sstring ks_name, sstring ta
// containing the description of the respective no_such_column_family error.
// Returns a vector of all table infos given by the parameter, or
// if the parameter is not found or is empty, returns a list of all table infos in the keyspace.
std::vector<table_info> parse_table_infos(const sstring& ks_name, const http_context& ctx, const std::unordered_map<sstring, sstring>& query_params, sstring param_name);
std::vector<table_info> parse_table_infos(const sstring& ks_name, const http_context& ctx, sstring value);
std::pair<sstring, std::vector<table_info>> parse_table_infos(const http_context& ctx, const http::request& req, sstring cf_param_name = "cf");
struct scrub_info {
sstables::compaction_type_options::scrub opts;
sstring keyspace;

View File

@@ -31,7 +31,8 @@ using ks_cf_func = std::function<future<json::json_return_type>(http_context&, s
static auto wrap_ks_cf(http_context &ctx, ks_cf_func f) {
return [&ctx, f = std::move(f)](std::unique_ptr<http::request> req) {
auto [keyspace, table_infos] = parse_table_infos(ctx, *req);
auto keyspace = validate_keyspace(ctx, req);
auto table_infos = parse_table_infos(keyspace, ctx, req->query_parameters, "cf");
return f(ctx, std::move(req), std::move(keyspace), std::move(table_infos));
};
}
@@ -62,7 +63,8 @@ void set_tasks_compaction_module(http_context& ctx, routes& r, sharded<service::
t::force_keyspace_cleanup_async.set(r, [&ctx, &ss](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
auto& db = ctx.db;
auto [keyspace, table_infos] = parse_table_infos(ctx, *req);
auto keyspace = validate_keyspace(ctx, req);
auto table_infos = parse_table_infos(keyspace, ctx, req->query_parameters, "cf");
apilog.info("force_keyspace_cleanup_async: keyspace={} tables={}", keyspace, table_infos);
if (!co_await ss.local().is_cleanup_allowed(keyspace)) {
auto msg = "Can not perform cleanup operation when topology changes";

View File

@@ -74,9 +74,6 @@ void set_token_metadata(http_context& ctx, routes& r, sharded<locator::shared_to
});
ss::get_host_id_map.set(r, [&tm, &g](const_req req) {
if (!g.local().is_enabled()) {
throw std::runtime_error("The gossiper is not ready yet");
}
std::vector<ss::mapper> res;
auto map = tm.local().get()->get_host_ids() |
std::views::transform([&g] (locator::host_id id) { return std::make_pair(g.local().get_address_map().get(id), id); }) |

View File

@@ -33,6 +33,20 @@ namespace audit {
namespace {
future<> syslog_send_helper(net::datagram_channel& sender,
const socket_address& address,
const sstring& msg) {
return sender.send(address, net::packet{msg.data(), msg.size()}).handle_exception([address](auto&& exception_ptr) {
auto error_msg = seastar::format(
"Syslog audit backend failed (sending a message to {} resulted in {}).",
address,
exception_ptr
);
logger.error("{}", error_msg);
throw audit_exception(std::move(error_msg));
});
}
static auto syslog_address_helper(const db::config& cfg)
{
return cfg.audit_unix_socket_path.is_set()
@@ -40,40 +54,11 @@ static auto syslog_address_helper(const db::config& cfg)
: unix_domain_addr(_PATH_LOG);
}
static std::string json_escape(std::string_view str) {
std::string result;
result.reserve(str.size() * 1.2);
for (auto c : str) {
if (c == '"' || c == '\\') {
result.push_back('\\');
}
result.push_back(c);
}
return result;
}
}
future<> audit_syslog_storage_helper::syslog_send_helper(const sstring& msg) {
try {
auto lock = co_await get_units(_semaphore, 1, std::chrono::hours(1));
co_await _sender.send(_syslog_address, net::packet{msg.data(), msg.size()});
}
catch (const std::exception& e) {
auto error_msg = seastar::format(
"Syslog audit backend failed (sending a message to {} resulted in {}).",
_syslog_address,
e
);
logger.error("{}", error_msg);
throw audit_exception(std::move(error_msg));
}
}
audit_syslog_storage_helper::audit_syslog_storage_helper(cql3::query_processor& qp, service::migration_manager&) :
_syslog_address(syslog_address_helper(qp.db().get_config())),
_sender(make_unbound_datagram_channel(AF_UNIX)),
_semaphore(1) {
_sender(make_unbound_datagram_channel(AF_UNIX)) {
}
audit_syslog_storage_helper::~audit_syslog_storage_helper() {
@@ -88,10 +73,10 @@ audit_syslog_storage_helper::~audit_syslog_storage_helper() {
*/
future<> audit_syslog_storage_helper::start(const db::config& cfg) {
if (this_shard_id() != 0) {
co_return;
return make_ready_future();
}
co_await syslog_send_helper("Initializing syslog audit backend.");
return syslog_send_helper(_sender, _syslog_address, "Initializing syslog audit backend.");
}
future<> audit_syslog_storage_helper::stop() {
@@ -108,7 +93,7 @@ future<> audit_syslog_storage_helper::write(const audit_info* audit_info,
auto now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
tm time;
localtime_r(&now, &time);
sstring msg = seastar::format(R"(<{}>{:%h %e %T} scylla-audit: node="{}" category="{}" cl="{}" error="{}" keyspace="{}" query="{}" client_ip="{}" table="{}" username="{}")",
sstring msg = seastar::format("<{}>{:%h %e %T} scylla-audit: \"{}\", \"{}\", \"{}\", \"{}\", \"{}\", \"{}\", \"{}\", \"{}\", \"{}\"",
LOG_NOTICE | LOG_USER,
time,
node_ip,
@@ -116,12 +101,12 @@ future<> audit_syslog_storage_helper::write(const audit_info* audit_info,
cl,
(error ? "true" : "false"),
audit_info->keyspace(),
json_escape(audit_info->query()),
audit_info->query(),
client_ip,
audit_info->table(),
username);
co_await syslog_send_helper(msg);
return syslog_send_helper(_sender, _syslog_address, msg);
}
future<> audit_syslog_storage_helper::write_login(const sstring& username,
@@ -132,15 +117,15 @@ future<> audit_syslog_storage_helper::write_login(const sstring& username,
auto now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());
tm time;
localtime_r(&now, &time);
sstring msg = seastar::format(R"(<{}>{:%h %e %T} scylla-audit: node="{}", category="AUTH", cl="", error="{}", keyspace="", query="", client_ip="{}", table="", username="{}")",
sstring msg = seastar::format("<{}>{:%h %e %T} scylla-audit: \"{}\", \"AUTH\", \"\", \"\", \"\", \"\", \"{}\", \"{}\", \"{}\"",
LOG_NOTICE | LOG_USER,
time,
node_ip,
(error ? "true" : "false"),
client_ip,
username);
username,
(error ? "true" : "false"));
co_await syslog_send_helper(msg.c_str());
co_await syslog_send_helper(_sender, _syslog_address, msg.c_str());
}
using registry = class_registrator<storage_helper, audit_syslog_storage_helper, cql3::query_processor&, service::migration_manager&>;

View File

@@ -24,9 +24,6 @@ namespace audit {
class audit_syslog_storage_helper : public storage_helper {
socket_address _syslog_address;
net::datagram_channel _sender;
seastar::semaphore _semaphore;
future<> syslog_send_helper(const sstring& msg);
public:
explicit audit_syslog_storage_helper(cql3::query_processor&, service::migration_manager&);
virtual ~audit_syslog_storage_helper();

View File

@@ -119,11 +119,6 @@ future<> create_legacy_metadata_table_if_missing(
return qs;
}
::service::raft_timeout get_raft_timeout() noexcept {
auto dur = internal_distributed_query_state().get_client_state().get_timeout_config().other_timeout;
return ::service::raft_timeout{.value = lowres_clock::now() + dur};
}
static future<> announce_mutations_with_guard(
::service::raft_group0_client& group0_client,
std::vector<canonical_mutation> muts,

View File

@@ -17,7 +17,6 @@
#include "types/types.hh"
#include "service/raft/raft_group0_client.hh"
#include "timeout_config.hh"
using namespace std::chrono_literals;
@@ -78,8 +77,6 @@ future<> create_legacy_metadata_table_if_missing(
///
::service::query_state& internal_distributed_query_state() noexcept;
::service::raft_timeout get_raft_timeout() noexcept;
// Execute update query via group0 mechanism, mutations will be applied on all nodes.
// Use this function when need to perform read before write on a single guard or if
// you have more than one mutation and potentially exceed single command size limit.

View File

@@ -338,7 +338,8 @@ future<std::vector<cql3::description>> ldap_role_manager::describe_role_grants()
}
future<> ldap_role_manager::ensure_superuser_is_created() {
return _std_mgr.ensure_superuser_is_created();
// ldap is responsible for users
co_return;
}
} // namespace auth

View File

@@ -117,8 +117,7 @@ future<> password_authenticator::migrate_legacy_metadata() const {
});
}
future<> password_authenticator::legacy_create_default_if_missing() {
SCYLLA_ASSERT(legacy_mode(_qp));
future<> password_authenticator::create_default_if_missing() {
const auto exists = co_await default_role_row_satisfies(_qp, &has_salted_hash, _superuser);
if (exists) {
co_return;
@@ -128,75 +127,18 @@ future<> password_authenticator::legacy_create_default_if_missing() {
salted_pwd = passwords::hash(DEFAULT_USER_PASSWORD, rng_for_salt);
}
const auto query = update_row_query();
co_await _qp.execute_internal(
if (legacy_mode(_qp)) {
co_await _qp.execute_internal(
query,
db::consistency_level::QUORUM,
internal_distributed_query_state(),
{salted_pwd, _superuser},
cql3::query_processor::cache_internal::no);
plogger.info("Created default superuser authentication record.");
}
future<> password_authenticator::maybe_create_default_password() {
auto needs_password = [this] () -> future<bool> {
const sstring query = seastar::format("SELECT * FROM {}.{} WHERE is_superuser = true ALLOW FILTERING", get_auth_ks_name(_qp), meta::roles_table::name);
auto results = co_await _qp.execute_internal(query,
db::consistency_level::LOCAL_ONE,
internal_distributed_query_state(), cql3::query_processor::cache_internal::yes);
// Don't add default password if
// - there is no default superuser
// - there is a superuser with a password.
bool has_default = false;
bool has_superuser_with_password = false;
for (auto& result : *results) {
if (result.get_as<sstring>(meta::roles_table::role_col_name) == _superuser) {
has_default = true;
}
if (has_salted_hash(result)) {
has_superuser_with_password = true;
}
}
co_return has_default && !has_superuser_with_password;
};
if (!co_await needs_password()) {
co_return;
}
// We don't want to start operation earlier to avoid quorum requirement in
// a common case.
::service::group0_batch batch(
co_await _group0_client.start_operation(_as, get_raft_timeout()));
// Check again as the state may have changed before we took the guard (batch).
if (!co_await needs_password()) {
co_return;
}
// Set default superuser's password.
std::string salted_pwd(get_config_value(_qp.db().get_config().auth_superuser_salted_password(), ""));
if (salted_pwd.empty()) {
salted_pwd = passwords::hash(DEFAULT_USER_PASSWORD, rng_for_salt);
}
const auto update_query = update_row_query();
co_await collect_mutations(_qp, batch, update_query, {salted_pwd, _superuser});
co_await std::move(batch).commit(_group0_client, _as, get_raft_timeout());
plogger.info("Created default superuser authentication record.");
}
future<> password_authenticator::maybe_create_default_password_with_retries() {
size_t retries = _migration_manager.get_concurrent_ddl_retries();
while (true) {
try {
co_return co_await maybe_create_default_password();
} catch (const ::service::group0_concurrent_modification& ex) {
plogger.warn("Failed to execute maybe_create_default_password due to guard conflict.{}.", retries ? " Retrying" : " Number of retries exceeded, giving up");
if (retries--) {
continue;
}
// Log error but don't crash the whole node startup sequence.
plogger.error("Failed to create default superuser password due to guard conflict.");
co_return;
} catch (const ::service::raft_operation_timeout_error& ex) {
plogger.error("Failed to create default superuser password due to exception: {}", ex.what());
co_return;
}
plogger.info("Created default superuser authentication record.");
} else {
co_await announce_mutations(_qp, _group0_client, query,
{salted_pwd, _superuser}, _as, ::service::raft_timeout{});
plogger.info("Created default superuser authentication record.");
}
}
@@ -222,14 +164,11 @@ future<> password_authenticator::start() {
migrate_legacy_metadata().get();
return;
}
legacy_create_default_if_missing().get();
}
utils::get_local_injector().inject("password_authenticator_start_pause", utils::wait_for_message(5min)).get();
create_default_if_missing().get();
if (!legacy_mode(_qp)) {
maybe_create_default_password_with_retries().get();
if (!_superuser_created_promise.available()) {
_superuser_created_promise.set_value();
}
_superuser_created_promise.set_value();
}
});
});

View File

@@ -41,7 +41,7 @@ class password_authenticator : public authenticator {
::service::migration_manager& _migration_manager;
future<> _stopped;
abort_source _as;
std::string _superuser; // default superuser name from the config (may or may not be present in roles table)
std::string _superuser;
shared_promise<> _superuser_created_promise;
public:
@@ -89,10 +89,7 @@ private:
future<> migrate_legacy_metadata() const;
future<> legacy_create_default_if_missing();
future<> maybe_create_default_password();
future<> maybe_create_default_password_with_retries();
future<> create_default_if_missing();
sstring update_row_query() const;
};

View File

@@ -240,13 +240,6 @@ future<> service::start(::service::migration_manager& mm, db::system_keyspace& s
});
}
co_await _role_manager->start();
if (this_shard_id() == 0) {
// Role manager and password authenticator have this odd startup
// mechanism where they asynchronously create the superuser role
// in the background. Correct password creation depends on role
// creation therefore we need to wait here.
co_await _role_manager->ensure_superuser_is_created();
}
co_await when_all_succeed(_authorizer->start(), _authenticator->start()).discard_result();
_permissions_cache = std::make_unique<permissions_cache>(_loading_cache_config, *this, log);
co_await once_among_shards([this] {

View File

@@ -9,7 +9,6 @@
#include "auth/standard_role_manager.hh"
#include <optional>
#include <stdexcept>
#include <unordered_set>
#include <vector>
@@ -29,7 +28,6 @@
#include "cql3/util.hh"
#include "db/consistency_level_type.hh"
#include "exceptions/exceptions.hh"
#include "utils/error_injection.hh"
#include "utils/log.hh"
#include <seastar/core/loop.hh>
#include <seastar/coroutine/maybe_yield.hh>
@@ -180,8 +178,7 @@ future<> standard_role_manager::create_legacy_metadata_tables_if_missing() const
_migration_manager)).discard_result();
}
future<> standard_role_manager::legacy_create_default_role_if_missing() {
SCYLLA_ASSERT(legacy_mode(_qp));
future<> standard_role_manager::create_default_role_if_missing() {
try {
const auto exists = co_await default_role_row_satisfies(_qp, &has_can_login, _superuser);
if (exists) {
@@ -191,12 +188,16 @@ future<> standard_role_manager::legacy_create_default_role_if_missing() {
get_auth_ks_name(_qp),
meta::roles_table::name,
meta::roles_table::role_col_name);
co_await _qp.execute_internal(
query,
db::consistency_level::QUORUM,
internal_distributed_query_state(),
{_superuser},
cql3::query_processor::cache_internal::no).discard_result();
if (legacy_mode(_qp)) {
co_await _qp.execute_internal(
query,
db::consistency_level::QUORUM,
internal_distributed_query_state(),
{_superuser},
cql3::query_processor::cache_internal::no).discard_result();
} else {
co_await announce_mutations(_qp, _group0_client, query, {_superuser}, _as, ::service::raft_timeout{});
}
log.info("Created default superuser role '{}'.", _superuser);
} catch(const exceptions::unavailable_exception& e) {
log.warn("Skipped default role setup: some nodes were not ready; will retry");
@@ -204,60 +205,6 @@ future<> standard_role_manager::legacy_create_default_role_if_missing() {
}
}
future<> standard_role_manager::maybe_create_default_role() {
auto has_superuser = [this] () -> future<bool> {
const sstring query = seastar::format("SELECT * FROM {}.{} WHERE is_superuser = true ALLOW FILTERING", get_auth_ks_name(_qp), meta::roles_table::name);
auto results = co_await _qp.execute_internal(query, db::consistency_level::LOCAL_ONE,
internal_distributed_query_state(), cql3::query_processor::cache_internal::yes);
for (const auto& result : *results) {
if (has_can_login(result)) {
co_return true;
}
}
co_return false;
};
if (co_await has_superuser()) {
co_return;
}
// We don't want to start operation earlier to avoid quorum requirement in
// a common case.
::service::group0_batch batch(
co_await _group0_client.start_operation(_as, get_raft_timeout()));
// Check again as the state may have changed before we took the guard (batch).
if (co_await has_superuser()) {
co_return;
}
// There is no superuser which has can_login field - create default role.
// Note that we don't check if can_login is set to true.
const sstring insert_query = seastar::format("INSERT INTO {}.{} ({}, is_superuser, can_login) VALUES (?, true, true)",
get_auth_ks_name(_qp),
meta::roles_table::name,
meta::roles_table::role_col_name);
co_await collect_mutations(_qp, batch, insert_query, {_superuser});
co_await std::move(batch).commit(_group0_client, _as, get_raft_timeout());
log.info("Created default superuser role '{}'.", _superuser);
}
future<> standard_role_manager::maybe_create_default_role_with_retries() {
size_t retries = _migration_manager.get_concurrent_ddl_retries();
while (true) {
try {
co_return co_await maybe_create_default_role();
} catch (const ::service::group0_concurrent_modification& ex) {
log.warn("Failed to execute maybe_create_default_role due to guard conflict.{}.", retries ? " Retrying" : " Number of retries exceeded, giving up");
if (retries--) {
continue;
}
// Log error but don't crash the whole node startup sequence.
log.error("Failed to create default superuser role due to guard conflict.");
co_return;
} catch (const ::service::raft_operation_timeout_error& ex) {
log.error("Failed to create default superuser role due to exception: {}", ex.what());
co_return;
}
}
}
static const sstring legacy_table_name{"users"};
bool standard_role_manager::legacy_metadata_exists() {
@@ -319,13 +266,10 @@ future<> standard_role_manager::start() {
co_await migrate_legacy_metadata();
co_return;
}
co_await legacy_create_default_role_if_missing();
}
co_await create_default_role_if_missing();
if (!legacy) {
co_await maybe_create_default_role_with_retries();
if (!_superuser_created_promise.available()) {
_superuser_created_promise.set_value();
}
_superuser_created_promise.set_value();
}
};
@@ -675,12 +619,6 @@ future<role_set> standard_role_manager::query_all() {
// To avoid many copies of a view.
static const auto role_col_name_string = sstring(meta::roles_table::role_col_name);
if (utils::get_local_injector().enter("standard_role_manager_fail_legacy_query")) {
if (legacy_mode(_qp)) {
throw std::runtime_error("standard_role_manager::query_all: failed due to error injection");
}
}
const auto results = co_await _qp.execute_internal(
query,
db::consistency_level::QUORUM,

View File

@@ -95,10 +95,7 @@ private:
future<> migrate_legacy_metadata();
future<> legacy_create_default_role_if_missing();
future<> maybe_create_default_role();
future<> maybe_create_default_role_with_retries();
future<> create_default_role_if_missing();
future<> create_or_replace(std::string_view role_name, const role_config&, ::service::group0_batch&);

View File

@@ -35,9 +35,8 @@ inline bytes_view to_bytes_view(std::string_view view) {
}
struct fmt_hex {
std::span<const std::byte> v;
fmt_hex(const bytes_view& v) noexcept : v(std::as_bytes(std::span(v))) {}
fmt_hex(std::span<const std::byte> v) noexcept : v(v) {}
const bytes_view& v;
fmt_hex(const bytes_view& v) noexcept : v(v) {}
};
bytes from_hex(std::string_view s);

View File

@@ -23,10 +23,6 @@ class cdc_extension : public schema_extension {
public:
static constexpr auto NAME = "cdc";
// cdc_extension was written before schema_extension was deprecated, so support it
// without warnings
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
cdc_extension() = default;
cdc_extension(const options& opts) : _cdc_options(opts) {}
explicit cdc_extension(std::map<sstring, sstring> tags) : _cdc_options(std::move(tags)) {}
@@ -34,7 +30,6 @@ public:
explicit cdc_extension(const sstring& s) {
throw std::logic_error("Cannot create cdc info from string");
}
#pragma clang diagnostic pop
bytes serialize() const override {
return ser::serialize_to_buffer<bytes>(_cdc_options.to_map());
}

View File

@@ -39,12 +39,12 @@
extern logging::logger cdc_log;
static int get_shard_count(const locator::host_id& endpoint, const gms::gossiper& g) {
static int get_shard_count(const gms::inet_address& endpoint, const gms::gossiper& g) {
auto ep_state = g.get_application_state_ptr(endpoint, gms::application_state::SHARD_COUNT);
return ep_state ? std::stoi(ep_state->value()) : -1;
}
static unsigned get_sharding_ignore_msb(const locator::host_id& endpoint, const gms::gossiper& g) {
static unsigned get_sharding_ignore_msb(const gms::inet_address& endpoint, const gms::gossiper& g) {
auto ep_state = g.get_application_state_ptr(endpoint, gms::application_state::IGNORE_MSB_BITS);
return ep_state ? std::stoi(ep_state->value()) : 0;
}
@@ -198,7 +198,7 @@ static std::vector<stream_id> create_stream_ids(
}
bool should_propose_first_generation(const locator::host_id& my_host_id, const gms::gossiper& g) {
return g.for_each_endpoint_state_until([&] (const gms::endpoint_state& eps) {
return g.for_each_endpoint_state_until([&] (const gms::inet_address&, const gms::endpoint_state& eps) {
return stop_iteration(my_host_id < eps.get_host_id());
}) == stop_iteration::no;
}
@@ -365,9 +365,6 @@ cdc::topology_description make_new_generation_description(
const noncopyable_function<std::pair<size_t, uint8_t>(dht::token)>& get_sharding_info,
const locator::token_metadata_ptr tmptr) {
const auto tokens = get_tokens(bootstrap_tokens, tmptr);
if (tokens.empty()) {
on_internal_error(cdc_log, "Attempted to create a CDC generation from an empty list of tokens");
}
utils::chunked_vector<token_range_description> vnode_descriptions;
vnode_descriptions.reserve(tokens.size());
@@ -405,8 +402,9 @@ future<cdc::generation_id> generation_service::legacy_make_new_generation(const
throw std::runtime_error(
format("Can't find endpoint for token {}", end));
}
auto sc = get_shard_count(*endpoint, _gossiper);
return {sc > 0 ? sc : 1, get_sharding_ignore_msb(*endpoint, _gossiper)};
const auto ep = _gossiper.get_address_map().get(*endpoint);
auto sc = get_shard_count(ep, _gossiper);
return {sc > 0 ? sc : 1, get_sharding_ignore_msb(ep, _gossiper)};
}
};
@@ -465,7 +463,7 @@ future<cdc::generation_id> generation_service::legacy_make_new_generation(const
* but if the cluster already supports CDC, then every newly joining node will propose a new CDC generation,
* which means it will gossip the generation's timestamp.
*/
static std::optional<cdc::generation_id> get_generation_id_for(const locator::host_id& endpoint, const gms::endpoint_state& eps) {
static std::optional<cdc::generation_id> get_generation_id_for(const gms::inet_address& endpoint, const gms::endpoint_state& eps) {
const auto* gen_id_ptr = eps.get_application_state_ptr(gms::application_state::CDC_GENERATION_ID);
if (!gen_id_ptr) {
return std::nullopt;
@@ -843,18 +841,18 @@ future<> generation_service::leave_ring() {
co_await _gossiper.unregister_(shared_from_this());
}
future<> generation_service::on_join(gms::inet_address ep, locator::host_id id, gms::endpoint_state_ptr ep_state, gms::permit_id pid) {
return on_change(ep, id, ep_state->get_application_state_map(), pid);
future<> generation_service::on_join(gms::inet_address ep, gms::endpoint_state_ptr ep_state, gms::permit_id pid) {
return on_change(ep, ep_state->get_application_state_map(), pid);
}
future<> generation_service::on_change(gms::inet_address ep, locator::host_id id, const gms::application_state_map& states, gms::permit_id pid) {
future<> generation_service::on_change(gms::inet_address ep, const gms::application_state_map& states, gms::permit_id pid) {
assert_shard_zero(__PRETTY_FUNCTION__);
if (_raft_topology_change_enabled()) {
return make_ready_future<>();
}
return on_application_state_change(ep, id, states, gms::application_state::CDC_GENERATION_ID, pid, [this] (gms::inet_address ep, locator::host_id id, const gms::versioned_value& v, gms::permit_id) {
return on_application_state_change(ep, states, gms::application_state::CDC_GENERATION_ID, pid, [this] (gms::inet_address ep, const gms::versioned_value& v, gms::permit_id) {
auto gen_id = gms::versioned_value::cdc_generation_id_from_string(v.value());
cdc_log.debug("Endpoint: {}, CDC generation ID change: {}", ep, gen_id);
@@ -869,8 +867,7 @@ future<> generation_service::check_and_repair_cdc_streams() {
}
std::optional<cdc::generation_id> latest = _gen_id;
_gossiper.for_each_endpoint_state([&] (const gms::endpoint_state& state) {
auto addr = state.get_host_id();
_gossiper.for_each_endpoint_state([&] (const gms::inet_address& addr, const gms::endpoint_state& state) {
if (_gossiper.is_left(addr)) {
cdc_log.info("check_and_repair_cdc_streams ignored node {} because it is in LEFT state", addr);
return;
@@ -1069,8 +1066,8 @@ future<> generation_service::legacy_scan_cdc_generations() {
assert_shard_zero(__PRETTY_FUNCTION__);
std::optional<cdc::generation_id> latest;
_gossiper.for_each_endpoint_state([&] (const gms::endpoint_state& eps) {
auto gen_id = get_generation_id_for(eps.get_host_id(), eps);
_gossiper.for_each_endpoint_state([&] (const gms::inet_address& node, const gms::endpoint_state& eps) {
auto gen_id = get_generation_id_for(node, eps);
if (!latest || (gen_id && get_ts(*gen_id) > get_ts(*latest))) {
latest = gen_id;
}

View File

@@ -110,8 +110,13 @@ public:
return _cdc_metadata;
}
virtual future<> on_join(gms::inet_address, locator::host_id id, gms::endpoint_state_ptr, gms::permit_id) override;
virtual future<> on_change(gms::inet_address, locator::host_id id, const gms::application_state_map&, gms::permit_id) override;
virtual future<> on_alive(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
virtual future<> on_dead(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
virtual future<> on_remove(gms::inet_address, gms::permit_id) override { return make_ready_future(); }
virtual future<> on_restart(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override { return make_ready_future(); }
virtual future<> on_join(gms::inet_address, gms::endpoint_state_ptr, gms::permit_id) override;
virtual future<> on_change(gms::inet_address, const gms::application_state_map&, gms::permit_id) override;
future<> check_and_repair_cdc_streams();

View File

@@ -960,12 +960,8 @@ public:
// Given a reference to such a column from the base schema, this function sets the corresponding column
// in the log to the given value for the given row.
void set_value(const clustering_key& log_ck, const column_definition& base_cdef, const managed_bytes_view& value) {
auto log_cdef_ptr = _log_schema.get_column_definition(log_data_column_name_bytes(base_cdef.name()));
if (!log_cdef_ptr) {
throw exceptions::invalid_request_exception(format("CDC log schema for {}.{} does not have base column {}",
_log_schema.ks_name(), _log_schema.cf_name(), base_cdef.name_as_text()));
}
_log_mut.set_cell(log_ck, *log_cdef_ptr, atomic_cell::make_live(*base_cdef.type, _ts, value, _ttl));
auto& log_cdef = *_log_schema.get_column_definition(log_data_column_name_bytes(base_cdef.name()));
_log_mut.set_cell(log_ck, log_cdef, atomic_cell::make_live(*base_cdef.type, _ts, value, _ttl));
}
// Each regular and static column in the base schema has a corresponding column in the log schema
@@ -973,13 +969,7 @@ public:
// Given a reference to such a column from the base schema, this function sets the corresponding column
// in the log to `true` for the given row. If not called, the column will be `null`.
void set_deleted(const clustering_key& log_ck, const column_definition& base_cdef) {
auto log_cdef_ptr = _log_schema.get_column_definition(log_data_column_deleted_name_bytes(base_cdef.name()));
if (!log_cdef_ptr) {
throw exceptions::invalid_request_exception(format("CDC log schema for {}.{} does not have base column {}",
_log_schema.ks_name(), _log_schema.cf_name(), base_cdef.name_as_text()));
}
auto& log_cdef = *log_cdef_ptr;
_log_mut.set_cell(log_ck, *log_cdef_ptr, atomic_cell::make_live(*log_cdef.type, _ts, log_cdef.type->decompose(true), _ttl));
_log_mut.set_cell(log_ck, log_data_column_deleted_name_bytes(base_cdef.name()), data_value(true), _ts, _ttl);
}
// Each regular and static non-atomic column in the base schema has a corresponding column in the log schema
@@ -988,12 +978,7 @@ public:
// Given a reference to such a column from the base schema, this function sets the corresponding column
// in the log to the given set of keys for the given row.
void set_deleted_elements(const clustering_key& log_ck, const column_definition& base_cdef, const managed_bytes& deleted_elements) {
auto log_cdef_ptr = _log_schema.get_column_definition(log_data_column_deleted_elements_name_bytes(base_cdef.name()));
if (!log_cdef_ptr) {
throw exceptions::invalid_request_exception(format("CDC log schema for {}.{} does not have base column {}",
_log_schema.ks_name(), _log_schema.cf_name(), base_cdef.name_as_text()));
}
auto& log_cdef = *log_cdef_ptr;
auto& log_cdef = *_log_schema.get_column_definition(log_data_column_deleted_elements_name_bytes(base_cdef.name()));
_log_mut.set_cell(log_ck, log_cdef, atomic_cell::make_live(*log_cdef.type, _ts, deleted_elements, _ttl));
}
@@ -1880,10 +1865,5 @@ bool cdc::cdc_service::needs_cdc_augmentation(const std::vector<mutation>& mutat
future<std::tuple<std::vector<mutation>, lw_shared_ptr<cdc::operation_result_tracker>>>
cdc::cdc_service::augment_mutation_call(lowres_clock::time_point timeout, std::vector<mutation>&& mutations, tracing::trace_state_ptr tr_state, db::consistency_level write_cl) {
if (utils::get_local_injector().enter("sleep_before_cdc_augmentation")) {
return seastar::sleep(std::chrono::milliseconds(100)).then([this, timeout, mutations = std::move(mutations), tr_state = std::move(tr_state), write_cl] () mutable {
return _impl->augment_mutation_call(timeout, std::move(mutations), std::move(tr_state), write_cl);
});
}
return _impl->augment_mutation_call(timeout, std::move(mutations), std::move(tr_state), write_cl);
}

View File

@@ -3,7 +3,7 @@ set(CMAKE_CXX_FLAGS_COVERAGE
CACHE
INTERNAL
"")
update_build_flags(Coverage
update_cxx_flags(CMAKE_CXX_FLAGS_COVERAGE
WITH_DEBUG_INFO
OPTIMIZATION_LEVEL "g")

View File

@@ -1,6 +1,6 @@
set(OptimizationLevel "g")
update_build_flags(Debug
update_cxx_flags(CMAKE_CXX_FLAGS_DEBUG
WITH_DEBUG_INFO
OPTIMIZATION_LEVEL ${OptimizationLevel})

View File

@@ -3,7 +3,7 @@ set(CMAKE_CXX_FLAGS_DEV
CACHE
INTERNAL
"")
update_build_flags(Dev
update_cxx_flags(CMAKE_CXX_FLAGS_DEV
OPTIMIZATION_LEVEL "2")
set(scylla_build_mode_Dev "dev")

View File

@@ -8,7 +8,7 @@ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO
CACHE
INTERNAL
"")
update_build_flags(RelWithDebInfo
update_cxx_flags(CMAKE_CXX_FLAGS_RELWITHDEBINFO
WITH_DEBUG_INFO
OPTIMIZATION_LEVEL "3")

View File

@@ -3,7 +3,7 @@ set(CMAKE_CXX_FLAGS_SANITIZE
CACHE
INTERNAL
"")
update_build_flags(Sanitize
update_cxx_flags(CMAKE_CXX_FLAGS_SANITIZE
WITH_DEBUG_INFO
OPTIMIZATION_LEVEL "s")

View File

@@ -72,7 +72,7 @@ function(get_padded_dynamic_linker_option output length)
ERROR_VARIABLE driver_command_line
ERROR_STRIP_TRAILING_WHITESPACE)
# extract the argument for the "-dynamic-linker" option
if(driver_command_line MATCHES ".*\"?${dynamic_linker_option}\"?[ =]\"?([^ \"]*)\"?[ \n].*")
if(driver_command_line MATCHES ".*\"?${dynamic_linker_option}\"? \"?([^ \"]*)\"? .*")
set(dynamic_linker ${CMAKE_MATCH_1})
else()
message(FATAL_ERROR "Unable to find ${dynamic_linker_option} in driver-generated command: "
@@ -135,7 +135,7 @@ function(maybe_limit_stack_usage_in_KB stack_usage_threshold_in_KB config)
endif()
endfunction()
macro(update_build_flags config)
macro(update_cxx_flags flags)
cmake_parse_arguments (
parsed_args
"WITH_DEBUG_INFO"
@@ -145,15 +145,11 @@ macro(update_build_flags config)
if(NOT DEFINED parsed_args_OPTIMIZATION_LEVEL)
message(FATAL_ERROR "OPTIMIZATION_LEVEL is missing")
endif()
string(TOUPPER ${config} CONFIG)
set(cxx_flags "CMAKE_CXX_FLAGS_${CONFIG}")
string(APPEND ${cxx_flags}
string(APPEND ${flags}
" -O${parsed_args_OPTIMIZATION_LEVEL}")
if(parsed_args_WITH_DEBUG_INFO)
string(APPEND ${cxx_flags} " -g -gz")
string(APPEND ${flags} " -g -gz")
endif()
unset(CONFIG)
unset(cxx_flags)
endmacro()
set(pgo_opts "")

View File

@@ -769,7 +769,7 @@ private:
}
virtual sstables::sstable_set make_sstable_set_for_input() const {
return _table_s.get_compaction_strategy().make_sstable_set(_table_s);
return _table_s.get_compaction_strategy().make_sstable_set(_schema);
}
const tombstone_gc_state& get_tombstone_gc_state() const {
@@ -1301,7 +1301,7 @@ public:
}
virtual sstables::sstable_set make_sstable_set_for_input() const override {
return sstables::make_partitioned_sstable_set(_schema, _table_s.token_range());
return sstables::make_partitioned_sstable_set(_schema, false);
}
// Unconditionally enable incremental compaction if the strategy specifies a max output size, e.g. LCS.

View File

@@ -395,7 +395,7 @@ future<sstables::sstable_set> compaction_task_executor::sstable_set_for_tombston
auto compound_set = t.sstable_set_for_tombstone_gc();
// Compound set will be linearized into a single set, since compaction might add or remove sstables
// to it for incremental compaction to work.
auto new_set = sstables::make_partitioned_sstable_set(t.schema(), t.token_range());
auto new_set = sstables::make_partitioned_sstable_set(t.schema(), false);
co_await compound_set->for_each_sstable_gently([&] (const sstables::shared_sstable& sst) {
auto inserted = new_set.insert(sst);
if (!inserted) {
@@ -458,7 +458,9 @@ future<sstables::compaction_result> compaction_task_executor::compact_sstables(s
future<> compaction_task_executor::update_history(table_state& t, const sstables::compaction_result& res, const sstables::compaction_data& cdata) {
auto ended_at = std::chrono::duration_cast<std::chrono::milliseconds>(res.stats.ended_at.time_since_epoch());
if (auto sys_ks = _cm._sys_ks.get_permit()) {
if (_cm._sys_ks) {
auto sys_ks = _cm._sys_ks; // hold pointer on sys_ks
co_await utils::get_local_injector().inject("update_history_wait", utils::wait_for_message(120s));
std::unordered_map<int32_t, int64_t> rows_merged;
for (size_t id=0; id<res.stats.reader_statistics.rows_merged_histogram.size(); ++id) {
@@ -473,9 +475,11 @@ future<> compaction_task_executor::update_history(table_state& t, const sstables
}
future<> compaction_manager::get_compaction_history(compaction_history_consumer&& f) {
if (auto sys_ks = _sys_ks.get_permit()) {
co_await sys_ks->get_compaction_history(std::move(f));
if (!_sys_ks) {
return make_ready_future<>();
}
return _sys_ks->get_compaction_history(std::move(f)).finally([s = _sys_ks] {});
}
template<std::derived_from<compaction::compaction_task_executor> Executor>
@@ -920,7 +924,6 @@ public:
compaction_manager::compaction_manager(config cfg, abort_source& as, tasks::task_manager& tm)
: _task_manager_module(make_shared<task_manager_module>(tm))
, _sys_ks("compaction_manager::system_keyspace")
, _cfg(std::move(cfg))
, _compaction_submission_timer(compaction_sg(), compaction_submission_callback())
, _compaction_controller(make_compaction_controller(compaction_sg(), static_shares(), [this] () -> float {
@@ -957,7 +960,6 @@ compaction_manager::compaction_manager(config cfg, abort_source& as, tasks::task
compaction_manager::compaction_manager(tasks::task_manager& tm)
: _task_manager_module(make_shared<task_manager_module>(tm))
, _sys_ks("compaction_manager::system_keyspace")
, _cfg(config{ .available_memory = 1 })
, _compaction_submission_timer(compaction_sg(), compaction_submission_callback())
, _compaction_controller(make_compaction_controller(compaction_sg(), 1, [] () -> float { return 1.0; }))
@@ -1126,11 +1128,8 @@ future<> compaction_manager::drain() {
// Disable the state so that it can be enabled later if requested.
_state = state::disabled;
}
_compaction_submission_timer.cancel();
// Stop ongoing compactions, if the request has not been sent already and wait for them to stop.
co_await stop_ongoing_compactions("drain");
// Trigger a signal to properly exit from postponed_compactions_reevaluation() fiber
reevaluate_postponed_compactions();
cmlog.info("Drained");
}
@@ -1159,7 +1158,6 @@ future<> compaction_manager::really_do_stop() noexcept {
});
reevaluate_postponed_compactions();
co_await std::move(_waiting_reevalution);
co_await _sys_ks.close();
_weight_tracker.clear();
_compaction_submission_timer.cancel();
co_await _compaction_controller.shutdown();
@@ -2180,8 +2178,7 @@ future<compaction_manager::compaction_stats_opt> compaction_manager::perform_sst
}
compaction::compaction_state::compaction_state(table_state& t)
: gate(format("compaction_state for table {}.{}", t.schema()->ks_name(), t.schema()->cf_name()))
, backlog_tracker(t.get_compaction_strategy().make_backlog_tracker())
: backlog_tracker(t.get_compaction_strategy().make_backlog_tracker())
{
}
@@ -2297,11 +2294,11 @@ strategy_control& compaction_manager::get_strategy_control() const noexcept {
}
void compaction_manager::plug_system_keyspace(db::system_keyspace& sys_ks) noexcept {
_sys_ks.plug(sys_ks.shared_from_this());
_sys_ks = sys_ks.shared_from_this();
}
future<> compaction_manager::unplug_system_keyspace() noexcept {
co_await _sys_ks.unplug();
void compaction_manager::unplug_system_keyspace() noexcept {
_sys_ks = nullptr;
}
double compaction_backlog_tracker::backlog() const {

View File

@@ -32,7 +32,6 @@
#include "seastarx.hh"
#include "sstables/exceptions.hh"
#include "tombstone_gc.hh"
#include "utils/pluggable.hh"
namespace db {
class system_keyspace;
@@ -139,7 +138,7 @@ private:
// being picked more than once.
seastar::named_semaphore _off_strategy_sem = {1, named_semaphore_exception_factory{"off-strategy compaction"}};
utils::pluggable<db::system_keyspace> _sys_ks;
seastar::shared_ptr<db::system_keyspace> _sys_ks;
std::function<void()> compaction_submission_callback();
// all registered tables are reevaluated at a constant interval.
@@ -392,7 +391,7 @@ public:
future<> run_with_compaction_disabled(compaction::table_state& t, std::function<future<> ()> func);
void plug_system_keyspace(db::system_keyspace& sys_ks) noexcept;
future<> unplug_system_keyspace() noexcept;
void unplug_system_keyspace() noexcept;
// Adds a table to the compaction manager.
// Creates a compaction_state structure that can be used for submitting

View File

@@ -22,7 +22,7 @@ namespace compaction {
struct compaction_state {
// Used both by compaction tasks that refer to the compaction_state
// and by any function running under run_with_compaction_disabled().
seastar::named_gate gate;
seastar::gate gate;
// Prevents table from running major and minor compaction at the same time.
seastar::rwlock lock;

View File

@@ -789,8 +789,8 @@ future<reshape_config> make_reshape_config(const sstables::storage& storage, res
};
}
std::unique_ptr<sstable_set_impl> incremental_compaction_strategy::make_sstable_set(const table_state& ts) const {
return std::make_unique<partitioned_sstable_set>(ts.schema(), ts.token_range());
std::unique_ptr<sstable_set_impl> incremental_compaction_strategy::make_sstable_set(schema_ptr schema) const {
return std::make_unique<partitioned_sstable_set>(std::move(schema), false);
}
}

View File

@@ -105,7 +105,7 @@ public:
return name(type());
}
sstable_set make_sstable_set(const table_state& ts) const;
sstable_set make_sstable_set(schema_ptr schema) const;
compaction_backlog_tracker make_backlog_tracker() const;

View File

@@ -56,7 +56,7 @@ public:
return true;
}
virtual int64_t estimated_pending_compactions(table_state& table_s) const = 0;
virtual std::unique_ptr<sstable_set_impl> make_sstable_set(const table_state& ts) const;
virtual std::unique_ptr<sstable_set_impl> make_sstable_set(schema_ptr schema) const;
bool use_clustering_key_filter() const {
return _use_clustering_key_filter;

View File

@@ -98,7 +98,7 @@ public:
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, reshape_config cfg) const override;
virtual std::unique_ptr<sstable_set_impl> make_sstable_set(const table_state& ts) const override;
virtual std::unique_ptr<sstable_set_impl> make_sstable_set(schema_ptr schema) const override;
friend class ::incremental_backlog_tracker;
};

View File

@@ -70,7 +70,7 @@ public:
virtual compaction_strategy_type type() const override {
return compaction_strategy_type::leveled;
}
virtual std::unique_ptr<sstable_set_impl> make_sstable_set(const table_state& ts) const override;
virtual std::unique_ptr<sstable_set_impl> make_sstable_set(schema_ptr schema) const override;
virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() const override;

View File

@@ -33,7 +33,6 @@ namespace compaction {
class table_state {
public:
virtual ~table_state() {}
virtual dht::token_range token_range() const noexcept = 0;
virtual const schema_ptr& schema() const noexcept = 0;
// min threshold as defined by table.
virtual unsigned min_compaction_threshold() const noexcept = 0;

View File

@@ -150,7 +150,7 @@ public:
return compaction_strategy_type::time_window;
}
virtual std::unique_ptr<sstable_set_impl> make_sstable_set(const table_state& ts) const override;
virtual std::unique_ptr<sstable_set_impl> make_sstable_set(schema_ptr schema) const override;
virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() const override;

View File

@@ -255,9 +255,6 @@ public:
// Returns true iff given prefix has no missing components
bool is_full(managed_bytes_view v) const {
SCYLLA_ASSERT(AllowPrefixes == allow_prefixes::yes);
if (_types.size() == 0) {
return v.empty();
}
return std::distance(begin(v), end(v)) == (ssize_t)_types.size();
}
bool is_empty(managed_bytes_view v) const {

File diff suppressed because it is too large Load Diff

View File

@@ -10,25 +10,17 @@
#include <map>
#include <optional>
#include <set>
#include <seastar/core/future.hh>
#include <seastar/core/shared_ptr.hh>
#include <seastar/core/sstring.hh>
#include <seastar/util/bool_class.hh>
#include "seastarx.hh"
class compression_parameters;
class compressor {
sstring _name;
public:
enum class algorithm {
lz4,
lz4_with_dicts,
zstd,
zstd_with_dicts,
snappy,
deflate,
none,
};
compressor(sstring);
virtual ~compressor() {}
@@ -50,38 +42,44 @@ public:
virtual size_t compress_max_size(size_t input_len) const = 0;
/**
* Returns metadata which must be written together with the compressed
* data and used to construct a corresponding decompressor.
* Returns accepted option names for this compressor
*/
virtual std::set<sstring> option_names() const;
/**
* Returns original options used in instantiating this compressor
*/
virtual std::map<sstring, sstring> options() const;
static bool is_hidden_option_name(std::string_view sv);
/**
* Compressor class name.
*/
const sstring& name() const {
return _name;
}
std::string name() const;
// to cheaply bridge sstable compression options / maps
using opt_string = std::optional<sstring>;
using opt_getter = std::function<opt_string(const sstring&)>;
using ptr_type = shared_ptr<compressor>;
virtual algorithm get_algorithm() const = 0;
static ptr_type create(const sstring& name, const opt_getter&);
static ptr_type create(const std::map<sstring, sstring>&);
virtual std::optional<unsigned> get_dict_owner_for_test() const;
static thread_local const ptr_type lz4;
static thread_local const ptr_type snappy;
static thread_local const ptr_type deflate;
using ptr_type = std::unique_ptr<compressor>;
static sstring make_name(std::string_view short_name);
};
template<typename BaseType, typename... Args>
class class_registry;
using compressor_ptr = compressor::ptr_type;
using compressor_registry = class_registry<compressor, const typename compressor::opt_getter&>;
compressor_ptr make_lz4_sstable_compressor_for_tests();
// Per-table compression options, parsed and validated.
//
// Compression options are configured through the JSON-like `compression` entry in the schema.
// The CQL layer parses the text of that entry to a `map<string, string>`.
// A `compression_parameters` object is constructed from this map.
// and the passed keys and values are parsed and validated in the constructor.
// This object can be then used to create a `compressor` objects for sstable readers and writers.
class compression_parameters {
public:
using algorithm = compressor::algorithm;
static constexpr std::string_view name_prefix = "org.apache.cassandra.io.compress.";
static constexpr int32_t DEFAULT_CHUNK_LENGTH = 4 * 1024;
static constexpr double DEFAULT_CRC_CHECK_CHANCE = 1.0;
@@ -90,37 +88,26 @@ public:
static const sstring CHUNK_LENGTH_KB_ERR;
static const sstring CRC_CHECK_CHANCE;
private:
algorithm _algorithm;
compressor_ptr _compressor;
std::optional<int> _chunk_length;
std::optional<double> _crc_check_chance;
std::optional<int> _zstd_compression_level;
public:
compression_parameters();
compression_parameters(algorithm);
compression_parameters(compressor_ptr);
compression_parameters(const std::map<sstring, sstring>& options);
~compression_parameters();
compressor_ptr get_compressor() const { return _compressor; }
int32_t chunk_length() const { return _chunk_length.value_or(int(DEFAULT_CHUNK_LENGTH)); }
double crc_check_chance() const { return _crc_check_chance.value_or(double(DEFAULT_CRC_CHECK_CHANCE)); }
algorithm get_algorithm() const { return _algorithm; }
std::optional<int> zstd_compression_level() const { return _zstd_compression_level; }
using dicts_feature_enabled = bool_class<struct dicts_feature_enabled_tag>;
using dicts_usage_allowed = bool_class<struct dicts_usage_allowed_tag>;
void validate(dicts_feature_enabled, dicts_usage_allowed);
void validate();
std::map<sstring, sstring> get_options() const;
bool operator==(const compression_parameters& other) const;
bool compression_enabled() const {
return _algorithm != algorithm::none;
}
static compression_parameters no_compression() {
return compression_parameters(algorithm::none);
return compression_parameters(nullptr);
}
bool operator==(const compression_parameters&) const = default;
static std::string_view algorithm_to_name(algorithm);
static std::string algorithm_to_qualified_name(algorithm);
private:
static void validate_options(const std::map<sstring, sstring>&);
static algorithm name_to_algorithm(std::string_view name);
void validate_options(const std::map<sstring, sstring>&);
};

View File

@@ -825,9 +825,7 @@ maintenance_socket: ignore
# Guardrail to enable the deprecated feature of CREATE TABLE WITH COMPACT STORAGE.
# enable_create_table_with_compact_storage: false
# Control tablets for new keyspaces.
# Can be set to: disabled|enabled
#
# Enable tablets for new keyspaces.
# When enabled, newly created keyspaces will have tablets enabled by default.
# That can be explicitly disabled in the CREATE KEYSPACE query
# by using the `tablets = {'enabled': false}` replication option.
@@ -836,37 +834,6 @@ maintenance_socket: ignore
# unless tablets are explicitly enabled in the CREATE KEYSPACE query
# by using the `tablets = {'enabled': true}` replication option.
#
# When set to `enforced`, newly created keyspaces will always have tablets enabled by default.
# This prevents explicitly disabling tablets in the CREATE KEYSPACE query
# using the `tablets = {'enabled': false}` replication option.
# It also mandates a replication strategy supporting tablets, like
# NetworkTopologyStrategy
#
# Note that creating keyspaces with tablets enabled or disabled is irreversible.
# The `tablets` option cannot be changed using `ALTER KEYSPACE`.
tablets_mode_for_new_keyspaces: enabled
# Enforce RF-rack-valid keyspaces.
rf_rack_valid_keyspaces: false
#
# Alternator options
#
# Maximum number of items in single BatchWriteItem command. Default is 100.
# Note: DynamoDB has a hard-coded limit of 25.
# alternator_max_items_in_batch_write: 100
#
# io-streaming rate limiting
# When setting this value to be non-zero scylla throttles disk throughput for
# stream (network) activities such as backup, repair, tablet migration and more.
# This limit is useful for user queries so the network interface does
# not get saturated by streaming activities.
# The recommended value is 75% of network bandwidth
# E.g for i4i.8xlarge (https://github.com/scylladb/scylla-machine-image/tree/next/common/aws_net_params.json):
# network: 18.75 GiB/s --> 18750 Mib/s --> 1875 MB/s (from network bits to network bytes: divide by 10, not 8)
# Converted to disk bytes: 1875 * 1000 / 1024 = 1831 MB/s (disk wise)
# 75% of disk bytes is: 0.75 * 1831 = 1373 megabytes/s
# stream_io_throughput_mb_per_sec: 1373
#
enable_tablets: true

View File

@@ -610,10 +610,6 @@ perf_tests = set([
'test/perf/perf_sort_by_proximity',
])
perf_standalone_tests = set([
'test/perf/perf_generic_server',
])
raft_tests = set([
'test/raft/replication_test',
'test/raft/randomized_nemesis_test',
@@ -648,7 +644,7 @@ lto_binaries = set([
'scylla'
])
tests = scylla_tests | perf_tests | perf_standalone_tests | raft_tests
tests = scylla_tests | perf_tests | raft_tests
other = set([
'iotune',
@@ -684,8 +680,8 @@ arg_parser.add_argument('--compiler', action='store', dest='cxx', default='clang
help='C++ compiler path')
arg_parser.add_argument('--c-compiler', action='store', dest='cc', default='clang',
help='C compiler path')
add_tristate(arg_parser, name='dpdk', dest='dpdk', default=False,
help='Use dpdk (from seastar dpdk sources)')
add_tristate(arg_parser, name='dpdk', dest='dpdk',
help='Use dpdk (from seastar dpdk sources) (default=True for release builds)')
arg_parser.add_argument('--dpdk-target', action='store', dest='dpdk_target', default='',
help='Path to DPDK SDK target location (e.g. <DPDK SDK dir>/x86_64-native-linuxapp-gcc)')
arg_parser.add_argument('--debuginfo', action='store', dest='debuginfo', type=int, default=1,
@@ -816,7 +812,6 @@ scylla_core = (['message/messaging_service.cc',
'utils/rjson.cc',
'utils/human_readable.cc',
'utils/histogram_metrics_helper.cc',
'utils/io-wrappers.cc',
'utils/on_internal_error.cc',
'utils/pretty_printers.cc',
'utils/stream_compressor.cc',
@@ -830,7 +825,7 @@ scylla_core = (['message/messaging_service.cc',
'keys.cc',
'counters.cc',
'compress.cc',
'sstable_dict_autotrainer.cc',
'zstd.cc',
'sstables/sstables.cc',
'sstables/sstables_manager.cc',
'sstables/sstable_set.cc',
@@ -981,7 +976,6 @@ scylla_core = (['message/messaging_service.cc',
'cql3/result_set.cc',
'cql3/prepare_context.cc',
'db/batchlog_manager.cc',
'db/corrupt_data_handler.cc',
'db/commitlog/commitlog.cc',
'db/commitlog/commitlog_entry.cc',
'db/commitlog/commitlog_replayer.cc',
@@ -1035,18 +1029,14 @@ scylla_core = (['message/messaging_service.cc',
'utils/multiprecision_int.cc',
'utils/gz/crc_combine.cc',
'utils/gz/crc_combine_table.cc',
'utils/http.cc',
'utils/s3/aws_error.cc',
'utils/s3/client.cc',
'utils/s3/retryable_http_client.cc',
'utils/s3/retry_strategy.cc',
'utils/s3/s3_retry_strategy.cc',
'utils/s3/credentials_providers/aws_credentials_provider.cc',
'utils/s3/credentials_providers/environment_aws_credentials_provider.cc',
'utils/s3/credentials_providers/instance_profile_credentials_provider.cc',
'utils/s3/credentials_providers/sts_assume_role_credentials_provider.cc',
'utils/s3/credentials_providers/aws_credentials_provider_chain.cc',
'utils/s3/utils/manip_s3.cc',
'utils/advanced_rpc_compressor.cc',
'gms/version_generator.cc',
'gms/versioned_value.cc',
@@ -1116,7 +1106,7 @@ scylla_core = (['message/messaging_service.cc',
'utils/lister.cc',
'repair/repair.cc',
'repair/row_level.cc',
'streaming/table_check.cc',
'repair/table_check.cc',
'exceptions/exceptions.cc',
'auth/allow_all_authenticator.cc',
'auth/allow_all_authorizer.cc',
@@ -1170,7 +1160,6 @@ scylla_core = (['message/messaging_service.cc',
'ent/encryption/kms_key_provider.cc',
'ent/encryption/gcp_host.cc',
'ent/encryption/gcp_key_provider.cc',
'ent/encryption/utils.cc',
'ent/ldap/ldap_connection.cc',
'multishard_mutation_query.cc',
'reader_concurrency_semaphore.cc',
@@ -1193,7 +1182,6 @@ scylla_core = (['message/messaging_service.cc',
'service/raft/group0_state_id_handler.cc',
'service/raft/group0_state_machine.cc',
'service/raft/group0_state_machine_merger.cc',
'service/raft/group0_voter_handler.cc',
'service/raft/raft_sys_table_storage.cc',
'serializer.cc',
'release.cc',
@@ -1201,7 +1189,7 @@ scylla_core = (['message/messaging_service.cc',
'service/raft/raft_group_registry.cc',
'service/raft/discovery.cc',
'service/raft/raft_group0.cc',
'service/direct_failure_detector/failure_detector.cc',
'direct_failure_detector/failure_detector.cc',
'service/raft/raft_group0_client.cc',
'service/broadcast_tables/experimental/lang.cc',
'tasks/task_handler.cc',
@@ -1340,7 +1328,6 @@ idls = ['idl/gossip_digest.idl.hh',
'idl/replica_exception.idl.hh',
'idl/per_partition_rate_limit_info.idl.hh',
'idl/position_in_partition.idl.hh',
'idl/full_position.idl.hh',
'idl/experimental/broadcast_tables_lang.idl.hh',
'idl/storage_service.idl.hh',
'idl/join_node.idl.hh',
@@ -1392,6 +1379,7 @@ scylla_perfs = ['test/perf/perf_alternator.cc',
'test/perf/perf_tablets.cc',
'test/perf/tablet_load_balancing.cc',
'test/perf/perf.cc',
'test/lib/alternator_test_env.cc',
'test/lib/cql_test_env.cc',
'test/lib/log.cc',
'test/lib/test_services.cc',
@@ -1482,16 +1470,13 @@ for t in sorted(scylla_tests):
else:
deps[t] += scylla_core + alternator + idls + scylla_tests_generic_dependencies
for t in sorted(perf_tests | perf_standalone_tests):
deps[t] = [t + '.cc'] + scylla_tests_dependencies
deps[t] += ['test/perf/perf.cc', 'seastar/tests/perf/linux_perf_event.cc']
perf_tests_seastar_deps = [
'seastar/tests/perf/perf_tests.cc'
]
for t in sorted(perf_tests):
deps[t] += perf_tests_seastar_deps
deps[t] = [t + '.cc'] + scylla_tests_dependencies + perf_tests_seastar_deps
deps[t] += ['test/perf/perf.cc', 'seastar/tests/perf/linux_perf_event.cc']
deps['test/boost/combined_tests'] += [
'test/boost/aggregate_fcts_test.cc',
@@ -1516,7 +1501,6 @@ deps['test/boost/combined_tests'] += [
'test/boost/filtering_test.cc',
'test/boost/group0_cmd_merge_test.cc',
'test/boost/group0_test.cc',
'test/boost/group0_voter_calculator_test.cc',
'test/boost/index_with_paging_test.cc',
'test/boost/json_cql_query_test.cc',
'test/boost/large_paging_state_test.cc',
@@ -1528,7 +1512,6 @@ deps['test/boost/combined_tests'] += [
'test/boost/mutation_writer_test.cc',
'test/boost/network_topology_strategy_test.cc',
'test/boost/per_partition_rate_limit_test.cc',
'test/boost/pluggable_test.cc',
'test/boost/querier_cache_test.cc',
'test/boost/query_processor_test.cc',
'test/boost/reader_concurrency_semaphore_test.cc',
@@ -1541,7 +1524,6 @@ deps['test/boost/combined_tests'] += [
'test/boost/secondary_index_test.cc',
'test/boost/sessions_test.cc',
'test/boost/sstable_compaction_test.cc',
'test/boost/sstable_compressor_factory_test.cc',
'test/boost/sstable_directory_test.cc',
'test/boost/sstable_set_test.cc',
'test/boost/statement_restrictions_test.cc',
@@ -1604,8 +1586,8 @@ deps['test/boost/rust_test'] += ['rust/inc/src/lib.rs']
deps['test/raft/replication_test'] = ['test/raft/replication_test.cc', 'test/raft/replication.cc', 'test/raft/helpers.cc', 'test/lib/eventually.cc'] + scylla_raft_dependencies
deps['test/raft/raft_server_test'] = ['test/raft/raft_server_test.cc', 'test/raft/replication.cc', 'test/raft/helpers.cc', 'test/lib/eventually.cc'] + scylla_raft_dependencies
deps['test/raft/randomized_nemesis_test'] = ['test/raft/randomized_nemesis_test.cc', 'service/direct_failure_detector/failure_detector.cc', 'test/raft/helpers.cc'] + scylla_raft_dependencies
deps['test/raft/failure_detector_test'] = ['test/raft/failure_detector_test.cc', 'service/direct_failure_detector/failure_detector.cc', 'test/raft/helpers.cc'] + scylla_raft_dependencies
deps['test/raft/randomized_nemesis_test'] = ['test/raft/randomized_nemesis_test.cc', 'direct_failure_detector/failure_detector.cc', 'test/raft/helpers.cc'] + scylla_raft_dependencies
deps['test/raft/failure_detector_test'] = ['test/raft/failure_detector_test.cc', 'direct_failure_detector/failure_detector.cc', 'test/raft/helpers.cc'] + scylla_raft_dependencies
deps['test/raft/many_test'] = ['test/raft/many_test.cc', 'test/raft/replication.cc', 'test/raft/helpers.cc', 'test/lib/eventually.cc'] + scylla_raft_dependencies
deps['test/raft/fsm_test'] = ['test/raft/fsm_test.cc', 'test/raft/helpers.cc', 'test/lib/log.cc'] + scylla_raft_dependencies
deps['test/raft/etcd_test'] = ['test/raft/etcd_test.cc', 'test/raft/helpers.cc', 'test/lib/log.cc'] + scylla_raft_dependencies
@@ -1745,11 +1727,19 @@ def generate_version(date_stamp):
# the program headers.
def dynamic_linker_option():
gcc_linker_output = subprocess.check_output(['gcc', '-###', '/dev/null', '-o', 't'], stderr=subprocess.STDOUT).decode('utf-8')
original_dynamic_linker = re.search('"?-dynamic-linker"?[ =]"?([^ "]*)"?[ \n]', gcc_linker_output).groups()[0]
original_dynamic_linker = re.search('-dynamic-linker ([^ ]*)', gcc_linker_output).groups()[0]
# gdb has a SO_NAME_MAX_PATH_SIZE of 512, so limit the path size to
# that. The 512 includes the null at the end, hence the 511 below.
dynamic_linker = '/' * (511 - len(original_dynamic_linker)) + original_dynamic_linker
employ_ld_trickery = True
# distro-specific setup
if os.environ.get('NIX_CC'):
employ_ld_trickery = False
if employ_ld_trickery:
# gdb has a SO_NAME_MAX_PATH_SIZE of 512, so limit the path size to
# that. The 512 includes the null at the end, hence the 511 below.
dynamic_linker = '/' * (511 - len(original_dynamic_linker)) + original_dynamic_linker
else:
dynamic_linker = original_dynamic_linker
return f'--dynamic-linker={dynamic_linker}'
forced_ldflags = '-Wl,'
@@ -1885,8 +1875,9 @@ def prepare_advanced_optimizations(*, modes, build_modes, args):
submode['profile_target'] = profile_target
submode['lib_cflags'] += f" -f{it}profile-generate={os.path.realpath(outdir)}/{submode_name} {conservative_opts}"
submode['cxx_ld_flags'] += f" -f{it}profile-generate={os.path.realpath(outdir)}/{submode_name} {conservative_opts}"
# Profile collection depends on java tools because we use cassandra-stress as the load.
submode['profile_recipe'] = textwrap.dedent(f"""\
build $builddir/{submode_name}/profiles/prof.profdata: train $builddir/{submode_name}/scylla
build $builddir/{submode_name}/profiles/prof.profdata: train $builddir/{submode_name}/scylla | dist-tools-tar
build $builddir/{submode_name}/profiles/merged.profdata: merge_profdata $builddir/{submode_name}/profiles/prof.profdata {profile_target or str()}
""")
submode['is_profile'] = True
@@ -1970,6 +1961,8 @@ def configure_seastar(build_dir, mode, mode_config):
seastar_cmake_args += ['-DSeastar_STACK_GUARDS={}'.format(stack_guards)]
dpdk = args.dpdk
if dpdk is None:
dpdk = platform.machine() == 'x86_64' and mode == 'release'
if dpdk:
seastar_cmake_args += ['-DSeastar_DPDK=ON', '-DSeastar_DPDK_MACHINE=westmere']
if args.split_dwarf:
@@ -2634,10 +2627,11 @@ def write_build_file(f,
f.write(f' mode = {mode}\n')
f.write(f'build dist-server-{mode}: phony $builddir/dist/{mode}/redhat $builddir/dist/{mode}/debian\n')
f.write(f'build dist-server-debuginfo-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-debuginfo-{scylla_version}-{scylla_release}.{arch}.tar.gz\n')
f.write(f'build dist-tools-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-tools-{scylla_version}-{scylla_release}.noarch.tar.gz dist-tools-rpm dist-tools-deb\n')
f.write(f'build dist-cqlsh-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-cqlsh-{scylla_version}-{scylla_release}.{arch}.tar.gz dist-cqlsh-rpm dist-cqlsh-deb\n')
f.write(f'build dist-python3-{mode}: phony dist-python3-tar dist-python3-rpm dist-python3-deb\n')
f.write(f'build dist-unified-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-unified-{scylla_version}-{scylla_release}.{arch}.tar.gz\n')
f.write(f'build $builddir/{mode}/dist/tar/{scylla_product}-unified-{scylla_version}-{scylla_release}.{arch}.tar.gz: unified $builddir/{mode}/dist/tar/{scylla_product}-{scylla_version}-{scylla_release}.{arch}.tar.gz $builddir/{mode}/dist/tar/{scylla_product}-python3-{scylla_version}-{scylla_release}.{arch}.tar.gz $builddir/{mode}/dist/tar/{scylla_product}-cqlsh-{scylla_version}-{scylla_release}.{arch}.tar.gz | always\n')
f.write(f'build $builddir/{mode}/dist/tar/{scylla_product}-unified-{scylla_version}-{scylla_release}.{arch}.tar.gz: unified $builddir/{mode}/dist/tar/{scylla_product}-{scylla_version}-{scylla_release}.{arch}.tar.gz $builddir/{mode}/dist/tar/{scylla_product}-python3-{scylla_version}-{scylla_release}.{arch}.tar.gz $builddir/{mode}/dist/tar/{scylla_product}-tools-{scylla_version}-{scylla_release}.noarch.tar.gz $builddir/{mode}/dist/tar/{scylla_product}-cqlsh-{scylla_version}-{scylla_release}.{arch}.tar.gz | always\n')
f.write(f' mode = {mode}\n')
f.write(f'build $builddir/{mode}/dist/tar/{scylla_product}-unified-package-{scylla_version}-{scylla_release}.tar.gz: copy $builddir/{mode}/dist/tar/{scylla_product}-unified-{scylla_version}-{scylla_release}.{arch}.tar.gz\n')
f.write(f'build $builddir/{mode}/dist/tar/{scylla_product}-unified-{arch}-package-{scylla_version}-{scylla_release}.tar.gz: copy $builddir/{mode}/dist/tar/{scylla_product}-unified-{scylla_version}-{scylla_release}.{arch}.tar.gz\n')
@@ -2678,6 +2672,17 @@ def write_build_file(f,
rule build-submodule-deb
command = cd $dir && ./reloc/build_deb.sh --reloc-pkg $artifact
build tools/java/build/{scylla_product}-tools-{scylla_version}-{scylla_release}.noarch.tar.gz: build-submodule-reloc | $builddir/SCYLLA-PRODUCT-FILE $builddir/SCYLLA-VERSION-FILE $builddir/SCYLLA-RELEASE-FILE
reloc_dir = tools/java
build dist-tools-rpm: build-submodule-rpm tools/java/build/{scylla_product}-tools-{scylla_version}-{scylla_release}.noarch.tar.gz
dir = tools/java
artifact = build/{scylla_product}-tools-{scylla_version}-{scylla_release}.noarch.tar.gz
build dist-tools-deb: build-submodule-deb tools/java/build/{scylla_product}-tools-{scylla_version}-{scylla_release}.noarch.tar.gz
dir = tools/java
artifact = build/{scylla_product}-tools-{scylla_version}-{scylla_release}.noarch.tar.gz
build dist-tools-tar: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-tools-{scylla_version}-{scylla_release}.noarch.tar.gz'.format(mode=mode, scylla_product=scylla_product, scylla_version=scylla_version, scylla_release=scylla_release) for mode in default_modes])}
build dist-tools: phony dist-tools-tar dist-tools-rpm dist-tools-deb
build tools/cqlsh/build/{scylla_product}-cqlsh-{scylla_version}-{scylla_release}.{arch}.tar.gz: build-submodule-reloc | $builddir/SCYLLA-PRODUCT-FILE $builddir/SCYLLA-VERSION-FILE $builddir/SCYLLA-RELEASE-FILE
reloc_dir = tools/cqlsh
build dist-cqlsh-rpm: build-submodule-rpm tools/cqlsh/build/{scylla_product}-cqlsh-{scylla_version}-{scylla_release}.{arch}.tar.gz
@@ -2700,11 +2705,11 @@ def write_build_file(f,
artifact = build/{scylla_product}-python3-{scylla_version}-{scylla_release}.{arch}.tar.gz
build dist-python3-tar: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-python3-{scylla_version}-{scylla_release}.{arch}.tar.gz'.format(mode=mode, scylla_product=scylla_product, arch=arch, scylla_version=scylla_version, scylla_release=scylla_release) for mode in default_modes])}
build dist-python3: phony dist-python3-tar dist-python3-rpm dist-python3-deb
build dist-deb: phony dist-server-deb dist-python3-deb dist-cqlsh-deb
build dist-rpm: phony dist-server-rpm dist-python3-rpm dist-cqlsh-rpm
build dist-tar: phony dist-unified-tar dist-server-tar dist-python3-tar dist-cqlsh-tar
build dist-deb: phony dist-server-deb dist-python3-deb dist-tools-deb dist-cqlsh-deb
build dist-rpm: phony dist-server-rpm dist-python3-rpm dist-tools-rpm dist-cqlsh-rpm
build dist-tar: phony dist-unified-tar dist-server-tar dist-python3-tar dist-tools-tar dist-cqlsh-tar
build dist: phony dist-unified dist-server dist-python3 dist-cqlsh
build dist: phony dist-unified dist-server dist-python3 dist-tools dist-cqlsh
'''))
f.write(textwrap.dedent(f'''\
@@ -2717,10 +2722,12 @@ def write_build_file(f,
build $builddir/{mode}/dist/tar/{scylla_product}-python3-{scylla_version}-{scylla_release}.{arch}.tar.gz: copy tools/python3/build/{scylla_product}-python3-{scylla_version}-{scylla_release}.{arch}.tar.gz
build $builddir/{mode}/dist/tar/{scylla_product}-python3-package.tar.gz: copy tools/python3/build/{scylla_product}-python3-{scylla_version}-{scylla_release}.{arch}.tar.gz
build $builddir/{mode}/dist/tar/{scylla_product}-python3-{arch}-package.tar.gz: copy tools/python3/build/{scylla_product}-python3-{scylla_version}-{scylla_release}.{arch}.tar.gz
build $builddir/{mode}/dist/tar/{scylla_product}-tools-{scylla_version}-{scylla_release}.noarch.tar.gz: copy tools/java/build/{scylla_product}-tools-{scylla_version}-{scylla_release}.noarch.tar.gz
build $builddir/{mode}/dist/tar/{scylla_product}-tools-package.tar.gz: copy tools/java/build/{scylla_product}-tools-{scylla_version}-{scylla_release}.noarch.tar.gz
build $builddir/{mode}/dist/tar/{scylla_product}-cqlsh-{scylla_version}-{scylla_release}.{arch}.tar.gz: copy tools/cqlsh/build/{scylla_product}-cqlsh-{scylla_version}-{scylla_release}.{arch}.tar.gz
build $builddir/{mode}/dist/tar/{scylla_product}-cqlsh-package.tar.gz: copy tools/cqlsh/build/{scylla_product}-cqlsh-{scylla_version}-{scylla_release}.{arch}.tar.gz
build {mode}-dist: phony dist-server-{mode} dist-server-debuginfo-{mode} dist-python3-{mode} dist-unified-{mode} dist-cqlsh-{mode}
build {mode}-dist: phony dist-server-{mode} dist-server-debuginfo-{mode} dist-python3-{mode} dist-tools-{mode} dist-unified-{mode} dist-cqlsh-{mode}
build dist-{mode}: phony {mode}-dist
build dist-check-{mode}: dist-check
mode = {mode}

View File

@@ -709,23 +709,17 @@ batchStatement returns [std::unique_ptr<cql3::statements::raw::batch_statement>
: K_BEGIN
( K_UNLOGGED { type = btype::UNLOGGED; } | K_COUNTER { type = btype::COUNTER; } )?
K_BATCH ( usingClause[attrs] )?
( s=batchStatementObjective ';'?
{
auto&& stmt = *$s.statement;
stmt->add_raw(sstring{$s.text});
statements.push_back(std::move(stmt));
} )*
( s=batchStatementObjective ';'? { statements.push_back(std::move(s)); } )*
K_APPLY K_BATCH
{
$expr = std::make_unique<cql3::statements::raw::batch_statement>(type, std::move(attrs), std::move(statements));
}
;
batchStatementObjective returns [::lw_shared_ptr<std::unique_ptr<cql3::statements::raw::modification_statement>> statement]
@init { using original_ret_type = std::unique_ptr<cql3::statements::raw::modification_statement>; }
: i=insertStatement { $statement = make_lw_shared<original_ret_type>(std::move(i)); }
| u=updateStatement { $statement = make_lw_shared<original_ret_type>(std::move(u)); }
| d=deleteStatement { $statement = make_lw_shared<original_ret_type>(std::move(d)); }
batchStatementObjective returns [std::unique_ptr<cql3::statements::raw::modification_statement> statement]
: i=insertStatement { $statement = std::move(i); }
| u=updateStatement { $statement = std::move(u); }
| d=deleteStatement { $statement = std::move(d); }
;
dropAggregateStatement returns [std::unique_ptr<cql3::statements::drop_aggregate_statement> expr]

View File

@@ -82,7 +82,7 @@ public:
const sstring& text() const;
sstring to_string() const;
virtual sstring to_string() const;
sstring to_cql_string() const;
friend std::hash<column_identifier_raw>;

View File

@@ -48,16 +48,14 @@ const std::chrono::minutes prepared_statements_cache::entry_expiry = std::chrono
struct query_processor::remote {
remote(service::migration_manager& mm, service::mapreduce_service& fwd,
service::storage_service& ss, service::raft_group0_client& group0_client)
: mm(mm), mapreducer(fwd), ss(ss), group0_client(group0_client)
, gate("query_processor::remote")
{}
: mm(mm), mapreducer(fwd), ss(ss), group0_client(group0_client) {}
service::migration_manager& mm;
service::mapreduce_service& mapreducer;
service::storage_service& ss;
service::raft_group0_client& group0_client;
seastar::named_gate gate;
seastar::gate gate;
};
bool query_processor::topology_global_queue_empty() {

View File

@@ -13,7 +13,6 @@
#include <seastar/core/on_internal_error.hh>
#include <stdexcept>
#include "alter_keyspace_statement.hh"
#include "locator/tablets.hh"
#include "prepared_statement.hh"
#include "service/migration_manager.hh"
#include "service/storage_proxy.hh"
@@ -26,7 +25,6 @@
#include "create_keyspace_statement.hh"
#include "gms/feature_service.hh"
#include "replica/database.hh"
#include "db/config.hh"
using namespace std::string_literals;
@@ -196,9 +194,9 @@ cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_proce
event::schema_change::target_type target_type = event::schema_change::target_type::KEYSPACE;
auto ks = qp.db().find_keyspace(_name);
auto ks_md = ks.metadata();
const auto tmptr = qp.proxy().get_token_metadata_ptr();
const auto& tm = *qp.proxy().get_token_metadata_ptr();
const auto& feat = qp.proxy().features();
auto ks_md_update = _attrs->as_ks_metadata_update(ks_md, *tmptr, feat);
auto ks_md_update = _attrs->as_ks_metadata_update(ks_md, tm, feat);
std::vector<mutation> muts;
std::vector<sstring> warnings;
auto old_ks_options = get_old_options_flattened(ks);
@@ -267,36 +265,6 @@ cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_proce
muts.insert(muts.begin(), schema_mutations.begin(), schema_mutations.end());
}
// If `rf_rack_valid_keyspaces` is enabled, it's forbidden to perform a schema change that
// would lead to an RF-rack-valid keyspace. Verify that this change does not.
// For more context, see: scylladb/scylladb#23071.
if (qp.db().get_config().rf_rack_valid_keyspaces()) {
auto rs = locator::abstract_replication_strategy::create_replication_strategy(
ks_md_update->strategy_name(),
locator::replication_strategy_params(ks_md_update->strategy_options(), ks_md_update->initial_tablets()));
try {
// There are two things to note here:
// 1. We hold a group0_guard, so it's correct to check this here.
// The topology or schema cannot change while we're performing this query.
// 2. The replication strategy we use here does NOT represent the actual state
// we will arrive at after applying the schema change. For instance, if the user
// did not specify the RF for some of the DCs, it's equal to 0 in the replication
// strategy we pass to this function, while in reality that means that the RF
// will NOT change. That is not a problem:
// - RF=0 is valid for all DCs, so it won't trigger an exception on its own,
// - the keyspace must've been RF-rack-valid before this change. We check that
// condition for all keyspaces at startup.
// The second hyphen is not really true because currently topological changes can
// disturb it (see scylladb/scylladb#23345), but we ignore that.
locator::assert_rf_rack_valid_keyspace(_name, tmptr, *rs);
} catch (const std::exception& e) {
// There's no guarantee what the type of the exception will be, so we need to
// wrap it manually here in a type that can be passed to the user.
throw exceptions::invalid_request_exception(e.what());
}
}
auto ret = ::make_shared<event::schema_change>(
event::schema_change::change_type::UPDATED,
target_type,

View File

@@ -8,7 +8,6 @@
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
*/
#include "cdc/log.hh"
#include "utils/assert.hh"
#include <seastar/core/coroutine.hh>
#include "cql3/query_options.hh"
@@ -28,7 +27,6 @@
#include "db/view/view.hh"
#include "cql3/query_processor.hh"
#include "cdc/cdc_extension.hh"
#include "cdc/cdc_partitioner.hh"
namespace cql3 {
@@ -286,59 +284,12 @@ void alter_table_statement::drop_column(const query_options& options, const sche
}
}
std::pair<schema_ptr, std::vector<view_ptr>> alter_table_statement::prepare_schema_update(data_dictionary::database db, const query_options& options) const {
std::pair<schema_builder, std::vector<view_ptr>> alter_table_statement::prepare_schema_update(data_dictionary::database db, const query_options& options) const {
auto s = validation::validate_column_family(db, keyspace(), column_family());
if (s->is_view()) {
throw exceptions::invalid_request_exception("Cannot use ALTER TABLE on Materialized View");
}
const bool is_cdc_log_table = cdc::is_log_for_some_table(db.real_database(), s->ks_name(), s->cf_name());
// Only a CDC log table will have this partitioner name. User tables should
// not be able to set this. Note that we perform a similar check when trying to
// re-enable CDC for a table, when the log table has been replaced by a user table.
// For better visualization of the above, consider this
//
// cqlsh> CREATE TABLE ks.t (p int PRIMARY KEY, v int) WITH cdc = {'enabled': true};
// cqlsh> INSERT INTO ks.t (p, v) VALUES (1, 2);
// cqlsh> ALTER TABLE ks.t WITH cdc = {'enabled': false};
// cqlsh> DESC TABLE ks.t_scylla_cdc_log WITH INTERNALS; # Save this output!
// cqlsh> DROP TABLE ks.t_scylla_cdc_log;
// cqlsh> [Recreate the log table using the received statement]
// cqlsh> ALTER TABLE ks.t WITH cdc = {'enabled': true};
//
// InvalidRequest: Error from server: code=2200 [Invalid query] message="Cannot create CDC log
// table for table ks.t because a table of name ks.t_scylla_cdc_log already exists"
//
// See commit adda43edc75b901b2329bca8f3eb74596698d05f for more information on THAT case.
// We reuse the same technique here.
const bool was_cdc_log_table = s->get_partitioner().name() == cdc::cdc_partitioner::classname;
if (_column_changes.size() != 0 && is_cdc_log_table) {
throw exceptions::invalid_request_exception(
"You cannot modify the set of columns of a CDC log table directly. "
"Modify the base table instead.");
}
if (_column_changes.size() != 0 && was_cdc_log_table) {
throw exceptions::invalid_request_exception(
"You cannot modify the set of columns of a CDC log table directly. "
"Although the base table has deactivated CDC, this table will continue being "
"a CDC log table until it is dropped. If you want to modify the columns in it, "
"you can only do that by reenabling CDC on the base table, which will reattach "
"this log table. Then you will be able to modify the columns in the base table, "
"and that will have effect on the log table too. Modifying the columns of a CDC "
"log table directly is never allowed.");
}
if (_renames.size() != 0 && is_cdc_log_table) {
throw exceptions::invalid_request_exception("Cannot rename a column of a CDC log table.");
}
if (_renames.size() != 0 && was_cdc_log_table) {
throw exceptions::invalid_request_exception(
"You cannot rename a column of a CDC log table. Although the base table "
"has deactivated CDC, this table will continue being a CDC log table until it "
"is dropped.");
}
auto cfm = schema_builder(s);
if (_properties->get_id()) {
@@ -426,45 +377,41 @@ std::pair<schema_ptr, std::vector<view_ptr>> alter_table_statement::prepare_sche
validate_column_rename(db, *s, *from, *to);
cfm.rename_column(from->name(), to->name());
}
// New view schemas contain the new column names, so we need to base them on the
// new base schema.
schema_ptr new_base_schema = cfm.build();
// If the view includes a renamed column, it must be renamed in
// the view table and the definition.
for (auto&& view : cf.views()) {
schema_builder builder(view);
std::vector<std::pair<::shared_ptr<column_identifier>, ::shared_ptr<column_identifier>>> view_renames;
for (auto&& entry : _renames) {
auto from = entry.first->prepare_column_identifier(*s);
// If the view includes a renamed column, it must be renamed in
// the view table and the definition.
for (auto&& view : cf.views()) {
if (view->get_column_definition(from->name())) {
schema_builder builder(view);
auto view_from = entry.first->prepare_column_identifier(*view);
auto view_to = entry.second->prepare_column_identifier(*view);
validate_column_rename(db, *view, *view_from, *view_to);
builder.rename_column(view_from->name(), view_to->name());
view_renames.emplace_back(view_from, view_to);
auto new_where = util::rename_column_in_where_clause(
view->view_info()->where_clause(),
column_identifier::raw(view_from->text(), true),
column_identifier::raw(view_to->text(), true),
cql3::dialect{});
builder.with_view_info(view->view_info()->base_id(), view->view_info()->base_name(),
view->view_info()->include_all_columns(), std::move(new_where));
view_updates.push_back(view_ptr(builder.build()));
}
}
if (!view_renames.empty()) {
auto new_where = util::rename_columns_in_where_clause(
view->view_info()->where_clause(),
view_renames,
cql3::dialect{});
builder.with_view_info(new_base_schema, view->view_info()->include_all_columns(), std::move(new_where));
view_updates.push_back(view_ptr(builder.build()));
}
}
return make_pair(std::move(new_base_schema), std::move(view_updates));
break;
}
return make_pair(cfm.build(), std::move(view_updates));
return make_pair(std::move(cfm), std::move(view_updates));
}
future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, std::vector<mutation>, cql3::cql_warnings_vec>>
alter_table_statement::prepare_schema_mutations(query_processor& qp, const query_options& options, api::timestamp_type ts) const {
data_dictionary::database db = qp.db();
auto [s, view_updates] = prepare_schema_update(db, options);
auto m = co_await service::prepare_column_family_update_announcement(qp.proxy(), std::move(s), std::move(view_updates), ts);
auto [cfm, view_updates] = prepare_schema_update(db, options);
auto m = co_await service::prepare_column_family_update_announcement(qp.proxy(), cfm.build(), std::move(view_updates), ts);
using namespace cql_transport;
auto ret = ::make_shared<event::schema_change>(

View File

@@ -69,7 +69,7 @@ private:
void add_column(const query_options& options, const schema& schema, data_dictionary::table cf, schema_builder& cfm, std::vector<view_ptr>& view_updates, const column_identifier& column_name, const cql3_type validator, const column_definition* def, bool is_static) const;
void alter_column(const query_options& options, const schema& schema, data_dictionary::table cf, schema_builder& cfm, std::vector<view_ptr>& view_updates, const column_identifier& column_name, const cql3_type validator, const column_definition* def, bool is_static) const;
void drop_column(const query_options& options, const schema& schema, data_dictionary::table cf, schema_builder& cfm, std::vector<view_ptr>& view_updates, const column_identifier& column_name, const cql3_type validator, const column_definition* def, bool is_static) const;
std::pair<schema_ptr, std::vector<view_ptr>> prepare_schema_update(data_dictionary::database db, const query_options& options) const;
std::pair<schema_builder, std::vector<view_ptr>> prepare_schema_update(data_dictionary::database db, const query_options& options) const;
};
class alter_table_statement::raw_statement : public raw::cf_statement {

View File

@@ -23,7 +23,6 @@
#include "db/per_partition_rate_limit_options.hh"
#include "db/tablet_options.hh"
#include "utils/bloom_calculations.hh"
#include "db/config.hh"
#include <boost/algorithm/string/predicate.hpp>
@@ -136,9 +135,7 @@ void cf_prop_defs::validate(const data_dictionary::database db, sstring ks_name,
throw exceptions::configuration_exception(sstring("Missing sub-option '") + compression_parameters::SSTABLE_COMPRESSION + "' for the '" + KW_COMPRESSION + "' option.");
}
compression_parameters cp(*compression_options);
cp.validate(
compression_parameters::dicts_feature_enabled(bool(db.features().sstable_compression_dicts)),
compression_parameters::dicts_usage_allowed(db.get_config().sstable_compression_dictionaries_allow_in_ddl()));
cp.validate();
}
auto per_partition_rate_limit_options = get_per_partition_rate_limit_options(schema_extensions);

View File

@@ -53,8 +53,7 @@ const sstring& cf_statement::keyspace() const
const sstring& cf_statement::column_family() const
{
thread_local static sstring empty = "";
return bool(_cf_name) ? _cf_name->get_column_family() : empty;
return _cf_name->get_column_family();
}
}

View File

@@ -11,8 +11,6 @@
#include <seastar/core/coroutine.hh>
#include "cql3/statements/create_keyspace_statement.hh"
#include "cql3/statements/ks_prop_defs.hh"
#include "exceptions/exceptions.hh"
#include "locator/tablets.hh"
#include "prepared_statement.hh"
#include "data_dictionary/data_dictionary.hh"
#include "data_dictionary/keyspace_metadata.hh"
@@ -92,14 +90,14 @@ void create_keyspace_statement::validate(query_processor& qp, const service::cli
future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, std::vector<mutation>, cql3::cql_warnings_vec>> create_keyspace_statement::prepare_schema_mutations(query_processor& qp, const query_options&, api::timestamp_type ts) const {
using namespace cql_transport;
const auto tmptr = qp.proxy().get_token_metadata_ptr();
const auto& tm = *qp.proxy().get_token_metadata_ptr();
const auto& feat = qp.proxy().features();
const auto& cfg = qp.db().get_config();
std::vector<mutation> m;
std::vector<sstring> warnings;
try {
auto ksm = _attrs->as_ks_metadata(_name, *tmptr, feat, cfg);
auto ksm = _attrs->as_ks_metadata(_name, tm, feat, cfg);
m = service::prepare_new_keyspace_announcement(qp.db().real_database(), ksm, ts);
// If the new keyspace uses tablets, as long as there are features
// which aren't supported by tablets we want to warn the user that
@@ -113,28 +111,14 @@ future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, std::vector
if (rs->uses_tablets()) {
warnings.push_back(
"Tables in this keyspace will be replicated using Tablets "
"and will not support Materialized Views, Secondary Indexes, CDC, LWT and counters features. "
"To use Materialized Views, Secondary Indexes, CDC, LWT or counters, drop this keyspace and re-create it "
"without tablets by adding AND TABLETS = {'enabled': false} to the CREATE KEYSPACE statement.");
"and will not support CDC, LWT and counters features. "
"To use CDC, LWT or counters, drop this keyspace and re-create it "
"without tablets by adding AND TABLETS = {'enabled': false} "
"to the CREATE KEYSPACE statement.");
if (ksm->initial_tablets().value()) {
warnings.push_back("Keyspace `initial` tablets option is deprecated. Use per-table tablet options instead.");
}
}
// If `rf_rack_valid_keyspaces` is enabled, it's forbidden to create an RF-rack-invalid keyspace.
// Verify that it's RF-rack-valid.
// For more context, see: scylladb/scylladb#23071.
if (cfg.rf_rack_valid_keyspaces()) {
try {
// We hold a group0_guard, so it's correct to check this here.
// The topology or schema cannot change while we're performing this query.
locator::assert_rf_rack_valid_keyspace(_name, tmptr, *rs);
} catch (const std::exception& e) {
// There's no guarantee what the type of the exception will be, so we need to
// wrap it manually here in a type that can be passed to the user.
throw exceptions::invalid_request_exception(e.what());
}
}
} catch (const exceptions::already_exists_exception& e) {
if (!_if_not_exists) {
co_return coroutine::exception(std::current_exception());
@@ -236,6 +220,9 @@ std::vector<sstring> check_against_restricted_replication_strategies(
// We ignore errors (non-number, negative number, etc.) here,
// these are checked and reported elsewhere.
for (auto opt : attrs.get_replication_options()) {
if (opt.first == sstring("initial_tablets")) {
continue;
}
try {
auto rf = std::stol(opt.second);
if (rf > 0) {

View File

@@ -378,7 +378,7 @@ std::pair<view_ptr, cql3::cql_warnings_vec> create_view_statement::prepare_view(
}
auto where_clause_text = util::relations_to_where_clause(_where_clause);
builder.with_view_info(schema, included.empty(), std::move(where_clause_text));
builder.with_view_info(schema->id(), schema->cf_name(), included.empty(), std::move(where_clause_text));
return std::make_pair(view_ptr(builder.build()), std::move(warnings));
}

View File

@@ -150,7 +150,7 @@ data_dictionary::storage_options ks_prop_defs::get_storage_options() const {
return opts;
}
std::optional<unsigned> ks_prop_defs::get_initial_tablets(std::optional<unsigned> default_value, bool enforce_tablets) const {
std::optional<unsigned> ks_prop_defs::get_initial_tablets(std::optional<unsigned> default_value) const {
auto tablets_options = get_map(KW_TABLETS);
if (!tablets_options) {
return default_value;
@@ -165,9 +165,6 @@ std::optional<unsigned> ks_prop_defs::get_initial_tablets(std::optional<unsigned
if (enabled == "true") {
// nothing
} else if (enabled == "false") {
if (enforce_tablets) {
throw exceptions::configuration_exception("Cannot disable tablets for keyspace since tablets are enforced using the `tablets_mode_for_new_keyspaces: enforced` config option.");
}
return std::nullopt;
} else {
throw exceptions::configuration_exception(sstring("Tablets enabled value must be true or false; found: ") + enabled);
@@ -202,10 +199,8 @@ bool ks_prop_defs::get_durable_writes() const {
lw_shared_ptr<data_dictionary::keyspace_metadata> ks_prop_defs::as_ks_metadata(sstring ks_name, const locator::token_metadata& tm, const gms::feature_service& feat, const db::config& cfg) {
auto sc = get_replication_strategy_class().value();
// if tablets options have not been specified, but tablets are globally enabled, set the value to 0 for N.T.S. only
auto enable_tablets = feat.tablets && cfg.enable_tablets_by_default();
std::optional<unsigned> default_initial_tablets = enable_tablets && locator::abstract_replication_strategy::to_qualified_class_name(sc) == "org.apache.cassandra.locator.NetworkTopologyStrategy"
? std::optional<unsigned>(0) : std::nullopt;
auto initial_tablets = get_initial_tablets(default_initial_tablets, cfg.enforce_tablets());
auto enable_tablets = feat.tablets && cfg.enable_tablets();
auto initial_tablets = get_initial_tablets(enable_tablets && locator::abstract_replication_strategy::to_qualified_class_name(sc) == "org.apache.cassandra.locator.NetworkTopologyStrategy" ? std::optional<unsigned>(0) : std::nullopt);
auto options = prepare_options(sc, tm, get_replication_options());
return data_dictionary::keyspace_metadata::new_keyspace(ks_name, sc,
std::move(options), initial_tablets, get_boolean(KW_DURABLE_WRITES, true), get_storage_options());

View File

@@ -60,7 +60,7 @@ public:
void validate();
std::map<sstring, sstring> get_replication_options() const;
std::optional<sstring> get_replication_strategy_class() const;
std::optional<unsigned> get_initial_tablets(std::optional<unsigned> default_value, bool enforce_tablets = false) const;
std::optional<unsigned> get_initial_tablets(std::optional<unsigned> default_value) const;
data_dictionary::storage_options get_storage_options() const;
bool get_durable_writes() const;
lw_shared_ptr<data_dictionary::keyspace_metadata> as_ks_metadata(sstring ks_name, const locator::token_metadata&, const gms::feature_service&, const db::config&);

View File

@@ -36,7 +36,6 @@
#include "service/pager/query_pagers.hh"
#include "service/storage_proxy.hh"
#include <seastar/core/execution_stage.hh>
#include <seastar/core/on_internal_error.hh>
#include "view_info.hh"
#include "partition_slice_builder.hh"
#include "cql3/untyped_result_set.hh"
@@ -776,6 +775,7 @@ indexed_table_select_statement::do_execute_base_query(
}
next_iteration_size = std::min<size_t>({next_iteration_size, keys.size() - already_done, max_base_table_query_concurrency});
auto key_it_end = key_it + next_iteration_size;
auto command = ::make_lw_shared<query::read_command>(*cmd);
query::result_merger oneshot_merger(cmd->get_row_limit(), query::max_partitions);
coordinator_result<foreign_ptr<lw_shared_ptr<query::result>>> rresult = co_await utils::result_map_reduce(key_it, key_it_end, coroutine::lambda([&] (auto& key)
@@ -1398,40 +1398,20 @@ indexed_table_select_statement::find_index_partition_ranges(query_processor& qp,
using value_type = std::tuple<dht::partition_range_vector, lw_shared_ptr<const service::pager::paging_state>>;
auto now = gc_clock::now();
auto timeout = db::timeout_clock::now() + get_timeout(state.get_client_state(), options);
bool is_paged = options.get_page_size() >= 0;
constexpr size_t MAX_PARTITION_RANGES = 1000;
// Check that `partition_range_vector` won't cause a large allocation (see #18536).
// If this fails, please adjust MAX_PARTITION_RANGES accordingly.
static_assert(MAX_PARTITION_RANGES * sizeof(dht::partition_range_vector::value_type) <= 128 * 1024,
"MAX_PARTITION_RANGES too high - will cause large allocations from partition_range_vector");
size_t max_vector_size = is_paged ? MAX_PARTITION_RANGES : std::numeric_limits<size_t>::max();
std::unique_ptr<query_options> paging_adjusted_options = [&] () {
if (is_paged && _schema->clustering_key_size() == 0 && static_cast<size_t>(options.get_page_size()) > max_vector_size) {
// Result is guaranteed to contain one partition key per row.
// Limit the page size a priori.
auto internal_options = std::make_unique<cql3::query_options>(cql3::query_options(options));
internal_options.reset(new cql3::query_options(std::move(internal_options), options.get_paging_state(), max_vector_size));
return internal_options;
}
return std::unique_ptr<cql3::query_options>(nullptr);
}();
return do_with(std::move(paging_adjusted_options), [&, max_vector_size](auto& paging_adjusted_options) {
// FIXME: Indent.
const query_options& _options = paging_adjusted_options ? *paging_adjusted_options : options;
const uint64_t limit = get_inner_loop_limit(get_limit(options, _limit), _selection->is_aggregate());
return read_posting_list(qp, _options, limit, state, now, timeout, false).then(utils::result_wrap(
[this, &_options, max_vector_size] (::shared_ptr<cql_transport::messages::result_message::rows> rows) {
return read_posting_list(qp, options, limit, state, now, timeout, false).then(utils::result_wrap(
[this, &options] (::shared_ptr<cql_transport::messages::result_message::rows> rows) {
auto rs = cql3::untyped_result_set(rows);
dht::partition_range_vector partition_ranges;
partition_ranges.reserve(std::min(rs.size(), max_vector_size));
partition_ranges.reserve(rs.size());
// We are reading the list of primary keys as rows of a single
// partition (in the index view), so they are sorted in
// lexicographical order (N.B. this is NOT token order!). We need
// to avoid outputting the same partition key twice, but luckily in
// the sorted order, these will be adjacent.
std::optional<dht::decorated_key> last_dk;
if (_options.get_paging_state()) {
auto paging_state = _options.get_paging_state();
if (options.get_paging_state()) {
auto paging_state = options.get_paging_state();
auto base_pk = generate_base_key_from_index_pk<partition_key>(paging_state->get_partition_key(),
paging_state->get_clustering_key(), *_schema, *_view_schema);
last_dk = dht::decorate_key(*_schema, base_pk);
@@ -1454,14 +1434,10 @@ indexed_table_select_statement::find_index_partition_ranges(query_processor& qp,
last_dk = dk;
auto range = dht::partition_range::make_singular(dk);
partition_ranges.emplace_back(range);
if (partition_ranges.size() >= partition_ranges.capacity()) {
break;
}
}
auto paging_state = rows->rs().get_metadata().paging_state();
return make_ready_future<coordinator_result<value_type>>(value_type(std::move(partition_ranges), std::move(paging_state)));
}));
});
}
// Note: the partitions keys returned by this function are sorted
@@ -2029,13 +2005,6 @@ std::unique_ptr<prepared_statement> select_statement::prepare(data_dictionary::d
? selection::selection::wildcard(schema)
: selection::selection::from_selectors(db, schema, keyspace(), levellized_prepared_selectors);
// Cassandra 5.0.2 disallows PER PARTITION LIMIT with aggregate queries
// but only if GROUP BY is not used.
// See #9879 for more details.
if (selection->is_aggregate() && _per_partition_limit && _group_by_columns.empty()) {
throw exceptions::invalid_request_exception("PER PARTITION LIMIT is not allowed with aggregate queries.");
}
auto restrictions = prepare_restrictions(db, schema, ctx, selection, for_view, _parameters->allow_filtering(),
restrictions::check_indexes(!_parameters->is_mutation_fragments()));

View File

@@ -154,7 +154,7 @@ class result_set;
class untyped_result_set {
public:
using row = untyped_result_set_row;
using rows_type = utils::chunked_vector<row>;
using rows_type = std::vector<row>;
using const_iterator = rows_type::const_iterator;
using iterator = rows_type::const_iterator;

View File

@@ -144,29 +144,26 @@ expr::expression where_clause_to_relations(const std::string_view& where_clause,
return do_with_parser(where_clause, d, std::mem_fn(&cql3_parser::CqlParser::whereClause));
}
sstring rename_columns_in_where_clause(const std::string_view& where_clause, std::vector<std::pair<::shared_ptr<column_identifier>, ::shared_ptr<column_identifier>>> renames, dialect d) {
sstring rename_column_in_where_clause(const std::string_view& where_clause, column_identifier::raw from, column_identifier::raw to, dialect d) {
std::vector<expr::expression> relations = boolean_factors(where_clause_to_relations(where_clause, d));
std::vector<expr::expression> new_relations;
new_relations.reserve(relations.size());
for (const expr::expression& old_relation : relations) {
new_relations.emplace_back(
expr::search_and_replace(old_relation,
[&](const expr::expression& e) -> std::optional<expr::expression> {
for (const auto& [view_from, view_to] : renames) {
if (auto ident = expr::as_if<expr::unresolved_identifier>(&e)) {
auto from = column_identifier::raw(view_from->text(), true);
if (*ident->ident == from) {
return expr::unresolved_identifier{
::make_shared<column_identifier::raw>(view_to->text(), true)
};
}
}
expr::expression new_relation = expr::search_and_replace(old_relation,
[&](const expr::expression& e) -> std::optional<expr::expression> {
if (auto ident = expr::as_if<expr::unresolved_identifier>(&e)) {
if (*ident->ident == from) {
return expr::unresolved_identifier{
::make_shared<column_identifier::raw>(to)
};
}
return std::nullopt;
}
)
return std::nullopt;
}
);
new_relations.emplace_back(std::move(new_relation));
}
return relations_to_where_clause(expr::conjunction{std::move(new_relations)});

View File

@@ -40,7 +40,7 @@ sstring relations_to_where_clause(const expr::expression& e);
expr::expression where_clause_to_relations(const std::string_view& where_clause, dialect d);
sstring rename_columns_in_where_clause(const std::string_view& where_clause, std::vector<std::pair<::shared_ptr<column_identifier>, ::shared_ptr<column_identifier>>> renames, dialect d);
sstring rename_column_in_where_clause(const std::string_view& where_clause, column_identifier::raw from, column_identifier::raw to, dialect d);
/// build a CQL "select" statement with the desired parameters.
/// If select_all_columns==true, all columns are selected and the value of

View File

@@ -13,7 +13,6 @@
#include <variant>
#include <seastar/core/sstring.hh>
#include "schema/schema_fwd.hh"
#include "utils/s3/utils/manip_s3.hh"
#include "seastarx.hh"
namespace seastar {
@@ -64,18 +63,6 @@ inline storage_options make_local_options(std::filesystem::path dir) {
return so;
}
inline storage_options make_s3_options(const std::string& endpoint, const std::string& fqn) {
std::string bucket;
std::string object;
s3::s3fqn_to_parts(fqn, bucket, object);
object = std::filesystem::path(object).parent_path().string(); // remove the filename and trailing separator from the path
storage_options so;
so.value = storage_options::s3{.bucket = std::move(bucket), .endpoint = endpoint, .location = std::move(object)};
return so;
}
} // namespace data_dictionary
template <>

View File

@@ -27,7 +27,6 @@ target_sources(db
extensions.cc
heat_load_balance.cc
large_data_handler.cc
corrupt_data_handler.cc
marshal/type_parser.cc
batchlog_manager.cc
tags/utils.cc

View File

@@ -36,7 +36,7 @@
static logging::logger blogger("batchlog_manager");
const std::chrono::seconds db::batchlog_manager::replay_interval;
const uint32_t db::batchlog_manager::replay_interval;
const uint32_t db::batchlog_manager::page_size;
db::batchlog_manager::batchlog_manager(cql3::query_processor& qp, db::system_keyspace& sys_ks, batchlog_manager_config config)
@@ -46,7 +46,6 @@ db::batchlog_manager::batchlog_manager(cql3::query_processor& qp, db::system_key
, _replay_rate(config.replay_rate)
, _delay(config.delay)
, _replay_cleanup_after_replays(config.replay_cleanup_after_replays)
, _gate("batchlog_manager")
, _loop_done(batchlog_replay_loop())
{
namespace sm = seastar::metrics;
@@ -116,8 +115,7 @@ future<> db::batchlog_manager::batchlog_replay_loop() {
} catch (...) {
blogger.error("Exception in batch replay: {}", std::current_exception());
}
delay = utils::get_local_injector().is_enabled("short_batchlog_manager_replay_interval") ?
std::chrono::seconds(1) : replay_interval;
delay = std::chrono::milliseconds(replay_interval);
}
}
@@ -133,8 +131,6 @@ future<> db::batchlog_manager::drain() {
_sem.broken();
}
co_await _qp.proxy().abort_batch_writes();
co_await std::move(_loop_done);
blogger.info("Drained");
}
@@ -176,11 +172,6 @@ future<> db::batchlog_manager::replay_all_failed_batches(post_replay_cleanup cle
return make_ready_future<stop_iteration>(stop_iteration::no);
}
if (utils::get_local_injector().is_enabled("skip_batch_replay")) {
blogger.debug("Skipping batch replay due to skip_batch_replay injection");
return make_ready_future<stop_iteration>(stop_iteration::no);
}
// check version of serialization format
if (!row.has("version")) {
blogger.warn("Skipping logged batch because of unknown version");
@@ -250,8 +241,7 @@ future<> db::batchlog_manager::replay_all_failed_batches(post_replay_cleanup cle
// send to partially or wholly fail in actually sending stuff. Since we don't
// have hints (yet), send with CL=ALL, and hope we can re-do this soon.
// See below, we use retry on write failure.
auto timeout = db::timeout_clock::now() + write_timeout;
return _qp.proxy().send_batchlog_replay_to_all_replicas(std::move(mutations), timeout);
return _qp.proxy().mutate(mutations, db::consistency_level::ALL, db::no_timeout, nullptr, empty_service_permit(), db::allow_per_partition_rate_limit::no);
});
}).then_wrapped([this, id](future<> batch_result) {
try {
@@ -276,7 +266,7 @@ future<> db::batchlog_manager::replay_all_failed_batches(post_replay_cleanup cle
}).then([] { return make_ready_future<stop_iteration>(stop_iteration::no); });
};
co_await with_gate(_gate, [this, cleanup, batch = std::move(batch)] () mutable -> future<> {
co_await seastar::with_gate(_gate, [this, cleanup, batch = std::move(batch)] () mutable -> future<> {
blogger.debug("Started replayAllFailedBatches (cpu {})", this_shard_id());
co_await utils::get_local_injector().inject("add_delay_to_batch_replay", std::chrono::milliseconds(1000));
co_await _qp.query_internal(

View File

@@ -43,9 +43,8 @@ public:
using post_replay_cleanup = bool_class<class post_replay_cleanup_tag>;
private:
static constexpr std::chrono::seconds replay_interval = std::chrono::seconds(60);
static constexpr uint32_t replay_interval = 60 * 1000; // milliseconds
static constexpr uint32_t page_size = 128; // same as HHOM, for now, w/out using any heuristics. TODO: set based on avg batch size.
static constexpr std::chrono::seconds write_timeout = std::chrono::seconds(300);
using clock_type = lowres_clock;
@@ -63,7 +62,7 @@ private:
std::chrono::milliseconds _delay;
unsigned _replay_cleanup_after_replays = 100;
semaphore _sem{1};
seastar::named_gate _gate;
seastar::gate _gate;
unsigned _cpu = 0;
seastar::abort_source _stop;
future<> _loop_done;

View File

@@ -16,7 +16,7 @@
#include "query-request.hh"
#include "partition_snapshot_row_cursor.hh"
#include "read_context.hh"
#include "readers/delegating.hh"
#include "readers/delegating_v2.hh"
#include "clustering_key_filter.hh"
namespace cache {
@@ -123,9 +123,6 @@ class cache_mutation_reader final : public mutation_reader::impl {
gc_clock::time_point _read_time;
gc_clock::time_point _gc_before;
api::timestamp_type _max_purgeable_timestamp = api::missing_timestamp;
api::timestamp_type _max_purgeable_timestamp_shadowable = api::missing_timestamp;
future<> do_fill_buffer();
future<> ensure_underlying();
void copy_from_cache_to_buffer();
@@ -210,11 +207,6 @@ class cache_mutation_reader final : public mutation_reader::impl {
return gc_clock::time_point::min();
}
bool can_gc(tombstone t, is_shadowable is) const {
const auto max_purgeable = is ? _max_purgeable_timestamp_shadowable : _max_purgeable_timestamp;
return t.timestamp < max_purgeable;
}
public:
cache_mutation_reader(schema_ptr s,
dht::decorated_key dk,
@@ -236,19 +228,8 @@ public:
, _read_time(get_read_time())
, _gc_before(get_gc_before(*_schema, dk, _read_time))
{
_max_purgeable_timestamp = ctx.get_max_purgeable(dk, is_shadowable::no);
_max_purgeable_timestamp_shadowable = ctx.get_max_purgeable(dk, is_shadowable::yes);
clogger.trace("csm {}: table={}.{}, dk={}, gc-before={}, max-purgeable-regular={}, max-purgeable-shadowable={}, reversed={}, snap={}",
fmt::ptr(this),
_schema->ks_name(),
_schema->cf_name(),
dk,
_gc_before,
_max_purgeable_timestamp,
_max_purgeable_timestamp_shadowable,
_read_context.is_reversed(),
fmt::ptr(&*_snp));
clogger.trace("csm {}: table={}.{}, reversed={}, snap={}", fmt::ptr(this), _schema->ks_name(), _schema->cf_name(), _read_context.is_reversed(),
fmt::ptr(&*_snp));
push_mutation_fragment(*_schema, _permit, partition_start(std::move(dk), _snp->partition_tombstone()));
}
cache_mutation_reader(schema_ptr s,
@@ -806,12 +787,12 @@ void cache_mutation_reader::copy_from_cache_to_buffer() {
t.apply(range_tomb);
auto row_tomb_expired = [&](row_tombstone tomb) {
return (tomb && tomb.max_deletion_time() < _gc_before && can_gc(tomb.tomb(), tomb.is_shadowable()));
return (tomb && tomb.max_deletion_time() < _gc_before);
};
auto is_row_dead = [&](const deletable_row& row) {
auto& m = row.marker();
return (!m.is_missing() && m.is_dead(_read_time) && m.deletion_time() < _gc_before && can_gc(tombstone(m.timestamp(), m.deletion_time()), is_shadowable::no));
return (!m.is_missing() && m.is_dead(_read_time) && m.deletion_time() < _gc_before);
};
if (row_tomb_expired(t) || is_row_dead(row)) {
@@ -819,11 +800,9 @@ void cache_mutation_reader::copy_from_cache_to_buffer() {
_read_context.cache()._tracker.on_row_compacted();
auto mutation_can_gc = can_gc_fn([this] (tombstone t, is_shadowable is) { return can_gc(t, is); });
with_allocator(_snp->region().allocator(), [&] {
deletable_row row_copy(row_schema, row);
row_copy.compact_and_expire(row_schema, t.tomb(), _read_time, mutation_can_gc, _gc_before, nullptr);
row_copy.compact_and_expire(row_schema, t.tomb(), _read_time, always_gc, _gc_before, nullptr);
std::swap(row, row_copy);
});
remove_row = row.empty();

View File

@@ -519,7 +519,7 @@ private:
future<> replenish_reserve();
future<> _reserve_replenisher;
future<> _background_sync;
seastar::named_gate _gate;
seastar::gate _gate;
uint64_t _new_counter = 0;
uint32_t _frag_id_counter = 0;
std::optional<size_t> _disk_write_alignment;
@@ -1915,7 +1915,6 @@ db::commitlog::segment_manager::segment_manager(config c)
, _recycled_segments(std::numeric_limits<size_t>::max())
, _reserve_replenisher(make_ready_future<>())
, _background_sync(make_ready_future<>())
, _gate(format("commitlog::segment_manager::{}", cfg.commit_log_location))
{
SCYLLA_ASSERT(max_size > 0);
SCYLLA_ASSERT(max_mutation_size < segment::multi_entry_size_magic);
@@ -2802,8 +2801,6 @@ future<> db::commitlog::segment_manager::do_pending_deletes() {
promise<> deleting_done;
auto pending_deletes = std::exchange(_pending_deletes, deleting_done.get_future());
co_await std::move(pending_deletes);
std::exception_ptr recycle_error;
auto exts = cfg.extensions;
@@ -2874,6 +2871,7 @@ future<> db::commitlog::segment_manager::do_pending_deletes() {
if (recycle_error && _recycled_segments.empty()) {
abort_recycled_list(recycle_error);
}
co_await std::move(pending_deletes);
deleting_done.set_value();
}

View File

@@ -76,22 +76,6 @@ hinted_handoff_enabled_to_json(const db::config::hinted_handoff_enabled_type& h)
return value_to_json(h.to_configuration_string());
}
static
json::json_return_type
object_storage_endpoints_to_json(const std::vector<db::object_storage_endpoint_param> &endpoints) {
std::unordered_map<sstring, sstring> m;
for (auto& e : endpoints) {
m[e.endpoint] = e.to_json_string();
}
return value_to_json(m);
}
static
json::json_return_type
uuid_to_json(const db::config::UUID& uuid) {
return value_to_json(format("{}", uuid));
}
// Convert a value that can be printed with fmt::format, or a vector of
// such values, to JSON. An example is enum_option<T>, because enum_option<T>
// has a specialization for fmt::formatter.
@@ -261,13 +245,6 @@ const config_type& config_type_for<enum_option<db::tri_mode_restriction_t>>() {
return ct;
}
template <>
const config_type& config_type_for<enum_option<db::tablets_mode_t>>() {
static config_type ct(
"tablets mode", printable_to_json<enum_option<db::tablets_mode_t>>);
return ct;
}
template <>
const config_type& config_type_for<db::config::hinted_handoff_enabled_type>() {
static config_type ct("hinted handoff enabled", hinted_handoff_enabled_to_json);
@@ -294,18 +271,6 @@ const config_type& config_type_for<utils::advanced_rpc_compressor::tracker::algo
return ct;
}
template <>
const config_type& config_type_for<std::vector<db::object_storage_endpoint_param>>() {
static config_type ct("object storage endpoint configuration", object_storage_endpoints_to_json);
return ct;
}
template <>
const config_type& config_type_for<db::config::UUID>() {
static config_type ct("UUID", uuid_to_json);
return ct;
}
}
namespace YAML {
@@ -414,23 +379,6 @@ public:
}
};
template <>
class convert<enum_option<db::tablets_mode_t>> {
public:
static bool decode(const Node& node, enum_option<db::tablets_mode_t>& rhs) {
std::string name;
if (!convert<std::string>::decode(node, name)) {
return false;
}
try {
std::istringstream(name) >> rhs;
} catch (boost::program_options::invalid_option_value&) {
return false;
}
return true;
}
};
template<>
struct convert<db::config::error_injection_at_startup> {
static bool decode(const Node& node, db::config::error_injection_at_startup& rhs) {
@@ -491,34 +439,6 @@ public:
}
};
template<>
struct convert<db::object_storage_endpoint_param> {
static bool decode(const Node& node, db::object_storage_endpoint_param& ep) {
ep.endpoint = node["name"].as<std::string>();
ep.config.port = node["port"].as<unsigned>();
ep.config.use_https = node["https"].as<bool>(false);
ep.config.region = node["aws_region"] ? node["aws_region"].as<std::string>() : std::getenv("AWS_DEFAULT_REGION");
ep.config.role_arn = node["iam_role_arn"] ? node["iam_role_arn"].as<std::string>() : "";
return true;
}
};
template<>
struct convert<utils::UUID> {
static bool decode(const Node& node, utils::UUID& uuid) {
std::string uuid_string;
if (!convert<std::string>::decode(node, uuid_string)) {
return false;
}
try {
std::istringstream(uuid_string) >> uuid;
} catch (boost::program_options::invalid_option_value&) {
return false;
}
return true;
}
};
}
#if defined(DEBUG)
@@ -623,9 +543,6 @@ db::config::config(std::shared_ptr<db::extensions> exts)
"The directory where the schema commit log is stored. This is a special commitlog instance used for schema and system tables. For optimal write performance, it is recommended the commit log be on a separate disk partition (ideally, a separate physical device) from the data file directories.")
, data_file_directories(this, "data_file_directories", "datadir", value_status::Used, { },
"The directory location where table data (SSTables) is stored.")
, data_file_capacity(this, "data_file_capacity", liveness::LiveUpdate, value_status::Used, 0,
"Total capacity in bytes for storing data files. Used by tablet load balancer to compute storage utilization."
" If not set, will use file system's capacity.")
, hints_directory(this, "hints_directory", value_status::Used, "",
"The directory where hints files are stored if hinted handoff is enabled.")
, view_hints_directory(this, "view_hints_directory", value_status::Used, "",
@@ -847,7 +764,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
, inter_dc_stream_throughput_outbound_megabits_per_sec(this, "inter_dc_stream_throughput_outbound_megabits_per_sec", value_status::Unused, 0,
"Throttles all streaming file transfer between the data centers. This setting allows throttles streaming throughput betweens data centers in addition to throttling all network stream traffic as configured with stream_throughput_outbound_megabits_per_sec.")
, stream_io_throughput_mb_per_sec(this, "stream_io_throughput_mb_per_sec", liveness::LiveUpdate, value_status::Used, 0,
"Throttles streaming I/O to the specified total throughput (in MiBs/s) across the entire system. Streaming I/O includes the one performed by repair and both RBNO and legacy topology operations such as adding or removing a node. Setting the value to 0 disables stream throttling. It is recommended to set the value for this parameter to be 75% of network bandwidth")
"Throttles streaming I/O to the specified total throughput (in MiBs/s) across the entire system. Streaming I/O includes the one performed by repair and both RBNO and legacy topology operations such as adding or removing a node. Setting the value to 0 disables stream throttling.")
, stream_plan_ranges_fraction(this, "stream_plan_ranges_fraction", liveness::LiveUpdate, value_status::Used, 0.1,
"Specify the fraction of ranges to stream in a single stream plan. Value is between 0 and 1.")
, enable_file_stream(this, "enable_file_stream", liveness::LiveUpdate, value_status::Used, true, "Set true to use file based stream for tablet instead of mutation based stream")
@@ -970,8 +887,6 @@ db::config::config(std::shared_ptr<db::extensions> exts)
"The default timeout for other, miscellaneous operations.\n"
"\n"
"Related information: About hinted handoff writes")
, group0_raft_op_timeout_in_ms(this, "group0_raft_op_timeout_in_ms", liveness::LiveUpdate, value_status::Used, 60000,
"The time in milliseconds that group0 allows a Raft operation to complete.")
/**
* @Group Inter-node settings
*/
@@ -1271,28 +1186,8 @@ db::config::config(std::shared_ptr<db::extensions> exts)
, enable_sstables_mc_format(this, "enable_sstables_mc_format", value_status::Unused, true, "Enable SSTables 'mc' format to be used as the default file format. Deprecated, please use \"sstable_format\" instead.")
, enable_sstables_md_format(this, "enable_sstables_md_format", value_status::Unused, true, "Enable SSTables 'md' format to be used as the default file format. Deprecated, please use \"sstable_format\" instead.")
, sstable_format(this, "sstable_format", value_status::Used, "me", "Default sstable file format", {"md", "me"})
, sstable_compression_dictionaries_allow_in_ddl(this, "sstable_compression_dictionaries_allow_in_ddl", liveness::LiveUpdate, value_status::Used, true,
"Allows for configuring tables to use SSTable compression with shared dictionaries. "
"If the option is disabled, Scylla will reject CREATE and ALTER statements which try to set dictionary-based sstable compressors.\n"
"This is only enforced when this node validates a new DDL statement; disabling the option won't disable dictionary-based compression "
"on tables which already have it configured, and won't do anything to existing sstables.\n"
"To affect existing tables, you can ALTER them to a non-dictionary compressor, or disable dictionary compression "
"for the whole node through `sstable_compression_dictionaries_enable_writing`.")
, sstable_compression_dictionaries_enable_writing(this, "sstable_compression_dictionaries_enable_writing", liveness::LiveUpdate, value_status::Used, true,
"Enables SSTable compression with shared dictionaries (for tables which opt in). If set to false, this node won't write any new SSTables using dictionary compression.\n"
"Option meant not for regular usage, but for unforeseen problems that call for disabling dictionaries without modifying table schema.")
, sstable_compression_dictionaries_memory_budget_fraction(this, "sstable_compression_dictionaries_memory_budget_fraction", liveness::LiveUpdate, value_status::Used, 0.01,
"Fall back to compression without dictionaries if RAM usage by dictionaries is greater or equal to this fraction of the shard's memory.")
, sstable_compression_dictionaries_retrain_period_in_seconds(this, "sstable_compression_dictionaries_retrain_period_in_seconds", liveness::LiveUpdate, value_status::Used, 86400,
"Minimum age of the current compression dictionary before another dictionary for this table is trained.")
, sstable_compression_dictionaries_autotrainer_tick_period_in_seconds(this, "sstable_compression_dictionaries_autotrainer_tick_period_in_seconds", liveness::LiveUpdate, value_status::Used, 900,
"The period with which automatic dictionary training is attempted.")
, sstable_compression_dictionaries_min_training_dataset_bytes(this, "sstable_compression_dictionaries_min_training_dataset_bytes", liveness::LiveUpdate, value_status::Used, 1*1024*1024*1024,
"The minimum size a table has to reach before dictionaries will be trained for it.")
, sstable_compression_dictionaries_min_training_improvement_factor(this, "sstable_compression_dictionaries_min_training_improvement_factor", liveness::LiveUpdate, value_status::Used, 0.95,
"New dictionaries will be only published if the estimated compression ratio is smaller than current ratio multiplied by this factor.")
, uuid_sstable_identifiers_enabled(this,
"uuid_sstable_identifiers_enabled", value_status::Unused, true, "If set to true, each newly created sstable will have a UUID "
"uuid_sstable_identifiers_enabled", liveness::LiveUpdate, value_status::Used, true, "If set to true, each newly created sstable will have a UUID "
"based generation identifier, and such files are not readable by previous Scylla versions.")
, table_digest_insensitive_to_expiry(this, "table_digest_insensitive_to_expiry", liveness::MustRestart, value_status::Used, true,
"When enabled, per-table schema digest calculation ignores empty partitions.")
@@ -1343,8 +1238,6 @@ db::config::config(std::shared_ptr<db::extensions> exts)
"Time period in seconds after which unused schema versions will be evicted from the local schema registry cache. Default is 1 second.")
, max_concurrent_requests_per_shard(this, "max_concurrent_requests_per_shard", liveness::LiveUpdate, value_status::Used, std::numeric_limits<uint32_t>::max(),
"Maximum number of concurrent requests a single shard can handle before it starts shedding extra load. By default, no requests will be shed.")
, uninitialized_connections_semaphore_cpu_concurrency(this, "uninitialized_connections_semaphore_cpu_concurrency", liveness::LiveUpdate, value_status::Used, 8,
"Maximum number of new concurrent connections from drivers that a single shard can be processing before it starts throttling incoming connections. This limit applies only to new connections excluding the ones blocked on network IO; connections that are ready to serve requests are not affected. By default the limit is 8.")
, cdc_dont_rewrite_streams(this, "cdc_dont_rewrite_streams", value_status::Used, false,
"Disable rewriting streams from cdc_streams_descriptions to cdc_streams_descriptions_v2. Should not be necessary, but the procedure is expensive and prone to failures; this config option is left as a backdoor in case some user requires manual intervention.")
, strict_allow_filtering(this, "strict_allow_filtering", liveness::LiveUpdate, value_status::Used, strict_allow_filtering_default(), "Match Cassandra in requiring ALLOW FILTERING on slow queries. Can be true, false, or warn. When false, Scylla accepts some slow queries even without ALLOW FILTERING that Cassandra rejects. Warn is same as false, but with warning.")
@@ -1386,9 +1279,6 @@ db::config::config(std::shared_ptr<db::extensions> exts)
"the same endpoint used in the request. The string 'disabled' "
"disables the DescribeEndpoints operation. Any other string is the "
"fixed value that will be returned by DescribeEndpoints operations.")
// alternator_max_items_in_batch_write matches DynamoDB behaviour of size limit, but with different value - for DynamoDB it's 25
// (see DynamoDB's documentation for BatchWriteItem command)
, alternator_max_items_in_batch_write(this, "alternator_max_items_in_batch_write", value_status::Used, 100, "Maximum amount of items in single BatchItemWrite call.")
, abort_on_ebadf(this, "abort_on_ebadf", value_status::Used, true, "Abort the server on incorrect file descriptor access. Throws exception when disabled.")
, redis_port(this, "redis_port", value_status::Used, 0, "Port on which the REDIS transport listens for clients.")
, redis_ssl_port(this, "redis_ssl_port", value_status::Used, 0, "Port on which the REDIS TLS native transport listens for clients.")
@@ -1427,7 +1317,6 @@ db::config::config(std::shared_ptr<db::extensions> exts)
"The maximum fraction of cache memory permitted for use by index cache. Clamped to the [0.0; 1.0] range. Must be small enough to not deprive the row cache of memory, but should be big enough to fit a large fraction of the index. The default value 0.2 means that at least 80\% of cache memory is reserved for the row cache, while at most 20\% is usable by the index cache.")
, consistent_cluster_management(this, "consistent_cluster_management", value_status::Deprecated, true, "Use RAFT for cluster management and DDL.")
, force_gossip_topology_changes(this, "force_gossip_topology_changes", value_status::Used, false, "Force gossip-based topology operations in a fresh cluster. Only the first node in the cluster must use it. The rest will fall back to gossip-based operations anyway. This option should be used only for testing. Note: gossip topology changes are incompatible with tablets.")
, recovery_leader(this, "recovery_leader", liveness::LiveUpdate, value_status::Used, utils::null_uuid(), "Host ID of the node restarted first while performing the Manual Raft-based Recovery Procedure. Warning: this option disables some guardrails for the needs of the Manual Raft-based Recovery Procedure. Make sure you unset it at the end of the procedure.")
, wasm_cache_memory_fraction(this, "wasm_cache_memory_fraction", value_status::Used, 0.01, "Maximum total size of all WASM instances stored in the cache as fraction of total shard memory.")
, wasm_cache_timeout_in_ms(this, "wasm_cache_timeout_in_ms", value_status::Used, 5000, "Time after which an instance is evicted from the cache.")
, wasm_cache_instance_size_limit(this, "wasm_cache_instance_size_limit", value_status::Used, 1024*1024, "Instances with size above this limit will not be stored in the cache.")
@@ -1435,6 +1324,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
, wasm_udf_total_fuel(this, "wasm_udf_total_fuel", value_status::Used, 100000000, "Wasmtime fuel a WASM UDF can consume before termination.")
, wasm_udf_memory_limit(this, "wasm_udf_memory_limit", value_status::Used, 2*1024*1024, "How much memory each WASM UDF can allocate at most.")
, relabel_config_file(this, "relabel_config_file", value_status::Used, "", "Optionally, read relabel config from file.")
, object_storage_config_file(this, "object_storage_config_file", value_status::Used, "", "Optionally, read object-storage endpoints config from file.")
, live_updatable_config_params_changeable_via_cql(this, "live_updatable_config_params_changeable_via_cql", liveness::MustRestart, value_status::Used, true, "If set to true, configuration parameters defined with LiveUpdate can be updated in runtime via CQL (by updating system.config virtual table), otherwise they can't.")
, auth_superuser_name(this, "auth_superuser_name", value_status::Used, "",
"Initial authentication super username. Ignored if authentication tables already contain a super user.")
@@ -1475,14 +1365,10 @@ db::config::config(std::shared_ptr<db::extensions> exts)
, ldap_bind_dn(this, "ldap_bind_dn", value_status::Used, "", "Distinguished name used by LDAPRoleManager for binding to LDAP server.")
, ldap_bind_passwd(this, "ldap_bind_passwd", value_status::Used, "", "Password used by LDAPRoleManager for binding to LDAP server.")
, saslauthd_socket_path(this, "saslauthd_socket_path", value_status::Used, "", "UNIX domain socket on which saslauthd is listening.")
, object_storage_endpoints(this, "object_storage_endpoints", liveness::LiveUpdate, value_status::Used, {}, "Object storage endpoints configuration.")
, error_injections_at_startup(this, "error_injections_at_startup", error_injection_value_status, {}, "List of error injections that should be enabled on startup.")
, topology_barrier_stall_detector_threshold_seconds(this, "topology_barrier_stall_detector_threshold_seconds", value_status::Used, 2, "Report sites blocking topology barrier if it takes longer than this.")
, enable_tablets(this, "enable_tablets", value_status::Used, false, "Enable tablets for newly created keyspaces. (deprecated)")
, tablets_mode_for_new_keyspaces(this, "tablets_mode_for_new_keyspaces", value_status::Used, tablets_mode_t::mode::unset, "Control tablets for new keyspaces. Can be set to the following values:\n"
"\tdisabled: New keyspaces use vnodes by default, unless enabled by the tablets={'enabled':true} option\n"
"\tenabled: New keyspaces use tablets by default, unless disabled by the tablets={'disabled':true} option\n"
"\tenforced: New keyspaces must use tablets. Tablets cannot be disabled using the CREATE KEYSPACE option")
, enable_tablets(this, "enable_tablets", value_status::Used, false, "Enable tablets for newly created keyspaces.")
, view_flow_control_delay_limit_in_ms(this, "view_flow_control_delay_limit_in_ms", liveness::LiveUpdate, value_status::Used, 1000,
"The maximal amount of time that materialized-view update flow control may delay responses "
"to try to slow down the client and prevent buildup of unfinished view updates. "
@@ -1492,9 +1378,6 @@ db::config::config(std::shared_ptr<db::extensions> exts)
, disk_space_monitor_high_polling_interval_in_seconds(this, "disk_space_monitor_high_polling_interval_in_seconds", value_status::Used, 1, "Disk-space polling interval at or above polling threshold")
, disk_space_monitor_polling_interval_threshold(this, "disk_space_monitor_polling_interval_threshold", value_status::Used, 0.9, "Disk-space polling threshold. Polling interval is increased when disk utilization is greater than or equal to this threshold")
, enable_create_table_with_compact_storage(this, "enable_create_table_with_compact_storage", liveness::LiveUpdate, value_status::Used, false, "Enable the deprecated feature of CREATE TABLE WITH COMPACT STORAGE. This feature will eventually be removed in a future version.")
, rf_rack_valid_keyspaces(this, "rf_rack_valid_keyspaces", liveness::MustRestart, value_status::Used, false,
"Enforce RF-rack-valid keyspaces. Additionally, if there are existing RF-rack-invalid "
"keyspaces, attempting to start a node with this option ON will fail.")
, default_log_level(this, "default_log_level", value_status::Used, seastar::log_level::info, "Default log level for log messages")
, logger_log_level(this, "logger_log_level", value_status::Used, {}, "Map of logger name to log level. Valid log levels are 'error', 'warn', 'info', 'debug' and 'trace'")
, log_to_stdout(this, "log_to_stdout", value_status::Used, true, "Send log output to stdout")
@@ -1591,14 +1474,6 @@ std::istream& operator>>(std::istream& is, error_injection_at_startup& eias) {
return is;
}
std::istream& operator>>(std::istream& is, object_storage_endpoint_param& f) {
// FIXME -- this operator is used, in particular, by boost lexical_cast<>
// it's here just to make the code compile, but it's not yet called for real
throw std::runtime_error("reading object_storage_endpoint_param from istream is not implemented");
return is;
}
}
auto fmt::formatter<db::error_injection_at_startup>::format(const db::error_injection_at_startup& eias, fmt::format_context& ctx) const
@@ -1607,11 +1482,6 @@ auto fmt::formatter<db::error_injection_at_startup>::format(const db::error_inje
eias.name, eias.one_shot, eias.parameters);
}
auto fmt::formatter<db::object_storage_endpoint_param>::format(const db::object_storage_endpoint_param& e, fmt::format_context& ctx) const
-> decltype(ctx.out()) {
return fmt::format_to(ctx.out(), "object_storage_endpoint_param{{}}", e.to_json_string());
}
namespace utils {
template<>
@@ -1735,16 +1605,6 @@ std::unordered_map<sstring, db::tri_mode_restriction_t::mode> db::tri_mode_restr
{"warn", db::tri_mode_restriction_t::mode::WARN}};
}
std::unordered_map<sstring, db::tablets_mode_t::mode> db::tablets_mode_t::map() {
return {{"disabled", db::tablets_mode_t::mode::disabled},
{"0", db::tablets_mode_t::mode::disabled},
{"enabled", db::tablets_mode_t::mode::enabled},
{"1", db::tablets_mode_t::mode::enabled},
{"enforced", db::tablets_mode_t::mode::enforced},
{"2", db::tablets_mode_t::mode::enforced}
};
}
template struct utils::config_file::named_value<seastar::log_level>;
namespace utils {

View File

@@ -21,6 +21,7 @@
#include "utils/enum_option.hh"
#include "gms/inet_address.hh"
#include "db/hints/host_filter.hh"
#include "utils/updateable_value.hh"
#include "utils/s3/creds.hh"
#include "utils/error_injection.hh"
#include "utils/dict_trainer.hh"
@@ -85,23 +86,6 @@ struct error_injection_at_startup {
std::istream& operator>>(std::istream& is, error_injection_at_startup&);
struct object_storage_endpoint_param {
sstring endpoint;
s3::endpoint_config config;
bool operator==(const object_storage_endpoint_param& other) const {
return endpoint == other.endpoint && config == other.config;
}
sstring to_json_string() const {
return fmt::format("{{ \"port\": {}, \"use_https\": {}, \"aws_region\": \"{}\", \"iam_role_arn\": \"{}\" }}",
config.port, config.use_https, config.region, config.role_arn);
}
friend fmt::formatter<object_storage_endpoint_param>;
};
std::istream& operator>>(std::istream& is, object_storage_endpoint_param& f);
}
template<>
@@ -110,11 +94,6 @@ struct fmt::formatter<db::error_injection_at_startup> {
auto format(const db::error_injection_at_startup&, fmt::format_context& ctx) const -> decltype(ctx.out());
};
template <>
struct fmt::formatter<db::object_storage_endpoint_param> : fmt::formatter<sstring_view> {
auto format(const db::object_storage_endpoint_param&, fmt::format_context& ctx) const -> decltype(ctx.out());
};
namespace utils {
sstring config_value_as_json(const db::seed_provider_type& v);
@@ -156,20 +135,6 @@ struct replication_strategy_restriction_t {
constexpr unsigned default_murmur3_partitioner_ignore_msb_bits = 12;
struct tablets_mode_t {
// The `unset` mode is used internally for backward compatibility
// with the legacy `enable_tablets` option.
// It is defined as -1 as existing test code associates the value
// 0 with `false` and 1 with `true` when read from system.config.
enum class mode : int8_t {
unset = -1,
disabled = 0,
enabled = 1,
enforced = 2
};
static std::unordered_map<sstring, mode> map(); // for enum_option<>
};
class config final : public utils::config_file {
public:
config();
@@ -207,7 +172,6 @@ public:
using seed_provider_type = db::seed_provider_type;
using hinted_handoff_enabled_type = db::hints::host_filter;
using error_injection_at_startup = db::error_injection_at_startup;
using UUID = utils::UUID;
/*
* All values and documentation taken from
@@ -227,7 +191,6 @@ public:
named_value<sstring> commitlog_directory;
named_value<sstring> schema_commitlog_directory;
named_value<string_list> data_file_directories;
named_value<uint64_t> data_file_capacity;
named_value<sstring> hints_directory;
named_value<sstring> view_hints_directory;
named_value<sstring> saved_caches_directory;
@@ -323,7 +286,6 @@ public:
named_value<uint32_t> truncate_request_timeout_in_ms;
named_value<uint32_t> write_request_timeout_in_ms;
named_value<uint32_t> request_timeout_in_ms;
named_value<uint32_t> group0_raft_op_timeout_in_ms;
named_value<bool> cross_node_timeout;
named_value<uint32_t> internode_send_buff_size_in_bytes;
named_value<uint32_t> internode_recv_buff_size_in_bytes;
@@ -437,13 +399,6 @@ public:
named_value<bool> enable_sstables_mc_format;
named_value<bool> enable_sstables_md_format;
named_value<sstring> sstable_format;
named_value<bool> sstable_compression_dictionaries_allow_in_ddl;
named_value<bool> sstable_compression_dictionaries_enable_writing;
named_value<float> sstable_compression_dictionaries_memory_budget_fraction;
named_value<float> sstable_compression_dictionaries_retrain_period_in_seconds;
named_value<float> sstable_compression_dictionaries_autotrainer_tick_period_in_seconds;
named_value<uint64_t> sstable_compression_dictionaries_min_training_dataset_bytes;
named_value<float> sstable_compression_dictionaries_min_training_improvement_factor;
named_value<bool> uuid_sstable_identifiers_enabled;
named_value<bool> table_digest_insensitive_to_expiry;
named_value<bool> enable_dangerous_direct_import_of_cassandra_counters;
@@ -470,7 +425,6 @@ public:
named_value<unsigned> user_defined_function_contiguous_allocation_limit_bytes;
named_value<uint32_t> schema_registry_grace_period;
named_value<uint32_t> max_concurrent_requests_per_shard;
named_value<uint32_t> uninitialized_connections_semaphore_cpu_concurrency;
named_value<bool> cdc_dont_rewrite_streams;
named_value<tri_mode_restriction> strict_allow_filtering;
named_value<tri_mode_restriction> strict_is_not_null_in_views;
@@ -487,7 +441,6 @@ public:
named_value<uint32_t> alternator_timeout_in_ms;
named_value<double> alternator_ttl_period_in_seconds;
named_value<sstring> alternator_describe_endpoints;
named_value<uint32_t> alternator_max_items_in_batch_write;
named_value<bool> abort_on_ebadf;
@@ -522,7 +475,6 @@ public:
named_value<bool> consistent_cluster_management;
named_value<bool> force_gossip_topology_changes;
named_value<UUID> recovery_leader;
named_value<double> wasm_cache_memory_fraction;
named_value<uint32_t> wasm_cache_timeout_in_ms;
@@ -531,6 +483,7 @@ public:
named_value<uint64_t> wasm_udf_total_fuel;
named_value<size_t> wasm_udf_memory_limit;
named_value<sstring> relabel_config_file;
named_value<sstring> object_storage_config_file;
// wasm_udf_reserved_memory is static because the options in db::config
// are parsed using seastar::app_template, while this option is used for
// configuring the Seastar memory subsystem.
@@ -578,28 +531,11 @@ public:
const db::extensions& extensions() const;
named_value<std::vector<object_storage_endpoint_param>> object_storage_endpoints;
utils::updateable_value_source<std::unordered_map<sstring, s3::endpoint_config>> object_storage_config;
named_value<std::vector<error_injection_at_startup>> error_injections_at_startup;
named_value<double> topology_barrier_stall_detector_threshold_seconds;
named_value<bool> enable_tablets;
named_value<enum_option<tablets_mode_t>> tablets_mode_for_new_keyspaces;
bool enable_tablets_by_default() const noexcept {
switch (tablets_mode_for_new_keyspaces()) {
case tablets_mode_t::mode::unset:
return enable_tablets();
case tablets_mode_t::mode::disabled:
return false;
case tablets_mode_t::mode::enabled:
case tablets_mode_t::mode::enforced:
return true;
}
}
bool enforce_tablets() const noexcept {
return tablets_mode_for_new_keyspaces() == tablets_mode_t::mode::enforced;
}
named_value<uint32_t> view_flow_control_delay_limit_in_ms;
named_value<int> disk_space_monitor_normal_polling_interval_in_seconds;
@@ -608,8 +544,6 @@ public:
named_value<bool> enable_create_table_with_compact_storage;
named_value<bool> rf_rack_valid_keyspaces;
static const sstring default_tls_priority;
private:
template<typename T>

View File

@@ -1,134 +0,0 @@
/*
* Copyright (C) 2025-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#include "db/corrupt_data_handler.hh"
#include "reader_concurrency_semaphore.hh"
#include "replica/database.hh"
#include "utils/UUID_gen.hh"
static logging::logger corrupt_data_logger("corrupt_data");
namespace sm = seastar::metrics;
namespace db {
corrupt_data_handler::corrupt_data_handler(register_metrics rm) {
if (rm) {
_metrics.add_group("corrupt_data", {
sm::make_counter("entries_reported", _stats.corrupt_data_reported,
sm::description("Counts the number of corrupt data instances reported to the corrupt data handler. "
"A non-zero value indicates that the database suffered data corruption."))
});
}
}
future<corrupt_data_handler::entry_id> corrupt_data_handler::record_corrupt_clustering_row(const schema& s, const partition_key& pk,
clustering_row cr, sstring origin, std::optional<sstring> sstable_name) {
++_stats.corrupt_data_reported;
++_stats.corrupt_clustering_rows_reported;
return do_record_corrupt_clustering_row(s, pk, std::move(cr), std::move(origin), std::move(sstable_name)).then([this] (entry_id id) {
if (id) {
++_stats.corrupt_data_recorded;
++_stats.corrupt_clustering_rows_recorded;
}
return id;
});
}
system_table_corrupt_data_handler::system_table_corrupt_data_handler(config cfg, register_metrics rm)
: corrupt_data_handler(rm)
, _entry_ttl(cfg.entry_ttl)
, _sys_ks("system_table_corrupt_data_handler::system_keyspace")
{
}
system_table_corrupt_data_handler::~system_table_corrupt_data_handler() {
}
reader_permit system_table_corrupt_data_handler::make_fragment_permit(const schema& s) {
return _fragment_semaphore->make_tracking_only_permit(s.shared_from_this(), "system_table_corrupt_data_handler::make_fragment_permit", db::no_timeout, {});
}
future<corrupt_data_handler::entry_id> system_table_corrupt_data_handler::do_record_corrupt_mutation_fragment(
pluggable_system_keyspace::permit sys_ks,
const schema& user_table_schema,
const partition_key& pk,
const clustering_key& ck,
mutation_fragment_v2::kind kind,
frozen_mutation_fragment_v2 fmf,
sstring origin,
std::optional<sstring> sstable_name) {
const corrupt_data_handler::entry_id id{utils::UUID_gen::get_time_UUID()};
const auto corrupt_data_schema = sys_ks->local_db().find_column_family(system_keyspace::NAME, system_keyspace::CORRUPT_DATA).schema();
// Using the lower-level mutation API to avoid large allocation warnings when linearizing the frozen mutation fragment.
mutation entry_mutation(corrupt_data_schema, partition_key::from_exploded(*corrupt_data_schema, {serialized(user_table_schema.ks_name()), serialized(user_table_schema.cf_name())}));
auto& entry_row = entry_mutation.partition().clustered_row(*corrupt_data_schema, clustering_key::from_single_value(*corrupt_data_schema, serialized(timeuuid_native_type{id.uuid()})));
const auto timestamp = api::new_timestamp();
auto set_cell_raw = [this, &entry_row, &corrupt_data_schema, timestamp] (const char* cell_name, managed_bytes cell_value) {
auto cdef = corrupt_data_schema->get_column_definition(cell_name);
SCYLLA_ASSERT(cdef);
entry_row.cells().apply(*cdef, atomic_cell::make_live(*cdef->type, timestamp, cell_value, _entry_ttl));
};
auto set_cell = [this, &entry_row, &corrupt_data_schema, timestamp] (const char* cell_name, data_value cell_value) {
auto cdef = corrupt_data_schema->get_column_definition(cell_name);
SCYLLA_ASSERT(cdef);
entry_row.cells().apply(*cdef, atomic_cell::make_live(*cdef->type, timestamp, cell_value.serialize_nonnull(), _entry_ttl));
};
entry_row.apply(row_marker(timestamp, _entry_ttl, gc_clock::now() + _entry_ttl));
set_cell("partition_key", data_value(to_bytes(pk.representation())));
set_cell("clustering_key", data_value(to_bytes(ck.representation())));
set_cell("mutation_fragment_kind", fmt::to_string(kind));
// FIXME: Exposing knowledge here that bytes are serialized by just storing the raw value.
// Need to replace with a fragmented-buffer serialize API call, which we don't have yet.
set_cell_raw("frozen_mutation_fragment", std::move(fmf).representation().to_managed_bytes());
set_cell("origin", origin);
set_cell("sstable_name", sstable_name);
return sys_ks->apply_mutation(std::move(entry_mutation)).then([id] {
return id;
});
}
future<corrupt_data_handler::entry_id> system_table_corrupt_data_handler::do_record_corrupt_clustering_row(const schema& s, const partition_key& pk,
clustering_row cr, sstring origin, std::optional<sstring> sstable_name) {
auto sys_ks = _sys_ks.get_permit();
if (!sys_ks) {
co_return corrupt_data_handler::entry_id::create_null_id();
}
const auto ck = cr.key();
auto fmf = freeze(s, mutation_fragment_v2(s, make_fragment_permit(s), std::move(cr)));
co_return co_await do_record_corrupt_mutation_fragment(std::move(sys_ks), s, pk, ck, mutation_fragment_v2::kind::clustering_row, std::move(fmf),
std::move(origin), std::move(sstable_name));
}
void system_table_corrupt_data_handler::plug_system_keyspace(db::system_keyspace& sys_ks) noexcept {
_sys_ks.plug(sys_ks.shared_from_this());
_fragment_semaphore = std::make_unique<reader_concurrency_semaphore>(reader_concurrency_semaphore::no_limits{}, "system_table_corrupt_data_handler", reader_concurrency_semaphore::register_metrics::no);
}
future<> system_table_corrupt_data_handler::unplug_system_keyspace() noexcept {
co_await _sys_ks.unplug();
co_await _fragment_semaphore->stop();
}
future<corrupt_data_handler::entry_id> nop_corrupt_data_handler::do_record_corrupt_clustering_row(const schema& s, const partition_key& pk,
clustering_row cr, sstring origin, std::optional<sstring> sstable_name) {
return make_ready_future<entry_id>(entry_id::create_null_id());
}
} // namespace db

View File

@@ -1,110 +0,0 @@
/*
* Copyright (C) 2025-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#pragma once
#include "db/system_keyspace.hh"
#include "utils/UUID.hh"
#include "utils/pluggable.hh"
class reader_concurrency_semaphore;
class reader_permit;
namespace db {
class corrupt_data_handler {
public:
// An ID identifying the corrupt data entry.
// To be interpreted in the context of the storage where it is recorded, see storage_name().
using entry_id = utils::tagged_uuid<struct corrupt_data_entry_tag>;
struct stats {
// Counters for the number of corrupt data entries reported.
uint64_t corrupt_data_reported = 0;
// Counters for the number of corrupt data entries recorded.
// Can be less than reported depending on the configuration or if entries failed to be recorded.
uint64_t corrupt_data_recorded = 0;
uint64_t corrupt_clustering_rows_reported = 0;
uint64_t corrupt_clustering_rows_recorded = 0;
};
private:
stats _stats;
seastar::metrics::metric_groups _metrics;
protected:
virtual future<entry_id> do_record_corrupt_clustering_row(const schema& s, const partition_key& pk, clustering_row cr, sstring origin, std::optional<sstring> sstable_name) = 0;
public:
using register_metrics = bool_class<struct corrupt_data_handler_register_metrics_tag>;
explicit corrupt_data_handler(register_metrics);
virtual ~corrupt_data_handler() = default;
const stats& get_stats() const noexcept {
return _stats;
}
// The name of the storage where corrupt data is recorded.
// The storage-name and the entry-id together should allow the user to unambiguously locate the entry.
virtual sstring storage_name() const noexcept = 0;
// Record a corrupt clustering row.
// If the returned id is null, the row was not recorded.
future<entry_id> record_corrupt_clustering_row(const schema& s, const partition_key& pk, clustering_row cr, sstring origin, std::optional<sstring> sstable_name);
};
// Stores corrupt data entries in the system.corrupt_data table.
class system_table_corrupt_data_handler final : public corrupt_data_handler {
public:
using pluggable_system_keyspace = utils::pluggable<db::system_keyspace>;
struct config {
gc_clock::duration entry_ttl;
};
private:
gc_clock::duration _entry_ttl;
pluggable_system_keyspace _sys_ks;
std::unique_ptr<reader_concurrency_semaphore> _fragment_semaphore;
private:
reader_permit make_fragment_permit(const schema& s);
future<entry_id> do_record_corrupt_mutation_fragment(pluggable_system_keyspace::permit sys_ks, const schema& user_table_schema, const partition_key& pk, const clustering_key& ck,
mutation_fragment_v2::kind kind, frozen_mutation_fragment_v2 mf, sstring origin, std::optional<sstring> sstable_name);
virtual future<entry_id> do_record_corrupt_clustering_row(const schema& s, const partition_key& pk, clustering_row cr, sstring origin, std::optional<sstring> sstable_name) override;
public:
explicit system_table_corrupt_data_handler(config, register_metrics);
~system_table_corrupt_data_handler();
virtual sstring storage_name() const noexcept override {
return format("{}.{}", db::system_keyspace::NAME, db::system_keyspace::CORRUPT_DATA);
}
void plug_system_keyspace(db::system_keyspace& sys_ks) noexcept;
future<> unplug_system_keyspace() noexcept;
};
// A no-op corrupt data handler that does not record any data.
class nop_corrupt_data_handler final : public corrupt_data_handler {
virtual future<entry_id> do_record_corrupt_clustering_row(const schema& s, const partition_key& pk, clustering_row cr, sstring origin, std::optional<sstring> sstable_name) override;
public:
explicit nop_corrupt_data_handler(register_metrics rm)
: corrupt_data_handler(rm) {}
virtual sstring storage_name() const noexcept override {
return "/dev/null";
}
};
} // namespace db

View File

@@ -146,14 +146,9 @@ future<> hint_endpoint_manager::stop(drain should_drain) noexcept {
});
}
void hint_endpoint_manager::cancel_draining() noexcept {
_sender.cancel_draining();
}
hint_endpoint_manager::hint_endpoint_manager(const endpoint_id& key, fs::path hint_directory, manager& shard_manager)
: _key(key)
, _shard_manager(shard_manager)
, _store_gate("hint_endpoint_manager")
, _file_update_mutex_ptr(make_lw_shared<seastar::shared_mutex>())
, _file_update_mutex(*_file_update_mutex_ptr)
, _state(state_set::of<state::stopped>())
@@ -167,7 +162,6 @@ hint_endpoint_manager::hint_endpoint_manager(const endpoint_id& key, fs::path hi
hint_endpoint_manager::hint_endpoint_manager(hint_endpoint_manager&& other)
: _key(other._key)
, _shard_manager(other._shard_manager)
, _store_gate(std::move(other._store_gate))
, _file_update_mutex_ptr(std::move(other._file_update_mutex_ptr))
, _file_update_mutex(*_file_update_mutex_ptr)
, _state(other._state)

View File

@@ -41,7 +41,7 @@ private:
endpoint_id _key;
manager& _shard_manager;
hints_store_ptr _hints_store_anchor;
seastar::named_gate _store_gate;
seastar::gate _store_gate;
lw_shared_ptr<seastar::shared_mutex> _file_update_mutex_ptr;
seastar::shared_mutex& _file_update_mutex;
@@ -102,8 +102,6 @@ public:
/// \return Ready future when all operations are complete
future<> stop(drain should_drain = drain::no) noexcept;
void cancel_draining() noexcept;
/// \brief Start the timer.
void start();
@@ -146,10 +144,6 @@ public:
return _state.contains(state::stopped);
}
bool canceled_draining() const noexcept {
return _sender.canceled_draining();
}
/// \brief Returns replay position of the most recently written hint.
///
/// If there weren't any hints written during this endpoint manager's lifetime, a zero replay_position is returned.

View File

@@ -10,7 +10,6 @@
#include "db/hints/internal/hint_sender.hh"
// Seastar features.
#include <chrono>
#include <exception>
#include <seastar/core/abort_source.hh>
#include <seastar/core/coroutine.hh>
@@ -190,14 +189,6 @@ future<> hint_sender::stop(drain should_drain) noexcept {
});
}
void hint_sender::cancel_draining() {
manager_logger.info("Draining of {} has been marked as canceled", _ep_key);
if (_state.contains(state::draining)) {
_state.remove(state::draining);
}
_state.set(state::canceled_draining);
}
void hint_sender::add_segment(sstring seg_name) {
_segments_to_replay.emplace_back(std::move(seg_name));
}
@@ -455,8 +446,6 @@ bool hint_sender::send_one_file(const sstring& fname) {
gc_clock::duration secs_since_file_mod = std::chrono::seconds(last_mod.tv_sec);
lw_shared_ptr<send_one_file_ctx> ctx_ptr = make_lw_shared<send_one_file_ctx>(_last_schema_ver_to_column_mapping);
struct canceled_draining_exception {};
try {
commitlog::read_log_file(fname, manager::FILENAME_PREFIX, [this, secs_since_file_mod, &fname, ctx_ptr] (commitlog::buffer_and_replay_position buf_rp) -> future<> {
auto& buf = buf_rp.buffer;
@@ -469,12 +458,6 @@ bool hint_sender::send_one_file(const sstring& fname) {
co_return;
}
if (canceled_draining()) {
manager_logger.debug("[{}] Exiting reading from commitlog because of canceled draining", _ep_key);
// We need to throw an exception here to cancel reading the segment.
throw canceled_draining_exception{};
}
// Break early if stop() was called or the destination node went down.
if (!can_send()) {
ctx_ptr->segment_replay_failed = true;
@@ -505,8 +488,6 @@ bool hint_sender::send_one_file(const sstring& fname) {
manager_logger.error("{}: {}. Dropping...", fname, ex.what());
ctx_ptr->segment_replay_failed = false;
++this->shard_stats().corrupted_files;
} catch (const canceled_draining_exception&) {
manager_logger.debug("[{}] Loop in send_one_file finishes due to canceled draining", _ep_key);
} catch (...) {
manager_logger.trace("sending of {} failed: {}", fname, std::current_exception());
ctx_ptr->segment_replay_failed = true;
@@ -515,12 +496,6 @@ bool hint_sender::send_one_file(const sstring& fname) {
// wait till all background hints sending is complete
ctx_ptr->file_send_gate.close().get();
// If draining was canceled, we can't say anything about the segment's state,
// so return immediately. We return false here because of that reason too.
if (canceled_draining()) {
return false;
}
// If we are draining ignore failures and drop the segment even if we failed to send it.
if (draining() && ctx_ptr->segment_replay_failed) {
manager_logger.trace("send_one_file(): we are draining so we are going to delete the segment anyway");
@@ -578,10 +553,6 @@ void hint_sender::send_hints_maybe() noexcept {
try {
while (true) {
if (canceled_draining()) {
manager_logger.debug("[{}] Exiting loop in send_hints_maybe because of canceled draining", _ep_key);
break;
}
const sstring* seg_name = name_of_current_segment();
if (!seg_name || !replay_allowed() || !can_send()) {
break;

Some files were not shown because too many files have changed in this diff Show More