Compare commits

..

14 Commits

Author SHA1 Message Date
copilot-swe-agent[bot]
b2c75edccd Update documentation for sstables conversions
Co-authored-by: tgrabiec <283695+tgrabiec@users.noreply.github.com>
2025-12-10 18:25:47 +00:00
copilot-swe-agent[bot]
a5c217aef4 Convert all sstables SCYLLA_ASSERT to scylla_assert (58 conversions across 22 files)
Co-authored-by: tgrabiec <283695+tgrabiec@users.noreply.github.com>
2025-12-10 18:24:02 +00:00
copilot-swe-agent[bot]
3e8c1e47c8 Update documentation for storage_service.cc conversions
Co-authored-by: tgrabiec <283695+tgrabiec@users.noreply.github.com>
2025-12-05 22:28:11 +00:00
copilot-swe-agent[bot]
a4fc85c915 Convert SCYLLA_ASSERT to scylla_assert in storage_service.cc (28 safe conversions)
Co-authored-by: tgrabiec <283695+tgrabiec@users.noreply.github.com>
2025-12-05 22:27:06 +00:00
copilot-swe-agent[bot]
ff155a2c32 Update documentation for topology_coordinator.cc conversions
Co-authored-by: tgrabiec <283695+tgrabiec@users.noreply.github.com>
2025-12-05 22:18:27 +00:00
copilot-swe-agent[bot]
e698e89113 Convert SCYLLA_ASSERT to scylla_assert in topology_coordinator.cc
Co-authored-by: tgrabiec <283695+tgrabiec@users.noreply.github.com>
2025-12-05 22:15:51 +00:00
copilot-swe-agent[bot]
efe3e73b5c Add comprehensive summary of SCYLLA_ASSERT conversion work
Co-authored-by: tgrabiec <283695+tgrabiec@users.noreply.github.com>
2025-12-05 21:11:06 +00:00
copilot-swe-agent[bot]
13644ff110 Update documentation with correct conversion counts
Co-authored-by: tgrabiec <283695+tgrabiec@users.noreply.github.com>
2025-12-05 21:09:43 +00:00
copilot-swe-agent[bot]
307262ca27 Add descriptive error messages to scylla_assert unreachable code paths
Co-authored-by: tgrabiec <283695+tgrabiec@users.noreply.github.com>
2025-12-05 21:08:19 +00:00
copilot-swe-agent[bot]
f7e1ca23f7 Convert additional safe SCYLLA_ASSERT usages to scylla_assert
Co-authored-by: tgrabiec <283695+tgrabiec@users.noreply.github.com>
2025-12-05 21:06:35 +00:00
copilot-swe-agent[bot]
254c7e8cc9 Add comprehensive documentation for SCYLLA_ASSERT conversion
Co-authored-by: tgrabiec <283695+tgrabiec@users.noreply.github.com>
2025-12-05 21:03:14 +00:00
copilot-swe-agent[bot]
f447c4464b Replace SCYLLA_ASSERT with scylla_assert in safe contexts (sample files)
Co-authored-by: tgrabiec <283695+tgrabiec@users.noreply.github.com>
2025-12-05 21:01:27 +00:00
copilot-swe-agent[bot]
62cda957bc Add scylla_assert() macro based on on_internal_error()
Co-authored-by: tgrabiec <283695+tgrabiec@users.noreply.github.com>
2025-12-05 20:55:52 +00:00
copilot-swe-agent[bot]
d68f071b91 Initial plan 2025-12-05 20:47:55 +00:00
155 changed files with 3139 additions and 2311 deletions

9
.github/CODEOWNERS vendored
View File

@@ -1,5 +1,5 @@
# AUTH
auth/* @nuivall
auth/* @nuivall @ptrsmrn
# CACHE
row_cache* @tgrabiec
@@ -25,11 +25,11 @@ compaction/* @raphaelsc
transport/*
# CQL QUERY LANGUAGE
cql3/* @tgrabiec @nuivall
cql3/* @tgrabiec @nuivall @ptrsmrn
# COUNTERS
counters* @nuivall
tests/counter_test* @nuivall
counters* @nuivall @ptrsmrn
tests/counter_test* @nuivall @ptrsmrn
# DOCS
docs/* @annastuchlik @tzach
@@ -57,6 +57,7 @@ repair/* @tgrabiec @asias
# SCHEMA MANAGEMENT
db/schema_tables* @tgrabiec
db/legacy_schema_migrator* @tgrabiec
service/migration* @tgrabiec
schema* @tgrabiec

View File

@@ -62,7 +62,7 @@ def create_pull_request(repo, new_branch_name, base_branch_name, pr, backport_pr
if is_draft:
labels_to_add.append("conflicts")
pr_comment = f"@{pr.user.login} - This PR was marked as draft because it has conflicts\n"
pr_comment += "Please resolve them and remove the 'conflicts' label. The PR will be made ready for review automatically."
pr_comment += "Please resolve them and mark this PR as ready for review"
backport_pr.create_issue_comment(pr_comment)
# Apply all labels at once if we have any

View File

@@ -18,7 +18,7 @@ jobs:
// Regular expression pattern to check for "Fixes" prefix
// Adjusted to dynamically insert the repository full name
const pattern = `Fixes:? ((?:#|${repo.replace('/', '\\/')}#|https://github\\.com/${repo.replace('/', '\\/')}/issues/)(\\d+)|([A-Z]+-\\d+))`;
const pattern = `Fixes:? (?:#|${repo.replace('/', '\\/')}#|https://github\\.com/${repo.replace('/', '\\/')}/issues/)(\\d+)`;
const regex = new RegExp(pattern);
if (!regex.test(body)) {

View File

@@ -7,7 +7,7 @@ on:
- enterprise
paths:
- '**/*.cc'
- 'scripts/metrics-config.yml'
- 'scripts/metrics-config.yml'
- 'scripts/get_description.py'
- 'docs/_ext/scylladb_metrics.py'
@@ -15,20 +15,20 @@ jobs:
validate-metrics:
runs-on: ubuntu-latest
name: Check metrics documentation coverage
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
submodules: true
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: '3.10'
- name: Install dependencies
run: pip install PyYAML
- name: Validate metrics
run: python3 scripts/get_description.py --validate -c scripts/metrics-config.yml

View File

@@ -3,13 +3,10 @@ name: Trigger Scylla CI Route
on:
issue_comment:
types: [created]
pull_request_target:
types:
- unlabeled
jobs:
trigger-jenkins:
if: (github.event.comment.user.login != 'scylladbbot' && contains(github.event.comment.body, '@scylladbbot') && contains(github.event.comment.body, 'trigger-ci')) || github.event.label.name == 'conflicts'
if: github.event.comment.user.login != 'scylladbbot' && contains(github.event.comment.body, '@scylladbbot') && contains(github.event.comment.body, 'trigger-ci')
runs-on: ubuntu-latest
steps:
- name: Trigger Scylla-CI-Route Jenkins Job

View File

@@ -0,0 +1,182 @@
# SCYLLA_ASSERT to scylla_assert() Conversion Summary
## Objective
Replace crash-inducing `SCYLLA_ASSERT` with exception-throwing `scylla_assert()` to prevent cluster-wide crashes and maintain availability.
## What Was Done
### 1. Infrastructure Implementation ✓
Created new `scylla_assert()` macro in `utils/assert.hh`:
- Based on `on_internal_error()` for exception-based error handling
- Supports optional custom error messages via variadic arguments
- Uses `seastar::format()` for string formatting
- Compatible with C++23 standard (uses `__VA_OPT__`)
**Key difference from SCYLLA_ASSERT:**
```cpp
// Old: Crashes the process immediately
SCYLLA_ASSERT(condition);
// New: Throws exception (or aborts based on config)
scylla_assert(condition);
scylla_assert(condition, "custom error message: {}", value);
```
### 2. Comprehensive Analysis ✓
Analyzed entire codebase to identify safe vs unsafe conversion locations:
**Statistics:**
- Total SCYLLA_ASSERT usages: ~1307 (including tests)
- Non-test usages: ~886
- **Unsafe to convert**: 223 usages (25%)
- In noexcept functions: 187 usages across 50 files
- In destructors: 36 usages across 25 files
- **Safe to convert**: ~668 usages (75%)
- **Converted in this PR**: 112 usages (16.8% of safe conversions)
### 3. Documentation ✓
Created comprehensive documentation:
1. **Conversion Guide** (`docs/dev/scylla_assert_conversion.md`)
- Explains safe vs unsafe contexts
- Provides conversion strategy
- Lists all completed conversions
- Includes testing guidance
2. **Unsafe Locations Report** (`docs/dev/unsafe_scylla_assert_locations.md`)
- Detailed listing of 223 unsafe locations
- Organized by file with line numbers
- Separated into noexcept and destructor categories
### 4. Sample Conversions ✓
Converted 112 safe SCYLLA_ASSERT usages across 32 files as demonstration:
| File | Conversions | Context |
|------|------------|---------|
| db/large_data_handler.{cc,hh} | 5 | Future-returning functions |
| db/schema_applier.cc | 1 | Coroutine function |
| db/system_distributed_keyspace.cc | 1 | Regular function |
| db/commitlog/commitlog_replayer.cc | 1 | Coroutine function |
| db/view/row_locking.cc | 2 | Regular function |
| db/size_estimates_virtual_reader.cc | 1 | Lambda in coroutine |
| db/corrupt_data_handler.cc | 2 | Lambdas in future-returning function |
| raft/tracker.cc | 2 | Unreachable code (switch defaults) |
| service/topology_coordinator.cc | 11 | Coroutine functions (topology operations) |
| service/storage_service.cc | 28 | Critical node lifecycle operations |
| sstables/* (22 files) | 58 | SSTable operations (read/write/compress/index) |
All conversions were in **safe contexts** (non-noexcept, non-destructor functions). 3 assertions in storage_service.cc remain as SCYLLA_ASSERT (in noexcept functions).
## Why These Cannot Be Converted
### Unsafe Context #1: noexcept Functions (187 usages)
**Problem**: Throwing from noexcept causes `std::terminate()`, same as crash.
**Example** (from `locator/production_snitch_base.hh`):
```cpp
virtual bool prefer_local() const noexcept override {
SCYLLA_ASSERT(_backreference != nullptr); // Cannot convert!
return _backreference->prefer_local();
}
```
**Solution for these**: Keep as SCYLLA_ASSERT or use `on_fatal_internal_error()`.
### Unsafe Context #2: Destructors (36 usages)
**Problem**: Destructors are implicitly noexcept, throwing causes `std::terminate()`.
**Example** (from `utils/file_lock.cc`):
```cpp
~file_lock() noexcept {
if (_fd.get() != -1) {
SCYLLA_ASSERT(_fd.get() != -1); // Cannot convert!
auto r = ::flock(_fd.get(), LOCK_UN);
SCYLLA_ASSERT(r == 0); // Cannot convert!
}
}
```
**Solution for these**: Keep as SCYLLA_ASSERT.
## Benefits of scylla_assert()
1. **Prevents Cluster-Wide Crashes**
- Exception can be caught and handled gracefully
- Failed node doesn't bring down entire cluster
2. **Maintains Availability**
- Service can continue with degraded functionality
- Better than complete crash
3. **Better Error Reporting**
- Includes backtrace via `on_internal_error()`
- Supports custom error messages
- Configurable abort-on-error for testing
4. **Backward Compatible**
- SCYLLA_ASSERT still exists for unsafe contexts
- Can be gradually adopted
## Testing
- Created manual test in `test/manual/test_scylla_assert.cc`
- Verifies passing and failing assertions
- Tests custom error messages
- Code review passed with improvements made
## Next Steps (Future Work)
1. **Gradual Conversion**
- Convert remaining ~653 safe SCYLLA_ASSERT usages incrementally
- Prioritize high-impact code paths first
2. **Review noexcept Functions**
- Evaluate if some can be made non-noexcept
- Consider using `on_fatal_internal_error()` where appropriate
3. **Integration Testing**
- Run full test suite with conversions
- Monitor for any unexpected behavior
- Validate exception propagation
4. **Automated Analysis Tool**
- Create tool to identify safe conversion candidates
- Generate conversion patches automatically
- Track conversion progress
## Files Modified in This PR
### Core Implementation
- `utils/assert.hh` - Added scylla_assert() macro
### Conversions
- `db/large_data_handler.cc`
- `db/large_data_handler.hh`
- `db/schema_applier.cc`
- `db/system_distributed_keyspace.cc`
- `db/commitlog/commitlog_replayer.cc`
- `db/view/row_locking.cc`
- `db/size_estimates_virtual_reader.cc`
- `db/corrupt_data_handler.cc`
- `raft/tracker.cc`
- `service/topology_coordinator.cc`
- `service/storage_service.cc`
- `sstables/` (22 files across trie/, mx/, and core sstables)
### Documentation
- `docs/dev/scylla_assert_conversion.md`
- `docs/dev/unsafe_scylla_assert_locations.md`
- `test/manual/test_scylla_assert.cc`
## Conclusion
This PR establishes the infrastructure and methodology for replacing SCYLLA_ASSERT with scylla_assert() to improve cluster availability. The sample conversions demonstrate the approach, while comprehensive documentation enables future work.
**Key Achievement**: Provided a safe path forward for converting 75% (~668) of SCYLLA_ASSERT usages to exception-based assertions, while clearly documenting the 25% (~223) that must remain as crash-inducing assertions due to language constraints. Converted 112 usages as demonstration (16.8% of safe conversions), prioritizing critical files like storage_service.cc (node lifecycle) and all sstables files (data persistence), with ~556 remaining.

View File

@@ -42,7 +42,7 @@ comparison_operator_type get_comparison_operator(const rjson::value& comparison_
if (!comparison_operator.IsString()) {
throw api_error::validation(fmt::format("Invalid comparison operator definition {}", rjson::print(comparison_operator)));
}
std::string op = rjson::to_string(comparison_operator);
std::string op = comparison_operator.GetString();
auto it = ops.find(op);
if (it == ops.end()) {
throw api_error::validation(fmt::format("Unsupported comparison operator {}", op));
@@ -377,8 +377,8 @@ bool check_compare(const rjson::value* v1, const rjson::value& v2, const Compara
return cmp(unwrap_number(*v1, cmp.diagnostic), unwrap_number(v2, cmp.diagnostic));
}
if (kv1.name == "S") {
return cmp(rjson::to_string_view(kv1.value),
rjson::to_string_view(kv2.value));
return cmp(std::string_view(kv1.value.GetString(), kv1.value.GetStringLength()),
std::string_view(kv2.value.GetString(), kv2.value.GetStringLength()));
}
if (kv1.name == "B") {
auto d_kv1 = unwrap_bytes(kv1.value, v1_from_query);
@@ -470,9 +470,9 @@ static bool check_BETWEEN(const rjson::value* v, const rjson::value& lb, const r
return check_BETWEEN(unwrap_number(*v, diag), unwrap_number(lb, diag), unwrap_number(ub, diag), bounds_from_query);
}
if (kv_v.name == "S") {
return check_BETWEEN(rjson::to_string_view(kv_v.value),
rjson::to_string_view(kv_lb.value),
rjson::to_string_view(kv_ub.value),
return check_BETWEEN(std::string_view(kv_v.value.GetString(), kv_v.value.GetStringLength()),
std::string_view(kv_lb.value.GetString(), kv_lb.value.GetStringLength()),
std::string_view(kv_ub.value.GetString(), kv_ub.value.GetStringLength()),
bounds_from_query);
}
if (kv_v.name == "B") {

View File

@@ -8,8 +8,6 @@
#include "consumed_capacity.hh"
#include "error.hh"
#include "utils/rjson.hh"
#include <fmt/format.h>
namespace alternator {
@@ -34,12 +32,12 @@ bool consumed_capacity_counter::should_add_capacity(const rjson::value& request)
if (!return_consumed->IsString()) {
throw api_error::validation("Non-string ReturnConsumedCapacity field in request");
}
std::string_view consumed = rjson::to_string_view(*return_consumed);
std::string consumed = return_consumed->GetString();
if (consumed == "INDEXES") {
throw api_error::validation("INDEXES consumed capacity is not supported");
}
if (consumed != "TOTAL") {
throw api_error::validation(fmt::format("Unknown consumed capacity {}", consumed));
throw api_error::validation("Unknown consumed capacity "+ consumed);
}
return true;
}

View File

@@ -419,7 +419,7 @@ static std::optional<std::string> find_table_name(const rjson::value& request) {
if (!table_name_value->IsString()) {
throw api_error::validation("Non-string TableName field in request");
}
std::string table_name = rjson::to_string(*table_name_value);
std::string table_name = table_name_value->GetString();
return table_name;
}
@@ -546,7 +546,7 @@ get_table_or_view(service::storage_proxy& proxy, const rjson::value& request) {
// does exist but the index does not (ValidationException).
if (proxy.data_dictionary().has_schema(keyspace_name, orig_table_name)) {
throw api_error::validation(
fmt::format("Requested resource not found: Index '{}' for table '{}'", rjson::to_string_view(*index_name), orig_table_name));
fmt::format("Requested resource not found: Index '{}' for table '{}'", index_name->GetString(), orig_table_name));
} else {
throw api_error::resource_not_found(
fmt::format("Requested resource not found: Table: {} not found", orig_table_name));
@@ -587,7 +587,7 @@ static std::string get_string_attribute(const rjson::value& value, std::string_v
throw api_error::validation(fmt::format("Expected string value for attribute {}, got: {}",
attribute_name, value));
}
return rjson::to_string(*attribute_value);
return std::string(attribute_value->GetString(), attribute_value->GetStringLength());
}
// Convenience function for getting the value of a boolean attribute, or a
@@ -1080,8 +1080,8 @@ static void add_column(schema_builder& builder, const std::string& name, const r
}
for (auto it = attribute_definitions.Begin(); it != attribute_definitions.End(); ++it) {
const rjson::value& attribute_info = *it;
if (rjson::to_string_view(attribute_info["AttributeName"]) == name) {
std::string_view type = rjson::to_string_view(attribute_info["AttributeType"]);
if (attribute_info["AttributeName"].GetString() == name) {
auto type = attribute_info["AttributeType"].GetString();
data_type dt = parse_key_type(type);
if (computed_column) {
// Computed column for GSI (doesn't choose a real column as-is
@@ -1116,7 +1116,7 @@ static std::pair<std::string, std::string> parse_key_schema(const rjson::value&
throw api_error::validation("First element of KeySchema must be an object");
}
const rjson::value *v = rjson::find((*key_schema)[0], "KeyType");
if (!v || !v->IsString() || rjson::to_string_view(*v) != "HASH") {
if (!v || !v->IsString() || v->GetString() != std::string("HASH")) {
throw api_error::validation("First key in KeySchema must be a HASH key");
}
v = rjson::find((*key_schema)[0], "AttributeName");
@@ -1124,14 +1124,14 @@ static std::pair<std::string, std::string> parse_key_schema(const rjson::value&
throw api_error::validation("First key in KeySchema must have string AttributeName");
}
validate_attr_name_length(supplementary_context, v->GetStringLength(), true, "HASH key in KeySchema - ");
std::string hash_key = rjson::to_string(*v);
std::string hash_key = v->GetString();
std::string range_key;
if (key_schema->Size() == 2) {
if (!(*key_schema)[1].IsObject()) {
throw api_error::validation("Second element of KeySchema must be an object");
}
v = rjson::find((*key_schema)[1], "KeyType");
if (!v || !v->IsString() || rjson::to_string_view(*v) != "RANGE") {
if (!v || !v->IsString() || v->GetString() != std::string("RANGE")) {
throw api_error::validation("Second key in KeySchema must be a RANGE key");
}
v = rjson::find((*key_schema)[1], "AttributeName");
@@ -1887,8 +1887,8 @@ future<executor::request_return_type> executor::create_table(client_state& clien
std::string def_type = type_to_string(def.type);
for (auto it = attribute_definitions.Begin(); it != attribute_definitions.End(); ++it) {
const rjson::value& attribute_info = *it;
if (rjson::to_string_view(attribute_info["AttributeName"]) == def.name_as_text()) {
std::string_view type = rjson::to_string_view(attribute_info["AttributeType"]);
if (attribute_info["AttributeName"].GetString() == def.name_as_text()) {
auto type = attribute_info["AttributeType"].GetString();
if (type != def_type) {
throw api_error::validation(fmt::format("AttributeDefinitions redefined {} to {} already a key attribute of type {} in this table", def.name_as_text(), type, def_type));
}
@@ -2223,12 +2223,12 @@ void validate_value(const rjson::value& v, const char* caller) {
// The put_or_delete_item class builds the mutations needed by the PutItem and
// DeleteItem operations - either as stand-alone commands or part of a list
// of commands in BatchWriteItem.
// of commands in BatchWriteItems.
// put_or_delete_item splits each operation into two stages: Constructing the
// object parses and validates the user input (throwing exceptions if there
// are input errors). Later, build() generates the actual mutation, with a
// specified timestamp. This split is needed because of the peculiar needs of
// BatchWriteItem and LWT. BatchWriteItem needs all parsing to happen before
// BatchWriteItems and LWT. BatchWriteItems needs all parsing to happen before
// any writing happens (if one of the commands has an error, none of the
// writes should be done). LWT makes it impossible for the parse step to
// generate "mutation" objects, because the timestamp still isn't known.
@@ -2362,7 +2362,7 @@ put_or_delete_item::put_or_delete_item(const rjson::value& item, schema_ptr sche
_cells = std::vector<cell>();
_cells->reserve(item.MemberCount());
for (auto it = item.MemberBegin(); it != item.MemberEnd(); ++it) {
bytes column_name = to_bytes(rjson::to_string_view(it->name));
bytes column_name = to_bytes(it->name.GetString());
validate_value(it->value, "PutItem");
const column_definition* cdef = find_attribute(*schema, column_name);
validate_attr_name_length("", column_name.size(), cdef && cdef->is_primary_key());
@@ -2739,7 +2739,7 @@ future<executor::request_return_type> rmw_operation::execute(service::storage_pr
auto read_command = needs_read_before_write ?
previous_item_read_command(proxy, schema(), _ck, selection) :
nullptr;
return proxy.cas(schema(), std::move(*cas_shard), *this, read_command, to_partition_ranges(*schema(), _pk),
return proxy.cas(schema(), std::move(*cas_shard), shared_from_this(), read_command, to_partition_ranges(*schema(), _pk),
{timeout, std::move(permit), client_state, trace_state},
db::consistency_level::LOCAL_SERIAL, db::consistency_level::LOCAL_QUORUM, timeout, timeout, true, std::move(cdc_opts)).then([this, read_command, &wcu_total] (bool is_applied) mutable {
if (!is_applied) {
@@ -2783,10 +2783,10 @@ static void verify_all_are_used(const rjson::value* field,
return;
}
for (auto it = field->MemberBegin(); it != field->MemberEnd(); ++it) {
if (!used.contains(rjson::to_string(it->name))) {
if (!used.contains(it->name.GetString())) {
throw api_error::validation(
format("{} has spurious '{}', not used in {}",
field_name, rjson::to_string_view(it->name), operation));
field_name, it->name.GetString(), operation));
}
}
}
@@ -3000,7 +3000,7 @@ future<executor::request_return_type> executor::delete_item(client_state& client
}
static schema_ptr get_table_from_batch_request(const service::storage_proxy& proxy, const rjson::value::ConstMemberIterator& batch_request) {
sstring table_name = rjson::to_sstring(batch_request->name); // JSON keys are always strings
sstring table_name = batch_request->name.GetString(); // JSON keys are always strings
try {
return proxy.data_dictionary().find_schema(sstring(executor::KEYSPACE_NAME_PREFIX) + table_name, table_name);
} catch(data_dictionary::no_such_column_family&) {
@@ -3026,20 +3026,17 @@ struct primary_key_equal {
};
// This is a cas_request subclass for applying given put_or_delete_items to
// one partition using LWT as part as BatchWriteItem. This is a write-only
// one partition using LWT as part as BatchWriteItems. This is a write-only
// operation, not needing the previous value of the item (the mutation to be
// done is known prior to starting the operation). Nevertheless, we want to
// do this mutation via LWT to ensure that it is serialized with other LWT
// mutations to the same partition.
//
// The std::vector<put_or_delete_item> must remain alive until the
// storage_proxy::cas() future is resolved.
class put_or_delete_item_cas_request : public service::cas_request {
schema_ptr schema;
const std::vector<put_or_delete_item>& _mutation_builders;
std::vector<put_or_delete_item> _mutation_builders;
public:
put_or_delete_item_cas_request(schema_ptr s, const std::vector<put_or_delete_item>& b) :
schema(std::move(s)), _mutation_builders(b) { }
put_or_delete_item_cas_request(schema_ptr s, std::vector<put_or_delete_item>&& b) :
schema(std::move(s)), _mutation_builders(std::move(b)) { }
virtual ~put_or_delete_item_cas_request() = default;
virtual std::optional<mutation> apply(foreign_ptr<lw_shared_ptr<query::result>> qr, const query::partition_slice& slice, api::timestamp_type ts, cdc::per_request_options& cdc_opts) override {
std::optional<mutation> ret;
@@ -3055,48 +3052,20 @@ public:
}
};
future<> executor::cas_write(schema_ptr schema, service::cas_shard cas_shard, const dht::decorated_key& dk,
const std::vector<put_or_delete_item>& mutation_builders, service::client_state& client_state,
tracing::trace_state_ptr trace_state, service_permit permit)
{
if (!cas_shard.this_shard()) {
_stats.shard_bounce_for_lwt++;
return container().invoke_on(cas_shard.shard(), _ssg,
[cs = client_state.move_to_other_shard(),
&mb = mutation_builders,
&dk,
ks = schema->ks_name(),
cf = schema->cf_name(),
gt = tracing::global_trace_state_ptr(trace_state),
permit = std::move(permit)]
(executor& self) mutable {
return do_with(cs.get(), [&mb, &dk, ks = std::move(ks), cf = std::move(cf),
trace_state = tracing::trace_state_ptr(gt), &self]
(service::client_state& client_state) mutable {
auto schema = self._proxy.data_dictionary().find_schema(ks, cf);
service::cas_shard cas_shard(*schema, dk.token());
//FIXME: Instead of passing empty_service_permit() to the background operation,
// the current permit's lifetime should be prolonged, so that it's destructed
// only after all background operations are finished as well.
return self.cas_write(schema, std::move(cas_shard), dk, mb, client_state, std::move(trace_state), empty_service_permit());
});
});
}
static future<> cas_write(service::storage_proxy& proxy, schema_ptr schema, service::cas_shard cas_shard, dht::decorated_key dk, std::vector<put_or_delete_item>&& mutation_builders,
service::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit) {
auto timeout = executor::default_timeout();
auto op = std::make_unique<put_or_delete_item_cas_request>(schema, mutation_builders);
auto* op_ptr = op.get();
auto op = seastar::make_shared<put_or_delete_item_cas_request>(schema, std::move(mutation_builders));
auto cdc_opts = cdc::per_request_options{
.alternator = true,
.alternator_streams_increased_compatibility =
schema->cdc_options().enabled() && _proxy.data_dictionary().get_config().alternator_streams_increased_compatibility(),
schema->cdc_options().enabled() && proxy.data_dictionary().get_config().alternator_streams_increased_compatibility(),
};
return _proxy.cas(schema, std::move(cas_shard), *op_ptr, nullptr, to_partition_ranges(dk),
return proxy.cas(schema, std::move(cas_shard), op, nullptr, to_partition_ranges(dk),
{timeout, std::move(permit), client_state, trace_state},
db::consistency_level::LOCAL_SERIAL, db::consistency_level::LOCAL_QUORUM,
timeout, timeout, true, std::move(cdc_opts)).finally([op = std::move(op)]{}).discard_result();
// We discarded cas()'s future value ("is_applied") because BatchWriteItem
timeout, timeout, true, std::move(cdc_opts)).discard_result();
// We discarded cas()'s future value ("is_applied") because BatchWriteItems
// does not need to support conditional updates.
}
@@ -3118,11 +3087,13 @@ struct schema_decorated_key_equal {
// FIXME: if we failed writing some of the mutations, need to return a list
// of these failed mutations rather than fail the whole write (issue #5650).
future<> executor::do_batch_write(
static future<> do_batch_write(service::storage_proxy& proxy,
smp_service_group ssg,
std::vector<std::pair<schema_ptr, put_or_delete_item>> mutation_builders,
service::client_state& client_state,
tracing::trace_state_ptr trace_state,
service_permit permit) {
service_permit permit,
stats& stats) {
if (mutation_builders.empty()) {
return make_ready_future<>();
}
@@ -3144,7 +3115,7 @@ future<> executor::do_batch_write(
mutations.push_back(b.second.build(b.first, now));
any_cdc_enabled |= b.first->cdc_options().enabled();
}
return _proxy.mutate(std::move(mutations),
return proxy.mutate(std::move(mutations),
db::consistency_level::LOCAL_QUORUM,
executor::default_timeout(),
trace_state,
@@ -3153,48 +3124,55 @@ future<> executor::do_batch_write(
false,
cdc::per_request_options{
.alternator = true,
.alternator_streams_increased_compatibility = any_cdc_enabled && _proxy.data_dictionary().get_config().alternator_streams_increased_compatibility(),
.alternator_streams_increased_compatibility = any_cdc_enabled && proxy.data_dictionary().get_config().alternator_streams_increased_compatibility(),
});
} else {
// Do the write via LWT:
// Multiple mutations may be destined for the same partition, adding
// or deleting different items of one partition. Join them together
// because we can do them in one cas() call.
using map_type = std::unordered_map<schema_decorated_key,
std::vector<put_or_delete_item>,
schema_decorated_key_hash,
schema_decorated_key_equal>;
auto key_builders = std::make_unique<map_type>(1, schema_decorated_key_hash{}, schema_decorated_key_equal{});
for (auto&& b : std::move(mutation_builders)) {
auto [it, added] = key_builders->try_emplace(schema_decorated_key {
.schema = b.first,
.dk = dht::decorate_key(*b.first, b.second.pk())
});
std::unordered_map<schema_decorated_key, std::vector<put_or_delete_item>, schema_decorated_key_hash, schema_decorated_key_equal>
key_builders(1, schema_decorated_key_hash{}, schema_decorated_key_equal{});
for (auto& b : mutation_builders) {
auto dk = dht::decorate_key(*b.first, b.second.pk());
auto [it, added] = key_builders.try_emplace(schema_decorated_key{b.first, dk});
it->second.push_back(std::move(b.second));
}
auto* key_builders_ptr = key_builders.get();
return parallel_for_each(*key_builders_ptr, [this, &client_state, trace_state, permit = std::move(permit)] (const auto& e) {
_stats.write_using_lwt++;
return parallel_for_each(std::move(key_builders), [&proxy, &client_state, &stats, trace_state, ssg, permit = std::move(permit)] (auto& e) {
stats.write_using_lwt++;
auto desired_shard = service::cas_shard(*e.first.schema, e.first.dk.token());
auto s = e.first.schema;
if (desired_shard.this_shard()) {
return cas_write(proxy, e.first.schema, std::move(desired_shard), e.first.dk, std::move(e.second), client_state, trace_state, permit);
} else {
stats.shard_bounce_for_lwt++;
return proxy.container().invoke_on(desired_shard.shard(), ssg,
[cs = client_state.move_to_other_shard(),
mb = e.second,
dk = e.first.dk,
ks = e.first.schema->ks_name(),
cf = e.first.schema->cf_name(),
gt = tracing::global_trace_state_ptr(trace_state),
permit = std::move(permit)]
(service::storage_proxy& proxy) mutable {
return do_with(cs.get(), [&proxy, mb = std::move(mb), dk = std::move(dk), ks = std::move(ks), cf = std::move(cf),
trace_state = tracing::trace_state_ptr(gt)]
(service::client_state& client_state) mutable {
auto schema = proxy.data_dictionary().find_schema(ks, cf);
static const auto* injection_name = "alternator_executor_batch_write_wait";
return utils::get_local_injector().inject(injection_name, [s = std::move(s)] (auto& handler) -> future<> {
const auto ks = handler.get("keyspace");
const auto cf = handler.get("table");
const auto shard = std::atoll(handler.get("shard")->data());
if (ks == s->ks_name() && cf == s->cf_name() && shard == this_shard_id()) {
elogger.info("{}: hit", injection_name);
co_await handler.wait_for_message(std::chrono::steady_clock::now() + std::chrono::minutes{5});
elogger.info("{}: continue", injection_name);
}
}).then([&e, desired_shard = std::move(desired_shard),
&client_state, trace_state = std::move(trace_state), permit = std::move(permit), this]() mutable
{
return cas_write(e.first.schema, std::move(desired_shard), e.first.dk,
std::move(e.second), client_state, std::move(trace_state), std::move(permit));
});
}).finally([key_builders = std::move(key_builders)]{});
// The desired_shard on the original shard remains alive for the duration
// of cas_write on this shard and prevents any tablet operations.
// However, we need a local instance of cas_shard on this shard
// to pass it to sp::cas, so we just create a new one.
service::cas_shard cas_shard(*schema, dk.token());
//FIXME: Instead of passing empty_service_permit() to the background operation,
// the current permit's lifetime should be prolonged, so that it's destructed
// only after all background operations are finished as well.
return cas_write(proxy, schema, std::move(cas_shard), dk, std::move(mb), client_state, std::move(trace_state), empty_service_permit());
});
}).finally([desired_shard = std::move(desired_shard)]{});
}
});
}
}
@@ -3341,7 +3319,7 @@ future<executor::request_return_type> executor::batch_write_item(client_state& c
_stats.wcu_total[stats::DELETE_ITEM] += wcu_delete_units;
_stats.api_operations.batch_write_item_batch_total += total_items;
_stats.api_operations.batch_write_item_histogram.add(total_items);
co_await do_batch_write(std::move(mutation_builders), client_state, trace_state, std::move(permit));
co_await do_batch_write(_proxy, _ssg, std::move(mutation_builders), client_state, trace_state, std::move(permit), _stats);
// FIXME: Issue #5650: If we failed writing some of the updates,
// need to return a list of these failed updates in UnprocessedItems
// rather than fail the whole write (issue #5650).
@@ -3386,7 +3364,7 @@ static bool hierarchy_filter(rjson::value& val, const attribute_path_map_node<T>
}
rjson::value newv = rjson::empty_object();
for (auto it = v.MemberBegin(); it != v.MemberEnd(); ++it) {
std::string attr = rjson::to_string(it->name);
std::string attr = it->name.GetString();
auto x = members.find(attr);
if (x != members.end()) {
if (x->second) {
@@ -3606,7 +3584,7 @@ static std::optional<attrs_to_get> calculate_attrs_to_get(const rjson::value& re
const rjson::value& attributes_to_get = req["AttributesToGet"];
attrs_to_get ret;
for (auto it = attributes_to_get.Begin(); it != attributes_to_get.End(); ++it) {
attribute_path_map_add("AttributesToGet", ret, rjson::to_string(*it));
attribute_path_map_add("AttributesToGet", ret, it->GetString());
validate_attr_name_length("AttributesToGet", it->GetStringLength(), false);
}
if (ret.empty()) {
@@ -4272,12 +4250,12 @@ inline void update_item_operation::apply_attribute_updates(const std::unique_ptr
attribute_collector& modified_attrs, bool& any_updates, bool& any_deletes) const {
for (auto it = _attribute_updates->MemberBegin(); it != _attribute_updates->MemberEnd(); ++it) {
// Note that it.key() is the name of the column, *it is the operation
bytes column_name = to_bytes(rjson::to_string_view(it->name));
bytes column_name = to_bytes(it->name.GetString());
const column_definition* cdef = _schema->get_column_definition(column_name);
if (cdef && cdef->is_primary_key()) {
throw api_error::validation(format("UpdateItem cannot update key column {}", rjson::to_string_view(it->name)));
throw api_error::validation(format("UpdateItem cannot update key column {}", it->name.GetString()));
}
std::string action = rjson::to_string((it->value)["Action"]);
std::string action = (it->value)["Action"].GetString();
if (action == "DELETE") {
// The DELETE operation can do two unrelated tasks. Without a
// "Value" option, it is used to delete an attribute. With a
@@ -5474,7 +5452,7 @@ calculate_bounds_conditions(schema_ptr schema, const rjson::value& conditions) {
std::vector<query::clustering_range> ck_bounds;
for (auto it = conditions.MemberBegin(); it != conditions.MemberEnd(); ++it) {
sstring key = rjson::to_sstring(it->name);
std::string key = it->name.GetString();
const rjson::value& condition = it->value;
const rjson::value& comp_definition = rjson::get(condition, "ComparisonOperator");
@@ -5482,13 +5460,13 @@ calculate_bounds_conditions(schema_ptr schema, const rjson::value& conditions) {
const column_definition& pk_cdef = schema->partition_key_columns().front();
const column_definition* ck_cdef = schema->clustering_key_size() > 0 ? &schema->clustering_key_columns().front() : nullptr;
if (key == pk_cdef.name_as_text()) {
if (sstring(key) == pk_cdef.name_as_text()) {
if (!partition_ranges.empty()) {
throw api_error::validation("Currently only a single restriction per key is allowed");
}
partition_ranges.push_back(calculate_pk_bound(schema, pk_cdef, comp_definition, attr_list));
}
if (ck_cdef && key == ck_cdef->name_as_text()) {
if (ck_cdef && sstring(key) == ck_cdef->name_as_text()) {
if (!ck_bounds.empty()) {
throw api_error::validation("Currently only a single restriction per key is allowed");
}
@@ -5889,7 +5867,7 @@ future<executor::request_return_type> executor::list_tables(client_state& client
rjson::value* exclusive_start_json = rjson::find(request, "ExclusiveStartTableName");
rjson::value* limit_json = rjson::find(request, "Limit");
std::string exclusive_start = exclusive_start_json ? rjson::to_string(*exclusive_start_json) : "";
std::string exclusive_start = exclusive_start_json ? exclusive_start_json->GetString() : "";
int limit = limit_json ? limit_json->GetInt() : 100;
if (limit < 1 || limit > 100) {
co_return api_error::validation("Limit must be greater than 0 and no greater than 100");

View File

@@ -40,7 +40,6 @@ namespace cql3::selection {
namespace service {
class storage_proxy;
class cas_shard;
}
namespace cdc {
@@ -58,7 +57,6 @@ class schema_builder;
namespace alternator {
class rmw_operation;
class put_or_delete_item;
schema_ptr get_table(service::storage_proxy& proxy, const rjson::value& request);
bool is_alternator_keyspace(const sstring& ks_name);
@@ -221,16 +219,6 @@ private:
static void describe_key_schema(rjson::value& parent, const schema&, std::unordered_map<std::string,std::string> * = nullptr, const std::map<sstring, sstring> *tags = nullptr);
future<> do_batch_write(
std::vector<std::pair<schema_ptr, put_or_delete_item>> mutation_builders,
service::client_state& client_state,
tracing::trace_state_ptr trace_state,
service_permit permit);
future<> cas_write(schema_ptr schema, service::cas_shard cas_shard, const dht::decorated_key& dk,
const std::vector<put_or_delete_item>& mutation_builders, service::client_state& client_state,
tracing::trace_state_ptr trace_state, service_permit permit);
public:
static void describe_key_schema(rjson::value& parent, const schema& schema, std::unordered_map<std::string,std::string>&, const std::map<sstring, sstring> *tags = nullptr);

View File

@@ -496,7 +496,7 @@ const std::pair<std::string, const rjson::value*> unwrap_set(const rjson::value&
return {"", nullptr};
}
auto it = v.MemberBegin();
const std::string it_key = rjson::to_string(it->name);
const std::string it_key = it->name.GetString();
if (it_key != "SS" && it_key != "BS" && it_key != "NS") {
return {std::move(it_key), nullptr};
}

View File

@@ -93,7 +93,7 @@ future<executor::request_return_type> executor::update_time_to_live(client_state
if (v->GetStringLength() < 1 || v->GetStringLength() > 255) {
co_return api_error::validation("The length of AttributeName must be between 1 and 255");
}
sstring attribute_name = rjson::to_sstring(*v);
sstring attribute_name(v->GetString(), v->GetStringLength());
co_await verify_permission(_enforce_authorization, _warn_authorization, client_state, schema, auth::permission::ALTER, _stats);
co_await db::modify_tags(_mm, schema->ks_name(), schema->cf_name(), [&](std::map<sstring, sstring>& tags_map) {

View File

@@ -3051,7 +3051,7 @@
},
{
"name":"incremental_mode",
"description":"Set the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to 'disabled' mode.",
"description":"Set the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to incremental mode.",
"required":false,
"allowMultiple":false,
"type":"string",

View File

@@ -9,6 +9,7 @@
#include "auth/allow_all_authenticator.hh"
#include "service/migration_manager.hh"
#include "utils/alien_worker.hh"
#include "utils/class_registrator.hh"
namespace auth {
@@ -22,6 +23,7 @@ static const class_registrator<
cql3::query_processor&,
::service::raft_group0_client&,
::service::migration_manager&,
cache&> registration("org.apache.cassandra.auth.AllowAllAuthenticator");
cache&,
utils::alien_worker&> registration("org.apache.cassandra.auth.AllowAllAuthenticator");
}

View File

@@ -14,6 +14,7 @@
#include "auth/authenticator.hh"
#include "auth/cache.hh"
#include "auth/common.hh"
#include "utils/alien_worker.hh"
namespace cql3 {
class query_processor;
@@ -29,7 +30,7 @@ extern const std::string_view allow_all_authenticator_name;
class allow_all_authenticator final : public authenticator {
public:
allow_all_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&) {
allow_all_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&, utils::alien_worker&) {
}
virtual future<> start() override {

View File

@@ -8,7 +8,6 @@
*/
#include "auth/certificate_authenticator.hh"
#include "auth/cache.hh"
#include <boost/regex.hpp>
#include <fmt/ranges.h>
@@ -35,13 +34,13 @@ static const class_registrator<auth::authenticator
, cql3::query_processor&
, ::service::raft_group0_client&
, ::service::migration_manager&
, auth::cache&> cert_auth_reg(CERT_AUTH_NAME);
, utils::alien_worker&> cert_auth_reg(CERT_AUTH_NAME);
enum class auth::certificate_authenticator::query_source {
subject, altname
};
auth::certificate_authenticator::certificate_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, auth::cache&)
auth::certificate_authenticator::certificate_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, utils::alien_worker&)
: _queries([&] {
auto& conf = qp.db().get_config();
auto queries = conf.auth_certificate_role_queries();
@@ -76,9 +75,9 @@ auth::certificate_authenticator::certificate_authenticator(cql3::query_processor
throw std::invalid_argument(fmt::format("Invalid source: {}", map.at(cfg_source_attr)));
}
continue;
} catch (const std::out_of_range&) {
} catch (std::out_of_range&) {
// just fallthrough
} catch (const boost::regex_error&) {
} catch (boost::regex_error&) {
std::throw_with_nested(std::invalid_argument(fmt::format("Invalid query expression: {}", map.at(cfg_query_attr))));
}
}

View File

@@ -10,6 +10,7 @@
#pragma once
#include "auth/authenticator.hh"
#include "utils/alien_worker.hh"
#include <boost/regex_fwd.hpp> // IWYU pragma: keep
namespace cql3 {
@@ -25,15 +26,13 @@ class raft_group0_client;
namespace auth {
class cache;
extern const std::string_view certificate_authenticator_name;
class certificate_authenticator : public authenticator {
enum class query_source;
std::vector<std::pair<query_source, boost::regex>> _queries;
public:
certificate_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&);
certificate_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, utils::alien_worker&);
~certificate_authenticator();
future<> start() override;

View File

@@ -94,7 +94,7 @@ static future<> create_legacy_metadata_table_if_missing_impl(
try {
co_return co_await mm.announce(co_await ::service::prepare_new_column_family_announcement(qp.proxy(), table, ts),
std::move(group0_guard), format("auth: create {} metadata table", table->cf_name()));
} catch (const exceptions::already_exists_exception&) {}
} catch (exceptions::already_exists_exception&) {}
}
}

View File

@@ -256,7 +256,7 @@ future<> default_authorizer::revoke_all(std::string_view role_name, ::service::g
} else {
co_await collect_mutations(_qp, mc, query, {sstring(role_name)});
}
} catch (const exceptions::request_execution_exception& e) {
} catch (exceptions::request_execution_exception& e) {
alogger.warn("CassandraAuthorizer failed to revoke all permissions of {}: {}", role_name, e);
}
}
@@ -293,13 +293,13 @@ future<> default_authorizer::revoke_all_legacy(const resource& resource) {
[resource](auto ep) {
try {
std::rethrow_exception(ep);
} catch (const exceptions::request_execution_exception& e) {
} catch (exceptions::request_execution_exception& e) {
alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
}
});
});
} catch (const exceptions::request_execution_exception& e) {
} catch (exceptions::request_execution_exception& e) {
alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
return make_ready_future();
}

View File

@@ -49,7 +49,8 @@ static const class_registrator<
cql3::query_processor&,
::service::raft_group0_client&,
::service::migration_manager&,
cache&> password_auth_reg("org.apache.cassandra.auth.PasswordAuthenticator");
cache&,
utils::alien_worker&> password_auth_reg("org.apache.cassandra.auth.PasswordAuthenticator");
static thread_local auto rng_for_salt = std::default_random_engine(std::random_device{}());
@@ -63,13 +64,14 @@ std::string password_authenticator::default_superuser(const db::config& cfg) {
password_authenticator::~password_authenticator() {
}
password_authenticator::password_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache)
password_authenticator::password_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache, utils::alien_worker& hashing_worker)
: _qp(qp)
, _group0_client(g0)
, _migration_manager(mm)
, _cache(cache)
, _stopped(make_ready_future<>())
, _superuser(default_superuser(qp.db().get_config()))
, _hashing_worker(hashing_worker)
{}
static bool has_salted_hash(const cql3::untyped_result_set_row& row) {
@@ -328,18 +330,20 @@ future<authenticated_user> password_authenticator::authenticate(
}
salted_hash = role->salted_hash;
}
const bool password_match = co_await passwords::check(password, *salted_hash);
const bool password_match = co_await _hashing_worker.submit<bool>([password = std::move(password), salted_hash] {
return passwords::check(password, *salted_hash);
});
if (!password_match) {
throw exceptions::authentication_exception("Username and/or password are incorrect");
}
co_return username;
} catch (const std::system_error &) {
} catch (std::system_error &) {
std::throw_with_nested(exceptions::authentication_exception("Could not verify password"));
} catch (const exceptions::request_execution_exception& e) {
} catch (exceptions::request_execution_exception& e) {
std::throw_with_nested(exceptions::authentication_exception(e.what()));
} catch (const exceptions::authentication_exception& e) {
} catch (exceptions::authentication_exception& e) {
std::throw_with_nested(e);
} catch (const exceptions::unavailable_exception& e) {
} catch (exceptions::unavailable_exception& e) {
std::throw_with_nested(exceptions::authentication_exception(e.get_message()));
} catch (...) {
std::throw_with_nested(exceptions::authentication_exception("authentication failed"));

View File

@@ -18,6 +18,7 @@
#include "auth/passwords.hh"
#include "auth/cache.hh"
#include "service/raft/raft_group0_client.hh"
#include "utils/alien_worker.hh"
namespace db {
class config;
@@ -48,12 +49,13 @@ class password_authenticator : public authenticator {
shared_promise<> _superuser_created_promise;
// We used to also support bcrypt, SHA-256, and MD5 (ref. scylladb#24524).
constexpr static auth::passwords::scheme _scheme = passwords::scheme::sha_512;
utils::alien_worker& _hashing_worker;
public:
static db::consistency_level consistency_for_user(std::string_view role_name);
static std::string default_superuser(const db::config&);
password_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&);
password_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&, utils::alien_worker&);
~password_authenticator();

View File

@@ -7,8 +7,6 @@
*/
#include "auth/passwords.hh"
#include "utils/crypt_sha512.hh"
#include <seastar/core/coroutine.hh>
#include <cerrno>
@@ -23,46 +21,25 @@ static thread_local crypt_data tlcrypt = {};
namespace detail {
void verify_hashing_output(const char * res) {
if (!res || (res[0] == '*')) {
throw std::system_error(errno, std::system_category());
}
}
void verify_scheme(scheme scheme) {
const sstring random_part_of_salt = "aaaabbbbccccdddd";
const sstring salt = sstring(prefix_for_scheme(scheme)) + random_part_of_salt;
const char* e = crypt_r("fisk", salt.c_str(), &tlcrypt);
try {
verify_hashing_output(e);
} catch (const std::system_error& ex) {
throw no_supported_schemes();
if (e && (e[0] != '*')) {
return;
}
throw no_supported_schemes();
}
sstring hash_with_salt(const sstring& pass, const sstring& salt) {
auto res = crypt_r(pass.c_str(), salt.c_str(), &tlcrypt);
verify_hashing_output(res);
return res;
}
seastar::future<sstring> hash_with_salt_async(const sstring& pass, const sstring& salt) {
sstring res;
// Only SHA-512 hashes for passphrases shorter than 256 bytes can be computed using
// the __crypt_sha512 method. For other computations, we fall back to the
// crypt_r implementation from `<crypt.h>`, which can stall.
if (salt.starts_with(prefix_for_scheme(scheme::sha_512)) && pass.size() <= 255) {
char buf[128];
const char * output_ptr = co_await __crypt_sha512(pass.c_str(), salt.c_str(), buf);
verify_hashing_output(output_ptr);
res = output_ptr;
} else {
const char * output_ptr = crypt_r(pass.c_str(), salt.c_str(), &tlcrypt);
verify_hashing_output(output_ptr);
res = output_ptr;
if (!res || (res[0] == '*')) {
throw std::system_error(errno, std::system_category());
}
co_return res;
return res;
}
std::string_view prefix_for_scheme(scheme c) noexcept {
@@ -81,9 +58,8 @@ no_supported_schemes::no_supported_schemes()
: std::runtime_error("No allowed hashing schemes are supported on this system") {
}
seastar::future<bool> check(const sstring& pass, const sstring& salted_hash) {
const auto pwd_hash = co_await detail::hash_with_salt_async(pass, salted_hash);
co_return pwd_hash == salted_hash;
bool check(const sstring& pass, const sstring& salted_hash) {
return detail::hash_with_salt(pass, salted_hash) == salted_hash;
}
} // namespace auth::passwords

View File

@@ -11,7 +11,6 @@
#include <random>
#include <stdexcept>
#include <seastar/core/future.hh>
#include <seastar/core/sstring.hh>
#include "seastarx.hh"
@@ -76,19 +75,10 @@ sstring generate_salt(RandomNumberEngine& g, scheme scheme) {
///
/// Hash a password combined with an implementation-specific salt string.
/// Deprecated in favor of `hash_with_salt_async`.
///
/// \throws \ref std::system_error when an unexpected implementation-specific error occurs.
///
[[deprecated("Use hash_with_salt_async instead")]] sstring hash_with_salt(const sstring& pass, const sstring& salt);
///
/// Async version of `hash_with_salt` that returns a future.
/// If possible, hashing uses `coroutine::maybe_yield` to prevent reactor stalls.
///
/// \throws \ref std::system_error when an unexpected implementation-specific error occurs.
///
seastar::future<sstring> hash_with_salt_async(const sstring& pass, const sstring& salt);
sstring hash_with_salt(const sstring& pass, const sstring& salt);
} // namespace detail
@@ -117,6 +107,6 @@ sstring hash(const sstring& pass, RandomNumberEngine& g, scheme scheme) {
///
/// \throws \ref std::system_error when an unexpected implementation-specific error occurs.
///
seastar::future<bool> check(const sstring& pass, const sstring& salted_hash);
bool check(const sstring& pass, const sstring& salted_hash);
} // namespace auth::passwords

View File

@@ -35,9 +35,10 @@ static const class_registrator<
cql3::query_processor&,
::service::raft_group0_client&,
::service::migration_manager&,
cache&> saslauthd_auth_reg("com.scylladb.auth.SaslauthdAuthenticator");
cache&,
utils::alien_worker&> saslauthd_auth_reg("com.scylladb.auth.SaslauthdAuthenticator");
saslauthd_authenticator::saslauthd_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, cache&)
saslauthd_authenticator::saslauthd_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, cache&, utils::alien_worker&)
: _socket_path(qp.db().get_config().saslauthd_socket_path())
{}

View File

@@ -12,6 +12,7 @@
#include "auth/authenticator.hh"
#include "auth/cache.hh"
#include "utils/alien_worker.hh"
namespace cql3 {
class query_processor;
@@ -29,7 +30,7 @@ namespace auth {
class saslauthd_authenticator : public authenticator {
sstring _socket_path; ///< Path to the domain socket on which saslauthd is listening.
public:
saslauthd_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&);
saslauthd_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&,utils::alien_worker&);
future<> start() override;

View File

@@ -191,7 +191,8 @@ service::service(
::service::migration_manager& mm,
const service_config& sc,
maintenance_socket_enabled used_by_maintenance_socket,
cache& cache)
cache& cache,
utils::alien_worker& hashing_worker)
: service(
std::move(c),
cache,
@@ -199,7 +200,7 @@ service::service(
g0,
mn,
create_object<authorizer>(sc.authorizer_java_name, qp, g0, mm),
create_object<authenticator>(sc.authenticator_java_name, qp, g0, mm, cache),
create_object<authenticator>(sc.authenticator_java_name, qp, g0, mm, cache, hashing_worker),
create_object<role_manager>(sc.role_manager_java_name, qp, g0, mm, cache),
used_by_maintenance_socket) {
}
@@ -225,7 +226,7 @@ future<> service::create_legacy_keyspace_if_missing(::service::migration_manager
try {
co_return co_await mm.announce(::service::prepare_new_keyspace_announcement(db.real_database(), ksm, ts),
std::move(group0_guard), seastar::format("auth_service: create {} keyspace", meta::legacy::AUTH_KS));
} catch (const ::service::group0_concurrent_modification&) {
} catch (::service::group0_concurrent_modification&) {
log.info("Concurrent operation is detected while creating {} keyspace, retrying.", meta::legacy::AUTH_KS);
}
}

View File

@@ -27,6 +27,7 @@
#include "cql3/description.hh"
#include "seastarx.hh"
#include "service/raft/raft_group0_client.hh"
#include "utils/alien_worker.hh"
#include "utils/observable.hh"
#include "utils/serialized_action.hh"
#include "service/maintenance_mode.hh"
@@ -130,7 +131,8 @@ public:
::service::migration_manager&,
const service_config&,
maintenance_socket_enabled,
cache&);
cache&,
utils::alien_worker&);
future<> start(::service::migration_manager&, db::system_keyspace&);

View File

@@ -192,7 +192,7 @@ future<> standard_role_manager::legacy_create_default_role_if_missing() {
{_superuser},
cql3::query_processor::cache_internal::no).discard_result();
log.info("Created default superuser role '{}'.", _superuser);
} catch (const exceptions::unavailable_exception& e) {
} catch(const exceptions::unavailable_exception& e) {
log.warn("Skipped default role setup: some nodes were not ready; will retry");
throw e;
}

View File

@@ -38,8 +38,8 @@ class transitional_authenticator : public authenticator {
public:
static const sstring PASSWORD_AUTHENTICATOR_NAME;
transitional_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache)
: transitional_authenticator(std::make_unique<password_authenticator>(qp, g0, mm, cache)) {
transitional_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache, utils::alien_worker& hashing_worker)
: transitional_authenticator(std::make_unique<password_authenticator>(qp, g0, mm, cache, hashing_worker)) {
}
transitional_authenticator(std::unique_ptr<authenticator> a)
: _authenticator(std::move(a)) {
@@ -81,7 +81,7 @@ public:
}).handle_exception([](auto ep) {
try {
std::rethrow_exception(ep);
} catch (const exceptions::authentication_exception&) {
} catch (exceptions::authentication_exception&) {
// return anon user
return make_ready_future<authenticated_user>(anonymous_user());
}
@@ -126,7 +126,7 @@ public:
virtual bytes evaluate_response(bytes_view client_response) override {
try {
return _sasl->evaluate_response(client_response);
} catch (const exceptions::authentication_exception&) {
} catch (exceptions::authentication_exception&) {
_complete = true;
return {};
}
@@ -141,7 +141,7 @@ public:
return _sasl->get_authenticated_user().handle_exception([](auto ep) {
try {
std::rethrow_exception(ep);
} catch (const exceptions::authentication_exception&) {
} catch (exceptions::authentication_exception&) {
// return anon user
return make_ready_future<authenticated_user>(anonymous_user());
}
@@ -241,7 +241,8 @@ static const class_registrator<
cql3::query_processor&,
::service::raft_group0_client&,
::service::migration_manager&,
auth::cache&> transitional_authenticator_reg(auth::PACKAGE_NAME + "TransitionalAuthenticator");
auth::cache&,
utils::alien_worker&> transitional_authenticator_reg(auth::PACKAGE_NAME + "TransitionalAuthenticator");
static const class_registrator<
auth::authorizer,

View File

@@ -859,7 +859,6 @@ scylla_core = (['message/messaging_service.cc',
'utils/alien_worker.cc',
'utils/array-search.cc',
'utils/base64.cc',
'utils/crypt_sha512.cc',
'utils/logalloc.cc',
'utils/large_bitset.cc',
'utils/buffer_input_stream.cc',
@@ -1063,6 +1062,7 @@ scylla_core = (['message/messaging_service.cc',
'db/hints/resource_manager.cc',
'db/hints/sync_point.cc',
'db/large_data_handler.cc',
'db/legacy_schema_migrator.cc',
'db/marshal/type_parser.cc',
'db/per_partition_rate_limit_options.cc',
'db/rate_limiter.cc',
@@ -1480,6 +1480,7 @@ deps = {
pure_boost_tests = set([
'test/boost/anchorless_list_test',
'test/boost/auth_passwords_test',
'test/boost/auth_resource_test',
'test/boost/big_decimal_test',
'test/boost/caching_options_test',

View File

@@ -331,7 +331,7 @@ future<shared_ptr<cql_transport::messages::result_message>> batch_statement::exe
if (!cl_for_paxos) [[unlikely]] {
return make_exception_future<shared_ptr<cql_transport::messages::result_message>>(std::move(cl_for_paxos).assume_error());
}
std::unique_ptr<cas_request> request;
seastar::shared_ptr<cas_request> request;
schema_ptr schema;
db::timeout_clock::time_point now = db::timeout_clock::now();
@@ -354,9 +354,9 @@ future<shared_ptr<cql_transport::messages::result_message>> batch_statement::exe
if (keys.empty()) {
continue;
}
if (!request) {
if (request.get() == nullptr) {
schema = statement.s;
request = std::make_unique<cas_request>(schema, std::move(keys));
request = seastar::make_shared<cas_request>(schema, std::move(keys));
} else if (keys.size() != 1 || keys.front().equal(request->key().front(), dht::ring_position_comparator(*schema)) == false) {
throw exceptions::invalid_request_exception("BATCH with conditions cannot span multiple partitions");
}
@@ -366,7 +366,7 @@ future<shared_ptr<cql_transport::messages::result_message>> batch_statement::exe
request->add_row_update(statement, std::move(ranges), std::move(json_cache), statement_options);
}
if (!request) {
if (request.get() == nullptr) {
throw exceptions::invalid_request_exception(format("Unrestricted partition key in a conditional BATCH"));
}
@@ -377,10 +377,9 @@ future<shared_ptr<cql_transport::messages::result_message>> batch_statement::exe
);
}
auto* request_ptr = request.get();
return qp.proxy().cas(schema, std::move(cas_shard), *request_ptr, request->read_command(qp), request->key(),
return qp.proxy().cas(schema, std::move(cas_shard), request, request->read_command(qp), request->key(),
{read_timeout, qs.get_permit(), qs.get_client_state(), qs.get_trace_state()},
std::move(cl_for_paxos).assume_value(), cl_for_learn, batch_timeout, cas_timeout).then([this, request = std::move(request)] (bool is_applied) {
std::move(cl_for_paxos).assume_value(), cl_for_learn, batch_timeout, cas_timeout).then([this, request] (bool is_applied) {
return request->build_cas_result_set(_metadata, _columns_of_cas_result_set, is_applied);
});
}

View File

@@ -401,8 +401,7 @@ modification_statement::execute_with_condition(query_processor& qp, service::que
type.is_update() ? "update" : "deletion"));
}
auto request = std::make_unique<cas_request>(s, std::move(keys));
auto* request_ptr = request.get();
auto request = seastar::make_shared<cas_request>(s, std::move(keys));
// cas_request can be used for batches as well single statements; Here we have just a single
// modification in the list of CAS commands, since we're handling single-statement execution.
request->add_row_update(*this, std::move(ranges), std::move(json_cache), options);
@@ -428,9 +427,9 @@ modification_statement::execute_with_condition(query_processor& qp, service::que
tablet_info = erm->check_locality(token);
}
return qp.proxy().cas(s, std::move(cas_shard), *request_ptr, request->read_command(qp), request->key(),
return qp.proxy().cas(s, std::move(cas_shard), request, request->read_command(qp), request->key(),
{read_timeout, qs.get_permit(), qs.get_client_state(), qs.get_trace_state()},
std::move(cl_for_paxos).assume_value(), cl_for_learn, statement_timeout, cas_timeout).then([this, request = std::move(request), tablet_replicas = std::move(tablet_info->tablet_replicas), token_range = tablet_info->token_range] (bool is_applied) {
std::move(cl_for_paxos).assume_value(), cl_for_learn, statement_timeout, cas_timeout).then([this, request, tablet_replicas = std::move(tablet_info->tablet_replicas), token_range = tablet_info->token_range] (bool is_applied) {
auto result = request->build_cas_result_set(_metadata, _columns_of_cas_result_set, is_applied);
result->add_tablet_info(tablet_replicas, token_range);
return result;

View File

@@ -10,6 +10,7 @@ target_sources(db
schema_applier.cc
schema_tables.cc
cql_type_parser.cc
legacy_schema_migrator.cc
commitlog/commitlog.cc
commitlog/commitlog_replayer.cc
commitlog/commitlog_entry.cc

View File

@@ -165,7 +165,7 @@ future<> db::commitlog_replayer::impl::init() {
future<db::commitlog_replayer::impl::stats>
db::commitlog_replayer::impl::recover(const commitlog::descriptor& d, const commitlog::replay_state& rpstate) const {
SCYLLA_ASSERT(_column_mappings.local_is_initialized());
scylla_assert(_column_mappings.local_is_initialized());
replay_position rp{d};
auto gp = min_pos(rp.shard_id());

View File

@@ -10,6 +10,7 @@
#include "reader_concurrency_semaphore.hh"
#include "replica/database.hh"
#include "utils/UUID_gen.hh"
#include "utils/assert.hh"
static logging::logger corrupt_data_logger("corrupt_data");
@@ -75,14 +76,14 @@ future<corrupt_data_handler::entry_id> system_table_corrupt_data_handler::do_rec
auto set_cell_raw = [this, &entry_row, &corrupt_data_schema, timestamp] (const char* cell_name, managed_bytes cell_value) {
auto cdef = corrupt_data_schema->get_column_definition(cell_name);
SCYLLA_ASSERT(cdef);
scylla_assert(cdef);
entry_row.cells().apply(*cdef, atomic_cell::make_live(*cdef->type, timestamp, cell_value, _entry_ttl));
};
auto set_cell = [this, &entry_row, &corrupt_data_schema, timestamp] (const char* cell_name, data_value cell_value) {
auto cdef = corrupt_data_schema->get_column_definition(cell_name);
SCYLLA_ASSERT(cdef);
scylla_assert(cdef);
entry_row.cells().apply(*cdef, atomic_cell::make_live(*cdef->type, timestamp, cell_value.serialize_nonnull(), _entry_ttl));
};

View File

@@ -39,7 +39,7 @@ large_data_handler::large_data_handler(uint64_t partition_threshold_bytes, uint6
}
future<large_data_handler::partition_above_threshold> large_data_handler::maybe_record_large_partitions(const sstables::sstable& sst, const sstables::key& key, uint64_t partition_size, uint64_t rows, uint64_t range_tombstones, uint64_t dead_rows) {
SCYLLA_ASSERT(running());
scylla_assert(running());
partition_above_threshold above_threshold{partition_size > _partition_threshold_bytes, rows > _rows_count_threshold};
static_assert(std::is_same_v<decltype(above_threshold.size), bool>);
_stats.partitions_bigger_than_threshold += above_threshold.size; // increment if true
@@ -83,7 +83,7 @@ sstring large_data_handler::sst_filename(const sstables::sstable& sst) {
}
future<> large_data_handler::maybe_delete_large_data_entries(sstables::shared_sstable sst) {
SCYLLA_ASSERT(running());
scylla_assert(running());
auto schema = sst->get_schema();
auto filename = sst_filename(*sst);
using ldt = sstables::large_data_type;
@@ -247,7 +247,7 @@ future<> cql_table_large_data_handler::record_large_rows(const sstables::sstable
future<> cql_table_large_data_handler::delete_large_data_entries(const schema& s, sstring sstable_name, std::string_view large_table_name) const {
auto sys_ks = _sys_ks.get_permit();
SCYLLA_ASSERT(sys_ks);
scylla_assert(sys_ks);
const sstring req =
seastar::format("DELETE FROM system.{} WHERE keyspace_name = ? AND table_name = ? AND sstable_name = ?",
large_table_name);

View File

@@ -80,7 +80,7 @@ public:
future<bool> maybe_record_large_rows(const sstables::sstable& sst, const sstables::key& partition_key,
const clustering_key_prefix* clustering_key, uint64_t row_size) {
SCYLLA_ASSERT(running());
scylla_assert(running());
if (row_size > _row_threshold_bytes) [[unlikely]] {
return with_sem([&sst, &partition_key, clustering_key, row_size, this] {
return record_large_rows(sst, partition_key, clustering_key, row_size);
@@ -100,7 +100,7 @@ public:
future<bool> maybe_record_large_cells(const sstables::sstable& sst, const sstables::key& partition_key,
const clustering_key_prefix* clustering_key, const column_definition& cdef, uint64_t cell_size, uint64_t collection_elements) {
SCYLLA_ASSERT(running());
scylla_assert(running());
if (cell_size > _cell_threshold_bytes || collection_elements > _collection_elements_count_threshold) [[unlikely]] {
return with_sem([&sst, &partition_key, clustering_key, &cdef, cell_size, collection_elements, this] {
return record_large_cells(sst, partition_key, clustering_key, cdef, cell_size, collection_elements);

View File

@@ -0,0 +1,602 @@
/*
* Modified by ScyllaDB
* Copyright (C) 2017-present ScyllaDB
*/
/*
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
*/
// Since Scylla 2.0, we use system tables whose schemas were introduced in
// Cassandra 3. If Scylla boots to find a data directory with system tables
// with older schemas - produced by pre-2.0 Scylla or by pre-3.0 Cassandra,
// we need to migrate these old tables to the new format.
//
// We provide here a function, db::legacy_schema_migrator::migrate(),
// for a one-time migration from old to new system tables. The function
// reads old system tables, write them back in the new format, and finally
// delete the old system tables. Scylla's main should call this function and
// wait for the returned future, before starting to serve the database.
#include <boost/iterator/filter_iterator.hpp>
#include <seastar/core/future-util.hh>
#include <seastar/util/log.hh>
#include <map>
#include <unordered_set>
#include <chrono>
#include "replica/database.hh"
#include "legacy_schema_migrator.hh"
#include "system_keyspace.hh"
#include "schema_tables.hh"
#include "schema/schema_builder.hh"
#include "service/storage_proxy.hh"
#include "utils/rjson.hh"
#include "cql3/query_processor.hh"
#include "cql3/untyped_result_set.hh"
#include "cql3/util.hh"
#include "cql3/statements/property_definitions.hh"
static seastar::logger mlogger("legacy_schema_migrator");
namespace db {
namespace legacy_schema_migrator {
// local data carriers
class migrator {
public:
static const std::unordered_set<sstring> legacy_schema_tables;
migrator(sharded<service::storage_proxy>& sp, sharded<replica::database>& db, sharded<db::system_keyspace>& sys_ks, cql3::query_processor& qp)
: _sp(sp), _db(db), _sys_ks(sys_ks), _qp(qp) {
}
migrator(migrator&&) = default;
typedef db_clock::time_point time_point;
// TODO: we don't support triggers.
// this is a placeholder.
struct trigger {
time_point timestamp;
sstring name;
std::unordered_map<sstring, sstring> options;
};
struct table {
time_point timestamp;
schema_ptr metadata;
std::vector<trigger> triggers;
};
struct type {
time_point timestamp;
user_type metadata;
};
struct function {
time_point timestamp;
sstring ks_name;
sstring fn_name;
std::vector<sstring> arg_names;
std::vector<sstring> arg_types;
sstring return_type;
bool called_on_null_input;
sstring language;
sstring body;
};
struct aggregate {
time_point timestamp;
sstring ks_name;
sstring fn_name;
std::vector<sstring> arg_names;
std::vector<sstring> arg_types;
sstring return_type;
sstring final_func;
sstring initcond;
sstring state_func;
sstring state_type;
};
struct keyspace {
time_point timestamp;
sstring name;
bool durable_writes;
std::map<sstring, sstring> replication_params;
std::vector<table> tables;
std::vector<type> types;
std::vector<function> functions;
std::vector<aggregate> aggregates;
};
class unsupported_feature : public std::runtime_error {
public:
using runtime_error::runtime_error;
};
static sstring fmt_query(const char* fmt, const char* table) {
return fmt::format(fmt::runtime(fmt), db::system_keyspace::NAME, table);
}
typedef ::shared_ptr<cql3::untyped_result_set> result_set_type;
typedef const cql3::untyped_result_set::row row_type;
future<> read_table(keyspace& dst, sstring cf_name, time_point timestamp) {
auto fmt = "SELECT * FROM {}.{} WHERE keyspace_name = ? AND columnfamily_name = ?";
auto tq = fmt_query(fmt, db::system_keyspace::legacy::COLUMNFAMILIES);
auto cq = fmt_query(fmt, db::system_keyspace::legacy::COLUMNS);
auto zq = fmt_query(fmt, db::system_keyspace::legacy::TRIGGERS);
typedef std::tuple<future<result_set_type>, future<result_set_type>, future<result_set_type>, future<db::schema_tables::legacy::schema_mutations>> result_tuple;
return when_all(_qp.execute_internal(tq, { dst.name, cf_name }, cql3::query_processor::cache_internal::yes),
_qp.execute_internal(cq, { dst.name, cf_name }, cql3::query_processor::cache_internal::yes),
_qp.execute_internal(zq, { dst.name, cf_name }, cql3::query_processor::cache_internal::yes),
db::schema_tables::legacy::read_table_mutations(_sp, dst.name, cf_name, db::system_keyspace::legacy::column_families()))
.then([&dst, cf_name, timestamp](result_tuple&& t) {
result_set_type tables = std::get<0>(t).get();
result_set_type columns = std::get<1>(t).get();
result_set_type triggers = std::get<2>(t).get();
db::schema_tables::legacy::schema_mutations sm = std::get<3>(t).get();
row_type& td = tables->one();
auto ks_name = td.get_as<sstring>("keyspace_name");
auto cf_name = td.get_as<sstring>("columnfamily_name");
auto id = table_id(td.get_or("cf_id", generate_legacy_id(ks_name, cf_name).uuid()));
schema_builder builder(dst.name, cf_name, id);
builder.with_version(sm.digest());
cf_type cf = sstring_to_cf_type(td.get_or("type", sstring("standard")));
if (cf == cf_type::super) {
fail(unimplemented::cause::SUPER);
}
auto comparator = td.get_as<sstring>("comparator");
bool is_compound = cell_comparator::check_compound(comparator);
builder.set_is_compound(is_compound);
cell_comparator::read_collections(builder, comparator);
bool filter_sparse = false;
data_type default_validator = {};
if (td.has("default_validator")) {
default_validator = db::schema_tables::parse_type(td.get_as<sstring>("default_validator"));
if (default_validator->is_counter()) {
builder.set_is_counter(true);
}
builder.set_default_validation_class(default_validator);
}
/*
* Determine whether or not the table is *really* dense
* We cannot trust is_dense value of true (see CASSANDRA-11502, that fixed the issue for 2.2 only, and not retroactively),
* but we can trust is_dense value of false.
*/
auto is_dense = td.get_opt<bool>("is_dense");
if (!is_dense || *is_dense) {
is_dense = [&] {
/*
* As said above, this method is only here because we need to deal with thrift upgrades.
* Once a CF has been "upgraded", i.e. we've rebuilt and save its CQL3 metadata at least once,
* then we'll have saved the "is_dense" value and will be good to go.
*
* But non-upgraded thrift CF (and pre-7744 CF) will have no value for "is_dense", so we need
* to infer that information without relying on it in that case. And for the most part this is
* easy, a CF that has at least one REGULAR definition is not dense. But the subtlety is that not
* having a REGULAR definition may not mean dense because of CQL3 definitions that have only the
* PRIMARY KEY defined.
*
* So we need to recognize those special case CQL3 table with only a primary key. If we have some
* clustering columns, we're fine as said above. So the only problem is that we cannot decide for
* sure if a CF without REGULAR columns nor CLUSTERING_COLUMN definition is meant to be dense, or if it
* has been created in CQL3 by say:
* CREATE TABLE test (k int PRIMARY KEY)
* in which case it should not be dense. However, we can limit our margin of error by assuming we are
* in the latter case only if the comparator is exactly CompositeType(UTF8Type).
*/
std::optional<column_id> max_cl_idx;
const cql3::untyped_result_set::row * regular = nullptr;
for (auto& row : *columns) {
auto kind_str = row.get_as<sstring>("type");
if (kind_str == "compact_value") {
continue;
}
auto kind = db::schema_tables::deserialize_kind(kind_str);
if (kind == column_kind::regular_column) {
if (regular != nullptr) {
return false;
}
regular = &row;
continue;
}
if (kind == column_kind::clustering_key) {
max_cl_idx = std::max(column_id(row.get_or("component_index", 0)), max_cl_idx.value_or(column_id()));
}
}
auto is_cql3_only_pk_comparator = [](const sstring& comparator) {
if (!cell_comparator::check_compound(comparator)) {
return false;
}
// CMH. We don't have composites, nor a parser for it. This is a simple way of c
// checking the same.
auto comma = comparator.find(',');
if (comma != sstring::npos) {
return false;
}
auto off = comparator.find('(');
auto end = comparator.find(')');
return comparator.compare(off, end - off, utf8_type->name()) == 0;
};
if (max_cl_idx) {
auto n = std::count(comparator.begin(), comparator.end(), ','); // num comp - 1
return *max_cl_idx == n;
}
if (regular) {
return false;
}
return !is_cql3_only_pk_comparator(comparator);
}();
// now, if switched to sparse, remove redundant compact_value column and the last clustering column,
// directly copying CASSANDRA-11502 logic. See CASSANDRA-11315.
filter_sparse = !*is_dense;
}
builder.set_is_dense(*is_dense);
auto is_cql = !*is_dense && is_compound;
auto is_static_compact = !*is_dense && !is_compound;
// org.apache.cassandra.schema.LegacySchemaMigrator#isEmptyCompactValueColumn
auto is_empty_compact_value = [](const cql3::untyped_result_set::row& column_row) {
auto kind_str = column_row.get_as<sstring>("type");
// Cassandra only checks for "compact_value", but Scylla generates "regular" instead (#2586)
return (kind_str == "compact_value" || kind_str == "regular")
&& column_row.get_as<sstring>("column_name").empty();
};
for (auto& row : *columns) {
auto kind_str = row.get_as<sstring>("type");
auto kind = db::schema_tables::deserialize_kind(kind_str);
auto component_index = kind > column_kind::clustering_key ? 0 : column_id(row.get_or("component_index", 0));
auto name = row.get_or<sstring>("column_name", sstring());
auto validator = db::schema_tables::parse_type(row.get_as<sstring>("validator"));
if (is_empty_compact_value(row)) {
continue;
}
if (filter_sparse) {
if (kind_str == "compact_value") {
continue;
}
if (kind == column_kind::clustering_key) {
if (cf == cf_type::super && component_index != 0) {
continue;
}
if (cf != cf_type::super && !is_compound) {
continue;
}
}
}
std::optional<index_metadata_kind> index_kind;
sstring index_name;
index_options_map options;
if (row.has("index_type")) {
index_kind = schema_tables::deserialize_index_kind(row.get_as<sstring>("index_type"));
}
if (row.has("index_name")) {
index_name = row.get_as<sstring>("index_name");
}
if (row.has("index_options")) {
sstring index_options_str = row.get_as<sstring>("index_options");
options = rjson::parse_to_map<index_options_map>(std::string_view(index_options_str));
sstring type;
auto i = options.find("index_keys");
if (i != options.end()) {
options.erase(i);
type = "KEYS";
}
i = options.find("index_keys_and_values");
if (i != options.end()) {
options.erase(i);
type = "KEYS_AND_VALUES";
}
if (type.empty()) {
if (validator->is_collection() && validator->is_multi_cell()) {
type = "FULL";
} else {
type = "VALUES";
}
}
auto column = cql3::util::maybe_quote(name);
options["target"] = validator->is_collection()
? type + "(" + column + ")"
: column;
}
if (index_kind) {
// Origin assumes index_name is always set, so let's do the same
builder.with_index(index_metadata(index_name, options, *index_kind, index_metadata::is_local_index::no));
}
data_type column_name_type = [&] {
if (is_static_compact && kind == column_kind::regular_column) {
return db::schema_tables::parse_type(comparator);
}
return utf8_type;
}();
auto column_name = [&] {
try {
return column_name_type->from_string(name);
} catch (marshal_exception&) {
// #2597: Scylla < 2.0 writes names in serialized form, try to recover
column_name_type->validate(to_bytes_view(name));
return to_bytes(name);
}
}();
builder.with_column_ordered(column_definition(std::move(column_name), std::move(validator), kind, component_index));
}
if (is_static_compact) {
builder.set_regular_column_name_type(db::schema_tables::parse_type(comparator));
}
if (td.has("gc_grace_seconds")) {
builder.set_gc_grace_seconds(td.get_as<int32_t>("gc_grace_seconds"));
}
if (td.has("min_compaction_threshold")) {
builder.set_min_compaction_threshold(td.get_as<int32_t>("min_compaction_threshold"));
}
if (td.has("max_compaction_threshold")) {
builder.set_max_compaction_threshold(td.get_as<int32_t>("max_compaction_threshold"));
}
if (td.has("comment")) {
builder.set_comment(td.get_as<sstring>("comment"));
}
if (td.has("memtable_flush_period_in_ms")) {
builder.set_memtable_flush_period(td.get_as<int32_t>("memtable_flush_period_in_ms"));
}
if (td.has("caching")) {
builder.set_caching_options(caching_options::from_sstring(td.get_as<sstring>("caching")));
}
if (td.has("default_time_to_live")) {
builder.set_default_time_to_live(gc_clock::duration(td.get_as<int32_t>("default_time_to_live")));
}
if (td.has("speculative_retry")) {
builder.set_speculative_retry(td.get_as<sstring>("speculative_retry"));
}
if (td.has("compaction_strategy_class")) {
auto strategy = td.get_as<sstring>("compaction_strategy_class");
try {
builder.set_compaction_strategy(compaction::compaction_strategy::type(strategy));
} catch (const exceptions::configuration_exception& e) {
// If compaction strategy class isn't supported, fallback to incremental.
mlogger.warn("Falling back to incremental compaction strategy after the problem: {}", e.what());
builder.set_compaction_strategy(compaction::compaction_strategy_type::incremental);
}
}
if (td.has("compaction_strategy_options")) {
sstring strategy_options_str = td.get_as<sstring>("compaction_strategy_options");
builder.set_compaction_strategy_options(rjson::parse_to_map<std::map<sstring, sstring>>(std::string_view(strategy_options_str)));
}
auto comp_param = td.get_as<sstring>("compression_parameters");
compression_parameters cp(rjson::parse_to_map<std::map<sstring, sstring>>(std::string_view(comp_param)));
builder.set_compressor_params(cp);
if (td.has("min_index_interval")) {
builder.set_min_index_interval(td.get_as<int32_t>("min_index_interval"));
} else if (td.has("index_interval")) { // compatibility
builder.set_min_index_interval(td.get_as<int32_t>("index_interval"));
}
if (td.has("max_index_interval")) {
builder.set_max_index_interval(td.get_as<int32_t>("max_index_interval"));
}
if (td.has("bloom_filter_fp_chance")) {
builder.set_bloom_filter_fp_chance(td.get_as<double>("bloom_filter_fp_chance"));
} else {
builder.set_bloom_filter_fp_chance(builder.get_bloom_filter_fp_chance());
}
if (td.has("dropped_columns")) {
auto map = td.get_map<sstring, int64_t>("dropped_columns");
for (auto&& e : map) {
builder.without_column(e.first, api::timestamp_type(e.second));
};
}
// ignore version. we're transient
if (!triggers->empty()) {
throw unsupported_feature("triggers");
}
dst.tables.emplace_back(table{timestamp, builder.build() });
});
}
future<> read_tables(keyspace& dst) {
auto query = fmt_query("SELECT columnfamily_name, writeTime(type) AS timestamp FROM {}.{} WHERE keyspace_name = ?",
db::system_keyspace::legacy::COLUMNFAMILIES);
return _qp.execute_internal(query, {dst.name}, cql3::query_processor::cache_internal::yes).then([this, &dst](result_set_type result) {
return parallel_for_each(*result, [this, &dst](row_type& row) {
return read_table(dst, row.get_as<sstring>("columnfamily_name"), row.get_as<time_point>("timestamp"));
}).finally([result] {});
});
}
future<time_point> read_type_timestamp(keyspace& dst, sstring type_name) {
// TODO: Unfortunately there is not a single REGULAR column in system.schema_usertypes, so annoyingly we cannot
// use the writeTime() CQL function, and must resort to a lower level.
// Origin digs up the actual cells of target partition and gets timestamp from there.
// We should do the same, but g-dam that's messy. Lets give back dung value for now.
return make_ready_future<time_point>(dst.timestamp);
}
future<> read_types(keyspace& dst) {
auto query = fmt_query("SELECT * FROM {}.{} WHERE keyspace_name = ?", db::system_keyspace::legacy::USERTYPES);
return _qp.execute_internal(query, {dst.name}, cql3::query_processor::cache_internal::yes).then([this, &dst](result_set_type result) {
return parallel_for_each(*result, [this, &dst](row_type& row) {
auto name = row.get_blob_unfragmented("type_name");
auto columns = row.get_list<bytes>("field_names");
auto types = row.get_list<sstring>("field_types");
std::vector<data_type> field_types;
for (auto&& value : types) {
field_types.emplace_back(db::schema_tables::parse_type(value));
}
auto ut = user_type_impl::get_instance(dst.name, name, columns, field_types, false);
return read_type_timestamp(dst, value_cast<sstring>(utf8_type->deserialize(name))).then([ut = std::move(ut), &dst](time_point timestamp) {
dst.types.emplace_back(type{timestamp, ut});
});
}).finally([result] {});
});
}
future<> read_functions(keyspace& dst) {
auto query = fmt_query("SELECT * FROM {}.{} WHERE keyspace_name = ?", db::system_keyspace::legacy::FUNCTIONS);
return _qp.execute_internal(query, {dst.name}, cql3::query_processor::cache_internal::yes).then([](result_set_type result) {
if (!result->empty()) {
throw unsupported_feature("functions");
}
});
}
future<> read_aggregates(keyspace& dst) {
auto query = fmt_query("SELECT * FROM {}.{} WHERE keyspace_name = ?", db::system_keyspace::legacy::AGGREGATES);
return _qp.execute_internal(query, {dst.name}, cql3::query_processor::cache_internal::yes).then([](result_set_type result) {
if (!result->empty()) {
throw unsupported_feature("aggregates");
}
});
}
future<keyspace> read_keyspace(sstring ks_name, bool durable_writes, sstring strategy_class, sstring strategy_options, time_point timestamp) {
auto map = rjson::parse_to_map<std::map<sstring, sstring>>(std::string_view(strategy_options));
map.emplace("class", std::move(strategy_class));
auto ks = ::make_lw_shared<keyspace>(keyspace{timestamp, std::move(ks_name), durable_writes, std::move(map) });
return read_tables(*ks).then([this, ks] {
//Collection<Type> types = readTypes(keyspaceName);
return read_types(*ks);
}).then([this, ks] {
return read_functions(*ks);
}).then([this, ks] {
return read_aggregates(*ks);
}).then([ks] {
return make_ready_future<keyspace>(std::move(*ks));
});
}
future<> read_all_keyspaces() {
static auto ks_filter = [](row_type& row) {
auto ks_name = row.get_as<sstring>("keyspace_name");
return ks_name != db::system_keyspace::NAME && ks_name != db::schema_tables::v3::NAME;
};
auto query = fmt_query("SELECT keyspace_name, durable_writes, strategy_options, strategy_class, writeTime(durable_writes) AS timestamp FROM {}.{}",
db::system_keyspace::legacy::KEYSPACES);
return _qp.execute_internal(query, cql3::query_processor::cache_internal::yes).then([this](result_set_type result) {
auto i = boost::make_filter_iterator(ks_filter, result->begin(), result->end());
auto e = boost::make_filter_iterator(ks_filter, result->end(), result->end());
return parallel_for_each(i, e, [this](row_type& row) {
return read_keyspace(row.get_as<sstring>("keyspace_name")
, row.get_as<bool>("durable_writes")
, row.get_as<sstring>("strategy_class")
, row.get_as<sstring>("strategy_options")
, row.get_as<db_clock::time_point>("timestamp")
).then([this](keyspace ks) {
_keyspaces.emplace_back(std::move(ks));
});
}).finally([result] {});
});
}
future<> drop_legacy_tables() {
mlogger.info("Dropping legacy schema tables");
auto with_snapshot = !_keyspaces.empty();
for (const sstring& cfname : legacy_schema_tables) {
co_await replica::database::legacy_drop_table_on_all_shards(_db, _sys_ks, db::system_keyspace::NAME, cfname, with_snapshot);
}
}
future<> store_keyspaces_in_new_schema_tables() {
mlogger.info("Moving {} keyspaces from legacy schema tables to the new schema keyspace ({})",
_keyspaces.size(), db::schema_tables::v3::NAME);
utils::chunked_vector<mutation> mutations;
for (auto& ks : _keyspaces) {
auto ksm = ::make_lw_shared<keyspace_metadata>(ks.name
, ks.replication_params["class"] // TODO, make ksm like c3?
, cql3::statements::property_definitions::to_extended_map(ks.replication_params)
, std::nullopt
, std::nullopt
, ks.durable_writes);
// we want separate time stamps for tables/types, so cannot bulk them into the ksm.
for (auto&& m : db::schema_tables::make_create_keyspace_mutations(schema_features::full(), ksm, ks.timestamp.time_since_epoch().count(), false)) {
mutations.emplace_back(std::move(m));
}
for (auto& t : ks.tables) {
db::schema_tables::add_table_or_view_to_schema_mutation(t.metadata, t.timestamp.time_since_epoch().count(), true, mutations);
}
for (auto& t : ks.types) {
db::schema_tables::add_type_to_schema_mutation(t.metadata, t.timestamp.time_since_epoch().count(), mutations);
}
}
return _qp.proxy().mutate_locally(std::move(mutations), tracing::trace_state_ptr());
}
future<> flush_schemas() {
auto& db = _qp.db().real_database().container();
return replica::database::flush_tables_on_all_shards(db, db::schema_tables::all_table_infos(schema_features::full()));
}
future<> migrate() {
return read_all_keyspaces().then([this]() {
// write metadata to the new schema tables
return store_keyspaces_in_new_schema_tables()
.then(std::bind(&migrator::flush_schemas, this))
.then(std::bind(&migrator::drop_legacy_tables, this))
.then([] { mlogger.info("Completed migration of legacy schema tables"); });
});
}
sharded<service::storage_proxy>& _sp;
sharded<replica::database>& _db;
sharded<db::system_keyspace>& _sys_ks;
cql3::query_processor& _qp;
std::vector<keyspace> _keyspaces;
};
const std::unordered_set<sstring> migrator::legacy_schema_tables = {
db::system_keyspace::legacy::KEYSPACES,
db::system_keyspace::legacy::COLUMNFAMILIES,
db::system_keyspace::legacy::COLUMNS,
db::system_keyspace::legacy::TRIGGERS,
db::system_keyspace::legacy::USERTYPES,
db::system_keyspace::legacy::FUNCTIONS,
db::system_keyspace::legacy::AGGREGATES,
};
}
}
future<>
db::legacy_schema_migrator::migrate(sharded<service::storage_proxy>& sp, sharded<replica::database>& db, sharded<db::system_keyspace>& sys_ks, cql3::query_processor& qp) {
return do_with(migrator(sp, db, sys_ks, qp), std::bind(&migrator::migrate, std::placeholders::_1));
}

View File

@@ -0,0 +1,37 @@
/*
* Modified by ScyllaDB
* Copyright (C) 2017-present ScyllaDB
*/
/*
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
*/
#pragma once
#include <seastar/core/future.hh>
#include <seastar/core/sharded.hh>
#include "seastarx.hh"
namespace replica {
class database;
}
namespace cql3 {
class query_processor;
}
namespace service {
class storage_proxy;
}
namespace db {
class system_keyspace;
namespace legacy_schema_migrator {
future<> migrate(sharded<service::storage_proxy>&, sharded<replica::database>& db, sharded<db::system_keyspace>& sys_ks, cql3::query_processor&);
}
}

View File

@@ -542,7 +542,6 @@ public:
// Returns the range tombstone for the key range adjacent to the cursor's position from the side of smaller keys.
// Excludes the range for the row itself. That information is returned by range_tombstone_for_row().
// It's possible that range_tombstone() is empty and range_tombstone_for_row() is not empty.
// Note that this is different from the meaning of rows_entry::range_tombstone(), which includes the row itself.
tombstone range_tombstone() const { return _range_tombstone; }
// Can be called when cursor is pointing at a row.

View File

@@ -1287,15 +1287,6 @@ row_cache::row_cache(schema_ptr s, snapshot_source src, cache_tracker& tracker,
, _partitions(dht::raw_token_less_comparator{})
, _underlying(src())
, _snapshot_source(std::move(src))
, _update_section(abstract_formatter([this] (fmt::context& ctx) {
fmt::format_to(ctx.out(), "cache.update {}.{}", _schema->ks_name(), _schema->cf_name());
}))
, _populate_section(abstract_formatter([this] (fmt::context& ctx) {
fmt::format_to(ctx.out(), "cache.populate {}.{}", _schema->ks_name(), _schema->cf_name());
}))
, _read_section(abstract_formatter([this] (fmt::context& ctx) {
fmt::format_to(ctx.out(), "cache.read {}.{}", _schema->ks_name(), _schema->cf_name());
}))
{
try {
with_allocator(_tracker.allocator(), [this, cont] {

View File

@@ -1121,7 +1121,7 @@ future<> schema_applier::commit() {
// Run func first on shard 0
// to allow "seeding" of the effective_replication_map
// with a new e_r_m instance.
SCYLLA_ASSERT(this_shard_id() == 0);
scylla_assert(this_shard_id() == 0);
commit_on_shard(sharded_db.local());
co_await sharded_db.invoke_on_others([this] (replica::database& db) {
commit_on_shard(db);

View File

@@ -404,7 +404,10 @@ const std::unordered_set<table_id>& schema_tables_holding_schema_mutations() {
computed_columns(),
dropped_columns(),
indexes(),
scylla_tables()}) {
scylla_tables(),
db::system_keyspace::legacy::column_families(),
db::system_keyspace::legacy::columns(),
db::system_keyspace::legacy::triggers()}) {
SCYLLA_ASSERT(s->clustering_key_size() > 0);
auto&& first_column_name = s->clustering_column_at(0).name_as_text();
SCYLLA_ASSERT(first_column_name == "table_name"
@@ -2837,6 +2840,26 @@ void check_no_legacy_secondary_index_mv_schema(replica::database& db, const view
}
namespace legacy {
table_schema_version schema_mutations::digest() const {
md5_hasher h;
const db::schema_features no_features;
db::schema_tables::feed_hash_for_schema_digest(h, _columnfamilies, no_features);
db::schema_tables::feed_hash_for_schema_digest(h, _columns, no_features);
return table_schema_version(utils::UUID_gen::get_name_UUID(h.finalize()));
}
future<schema_mutations> read_table_mutations(sharded<service::storage_proxy>& proxy,
sstring keyspace_name, sstring table_name, schema_ptr s)
{
mutation cf_m = co_await read_schema_partition_for_table(proxy, s, keyspace_name, table_name);
mutation col_m = co_await read_schema_partition_for_table(proxy, db::system_keyspace::legacy::columns(), keyspace_name, table_name);
co_return schema_mutations{std::move(cf_m), std::move(col_m)};
}
} // namespace legacy
static auto GET_COLUMN_MAPPING_QUERY = format("SELECT column_name, clustering_order, column_name_bytes, kind, position, type FROM system.{} WHERE cf_id = ? AND schema_version = ?",
db::schema_tables::SCYLLA_TABLE_SCHEMA_HISTORY);

View File

@@ -155,6 +155,24 @@ schema_ptr scylla_table_schema_history();
const std::unordered_set<table_id>& schema_tables_holding_schema_mutations();
}
namespace legacy {
class schema_mutations {
mutation _columnfamilies;
mutation _columns;
public:
schema_mutations(mutation columnfamilies, mutation columns)
: _columnfamilies(std::move(columnfamilies))
, _columns(std::move(columns))
{ }
table_schema_version digest() const;
};
future<schema_mutations> read_table_mutations(sharded<service::storage_proxy>& proxy,
sstring keyspace_name, sstring table_name, schema_ptr s);
}
struct qualified_name {
sstring keyspace_name;
sstring table_name;

View File

@@ -187,7 +187,7 @@ static future<std::vector<token_range>> get_local_ranges(replica::database& db,
auto ranges = db.get_token_metadata().get_primary_ranges_for(std::move(tokens));
std::vector<token_range> local_ranges;
auto to_bytes = [](const std::optional<dht::token_range::bound>& b) {
SCYLLA_ASSERT(b);
scylla_assert(b);
return utf8_type->decompose(b->value().to_sstring());
};
// We merge the ranges to be compatible with how Cassandra shows it's size estimates table.

View File

@@ -231,7 +231,7 @@ static schema_ptr get_current_service_levels(data_dictionary::database db) {
}
static schema_ptr get_updated_service_levels(data_dictionary::database db, bool workload_prioritization_enabled) {
SCYLLA_ASSERT(this_shard_id() == 0);
scylla_assert(this_shard_id() == 0);
auto schema = get_current_service_levels(db);
schema_builder b(schema);
for (const auto& col : new_service_levels_columns(workload_prioritization_enabled)) {

View File

@@ -847,6 +847,8 @@ schema_ptr system_keyspace::corrupt_data() {
return corrupt_data;
}
static constexpr auto schema_gc_grace = std::chrono::duration_cast<std::chrono::seconds>(days(7)).count();
/*static*/ schema_ptr system_keyspace::scylla_local() {
static thread_local auto scylla_local = [] {
schema_builder builder(generate_legacy_id(NAME, SCYLLA_LOCAL), NAME, SCYLLA_LOCAL,
@@ -1358,6 +1360,289 @@ schema_ptr system_keyspace::role_permissions() {
return schema;
}
schema_ptr system_keyspace::legacy::hints() {
static thread_local auto schema = [] {
schema_builder builder(generate_legacy_id(NAME, HINTS), NAME, HINTS,
// partition key
{{"target_id", uuid_type}},
// clustering key
{{"hint_id", timeuuid_type}, {"message_version", int32_type}},
// regular columns
{{"mutation", bytes_type}},
// static columns
{},
// regular column name type
utf8_type,
// comment
"*DEPRECATED* hints awaiting delivery"
);
builder.set_gc_grace_seconds(0);
builder.set_compaction_strategy(compaction::compaction_strategy_type::incremental);
builder.set_compaction_strategy_options({{"enabled", "false"}});
builder.with(schema_builder::compact_storage::yes);
builder.with_hash_version();
return builder.build();
}();
return schema;
}
schema_ptr system_keyspace::legacy::batchlog() {
static thread_local auto schema = [] {
schema_builder builder(generate_legacy_id(NAME, BATCHLOG), NAME, BATCHLOG,
// partition key
{{"id", uuid_type}},
// clustering key
{},
// regular columns
{{"data", bytes_type}, {"version", int32_type}, {"written_at", timestamp_type}},
// static columns
{},
// regular column name type
utf8_type,
// comment
"*DEPRECATED* batchlog entries"
);
builder.set_gc_grace_seconds(0);
builder.set_compaction_strategy(compaction::compaction_strategy_type::incremental);
builder.set_compaction_strategy_options({{"min_threshold", "2"}});
builder.with(schema_builder::compact_storage::no);
builder.with_hash_version();
return builder.build();
}();
return schema;
}
schema_ptr system_keyspace::legacy::keyspaces() {
static thread_local auto schema = [] {
schema_builder builder(generate_legacy_id(NAME, KEYSPACES), NAME, KEYSPACES,
// partition key
{{"keyspace_name", utf8_type}},
// clustering key
{},
// regular columns
{
{"durable_writes", boolean_type},
{"strategy_class", utf8_type},
{"strategy_options", utf8_type}
},
// static columns
{},
// regular column name type
utf8_type,
// comment
"*DEPRECATED* keyspace definitions"
);
builder.set_gc_grace_seconds(schema_gc_grace);
builder.with(schema_builder::compact_storage::yes);
builder.with_hash_version();
return builder.build();
}();
return schema;
}
schema_ptr system_keyspace::legacy::column_families() {
static thread_local auto schema = [] {
schema_builder builder(generate_legacy_id(NAME, COLUMNFAMILIES), NAME, COLUMNFAMILIES,
// partition key
{{"keyspace_name", utf8_type}},
// clustering key
{{"columnfamily_name", utf8_type}},
// regular columns
{
{"bloom_filter_fp_chance", double_type},
{"caching", utf8_type},
{"cf_id", uuid_type},
{"comment", utf8_type},
{"compaction_strategy_class", utf8_type},
{"compaction_strategy_options", utf8_type},
{"comparator", utf8_type},
{"compression_parameters", utf8_type},
{"default_time_to_live", int32_type},
{"default_validator", utf8_type},
{"dropped_columns", map_type_impl::get_instance(utf8_type, long_type, true)},
{"gc_grace_seconds", int32_type},
{"is_dense", boolean_type},
{"key_validator", utf8_type},
{"max_compaction_threshold", int32_type},
{"max_index_interval", int32_type},
{"memtable_flush_period_in_ms", int32_type},
{"min_compaction_threshold", int32_type},
{"min_index_interval", int32_type},
{"speculative_retry", utf8_type},
{"subcomparator", utf8_type},
{"type", utf8_type},
// The following 4 columns are only present up until 2.1.8 tables
{"key_aliases", utf8_type},
{"value_alias", utf8_type},
{"column_aliases", utf8_type},
{"index_interval", int32_type},},
// static columns
{},
// regular column name type
utf8_type,
// comment
"*DEPRECATED* table definitions"
);
builder.set_gc_grace_seconds(schema_gc_grace);
builder.with(schema_builder::compact_storage::no);
builder.with_hash_version();
return builder.build();
}();
return schema;
}
schema_ptr system_keyspace::legacy::columns() {
static thread_local auto schema = [] {
schema_builder builder(generate_legacy_id(NAME, COLUMNS), NAME, COLUMNS,
// partition key
{{"keyspace_name", utf8_type}},
// clustering key
{{"columnfamily_name", utf8_type}, {"column_name", utf8_type}},
// regular columns
{
{"component_index", int32_type},
{"index_name", utf8_type},
{"index_options", utf8_type},
{"index_type", utf8_type},
{"type", utf8_type},
{"validator", utf8_type},
},
// static columns
{},
// regular column name type
utf8_type,
// comment
"column definitions"
);
builder.set_gc_grace_seconds(schema_gc_grace);
builder.with(schema_builder::compact_storage::no);
builder.with_hash_version();
return builder.build();
}();
return schema;
}
schema_ptr system_keyspace::legacy::triggers() {
static thread_local auto schema = [] {
schema_builder builder(generate_legacy_id(NAME, TRIGGERS), NAME, TRIGGERS,
// partition key
{{"keyspace_name", utf8_type}},
// clustering key
{{"columnfamily_name", utf8_type}, {"trigger_name", utf8_type}},
// regular columns
{
{"trigger_options", map_type_impl::get_instance(utf8_type, utf8_type, true)},
},
// static columns
{},
// regular column name type
utf8_type,
// comment
"trigger definitions"
);
builder.set_gc_grace_seconds(schema_gc_grace);
builder.with(schema_builder::compact_storage::no);
builder.with_hash_version();
return builder.build();
}();
return schema;
}
schema_ptr system_keyspace::legacy::usertypes() {
static thread_local auto schema = [] {
schema_builder builder(generate_legacy_id(NAME, USERTYPES), NAME, USERTYPES,
// partition key
{{"keyspace_name", utf8_type}},
// clustering key
{{"type_name", utf8_type}},
// regular columns
{
{"field_names", list_type_impl::get_instance(utf8_type, true)},
{"field_types", list_type_impl::get_instance(utf8_type, true)},
},
// static columns
{},
// regular column name type
utf8_type,
// comment
"user defined type definitions"
);
builder.set_gc_grace_seconds(schema_gc_grace);
builder.with(schema_builder::compact_storage::no);
builder.with_hash_version();
return builder.build();
}();
return schema;
}
schema_ptr system_keyspace::legacy::functions() {
/**
* Note: we have our own "legacy" version of this table (in schema_tables),
* but it is (afaik) not used, and differs slightly from the origin one.
* This is based on the origin schema, since we're more likely to encounter
* installations of that to migrate, rather than our own (if we dont use the table).
*/
static thread_local auto schema = [] {
schema_builder builder(generate_legacy_id(NAME, FUNCTIONS), NAME, FUNCTIONS,
// partition key
{{"keyspace_name", utf8_type}},
// clustering key
{{"function_name", utf8_type},{"signature", list_type_impl::get_instance(utf8_type, false)}},
// regular columns
{
{"argument_names", list_type_impl::get_instance(utf8_type, true)},
{"argument_types", list_type_impl::get_instance(utf8_type, true)},
{"body", utf8_type},
{"language", utf8_type},
{"return_type", utf8_type},
{"called_on_null_input", boolean_type},
},
// static columns
{},
// regular column name type
utf8_type,
// comment
"*DEPRECATED* user defined type definitions"
);
builder.set_gc_grace_seconds(schema_gc_grace);
builder.with(schema_builder::compact_storage::no);
builder.with_hash_version();
return builder.build();
}();
return schema;
}
schema_ptr system_keyspace::legacy::aggregates() {
static thread_local auto schema = [] {
schema_builder builder(generate_legacy_id(NAME, AGGREGATES), NAME, AGGREGATES,
// partition key
{{"keyspace_name", utf8_type}},
// clustering key
{{"aggregate_name", utf8_type},{"signature", list_type_impl::get_instance(utf8_type, false)}},
// regular columns
{
{"argument_types", list_type_impl::get_instance(utf8_type, true)},
{"final_func", utf8_type},
{"initcond", bytes_type},
{"return_type", utf8_type},
{"state_func", utf8_type},
{"state_type", utf8_type},
},
// static columns
{},
// regular column name type
utf8_type,
// comment
"*DEPRECATED* user defined aggregate definition"
);
builder.set_gc_grace_seconds(schema_gc_grace);
builder.with(schema_builder::compact_storage::no);
builder.with_hash_version();
return builder.build();
}();
return schema;
}
schema_ptr system_keyspace::dicts() {
static thread_local auto schema = [] {
auto id = generate_legacy_id(NAME, DICTS);
@@ -2330,6 +2615,13 @@ std::vector<schema_ptr> system_keyspace::all_tables(const db::config& cfg) {
if (cfg.check_experimental(db::experimental_features_t::feature::KEYSPACE_STORAGE_OPTIONS)) {
r.insert(r.end(), {sstables_registry()});
}
// legacy schema
r.insert(r.end(), {
// TODO: once we migrate hints/batchlog and add converter
// legacy::hints(), legacy::batchlog(),
legacy::keyspaces(), legacy::column_families(),
legacy::columns(), legacy::triggers(), legacy::usertypes(),
legacy::functions(), legacy::aggregates(), });
return r;
}

View File

@@ -241,6 +241,28 @@ public:
static schema_ptr cdc_local();
};
struct legacy {
static constexpr auto HINTS = "hints";
static constexpr auto BATCHLOG = "batchlog";
static constexpr auto KEYSPACES = "schema_keyspaces";
static constexpr auto COLUMNFAMILIES = "schema_columnfamilies";
static constexpr auto COLUMNS = "schema_columns";
static constexpr auto TRIGGERS = "schema_triggers";
static constexpr auto USERTYPES = "schema_usertypes";
static constexpr auto FUNCTIONS = "schema_functions";
static constexpr auto AGGREGATES = "schema_aggregates";
static schema_ptr keyspaces();
static schema_ptr column_families();
static schema_ptr columns();
static schema_ptr triggers();
static schema_ptr usertypes();
static schema_ptr functions();
static schema_ptr aggregates();
static schema_ptr hints();
static schema_ptr batchlog();
};
// Partition estimates for a given range of tokens.
struct range_estimates {
schema_ptr schema;

View File

@@ -153,14 +153,14 @@ row_locker::unlock(const dht::decorated_key* pk, bool partition_exclusive,
mylog.error("column_family::local_base_lock_holder::~local_base_lock_holder() can't find lock for partition", *pk);
return;
}
SCYLLA_ASSERT(&pli->first == pk);
scylla_assert(&pli->first == pk);
if (cpk) {
auto rli = pli->second._row_locks.find(*cpk);
if (rli == pli->second._row_locks.end()) {
mylog.error("column_family::local_base_lock_holder::~local_base_lock_holder() can't find lock for row", *cpk);
return;
}
SCYLLA_ASSERT(&rli->first == cpk);
scylla_assert(&rli->first == cpk);
mylog.debug("releasing {} lock for row {} in partition {}", (row_exclusive ? "exclusive" : "shared"), *cpk, *pk);
auto& lock = rli->second;
if (row_exclusive) {

View File

@@ -1744,115 +1744,6 @@ bool should_generate_view_updates_on_this_shard(const schema_ptr& base, const lo
&& std::ranges::contains(shards, this_shard_id());
}
static endpoints_to_update get_view_natural_endpoint_vnodes(
locator::host_id me,
std::vector<std::reference_wrapper<const locator::node>> base_nodes,
std::vector<std::reference_wrapper<const locator::node>> view_nodes,
locator::endpoint_dc_rack my_location,
const locator::network_topology_strategy* network_topology,
replica::cf_stats& cf_stats) {
using node_vector = std::vector<std::reference_wrapper<const locator::node>>;
node_vector base_endpoints, view_endpoints;
auto& my_datacenter = my_location.dc;
auto process_candidate = [&] (node_vector& nodes, std::reference_wrapper<const locator::node> node) {
if (!network_topology || node.get().dc() == my_datacenter) {
nodes.emplace_back(node);
}
};
for (auto&& base_node : base_nodes) {
process_candidate(base_endpoints, base_node);
}
for (auto&& view_node : view_nodes) {
auto it = std::ranges::find(base_endpoints, view_node.get().host_id(), std::mem_fn(&locator::node::host_id));
// If this base replica is also one of the view replicas, we use
// ourselves as the view replica.
// We don't return an extra endpoint, as it's only needed when
// using tablets (so !use_legacy_self_pairing)
if (view_node.get().host_id() == me && it != base_endpoints.end()) {
return {.natural_endpoint = me};
}
// We have to remove any endpoint which is shared between the base
// and the view, as it will select itself and throw off the counts
// otherwise.
if (it != base_endpoints.end()) {
base_endpoints.erase(it);
} else if (!network_topology || view_node.get().dc() == my_datacenter) {
view_endpoints.push_back(view_node);
}
}
auto base_it = std::ranges::find(base_endpoints, me, std::mem_fn(&locator::node::host_id));
if (base_it == base_endpoints.end()) {
// This node is not a base replica of this key, so we return empty
// FIXME: This case shouldn't happen, and if it happens, a view update
// would be lost.
++cf_stats.total_view_updates_on_wrong_node;
vlogger.warn("Could not find {} in base_endpoints={}", me,
base_endpoints | std::views::transform(std::mem_fn(&locator::node::host_id)));
return {};
}
size_t idx = base_it - base_endpoints.begin();
return {.natural_endpoint = view_endpoints[idx].get().host_id()};
}
static std::optional<locator::host_id> get_unpaired_view_endpoint(
std::vector<std::reference_wrapper<const locator::node>> base_nodes,
std::vector<std::reference_wrapper<const locator::node>> view_nodes,
replica::cf_stats& cf_stats) {
std::unordered_set<locator::endpoint_dc_rack> base_dc_racks;
for (auto&& base_node : base_nodes) {
if (base_dc_racks.contains(base_node.get().dc_rack())) {
// We can't do rack-aware pairing if there are multiple replicas in the same rack.
++cf_stats.total_view_updates_failed_pairing;
vlogger.warn("Can't perform base-view pairing in this topology. There are multiple base table replicas in the same dc/rack({}/{}):",
base_node.get().dc(), base_node.get().rack());
return std::nullopt;
}
base_dc_racks.insert(base_node.get().dc_rack());
}
std::unordered_set<locator::endpoint_dc_rack> paired_view_dc_racks;
std::unordered_map<locator::endpoint_dc_rack, locator::host_id> unpaired_view_dc_rack_replicas;
for (auto&& view_node : view_nodes) {
if (paired_view_dc_racks.contains(view_node.get().dc_rack()) || unpaired_view_dc_rack_replicas.contains(view_node.get().dc_rack())) {
// We can't do rack-aware pairing if there are multiple replicas in the same rack.
++cf_stats.total_view_updates_failed_pairing;
vlogger.warn("Can't perform base-view pairing in this topology. There are multiple view table replicas in the same dc/rack({}/{}):",
view_node.get().dc(), view_node.get().rack());
return std::nullopt;
}
// Track unpaired replicas in both sets
if (base_dc_racks.contains(view_node.get().dc_rack())) {
paired_view_dc_racks.insert(view_node.get().dc_rack());
} else {
unpaired_view_dc_rack_replicas.insert({view_node.get().dc_rack(), view_node.get().host_id()});
}
}
if (unpaired_view_dc_rack_replicas.size() > 0) {
// There are view replicas that can't be paired with any base replica
// This can happen as a result of an RF change when the view replica finishes streaming
// before the base replica.
// Because of this, a view replica might not get paired with any base replica, so we need
// to send an additional update to it.
++cf_stats.total_view_updates_due_to_replica_count_mismatch;
auto extra_replica = unpaired_view_dc_rack_replicas.begin()->second;
unpaired_view_dc_rack_replicas.erase(unpaired_view_dc_rack_replicas.begin());
if (unpaired_view_dc_rack_replicas.size() > 0) {
// We only expect one extra replica to appear due to an RF change. If there's more, that's an error,
// but we'll still perform updates to the paired and last replicas to minimize degradation.
vlogger.warn("There are too many view endpoints for base-view pairing. View updates may get lost on view_endpoints={}",
unpaired_view_dc_rack_replicas | std::views::values);
}
return extra_replica;
}
return std::nullopt;
}
// Calculate the node ("natural endpoint") to which this node should send
// a view update.
//
@@ -1865,19 +1756,29 @@ static std::optional<locator::host_id> get_unpaired_view_endpoint(
// of this function is to find, assuming that this node is one of the base
// replicas for a given partition, the paired view replica.
//
// When using vnodes, we have an optimization called "self-pairing" - if a single
// node is both a base replica and a view replica for a write, the pairing is
// modified so that this node sends the update to itself and this node is removed
// from the lists of nodes paired by index. This self-pairing optimization can
// cause the pairing to change after view ranges are moved between nodes.
// In the past, we used an optimization called "self-pairing" that if a single
// node was both a base replica and a view replica for a write, the pairing is
// modified so that this node would send the update to itself. This self-
// pairing optimization could cause the pairing to change after view ranges
// are moved between nodes, so currently we only use it if
// use_legacy_self_pairing is set to true. When using tablets - where range
// movements are common - it is strongly recommended to set it to false.
//
// If the keyspace's replication strategy is a NetworkTopologyStrategy,
// we pair only nodes in the same datacenter.
//
// If the table uses tablets, then pairing is rack-aware. In this case, in each
// rack where we have a base replica there is also one replica of each view tablet.
// Therefore, the base replicas are naturally paired with the view replicas that
// are in the same rack.
// When use_legacy_self_pairing is enabled, if one of the base replicas
// also happens to be a view replica, it is paired with itself
// (with the other nodes paired by order in the list
// after taking this node out).
//
// If the table uses tablets and the replication strategy is NetworkTopologyStrategy
// and the replication factor in the node's datacenter is a multiple of the number
// of racks in the datacenter, then pairing is rack-aware. In this case,
// all racks have the same number of replicas, and those are never migrated
// outside their racks. Therefore, the base replicas are naturally paired with the
// view replicas that are in the same rack, based on the ordinal position.
// Note that typically, there is a single replica per rack and pairing is trivial.
//
// If the assumption that the given base token belongs to this replica
// does not hold, we return an empty optional.
@@ -1905,12 +1806,19 @@ endpoints_to_update get_view_natural_endpoint(
const locator::abstract_replication_strategy& replication_strategy,
const dht::token& base_token,
const dht::token& view_token,
bool use_tablets,
bool use_legacy_self_pairing,
bool use_tablets_rack_aware_view_pairing,
replica::cf_stats& cf_stats) {
auto& topology = base_erm->get_token_metadata_ptr()->get_topology();
auto& view_topology = view_erm->get_token_metadata_ptr()->get_topology();
auto& my_location = topology.get_location(me);
auto& my_datacenter = my_location.dc;
auto* network_topology = dynamic_cast<const locator::network_topology_strategy*>(&replication_strategy);
auto rack_aware_pairing = use_tablets_rack_aware_view_pairing && network_topology;
bool simple_rack_aware_pairing = false;
using node_vector = std::vector<std::reference_wrapper<const locator::node>>;
node_vector orig_base_endpoints, orig_view_endpoints;
node_vector base_endpoints, view_endpoints;
auto resolve = [&] (const locator::topology& topology, const locator::host_id& ep, bool is_view) -> const locator::node& {
if (auto* np = topology.find_node(ep)) {
@@ -1921,7 +1829,6 @@ endpoints_to_update get_view_natural_endpoint(
// We need to use get_replicas() for pairing to be stable in case base or view tablet
// is rebuilding a replica which has left the ring. get_natural_endpoints() filters such replicas.
using node_vector = std::vector<std::reference_wrapper<const locator::node>>;
auto base_nodes = base_erm->get_replicas(base_token) | std::views::transform([&] (const locator::host_id& ep) -> const locator::node& {
return resolve(topology, ep, false);
}) | std::ranges::to<node_vector>();
@@ -1945,43 +1852,231 @@ endpoints_to_update get_view_natural_endpoint(
// note that the recursive call will not recurse again because leaving_base is in base_nodes.
auto leaving_base = it->get().host_id();
return get_view_natural_endpoint(leaving_base, base_erm, view_erm, replication_strategy, base_token,
view_token, use_tablets, cf_stats);
view_token, use_legacy_self_pairing, use_tablets_rack_aware_view_pairing, cf_stats);
}
}
}
if (!use_tablets) {
return get_view_natural_endpoint_vnodes(
me,
base_nodes,
view_nodes,
my_location,
network_topology,
cf_stats);
std::function<bool(const locator::node&)> is_candidate;
if (network_topology) {
is_candidate = [&] (const locator::node& node) { return node.dc() == my_datacenter; };
} else {
is_candidate = [&] (const locator::node&) { return true; };
}
auto process_candidate = [&] (node_vector& nodes, std::reference_wrapper<const locator::node> node) {
if (is_candidate(node)) {
nodes.emplace_back(node);
}
};
for (auto&& base_node : base_nodes) {
process_candidate(base_endpoints, base_node);
}
std::optional<locator::host_id> paired_replica;
for (auto&& view_node : view_nodes) {
if (view_node.get().dc_rack() == my_location) {
paired_replica = view_node.get().host_id();
break;
if (use_legacy_self_pairing) {
for (auto&& view_node : view_nodes) {
auto it = std::ranges::find(base_endpoints, view_node.get().host_id(), std::mem_fn(&locator::node::host_id));
// If this base replica is also one of the view replicas, we use
// ourselves as the view replica.
// We don't return an extra endpoint, as it's only needed when
// using tablets (so !use_legacy_self_pairing)
if (view_node.get().host_id() == me && it != base_endpoints.end()) {
return {.natural_endpoint = me};
}
// We have to remove any endpoint which is shared between the base
// and the view, as it will select itself and throw off the counts
// otherwise.
if (it != base_endpoints.end()) {
base_endpoints.erase(it);
} else if (is_candidate(view_node)) {
view_endpoints.push_back(view_node);
}
}
} else {
for (auto&& view_node : view_nodes) {
process_candidate(view_endpoints, view_node);
}
}
if (paired_replica && base_nodes.size() == view_nodes.size()) {
// We don't need to find any extra replicas, so we can return early
return {.natural_endpoint = paired_replica};
// Try optimizing for simple rack-aware pairing
// If the numbers of base and view replica differ, that means an RF change is taking place
// and we can't use simple rack-aware pairing.
if (rack_aware_pairing && base_endpoints.size() == view_endpoints.size()) {
auto dc_rf = network_topology->get_replication_factor(my_datacenter);
const auto& racks = topology.get_datacenter_rack_nodes().at(my_datacenter);
// Simple rack-aware pairing is possible when the datacenter replication factor
// is a multiple of the number of racks in the datacenter.
if (dc_rf % racks.size() == 0) {
simple_rack_aware_pairing = true;
size_t rack_rf = dc_rf / racks.size();
// If any rack doesn't have enough nodes to satisfy the per-rack rf
// simple rack-aware pairing is disabled.
for (const auto& [rack, nodes] : racks) {
if (nodes.size() < rack_rf) {
simple_rack_aware_pairing = false;
break;
}
}
}
if (dc_rf != base_endpoints.size()) {
// If the datacenter replication factor is not equal to the number of base replicas,
// we're in progress of a RF change and we can't use simple rack-aware pairing.
simple_rack_aware_pairing = false;
}
if (simple_rack_aware_pairing) {
std::erase_if(base_endpoints, [&] (const locator::node& node) { return node.dc_rack() != my_location; });
std::erase_if(view_endpoints, [&] (const locator::node& node) { return node.dc_rack() != my_location; });
}
}
if (!paired_replica) {
// We couldn't find any view replica in our rack
orig_base_endpoints = base_endpoints;
orig_view_endpoints = view_endpoints;
// For the complex rack_aware_pairing case, nodes are already filtered by datacenter
// Use best-match, for the minimum number of base and view replicas in each rack,
// and ordinal match for the rest.
std::optional<std::reference_wrapper<const locator::node>> paired_replica;
if (rack_aware_pairing && !simple_rack_aware_pairing) {
struct indexed_replica {
size_t idx;
std::reference_wrapper<const locator::node> node;
};
std::unordered_map<sstring, std::vector<indexed_replica>> base_racks, view_racks;
// First, index all replicas by rack
auto index_replica_set = [] (std::unordered_map<sstring, std::vector<indexed_replica>>& racks, const node_vector& replicas) {
size_t idx = 0;
for (const auto& r: replicas) {
racks[r.get().rack()].emplace_back(idx++, r);
}
};
index_replica_set(base_racks, base_endpoints);
index_replica_set(view_racks, view_endpoints);
// Try optimistically pairing `me` first
const auto& my_base_replicas = base_racks[my_location.rack];
auto base_it = std::ranges::find(my_base_replicas, me, [] (const indexed_replica& ir) { return ir.node.get().host_id(); });
if (base_it == my_base_replicas.end()) {
return {};
}
const auto& my_view_replicas = view_racks[my_location.rack];
size_t idx = base_it - my_base_replicas.begin();
if (idx < my_view_replicas.size()) {
if (orig_view_endpoints.size() <= orig_base_endpoints.size()) {
return {.natural_endpoint = my_view_replicas[idx].node.get().host_id()};
} else {
// If the number of view replicas is larger than the number of base replicas,
// we need to find the unpaired view replica, so we can't return yet.
paired_replica = my_view_replicas[idx].node;
}
}
// Collect all unpaired base and view replicas,
// where the number of replicas in the base rack is different than the respective view rack
std::vector<indexed_replica> unpaired_base_replicas, unpaired_view_replicas;
for (const auto& [rack, base_replicas] : base_racks) {
const auto& view_replicas = view_racks[rack];
for (auto i = view_replicas.size(); i < base_replicas.size(); ++i) {
unpaired_base_replicas.emplace_back(base_replicas[i]);
}
}
for (const auto& [rack, view_replicas] : view_racks) {
const auto& base_replicas = base_racks[rack];
for (auto i = base_replicas.size(); i < view_replicas.size(); ++i) {
unpaired_view_replicas.emplace_back(view_replicas[i]);
}
}
// Sort by the original ordinality, and copy the sorted results
// back into {base,view}_endpoints, for backward compatible processing below.
std::ranges::sort(unpaired_base_replicas, std::less(), std::mem_fn(&indexed_replica::idx));
base_endpoints.clear();
std::ranges::transform(unpaired_base_replicas, std::back_inserter(base_endpoints), std::mem_fn(&indexed_replica::node));
std::ranges::sort(unpaired_view_replicas, std::less(), std::mem_fn(&indexed_replica::idx));
view_endpoints.clear();
std::ranges::transform(unpaired_view_replicas, std::back_inserter(view_endpoints), std::mem_fn(&indexed_replica::node));
}
auto base_it = std::ranges::find(base_endpoints, me, std::mem_fn(&locator::node::host_id));
if (!paired_replica && base_it == base_endpoints.end()) {
// This node is not a base replica of this key, so we return empty
// FIXME: This case shouldn't happen, and if it happens, a view update
// would be lost.
++cf_stats.total_view_updates_on_wrong_node;
vlogger.warn("Could not find {} in base_endpoints={}", me,
orig_base_endpoints | std::views::transform(std::mem_fn(&locator::node::host_id)));
return {};
}
size_t idx = base_it - base_endpoints.begin();
std::optional<std::reference_wrapper<const locator::node>> no_pairing_replica;
if (!paired_replica && idx >= view_endpoints.size()) {
// There are fewer view replicas than base replicas
// FIXME: This might still happen when reducing replication factor with tablets,
// see https://github.com/scylladb/scylladb/issues/21492
++cf_stats.total_view_updates_failed_pairing;
vlogger.warn("Could not find a view replica in the same rack as base replica {} for base_endpoints={} view_endpoints={}",
me,
base_nodes | std::views::transform(std::mem_fn(&locator::node::host_id)),
view_nodes | std::views::transform(std::mem_fn(&locator::node::host_id)));
vlogger.warn("Could not pair {}: rack_aware={} base_endpoints={} view_endpoints={}", me,
rack_aware_pairing ? (simple_rack_aware_pairing ? "simple" : "complex") : "none",
orig_base_endpoints | std::views::transform(std::mem_fn(&locator::node::host_id)),
orig_view_endpoints | std::views::transform(std::mem_fn(&locator::node::host_id)));
return {};
} else if (base_endpoints.size() < view_endpoints.size()) {
// There are fewer base replicas than view replicas.
// This can happen as a result of an RF change when the view replica finishes streaming
// before the base replica.
// Because of this, a view replica might not get paired with any base replica, so we need
// to send an additional update to it.
++cf_stats.total_view_updates_due_to_replica_count_mismatch;
no_pairing_replica = view_endpoints.back();
if (base_endpoints.size() < view_endpoints.size() - 1) {
// We only expect one extra replica to appear due to an RF change. If there's more, that's an error,
// but we'll still perform updates to the paired and last replicas to minimize degradation.
vlogger.warn("There are too many view endpoints for base-view pairing. View updates may get lost on view_endpoints={}",
std::span(view_endpoints.begin() + base_endpoints.size(), view_endpoints.end() - 1) | std::views::transform(std::mem_fn(&locator::node::host_id)));
}
}
std::optional<locator::host_id> no_pairing_replica = get_unpaired_view_endpoint(base_nodes, view_nodes, cf_stats);
return {.natural_endpoint = paired_replica,
.endpoint_with_no_pairing = no_pairing_replica};
if (!paired_replica) {
paired_replica = view_endpoints[idx];
}
if (!no_pairing_replica && base_nodes.size() < view_nodes.size()) {
// This can happen when the view replica with no pairing is in another DC.
// We need to send an update to it if there are no base replicas in that DC yet,
// as it won't receive updates otherwise.
std::unordered_set<sstring> dcs_with_base_replicas;
for (const auto& base_node : base_nodes) {
dcs_with_base_replicas.insert(base_node.get().dc());
}
for (const auto& view_node : view_nodes) {
if (!dcs_with_base_replicas.contains(view_node.get().dc())) {
++cf_stats.total_view_updates_due_to_replica_count_mismatch;
no_pairing_replica = view_node;
break;
}
}
}
// https://github.com/scylladb/scylladb/issues/19439
// With tablets, a node being replaced might transition to "left" state
// but still be kept as a replica.
// As of writing this hints are not prepared to handle nodes that are left
// but are still replicas. Therefore, there is no other sensible option
// right now but to give up attempt to send the update or write a hint
// to the paired, permanently down replica.
// We use the same workaround for the extra replica.
auto return_host_id_if_not_left = [] (const auto& replica) -> std::optional<locator::host_id> {
if (!replica) {
return std::nullopt;
}
const auto& node = replica->get();
if (!node.left()) {
return node.host_id();
} else {
return std::nullopt;
}
};
return {.natural_endpoint = return_host_id_if_not_left(paired_replica),
.endpoint_with_no_pairing = return_host_id_if_not_left(no_pairing_replica)};
}
static future<> apply_to_remote_endpoints(service::storage_proxy& proxy, locator::effective_replication_map_ptr ermp,
@@ -2041,6 +2136,12 @@ future<> view_update_generator::mutate_MV(
{
auto& ks = _db.find_keyspace(base->ks_name());
auto& replication = ks.get_replication_strategy();
// We set legacy self-pairing for old vnode-based tables (for backward
// compatibility), and unset it for tablets - where range movements
// are more frequent and backward compatibility is less important.
// TODO: Maybe allow users to set use_legacy_self_pairing explicitly
// on a view, like we have the synchronous_updates_flag.
bool use_legacy_self_pairing = !ks.uses_tablets();
std::unordered_map<table_id, locator::effective_replication_map_ptr> erms;
auto get_erm = [&] (table_id id) {
auto it = erms.find(id);
@@ -2053,6 +2154,10 @@ future<> view_update_generator::mutate_MV(
for (const auto& mut : view_updates) {
(void)get_erm(mut.s->id());
}
// Enable rack-aware view updates pairing for tablets
// when the cluster feature is enabled so that all replicas agree
// on the pairing algorithm.
bool use_tablets_rack_aware_view_pairing = _db.features().tablet_rack_aware_view_pairing && ks.uses_tablets();
auto me = base_ermp->get_topology().my_host_id();
static constexpr size_t max_concurrent_updates = 128;
co_await utils::get_local_injector().inject("delay_before_get_view_natural_endpoint", 8000ms);
@@ -2060,7 +2165,7 @@ future<> view_update_generator::mutate_MV(
auto view_token = dht::get_token(*mut.s, mut.fm.key());
auto view_ermp = erms.at(mut.s->id());
auto [target_endpoint, no_pairing_endpoint] = get_view_natural_endpoint(me, base_ermp, view_ermp, replication, base_token, view_token,
ks.uses_tablets(), cf_stats);
use_legacy_self_pairing, use_tablets_rack_aware_view_pairing, cf_stats);
auto remote_endpoints = view_ermp->get_pending_replicas(view_token);
auto memory_units = seastar::make_lw_shared<db::timeout_semaphore_units>(pending_view_update_memory_units.split(memory_usage_of(mut)));
if (no_pairing_endpoint) {

View File

@@ -305,7 +305,8 @@ endpoints_to_update get_view_natural_endpoint(
const locator::abstract_replication_strategy& replication_strategy,
const dht::token& base_token,
const dht::token& view_token,
bool use_tablets,
bool use_legacy_self_pairing,
bool use_tablets_basic_rack_aware_view_pairing,
replica::cf_stats& cf_stats);
/// Verify that the provided keyspace is eligible for storing materialized views.

View File

@@ -1 +1 @@
SCYLLA_NODE_EXPORTER_ARGS="--collector.interrupts --collector.ethtool.metrics-include='(bw_in_allowance_exceeded|bw_out_allowance_exceeded|conntrack_allowance_exceeded|conntrack_allowance_available|linklocal_allowance_exceeded)' --collector.ethtool --no-collector.hwmon --no-collector.bcache --no-collector.btrfs --no-collector.fibrechannel --no-collector.infiniband --no-collector.ipvs --no-collector.nfs --no-collector.nfsd --no-collector.powersupplyclass --no-collector.rapl --no-collector.tapestats --no-collector.thermal_zone --no-collector.udp_queues --no-collector.zfs"
SCYLLA_NODE_EXPORTER_ARGS="--collector.interrupts --no-collector.hwmon --no-collector.bcache --no-collector.btrfs --no-collector.fibrechannel --no-collector.infiniband --no-collector.ipvs --no-collector.nfs --no-collector.nfsd --no-collector.powersupplyclass --no-collector.rapl --no-collector.tapestats --no-collector.thermal_zone --no-collector.udp_queues --no-collector.zfs"

View File

@@ -41,8 +41,6 @@ class MetricsProcessor:
# Get metrics from the file
try:
metrics_file = metrics.get_metrics_from_file(relative_path, "scylla_", metrics_info, strict=strict)
except SystemExit:
pass
finally:
os.chdir(old_cwd)
if metrics_file:

View File

@@ -0,0 +1,198 @@
# SCYLLA_ASSERT to scylla_assert() Conversion Guide
## Overview
This document tracks the conversion of `SCYLLA_ASSERT` to the new `scylla_assert()` macro based on `on_internal_error()`. The new macro throws exceptions instead of crashing the process, preventing cluster-wide crashes and loss of availability.
## Status Summary
- **Total SCYLLA_ASSERT usages**: ~1307 (including tests)
- **Non-test usages**: ~886
- **Unsafe conversions (noexcept)**: ~187
- **Unsafe conversions (destructors)**: ~36
- **Safe conversions possible**: ~668
- **Converted so far**: 112
## Safe vs Unsafe Contexts
### Safe to Convert ✓
- Regular functions (non-noexcept)
- Coroutine functions (returning `future<T>`)
- Member functions without noexcept specifier
- Functions where exception propagation is acceptable
### Unsafe to Convert ✗
1. **noexcept functions** - throwing exceptions from noexcept causes `std::terminate()`
2. **Destructors** - destructors are implicitly noexcept
3. **noexcept lambdas and callbacks**
4. **Code with explicit exception-safety requirements** that cannot handle exceptions
## Files with Unsafe Conversions
### Files with SCYLLA_ASSERT in noexcept contexts (examples)
1. **reader_concurrency_semaphore.cc**
- Lines with noexcept functions containing SCYLLA_ASSERT
- Must remain as SCYLLA_ASSERT
2. **db/large_data_handler.cc**
- Line 86: `maybe_delete_large_data_entries()` - marked noexcept but contains SCYLLA_ASSERT
- Analysis shows this is actually safe (not truly noexcept)
3. **db/row_cache.cc**
- Multiple SCYLLA_ASSERT usages in noexcept member functions
4. **db/schema_tables.cc**
- SCYLLA_ASSERT in noexcept contexts
5. **raft/server.cc**
- Multiple noexcept functions with SCYLLA_ASSERT
### Files with SCYLLA_ASSERT in destructors
1. **reader_concurrency_semaphore.cc**
- Line 1116: SCYLLA_ASSERT in destructor
2. **api/column_family.cc**
- Line 102: SCYLLA_ASSERT in destructor
3. **utils/logalloc.cc**
- Line 1991: SCYLLA_ASSERT in destructor
4. **utils/file_lock.cc**
- Lines 34, 36: SCYLLA_ASSERT in destructor
5. **utils/disk_space_monitor.cc**
- Line 66: SCYLLA_ASSERT in destructor
## Conversion Strategy
### Phase 1: Infrastructure (Completed)
- Created `scylla_assert()` macro in `utils/assert.hh`
- Uses `on_internal_error()` for exception-based error handling
- Supports optional message parameters
### Phase 2: Safe Conversions
Convert SCYLLA_ASSERT to scylla_assert in contexts where:
- Function is not noexcept
- Not in a destructor
- Exception propagation is safe
### Phase 3: Document Remaining Uses
For contexts that cannot be converted:
- Add comments explaining why SCYLLA_ASSERT must remain
- Consider alternative approaches (e.g., using `on_fatal_internal_error()` in noexcept)
## Converted Files
### Completed Conversions
1. **db/large_data_handler.cc** (3 conversions)
- Line 42: `maybe_record_large_partitions()`
- Line 86: `maybe_delete_large_data_entries()`
- Line 250: `delete_large_data_entries()`
2. **db/large_data_handler.hh** (2 conversions)
- Line 83: `maybe_record_large_rows()`
- Line 103: `maybe_record_large_cells()`
3. **db/schema_applier.cc** (1 conversion)
- Line 1124: `commit()` coroutine
4. **db/system_distributed_keyspace.cc** (1 conversion)
- Line 234: `get_updated_service_levels()`
5. **db/commitlog/commitlog_replayer.cc** (1 conversion)
- Line 168: `recover()` coroutine
6. **db/view/row_locking.cc** (2 conversions)
- Line 156: `unlock()` - partition lock check
- Line 163: `unlock()` - row lock check
7. **db/size_estimates_virtual_reader.cc** (1 conversion)
- Line 190: Lambda in `get_local_ranges()`
8. **db/corrupt_data_handler.cc** (2 conversions)
- Line 78: `set_cell_raw` lambda
- Line 85: `set_cell` lambda
9. **raft/tracker.cc** (2 conversions)
- Line 49: Switch default case with descriptive error
- Line 90: Switch default case with descriptive error
10. **service/topology_coordinator.cc** (11 conversions)
- Line 363: Node lookup assertion in `retake_node()`
- Line 2313: Bootstrapping state ring check
- Line 2362: Replacing state ring check
- Line 2365: Normal nodes lookup assertion
- Line 2366: Node ring and state validation
- Line 3025: Join request ring check
- Line 3036: Leave request ring check
- Line 3049: Remove request ring check
- Line 3061: Replace request ring check
- Line 3166: Transition nodes empty check
- Line 4016: Barrier validation in `stop()`
11. **service/storage_service.cc** (28 conversions, 3 unsafe kept as SCYLLA_ASSERT)
- Lines 603, 691, 857, 901, 969: Core service operations
- Lines 1523, 1575, 1844, 2086, 2170, 2195: Bootstrap and join operations
- Lines 2319, 2352, 2354: Replacement operations
- Lines 3003, 3028, 3228: Cluster join and drain operations
- Lines 3995, 4047, 4353: Decommission and removenode operations
- Lines 4473, 5787, 5834, 5958: CDC and topology change operations
- Lines 6490, 6491: Tablet streaming operations
- Line 7512: Join node response handler
- **Unsafe (kept as SCYLLA_ASSERT)**: Lines 3398, 5760, 5775 (noexcept functions)
12. **sstables/** (58 conversions across 22 files)
- **sstables/trie/bti_node_reader.cc** (6): Node reading operations
- **sstables/mx/writer.cc** (6): MX format writing
- **sstables/sstable_set.cc** (5): SSTable set management
- **sstables/compressor.cc** (5): Compression/decompression
- **sstables/trie/trie_writer.hh** (4): Trie writing
- **sstables/downsampling.hh** (4): Downsampling operations
- **sstables/storage.{cc,hh}** (6): Storage operations
- **sstables/sstables_manager.{cc,hh}** (6): SSTable lifecycle management
- **sstables/trie/writer_node.{hh,impl.hh}** (4): Trie node writing
- **sstables/trie/bti_key_translation.cc** (2): Key translation
- **sstables/sstable_directory.cc** (2): Directory management
- **sstables/trie/trie_writer.cc** (1): Trie writer implementation
- **sstables/trie/trie_traversal.hh** (1): Trie traversal
- **sstables/sstables.cc** (1): Core SSTable operations
- **sstables/partition_index_cache.hh** (1): Index caching
- **sstables/generation_type.hh** (1): Generation management
- **sstables/compress.{cc,hh}** (2): Compression utilities
- **sstables/exceptions.hh** (1): Comment update
## Testing
### Manual Testing
Created `test/manual/test_scylla_assert.cc` to verify:
- Passing assertions succeed
- Failing assertions throw exceptions
- Custom messages are properly formatted
### Integration Testing
- Run existing test suite with converted assertions
- Verify no regressions in error handling
- Confirm exception propagation works correctly
## Future Work
1. **Automated Analysis Tool**
- Create tool to identify safe vs unsafe conversion contexts
- Generate reports of remaining conversions
2. **Gradual Conversion**
- Convert additional safe usages incrementally
- Monitor for any unexpected issues
3. **noexcept Review**
- Review functions marked noexcept that contain SCYLLA_ASSERT
- Consider if they should use `on_fatal_internal_error()` instead
## References
- `utils/assert.hh` - Implementation of both SCYLLA_ASSERT and scylla_assert
- `utils/on_internal_error.hh` - Exception-based error handling infrastructure
- GitHub Issue: [Link to original issue tracking this work]

View File

@@ -0,0 +1,614 @@
# Unsafe SCYLLA_ASSERT Locations
This document lists specific locations where SCYLLA_ASSERT cannot be safely converted to scylla_assert().
## Summary
- Files with noexcept SCYLLA_ASSERT: 50
- Files with destructor SCYLLA_ASSERT: 25
- Total unsafe SCYLLA_ASSERT in noexcept: 187
- Total unsafe SCYLLA_ASSERT in destructors: 36
## SCYLLA_ASSERT in noexcept Functions
### auth/cache.cc
- Line 118: `SCYLLA_ASSERT(this_shard_id() == 0);`
Total: 1 usages
### db/cache_mutation_reader.hh
- Line 309: `SCYLLA_ASSERT(sr->is_static_row());`
Total: 1 usages
### db/commitlog/commitlog.cc
- Line 531: `SCYLLA_ASSERT(!*this);`
- Line 544: `SCYLLA_ASSERT(!*this);`
- Line 662: `SCYLLA_ASSERT(_iter != _end);`
- Line 1462: `SCYLLA_ASSERT(i->second >= count);`
Total: 4 usages
### db/hints/manager.hh
- Line 167: `SCYLLA_ASSERT(_ep_managers.empty());`
Total: 1 usages
### db/partition_snapshot_row_cursor.hh
- Line 384: `SCYLLA_ASSERT(_latest_it);`
Total: 1 usages
### db/row_cache.cc
- Line 1365: `SCYLLA_ASSERT(it->is_last_dummy());`
Total: 1 usages
### db/schema_tables.cc
- Line 774: `SCYLLA_ASSERT(this_shard_id() == 0);`
Total: 1 usages
### db/view/view.cc
- Line 3623: `SCYLLA_ASSERT(thread::running_in_thread());`
Total: 1 usages
### gms/gossiper.cc
- Line 876: `SCYLLA_ASSERT(ptr->pid == _permit_id);`
Total: 1 usages
### locator/production_snitch_base.hh
- Line 77: `SCYLLA_ASSERT(_backreference != nullptr);`
- Line 82: `SCYLLA_ASSERT(_backreference != nullptr);`
- Line 87: `SCYLLA_ASSERT(_backreference != nullptr);`
Total: 3 usages
### locator/topology.cc
- Line 135: `SCYLLA_ASSERT(_shard == this_shard_id());`
Total: 1 usages
### mutation/counters.hh
- Line 314: `SCYLLA_ASSERT(_cell.is_live());`
- Line 315: `SCYLLA_ASSERT(!_cell.is_counter_update());`
Total: 2 usages
### mutation/mutation_partition_v2.hh
- Line 271: `SCYLLA_ASSERT(s.version() == _schema_version);`
Total: 1 usages
### mutation/partition_version.cc
- Line 364: `SCYLLA_ASSERT(!_snapshot->is_locked());`
- Line 701: `SCYLLA_ASSERT(!rows.empty());`
- Line 703: `SCYLLA_ASSERT(last_dummy.is_last_dummy());`
- Line 746: `SCYLLA_ASSERT(!_snapshot->is_locked());`
- Line 770: `SCYLLA_ASSERT(at_latest_version());`
- Line 777: `SCYLLA_ASSERT(at_latest_version());`
Total: 6 usages
### mutation/partition_version.hh
- Line 211: `SCYLLA_ASSERT(_schema);`
- Line 217: `SCYLLA_ASSERT(_schema);`
- Line 254: `SCYLLA_ASSERT(!_version->_backref);`
- Line 282: `SCYLLA_ASSERT(_version);`
- Line 286: `SCYLLA_ASSERT(_version);`
- Line 290: `SCYLLA_ASSERT(_version);`
- Line 294: `SCYLLA_ASSERT(_version);`
Total: 7 usages
### mutation/partition_version_list.hh
- Line 36: `SCYLLA_ASSERT(!_head->is_referenced_from_entry());`
- Line 42: `SCYLLA_ASSERT(!_tail->is_referenced_from_entry());`
- Line 70: `SCYLLA_ASSERT(!_head->is_referenced_from_entry());`
Total: 3 usages
### mutation/range_tombstone_list.cc
- Line 412: `SCYLLA_ASSERT (it != rt_list.end());`
- Line 422: `SCYLLA_ASSERT (it != rt_list.end());`
Total: 2 usages
### raft/server.cc
- Line 1720: `SCYLLA_ASSERT(_non_joint_conf_commit_promise);`
Total: 1 usages
### reader_concurrency_semaphore.cc
- Line 109: `SCYLLA_ASSERT(_permit == o._permit);`
- Line 432: `SCYLLA_ASSERT(_need_cpu_branches);`
- Line 455: `SCYLLA_ASSERT(_awaits_branches);`
- Line 1257: `SCYLLA_ASSERT(!_stopped);`
- Line 1585: `SCYLLA_ASSERT(_stats.need_cpu_permits);`
- Line 1587: `SCYLLA_ASSERT(_stats.need_cpu_permits >= _stats.awaits_permits);`
- Line 1593: `SCYLLA_ASSERT(_stats.need_cpu_permits >= _stats.awaits_permits);`
- Line 1598: `SCYLLA_ASSERT(_stats.awaits_permits);`
Total: 8 usages
### readers/multishard.cc
- Line 296: `SCYLLA_ASSERT(!_irh);`
Total: 1 usages
### repair/repair.cc
- Line 1073: `SCYLLA_ASSERT(table_names().size() == table_ids.size());`
Total: 1 usages
### replica/database.cc
- Line 3299: `SCYLLA_ASSERT(!_cf_lock.try_write_lock()); // lock should be acquired before the`
- Line 3304: `SCYLLA_ASSERT(!_cf_lock.try_write_lock()); // lock should be acquired before the`
Total: 2 usages
### replica/database.hh
- Line 1971: `SCYLLA_ASSERT(_user_sstables_manager);`
- Line 1976: `SCYLLA_ASSERT(_system_sstables_manager);`
Total: 2 usages
### replica/dirty_memory_manager.cc
- Line 67: `SCYLLA_ASSERT(!child->_heap_handle);`
Total: 1 usages
### replica/dirty_memory_manager.hh
- Line 261: `SCYLLA_ASSERT(_shutdown_requested);`
Total: 1 usages
### replica/memtable.cc
- Line 563: `SCYLLA_ASSERT(_mt._flushed_memory <= static_cast<int64_t>(_mt.occupancy().total_`
- Line 860: `SCYLLA_ASSERT(!reclaiming_enabled());`
Total: 2 usages
### replica/table.cc
- Line 2829: `SCYLLA_ASSERT(!trange.start()->is_inclusive() && trange.end()->is_inclusive());`
Total: 1 usages
### schema/schema.hh
- Line 1022: `SCYLLA_ASSERT(_schema->is_view());`
Total: 1 usages
### schema/schema_registry.cc
- Line 257: `SCYLLA_ASSERT(_state >= state::LOADED);`
- Line 262: `SCYLLA_ASSERT(_state >= state::LOADED);`
- Line 329: `SCYLLA_ASSERT(o._cpu_of_origin == current);`
Total: 3 usages
### service/direct_failure_detector/failure_detector.cc
- Line 628: `SCYLLA_ASSERT(alive != endpoint_liveness.marked_alive);`
Total: 1 usages
### service/storage_service.cc
- Line 3398: `SCYLLA_ASSERT(this_shard_id() == 0);`
- Line 5760: `SCYLLA_ASSERT(this_shard_id() == 0);`
- Line 5775: `SCYLLA_ASSERT(this_shard_id() == 0);`
- Line 5787: `SCYLLA_ASSERT(this_shard_id() == 0);`
Total: 4 usages
### sstables/generation_type.hh
- Line 132: `SCYLLA_ASSERT(bool(gen));`
Total: 1 usages
### sstables/partition_index_cache.hh
- Line 62: `SCYLLA_ASSERT(!ready());`
Total: 1 usages
### sstables/sstables_manager.hh
- Line 244: `SCYLLA_ASSERT(_sstables_registry && "sstables_registry is not plugged");`
Total: 1 usages
### sstables/storage.hh
- Line 86: `SCYLLA_ASSERT(false && "Changing directory not implemented");`
- Line 89: `SCYLLA_ASSERT(false && "Direct links creation not implemented");`
- Line 92: `SCYLLA_ASSERT(false && "Direct move not implemented");`
Total: 3 usages
### sstables_loader.cc
- Line 735: `SCYLLA_ASSERT(p);`
Total: 1 usages
### tasks/task_manager.cc
- Line 56: `SCYLLA_ASSERT(inserted);`
- Line 76: `SCYLLA_ASSERT(child->get_status().progress_units == progress_units);`
- Line 454: `SCYLLA_ASSERT(this_shard_id() == 0);`
Total: 3 usages
### tools/schema_loader.cc
- Line 281: `SCYLLA_ASSERT(p);`
Total: 1 usages
### utils/UUID.hh
- Line 59: `SCYLLA_ASSERT(is_timestamp());`
Total: 1 usages
### utils/bptree.hh
- Line 289: `SCYLLA_ASSERT(n.is_leftmost());`
- Line 301: `SCYLLA_ASSERT(n.is_rightmost());`
- Line 343: `SCYLLA_ASSERT(leaf->is_leaf());`
- Line 434: `SCYLLA_ASSERT(d->attached());`
- Line 453: `SCYLLA_ASSERT(n._num_keys > 0);`
- Line 505: `SCYLLA_ASSERT(n->is_leftmost());`
- Line 511: `SCYLLA_ASSERT(n->is_rightmost());`
- Line 517: `SCYLLA_ASSERT(n->is_root());`
- Line 557: `SCYLLA_ASSERT(!is_end());`
- Line 566: `SCYLLA_ASSERT(!is_end());`
- Line 613: `SCYLLA_ASSERT(n->_num_keys > 0);`
- Line 833: `SCYLLA_ASSERT(_left->_num_keys > 0);`
- Line 926: `SCYLLA_ASSERT(rl == rb);`
- Line 927: `SCYLLA_ASSERT(rl <= nr);`
- Line 1037: `SCYLLA_ASSERT(is_leaf());`
- Line 1042: `SCYLLA_ASSERT(is_leaf());`
- Line 1047: `SCYLLA_ASSERT(is_leaf());`
- Line 1052: `SCYLLA_ASSERT(is_leaf());`
- Line 1062: `SCYLLA_ASSERT(t->_right == this);`
- Line 1083: `SCYLLA_ASSERT(t->_left == this);`
- Line 1091: `SCYLLA_ASSERT(t->_right == this);`
- Line 1103: `SCYLLA_ASSERT(false);`
- Line 1153: `SCYLLA_ASSERT(i <= _num_keys);`
- Line 1212: `SCYLLA_ASSERT(off <= _num_keys);`
- Line 1236: `SCYLLA_ASSERT(from._num_keys > 0);`
- Line 1389: `SCYLLA_ASSERT(!is_root());`
- Line 1450: `SCYLLA_ASSERT(_num_keys == NodeSize);`
- Line 1563: `SCYLLA_ASSERT(_num_keys < NodeSize);`
- Line 1577: `SCYLLA_ASSERT(i != 0 || left_kid_sorted(k, less));`
- Line 1647: `SCYLLA_ASSERT(nodes.empty());`
- Line 1684: `SCYLLA_ASSERT(_num_keys > 0);`
- Line 1686: `SCYLLA_ASSERT(p._kids[i].n == this);`
- Line 1788: `SCYLLA_ASSERT(_num_keys == 0);`
- Line 1789: `SCYLLA_ASSERT(is_root() || !is_leaf() || (get_prev() == this && get_next() == th`
- Line 1821: `SCYLLA_ASSERT(_parent->_kids[i].n == &other);`
- Line 1841: `SCYLLA_ASSERT(i <= _num_keys);`
- Line 1856: `SCYLLA_ASSERT(!_nodes.empty());`
- Line 1938: `SCYLLA_ASSERT(!attached());`
- Line 1943: `SCYLLA_ASSERT(attached());`
Total: 39 usages
### utils/cached_file.hh
- Line 104: `SCYLLA_ASSERT(!_use_count);`
Total: 1 usages
### utils/compact-radix-tree.hh
- Line 1026: `SCYLLA_ASSERT(check_capacity(head, ni));`
- Line 1027: `SCYLLA_ASSERT(!_data.has(ni));`
- Line 1083: `SCYLLA_ASSERT(next_cap > head._capacity);`
- Line 1149: `SCYLLA_ASSERT(capacity != 0);`
- Line 1239: `SCYLLA_ASSERT(i < Size);`
- Line 1240: `SCYLLA_ASSERT(_idx[i] == unused_node_index);`
- Line 1470: `SCYLLA_ASSERT(kid != nullptr);`
- Line 1541: `SCYLLA_ASSERT(ret.first != nullptr);`
- Line 1555: `SCYLLA_ASSERT(leaf_depth >= depth);`
- Line 1614: `SCYLLA_ASSERT(n->check_prefix(key, depth));`
- Line 1850: `SCYLLA_ASSERT(_root.is(nil_root));`
Total: 11 usages
### utils/cross-shard-barrier.hh
- Line 134: `SCYLLA_ASSERT(w.has_value());`
Total: 1 usages
### utils/double-decker.hh
- Line 200: `SCYLLA_ASSERT(!hint.match);`
- Line 366: `SCYLLA_ASSERT(nb == end._bucket);`
Total: 2 usages
### utils/intrusive-array.hh
- Line 217: `SCYLLA_ASSERT(!is_single_element());`
- Line 218: `SCYLLA_ASSERT(pos < max_len);`
- Line 225: `SCYLLA_ASSERT(pos > 0);`
- Line 238: `SCYLLA_ASSERT(train_len < max_len);`
- Line 329: `SCYLLA_ASSERT(idx < max_len); // may the force be with us...`
Total: 5 usages
### utils/intrusive_btree.hh
- Line 148: `SCYLLA_ASSERT(to.num_keys == 0);`
- Line 157: `SCYLLA_ASSERT(!attached());`
- Line 227: `SCYLLA_ASSERT(n->is_inline());`
- Line 232: `SCYLLA_ASSERT(n->is_inline());`
- Line 288: `SCYLLA_ASSERT(n.is_root());`
- Line 294: `SCYLLA_ASSERT(n.is_leftmost());`
- Line 302: `SCYLLA_ASSERT(n.is_rightmost());`
- Line 368: `SCYLLA_ASSERT(_root->is_leaf());`
- Line 371: `SCYLLA_ASSERT(_inline.empty());`
- Line 601: `SCYLLA_ASSERT(n->is_leaf());`
- Line 673: `SCYLLA_ASSERT(!is_end());`
- Line 674: `SCYLLA_ASSERT(h->attached());`
- Line 677: `SCYLLA_ASSERT(_idx < cur.n->_base.num_keys);`
- Line 679: `SCYLLA_ASSERT(_hook->attached());`
- Line 690: `SCYLLA_ASSERT(!is_end());`
- Line 764: `SCYLLA_ASSERT(n->num_keys > 0);`
- Line 994: `SCYLLA_ASSERT(!_it.is_end());`
- Line 1178: `SCYLLA_ASSERT(is_leaf());`
- Line 1183: `SCYLLA_ASSERT(is_root());`
- Line 1261: `SCYLLA_ASSERT(!is_root());`
- Line 1268: `SCYLLA_ASSERT(p->_base.num_keys > 0 && p->_kids[0] == this);`
- Line 1275: `SCYLLA_ASSERT(p->_base.num_keys > 0 && p->_kids[p->_base.num_keys] == this);`
- Line 1286: `SCYLLA_ASSERT(false);`
- Line 1291: `SCYLLA_ASSERT(!nb->is_inline());`
- Line 1296: `SCYLLA_ASSERT(!nb->is_inline());`
- Line 1338: `SCYLLA_ASSERT(_base.num_keys == 0);`
- Line 1373: `SCYLLA_ASSERT(!(is_leftmost() || is_rightmost()));`
- Line 1378: `SCYLLA_ASSERT(p->_kids[i] != this);`
- Line 1396: `SCYLLA_ASSERT(!is_leaf());`
- Line 1537: `SCYLLA_ASSERT(src != _base.num_keys); // need more keys for the next leaf`
- Line 1995: `SCYLLA_ASSERT(_parent.n->_base.num_keys > 0);`
- Line 2135: `SCYLLA_ASSERT(is_leaf());`
- Line 2144: `SCYLLA_ASSERT(_base.num_keys != 0);`
- Line 2160: `SCYLLA_ASSERT(_base.num_keys != 0);`
- Line 2172: `SCYLLA_ASSERT(!empty());`
- Line 2198: `SCYLLA_ASSERT(leaf == ret->is_leaf());`
Total: 36 usages
### utils/loading_shared_values.hh
- Line 203: `SCYLLA_ASSERT(!_set.size());`
Total: 1 usages
### utils/logalloc.cc
- Line 544: `SCYLLA_ASSERT(!_background_reclaimer);`
- Line 926: `SCYLLA_ASSERT(idx < _segments.size());`
- Line 933: `SCYLLA_ASSERT(idx < _segments.size());`
- Line 957: `SCYLLA_ASSERT(i != _segments.end());`
- Line 1323: `SCYLLA_ASSERT(_lsa_owned_segments_bitmap.test(idx_from_segment(seg)));`
- Line 1366: `SCYLLA_ASSERT(desc._region);`
- Line 1885: `SCYLLA_ASSERT(desc._buf_pointers.empty());`
- Line 1911: `SCYLLA_ASSERT(&desc == old_ptr->_desc);`
- Line 2105: `SCYLLA_ASSERT(seg);`
- Line 2116: `SCYLLA_ASSERT(seg);`
- Line 2341: `SCYLLA_ASSERT(pool.current_emergency_reserve_goal() >= n_segments);`
Total: 11 usages
### utils/logalloc.hh
- Line 307: `SCYLLA_ASSERT(this_shard_id() == _cpu);`
Total: 1 usages
### utils/reusable_buffer.hh
- Line 60: `SCYLLA_ASSERT(_refcount == 0);`
Total: 1 usages
## SCYLLA_ASSERT in Destructors
### api/column_family.cc
- Line 102: `SCYLLA_ASSERT(this_shard_id() == 0);`
Total: 1 usages
### cdc/generation.cc
- Line 846: `SCYLLA_ASSERT(_stopped);`
Total: 1 usages
### cdc/log.cc
- Line 173: `SCYLLA_ASSERT(_stopped);`
Total: 1 usages
### compaction/compaction_manager.cc
- Line 1074: `SCYLLA_ASSERT(_state == state::none || _state == state::stopped);`
Total: 1 usages
### db/hints/internal/hint_endpoint_manager.cc
- Line 188: `SCYLLA_ASSERT(stopped());`
Total: 1 usages
### mutation/partition_version.cc
- Line 347: `SCYLLA_ASSERT(!_snapshot->is_locked());`
Total: 1 usages
### reader_concurrency_semaphore.cc
- Line 1116: `SCYLLA_ASSERT(!_stats.waiters);`
- Line 1125: `SCYLLA_ASSERT(_inactive_reads.empty() && !_close_readers_gate.get_count() && !_p`
Total: 2 usages
### repair/row_level.cc
- Line 3647: `SCYLLA_ASSERT(_state == state::none || _state == state::stopped);`
Total: 1 usages
### replica/cell_locking.hh
- Line 371: `SCYLLA_ASSERT(_partitions.empty());`
Total: 1 usages
### replica/distributed_loader.cc
- Line 305: `SCYLLA_ASSERT(_sstable_directories.empty());`
Total: 1 usages
### schema/schema_registry.cc
- Line 45: `SCYLLA_ASSERT(!_schema);`
Total: 1 usages
### service/direct_failure_detector/failure_detector.cc
- Line 378: `SCYLLA_ASSERT(_ping_fiber.available());`
- Line 379: `SCYLLA_ASSERT(_notify_fiber.available());`
- Line 701: `SCYLLA_ASSERT(_shard_workers.empty());`
- Line 702: `SCYLLA_ASSERT(_destroy_subscriptions.available());`
- Line 703: `SCYLLA_ASSERT(_update_endpoint_fiber.available());`
- Line 707: `SCYLLA_ASSERT(!_impl);`
Total: 6 usages
### service/load_broadcaster.hh
- Line 37: `SCYLLA_ASSERT(_stopped);`
Total: 1 usages
### service/paxos/paxos_state.cc
- Line 323: `SCYLLA_ASSERT(_stopped);`
Total: 1 usages
### service/storage_proxy.cc
- Line 281: `SCYLLA_ASSERT(_stopped);`
- Line 3207: `SCYLLA_ASSERT(!_remote);`
Total: 2 usages
### service/tablet_allocator.cc
- Line 3288: `SCYLLA_ASSERT(_stopped);`
Total: 1 usages
### sstables/compressor.cc
- Line 1271: `SCYLLA_ASSERT(thread::running_in_thread());`
Total: 1 usages
### sstables/sstables_manager.cc
- Line 58: `SCYLLA_ASSERT(_closing);`
- Line 59: `SCYLLA_ASSERT(_active.empty());`
- Line 60: `SCYLLA_ASSERT(_undergoing_close.empty());`
Total: 3 usages
### sstables/sstables_manager.hh
- Line 188: `SCYLLA_ASSERT(_storage != nullptr);`
Total: 1 usages
### utils/cached_file.hh
- Line 477: `SCYLLA_ASSERT(_cache.empty());`
Total: 1 usages
### utils/disk_space_monitor.cc
- Line 66: `SCYLLA_ASSERT(_poller_fut.available());`
Total: 1 usages
### utils/file_lock.cc
- Line 34: `SCYLLA_ASSERT(_fd.get() != -1);`
- Line 36: `SCYLLA_ASSERT(r == 0);`
Total: 2 usages
### utils/logalloc.cc
- Line 1991: `SCYLLA_ASSERT(desc.is_empty());`
- Line 1996: `SCYLLA_ASSERT(segment_pool().descriptor(_active).is_empty());`
Total: 2 usages
### utils/lru.hh
- Line 41: `SCYLLA_ASSERT(!_lru_link.is_linked());`
Total: 1 usages
### utils/replicator.hh
- Line 221: `SCYLLA_ASSERT(_stopped);`
Total: 1 usages

View File

@@ -28,8 +28,7 @@ Incremental Repair is only supported for tables that use the tablets architectur
Incremental Repair Modes
------------------------
Incremental is currently disabled by default. You can control its behavior for a given repair operation using the ``incremental_mode`` parameter.
This is useful for enabling incremental repair, or in situations where you might need to force a full data validation.
While incremental repair is the default and recommended mode, you can control its behavior for a given repair operation using the ``incremental_mode`` parameter. This is useful for situations where you might need to force a full data validation.
The available modes are:

View File

@@ -53,13 +53,13 @@ ScyllaDB nodetool cluster repair command supports the following options:
nodetool cluster repair --tablet-tokens 1,10474535988
- ``--incremental-mode`` specifies the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to 'disabled'.
- ``--incremental-mode`` specifies the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to incremental.
For example:
::
nodetool cluster repair --incremental-mode disabled
nodetool cluster repair --incremental-mode regular
- ``keyspace`` executes a repair on a specific keyspace. The default is all keyspaces.

View File

@@ -110,6 +110,7 @@ To display the log classes (output changes with each version so your display may
keys
keyspace_utils
large_data
legacy_schema_migrator
lister
load_balancer
load_broadcaster

8
docs/poetry.lock generated
View File

@@ -1018,14 +1018,14 @@ sphinx-markdown-tables = "0.0.17"
[[package]]
name = "sphinx-scylladb-theme"
version = "1.8.10"
version = "1.8.9"
description = "A Sphinx Theme for ScyllaDB documentation projects"
optional = false
python-versions = "<4.0,>=3.10"
groups = ["main"]
files = [
{file = "sphinx_scylladb_theme-1.8.10-py3-none-any.whl", hash = "sha256:8b930f33bec7308ccaa92698ebb5ad85059bcbf93a463f92917aeaf473fce632"},
{file = "sphinx_scylladb_theme-1.8.10.tar.gz", hash = "sha256:8a78a9b692d9a946be2c4a64aa472fd82204cc8ea0b1ee7f60de6db35b356326"},
{file = "sphinx_scylladb_theme-1.8.9-py3-none-any.whl", hash = "sha256:f8649a7753a29494fd2b417d1cb855035dddb9ebd498ea033fd73f5f9338271e"},
{file = "sphinx_scylladb_theme-1.8.9.tar.gz", hash = "sha256:ab7cda4c10a0d067c5c3a45f7b1f68cb8ebefe135a0be0738bfa282a344769b6"},
]
[package.dependencies]
@@ -1603,4 +1603,4 @@ files = [
[metadata]
lock-version = "2.1"
python-versions = "^3.10"
content-hash = "0ae673106f45d3465cbdabbf511e165ca44feadd34d7753f2e68093afaa95c79"
content-hash = "74912627a3f424290ed7889451c0bdb1a862ab85b1d07c85f4f3b8c34f32a020"

View File

@@ -9,7 +9,7 @@ package-mode = false
python = "^3.10"
pygments = "^2.18.0"
redirects_cli ="^0.1.3"
sphinx-scylladb-theme = "^1.8.10"
sphinx-scylladb-theme = "^1.8.9"
sphinx-sitemap = "^2.6.0"
sphinx-autobuild = "^2024.4.19"
Sphinx = "^7.3.7"

View File

@@ -129,6 +129,6 @@ struct direct_fd_ping_reply {
std::variant<std::monostate, service::wrong_destination, service::group_liveness_info> result;
};
verb [[with_client_info, with_timeout, cancellable]] direct_fd_ping (raft::server_id dst_id) -> service::direct_fd_ping_reply;
verb [[with_client_info, cancellable]] direct_fd_ping (raft::server_id dst_id) -> service::direct_fd_ping_reply;
} // namespace service

View File

@@ -38,7 +38,6 @@ debian_base_packages=(
python3-aiohttp
python3-pyparsing
python3-colorama
python3-dev
python3-tabulate
python3-pytest
python3-pytest-asyncio
@@ -66,7 +65,6 @@ debian_base_packages=(
git-lfs
e2fsprogs
fuse3
libev-dev # for python driver
)
fedora_packages=(
@@ -92,7 +90,6 @@ fedora_packages=(
patchelf
python3
python3-aiohttp
python3-devel
python3-pip
python3-file-magic
python3-colorama
@@ -157,8 +154,6 @@ fedora_packages=(
https://github.com/scylladb/cassandra-stress/releases/download/v3.18.1/cassandra-stress-java21-3.18.1-1.noarch.rpm
elfutils
jq
libev-devel # for python driver
)
fedora_python3_packages=(

View File

@@ -15,22 +15,3 @@ with the Apache License (version 2) and ScyllaDB-Source-Available-1.0.
They contain the following tag:
SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
### `musl libc` files
`licenses/musl-license.txt` is obtained from:
https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
`utils/crypt_sha512.cc` is obtained from:
https://git.musl-libc.org/cgit/musl/tree/src/crypt/crypt_sha512.c
Both files are obtained from git.musl-libc.org.
Import commit:
commit 1b76ff0767d01df72f692806ee5adee13c67ef88
Author: Alex Rønne Petersen <alex@alexrp.com>
Date: Sun Oct 12 05:35:19 2025 +0200
s390x: shuffle register usage in __tls_get_offset to avoid r0 as address
musl as a whole is licensed under the standard MIT license included in
`licenses/musl-license.txt`.

View File

@@ -1,193 +0,0 @@
musl as a whole is licensed under the following standard MIT license:
----------------------------------------------------------------------
Copyright © 2005-2020 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------
Authors/contributors include:
A. Wilcox
Ada Worcester
Alex Dowad
Alex Suykov
Alexander Monakov
Andre McCurdy
Andrew Kelley
Anthony G. Basile
Aric Belsito
Arvid Picciani
Bartosz Brachaczek
Benjamin Peterson
Bobby Bingham
Boris Brezillon
Brent Cook
Chris Spiegel
Clément Vasseur
Daniel Micay
Daniel Sabogal
Daurnimator
David Carlier
David Edelsohn
Denys Vlasenko
Dmitry Ivanov
Dmitry V. Levin
Drew DeVault
Emil Renner Berthing
Fangrui Song
Felix Fietkau
Felix Janda
Gianluca Anzolin
Hauke Mehrtens
He X
Hiltjo Posthuma
Isaac Dunham
Jaydeep Patil
Jens Gustedt
Jeremy Huntwork
Jo-Philipp Wich
Joakim Sindholt
John Spencer
Julien Ramseier
Justin Cormack
Kaarle Ritvanen
Khem Raj
Kylie McClain
Leah Neukirchen
Luca Barbato
Luka Perkov
Lynn Ochs
M Farkas-Dyck (Strake)
Mahesh Bodapati
Markus Wichmann
Masanori Ogino
Michael Clark
Michael Forney
Mikhail Kremnyov
Natanael Copa
Nicholas J. Kain
orc
Pascal Cuoq
Patrick Oppenlander
Petr Hosek
Petr Skocik
Pierre Carrier
Reini Urban
Rich Felker
Richard Pennington
Ryan Fairfax
Samuel Holland
Segev Finer
Shiz
sin
Solar Designer
Stefan Kristiansson
Stefan O'Rear
Szabolcs Nagy
Timo Teräs
Trutz Behn
Will Dietz
William Haddon
William Pitcock
Portions of this software are derived from third-party works licensed
under terms compatible with the above MIT license:
The TRE regular expression implementation (src/regex/reg* and
src/regex/tre*) is Copyright © 2001-2008 Ville Laurikari and licensed
under a 2-clause BSD license (license text in the source files). The
included version has been heavily modified by Rich Felker in 2012, in
the interests of size, simplicity, and namespace cleanliness.
Much of the math library code (src/math/* and src/complex/*) is
Copyright © 1993,2004 Sun Microsystems or
Copyright © 2003-2011 David Schultz or
Copyright © 2003-2009 Steven G. Kargl or
Copyright © 2003-2009 Bruce D. Evans or
Copyright © 2008 Stephen L. Moshier or
Copyright © 2017-2018 Arm Limited
and labelled as such in comments in the individual source files. All
have been licensed under extremely permissive terms.
The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008
The Android Open Source Project and is licensed under a two-clause BSD
license. It was taken from Bionic libc, used on Android.
The AArch64 memcpy and memset code (src/string/aarch64/*) are
Copyright © 1999-2019, Arm Limited.
The implementation of DES for crypt (src/crypt/crypt_des.c) is
Copyright © 1994 David Burren. It is licensed under a BSD license.
The implementation of blowfish crypt (src/crypt/crypt_blowfish.c) was
originally written by Solar Designer and placed into the public
domain. The code also comes with a fallback permissive license for use
in jurisdictions that may not recognize the public domain.
The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011
Lynn Ochs and is licensed under an MIT-style license.
The x86_64 port was written by Nicholas J. Kain and is licensed under
the standard MIT terms.
The mips and microblaze ports were originally written by Richard
Pennington for use in the ellcc project. The original code was adapted
by Rich Felker for build system and code conventions during upstream
integration. It is licensed under the standard MIT terms.
The mips64 port was contributed by Imagination Technologies and is
licensed under the standard MIT terms.
The powerpc port was also originally written by Richard Pennington,
and later supplemented and integrated by John Spencer. It is licensed
under the standard MIT terms.
All other files which have no copyright comments are original works
produced specifically for use as part of this library, written either
by Rich Felker, the main author of the library, or by one or more
contibutors listed above. Details on authorship of individual files
can be found in the git version control history of the project. The
omission of copyright and license comments in each file is in the
interest of source tree size.
In addition, permission is hereby granted for all public header files
(include/* and arch/*/bits/*) and crt files intended to be linked into
applications (crt/*, ldso/dlstart.c, and arch/*/crt_arch.h) to omit
the copyright notice and permission notice otherwise required by the
license, and to use these files without any requirement of
attribution. These files include substantial contributions from:
Bobby Bingham
John Spencer
Nicholas J. Kain
Rich Felker
Richard Pennington
Stefan Kristiansson
Szabolcs Nagy
all of whom have explicitly granted such permission.
This file previously contained text expressing a belief that most of
the files covered by the above exception were sufficiently trivial not
to be subject to copyright, resulting in confusion over whether it
negated the permissions granted in the license. In the spirit of
permissive licensing, and of not having licensing issues being an
obstacle to adoption, that text has been removed.

View File

@@ -200,10 +200,7 @@ enum class tablet_repair_incremental_mode : uint8_t {
disabled,
};
// FIXME: Incremental repair is disabled by default due to
// https://github.com/scylladb/scylladb/issues/26041 and
// https://github.com/scylladb/scylladb/issues/27414
constexpr tablet_repair_incremental_mode default_tablet_repair_incremental_mode{tablet_repair_incremental_mode::disabled};
constexpr tablet_repair_incremental_mode default_tablet_repair_incremental_mode{tablet_repair_incremental_mode::incremental};
sstring tablet_repair_incremental_mode_to_string(tablet_repair_incremental_mode);
tablet_repair_incremental_mode tablet_repair_incremental_mode_from_string(const sstring&);

14
main.cc
View File

@@ -39,6 +39,7 @@
#include "api/api_init.hh"
#include "db/config.hh"
#include "db/extensions.hh"
#include "db/legacy_schema_migrator.hh"
#include "service/storage_service.hh"
#include "service/migration_manager.hh"
#include "service/tablet_allocator.hh"
@@ -748,6 +749,8 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
// inherit Seastar's CPU affinity masks. We want this thread to be free
// to migrate between CPUs; we think that's what makes the most sense.
auto rpc_dict_training_worker = utils::alien_worker(startlog, 19, "rpc-dict");
// niceness=10 is ~10% of normal process time
auto hashing_worker = utils::alien_worker(startlog, 10, "pwd-hash");
return app.run(ac, av, [&] () -> future<int> {
@@ -777,7 +780,8 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
return seastar::async([&app, cfg, ext, &disk_space_monitor_shard0, &cm, &sstm, &db, &qp, &bm, &proxy, &mapreduce_service, &mm, &mm_notifier, &ctx, &opts, &dirs,
&prometheus_server, &cf_cache_hitrate_calculator, &load_meter, &feature_service, &gossiper, &snitch,
&token_metadata, &erm_factory, &snapshot_ctl, &messaging, &sst_dir_semaphore, &raft_gr, &service_memory_limiter,
&repair, &sst_loader, &auth_cache, &ss, &lifecycle_notifier, &stream_manager, &task_manager, &rpc_dict_training_worker, &vector_store_client] {
&repair, &sst_loader, &auth_cache, &ss, &lifecycle_notifier, &stream_manager, &task_manager, &rpc_dict_training_worker,
&hashing_worker, &vector_store_client] {
try {
if (opts.contains("relabel-config-file") && !opts["relabel-config-file"].as<sstring>().empty()) {
// calling update_relabel_config_from_file can cause an exception that would stop startup
@@ -1637,7 +1641,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
fd.start(
std::ref(fd_pinger), std::ref(fd_clock),
service::direct_fd_clock::base::duration{std::chrono::milliseconds{100}}.count(),
service::direct_fd_clock::base::duration{std::chrono::milliseconds{cfg->direct_failure_detector_ping_timeout_in_ms()}}.count(), dbcfg.gossip_scheduling_group).get();
service::direct_fd_clock::base::duration{std::chrono::milliseconds{cfg->direct_failure_detector_ping_timeout_in_ms()}}.count()).get();
auto stop_fd = defer_verbose_shutdown("direct_failure_detector", [] {
fd.stop().get();
@@ -1847,6 +1851,8 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
group0_client.init().get();
checkpoint(stop_signal, "initializing system schema");
// schema migration, if needed, is also done on shard 0
db::legacy_schema_migrator::migrate(proxy, db, sys_ks, qp.local()).get();
db::schema_tables::save_system_schema(qp.local()).get();
db::schema_tables::recalculate_schema_version(sys_ks, proxy, feature_service.local()).get();
@@ -2057,7 +2063,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
maintenance_auth_config.authenticator_java_name = sstring{auth::allow_all_authenticator_name};
maintenance_auth_config.role_manager_java_name = sstring{auth::maintenance_socket_role_manager_name};
maintenance_auth_service.start(perm_cache_config, std::ref(qp), std::ref(group0_client), std::ref(mm_notifier), std::ref(mm), maintenance_auth_config, maintenance_socket_enabled::yes, std::ref(auth_cache)).get();
maintenance_auth_service.start(perm_cache_config, std::ref(qp), std::ref(group0_client), std::ref(mm_notifier), std::ref(mm), maintenance_auth_config, maintenance_socket_enabled::yes, std::ref(auth_cache), std::ref(hashing_worker)).get();
cql_maintenance_server_ctl.emplace(maintenance_auth_service, mm_notifier, gossiper, qp, service_memory_limiter, sl_controller, lifecycle_notifier, *cfg, maintenance_cql_sg_stats_key, maintenance_socket_enabled::yes, dbcfg.statement_scheduling_group);
@@ -2333,7 +2339,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
auth_config.authenticator_java_name = qualified_authenticator_name;
auth_config.role_manager_java_name = qualified_role_manager_name;
auth_service.start(std::move(perm_cache_config), std::ref(qp), std::ref(group0_client), std::ref(mm_notifier), std::ref(mm), auth_config, maintenance_socket_enabled::no, std::ref(auth_cache)).get();
auth_service.start(std::move(perm_cache_config), std::ref(qp), std::ref(group0_client), std::ref(mm_notifier), std::ref(mm), auth_config, maintenance_socket_enabled::no, std::ref(auth_cache), std::ref(hashing_worker)).get();
std::any stop_auth_service;
// Has to be called after node joined the cluster (join_cluster())

View File

@@ -686,7 +686,6 @@ static constexpr unsigned do_get_rpc_client_idx(messaging_verb verb) {
case messaging_verb::RAFT_MODIFY_CONFIG:
case messaging_verb::RAFT_PULL_SNAPSHOT:
case messaging_verb::NOTIFY_BANNED:
case messaging_verb::DIRECT_FD_PING:
// See comment above `TOPOLOGY_INDEPENDENT_IDX`.
// DO NOT put any 'hot' (e.g. data path) verbs in this group,
// only verbs which are 'rare' and 'cheap'.
@@ -748,6 +747,7 @@ static constexpr unsigned do_get_rpc_client_idx(messaging_verb verb) {
case messaging_verb::PAXOS_ACCEPT:
case messaging_verb::PAXOS_LEARN:
case messaging_verb::PAXOS_PRUNE:
case messaging_verb::DIRECT_FD_PING:
return 2;
case messaging_verb::MUTATION_DONE:
case messaging_verb::MUTATION_FAILED:

View File

@@ -575,15 +575,10 @@ utils::coroutine partition_entry::apply_to_incomplete(const schema& s,
}
res.row.set_range_tombstone(cur.range_tombstone_for_row() + src_cur.range_tombstone());
if (need_preempt()) {
lb = position_in_partition(cur.position());
++tracker.get_stats().rows_covered_by_range_tombstones_from_memtable;
return stop_iteration::no;
}
// FIXME: Compact the row
++tracker.get_stats().rows_covered_by_range_tombstones_from_memtable;
cur.next();
// FIXME: preempt
}
}
{

View File

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3cbe2dd05945f8fb76ebce2ea70864063d2b282c4d5080af1f290ead43321ab3
size 6444732
oid sha256:80a47fe93866989aaf7e949168fcd308e95841e78c976a61f9eac20bfdd34d96
size 6448960

View File

@@ -46,7 +46,7 @@ bool follower_progress::is_stray_reject(const append_reply::rejected& rejected)
// any reject during snapshot transfer is stray one
return true;
default:
SCYLLA_ASSERT(false);
scylla_assert(false, "invalid follower_progress state: {}", static_cast<int>(state));
}
return false;
}
@@ -87,7 +87,7 @@ bool follower_progress::can_send_to() {
// before starting to sync the log.
return false;
}
SCYLLA_ASSERT(false);
scylla_assert(false, "invalid follower_progress state in can_send_to: {}", static_cast<int>(state));
return false;
}

View File

@@ -297,17 +297,17 @@ public:
const dht::token_range& token_range() const noexcept;
size_t memtable_count() const;
size_t memtable_count() const noexcept;
const compaction_group_ptr& main_compaction_group() const noexcept;
const std::vector<compaction_group_ptr>& split_ready_compaction_groups() const;
compaction_group_ptr& select_compaction_group(locator::tablet_range_side) noexcept;
uint64_t live_disk_space_used() const;
uint64_t live_disk_space_used() const noexcept;
void for_each_compaction_group(std::function<void(const compaction_group_ptr&)> action) const;
utils::small_vector<compaction_group_ptr, 3> compaction_groups();
utils::small_vector<const_compaction_group_ptr, 3> compaction_groups() const;
void for_each_compaction_group(std::function<void(const compaction_group_ptr&)> action) const noexcept;
utils::small_vector<compaction_group_ptr, 3> compaction_groups() noexcept;
utils::small_vector<const_compaction_group_ptr, 3> compaction_groups() const noexcept;
utils::small_vector<compaction_group_ptr, 3> split_unready_groups() const;
bool split_unready_groups_are_empty() const;
@@ -430,7 +430,7 @@ public:
virtual storage_group& storage_group_for_token(dht::token) const = 0;
virtual utils::chunked_vector<storage_group_ptr> storage_groups_for_token_range(dht::token_range tr) const = 0;
virtual locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const = 0;
virtual locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const noexcept = 0;
virtual bool all_storage_groups_split() = 0;
virtual future<> split_all_storage_groups(tasks::task_info tablet_split_task_info) = 0;
virtual future<> maybe_split_compaction_group_of(size_t idx) = 0;

View File

@@ -1133,7 +1133,7 @@ public:
// The tablet filter is used to not double account migrating tablets, so it's important that
// only one of pending or leaving replica is accounted based on current migration stage.
locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const;
locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const noexcept;
const db::view::stats& get_view_stats() const {
return _view_stats;

View File

@@ -234,12 +234,18 @@ distributed_loader::get_sstables_from_upload_dir(sharded<replica::database>& db,
}
future<std::tuple<table_id, std::vector<std::vector<sstables::shared_sstable>>>>
distributed_loader::get_sstables_from_object_store(sharded<replica::database>& db, sstring ks, sstring cf, std::vector<sstring> sstables, sstring endpoint, sstring type, sstring bucket, sstring prefix, sstables::sstable_open_config cfg, std::function<seastar::abort_source*()> get_abort_src) {
return get_sstables_from(db, ks, cf, cfg, [bucket, endpoint, type, prefix, sstables=std::move(sstables), &get_abort_src] (auto& global_table, auto& directory) {
distributed_loader::get_sstables_from_object_store(sharded<replica::database>& db, sstring ks, sstring cf, std::vector<sstring> sstables, sstring endpoint, sstring bucket, sstring prefix, sstables::sstable_open_config cfg, std::function<seastar::abort_source*()> get_abort_src) {
return get_sstables_from(db, ks, cf, cfg, [bucket, endpoint, prefix, sstables=std::move(sstables), &get_abort_src, &db] (auto& global_table, auto& directory) {
return directory.start(global_table.as_sharded_parameter(),
sharded_parameter([bucket, endpoint, type, prefix, &get_abort_src] {
sharded_parameter([bucket, endpoint, prefix, &get_abort_src, &db] {
auto eps = db.local().get_config().object_storage_endpoints()
| std::views::filter([&endpoint](auto& ep) { return ep.key() == endpoint; })
;
if (eps.empty()) {
throw std::invalid_argument(fmt::format("Undefined endpoint {}", endpoint));
}
seastar::abort_source* as = get_abort_src ? get_abort_src() : nullptr;
auto opts = data_dictionary::make_object_storage_options(endpoint, type, bucket, prefix, as);
auto opts = data_dictionary::make_object_storage_options(endpoint, eps.front().type(), bucket, prefix, as);
return make_lw_shared<const data_dictionary::storage_options>(std::move(opts));
}),
sstables,

View File

@@ -92,7 +92,7 @@ public:
static future<std::tuple<table_id, std::vector<std::vector<sstables::shared_sstable>>>>
get_sstables_from_upload_dir(sharded<replica::database>& db, sstring ks, sstring cf, sstables::sstable_open_config cfg);
static future<std::tuple<table_id, std::vector<std::vector<sstables::shared_sstable>>>>
get_sstables_from_object_store(sharded<replica::database>& db, sstring ks, sstring cf, std::vector<sstring> sstables, sstring endpoint, sstring type, sstring bucket, sstring prefix, sstables::sstable_open_config cfg, std::function<seastar::abort_source*()> = {});
get_sstables_from_object_store(sharded<replica::database>& db, sstring ks, sstring cf, std::vector<sstring> sstables, sstring endpoint, sstring bucket, sstring prefix, sstables::sstable_open_config cfg, std::function<seastar::abort_source*()> = {});
static future<> process_upload_dir(sharded<replica::database>& db, sharded<db::view::view_builder>& vb, sharded<db::view::view_building_worker>& vbw, sstring ks_name, sstring cf_name, bool skip_cleanup, bool skip_reshape);
};

View File

@@ -708,7 +708,7 @@ public:
return *_single_sg;
}
locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)>) const override {
locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)>) const noexcept override {
return locator::combined_load_stats{
.table_ls = locator::table_load_stats{
.size_in_bytes = _single_sg->live_disk_space_used(),
@@ -874,7 +874,7 @@ public:
return storage_group_for_id(storage_group_of(token).first);
}
locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const override;
locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const noexcept override;
bool all_storage_groups_split() override;
future<> split_all_storage_groups(tasks::task_info tablet_split_task_info) override;
future<> maybe_split_compaction_group_of(size_t idx) override;
@@ -922,7 +922,7 @@ compaction_group_ptr& storage_group::select_compaction_group(locator::tablet_ran
return _main_cg;
}
void storage_group::for_each_compaction_group(std::function<void(const compaction_group_ptr&)> action) const {
void storage_group::for_each_compaction_group(std::function<void(const compaction_group_ptr&)> action) const noexcept {
action(_main_cg);
for (auto& cg : _merging_groups) {
action(cg);
@@ -932,7 +932,7 @@ void storage_group::for_each_compaction_group(std::function<void(const compactio
}
}
utils::small_vector<compaction_group_ptr, 3> storage_group::compaction_groups() {
utils::small_vector<compaction_group_ptr, 3> storage_group::compaction_groups() noexcept {
utils::small_vector<compaction_group_ptr, 3> cgs;
for_each_compaction_group([&cgs] (const compaction_group_ptr& cg) {
cgs.push_back(cg);
@@ -940,7 +940,7 @@ utils::small_vector<compaction_group_ptr, 3> storage_group::compaction_groups()
return cgs;
}
utils::small_vector<const_compaction_group_ptr, 3> storage_group::compaction_groups() const {
utils::small_vector<const_compaction_group_ptr, 3> storage_group::compaction_groups() const noexcept {
utils::small_vector<const_compaction_group_ptr, 3> cgs;
for_each_compaction_group([&cgs] (const compaction_group_ptr& cg) {
cgs.push_back(cg);
@@ -1890,7 +1890,7 @@ sstables::file_size_stats compaction_group::live_disk_space_used_full_stats() co
return _main_sstables->get_file_size_stats() + _maintenance_sstables->get_file_size_stats();
}
uint64_t storage_group::live_disk_space_used() const {
uint64_t storage_group::live_disk_space_used() const noexcept {
auto cgs = const_cast<storage_group&>(*this).compaction_groups();
return std::ranges::fold_left(cgs | std::views::transform(std::mem_fn(&compaction_group::live_disk_space_used)), uint64_t(0), std::plus{});
}
@@ -2813,7 +2813,7 @@ void table::on_flush_timer() {
});
}
locator::combined_load_stats tablet_storage_group_manager::table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const {
locator::combined_load_stats tablet_storage_group_manager::table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const noexcept {
locator::table_load_stats table_stats;
table_stats.split_ready_seq_number = _split_ready_seq_number;
@@ -2836,7 +2836,7 @@ locator::combined_load_stats tablet_storage_group_manager::table_load_stats(std:
};
}
locator::combined_load_stats table::table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const {
locator::combined_load_stats table::table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const noexcept {
return _sg_manager->table_load_stats(std::move(tablet_filter));
}
@@ -3453,7 +3453,7 @@ size_t compaction_group::memtable_count() const noexcept {
return _memtables->size();
}
size_t storage_group::memtable_count() const {
size_t storage_group::memtable_count() const noexcept {
return std::ranges::fold_left(compaction_groups() | std::views::transform(std::mem_fn(&compaction_group::memtable_count)), size_t(0), std::plus{});
}

View File

@@ -22,7 +22,7 @@ format_match = re.compile(r'\s*(?:seastar::)?format\(\s*"([^"]+)"\s*,\s*(.*)\s*'
def handle_error(message, strict=True, verbose_mode=False):
if strict:
print(f"[ERROR] {message}")
exit(1)
exit(-1)
elif verbose_mode:
print(f"[WARNING] {message}")
@@ -180,11 +180,12 @@ def get_metrics_from_file(file_name, prefix, metrics_information, verb=None, str
groups = {}
if clean_name in metrics_information:
if (isinstance(metrics_information[clean_name], str) and metrics_information[clean_name] == "skip") or "skip" in metrics_information[clean_name]:
return {}
exit(0)
param_mapping = metrics_information[clean_name]["params"] if clean_name in metrics_information and "params" in metrics_information[clean_name] else {}
groups = metrics_information[clean_name]["groups"] if clean_name in metrics_information and "groups" in metrics_information[clean_name] else {}
metrics = {}
multi_line = False
names = undefined
typ = undefined
line_number = 0;

View File

@@ -1,6 +1,6 @@
"cdc/log.cc":
params:
cdc_group_name: "cdc"
cdc_group_name: cdc
part_name;suffix: [["static_row", "total"],["clustering_row", "total"], ["map", "total"], ["set", "total"], ["list", "total"], ["udt", "total"], ["range_tombstone", "total"],["partition_delete", "total"],["row_delete", "total"], ["static_row", "failed"],["clustering_row", "failed"], ["map", "failed"], ["set", "failed"], ["list", "failed"], ["udt", "failed"], ["range_tombstone", "failed"],["partition_delete", "failed"],["row_delete", "failed"]]
kind: ["total", "failed"]
"db/commitlog/commitlog.cc":
@@ -9,7 +9,7 @@
"cfg.max_active_flushes": "cfg.max_active_flushes"
"cql3/query_processor.cc":
groups:
"80": "query_processor"
"80": query_processor
"replica/dirty_memory_manager.cc":
params:
namestr: ["regular", "system"]
@@ -19,11 +19,10 @@
"replica/database.cc":
params:
"_dirty_memory_manager.throttle_threshold()": "throttle threshold"
"seastar/apps/metrics_tester/metrics_tester.cc": "skip"
"seastar/tests/unit/metrics_test.cc": "skip"
"seastar/tests/unit/metrics_tester.cc": "skip"
"seastar/tests/unit/prometheus_http_test.cc": "skip"
"seastar/tests/unit/prometheus_text_test.cc": "skip"
"seastar/apps/metrics_tester/metrics_tester.cc": skip
"seastar/tests/unit/metrics_test.cc": skip
"seastar/tests/unit/metrics_tester.cc": skip
"seastar/tests/unit/prometheus_http_test.cc": skip
"service/storage_proxy.cc":
params:
COORDINATOR_STATS_CATEGORY: "storage_proxy_coordinator"
@@ -33,25 +32,25 @@
_short_description_prefix: ["total_write_attempts", "write_errors", "background_replica_writes_failed", "read_repair_write_attempts"]
_long_description_prefix: ["total number of write requests", "number of write requests that failed", "background_replica_writes_failed", "number of write operations in a read repair context"]
_category: "storage_proxy_coordinator"
"thrift/server.cc": "skip"
"thrift/server.cc": skip
"tracing/tracing.cc":
params:
"max_pending_trace_records + write_event_records_threshold": "max_pending_trace_records + write_event_records_threshold"
"transport/server.cc":
groups:
"200": "transport"
"200": transport
params:
"_config.max_request_size": "max_request_size"
"seastar/src/net/dpdk.cc": "skip"
"seastar/src/net/dpdk.cc": skip
"db/hints/manager.cc":
params:
"group_name": ["hints_for_views_manager", "hints_manager"]
"seastar/src/core/execution_stage.cc":
groups:
"100": "execution_stages"
"100": execution_stages
"seastar/src/core/fair_queue.cc":
groups:
"300": "io_queue"
"300": io_queue
"seastar/src/net/net.cc":
params:
_stats_plugin_name: ["stats_plugin_name"]

View File

@@ -38,9 +38,8 @@ for required in jq curl; do
fi
done
FORCE=0
ALLOW_SUBMODULE=0
ALLOW_UNSTABLE=0
ALLOW_ANY_BRANCH=0
function print_usage {
cat << EOF
@@ -61,18 +60,12 @@ Options:
-h
Print this help message and exit.
--allow-submodule
Allow a PR to update a submudule
--allow-unstable
--force
Do not check current branch to be next*
Do not check jenkins job status
--allow-any-branch
Merge PR even if target branch is not next
--force
Sets all above --allow-* options
--allow-submodule
Allow a PR to update a submudule
EOF
}
@@ -80,23 +73,13 @@ while [[ $# -gt 0 ]]
do
case $1 in
"--force"|"-f")
ALLOW_UNSTABLE=1
ALLOW_SUBMODULE=1
ALLOW_ANY_BRANCH=1
FORCE=1
shift 1
;;
--allow-submodule)
ALLOW_SUBMODULE=1
shift
;;
--allow-unstable)
ALLOW_UNSTABLE=1
shift
;;
--allow-any-branch)
ALLOW_ANY_BRANCH=1
shift
;;
+([0-9]))
PR_NUM=$1
shift 1
@@ -164,7 +147,7 @@ check_jenkins_job_status() {
fi
}
if [[ $ALLOW_UNSTABLE -eq 0 ]]; then
if [[ $FORCE -eq 0 ]]; then
check_jenkins_job_status
fi
@@ -196,19 +179,17 @@ echo -n "Fetching full name of author $PR_LOGIN... "
USER_NAME=$(curl -s "https://api.github.com/users/$PR_LOGIN" | jq -r .name)
echo "$USER_NAME"
if [[ $ALLOW_ANY_BRANCH -eq 0 ]]; then
BASE_BRANCH=$(jq -r .base.ref <<< $PR_DATA)
CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
TARGET_BASE="unknown"
if [[ ${BASE_BRANCH} == master ]]; then
TARGET_BASE="next"
elif [[ ${BASE_BRANCH} == branch-* ]]; then
TARGET_BASE=${BASE_BRANCH//branch/next}
fi
if [[ "${CURRENT_BRANCH}" != "${TARGET_BASE}" ]]; then
echo "Merging into wrong next, want ${TARGET_BASE}, have ${CURRENT_BRANCH}. Use --allow-any-branch or --force to skip this check"
exit 1
fi
BASE_BRANCH=$(jq -r .base.ref <<< $PR_DATA)
CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD)
TARGET_BASE="unknown"
if [[ ${BASE_BRANCH} == master ]]; then
TARGET_BASE="next"
elif [[ ${BASE_BRANCH} == branch-* ]]; then
TARGET_BASE=${BASE_BRANCH//branch/next}
fi
if [[ "${CURRENT_BRANCH}" != "${TARGET_BASE}" ]]; then
echo "Merging into wrong next, want ${TARGET_BASE}, have ${CURRENT_BRANCH}"
exit 1
fi
git fetch "$REMOTE" pull/$PR_NUM/head

View File

@@ -6,7 +6,6 @@
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#include "seastar/core/scheduling.hh"
#include "utils/assert.hh"
#include <unordered_set>
@@ -18,7 +17,6 @@
#include <seastar/core/condition-variable.hh>
#include <seastar/coroutine/parallel_for_each.hh>
#include <seastar/util/defer.hh>
#include <seastar/coroutine/switch_to.hh>
#include "utils/log.hh"
@@ -120,7 +118,7 @@ struct failure_detector::impl {
// Fetches endpoint updates from _endpoint_queue and performs the add/remove operation.
// Runs on shard 0 only.
future<> update_endpoint_fiber(seastar::scheduling_group sg);
future<> update_endpoint_fiber();
future<> _update_endpoint_fiber = make_ready_future<>();
// Workers running on this shard.
@@ -142,7 +140,7 @@ struct failure_detector::impl {
// The unregistering process requires cross-shard operations which we perform on this fiber.
future<> _destroy_subscriptions = make_ready_future<>();
impl(failure_detector& parent, pinger&, clock&, clock::interval_t ping_period, clock::interval_t ping_timeout, seastar::scheduling_group sg);
impl(failure_detector& parent, pinger&, clock&, clock::interval_t ping_period, clock::interval_t ping_timeout);
~impl();
// Inform update_endpoint_fiber() about an added/removed endpoint.
@@ -179,19 +177,19 @@ struct failure_detector::impl {
};
failure_detector::failure_detector(
pinger& pinger, clock& clock, clock::interval_t ping_period, clock::interval_t ping_timeout, seastar::scheduling_group sg)
: _impl(std::make_unique<impl>(*this, pinger, clock, ping_period, ping_timeout, sg))
pinger& pinger, clock& clock, clock::interval_t ping_period, clock::interval_t ping_timeout)
: _impl(std::make_unique<impl>(*this, pinger, clock, ping_period, ping_timeout))
{}
failure_detector::impl::impl(
failure_detector& parent, pinger& pinger, clock& clock, clock::interval_t ping_period, clock::interval_t ping_timeout, seastar::scheduling_group sg)
failure_detector& parent, pinger& pinger, clock& clock, clock::interval_t ping_period, clock::interval_t ping_timeout)
: _parent(parent), _pinger(pinger), _clock(clock), _ping_period(ping_period), _ping_timeout(ping_timeout) {
if (this_shard_id() != 0) {
return;
}
_num_workers.resize(smp::count, 0);
_update_endpoint_fiber = update_endpoint_fiber(sg);
_update_endpoint_fiber = update_endpoint_fiber();
}
void failure_detector::impl::send_update_endpoint(pinger::endpoint_id ep, endpoint_update update) {
@@ -207,9 +205,9 @@ void failure_detector::impl::send_update_endpoint(pinger::endpoint_id ep, endpoi
_endpoint_changed.signal();
}
future<> failure_detector::impl::update_endpoint_fiber(seastar::scheduling_group sg) {
future<> failure_detector::impl::update_endpoint_fiber() {
SCYLLA_ASSERT(this_shard_id() == 0);
co_await coroutine::switch_to(sg);
while (true) {
co_await _endpoint_changed.wait([this] { return !_endpoint_updates.empty(); });
@@ -482,7 +480,7 @@ static future<bool> ping_with_timeout(pinger::endpoint_id id, clock::timepoint_t
}
});
auto f = pinger.ping(id, timeout, timeout_as, c);
auto f = pinger.ping(id, timeout_as);
auto sleep_and_abort = [] (clock::timepoint_t timeout, abort_source& timeout_as, clock& c) -> future<> {
co_await c.sleep_until(timeout, timeout_as).then_wrapped([&timeout_as] (auto&& f) {
// Avoid throwing if sleep was aborted.

View File

@@ -19,6 +19,26 @@ class abort_source;
namespace direct_failure_detector {
class pinger {
public:
// Opaque endpoint ID.
// A specific implementation of `pinger` maps those IDs to 'real' addresses.
using endpoint_id = utils::UUID;
// Send a message to `ep` and wait until it responds.
// The wait can be aborted using `as`.
// Abort should be signalized with `abort_requested_exception`.
//
// If the ping fails in an expected way (e.g. the endpoint is down and refuses to connect),
// returns `false`. If it succeeds, returns `true`.
virtual future<bool> ping(endpoint_id ep, abort_source& as) = 0;
protected:
// The `pinger` object must not be destroyed through the `pinger` interface.
// `failure_detector` does not take ownership of `pinger`, only a non-owning reference.
~pinger() = default;
};
// A clock that uses abstract units to measure time.
// The implementation is responsible for periodically advancing the clock.
//
@@ -40,33 +60,12 @@ public:
// Aborts should be signalized using `seastar::sleep_aborted`.
virtual future<> sleep_until(timepoint_t tp, abort_source& as) = 0;
virtual std::chrono::milliseconds to_milliseconds(timepoint_t tp) const = 0;
protected:
// The `clock` object must not be destroyed through the `clock` interface.
// `failure_detector` does not take ownership of `clock`, only a non-owning reference.
~clock() = default;
};
class pinger {
public:
// Opaque endpoint ID.
// A specific implementation of `pinger` maps those IDs to 'real' addresses.
using endpoint_id = utils::UUID;
// Send a message to `ep` and wait until it responds.
// The wait can be aborted using `as`.
// Abort should be signalized with `abort_requested_exception`.
//
// If the ping fails in an expected way (e.g. the endpoint is down and refuses to connect),
// returns `false`. If it succeeds, returns `true`.
virtual future<bool> ping(endpoint_id ep, clock::timepoint_t timeout, abort_source& as, clock& c) = 0;
protected:
// The `pinger` object must not be destroyed through the `pinger` interface.
// `failure_detector` does not take ownership of `pinger`, only a non-owning reference.
~pinger() = default;
};
class listener {
public:
// Called when an endpoint in the detected set (added by `failure_detector::add_endpoint`) responds to a ping
@@ -128,10 +127,7 @@ public:
// Duration after which a ping is aborted, so that next ping can be started
// (pings are sent sequentially).
clock::interval_t ping_timeout,
// Scheduling group used for fibers inside the failure detector.
seastar::scheduling_group sg
clock::interval_t ping_timeout
);
~failure_detector();

View File

@@ -18,7 +18,6 @@
#include "utils/error_injection.hh"
#include "seastar/core/shared_future.hh"
#include <chrono>
#include <seastar/core/coroutine.hh>
#include <seastar/core/when_all.hh>
#include <seastar/core/sleep.hh>
@@ -203,11 +202,8 @@ void raft_group_registry::init_rpc_verbs() {
});
ser::raft_rpc_verbs::register_direct_fd_ping(&_ms,
[this] (const rpc::client_info&, rpc::opt_time_point timeout, raft::server_id dst) -> future<direct_fd_ping_reply> {
if (timeout && *timeout <= netw::messaging_service::clock_type::now()) {
throw timed_out_error{};
}
[this] (const rpc::client_info&, raft::server_id dst) -> future<direct_fd_ping_reply> {
// XXX: update address map here as well?
if (_my_id != dst) {
return make_ready_future<direct_fd_ping_reply>(direct_fd_ping_reply {
@@ -217,10 +213,19 @@ void raft_group_registry::init_rpc_verbs() {
});
}
return make_ready_future<direct_fd_ping_reply>(direct_fd_ping_reply {
.result = service::group_liveness_info{
.group0_alive = _group0_is_alive,
return container().invoke_on(0, [] (raft_group_registry& me) -> future<direct_fd_ping_reply> {
bool group0_alive = false;
if (me._group0_id) {
auto* group0_server = me.find_server(*me._group0_id);
if (group0_server && group0_server->is_alive()) {
group0_alive = true;
}
}
co_return direct_fd_ping_reply {
.result = service::group_liveness_info{
.group0_alive = group0_alive,
}
};
});
});
}
@@ -375,12 +380,6 @@ future<> raft_group_registry::start_server_for_group(raft_server_for_group new_g
co_await server.abort();
std::rethrow_exception(ex);
}
if (gid == _group0_id) {
co_await container().invoke_on_all([] (raft_group_registry& rg) {
rg._group0_is_alive = true;
});
}
}
future<> raft_group_registry::abort_server(raft::group_id gid, sstring reason) {
@@ -390,18 +389,14 @@ future<> raft_group_registry::abort_server(raft::group_id gid, sstring reason) {
if (const auto it = _servers.find(gid); it != _servers.end()) {
auto& [gid, s] = *it;
if (!s.aborted) {
if (gid == _group0_id) {
co_await container().invoke_on_all([] (raft_group_registry& rg) {
rg._group0_is_alive = false;
});
}
s.aborted = s.server->abort(std::move(reason))
.handle_exception([gid] (std::exception_ptr ex) {
rslog.warn("Failed to abort raft group server {}: {}", gid, ex);
});
}
co_await s.aborted->get_future();
return s.aborted->get_future();
}
return make_ready_future<>();
}
unsigned raft_group_registry::shard_for_group(const raft::group_id& gid) const {
@@ -522,13 +517,11 @@ future<> raft_server_with_timeouts::read_barrier(seastar::abort_source* as, std:
}, "read_barrier", as, timeout);
}
future<bool> direct_fd_pinger::ping(direct_failure_detector::pinger::endpoint_id id, direct_failure_detector::clock::timepoint_t timeout, abort_source& as, direct_failure_detector::clock& c) {
future<bool> direct_fd_pinger::ping(direct_failure_detector::pinger::endpoint_id id, abort_source& as) {
auto dst_id = raft::server_id{id};
try {
std::chrono::milliseconds timeout_ms = c.to_milliseconds(timeout);
netw::messaging_service::clock_type::time_point deadline = netw::messaging_service::clock_type::now() + timeout_ms;
auto reply = co_await ser::raft_rpc_verbs::send_direct_fd_ping(&_ms, locator::host_id{id}, deadline, as, dst_id);
auto reply = co_await ser::raft_rpc_verbs::send_direct_fd_ping(&_ms, locator::host_id{id}, as, dst_id);
if (auto* wrong_dst = std::get_if<wrong_destination>(&reply.result)) {
// FIXME: after moving to host_id based verbs we will not get `wrong_destination`
// any more since the connection will fail
@@ -561,11 +554,4 @@ future<> direct_fd_clock::sleep_until(direct_failure_detector::clock::timepoint_
return sleep_abortable(t - n, as);
}
std::chrono::milliseconds direct_fd_clock::to_milliseconds(direct_failure_detector::clock::timepoint_t tp) const {
auto t = base::time_point{base::duration{tp}};
auto n = base::now();
return std::chrono::duration_cast<std::chrono::milliseconds>(t - n);
}
} // end of namespace service

View File

@@ -127,7 +127,6 @@ private:
// My Raft ID. Shared between different Raft groups.
raft::server_id _my_id;
bool _group0_is_alive = false;
public:
raft_group_registry(raft::server_id my_id, netw::messaging_service& ms,
direct_failure_detector::failure_detector& fd);
@@ -182,9 +181,6 @@ public:
unsigned shard_for_group(const raft::group_id& gid) const;
shared_ptr<raft::failure_detector> failure_detector();
direct_failure_detector::failure_detector& direct_fd() { return _direct_fd; }
bool is_group0_alive() const {
return _group0_is_alive;
}
};
// Implementation of `direct_failure_detector::pinger` which uses DIRECT_FD_PING verb for pinging.
@@ -202,7 +198,7 @@ public:
direct_fd_pinger(const direct_fd_pinger&) = delete;
direct_fd_pinger(direct_fd_pinger&&) = delete;
future<bool> ping(direct_failure_detector::pinger::endpoint_id id, direct_failure_detector::clock::timepoint_t timeout, abort_source& as, direct_failure_detector::clock& c) override;
future<bool> ping(direct_failure_detector::pinger::endpoint_id id, abort_source& as) override;
};
// XXX: find a better place to put this?
@@ -211,7 +207,6 @@ struct direct_fd_clock : public direct_failure_detector::clock {
direct_failure_detector::clock::timepoint_t now() noexcept override;
future<> sleep_until(direct_failure_detector::clock::timepoint_t tp, abort_source& as) override;
std::chrono::milliseconds to_milliseconds(direct_failure_detector::clock::timepoint_t tp) const override;
};
} // end of namespace service

View File

@@ -6688,11 +6688,10 @@ storage_proxy::do_query_with_paxos(schema_ptr s,
}
};
auto request = std::make_unique<read_cas_request>();
auto* request_ptr = request.get();
auto request = seastar::make_shared<read_cas_request>();
return cas(std::move(s), std::move(cas_shard), *request_ptr, cmd, std::move(partition_ranges), std::move(query_options),
cl, db::consistency_level::ANY, timeout, cas_timeout, false).then([request = std::move(request)] (bool is_applied) mutable {
return cas(std::move(s), std::move(cas_shard), request, cmd, std::move(partition_ranges), std::move(query_options),
cl, db::consistency_level::ANY, timeout, cas_timeout, false).then([request] (bool is_applied) mutable {
return make_ready_future<coordinator_query_result>(std::move(request->res));
});
}
@@ -6755,13 +6754,11 @@ static mutation_write_failure_exception read_failure_to_write(read_failure_excep
* NOTE: `cmd` argument can be nullptr, in which case it's guaranteed that this function would not perform
* any reads of committed values (in case user of the function is not interested in them).
*
* NOTE: The `request` object must be guaranteed to be alive until the returned future is resolved.
*
* WARNING: the function must be called on a shard that owns the key cas() operates on.
* The cas_shard must be created *before* selecting the shard, to protect against
* concurrent tablet migrations.
*/
future<bool> storage_proxy::cas(schema_ptr schema, cas_shard cas_shard, cas_request& request, lw_shared_ptr<query::read_command> cmd,
future<bool> storage_proxy::cas(schema_ptr schema, cas_shard cas_shard, shared_ptr<cas_request> request, lw_shared_ptr<query::read_command> cmd,
dht::partition_range_vector partition_ranges, storage_proxy::coordinator_query_options query_options,
db::consistency_level cl_for_paxos, db::consistency_level cl_for_learn,
clock_type::time_point write_timeout, clock_type::time_point cas_timeout, bool write, cdc::per_request_options cdc_opts) {
@@ -6862,7 +6859,7 @@ future<bool> storage_proxy::cas(schema_ptr schema, cas_shard cas_shard, cas_requ
qr = std::move(cqr.query_result);
}
auto mutation = request.apply(std::move(qr), cmd->slice, utils::UUID_gen::micros_timestamp(ballot), cdc_opts);
auto mutation = request->apply(std::move(qr), cmd->slice, utils::UUID_gen::micros_timestamp(ballot), cdc_opts);
condition_met = true;
if (!mutation) {
if (write) {

View File

@@ -829,7 +829,7 @@ public:
clock_type::time_point timeout,
tracing::trace_state_ptr trace_state = nullptr);
future<bool> cas(schema_ptr schema, cas_shard cas_shard, cas_request& request, lw_shared_ptr<query::read_command> cmd,
future<bool> cas(schema_ptr schema, cas_shard cas_shard, shared_ptr<cas_request> request, lw_shared_ptr<query::read_command> cmd,
dht::partition_range_vector partition_ranges, coordinator_query_options query_options,
db::consistency_level cl_for_paxos, db::consistency_level cl_for_learn,
clock_type::time_point write_timeout, clock_type::time_point cas_timeout, bool write = true, cdc::per_request_options cdc_opts = {});

View File

@@ -600,7 +600,7 @@ future<storage_service::nodes_to_notify_after_sync> storage_service::sync_raft_t
co_await update_topology_change_info(tmptr, ::format("{} {}/{}", rs.state, id, ip));
break;
case node_state::replacing: {
SCYLLA_ASSERT(_topology_state_machine._topology.req_param.contains(id));
scylla_assert(_topology_state_machine._topology.req_param.contains(id));
auto replaced_id = std::get<replace_param>(_topology_state_machine._topology.req_param[id]).replaced_id;
auto existing_ip = _address_map.find(locator::host_id{replaced_id.uuid()});
const auto replaced_host_id = locator::host_id(replaced_id.uuid());
@@ -688,7 +688,7 @@ future<> storage_service::notify_nodes_after_sync(nodes_to_notify_after_sync&& n
future<> storage_service::topology_state_load(state_change_hint hint) {
#ifdef SEASTAR_DEBUG
static bool running = false;
SCYLLA_ASSERT(!running); // The function is not re-entrant
scylla_assert(!running); // The function is not re-entrant
auto d = defer([] {
running = false;
});
@@ -854,7 +854,7 @@ future<> storage_service::topology_state_load(state_change_hint hint) {
}
future<> storage_service::topology_transition(state_change_hint hint) {
SCYLLA_ASSERT(this_shard_id() == 0);
scylla_assert(this_shard_id() == 0);
co_await topology_state_load(std::move(hint)); // reload new state
_topology_state_machine.event.broadcast();
@@ -898,7 +898,7 @@ future<> storage_service::view_building_state_load() {
}
future<> storage_service::view_building_transition() {
SCYLLA_ASSERT(this_shard_id() == 0);
scylla_assert(this_shard_id() == 0);
co_await view_building_state_load();
_view_building_state_machine.event.broadcast();
@@ -966,7 +966,7 @@ future<> storage_service::merge_topology_snapshot(raft_snapshot snp) {
}
future<> storage_service::update_service_levels_cache(qos::update_both_cache_levels update_only_effective_cache, qos::query_context ctx) {
SCYLLA_ASSERT(this_shard_id() == 0);
scylla_assert(this_shard_id() == 0);
if (_sl_controller.local().is_v2()) {
// Skip cache update unless the topology upgrade is done
co_await _sl_controller.local().update_cache(update_only_effective_cache, ctx);
@@ -1520,7 +1520,7 @@ future<> storage_service::update_topology_with_local_metadata(raft::server& raft
}
future<> storage_service::start_upgrade_to_raft_topology() {
SCYLLA_ASSERT(this_shard_id() == 0);
scylla_assert(this_shard_id() == 0);
if (_topology_state_machine._topology.upgrade_state != topology::upgrade_state_type::not_upgraded) {
co_return;
@@ -1572,7 +1572,7 @@ future<> storage_service::start_upgrade_to_raft_topology() {
}
topology::upgrade_state_type storage_service::get_topology_upgrade_state() const {
SCYLLA_ASSERT(this_shard_id() == 0);
scylla_assert(this_shard_id() == 0);
return _topology_state_machine._topology.upgrade_state;
}
@@ -1841,7 +1841,7 @@ future<> storage_service::join_topology(sharded<service::storage_proxy>& proxy,
slogger.info("Nodes {} are alive", get_sync_nodes());
}
SCYLLA_ASSERT(_group0);
scylla_assert(_group0);
join_node_request_params join_params {
.host_id = _group0->load_my_id(),
@@ -2083,7 +2083,7 @@ future<> storage_service::join_topology(sharded<service::storage_proxy>& proxy,
if (!_sys_ks.local().bootstrap_complete()) {
// If we're not bootstrapping then we shouldn't have chosen a CDC streams timestamp yet.
SCYLLA_ASSERT(should_bootstrap() || !cdc_gen_id);
scylla_assert(should_bootstrap() || !cdc_gen_id);
// Don't try rewriting CDC stream description tables.
// See cdc.md design notes, `Streams description table V1 and rewriting` section, for explanation.
@@ -2167,7 +2167,7 @@ future<> storage_service::join_topology(sharded<service::storage_proxy>& proxy,
throw std::runtime_error(err);
}
SCYLLA_ASSERT(_group0);
scylla_assert(_group0);
co_await _group0->finish_setup_after_join(*this, _qp, _migration_manager.local(), false);
co_await _cdc_gens.local().after_join(std::move(cdc_gen_id));
@@ -2192,7 +2192,7 @@ future<> storage_service::join_topology(sharded<service::storage_proxy>& proxy,
}
future<> storage_service::track_upgrade_progress_to_topology_coordinator(sharded<service::storage_proxy>& proxy) {
SCYLLA_ASSERT(_group0);
scylla_assert(_group0);
while (true) {
_group0_as.check();
@@ -2316,7 +2316,7 @@ future<> storage_service::bootstrap(std::unordered_set<token>& bootstrap_tokens,
// After we pick a generation timestamp, we start gossiping it, and we stick with it.
// We don't do any other generation switches (unless we crash before complecting bootstrap).
SCYLLA_ASSERT(!cdc_gen_id);
scylla_assert(!cdc_gen_id);
cdc_gen_id = _cdc_gens.local().legacy_make_new_generation(bootstrap_tokens, !is_first_node()).get();
@@ -2349,9 +2349,9 @@ future<> storage_service::bootstrap(std::unordered_set<token>& bootstrap_tokens,
slogger.debug("Removing replaced endpoint {} from system.peers", replace_addr);
_sys_ks.local().remove_endpoint(replace_addr).get();
SCYLLA_ASSERT(replaced_host_id);
scylla_assert(replaced_host_id);
auto raft_id = raft::server_id{replaced_host_id.uuid()};
SCYLLA_ASSERT(_group0);
scylla_assert(_group0);
bool raft_available = _group0->wait_for_raft().get();
if (raft_available) {
slogger.info("Replace: removing {}/{} from group 0...", replace_addr, raft_id);
@@ -3000,7 +3000,7 @@ future<> storage_service::stop_transport() {
}
future<> storage_service::drain_on_shutdown() {
SCYLLA_ASSERT(this_shard_id() == 0);
scylla_assert(this_shard_id() == 0);
return (_operation_mode == mode::DRAINING || _operation_mode == mode::DRAINED) ?
_drain_finished.get_future() : do_drain();
}
@@ -3025,7 +3025,7 @@ bool storage_service::is_topology_coordinator_enabled() const {
future<> storage_service::join_cluster(sharded<service::storage_proxy>& proxy,
start_hint_manager start_hm, gms::generation_type new_generation) {
SCYLLA_ASSERT(this_shard_id() == 0);
scylla_assert(this_shard_id() == 0);
if (_sys_ks.local().was_decommissioned()) {
auto msg = sstring("This node was decommissioned and will not rejoin the ring unless "
@@ -3225,7 +3225,7 @@ future<> storage_service::join_cluster(sharded<service::storage_proxy>& proxy,
}
future<token_metadata_change> storage_service::prepare_token_metadata_change(mutable_token_metadata_ptr tmptr, const schema_getter& schema_getter) {
SCYLLA_ASSERT(this_shard_id() == 0);
scylla_assert(this_shard_id() == 0);
std::exception_ptr ex;
token_metadata_change change;
@@ -3992,7 +3992,7 @@ future<> storage_service::decommission() {
slogger.info("DECOMMISSIONING: starts");
ctl.req.leaving_nodes = std::list<gms::inet_address>{endpoint};
SCYLLA_ASSERT(ss._group0);
scylla_assert(ss._group0);
bool raft_available = ss._group0->wait_for_raft().get();
try {
@@ -4044,7 +4044,7 @@ future<> storage_service::decommission() {
if (raft_available && left_token_ring) {
slogger.info("decommission[{}]: leaving Raft group 0", uuid);
SCYLLA_ASSERT(ss._group0);
scylla_assert(ss._group0);
ss._group0->leave_group0().get();
slogger.info("decommission[{}]: left Raft group 0", uuid);
}
@@ -4350,7 +4350,7 @@ future<> storage_service::removenode(locator::host_id host_id, locator::host_id_
auto stop_ctl = deferred_stop(ctl);
auto uuid = ctl.uuid();
const auto& tmptr = ctl.tmptr;
SCYLLA_ASSERT(ss._group0);
scylla_assert(ss._group0);
auto raft_id = raft::server_id{host_id.uuid()};
bool raft_available = ss._group0->wait_for_raft().get();
bool is_group0_member = raft_available && ss._group0->is_member(raft_id, false);
@@ -4470,7 +4470,7 @@ future<> storage_service::removenode(locator::host_id host_id, locator::host_id_
}
future<> storage_service::check_and_repair_cdc_streams() {
SCYLLA_ASSERT(this_shard_id() == 0);
scylla_assert(this_shard_id() == 0);
if (!_cdc_gens.local_is_initialized()) {
return make_exception_future<>(std::runtime_error("CDC generation service not initialized yet"));
@@ -5784,7 +5784,7 @@ future<> storage_service::mutate_token_metadata(std::function<future<> (mutable_
}
future<> storage_service::update_topology_change_info(mutable_token_metadata_ptr tmptr, sstring reason) {
SCYLLA_ASSERT(this_shard_id() == 0);
scylla_assert(this_shard_id() == 0);
try {
locator::dc_rack_fn get_dc_rack_by_host_id([this, &tm = *tmptr] (locator::host_id host_id) -> std::optional<locator::endpoint_dc_rack> {
@@ -5831,7 +5831,7 @@ future<> storage_service::keyspace_changed(const sstring& ks_name) {
}
future<locator::mutable_token_metadata_ptr> storage_service::prepare_tablet_metadata(const locator::tablet_metadata_change_hint& hint, mutable_token_metadata_ptr pending_token_metadata) {
SCYLLA_ASSERT(this_shard_id() == 0);
scylla_assert(this_shard_id() == 0);
if (hint) {
co_await replica::update_tablet_metadata(_db.local(), _qp, pending_token_metadata->tablets(), hint);
} else {
@@ -5955,7 +5955,7 @@ void storage_service::start_tablet_split_monitor() {
}
future<> storage_service::snitch_reconfigured() {
SCYLLA_ASSERT(this_shard_id() == 0);
scylla_assert(this_shard_id() == 0);
auto& snitch = _snitch.local();
co_await mutate_token_metadata([&snitch] (mutable_token_metadata_ptr tmptr) -> future<> {
// re-read local rack and DC info
@@ -6487,8 +6487,8 @@ future<> storage_service::stream_tablet(locator::global_tablet_id tablet) {
co_await utils::get_local_injector().inject("block_tablet_streaming", [this, &tablet] (auto& handler) -> future<> {
const auto keyspace = handler.get("keyspace");
const auto table = handler.get("table");
SCYLLA_ASSERT(keyspace);
SCYLLA_ASSERT(table);
scylla_assert(keyspace);
scylla_assert(table);
auto s = _db.local().find_column_family(tablet.table).schema();
bool should_block = s->ks_name() == *keyspace && s->cf_name() == *table;
while (should_block && !handler.poll_for_message() && !_async_gate.is_closed()) {
@@ -7509,7 +7509,7 @@ future<join_node_request_result> storage_service::join_node_request_handler(join
}
future<join_node_response_result> storage_service::join_node_response_handler(join_node_response_params params) {
SCYLLA_ASSERT(this_shard_id() == 0);
scylla_assert(this_shard_id() == 0);
// Usually this handler will only run once, but there are some cases where we might get more than one RPC,
// possibly happening at the same time, e.g.:

View File

@@ -360,7 +360,7 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
auto& topo = _topo_sm._topology;
auto it = topo.find(id);
SCYLLA_ASSERT(it);
scylla_assert(it);
std::optional<topology_request> req;
auto rit = topo.requests.find(id);
@@ -2310,7 +2310,7 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
switch (node.rs->state) {
case node_state::bootstrapping: {
SCYLLA_ASSERT(!node.rs->ring);
scylla_assert(!node.rs->ring);
auto num_tokens = std::get<join_param>(node.req_param.value()).num_tokens;
auto tokens_string = std::get<join_param>(node.req_param.value()).tokens_string;
@@ -2359,11 +2359,11 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
}
break;
case node_state::replacing: {
SCYLLA_ASSERT(!node.rs->ring);
scylla_assert(!node.rs->ring);
auto replaced_id = std::get<replace_param>(node.req_param.value()).replaced_id;
auto it = _topo_sm._topology.normal_nodes.find(replaced_id);
SCYLLA_ASSERT(it != _topo_sm._topology.normal_nodes.end());
SCYLLA_ASSERT(it->second.ring && it->second.state == node_state::normal);
scylla_assert(it != _topo_sm._topology.normal_nodes.end());
scylla_assert(it->second.ring && it->second.state == node_state::normal);
topology_mutation_builder builder(node.guard.write_timestamp());
@@ -3022,7 +3022,7 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
rtbuilder.set("start_time", db_clock::now());
switch (node.request.value()) {
case topology_request::join: {
SCYLLA_ASSERT(!node.rs->ring);
scylla_assert(!node.rs->ring);
// Write chosen tokens through raft.
builder.set_transition_state(topology::transition_state::join_group0)
.with_node(node.id)
@@ -3033,7 +3033,7 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
break;
}
case topology_request::leave:
SCYLLA_ASSERT(node.rs->ring);
scylla_assert(node.rs->ring);
// start decommission and put tokens of decommissioning nodes into write_both_read_old state
// meaning that reads will go to the replica being decommissioned
// but writes will go to new owner as well
@@ -3046,7 +3046,7 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
"start decommission");
break;
case topology_request::remove: {
SCYLLA_ASSERT(node.rs->ring);
scylla_assert(node.rs->ring);
builder.set_transition_state(topology::transition_state::tablet_draining)
.set_version(_topo_sm._topology.version + 1)
@@ -3058,7 +3058,7 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
break;
}
case topology_request::replace: {
SCYLLA_ASSERT(!node.rs->ring);
scylla_assert(!node.rs->ring);
builder.set_transition_state(topology::transition_state::join_group0)
.with_node(node.id)
@@ -3163,7 +3163,7 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
auto id = node.id;
SCYLLA_ASSERT(!_topo_sm._topology.transition_nodes.empty());
scylla_assert(!_topo_sm._topology.transition_nodes.empty());
release_node(std::move(node));
@@ -4013,7 +4013,7 @@ future<> topology_coordinator::stop() {
// but let's check all of them because we never reset these holders
// once they are added as barriers
for (auto& [stage, barrier]: tablet_state.barriers) {
SCYLLA_ASSERT(barrier.has_value());
scylla_assert(barrier.has_value());
co_await stop_background_action(barrier, gid, [stage] { return format("at stage {}", tablet_transition_stage_to_string(stage)); });
}

View File

@@ -251,7 +251,7 @@ void compression::discard_hidden_options() {
}
compressor& compression::get_compressor() const {
SCYLLA_ASSERT(_compressor);
scylla_assert(_compressor);
return *_compressor.get();
}

View File

@@ -170,7 +170,7 @@ struct compression {
const_iterator(const const_iterator& other) = default;
const_iterator& operator=(const const_iterator& other) {
SCYLLA_ASSERT(&_offsets == &other._offsets);
scylla_assert(&_offsets == &other._offsets);
_index = other._index;
return *this;
}

View File

@@ -24,6 +24,7 @@
#include "sstables/sstable_compressor_factory.hh"
#include "compressor.hh"
#include "exceptions/exceptions.hh"
#include "utils/assert.hh"
#include "utils/config_file_impl.hh"
#include "utils/class_registrator.hh"
#include "gms/feature_service.hh"
@@ -295,7 +296,7 @@ size_t zstd_processor::uncompress(const char* input, size_t input_len, char* out
if (_ddict) {
return ZSTD_decompress_usingDDict(dctx, output, output_len, input, input_len, _ddict->dict());
} else {
SCYLLA_ASSERT(!_cdict && "Write-only compressor used for reading");
scylla_assert(!_cdict && "Write-only compressor used for reading");
return ZSTD_decompressDCtx(dctx, output, output_len, input, input_len);
}
});
@@ -310,7 +311,7 @@ size_t zstd_processor::compress(const char* input, size_t input_len, char* outpu
if (_cdict) {
return ZSTD_compress_usingCDict(cctx, output, output_len, input, input_len, _cdict->dict());
} else {
SCYLLA_ASSERT(!_ddict && "Read-only compressor used for writing");
scylla_assert(!_ddict && "Read-only compressor used for writing");
return ZSTD_compressCCtx(cctx, output, output_len, input, input_len, _compression_level);
}
});
@@ -627,7 +628,7 @@ size_t lz4_processor::uncompress(const char* input, size_t input_len,
if (_ddict) {
ret = LZ4_decompress_safe_usingDict(input, output, input_len, output_len, reinterpret_cast<const char*>(_ddict->raw().data()), _ddict->raw().size());
} else {
SCYLLA_ASSERT(!_cdict && "Write-only compressor used for reading");
scylla_assert(!_cdict && "Write-only compressor used for reading");
ret = LZ4_decompress_safe(input, output, input_len, output_len);
}
if (ret < 0) {
@@ -657,7 +658,7 @@ size_t lz4_processor::compress(const char* input, size_t input_len,
LZ4_resetStream_fast(ctx);
}
} else {
SCYLLA_ASSERT(!_ddict && "Read-only compressor used for writing");
scylla_assert(!_ddict && "Read-only compressor used for writing");
ret = LZ4_compress_default(input, output + 4, input_len, LZ4_compressBound(input_len));
}
if (ret == 0) {
@@ -1268,7 +1269,7 @@ lz4_cdict::~lz4_cdict() {
}
std::unique_ptr<sstable_compressor_factory> make_sstable_compressor_factory_for_tests_in_thread() {
SCYLLA_ASSERT(thread::running_in_thread());
scylla_assert(thread::running_in_thread());
struct wrapper : sstable_compressor_factory {
using impl = default_sstable_compressor_factory;
sharded<impl> _impl;

View File

@@ -44,14 +44,14 @@ public:
* @return A list of `sampling_level` unique indices between 0 and `sampling_level`
*/
static const std::vector<int>& get_sampling_pattern(int sampling_level) {
SCYLLA_ASSERT(sampling_level > 0 && sampling_level <= BASE_SAMPLING_LEVEL);
scylla_assert(sampling_level > 0 && sampling_level <= BASE_SAMPLING_LEVEL);
auto& entry = _sample_pattern_cache[sampling_level-1];
if (!entry.empty()) {
return entry;
}
if (sampling_level <= 1) {
SCYLLA_ASSERT(_sample_pattern_cache[0].empty());
scylla_assert(_sample_pattern_cache[0].empty());
_sample_pattern_cache[0].push_back(0);
return _sample_pattern_cache[0];
}
@@ -96,7 +96,7 @@ public:
* @return a list of original indexes for current summary entries
*/
static const std::vector<int>& get_original_indexes(int sampling_level) {
SCYLLA_ASSERT(sampling_level > 0 && sampling_level <= BASE_SAMPLING_LEVEL);
scylla_assert(sampling_level > 0 && sampling_level <= BASE_SAMPLING_LEVEL);
auto& entry = _original_index_cache[sampling_level-1];
if (!entry.empty()) {
return entry;
@@ -128,7 +128,7 @@ public:
* @return the number of partitions before the next index summary entry, inclusive on one end
*/
static int get_effective_index_interval_after_index(int index, int sampling_level, int min_index_interval) {
SCYLLA_ASSERT(index >= -1);
scylla_assert(index >= -1);
const std::vector<int>& original_indexes = get_original_indexes(sampling_level);
if (index == -1) {
return original_indexes[0] * min_index_interval;

View File

@@ -31,7 +31,7 @@ public:
[[noreturn]] void on_parse_error(sstring message, std::optional<component_name> filename);
[[noreturn, gnu::noinline]] void on_bti_parse_error(uint64_t pos);
// Use this instead of SCYLLA_ASSERT() or assert() in code that is used while parsing SSTables.
// Use this instead of scylla_assert() or assert() in code that is used while parsing SSTables.
// SSTables can be corrupted either by ScyllaDB itself or by a freak accident like cosmic background
// radiation hitting the disk the wrong way. Either way a corrupt SSTable should not bring down the
// whole server. This method will call on_internal_error() if the condition is false.

View File

@@ -129,7 +129,7 @@ public:
/// way to determine that is overlapping its partition-ranges with the shard's
/// owned ranges.
static bool maybe_owned_by_this_shard(const sstables::generation_type& gen) {
SCYLLA_ASSERT(bool(gen));
scylla_assert(bool(gen));
int64_t hint = 0;
if (gen.is_uuid_based()) {
hint = std::hash<utils::UUID>{}(gen.as_uuid());

View File

@@ -57,10 +57,7 @@ public:
index_list indexes;
index_consumer(logalloc::region& r, schema_ptr s)
: _s(s)
, _alloc_section(abstract_formatter([s] (fmt::format_context& ctx) {
fmt::format_to(ctx.out(), "index_consumer {}.{}", s->ks_name(), s->cf_name());
}))
: _s(std::move(s))
, _region(r)
{ }
@@ -788,9 +785,6 @@ public:
_sstable->manager().get_cache_tracker().region(),
_sstable->manager().get_cache_tracker().get_partition_index_cache_stats()))
, _index_cache(caching ? *_sstable->_index_cache : *_local_index_cache)
, _alloc_section(abstract_formatter([sst = _sstable] (fmt::format_context& ctx) {
fmt::format_to(ctx.out(), "index_reader {}", sst->get_filename());
}))
, _region(_sstable->manager().get_cache_tracker().region())
, _use_caching(caching)
, _single_page_read(single_partition_read) // all entries for a given partition are within a single page

View File

@@ -284,9 +284,6 @@ public:
, _clustering_parser(s, permit, _ctr.clustering_column_value_fix_legths(), true)
, _block_parser(s, permit, _ctr.clustering_column_value_fix_legths())
, _permit(std::move(permit))
, _as(abstract_formatter([s] (fmt::format_context& ctx) {
fmt::format_to(ctx.out(), "cached_promoted_index {}.{}", s.ks_name(), s.cf_name());
}))
{ }
~cached_promoted_index() {

View File

@@ -91,7 +91,7 @@ public:
{}
void increment() {
SCYLLA_ASSERT(_range);
scylla_assert(_range);
if (!_range->next()) {
_range = nullptr;
}
@@ -102,7 +102,7 @@ public:
}
const ValueType dereference() const {
SCYLLA_ASSERT(_range);
scylla_assert(_range);
return _range->get_value();
}
@@ -153,7 +153,7 @@ public:
auto limit = std::min(_serialization_limit_size, _offset + clustering_block::max_block_size);
_current_block = {};
SCYLLA_ASSERT (_offset % clustering_block::max_block_size == 0);
scylla_assert (_offset % clustering_block::max_block_size == 0);
while (_offset < limit) {
auto shift = _offset % clustering_block::max_block_size;
if (_offset < _prefix.size(_schema)) {
@@ -280,7 +280,7 @@ public:
++_current_index;
}
} else {
SCYLLA_ASSERT(_mode == encoding_mode::large_encode_missing);
scylla_assert(_mode == encoding_mode::large_encode_missing);
while (_current_index < total_size) {
auto cell = _row.find_cell(_columns[_current_index].get().id);
if (!cell) {
@@ -1180,7 +1180,7 @@ void writer::write_cell(bytes_ostream& writer, const clustering_key_prefix* clus
if (cdef.is_counter()) {
if (!is_deleted) {
SCYLLA_ASSERT(!cell.is_counter_update());
scylla_assert(!cell.is_counter_update());
auto ccv = counter_cell_view(cell);
write_counter_value(ccv, writer, _sst.get_version(), [] (bytes_ostream& out, uint32_t value) {
return write_vint(out, value);
@@ -1489,7 +1489,7 @@ template <typename W>
requires Writer<W>
static void write_clustering_prefix(sstable_version_types v, W& writer, bound_kind_m kind,
const schema& s, const clustering_key_prefix& clustering) {
SCYLLA_ASSERT(kind != bound_kind_m::static_clustering);
scylla_assert(kind != bound_kind_m::static_clustering);
write(v, writer, kind);
auto is_ephemerally_full = ephemerally_full_prefix{s.is_compact_table()};
if (kind != bound_kind_m::clustering) {

View File

@@ -59,7 +59,7 @@ private:
// Live entry_ptr should keep the entry alive, except when the entry failed on loading.
// In that case, entry_ptr holders are not supposed to use the pointer, so it's safe
// to nullify those entry_ptrs.
SCYLLA_ASSERT(!ready());
scylla_assert(!ready());
}
}

View File

@@ -496,7 +496,7 @@ sstable_directory::move_foreign_sstables(sharded<sstable_directory>& source_dire
return make_ready_future<>();
}
// Should be empty, since an SSTable that belongs to this shard is not remote.
SCYLLA_ASSERT(shard_id != this_shard_id());
scylla_assert(shard_id != this_shard_id());
dirlog.debug("Moving {} unshared SSTables of {}.{} to shard {} ", info_vec.size(), _schema->ks_name(), _schema->cf_name(), shard_id);
return source_directory.invoke_on(shard_id, &sstables::sstable_directory::load_foreign_sstables, std::move(info_vec));
});
@@ -540,7 +540,7 @@ sstable_directory::collect_output_unshared_sstables(std::vector<sstables::shared
dirlog.debug("Collecting {} output SSTables (remote={})", resharded_sstables.size(), remote_ok);
return parallel_for_each(std::move(resharded_sstables), [this, remote_ok] (sstables::shared_sstable sst) {
auto shards = sst->get_shards_for_this_sstable();
SCYLLA_ASSERT(shards.size() == 1);
scylla_assert(shards.size() == 1);
auto shard = shards[0];
if (shard == this_shard_id()) {

View File

@@ -283,7 +283,7 @@ bool partitioned_sstable_set::store_as_unleveled(const shared_sstable& sst) cons
}
sstlog.info("SSTable {}, as_unleveled={}, expect_unleveled={}, sst_tr={}, overlap_ratio={}",
sst->generation(), as_unleveled, expect_unleveled, sst_tr, dht::overlap_ratio(_token_range, sst_tr));
SCYLLA_ASSERT(as_unleveled == expect_unleveled);
scylla_assert(as_unleveled == expect_unleveled);
});
return as_unleveled;
@@ -712,8 +712,8 @@ public:
// by !empty(bound) and `_it` invariant:
// _it != _end, _it->first <= bound, and filter(*_it->second) == true
SCYLLA_ASSERT(_cmp(_it->first, bound) <= 0);
// we don't SCYLLA_ASSERT(filter(*_it->second)) due to the requirement that `filter` is called at most once for each sstable
scylla_assert(_cmp(_it->first, bound) <= 0);
// we don't scylla_assert(filter(*_it->second)) due to the requirement that `filter` is called at most once for each sstable
// Find all sstables with the same position as `_it` (they form a contiguous range in the container).
auto next = std::find_if(std::next(_it), _end, [this] (const value_t& v) { return _cmp(v.first, _it->first) != 0; });
@@ -1301,7 +1301,7 @@ sstable_set::create_single_key_sstable_reader(
mutation_reader::forwarding fwd_mr,
const sstable_predicate& predicate,
sstables::integrity_check integrity) const {
SCYLLA_ASSERT(pr.is_singular() && pr.start()->value().has_key());
scylla_assert(pr.is_singular() && pr.start()->value().has_key());
return _impl->create_single_key_sstable_reader(cf, std::move(schema),
std::move(permit), sstable_histogram, pr, slice, std::move(trace_state), fwd, fwd_mr, predicate, integrity);
}
@@ -1408,7 +1408,7 @@ sstable_set::make_local_shard_sstable_reader(
{
auto reader_factory_fn = [s, permit, &slice, trace_state, fwd, fwd_mr, &monitor_generator, &predicate, integrity]
(shared_sstable& sst, const dht::partition_range& pr) mutable {
SCYLLA_ASSERT(!sst->is_shared());
scylla_assert(!sst->is_shared());
if (!predicate(*sst)) {
return make_empty_mutation_reader(s, permit);
}

View File

@@ -36,6 +36,7 @@
#include "utils/error_injection.hh"
#include "utils/to_string.hh"
#include "utils/assert.hh"
#include "data_dictionary/storage_options.hh"
#include "dht/sharder.hh"
#include "writer.hh"
@@ -2485,6 +2486,11 @@ void sstable::validate_originating_host_id() const {
}
return;
}
if (*originating_host_id != local_host_id) {
// FIXME refrain from throwing an exception because of #10148
sstlog.warn("Host id {} does not match local host id {} while validating SSTable: {}. Load foreign SSTables via the upload dir instead.", *originating_host_id, local_host_id, get_filename());
}
}
sstring sstable::component_basename(const sstring& ks, const sstring& cf, version_types version, generation_type generation,
@@ -4156,7 +4162,7 @@ future<data_sink> file_io_extension::wrap_sink(const sstable& sst, component_typ
}
future<data_source> file_io_extension::wrap_source(const sstable& sst, component_type c, data_source) {
SCYLLA_ASSERT(0 && "You are not supposed to get here, file_io_extension::wrap_source() is not implemented");
scylla_assert(0 && "You are not supposed to get here, file_io_extension::wrap_source() is not implemented");
}
namespace trie {

View File

@@ -55,9 +55,9 @@ sstables_manager::sstables_manager(
}
sstables_manager::~sstables_manager() {
SCYLLA_ASSERT(_closing);
SCYLLA_ASSERT(_active.empty());
SCYLLA_ASSERT(_undergoing_close.empty());
scylla_assert(_closing);
scylla_assert(_active.empty());
scylla_assert(_undergoing_close.empty());
}
void sstables_manager::subscribe(sstables_manager_event_handler& handler) {
@@ -135,17 +135,13 @@ future<> storage_manager::update_config(const db::config& cfg) {
co_return;
}
auto storage_manager::get_endpoint(const sstring& endpoint) -> object_storage_endpoint& {
shared_ptr<sstables::object_storage_client> storage_manager::get_endpoint_client(sstring endpoint) {
auto found = _object_storage_endpoints.find(endpoint);
if (found == _object_storage_endpoints.end()) {
smlogger.error("unable to find {} in configured object-storage endpoints", endpoint);
throw std::invalid_argument(format("endpoint {} not found", endpoint));
}
return found->second;
}
shared_ptr<sstables::object_storage_client> storage_manager::get_endpoint_client(sstring endpoint) {
auto& ep = get_endpoint(endpoint);
auto& ep = found->second;
if (ep.client == nullptr) {
ep.client = make_object_storage_client(ep.cfg, _object_storage_clients_memory, [&ct = container()] (std::string ep) {
return ct.local().get_endpoint_client(ep);
@@ -154,10 +150,6 @@ shared_ptr<sstables::object_storage_client> storage_manager::get_endpoint_client
return ep.client;
}
sstring storage_manager::get_endpoint_type(sstring endpoint) {
return get_endpoint(endpoint).cfg.type();
}
bool storage_manager::is_known_endpoint(sstring endpoint) const {
return _object_storage_endpoints.contains(endpoint);
}

Some files were not shown because too many files have changed in this diff Show More