Compare commits
5 Commits
copilot/co
...
copilot/fi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8e956fda27 | ||
|
|
d65c5926d5 | ||
|
|
1390736e81 | ||
|
|
624331018d | ||
|
|
3b4ff94437 |
11
.github/copilot-instructions.md
vendored
11
.github/copilot-instructions.md
vendored
@@ -84,14 +84,3 @@ ninja build/<mode>/scylla
|
||||
- Strive for simplicity and clarity, add complexity only when clearly justified
|
||||
- Question requests: don't blindly implement requests - evaluate trade-offs, identify issues, and suggest better alternatives when appropriate
|
||||
- Consider different approaches, weigh pros and cons, and recommend the best fit for the specific context
|
||||
|
||||
## Test Philosophy
|
||||
- Performance matters. Tests should run as quickly as possible. Sleeps in the code are highly discouraged and should be avoided, to reduce run time and flakiness.
|
||||
- Stability matters. Tests should be stable. New tests should be executed at least 100 times to ensure they pass 100 out of 100 times (use `--repeat 100 --max-failures 1` when running them).
|
||||
- Unit tests should ideally test one thing and one thing only.
|
||||
- Tests for bug fixes should be run before the fix, to show that they fail, and again after the fix, to show that they now pass.
|
||||
- Tests for bug fixes should state in their comments which issue (GitHub or JIRA) they cover.
|
||||
- Tests in debug are always slower, so if needed, reduce number of iterations, rows, data used, cycles, etc. in debug mode.
|
||||
- Tests should strive to be repeatable, and not use random input that will make their results unpredictable.
|
||||
- Tests should consume as little resources as possible. Prefer running tests on a single node if it is sufficient, for example.
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ jobs:
|
||||
|
||||
// Regular expression pattern to check for "Fixes" prefix
|
||||
// Adjusted to dynamically insert the repository full name
|
||||
const pattern = `Fixes:? ((?:#|${repo.replace('/', '\\/')}#|https://github\\.com/${repo.replace('/', '\\/')}/issues/)(\\d+)|(?:https://scylladb\\.atlassian\\.net/browse/)?([A-Z]+-\\d+))`;
|
||||
const pattern = `Fixes:? ((?:#|${repo.replace('/', '\\/')}#|https://github\\.com/${repo.replace('/', '\\/')}/issues/)(\\d+)|([A-Z]+-\\d+))`;
|
||||
const regex = new RegExp(pattern);
|
||||
|
||||
if (!regex.test(body)) {
|
||||
|
||||
12
.github/workflows/call_jira_status_in_progress.yml
vendored
Normal file
12
.github/workflows/call_jira_status_in_progress.yml
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
# Caller workflow: when a pull request is opened, delegate to the reusable
# workflow main_update_jira_status_to_in_progress.yml that lives in
# scylladb/github-automation (referenced at its `main` ref), forwarding the
# Jira credentials secret to it.
# NOTE(review): per GitHub documentation, pull_request_target runs in the
# context of the base repository with access to secrets - confirm the called
# workflow never checks out or executes code from the PR head.
name: Call Jira Status In Progress
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
types: [opened]
|
||||
|
||||
jobs:
|
||||
call-jira-status-in-progress:
|
||||
uses: scylladb/github-automation/.github/workflows/main_update_jira_status_to_in_progress.yml@main
|
||||
secrets:
|
||||
# Passed to the called workflow under the name `caller_jira_auth`.
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
||||
|
||||
12
.github/workflows/call_jira_status_in_review.yml
vendored
Normal file
12
.github/workflows/call_jira_status_in_review.yml
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
# Caller workflow: when a pull request is marked ready for review or a review
# is requested, delegate to the reusable workflow
# main_update_jira_status_to_in_review.yml in scylladb/github-automation
# (referenced at its `main` ref), forwarding the Jira credentials secret.
# NOTE(review): both event types trigger the same job, so a single PR may
# invoke the called workflow more than once - presumably the update is
# idempotent; verify in the called workflow.
name: Call Jira Status In Review
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
types: [ready_for_review, review_requested]
|
||||
|
||||
jobs:
|
||||
call-jira-status-in-review:
|
||||
uses: scylladb/github-automation/.github/workflows/main_update_jira_status_to_in_review.yml@main
|
||||
secrets:
|
||||
# Passed to the called workflow under the name `caller_jira_auth`.
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
||||
|
||||
12
.github/workflows/call_jira_status_ready_for_merge.yml
vendored
Normal file
12
.github/workflows/call_jira_status_ready_for_merge.yml
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
# Caller workflow: whenever a label is added to a pull request, delegate to
# the reusable workflow main_update_jira_status_to_ready_for_merge.yml in
# scylladb/github-automation (referenced at its `main` ref), forwarding the
# Jira credentials secret.
# NOTE(review): this file does not filter on the label name, so every
# `labeled` event triggers the job - presumably the called workflow decides
# which labels matter; confirm there.
name: Call Jira Status Ready For Merge
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
types: [labeled]
|
||||
|
||||
jobs:
|
||||
call-jira-status-update:
|
||||
uses: scylladb/github-automation/.github/workflows/main_update_jira_status_to_ready_for_merge.yml@main
|
||||
secrets:
|
||||
# Passed to the called workflow under the name `caller_jira_auth`.
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
||||
|
||||
41
.github/workflows/call_jira_sync.yml
vendored
41
.github/workflows/call_jira_sync.yml
vendored
@@ -1,41 +0,0 @@
|
||||
# Single entry point that routes pull request events to reusable Jira-sync
# workflows in scylladb/github-automation (each referenced at its `main` ref).
# Exactly one job matches a given event, selected by `github.event.action`;
# every job forwards the same Jira credentials secret.
name: Sync Jira Based on PR Events
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
types: [opened, ready_for_review, review_requested, labeled, unlabeled, closed]
|
||||
|
||||
# Token permissions requested for the jobs below.
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
issues: write
|
||||
|
||||
jobs:
|
||||
# PR opened.
jira-sync-pr-opened:
|
||||
if: github.event.action == 'opened'
|
||||
uses: scylladb/github-automation/.github/workflows/main_jira_sync_pr_opened.yml@main
|
||||
secrets:
|
||||
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
||||
|
||||
# PR marked ready for review, or a review was requested.
jira-sync-in-review:
|
||||
if: github.event.action == 'ready_for_review' || github.event.action == 'review_requested'
|
||||
uses: scylladb/github-automation/.github/workflows/main_jira_sync_in_review.yml@main
|
||||
secrets:
|
||||
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
||||
|
||||
# Label added to the PR.
jira-sync-add-label:
|
||||
if: github.event.action == 'labeled'
|
||||
uses: scylladb/github-automation/.github/workflows/main_jira_sync_add_label.yml@main
|
||||
secrets:
|
||||
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
||||
|
||||
# Label removed from the PR.
jira-status-remove-label:
|
||||
if: github.event.action == 'unlabeled'
|
||||
uses: scylladb/github-automation/.github/workflows/main_jira_sync_remove_label.yml@main
|
||||
secrets:
|
||||
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
||||
|
||||
# PR closed.
jira-status-pr-closed:
|
||||
if: github.event.action == 'closed'
|
||||
uses: scylladb/github-automation/.github/workflows/main_jira_sync_pr_closed.yml@main
|
||||
secrets:
|
||||
caller_jira_auth: ${{ secrets.USER_AND_KEY_FOR_JIRA_AUTOMATION }}
|
||||
2
.github/workflows/codespell.yaml
vendored
2
.github/workflows/codespell.yaml
vendored
@@ -13,5 +13,5 @@ jobs:
|
||||
- uses: codespell-project/actions-codespell@master
|
||||
with:
|
||||
only_warn: 1
|
||||
ignore_words_list: "ans,datas,fo,ser,ue,crate,nd,reenable,strat,stap,te,raison,iif,tread"
|
||||
ignore_words_list: "ans,datas,fo,ser,ue,crate,nd,reenable,strat,stap,te,raison"
|
||||
skip: "./.git,./build,./tools,*.js,*.lock,./test,./licenses,./redis/lolwut.cc,*.svg"
|
||||
|
||||
2
.github/workflows/docs-pages.yaml
vendored
2
.github/workflows/docs-pages.yaml
vendored
@@ -18,8 +18,6 @@ on:
|
||||
|
||||
jobs:
|
||||
release:
|
||||
permissions:
|
||||
contents: write
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
|
||||
3
.github/workflows/docs-pr.yaml
vendored
3
.github/workflows/docs-pr.yaml
vendored
@@ -2,9 +2,6 @@ name: "Docs / Build PR"
|
||||
# For more information,
|
||||
# see https://sphinx-theme.scylladb.com/stable/deployment/production.html#available-workflows
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
FLAG: ${{ github.repository == 'scylladb/scylla-enterprise' && 'enterprise' || 'opensource' }}
|
||||
|
||||
|
||||
3
.github/workflows/docs-validate-metrics.yml
vendored
3
.github/workflows/docs-validate-metrics.yml
vendored
@@ -1,8 +1,5 @@
|
||||
name: Docs / Validate metrics
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
|
||||
5
.github/workflows/iwyu.yaml
vendored
5
.github/workflows/iwyu.yaml
vendored
@@ -14,8 +14,7 @@ env:
|
||||
CLEANER_DIRS: test/unit exceptions alternator api auth cdc compaction db dht gms index lang message mutation mutation_writer node_ops raft redis replica service
|
||||
SEASTAR_BAD_INCLUDE_OUTPUT_PATH: build/seastar-bad-include.log
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
permissions: {}
|
||||
|
||||
# cancel the in-progress run upon a repush
|
||||
concurrency:
|
||||
@@ -35,6 +34,8 @@ jobs:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
- run: |
|
||||
sudo dnf -y install clang-tools-extra
|
||||
- name: Generate compilation database
|
||||
run: |
|
||||
cmake \
|
||||
|
||||
2
.github/workflows/read-toolchain.yaml
vendored
2
.github/workflows/read-toolchain.yaml
vendored
@@ -10,8 +10,6 @@ on:
|
||||
jobs:
|
||||
read-toolchain:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
outputs:
|
||||
image: ${{ steps.read.outputs.image }}
|
||||
steps:
|
||||
|
||||
@@ -78,7 +78,7 @@ fi
|
||||
|
||||
# Default scylla product/version tags
|
||||
PRODUCT=scylla
|
||||
VERSION=2026.2.0-dev
|
||||
VERSION=2026.1.0-dev
|
||||
|
||||
if test -f version
|
||||
then
|
||||
|
||||
@@ -18,7 +18,6 @@ target_sources(alternator
|
||||
consumed_capacity.cc
|
||||
ttl.cc
|
||||
parsed_expression_cache.cc
|
||||
http_compression.cc
|
||||
${cql_grammar_srcs})
|
||||
target_include_directories(alternator
|
||||
PUBLIC
|
||||
|
||||
@@ -244,10 +244,7 @@ static bool is_set_of(const rjson::value& type1, const rjson::value& type2) {
|
||||
|
||||
// Check if two JSON-encoded values match with the CONTAINS relation
|
||||
bool check_CONTAINS(const rjson::value* v1, const rjson::value& v2, bool v1_from_query, bool v2_from_query) {
|
||||
if (!v1 || !v1->IsObject() || v1->MemberCount() == 0) {
|
||||
return false;
|
||||
}
|
||||
if (!v2.IsObject() || v2.MemberCount() == 0) {
|
||||
if (!v1) {
|
||||
return false;
|
||||
}
|
||||
const auto& kv1 = *v1->MemberBegin();
|
||||
|
||||
@@ -45,7 +45,7 @@ bool consumed_capacity_counter::should_add_capacity(const rjson::value& request)
|
||||
}
|
||||
|
||||
void consumed_capacity_counter::add_consumed_capacity_to_response_if_needed(rjson::value& response) const noexcept {
|
||||
if (_should_add_to_response) {
|
||||
if (_should_add_to_reponse) {
|
||||
auto consumption = rjson::empty_object();
|
||||
rjson::add(consumption, "CapacityUnits", get_consumed_capacity_units());
|
||||
rjson::add(response, "ConsumedCapacity", std::move(consumption));
|
||||
@@ -53,9 +53,7 @@ void consumed_capacity_counter::add_consumed_capacity_to_response_if_needed(rjso
|
||||
}
|
||||
|
||||
static uint64_t calculate_half_units(uint64_t unit_block_size, uint64_t total_bytes, bool is_quorum) {
|
||||
// Avoid potential integer overflow when total_bytes is close to UINT64_MAX
|
||||
// by using division with modulo instead of addition before division
|
||||
uint64_t half_units = total_bytes / unit_block_size + (total_bytes % unit_block_size != 0 ? 1 : 0);
|
||||
uint64_t half_units = (total_bytes + unit_block_size -1) / unit_block_size; //divide by unit_block_size and round up
|
||||
|
||||
if (is_quorum) {
|
||||
half_units *= 2;
|
||||
|
||||
@@ -28,9 +28,9 @@ namespace alternator {
|
||||
class consumed_capacity_counter {
|
||||
public:
|
||||
consumed_capacity_counter() = default;
|
||||
consumed_capacity_counter(bool should_add_to_response) : _should_add_to_response(should_add_to_response){}
|
||||
consumed_capacity_counter(bool should_add_to_reponse) : _should_add_to_reponse(should_add_to_reponse){}
|
||||
bool operator()() const noexcept {
|
||||
return _should_add_to_response;
|
||||
return _should_add_to_reponse;
|
||||
}
|
||||
|
||||
consumed_capacity_counter& operator +=(uint64_t bytes);
|
||||
@@ -44,7 +44,7 @@ public:
|
||||
uint64_t _total_bytes = 0;
|
||||
static bool should_add_capacity(const rjson::value& request);
|
||||
protected:
|
||||
bool _should_add_to_response = false;
|
||||
bool _should_add_to_reponse = false;
|
||||
};
|
||||
|
||||
class rcu_consumed_capacity_counter : public consumed_capacity_counter {
|
||||
|
||||
@@ -28,7 +28,6 @@ static logging::logger logger("alternator_controller");
|
||||
controller::controller(
|
||||
sharded<gms::gossiper>& gossiper,
|
||||
sharded<service::storage_proxy>& proxy,
|
||||
sharded<service::storage_service>& ss,
|
||||
sharded<service::migration_manager>& mm,
|
||||
sharded<db::system_distributed_keyspace>& sys_dist_ks,
|
||||
sharded<cdc::generation_service>& cdc_gen_svc,
|
||||
@@ -40,7 +39,6 @@ controller::controller(
|
||||
: protocol_server(sg)
|
||||
, _gossiper(gossiper)
|
||||
, _proxy(proxy)
|
||||
, _ss(ss)
|
||||
, _mm(mm)
|
||||
, _sys_dist_ks(sys_dist_ks)
|
||||
, _cdc_gen_svc(cdc_gen_svc)
|
||||
@@ -91,7 +89,7 @@ future<> controller::start_server() {
|
||||
auto get_timeout_in_ms = [] (const db::config& cfg) -> utils::updateable_value<uint32_t> {
|
||||
return cfg.alternator_timeout_in_ms;
|
||||
};
|
||||
_executor.start(std::ref(_gossiper), std::ref(_proxy), std::ref(_ss), std::ref(_mm), std::ref(_sys_dist_ks),
|
||||
_executor.start(std::ref(_gossiper), std::ref(_proxy), std::ref(_mm), std::ref(_sys_dist_ks),
|
||||
sharded_parameter(get_cdc_metadata, std::ref(_cdc_gen_svc)), _ssg.value(),
|
||||
sharded_parameter(get_timeout_in_ms, std::ref(_config))).get();
|
||||
_server.start(std::ref(_executor), std::ref(_proxy), std::ref(_gossiper), std::ref(_auth_service), std::ref(_sl_controller)).get();
|
||||
@@ -105,23 +103,11 @@ future<> controller::start_server() {
|
||||
alternator_port = _config.alternator_port();
|
||||
_listen_addresses.push_back({addr, *alternator_port});
|
||||
}
|
||||
std::optional<uint16_t> alternator_port_proxy_protocol;
|
||||
if (_config.alternator_port_proxy_protocol()) {
|
||||
alternator_port_proxy_protocol = _config.alternator_port_proxy_protocol();
|
||||
_listen_addresses.push_back({addr, *alternator_port_proxy_protocol});
|
||||
}
|
||||
std::optional<uint16_t> alternator_https_port;
|
||||
std::optional<uint16_t> alternator_https_port_proxy_protocol;
|
||||
std::optional<tls::credentials_builder> creds;
|
||||
if (_config.alternator_https_port() || _config.alternator_https_port_proxy_protocol()) {
|
||||
if (_config.alternator_https_port()) {
|
||||
alternator_https_port = _config.alternator_https_port();
|
||||
_listen_addresses.push_back({addr, *alternator_https_port});
|
||||
}
|
||||
if (_config.alternator_https_port_proxy_protocol()) {
|
||||
alternator_https_port_proxy_protocol = _config.alternator_https_port_proxy_protocol();
|
||||
_listen_addresses.push_back({addr, *alternator_https_port_proxy_protocol});
|
||||
}
|
||||
if (_config.alternator_https_port()) {
|
||||
alternator_https_port = _config.alternator_https_port();
|
||||
_listen_addresses.push_back({addr, *alternator_https_port});
|
||||
creds.emplace();
|
||||
auto opts = _config.alternator_encryption_options();
|
||||
if (opts.empty()) {
|
||||
@@ -147,29 +133,20 @@ future<> controller::start_server() {
|
||||
}
|
||||
}
|
||||
_server.invoke_on_all(
|
||||
[this, addr, alternator_port, alternator_https_port, alternator_port_proxy_protocol, alternator_https_port_proxy_protocol, creds = std::move(creds)] (server& server) mutable {
|
||||
return server.init(addr, alternator_port, alternator_https_port, alternator_port_proxy_protocol, alternator_https_port_proxy_protocol, creds,
|
||||
[this, addr, alternator_port, alternator_https_port, creds = std::move(creds)] (server& server) mutable {
|
||||
return server.init(addr, alternator_port, alternator_https_port, creds,
|
||||
_config.alternator_enforce_authorization,
|
||||
_config.alternator_warn_authorization,
|
||||
_config.alternator_max_users_query_size_in_trace_output,
|
||||
&_memory_limiter.local().get_semaphore(),
|
||||
_config.max_concurrent_requests_per_shard);
|
||||
}).handle_exception([this, addr, alternator_port, alternator_https_port, alternator_port_proxy_protocol, alternator_https_port_proxy_protocol] (std::exception_ptr ep) {
|
||||
logger.error("Failed to set up Alternator HTTP server on {} port {}, TLS port {}, proxy-protocol port {}, TLS proxy-protocol port {}: {}",
|
||||
addr,
|
||||
alternator_port ? std::to_string(*alternator_port) : "OFF",
|
||||
alternator_https_port ? std::to_string(*alternator_https_port) : "OFF",
|
||||
alternator_port_proxy_protocol ? std::to_string(*alternator_port_proxy_protocol) : "OFF",
|
||||
alternator_https_port_proxy_protocol ? std::to_string(*alternator_https_port_proxy_protocol) : "OFF",
|
||||
ep);
|
||||
}).handle_exception([this, addr, alternator_port, alternator_https_port] (std::exception_ptr ep) {
|
||||
logger.error("Failed to set up Alternator HTTP server on {} port {}, TLS port {}: {}",
|
||||
addr, alternator_port ? std::to_string(*alternator_port) : "OFF", alternator_https_port ? std::to_string(*alternator_https_port) : "OFF", ep);
|
||||
return stop_server().then([ep = std::move(ep)] { return make_exception_future<>(ep); });
|
||||
}).then([addr, alternator_port, alternator_https_port, alternator_port_proxy_protocol, alternator_https_port_proxy_protocol] {
|
||||
logger.info("Alternator server listening on {}, HTTP port {}, HTTPS port {}, proxy-protocol port {}, TLS proxy-protocol port {}",
|
||||
addr,
|
||||
alternator_port ? std::to_string(*alternator_port) : "OFF",
|
||||
alternator_https_port ? std::to_string(*alternator_https_port) : "OFF",
|
||||
alternator_port_proxy_protocol ? std::to_string(*alternator_port_proxy_protocol) : "OFF",
|
||||
alternator_https_port_proxy_protocol ? std::to_string(*alternator_https_port_proxy_protocol) : "OFF");
|
||||
}).then([addr, alternator_port, alternator_https_port] {
|
||||
logger.info("Alternator server listening on {}, HTTP port {}, HTTPS port {}",
|
||||
addr, alternator_port ? std::to_string(*alternator_port) : "OFF", alternator_https_port ? std::to_string(*alternator_https_port) : "OFF");
|
||||
}).get();
|
||||
});
|
||||
}
|
||||
|
||||
@@ -15,7 +15,6 @@
|
||||
|
||||
namespace service {
|
||||
class storage_proxy;
|
||||
class storage_service;
|
||||
class migration_manager;
|
||||
class memory_limiter;
|
||||
}
|
||||
@@ -58,7 +57,6 @@ class server;
|
||||
class controller : public protocol_server {
|
||||
sharded<gms::gossiper>& _gossiper;
|
||||
sharded<service::storage_proxy>& _proxy;
|
||||
sharded<service::storage_service>& _ss;
|
||||
sharded<service::migration_manager>& _mm;
|
||||
sharded<db::system_distributed_keyspace>& _sys_dist_ks;
|
||||
sharded<cdc::generation_service>& _cdc_gen_svc;
|
||||
@@ -76,7 +74,6 @@ public:
|
||||
controller(
|
||||
sharded<gms::gossiper>& gossiper,
|
||||
sharded<service::storage_proxy>& proxy,
|
||||
sharded<service::storage_service>& ss,
|
||||
sharded<service::migration_manager>& mm,
|
||||
sharded<db::system_distributed_keyspace>& sys_dist_ks,
|
||||
sharded<cdc::generation_service>& cdc_gen_svc,
|
||||
|
||||
@@ -17,7 +17,6 @@
|
||||
#include "auth/service.hh"
|
||||
#include "db/config.hh"
|
||||
#include "db/view/view_build_status.hh"
|
||||
#include "locator/tablets.hh"
|
||||
#include "mutation/tombstone.hh"
|
||||
#include "locator/abstract_replication_strategy.hh"
|
||||
#include "utils/log.hh"
|
||||
@@ -68,14 +67,6 @@ using namespace std::chrono_literals;
|
||||
|
||||
logging::logger elogger("alternator-executor");
|
||||
|
||||
// Hash support for a pair of sstrings, so that such a pair can be used as a
// key in unordered containers (e.g. std::unordered_map).
namespace std {
|
||||
template <> struct hash<std::pair<sstring, sstring>> {
|
||||
// Combines the hashes of both components, scaling them by different
// multipliers (1009 and 3) so that swapping the two components generally
// yields a different value.
size_t operator () (const std::pair<sstring, sstring>& p) const {
|
||||
return std::hash<sstring>()(p.first) * 1009 + std::hash<sstring>()(p.second) * 3;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace alternator {
|
||||
|
||||
// Alternator-specific table properties stored as hidden table tags:
|
||||
@@ -257,66 +248,14 @@ static const rjson::value::Member& get_single_member(const rjson::value& v, cons
|
||||
return *(v.MemberBegin());
|
||||
}
|
||||
|
||||
// Cache of per-table size values, keyed by (keyspace name, table name), owned
// by an executor.
//
// The object registers itself as a migration listener on construction so that
// it can drop the cached entry when a table is dropped. stop() must be called
// (and awaited) before the object is destroyed: it unregisters the listener,
// and the destructor reports a fatal internal error if that did not happen.
// The class is neither copyable nor movable, since `this` is handed out to the
// notifier.
class executor::describe_table_info_manager : public service::migration_listener::empty_listener {
|
||||
executor &_executor;
|
||||
|
||||
// Cached data for a single table.
struct table_info {
|
||||
// Table size together with its expiry.
// NOTE(review): exact expiry semantics live in
// utils::simple_value_with_expiry, which is not visible here.
utils::simple_value_with_expiry<std::uint64_t> size_in_bytes;
|
||||
};
|
||||
// Keyed by (keyspace name, table name).
std::unordered_map<std::pair<sstring, sstring>, table_info> info_for_tables;
|
||||
// True while this object is registered as a listener, i.e. between the
// constructor and a completed stop().
bool active = false;
|
||||
|
||||
public:
|
||||
// Registers this object as a listener with the database notifier reached
// through the executor's storage proxy.
describe_table_info_manager(executor& executor) : _executor(executor) {
|
||||
_executor._proxy.data_dictionary().real_database_ptr()->get_notifier().register_listener(this);
|
||||
active = true;
|
||||
}
|
||||
describe_table_info_manager(const describe_table_info_manager &) = delete;
|
||||
describe_table_info_manager(describe_table_info_manager&&) = delete;
|
||||
// Destroying the object while it is still registered would leave a
// dangling listener pointer, so this is treated as a fatal internal error.
~describe_table_info_manager() {
|
||||
if (active) {
|
||||
on_fatal_internal_error(elogger, "describe_table_info_manager was not stopped before destruction");
|
||||
}
|
||||
}
|
||||
|
||||
describe_table_info_manager &operator = (const describe_table_info_manager &) = delete;
|
||||
describe_table_info_manager &operator = (describe_table_info_manager&&) = delete;
|
||||
|
||||
// Current time on the clock used for all expiry time points of this cache.
static std::chrono::high_resolution_clock::time_point now() {
|
||||
return std::chrono::high_resolution_clock::now();
|
||||
}
|
||||
|
||||
// Returns the cached size for the given table, or std::nullopt if the
// table has no entry.
// NOTE(review): for an existing entry the result is whatever
// simple_value_with_expiry::get() returns - presumably std::nullopt once
// the value has expired; confirm against that helper.
std::optional<std::uint64_t> get_cached_size_in_bytes(const sstring &ks_name, const sstring &cf_name) const {
|
||||
auto it = info_for_tables.find({ks_name, cf_name});
|
||||
if (it != info_for_tables.end()) {
|
||||
return it->second.size_in_bytes.get();
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
// Records a size for the given table, creating the entry if needed.
// NOTE(review): judging by the helper's name, the stored value is only
// replaced when `expiry` is later than the current one - confirm against
// simple_value_with_expiry::set_if_longer_expiry.
void cache_size_in_bytes(sstring ks_name, sstring cf_name, std::uint64_t size_in_bytes, std::chrono::high_resolution_clock::time_point expiry) {
|
||||
info_for_tables[{std::move(ks_name), std::move(cf_name)}].size_in_bytes.set_if_longer_expiry(size_in_bytes, expiry);
|
||||
}
|
||||
// Unregisters this object from the notifier; only after that completes is
// the object marked inactive and therefore safe to destroy.
future<> stop() {
|
||||
co_await _executor._proxy.data_dictionary().real_database_ptr()->get_notifier().unregister_listener(this);
|
||||
active = false;
|
||||
co_return;
|
||||
}
|
||||
// Schema-change callback: forget the cached entry of a dropped table.
// Keyspaces whose name does not start with executor::KEYSPACE_NAME_PREFIX
// are ignored.
void on_drop_column_family(const sstring& ks_name, const sstring& cf_name) override {
|
||||
if (!ks_name.starts_with(executor::KEYSPACE_NAME_PREFIX)) return;
|
||||
info_for_tables.erase({ks_name, cf_name});
|
||||
}
|
||||
};
|
||||
|
||||
executor::executor(gms::gossiper& gossiper,
|
||||
service::storage_proxy& proxy,
|
||||
service::storage_service& ss,
|
||||
service::migration_manager& mm,
|
||||
db::system_distributed_keyspace& sdks,
|
||||
cdc::metadata& cdc_metadata,
|
||||
smp_service_group ssg,
|
||||
utils::updateable_value<uint32_t> default_timeout_in_ms)
|
||||
: _gossiper(gossiper),
|
||||
_ss(ss),
|
||||
_proxy(proxy),
|
||||
_mm(mm),
|
||||
_sdks(sdks),
|
||||
@@ -329,7 +268,6 @@ executor::executor(gms::gossiper& gossiper,
|
||||
_stats))
|
||||
{
|
||||
s_default_timeout_in_ms = std::move(default_timeout_in_ms);
|
||||
_describe_table_info_manager = std::make_unique<describe_table_info_manager>(*this);
|
||||
register_metrics(_metrics, _stats);
|
||||
}
|
||||
|
||||
@@ -814,46 +752,12 @@ static future<bool> is_view_built(
|
||||
|
||||
}
|
||||
|
||||
// Stores a freshly calculated table size in the size cache of every executor
// instance reached through container().invoke_on_all().
//
// schema        - identifies the table (its keyspace and table names are the cache key)
// size_in_bytes - the value to cache
// ttl           - how long, counted from now, the value should stay valid
//
// The returned future resolves once invoke_on_all() has completed.
future<> executor::cache_newly_calculated_size_on_all_shards(schema_ptr schema, std::uint64_t size_in_bytes, std::chrono::nanoseconds ttl) {
|
||||
// Compute the absolute expiry once, up front, so that every instance
// records the same time point.
auto expiry = describe_table_info_manager::now() + ttl;
|
||||
return container().invoke_on_all(
|
||||
[schema, size_in_bytes, expiry] (executor& exec) {
|
||||
exec._describe_table_info_manager->cache_size_in_bytes(schema->ks_name(), schema->cf_name(), size_in_bytes, expiry);
|
||||
});
|
||||
}
|
||||
|
||||
// Adds the "TableSizeBytes" member to table_description.
//
// The value comes from the local size cache when an entry is available.
// Otherwise, unless the table is being deleted, the size is estimated through
// the storage service and then cached on all shards. For a table that is being
// deleted and has no cached value, 0 is reported.
//
// table_description - JSON object that receives the "TableSizeBytes" member
// schema            - the table whose size is reported
// deleting          - true when the table is being deleted; skips the estimate
future<> executor::fill_table_size(rjson::value &table_description, schema_ptr schema, bool deleting) {
|
||||
auto cached_size = _describe_table_info_manager->get_cached_size_in_bytes(schema->ks_name(), schema->cf_name());
|
||||
std::uint64_t total_size = 0;
|
||||
if (cached_size) {
|
||||
total_size = *cached_size;
|
||||
} else {
|
||||
// there's no point in trying to estimate value of table that is being deleted, as other nodes more often than not might
|
||||
// move forward with deletion faster than we calculate the size
|
||||
if (!deleting) {
|
||||
// Errors during estimation are ignored (ignore_errors::yes).
total_size = co_await _ss.estimate_total_sstable_volume(schema->id(), service::storage_service::ignore_errors::yes);
|
||||
// NOTE(review): despite its name, `expiry` is a validity duration (TTL)
// read from the configuration, not a time point; the absolute expiry is
// computed by cache_newly_calculated_size_on_all_shards().
const auto expiry = std::chrono::seconds{ _proxy.data_dictionary().get_config().alternator_describe_table_info_cache_validity_in_seconds() };
|
||||
// Note: we don't care when the notification of other shards will finish, as long as it will be done
|
||||
// A race condition is possible: if a DescribeTable request arrives on a different shard before
|
||||
// that shard receives the cached size, it will recalculate independently. This is acceptable because:
|
||||
// 1. Both calculations will cache their results with an expiry time
|
||||
// 2. Expiry times are unlikely to be identical, so eventually all shards converge to the most recent value
|
||||
// 3. Even if expiry times match, different shards may briefly return different table sizes
|
||||
// 4. This temporary inconsistency is acceptable per DynamoDB specification, which doesn't guarantee
|
||||
// exact precision for DescribeTable size information
|
||||
// NOTE(review): the note above says completion time does not matter, yet
// the call below is awaited, so the response is delayed until all shards
// have been updated - confirm which behavior is intended.
co_await cache_newly_calculated_size_on_all_shards(schema, total_size, expiry);
|
||||
}
|
||||
}
|
||||
rjson::add(table_description, "TableSizeBytes", total_size);
|
||||
}
|
||||
|
||||
future<rjson::value> executor::fill_table_description(schema_ptr schema, table_status tbl_status, service::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit)
|
||||
static future<rjson::value> fill_table_description(schema_ptr schema, table_status tbl_status, service::storage_proxy& proxy, service::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit)
|
||||
{
|
||||
rjson::value table_description = rjson::empty_object();
|
||||
auto tags_ptr = db::get_tags_of_table(schema);
|
||||
|
||||
rjson::add(table_description, "TableName", rjson::from_string(schema->cf_name()));
|
||||
co_await fill_table_size(table_description, schema, tbl_status == table_status::deleting);
|
||||
|
||||
auto creation_timestamp = get_table_creation_time(*schema);
|
||||
|
||||
@@ -897,7 +801,9 @@ future<rjson::value> executor::fill_table_description(schema_ptr schema, table_s
|
||||
rjson::add(table_description["ProvisionedThroughput"], "WriteCapacityUnits", wcu);
|
||||
rjson::add(table_description["ProvisionedThroughput"], "NumberOfDecreasesToday", 0);
|
||||
|
||||
data_dictionary::table t = _proxy.data_dictionary().find_column_family(schema);
|
||||
|
||||
|
||||
data_dictionary::table t = proxy.data_dictionary().find_column_family(schema);
|
||||
|
||||
if (tbl_status != table_status::deleting) {
|
||||
rjson::add(table_description, "CreationDateTime", rjson::value(creation_timestamp));
|
||||
@@ -934,7 +840,7 @@ future<rjson::value> executor::fill_table_description(schema_ptr schema, table_s
|
||||
// (for a built view) or CREATING+Backfilling (if view building
|
||||
// is in progress).
|
||||
if (!is_lsi) {
|
||||
if (co_await is_view_built(vptr, _proxy, client_state, trace_state, permit)) {
|
||||
if (co_await is_view_built(vptr, proxy, client_state, trace_state, permit)) {
|
||||
rjson::add(view_entry, "IndexStatus", "ACTIVE");
|
||||
} else {
|
||||
rjson::add(view_entry, "IndexStatus", "CREATING");
|
||||
@@ -962,8 +868,9 @@ future<rjson::value> executor::fill_table_description(schema_ptr schema, table_s
|
||||
}
|
||||
rjson::add(table_description, "AttributeDefinitions", std::move(attribute_definitions));
|
||||
}
|
||||
executor::supplement_table_stream_info(table_description, *schema, _proxy);
|
||||
executor::supplement_table_stream_info(table_description, *schema, proxy);
|
||||
|
||||
// FIXME: still missing some response fields (issue #5026)
|
||||
co_return table_description;
|
||||
}
|
||||
|
||||
@@ -983,7 +890,7 @@ future<executor::request_return_type> executor::describe_table(client_state& cli
|
||||
get_stats_from_schema(_proxy, *schema)->api_operations.describe_table++;
|
||||
tracing::add_alternator_table_name(trace_state, schema->cf_name());
|
||||
|
||||
rjson::value table_description = co_await fill_table_description(schema, table_status::active, client_state, trace_state, permit);
|
||||
rjson::value table_description = co_await fill_table_description(schema, table_status::active, _proxy, client_state, trace_state, permit);
|
||||
rjson::value response = rjson::empty_object();
|
||||
rjson::add(response, "Table", std::move(table_description));
|
||||
elogger.trace("returning {}", response);
|
||||
@@ -1086,7 +993,7 @@ future<executor::request_return_type> executor::delete_table(client_state& clien
|
||||
auto& p = _proxy.container();
|
||||
|
||||
schema_ptr schema = get_table(_proxy, request);
|
||||
rjson::value table_description = co_await fill_table_description(schema, table_status::deleting, client_state, trace_state, permit);
|
||||
rjson::value table_description = co_await fill_table_description(schema, table_status::deleting, _proxy, client_state, trace_state, permit);
|
||||
co_await verify_permission(_enforce_authorization, _warn_authorization, client_state, schema, auth::permission::DROP, _stats);
|
||||
co_await _mm.container().invoke_on(0, [&, cs = client_state.move_to_other_shard()] (service::migration_manager& mm) -> future<> {
|
||||
size_t retries = mm.get_concurrent_ddl_retries();
|
||||
@@ -1650,7 +1557,8 @@ static future<> mark_view_schemas_as_built(utils::chunked_vector<mutation>& out,
|
||||
}
|
||||
}
|
||||
|
||||
future<executor::request_return_type> executor::create_table_on_shard0(service::client_state&& client_state, tracing::trace_state_ptr trace_state, rjson::value request, bool enforce_authorization, bool warn_authorization, const db::tablets_mode_t::mode tablets_mode) {
|
||||
static future<executor::request_return_type> create_table_on_shard0(service::client_state&& client_state, tracing::trace_state_ptr trace_state, rjson::value request,
|
||||
service::storage_proxy& sp, service::migration_manager& mm, gms::gossiper& gossiper, bool enforce_authorization, bool warn_authorization, stats& stats, const db::tablets_mode_t::mode tablets_mode) {
|
||||
SCYLLA_ASSERT(this_shard_id() == 0);
|
||||
|
||||
// We begin by parsing and validating the content of the CreateTable
|
||||
@@ -1837,7 +1745,7 @@ future<executor::request_return_type> executor::create_table_on_shard0(service::
|
||||
|
||||
rjson::value* stream_specification = rjson::find(request, "StreamSpecification");
|
||||
if (stream_specification && stream_specification->IsObject()) {
|
||||
if (executor::add_stream_options(*stream_specification, builder, _proxy)) {
|
||||
if (executor::add_stream_options(*stream_specification, builder, sp)) {
|
||||
validate_cdc_log_name_length(builder.cf_name());
|
||||
}
|
||||
}
|
||||
@@ -1856,7 +1764,7 @@ future<executor::request_return_type> executor::create_table_on_shard0(service::
|
||||
set_table_creation_time(tags_map, db_clock::now());
|
||||
builder.add_extension(db::tags_extension::NAME, ::make_shared<db::tags_extension>(tags_map));
|
||||
|
||||
co_await verify_create_permission(enforce_authorization, warn_authorization, client_state, _stats);
|
||||
co_await verify_create_permission(enforce_authorization, warn_authorization, client_state, stats);
|
||||
|
||||
schema_ptr schema = builder.build();
|
||||
for (auto& view_builder : view_builders) {
|
||||
@@ -1872,49 +1780,38 @@ future<executor::request_return_type> executor::create_table_on_shard0(service::
|
||||
view_builder.with_view_info(schema, include_all_columns, ""/*where clause*/);
|
||||
}
|
||||
|
||||
size_t retries = _mm.get_concurrent_ddl_retries();
|
||||
size_t retries = mm.get_concurrent_ddl_retries();
|
||||
for (;;) {
|
||||
auto group0_guard = co_await _mm.start_group0_operation();
|
||||
auto group0_guard = co_await mm.start_group0_operation();
|
||||
auto ts = group0_guard.write_timestamp();
|
||||
utils::chunked_vector<mutation> schema_mutations;
|
||||
auto ksm = create_keyspace_metadata(keyspace_name, _proxy, _gossiper, ts, tags_map, _proxy.features(), tablets_mode);
|
||||
locator::replication_strategy_params params(ksm->strategy_options(), ksm->initial_tablets(), ksm->consistency_option());
|
||||
const auto& topo = _proxy.local_db().get_token_metadata().get_topology();
|
||||
auto rs = locator::abstract_replication_strategy::create_replication_strategy(ksm->strategy_name(), params, topo);
|
||||
auto ksm = create_keyspace_metadata(keyspace_name, sp, gossiper, ts, tags_map, sp.features(), tablets_mode);
|
||||
// Alternator Streams doesn't yet work when the table uses tablets (#23838)
|
||||
if (stream_specification && stream_specification->IsObject()) {
|
||||
auto stream_enabled = rjson::find(*stream_specification, "StreamEnabled");
|
||||
if (stream_enabled && stream_enabled->IsBool() && stream_enabled->GetBool()) {
|
||||
locator::replication_strategy_params params(ksm->strategy_options(), ksm->initial_tablets(), ksm->consistency_option());
|
||||
const auto& topo = sp.local_db().get_token_metadata().get_topology();
|
||||
auto rs = locator::abstract_replication_strategy::create_replication_strategy(ksm->strategy_name(), params, topo);
|
||||
if (rs->uses_tablets()) {
|
||||
co_return api_error::validation("Streams not yet supported on a table using tablets (issue #23838). "
|
||||
"If you want to use streams, create a table with vnodes by setting the tag 'system:initial_tablets' set to 'none'.");
|
||||
}
|
||||
}
|
||||
}
|
||||
// Creating an index in tablets mode requires the keyspace to be RF-rack-valid.
|
||||
// GSI and LSI indexes are based on materialized views which require RF-rack-validity to avoid consistency issues.
|
||||
if (!view_builders.empty() || _proxy.data_dictionary().get_config().rf_rack_valid_keyspaces()) {
|
||||
try {
|
||||
locator::assert_rf_rack_valid_keyspace(keyspace_name, _proxy.local_db().get_token_metadata_ptr(), *rs);
|
||||
} catch (const std::invalid_argument& ex) {
|
||||
if (!view_builders.empty()) {
|
||||
co_return api_error::validation(fmt::format("GlobalSecondaryIndexes and LocalSecondaryIndexes on a table "
|
||||
"using tablets require the number of racks in the cluster to be either 1 or 3"));
|
||||
} else {
|
||||
co_return api_error::validation(fmt::format("Cannot create table '{}' with tablets: the configuration "
|
||||
"option 'rf_rack_valid_keyspaces' is enabled, which enforces that tables using tablets can only be created in clusters "
|
||||
"that have either 1 or 3 racks", table_name));
|
||||
}
|
||||
}
|
||||
// Creating an index in tablets mode requires the rf_rack_valid_keyspaces option to be enabled.
|
||||
// GSI and LSI indexes are based on materialized views which require this option to avoid consistency issues.
|
||||
if (!view_builders.empty() && ksm->uses_tablets() && !sp.data_dictionary().get_config().rf_rack_valid_keyspaces()) {
|
||||
co_return api_error::validation("GlobalSecondaryIndexes and LocalSecondaryIndexes with tablets require the rf_rack_valid_keyspaces option to be enabled.");
|
||||
}
|
||||
try {
|
||||
schema_mutations = service::prepare_new_keyspace_announcement(_proxy.local_db(), ksm, ts);
|
||||
schema_mutations = service::prepare_new_keyspace_announcement(sp.local_db(), ksm, ts);
|
||||
} catch (exceptions::already_exists_exception&) {
|
||||
if (_proxy.data_dictionary().has_schema(keyspace_name, table_name)) {
|
||||
if (sp.data_dictionary().has_schema(keyspace_name, table_name)) {
|
||||
co_return api_error::resource_in_use(fmt::format("Table {} already exists", table_name));
|
||||
}
|
||||
}
|
||||
if (_proxy.data_dictionary().try_find_table(schema->id())) {
|
||||
if (sp.data_dictionary().try_find_table(schema->id())) {
|
||||
// This should never happen, the ID is supposed to be unique
|
||||
co_return api_error::internal(format("Table with ID {} already exists", schema->id()));
|
||||
}
|
||||
@@ -1923,9 +1820,9 @@ future<executor::request_return_type> executor::create_table_on_shard0(service::
|
||||
for (schema_builder& view_builder : view_builders) {
|
||||
schemas.push_back(view_builder.build());
|
||||
}
|
||||
co_await service::prepare_new_column_families_announcement(schema_mutations, _proxy, *ksm, schemas, ts);
|
||||
co_await service::prepare_new_column_families_announcement(schema_mutations, sp, *ksm, schemas, ts);
|
||||
if (ksm->uses_tablets()) {
|
||||
co_await mark_view_schemas_as_built(schema_mutations, schemas, ts, _proxy);
|
||||
co_await mark_view_schemas_as_built(schema_mutations, schemas, ts, sp);
|
||||
}
|
||||
|
||||
// If a role is allowed to create a table, we must give it permissions to
|
||||
@@ -1950,7 +1847,7 @@ future<executor::request_return_type> executor::create_table_on_shard0(service::
|
||||
}
|
||||
std::tie(schema_mutations, group0_guard) = co_await std::move(mc).extract();
|
||||
try {
|
||||
co_await _mm.announce(std::move(schema_mutations), std::move(group0_guard), fmt::format("alternator-executor: create {} table", table_name));
|
||||
co_await mm.announce(std::move(schema_mutations), std::move(group0_guard), fmt::format("alternator-executor: create {} table", table_name));
|
||||
break;
|
||||
} catch (const service::group0_concurrent_modification& ex) {
|
||||
elogger.info("Failed to execute CreateTable {} due to concurrent schema modifications. {}.",
|
||||
@@ -1962,9 +1859,9 @@ future<executor::request_return_type> executor::create_table_on_shard0(service::
|
||||
}
|
||||
}
|
||||
|
||||
co_await _mm.wait_for_schema_agreement(_proxy.local_db(), db::timeout_clock::now() + 10s, nullptr);
|
||||
co_await mm.wait_for_schema_agreement(sp.local_db(), db::timeout_clock::now() + 10s, nullptr);
|
||||
rjson::value status = rjson::empty_object();
|
||||
executor::supplement_table_info(request, *schema, _proxy);
|
||||
executor::supplement_table_info(request, *schema, sp);
|
||||
rjson::add(status, "TableDescription", std::move(request));
|
||||
co_return rjson::print(std::move(status));
|
||||
}
|
||||
@@ -1973,11 +1870,10 @@ future<executor::request_return_type> executor::create_table(client_state& clien
|
||||
_stats.api_operations.create_table++;
|
||||
elogger.trace("Creating table {}", request);
|
||||
|
||||
co_return co_await _mm.container().invoke_on(0, [&, tr = tracing::global_trace_state_ptr(trace_state), request = std::move(request), &e = this->container(), client_state_other_shard = client_state.move_to_other_shard(), enforce_authorization = bool(_enforce_authorization), warn_authorization = bool(_warn_authorization)]
|
||||
co_return co_await _mm.container().invoke_on(0, [&, tr = tracing::global_trace_state_ptr(trace_state), request = std::move(request), &sp = _proxy.container(), &g = _gossiper.container(), &e = this->container(), client_state_other_shard = client_state.move_to_other_shard(), enforce_authorization = bool(_enforce_authorization), warn_authorization = bool(_warn_authorization)]
|
||||
(service::migration_manager& mm) mutable -> future<executor::request_return_type> {
|
||||
const db::tablets_mode_t::mode tablets_mode = _proxy.data_dictionary().get_config().tablets_mode_for_new_keyspaces(); // type cast
|
||||
// `invoke_on` hopped us to shard 0, but `this` still points to the `executor` of the original shard, so we need to hop to shard 0's executor too.
|
||||
co_return co_await e.local().create_table_on_shard0(client_state_other_shard.get(), tr, std::move(request), enforce_authorization, warn_authorization, std::move(tablets_mode));
|
||||
co_return co_await create_table_on_shard0(client_state_other_shard.get(), tr, std::move(request), sp.local(), mm, g.local(), enforce_authorization, warn_authorization, e.local()._stats, std::move(tablets_mode));
|
||||
});
|
||||
}
|
||||
|
||||
@@ -2128,12 +2024,9 @@ future<executor::request_return_type> executor::update_table(client_state& clien
|
||||
co_return api_error::validation(fmt::format(
|
||||
"LSI {} already exists in table {}, can't use same name for GSI", index_name, table_name));
|
||||
}
|
||||
try {
|
||||
locator::assert_rf_rack_valid_keyspace(keyspace_name, p.local().local_db().get_token_metadata_ptr(),
|
||||
p.local().local_db().find_keyspace(keyspace_name).get_replication_strategy());
|
||||
} catch (const std::invalid_argument& ex) {
|
||||
co_return api_error::validation(fmt::format("GlobalSecondaryIndexes on a table "
|
||||
"using tablets require the number of racks in the cluster to be either 1 or 3"));
|
||||
if (p.local().local_db().find_keyspace(keyspace_name).get_replication_strategy().uses_tablets() &&
|
||||
!p.local().data_dictionary().get_config().rf_rack_valid_keyspaces()) {
|
||||
co_return api_error::validation("GlobalSecondaryIndexes with tablets require the rf_rack_valid_keyspaces option to be enabled.");
|
||||
}
|
||||
|
||||
elogger.trace("Adding GSI {}", index_name);
|
||||
@@ -6003,11 +5896,6 @@ future<executor::request_return_type> executor::list_tables(client_state& client
|
||||
_stats.api_operations.list_tables++;
|
||||
elogger.trace("Listing tables {}", request);
|
||||
|
||||
co_await utils::get_local_injector().inject("alternator_list_tables", [] (auto& handler) -> future<> {
|
||||
handler.set("waiting", true);
|
||||
co_await handler.wait_for_message(std::chrono::steady_clock::now() + std::chrono::minutes{5});
|
||||
});
|
||||
|
||||
rjson::value* exclusive_start_json = rjson::find(request, "ExclusiveStartTableName");
|
||||
rjson::value* limit_json = rjson::find(request, "Limit");
|
||||
std::string exclusive_start = exclusive_start_json ? rjson::to_string(*exclusive_start_json) : "";
|
||||
@@ -6199,10 +6087,9 @@ future<> executor::start() {
|
||||
}
|
||||
|
||||
future<> executor::stop() {
|
||||
co_await _describe_table_info_manager->stop();
|
||||
// disconnect from the value source, but keep the value unchanged.
|
||||
s_default_timeout_in_ms = utils::updateable_value<uint32_t>{s_default_timeout_in_ms()};
|
||||
co_await _parsed_expression_cache->stop();
|
||||
return _parsed_expression_cache->stop();
|
||||
}
|
||||
|
||||
} // namespace alternator
|
||||
|
||||
@@ -17,13 +17,11 @@
|
||||
#include "service/client_state.hh"
|
||||
#include "service_permit.hh"
|
||||
#include "db/timeout_clock.hh"
|
||||
#include "db/config.hh"
|
||||
|
||||
#include "alternator/error.hh"
|
||||
#include "stats.hh"
|
||||
#include "utils/rjson.hh"
|
||||
#include "utils/updateable_value.hh"
|
||||
#include "utils/simple_value_with_expiry.hh"
|
||||
|
||||
#include "tracing/trace_state.hh"
|
||||
|
||||
@@ -43,7 +41,6 @@ namespace cql3::selection {
|
||||
namespace service {
|
||||
class storage_proxy;
|
||||
class cas_shard;
|
||||
class storage_service;
|
||||
}
|
||||
|
||||
namespace cdc {
|
||||
@@ -60,7 +57,6 @@ class schema_builder;
|
||||
|
||||
namespace alternator {
|
||||
|
||||
enum class table_status;
|
||||
class rmw_operation;
|
||||
class put_or_delete_item;
|
||||
|
||||
@@ -140,7 +136,6 @@ class expression_cache;
|
||||
|
||||
class executor : public peering_sharded_service<executor> {
|
||||
gms::gossiper& _gossiper;
|
||||
service::storage_service& _ss;
|
||||
service::storage_proxy& _proxy;
|
||||
service::migration_manager& _mm;
|
||||
db::system_distributed_keyspace& _sdks;
|
||||
@@ -153,11 +148,6 @@ class executor : public peering_sharded_service<executor> {
|
||||
|
||||
std::unique_ptr<parsed::expression_cache> _parsed_expression_cache;
|
||||
|
||||
struct describe_table_info_manager;
|
||||
std::unique_ptr<describe_table_info_manager> _describe_table_info_manager;
|
||||
|
||||
future<> cache_newly_calculated_size_on_all_shards(schema_ptr schema, std::uint64_t size_in_bytes, std::chrono::nanoseconds ttl);
|
||||
future<> fill_table_size(rjson::value &table_description, schema_ptr schema, bool deleting);
|
||||
public:
|
||||
using client_state = service::client_state;
|
||||
// request_return_type is the return type of the executor methods, which
|
||||
@@ -183,7 +173,6 @@ public:
|
||||
|
||||
executor(gms::gossiper& gossiper,
|
||||
service::storage_proxy& proxy,
|
||||
service::storage_service& ss,
|
||||
service::migration_manager& mm,
|
||||
db::system_distributed_keyspace& sdks,
|
||||
cdc::metadata& cdc_metadata,
|
||||
@@ -231,8 +220,6 @@ private:
|
||||
friend class rmw_operation;
|
||||
|
||||
static void describe_key_schema(rjson::value& parent, const schema&, std::unordered_map<std::string,std::string> * = nullptr, const std::map<sstring, sstring> *tags = nullptr);
|
||||
future<rjson::value> fill_table_description(schema_ptr schema, table_status tbl_status, service::client_state& client_state, tracing::trace_state_ptr trace_state, service_permit permit);
|
||||
future<executor::request_return_type> create_table_on_shard0(service::client_state&& client_state, tracing::trace_state_ptr trace_state, rjson::value request, bool enforce_authorization, bool warn_authorization, const db::tablets_mode_t::mode tablets_mode);
|
||||
|
||||
future<> do_batch_write(
|
||||
std::vector<std::pair<schema_ptr, put_or_delete_item>> mutation_builders,
|
||||
|
||||
@@ -1,301 +0,0 @@
|
||||
/*
|
||||
* Copyright 2025-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#include "alternator/http_compression.hh"
|
||||
#include "alternator/server.hh"
|
||||
#include <seastar/coroutine/maybe_yield.hh>
|
||||
#include <zlib.h>
|
||||
|
||||
static logging::logger slogger("alternator-http-compression");
|
||||
|
||||
namespace alternator {
|
||||
|
||||
|
||||
static constexpr size_t compressed_buffer_size = 1024;
|
||||
class zlib_compressor {
|
||||
z_stream _zs;
|
||||
temporary_buffer<char> _output_buf;
|
||||
noncopyable_function<future<>(temporary_buffer<char>&&)> _write_func;
|
||||
public:
|
||||
zlib_compressor(bool gzip, int compression_level, noncopyable_function<future<>(temporary_buffer<char>&&)> write_func)
|
||||
: _write_func(std::move(write_func)) {
|
||||
memset(&_zs, 0, sizeof(_zs));
|
||||
if (deflateInit2(&_zs, std::clamp(compression_level, Z_NO_COMPRESSION, Z_BEST_COMPRESSION), Z_DEFLATED,
|
||||
(gzip ? 16 : 0) + MAX_WBITS, 8, Z_DEFAULT_STRATEGY) != Z_OK) {
|
||||
// Should only happen if memory allocation fails
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
}
|
||||
~zlib_compressor() {
|
||||
deflateEnd(&_zs);
|
||||
}
|
||||
future<> close() {
|
||||
return compress(nullptr, 0, true);
|
||||
}
|
||||
|
||||
future<> compress(const char* buf, size_t len, bool is_last_chunk = false) {
|
||||
_zs.next_in = reinterpret_cast<unsigned char*>(const_cast<char*>(buf));
|
||||
_zs.avail_in = (uInt) len;
|
||||
int mode = is_last_chunk ? Z_FINISH : Z_NO_FLUSH;
|
||||
while(_zs.avail_in > 0 || is_last_chunk) {
|
||||
co_await coroutine::maybe_yield();
|
||||
if (_output_buf.empty()) {
|
||||
if (is_last_chunk) {
|
||||
uint32_t max_buffer_size = 0;
|
||||
deflatePending(&_zs, &max_buffer_size, nullptr);
|
||||
max_buffer_size += deflateBound(&_zs, _zs.avail_in) + 1;
|
||||
_output_buf = temporary_buffer<char>(std::min(compressed_buffer_size, (size_t) max_buffer_size));
|
||||
} else {
|
||||
_output_buf = temporary_buffer<char>(compressed_buffer_size);
|
||||
}
|
||||
_zs.next_out = reinterpret_cast<unsigned char*>(_output_buf.get_write());
|
||||
_zs.avail_out = compressed_buffer_size;
|
||||
}
|
||||
int e = deflate(&_zs, mode);
|
||||
if (e < Z_OK) {
|
||||
throw api_error::internal("Error during compression of response body");
|
||||
}
|
||||
if (e == Z_STREAM_END || _zs.avail_out < compressed_buffer_size / 4) {
|
||||
_output_buf.trim(compressed_buffer_size - _zs.avail_out);
|
||||
co_await _write_func(std::move(_output_buf));
|
||||
if (e == Z_STREAM_END) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Helper string_view functions for parsing Accept-Encoding header
|
||||
// Case-insensitive equality predicate for string views: returns true when
// both views have the same length and compare equal character by character
// after lower-casing.
struct case_insensitive_cmp_sv {
    bool operator()(std::string_view s1, std::string_view s2) const {
        return std::equal(s1.begin(), s1.end(), s2.begin(), s2.end(),
                [](char a, char b) {
                    // tolower() requires a value representable as unsigned char
                    // (or EOF); passing a negative plain char is undefined.
                    return ::tolower(static_cast<unsigned char>(a)) == ::tolower(static_cast<unsigned char>(b));
                });
    }
};
|
||||
// Returns sv with all leading whitespace characters (per std::isspace) removed.
static inline std::string_view trim_left(std::string_view sv) {
    std::string_view::size_type skip = 0;
    while (skip < sv.size() && std::isspace(static_cast<unsigned char>(sv[skip]))) {
        ++skip;
    }
    sv.remove_prefix(skip);
    return sv;
}
|
||||
// Returns sv with all trailing whitespace characters (per std::isspace) removed.
static inline std::string_view trim_right(std::string_view sv) {
    std::string_view::size_type keep = sv.size();
    while (keep > 0 && std::isspace(static_cast<unsigned char>(sv[keep - 1]))) {
        --keep;
    }
    sv.remove_suffix(sv.size() - keep);
    return sv;
}
|
||||
static inline std::string_view trim(std::string_view sv) {
|
||||
return trim_left(trim_right(sv));
|
||||
}
|
||||
|
||||
// Splits text at every occurrence of separator and returns the pieces as
// views into the original text. An empty text yields no pieces; otherwise
// empty pieces (e.g. between two adjacent separators, or after a trailing
// separator) are kept.
inline std::vector<std::string_view> split(std::string_view text, char separator) {
    std::vector<std::string_view> pieces;
    if (text.empty()) {
        return pieces;
    }

    for (auto cut = text.find(separator); cut != std::string_view::npos; cut = text.find(separator)) {
        pieces.push_back(text.substr(0, cut));
        text = text.substr(cut + 1);
    }
    // Whatever follows the last separator (possibly empty) is the final piece.
    pieces.push_back(text);
    return pieces;
}
|
||||
|
||||
// Maps an encoding token to its compression_type by comparing it
// case-insensitively with each entry of compression_names.
// Returns compression_type::unknown when no entry matches.
constexpr response_compressor::compression_type response_compressor::get_compression_type(std::string_view encoding) {
    constexpr size_t known = static_cast<size_t>(compression_type::count);
    size_t idx = 0;
    while (idx < known && !case_insensitive_cmp_sv{}(encoding, compression_names[idx])) {
        ++idx;
    }
    if (idx == known) {
        return compression_type::unknown;
    }
    return static_cast<compression_type>(idx);
}
|
||||
|
||||
// Chooses the response encoding from an Accept-Encoding header value.
//
// The header is split into comma-separated entries, each optionally carrying
// a ";q=<value>" parameter. Unknown encodings are ignored, as are encodings
// whose configured threshold exceeds response_size. The entry with the
// highest quality wins; "identity" (compression_type::none) starts with the
// lowest possible positive quality, so it is the fallback.
// If "*" ends up selected, the first supported compression that did not
// receive a quality is returned, otherwise the best-rated choice.
response_compressor::compression_type response_compressor::find_compression(std::string_view accept_encoding, size_t response_size) {
    // Position of a compression_type in the per-type arrays.
    const auto slot = [] (compression_type t) { return static_cast<size_t>(t); };

    std::optional<float> quality[static_cast<size_t>(compression_type::count)];
    quality[slot(compression_type::none)] = std::numeric_limits<float>::min(); // enabled, but lowest priority
    compression_type best = compression_type::none;

    for (std::string_view entry : split(accept_encoding, ',')) {
        const std::vector<std::string_view> parts = split(entry, ';');
        if (parts.empty()) {
            continue;
        }
        const compression_type ct = get_compression_type(trim(parts[0]));
        if (ct == compression_type::unknown) {
            continue; // ignore unknown encoding types
        }
        std::optional<float>& q = quality[slot(ct)];
        if (q.has_value() && *q != 0.0f) {
            continue; // this encoding was already given a non-zero quality
        }
        if (response_size < _threshold[slot(ct)]) {
            continue; // below threshold: treat as unknown
        }
        // Look for the first parameter holding a fully parsable "q=" value.
        for (size_t i = 1; i < parts.size(); ++i) {
            const auto q_pos = parts[i].find("q=");
            if (q_pos == std::string_view::npos) {
                continue;
            }
            const std::string_view text = trim(parts[i].substr(q_pos + 2));
            const char* const text_end = text.data() + text.size();
            float parsed = 1.0f;
            auto [ptr, ec] = std::from_chars(text.data(), text_end, parsed);
            if (ec != std::errc() || ptr != text_end) {
                continue;
            }
            // Limit the quality to the [0, 1] range.
            q = parsed < 0.0 ? 0.0f : (parsed > 1.0 ? 1.0f : parsed);
            break; // we parsed the quality value
        }
        if (!q.has_value()) {
            q = 1.0f; // default quality value
        }
        // Keep the highest-rated encoding. Ties go to the earlier entry,
        // unless the current choice is '*', which a tie replaces.
        const bool replaces = (best == compression_type::any)
                ? (q >= quality[slot(best)])
                : (q > quality[slot(best)]);
        if (replaces) {
            best = ct;
        }
    }

    if (best != compression_type::any) {
        return best;
    }

    // '*' was selected: pick any compression that was not mentioned,
    // or else the one with the highest quality.
    best = compression_type::none;
    for (size_t i = 0; i < static_cast<size_t>(compression_type::compressions_count); ++i) {
        if (!quality[i].has_value()) {
            return static_cast<compression_type>(i);
        }
        if (quality[i] > quality[slot(best)]) {
            best = static_cast<compression_type>(i);
        }
    }
    return best;
}
|
||||
|
||||
// Compresses the whole string in one go and returns the compressed chunks.
// Any type other than deflate is compressed with the gzip wrapper; the
// compression level comes from alternator_response_gzip_compression_level.
static future<chunked_content> compress(response_compressor::compression_type ct, const db::config& cfg, std::string str) {
    chunked_content chunks;
    const bool use_gzip = ct != response_compressor::compression_type::deflate;
    // The compressor appends every produced buffer to `chunks`.
    zlib_compressor zc(use_gzip, cfg.alternator_response_gzip_compression_level(),
            [&chunks] (temporary_buffer<char>&& piece) -> future<> {
                chunks.push_back(std::move(piece));
                return make_ready_future<>();
            });
    co_await zc.compress(str.data(), str.size(), true);
    co_return chunks;
}
|
||||
|
||||
// Concatenates all chunks of cc into a single contiguous sstring.
static sstring flatten(chunked_content&& cc) {
    // First pass: compute the total length so the result is allocated once.
    size_t bytes = 0;
    for (const auto& piece : cc) {
        bytes += piece.size();
    }
    sstring flat{ sstring::initialized_later{}, bytes };
    // Second pass: copy the chunks back to back.
    auto out = flat.begin();
    for (const auto& piece : cc) {
        out = std::copy(piece.begin(), piece.end(), out);
    }
    return flat;
}
|
||||
|
||||
// Fills rep with a fully materialized response body. The body is compressed
// when find_compression() selects an encoding for this Accept-Encoding value
// and body size; otherwise it is stored as is.
future<std::unique_ptr<http::reply>> response_compressor::generate_reply(std::unique_ptr<http::reply> rep, sstring accept_encoding, const char* content_type, std::string&& response_body) {
    const response_compressor::compression_type ct = find_compression(accept_encoding, response_body.size());
    if (ct == response_compressor::compression_type::none) {
        // Despite the move this is a copy: response_body is a std::string
        // while rep->_content is an sstring.
        rep->_content = std::move(response_body);
        rep->set_content_type(content_type);
        return make_ready_future<std::unique_ptr<http::reply>>(std::move(rep));
    }

    rep->add_header("Content-Encoding", get_encoding_name(ct));
    rep->set_content_type(content_type);
    return compress(ct, cfg, std::move(response_body)).then([rep = std::move(rep)] (chunked_content chunks) mutable {
        rep->_content = flatten(std::move(chunks));
        return make_ready_future<std::unique_ptr<http::reply>>(std::move(rep));
    });
}
|
||||
|
||||
template<typename Compressor>
|
||||
class compressed_data_sink_impl : public data_sink_impl {
|
||||
output_stream<char> _out;
|
||||
Compressor _compressor;
|
||||
public:
|
||||
template<typename... Args>
|
||||
compressed_data_sink_impl(output_stream<char>&& out, Args&&... args)
|
||||
: _out(std::move(out)), _compressor(std::forward<Args>(args)..., [this](temporary_buffer<char>&& buf) {
|
||||
return _out.write(std::move(buf));
|
||||
}) { }
|
||||
|
||||
future<> put(std::span<temporary_buffer<char>> data) override {
|
||||
return data_sink_impl::fallback_put(data, [this] (temporary_buffer<char>&& buf) {
|
||||
return do_put(std::move(buf));
|
||||
});
|
||||
}
|
||||
|
||||
private:
|
||||
future<> do_put(temporary_buffer<char> buf) {
|
||||
co_return co_await _compressor.compress(buf.get(), buf.size());
|
||||
|
||||
}
|
||||
future<> close() override {
|
||||
return _compressor.close().then([this] {
|
||||
return _out.close();
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
// Wraps a body_writer so that everything it writes is compressed with the
// given compression type before reaching the real output stream.
// Only gzip and deflate are valid here; any other type is an internal error.
executor::body_writer compress(response_compressor::compression_type ct, const db::config& cfg, executor::body_writer&& bw) {
    return [bw = std::move(bw), ct, level = cfg.alternator_response_gzip_compression_level()](output_stream<char>&& out) mutable -> future<> {
        using ctype = response_compressor::compression_type;

        output_stream_options opts;
        opts.trim_to_size = true;

        if (ct == ctype::none || ct == ctype::any || ct == ctype::unknown) {
            on_internal_error(slogger,"Compression not selected");
        }
        if (ct != ctype::gzip && ct != ctype::deflate) {
            on_internal_error(slogger, "Unsupported compression type for data sink");
        }

        // gzip and deflate differ only in the wrapper requested from zlib.
        const bool use_gzip = (ct == ctype::gzip);
        std::unique_ptr<data_sink_impl> sink =
                std::make_unique<compressed_data_sink_impl<zlib_compressor>>(std::move(out), use_gzip, level);
        return bw(output_stream<char>(data_sink(std::move(sink)), compressed_buffer_size, opts));
    };
}
|
||||
|
||||
// Fills rep with a streamed response body. The final size is not known in
// advance, so the encoding is chosen as if the response were of maximal size.
future<std::unique_ptr<http::reply>> response_compressor::generate_reply(std::unique_ptr<http::reply> rep, sstring accept_encoding, const char* content_type, executor::body_writer&& body_writer) {
    const response_compressor::compression_type ct = find_compression(accept_encoding, std::numeric_limits<size_t>::max());
    if (ct == response_compressor::compression_type::none) {
        rep->write_body(content_type, std::move(body_writer));
    } else {
        rep->add_header("Content-Encoding", get_encoding_name(ct));
        rep->write_body(content_type, compress(ct, cfg, std::move(body_writer)));
    }
    return make_ready_future<std::unique_ptr<http::reply>>(std::move(rep));
}
|
||||
|
||||
} // namespace alternator
|
||||
@@ -1,91 +0,0 @@
|
||||
/*
|
||||
* Copyright 2025-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "alternator/executor.hh"
|
||||
#include <seastar/http/httpd.hh>
|
||||
#include "db/config.hh"
|
||||
|
||||
namespace alternator {
|
||||
|
||||
class response_compressor {
|
||||
public:
|
||||
enum class compression_type {
|
||||
gzip,
|
||||
deflate,
|
||||
compressions_count,
|
||||
any = compressions_count,
|
||||
none,
|
||||
count,
|
||||
unknown = count
|
||||
};
|
||||
static constexpr std::string_view compression_names[] = {
|
||||
"gzip",
|
||||
"deflate",
|
||||
"*",
|
||||
"identity"
|
||||
};
|
||||
|
||||
static sstring get_encoding_name(compression_type ct) {
|
||||
return sstring(compression_names[static_cast<size_t>(ct)]);
|
||||
}
|
||||
static constexpr compression_type get_compression_type(std::string_view encoding);
|
||||
|
||||
sstring get_accepted_encoding(const http::request& req) {
|
||||
if (get_threshold() == 0) {
|
||||
return "";
|
||||
}
|
||||
return req.get_header("Accept-Encoding");
|
||||
}
|
||||
compression_type find_compression(std::string_view accept_encoding, size_t response_size);
|
||||
|
||||
response_compressor(const db::config& cfg)
|
||||
: cfg(cfg)
|
||||
,_gzip_level_observer(
|
||||
cfg.alternator_response_gzip_compression_level.observe([this](int v) {
|
||||
update_threshold();
|
||||
}))
|
||||
,_gzip_threshold_observer(
|
||||
cfg.alternator_response_compression_threshold_in_bytes.observe([this](uint32_t v) {
|
||||
update_threshold();
|
||||
}))
|
||||
{
|
||||
update_threshold();
|
||||
}
|
||||
response_compressor(const response_compressor& rhs) : response_compressor(rhs.cfg) {}
|
||||
|
||||
private:
|
||||
const db::config& cfg;
|
||||
utils::observable<int>::observer _gzip_level_observer;
|
||||
utils::observable<uint32_t>::observer _gzip_threshold_observer;
|
||||
uint32_t _threshold[static_cast<size_t>(compression_type::count)];
|
||||
|
||||
size_t get_threshold() { return _threshold[static_cast<size_t>(compression_type::any)]; }
|
||||
void update_threshold() {
|
||||
_threshold[static_cast<size_t>(compression_type::none)] = std::numeric_limits<uint32_t>::max();
|
||||
_threshold[static_cast<size_t>(compression_type::any)] = std::numeric_limits<uint32_t>::max();
|
||||
uint32_t gzip = cfg.alternator_response_gzip_compression_level() <= 0 ? std::numeric_limits<uint32_t>::max()
|
||||
: cfg.alternator_response_compression_threshold_in_bytes();
|
||||
_threshold[static_cast<size_t>(compression_type::gzip)] = gzip;
|
||||
_threshold[static_cast<size_t>(compression_type::deflate)] = gzip;
|
||||
for (size_t i = 0; i < static_cast<size_t>(compression_type::compressions_count); ++i) {
|
||||
if (_threshold[i] < _threshold[static_cast<size_t>(compression_type::any)]) {
|
||||
_threshold[static_cast<size_t>(compression_type::any)] = _threshold[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
future<std::unique_ptr<http::reply>> generate_reply(std::unique_ptr<http::reply> rep,
|
||||
sstring accept_encoding, const char* content_type, std::string&& response_body);
|
||||
future<std::unique_ptr<http::reply>> generate_reply(std::unique_ptr<http::reply> rep,
|
||||
sstring accept_encoding, const char* content_type, executor::body_writer&& body_writer);
|
||||
};
|
||||
|
||||
}
|
||||
@@ -34,7 +34,6 @@
|
||||
#include "client_data.hh"
|
||||
#include "utils/updateable_value.hh"
|
||||
#include <zlib.h>
|
||||
#include "alternator/http_compression.hh"
|
||||
|
||||
static logging::logger slogger("alternator-server");
|
||||
|
||||
@@ -112,12 +111,9 @@ class api_handler : public handler_base {
|
||||
// type applies to all replies, both success and error.
|
||||
static constexpr const char* REPLY_CONTENT_TYPE = "application/x-amz-json-1.0";
|
||||
public:
|
||||
api_handler(const std::function<future<executor::request_return_type>(std::unique_ptr<request> req)>& _handle,
|
||||
const db::config& config) : _response_compressor(config), _f_handle(
|
||||
api_handler(const std::function<future<executor::request_return_type>(std::unique_ptr<request> req)>& _handle) : _f_handle(
|
||||
[this, _handle](std::unique_ptr<request> req, std::unique_ptr<reply> rep) {
|
||||
sstring accept_encoding = _response_compressor.get_accepted_encoding(*req);
|
||||
return seastar::futurize_invoke(_handle, std::move(req)).then_wrapped(
|
||||
[this, rep = std::move(rep), accept_encoding=std::move(accept_encoding)](future<executor::request_return_type> resf) mutable {
|
||||
return seastar::futurize_invoke(_handle, std::move(req)).then_wrapped([this, rep = std::move(rep)](future<executor::request_return_type> resf) mutable {
|
||||
if (resf.failed()) {
|
||||
// Exceptions of type api_error are wrapped as JSON and
|
||||
// returned to the client as expected. Other types of
|
||||
@@ -137,20 +133,22 @@ public:
|
||||
return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
|
||||
}
|
||||
auto res = resf.get();
|
||||
return std::visit(overloaded_functor {
|
||||
std::visit(overloaded_functor {
|
||||
[&] (std::string&& str) {
|
||||
return _response_compressor.generate_reply(std::move(rep), std::move(accept_encoding),
|
||||
REPLY_CONTENT_TYPE, std::move(str));
|
||||
// Note that despite the move, there is a copy here -
|
||||
// as str is std::string and rep->_content is sstring.
|
||||
rep->_content = std::move(str);
|
||||
rep->set_content_type(REPLY_CONTENT_TYPE);
|
||||
},
|
||||
[&] (executor::body_writer&& body_writer) {
|
||||
return _response_compressor.generate_reply(std::move(rep), std::move(accept_encoding),
|
||||
REPLY_CONTENT_TYPE, std::move(body_writer));
|
||||
rep->write_body(REPLY_CONTENT_TYPE, std::move(body_writer));
|
||||
},
|
||||
[&] (const api_error& err) {
|
||||
generate_error_reply(*rep, err);
|
||||
return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
|
||||
}
|
||||
}, std::move(res));
|
||||
|
||||
return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
|
||||
});
|
||||
}) { }
|
||||
|
||||
@@ -179,7 +177,6 @@ protected:
|
||||
slogger.trace("api_handler error case: {}", rep._content);
|
||||
}
|
||||
|
||||
response_compressor _response_compressor;
|
||||
future_handler_function _f_handle;
|
||||
};
|
||||
|
||||
@@ -374,40 +371,13 @@ future<std::string> server::verify_signature(const request& req, const chunked_c
|
||||
for (const auto& header : signed_headers) {
|
||||
signed_headers_map.emplace(header, std::string_view());
|
||||
}
|
||||
std::vector<std::string> modified_values;
|
||||
for (auto& header : req._headers) {
|
||||
std::string header_str;
|
||||
header_str.resize(header.first.size());
|
||||
std::transform(header.first.begin(), header.first.end(), header_str.begin(), ::tolower);
|
||||
auto it = signed_headers_map.find(header_str);
|
||||
if (it != signed_headers_map.end()) {
|
||||
// replace multiple spaces in the header value header.second with
|
||||
// a single space, as required by AWS SigV4 header canonization.
|
||||
// If we modify the value, we need to save it in modified_values
|
||||
// to keep it alive.
|
||||
std::string value;
|
||||
value.reserve(header.second.size());
|
||||
bool prev_space = false;
|
||||
bool modified = false;
|
||||
for (char ch : header.second) {
|
||||
if (ch == ' ') {
|
||||
if (!prev_space) {
|
||||
value += ch;
|
||||
prev_space = true;
|
||||
} else {
|
||||
modified = true; // skip a space
|
||||
}
|
||||
} else {
|
||||
value += ch;
|
||||
prev_space = false;
|
||||
}
|
||||
}
|
||||
if (modified) {
|
||||
modified_values.emplace_back(std::move(value));
|
||||
it->second = std::string_view(modified_values.back());
|
||||
} else {
|
||||
it->second = std::string_view(header.second);
|
||||
}
|
||||
it->second = std::string_view(header.second);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -420,7 +390,6 @@ future<std::string> server::verify_signature(const request& req, const chunked_c
|
||||
datestamp = std::move(datestamp),
|
||||
signed_headers_str = std::move(signed_headers_str),
|
||||
signed_headers_map = std::move(signed_headers_map),
|
||||
modified_values = std::move(modified_values),
|
||||
region = std::move(region),
|
||||
service = std::move(service),
|
||||
user_signature = std::move(user_signature)] (future<key_cache::value_ptr> key_ptr_fut) {
|
||||
@@ -591,11 +560,11 @@ read_entire_stream(input_stream<char>& inp, size_t length_limit) {
|
||||
class safe_gzip_zstream {
|
||||
z_stream _zs;
|
||||
public:
|
||||
// If gzip is true, decode a gzip header (for "Content-Encoding: gzip").
|
||||
// Otherwise, a zlib header (for "Content-Encoding: deflate").
|
||||
safe_gzip_zstream(bool gzip = true) {
|
||||
safe_gzip_zstream() {
|
||||
memset(&_zs, 0, sizeof(_zs));
|
||||
if (inflateInit2(&_zs, gzip ? 16 + MAX_WBITS : MAX_WBITS) != Z_OK) {
|
||||
// The strange 16 + MAX_WBITS tells zlib to expect and decode
|
||||
// a gzip header, not a zlib header.
|
||||
if (inflateInit2(&_zs, 16 + MAX_WBITS) != Z_OK) {
|
||||
// Should only happen if memory allocation fails
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
@@ -614,21 +583,19 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
// ungzip() takes a chunked_content of a compressed request body, and returns
|
||||
// the uncompressed content as a chunked_content. If gzip is true, we expect
|
||||
// gzip header (for "Content-Encoding: gzip"), if gzip is false, we expect a
|
||||
// zlib header (for "Content-Encoding: deflate").
|
||||
// ungzip() takes a chunked_content with a gzip-compressed request body,
|
||||
// uncompresses it, and returns the uncompressed content as a chunked_content.
|
||||
// If the uncompressed content exceeds length_limit, an error is thrown.
|
||||
static future<chunked_content>
|
||||
ungzip(chunked_content&& compressed_body, size_t length_limit, bool gzip = true) {
|
||||
ungzip(chunked_content&& compressed_body, size_t length_limit) {
|
||||
chunked_content ret;
|
||||
// output_buf can be any size - when uncompressing input_buf, it doesn't
|
||||
// need to fit in a single output_buf, we'll use multiple output_buf for
|
||||
// a single input_buf if needed.
|
||||
constexpr size_t OUTPUT_BUF_SIZE = 4096;
|
||||
temporary_buffer<char> output_buf;
|
||||
safe_gzip_zstream strm(gzip);
|
||||
bool complete_stream = false; // empty input is not a valid gzip/deflate
|
||||
safe_gzip_zstream strm;
|
||||
bool complete_stream = false; // empty input is not a valid gzip
|
||||
size_t total_out_bytes = 0;
|
||||
for (const temporary_buffer<char>& input_buf : compressed_body) {
|
||||
if (input_buf.empty()) {
|
||||
@@ -731,8 +698,6 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
|
||||
sstring content_encoding = req->get_header("Content-Encoding");
|
||||
if (content_encoding == "gzip") {
|
||||
content = co_await ungzip(std::move(content), request_content_length_limit);
|
||||
} else if (content_encoding == "deflate") {
|
||||
content = co_await ungzip(std::move(content), request_content_length_limit, false);
|
||||
} else if (!content_encoding.empty()) {
|
||||
// DynamoDB returns a 500 error for unsupported Content-Encoding.
|
||||
// I'm not sure if this is the best error code, but let's do it too.
|
||||
@@ -793,7 +758,7 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
|
||||
void server::set_routes(routes& r) {
|
||||
api_handler* req_handler = new api_handler([this] (std::unique_ptr<request> req) mutable {
|
||||
return handle_api_request(std::move(req));
|
||||
}, _proxy.data_dictionary().get_config());
|
||||
});
|
||||
|
||||
r.put(operation_type::POST, "/", req_handler);
|
||||
r.put(operation_type::GET, "/", new health_handler(_pending_requests));
|
||||
@@ -904,9 +869,7 @@ server::server(executor& exec, service::storage_proxy& proxy, gms::gossiper& gos
|
||||
} {
|
||||
}
|
||||
|
||||
future<> server::init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port,
|
||||
std::optional<uint16_t> port_proxy_protocol, std::optional<uint16_t> https_port_proxy_protocol,
|
||||
std::optional<tls::credentials_builder> creds,
|
||||
future<> server::init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port, std::optional<tls::credentials_builder> creds,
|
||||
utils::updateable_value<bool> enforce_authorization, utils::updateable_value<bool> warn_authorization, utils::updateable_value<uint64_t> max_users_query_size_in_trace_output,
|
||||
semaphore* memory_limiter, utils::updateable_value<uint32_t> max_concurrent_requests) {
|
||||
_memory_limiter = memory_limiter;
|
||||
@@ -914,28 +877,20 @@ future<> server::init(net::inet_address addr, std::optional<uint16_t> port, std:
|
||||
_warn_authorization = std::move(warn_authorization);
|
||||
_max_concurrent_requests = std::move(max_concurrent_requests);
|
||||
_max_users_query_size_in_trace_output = std::move(max_users_query_size_in_trace_output);
|
||||
if (!port && !https_port && !port_proxy_protocol && !https_port_proxy_protocol) {
|
||||
if (!port && !https_port) {
|
||||
return make_exception_future<>(std::runtime_error("Either regular port or TLS port"
|
||||
" must be specified in order to init an alternator HTTP server instance"));
|
||||
}
|
||||
return seastar::async([this, addr, port, https_port, port_proxy_protocol, https_port_proxy_protocol, creds] {
|
||||
return seastar::async([this, addr, port, https_port, creds] {
|
||||
_executor.start().get();
|
||||
|
||||
if (port || port_proxy_protocol) {
|
||||
if (port) {
|
||||
set_routes(_http_server._routes);
|
||||
_http_server.set_content_streaming(true);
|
||||
if (port) {
|
||||
_http_server.listen(socket_address{addr, *port}).get();
|
||||
}
|
||||
if (port_proxy_protocol) {
|
||||
listen_options lo;
|
||||
lo.reuse_address = true;
|
||||
lo.proxy_protocol = true;
|
||||
_http_server.listen(socket_address{addr, *port_proxy_protocol}, lo).get();
|
||||
}
|
||||
_http_server.listen(socket_address{addr, *port}).get();
|
||||
_enabled_servers.push_back(std::ref(_http_server));
|
||||
}
|
||||
if (https_port || https_port_proxy_protocol) {
|
||||
if (https_port) {
|
||||
set_routes(_https_server._routes);
|
||||
_https_server.set_content_streaming(true);
|
||||
|
||||
@@ -955,15 +910,7 @@ future<> server::init(net::inet_address addr, std::optional<uint16_t> port, std:
|
||||
} else {
|
||||
_credentials = creds->build_server_credentials();
|
||||
}
|
||||
if (https_port) {
|
||||
_https_server.listen(socket_address{addr, *https_port}, _credentials).get();
|
||||
}
|
||||
if (https_port_proxy_protocol) {
|
||||
listen_options lo;
|
||||
lo.reuse_address = true;
|
||||
lo.proxy_protocol = true;
|
||||
_https_server.listen(socket_address{addr, *https_port_proxy_protocol}, lo, _credentials).get();
|
||||
}
|
||||
_https_server.listen(socket_address{addr, *https_port}, _credentials).get();
|
||||
_enabled_servers.push_back(std::ref(_https_server));
|
||||
}
|
||||
});
|
||||
@@ -1036,8 +983,9 @@ client_data server::ongoing_request::make_client_data() const {
|
||||
// and keep "driver_version" unset.
|
||||
cd.driver_name = _user_agent;
|
||||
// Leave "protocol_version" unset, it has no meaning in Alternator.
|
||||
// Leave "hostname", "ssl_protocol" and "ssl_cipher_suite" unset for Alternator.
|
||||
// Note: CQL sets ssl_protocol and ssl_cipher_suite via generic_server::connection base class.
|
||||
// Leave "hostname", "ssl_protocol" and "ssl_cipher_suite" unset.
|
||||
// As reported in issue #9216, we never set these fields in CQL
|
||||
// either (see cql_server::connection::make_client_data()).
|
||||
return cd;
|
||||
}
|
||||
|
||||
|
||||
@@ -100,9 +100,7 @@ class server : public peering_sharded_service<server> {
|
||||
public:
|
||||
server(executor& executor, service::storage_proxy& proxy, gms::gossiper& gossiper, auth::service& service, qos::service_level_controller& sl_controller);
|
||||
|
||||
future<> init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port,
|
||||
std::optional<uint16_t> port_proxy_protocol, std::optional<uint16_t> https_port_proxy_protocol,
|
||||
std::optional<tls::credentials_builder> creds,
|
||||
future<> init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port, std::optional<tls::credentials_builder> creds,
|
||||
utils::updateable_value<bool> enforce_authorization, utils::updateable_value<bool> warn_authorization, utils::updateable_value<uint64_t> max_users_query_size_in_trace_output,
|
||||
semaphore* memory_limiter, utils::updateable_value<uint32_t> max_concurrent_requests);
|
||||
future<> stop();
|
||||
|
||||
@@ -491,7 +491,7 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
|
||||
|
||||
if (!opts.enabled()) {
|
||||
rjson::add(ret, "StreamDescription", std::move(stream_desc));
|
||||
co_return rjson::print(std::move(ret));
|
||||
return make_ready_future<executor::request_return_type>(rjson::print(std::move(ret)));
|
||||
}
|
||||
|
||||
// TODO: label
|
||||
@@ -502,121 +502,123 @@ future<executor::request_return_type> executor::describe_stream(client_state& cl
|
||||
// filter out cdc generations older than the table or now() - cdc::ttl (typically dynamodb_streams_max_window - 24h)
|
||||
auto low_ts = std::max(as_timepoint(schema->id()), db_clock::now() - ttl);
|
||||
|
||||
std::map<db_clock::time_point, cdc::streams_version> topologies = co_await _sdks.cdc_get_versioned_streams(low_ts, { normal_token_owners });
|
||||
auto e = topologies.end();
|
||||
auto prev = e;
|
||||
auto shards = rjson::empty_array();
|
||||
return _sdks.cdc_get_versioned_streams(low_ts, { normal_token_owners }).then([db, shard_start, limit, ret = std::move(ret), stream_desc = std::move(stream_desc)] (std::map<db_clock::time_point, cdc::streams_version> topologies) mutable {
|
||||
|
||||
std::optional<shard_id> last;
|
||||
auto e = topologies.end();
|
||||
auto prev = e;
|
||||
auto shards = rjson::empty_array();
|
||||
|
||||
auto i = topologies.begin();
|
||||
// if we're a paged query, skip to the generation where we left off.
|
||||
if (shard_start) {
|
||||
i = topologies.find(shard_start->time);
|
||||
}
|
||||
std::optional<shard_id> last;
|
||||
|
||||
// for parent-child stuff we need id:s to be sorted by token
|
||||
// (see explanation above) since we want to find closest
|
||||
// token boundary when determining parent.
|
||||
// #7346 - we processed and searched children/parents in
|
||||
// stored order, which is not necessarily token order,
|
||||
// so the finding of "closest" token boundary (using upper bound)
|
||||
// could give somewhat weird results.
|
||||
static auto token_cmp = [](const cdc::stream_id& id1, const cdc::stream_id& id2) {
|
||||
return id1.token() < id2.token();
|
||||
};
|
||||
auto i = topologies.begin();
|
||||
// if we're a paged query, skip to the generation where we left off.
|
||||
if (shard_start) {
|
||||
i = topologies.find(shard_start->time);
|
||||
}
|
||||
|
||||
// #7409 - shards must be returned in lexicographical order,
|
||||
// normal bytes compare is string_traits<int8_t>::compare.
|
||||
// thus bytes 0x8000 is less than 0x0000. By doing unsigned
|
||||
// compare instead we inadvertently will sort in string lexical.
|
||||
static auto id_cmp = [](const cdc::stream_id& id1, const cdc::stream_id& id2) {
|
||||
return compare_unsigned(id1.to_bytes(), id2.to_bytes()) < 0;
|
||||
};
|
||||
|
||||
// need a prev even if we are skipping stuff
|
||||
if (i != topologies.begin()) {
|
||||
prev = std::prev(i);
|
||||
}
|
||||
|
||||
for (; limit > 0 && i != e; prev = i, ++i) {
|
||||
auto& [ts, sv] = *i;
|
||||
|
||||
last = std::nullopt;
|
||||
|
||||
auto lo = sv.streams.begin();
|
||||
auto end = sv.streams.end();
|
||||
// for parent-child stuff we need id:s to be sorted by token
|
||||
// (see explanation above) since we want to find closest
|
||||
// token boundary when determining parent.
|
||||
// #7346 - we processed and searched children/parents in
|
||||
// stored order, which is not necessarily token order,
|
||||
// so the finding of "closest" token boundary (using upper bound)
|
||||
// could give somewhat weird results.
|
||||
static auto token_cmp = [](const cdc::stream_id& id1, const cdc::stream_id& id2) {
|
||||
return id1.token() < id2.token();
|
||||
};
|
||||
|
||||
// #7409 - shards must be returned in lexicographical order,
|
||||
std::sort(lo, end, id_cmp);
|
||||
// normal bytes compare is string_traits<int8_t>::compare.
|
||||
// thus bytes 0x8000 is less than 0x0000. By doing unsigned
|
||||
// compare instead we inadvertently will sort in string lexical.
|
||||
static auto id_cmp = [](const cdc::stream_id& id1, const cdc::stream_id& id2) {
|
||||
return compare_unsigned(id1.to_bytes(), id2.to_bytes()) < 0;
|
||||
};
|
||||
|
||||
if (shard_start) {
|
||||
// find next shard position
|
||||
lo = std::upper_bound(lo, end, shard_start->id, id_cmp);
|
||||
shard_start = std::nullopt;
|
||||
// need a prev even if we are skipping stuff
|
||||
if (i != topologies.begin()) {
|
||||
prev = std::prev(i);
|
||||
}
|
||||
|
||||
if (lo != end && prev != e) {
|
||||
// We want older stuff sorted in token order so we can find matching
|
||||
// token range when determining parent shard.
|
||||
std::stable_sort(prev->second.streams.begin(), prev->second.streams.end(), token_cmp);
|
||||
}
|
||||
|
||||
auto expired = [&]() -> std::optional<db_clock::time_point> {
|
||||
auto j = std::next(i);
|
||||
if (j == e) {
|
||||
return std::nullopt;
|
||||
}
|
||||
// add this so we sort of match potential
|
||||
// sequence numbers in get_records result.
|
||||
return j->first + confidence_interval(db);
|
||||
}();
|
||||
|
||||
while (lo != end) {
|
||||
auto& id = *lo++;
|
||||
|
||||
auto shard = rjson::empty_object();
|
||||
|
||||
if (prev != e) {
|
||||
auto& pids = prev->second.streams;
|
||||
auto pid = std::upper_bound(pids.begin(), pids.end(), id.token(), [](const dht::token& t, const cdc::stream_id& id) {
|
||||
return t < id.token();
|
||||
});
|
||||
if (pid != pids.begin()) {
|
||||
pid = std::prev(pid);
|
||||
}
|
||||
if (pid != pids.end()) {
|
||||
rjson::add(shard, "ParentShardId", shard_id(prev->first, *pid));
|
||||
}
|
||||
}
|
||||
|
||||
last.emplace(ts, id);
|
||||
rjson::add(shard, "ShardId", *last);
|
||||
auto range = rjson::empty_object();
|
||||
rjson::add(range, "StartingSequenceNumber", sequence_number(utils::UUID_gen::min_time_UUID(ts.time_since_epoch())));
|
||||
if (expired) {
|
||||
rjson::add(range, "EndingSequenceNumber", sequence_number(utils::UUID_gen::min_time_UUID(expired->time_since_epoch())));
|
||||
}
|
||||
|
||||
rjson::add(shard, "SequenceNumberRange", std::move(range));
|
||||
rjson::push_back(shards, std::move(shard));
|
||||
|
||||
if (--limit == 0) {
|
||||
break;
|
||||
}
|
||||
for (; limit > 0 && i != e; prev = i, ++i) {
|
||||
auto& [ts, sv] = *i;
|
||||
|
||||
last = std::nullopt;
|
||||
|
||||
auto lo = sv.streams.begin();
|
||||
auto end = sv.streams.end();
|
||||
|
||||
// #7409 - shards must be returned in lexicographical order,
|
||||
std::sort(lo, end, id_cmp);
|
||||
|
||||
if (shard_start) {
|
||||
// find next shard position
|
||||
lo = std::upper_bound(lo, end, shard_start->id, id_cmp);
|
||||
shard_start = std::nullopt;
|
||||
}
|
||||
|
||||
if (lo != end && prev != e) {
|
||||
// We want older stuff sorted in token order so we can find matching
|
||||
// token range when determining parent shard.
|
||||
std::stable_sort(prev->second.streams.begin(), prev->second.streams.end(), token_cmp);
|
||||
}
|
||||
|
||||
auto expired = [&]() -> std::optional<db_clock::time_point> {
|
||||
auto j = std::next(i);
|
||||
if (j == e) {
|
||||
return std::nullopt;
|
||||
}
|
||||
// add this so we sort of match potential
|
||||
// sequence numbers in get_records result.
|
||||
return j->first + confidence_interval(db);
|
||||
}();
|
||||
|
||||
while (lo != end) {
|
||||
auto& id = *lo++;
|
||||
|
||||
auto shard = rjson::empty_object();
|
||||
|
||||
if (prev != e) {
|
||||
auto& pids = prev->second.streams;
|
||||
auto pid = std::upper_bound(pids.begin(), pids.end(), id.token(), [](const dht::token& t, const cdc::stream_id& id) {
|
||||
return t < id.token();
|
||||
});
|
||||
if (pid != pids.begin()) {
|
||||
pid = std::prev(pid);
|
||||
}
|
||||
if (pid != pids.end()) {
|
||||
rjson::add(shard, "ParentShardId", shard_id(prev->first, *pid));
|
||||
}
|
||||
}
|
||||
|
||||
last.emplace(ts, id);
|
||||
rjson::add(shard, "ShardId", *last);
|
||||
auto range = rjson::empty_object();
|
||||
rjson::add(range, "StartingSequenceNumber", sequence_number(utils::UUID_gen::min_time_UUID(ts.time_since_epoch())));
|
||||
if (expired) {
|
||||
rjson::add(range, "EndingSequenceNumber", sequence_number(utils::UUID_gen::min_time_UUID(expired->time_since_epoch())));
|
||||
}
|
||||
|
||||
rjson::add(shard, "SequenceNumberRange", std::move(range));
|
||||
rjson::push_back(shards, std::move(shard));
|
||||
|
||||
if (--limit == 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
last = std::nullopt;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (last) {
|
||||
rjson::add(stream_desc, "LastEvaluatedShardId", *last);
|
||||
}
|
||||
if (last) {
|
||||
rjson::add(stream_desc, "LastEvaluatedShardId", *last);
|
||||
}
|
||||
|
||||
rjson::add(stream_desc, "Shards", std::move(shards));
|
||||
rjson::add(ret, "StreamDescription", std::move(stream_desc));
|
||||
|
||||
co_return rjson::print(std::move(ret));
|
||||
rjson::add(stream_desc, "Shards", std::move(shards));
|
||||
rjson::add(ret, "StreamDescription", std::move(stream_desc));
|
||||
|
||||
return make_ready_future<executor::request_return_type>(rjson::print(std::move(ret)));
|
||||
});
|
||||
}
|
||||
|
||||
enum class shard_iterator_type {
|
||||
@@ -896,169 +898,172 @@ future<executor::request_return_type> executor::get_records(client_state& client
|
||||
auto command = ::make_lw_shared<query::read_command>(schema->id(), schema->version(), partition_slice, _proxy.get_max_result_size(partition_slice),
|
||||
query::tombstone_limit(_proxy.get_tombstone_limit()), query::row_limit(limit * mul));
|
||||
|
||||
service::storage_proxy::coordinator_query_result qr = co_await _proxy.query(schema, std::move(command), std::move(partition_ranges), cl, service::storage_proxy::coordinator_query_options(default_timeout(), std::move(permit), client_state));
|
||||
cql3::selection::result_set_builder builder(*selection, gc_clock::now());
|
||||
query::result_view::consume(*qr.query_result, partition_slice, cql3::selection::result_set_builder::visitor(builder, *schema, *selection));
|
||||
co_return co_await _proxy.query(schema, std::move(command), std::move(partition_ranges), cl, service::storage_proxy::coordinator_query_options(default_timeout(), std::move(permit), client_state)).then(
|
||||
[this, schema, partition_slice = std::move(partition_slice), selection = std::move(selection), start_time = std::move(start_time), limit, key_names = std::move(key_names), attr_names = std::move(attr_names), type, iter, high_ts] (service::storage_proxy::coordinator_query_result qr) mutable {
|
||||
cql3::selection::result_set_builder builder(*selection, gc_clock::now());
|
||||
query::result_view::consume(*qr.query_result, partition_slice, cql3::selection::result_set_builder::visitor(builder, *schema, *selection));
|
||||
|
||||
auto result_set = builder.build();
|
||||
auto records = rjson::empty_array();
|
||||
auto result_set = builder.build();
|
||||
auto records = rjson::empty_array();
|
||||
|
||||
auto& metadata = result_set->get_metadata();
|
||||
auto& metadata = result_set->get_metadata();
|
||||
|
||||
auto op_index = std::distance(metadata.get_names().begin(),
|
||||
std::find_if(metadata.get_names().begin(), metadata.get_names().end(), [](const lw_shared_ptr<cql3::column_specification>& cdef) {
|
||||
return cdef->name->name() == op_column_name;
|
||||
})
|
||||
);
|
||||
auto ts_index = std::distance(metadata.get_names().begin(),
|
||||
std::find_if(metadata.get_names().begin(), metadata.get_names().end(), [](const lw_shared_ptr<cql3::column_specification>& cdef) {
|
||||
return cdef->name->name() == timestamp_column_name;
|
||||
})
|
||||
);
|
||||
auto eor_index = std::distance(metadata.get_names().begin(),
|
||||
std::find_if(metadata.get_names().begin(), metadata.get_names().end(), [](const lw_shared_ptr<cql3::column_specification>& cdef) {
|
||||
return cdef->name->name() == eor_column_name;
|
||||
})
|
||||
);
|
||||
auto op_index = std::distance(metadata.get_names().begin(),
|
||||
std::find_if(metadata.get_names().begin(), metadata.get_names().end(), [](const lw_shared_ptr<cql3::column_specification>& cdef) {
|
||||
return cdef->name->name() == op_column_name;
|
||||
})
|
||||
);
|
||||
auto ts_index = std::distance(metadata.get_names().begin(),
|
||||
std::find_if(metadata.get_names().begin(), metadata.get_names().end(), [](const lw_shared_ptr<cql3::column_specification>& cdef) {
|
||||
return cdef->name->name() == timestamp_column_name;
|
||||
})
|
||||
);
|
||||
auto eor_index = std::distance(metadata.get_names().begin(),
|
||||
std::find_if(metadata.get_names().begin(), metadata.get_names().end(), [](const lw_shared_ptr<cql3::column_specification>& cdef) {
|
||||
return cdef->name->name() == eor_column_name;
|
||||
})
|
||||
);
|
||||
|
||||
std::optional<utils::UUID> timestamp;
|
||||
auto dynamodb = rjson::empty_object();
|
||||
auto record = rjson::empty_object();
|
||||
const auto dc_name = _proxy.get_token_metadata_ptr()->get_topology().get_datacenter();
|
||||
std::optional<utils::UUID> timestamp;
|
||||
auto dynamodb = rjson::empty_object();
|
||||
auto record = rjson::empty_object();
|
||||
const auto dc_name = _proxy.get_token_metadata_ptr()->get_topology().get_datacenter();
|
||||
|
||||
using op_utype = std::underlying_type_t<cdc::operation>;
|
||||
using op_utype = std::underlying_type_t<cdc::operation>;
|
||||
|
||||
auto maybe_add_record = [&] {
|
||||
if (!dynamodb.ObjectEmpty()) {
|
||||
rjson::add(record, "dynamodb", std::move(dynamodb));
|
||||
dynamodb = rjson::empty_object();
|
||||
}
|
||||
if (!record.ObjectEmpty()) {
|
||||
rjson::add(record, "awsRegion", rjson::from_string(dc_name));
|
||||
rjson::add(record, "eventID", event_id(iter.shard.id, *timestamp));
|
||||
rjson::add(record, "eventSource", "scylladb:alternator");
|
||||
rjson::add(record, "eventVersion", "1.1");
|
||||
rjson::push_back(records, std::move(record));
|
||||
record = rjson::empty_object();
|
||||
--limit;
|
||||
}
|
||||
};
|
||||
auto maybe_add_record = [&] {
|
||||
if (!dynamodb.ObjectEmpty()) {
|
||||
rjson::add(record, "dynamodb", std::move(dynamodb));
|
||||
dynamodb = rjson::empty_object();
|
||||
}
|
||||
if (!record.ObjectEmpty()) {
|
||||
rjson::add(record, "awsRegion", rjson::from_string(dc_name));
|
||||
rjson::add(record, "eventID", event_id(iter.shard.id, *timestamp));
|
||||
rjson::add(record, "eventSource", "scylladb:alternator");
|
||||
rjson::add(record, "eventVersion", "1.1");
|
||||
rjson::push_back(records, std::move(record));
|
||||
record = rjson::empty_object();
|
||||
--limit;
|
||||
}
|
||||
};
|
||||
|
||||
for (auto& row : result_set->rows()) {
|
||||
auto op = static_cast<cdc::operation>(value_cast<op_utype>(data_type_for<op_utype>()->deserialize(*row[op_index])));
|
||||
auto ts = value_cast<utils::UUID>(data_type_for<utils::UUID>()->deserialize(*row[ts_index]));
|
||||
auto eor = row[eor_index].has_value() ? value_cast<bool>(boolean_type->deserialize(*row[eor_index])) : false;
|
||||
for (auto& row : result_set->rows()) {
|
||||
auto op = static_cast<cdc::operation>(value_cast<op_utype>(data_type_for<op_utype>()->deserialize(*row[op_index])));
|
||||
auto ts = value_cast<utils::UUID>(data_type_for<utils::UUID>()->deserialize(*row[ts_index]));
|
||||
auto eor = row[eor_index].has_value() ? value_cast<bool>(boolean_type->deserialize(*row[eor_index])) : false;
|
||||
|
||||
if (!dynamodb.HasMember("Keys")) {
|
||||
auto keys = rjson::empty_object();
|
||||
describe_single_item(*selection, row, key_names, keys);
|
||||
rjson::add(dynamodb, "Keys", std::move(keys));
|
||||
rjson::add(dynamodb, "ApproximateCreationDateTime", utils::UUID_gen::unix_timestamp_in_sec(ts).count());
|
||||
rjson::add(dynamodb, "SequenceNumber", sequence_number(ts));
|
||||
rjson::add(dynamodb, "StreamViewType", type);
|
||||
// TODO: SizeBytes
|
||||
}
|
||||
if (!dynamodb.HasMember("Keys")) {
|
||||
auto keys = rjson::empty_object();
|
||||
describe_single_item(*selection, row, key_names, keys);
|
||||
rjson::add(dynamodb, "Keys", std::move(keys));
|
||||
rjson::add(dynamodb, "ApproximateCreationDateTime", utils::UUID_gen::unix_timestamp_in_sec(ts).count());
|
||||
rjson::add(dynamodb, "SequenceNumber", sequence_number(ts));
|
||||
rjson::add(dynamodb, "StreamViewType", type);
|
||||
// TODO: SizeBytes
|
||||
}
|
||||
|
||||
/**
|
||||
* We merge rows with same timestamp into a single event.
|
||||
* This is pretty much needed, because a CDC row typically
|
||||
* encodes ~half the info of an alternator write.
|
||||
*
|
||||
* A big, big downside to how alternator records are written
|
||||
* (i.e. CQL), is that the distinction between INSERT and UPDATE
|
||||
* is somewhat lost/unmappable to actual eventName.
|
||||
* A write (currently) always looks like an insert+modify
|
||||
* regardless whether we wrote existing record or not.
|
||||
*
|
||||
* Maybe RMW ops could be done slightly differently so
|
||||
* we can distinguish them here...
|
||||
*
|
||||
* For now, all writes will become MODIFY.
|
||||
*
|
||||
* Note: we do not check the current pre/post
|
||||
* flags on CDC log, instead we use data to
|
||||
* drive what is returned. This is (afaict)
|
||||
* consistent with dynamo streams
|
||||
*/
|
||||
switch (op) {
|
||||
case cdc::operation::pre_image:
|
||||
case cdc::operation::post_image:
|
||||
{
|
||||
auto item = rjson::empty_object();
|
||||
describe_single_item(*selection, row, attr_names, item, nullptr, true);
|
||||
describe_single_item(*selection, row, key_names, item);
|
||||
rjson::add(dynamodb, op == cdc::operation::pre_image ? "OldImage" : "NewImage", std::move(item));
|
||||
break;
|
||||
}
|
||||
case cdc::operation::update:
|
||||
rjson::add(record, "eventName", "MODIFY");
|
||||
break;
|
||||
case cdc::operation::insert:
|
||||
rjson::add(record, "eventName", "INSERT");
|
||||
break;
|
||||
case cdc::operation::service_row_delete:
|
||||
case cdc::operation::service_partition_delete:
|
||||
{
|
||||
auto user_identity = rjson::empty_object();
|
||||
rjson::add(user_identity, "Type", "Service");
|
||||
rjson::add(user_identity, "PrincipalId", "dynamodb.amazonaws.com");
|
||||
rjson::add(record, "userIdentity", std::move(user_identity));
|
||||
rjson::add(record, "eventName", "REMOVE");
|
||||
break;
|
||||
}
|
||||
default:
|
||||
rjson::add(record, "eventName", "REMOVE");
|
||||
break;
|
||||
}
|
||||
if (eor) {
|
||||
maybe_add_record();
|
||||
timestamp = ts;
|
||||
if (limit == 0) {
|
||||
/**
|
||||
* We merge rows with same timestamp into a single event.
|
||||
* This is pretty much needed, because a CDC row typically
|
||||
* encodes ~half the info of an alternator write.
|
||||
*
|
||||
* A big, big downside to how alternator records are written
|
||||
* (i.e. CQL), is that the distinction between INSERT and UPDATE
|
||||
* is somewhat lost/unmappable to actual eventName.
|
||||
* A write (currently) always looks like an insert+modify
|
||||
* regardless whether we wrote existing record or not.
|
||||
*
|
||||
* Maybe RMW ops could be done slightly differently so
|
||||
* we can distinguish them here...
|
||||
*
|
||||
* For now, all writes will become MODIFY.
|
||||
*
|
||||
* Note: we do not check the current pre/post
|
||||
* flags on CDC log, instead we use data to
|
||||
* drive what is returned. This is (afaict)
|
||||
* consistent with dynamo streams
|
||||
*/
|
||||
switch (op) {
|
||||
case cdc::operation::pre_image:
|
||||
case cdc::operation::post_image:
|
||||
{
|
||||
auto item = rjson::empty_object();
|
||||
describe_single_item(*selection, row, attr_names, item, nullptr, true);
|
||||
describe_single_item(*selection, row, key_names, item);
|
||||
rjson::add(dynamodb, op == cdc::operation::pre_image ? "OldImage" : "NewImage", std::move(item));
|
||||
break;
|
||||
}
|
||||
case cdc::operation::update:
|
||||
rjson::add(record, "eventName", "MODIFY");
|
||||
break;
|
||||
case cdc::operation::insert:
|
||||
rjson::add(record, "eventName", "INSERT");
|
||||
break;
|
||||
case cdc::operation::service_row_delete:
|
||||
case cdc::operation::service_partition_delete:
|
||||
{
|
||||
auto user_identity = rjson::empty_object();
|
||||
rjson::add(user_identity, "Type", "Service");
|
||||
rjson::add(user_identity, "PrincipalId", "dynamodb.amazonaws.com");
|
||||
rjson::add(record, "userIdentity", std::move(user_identity));
|
||||
rjson::add(record, "eventName", "REMOVE");
|
||||
break;
|
||||
}
|
||||
default:
|
||||
rjson::add(record, "eventName", "REMOVE");
|
||||
break;
|
||||
}
|
||||
if (eor) {
|
||||
maybe_add_record();
|
||||
timestamp = ts;
|
||||
if (limit == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto ret = rjson::empty_object();
|
||||
auto nrecords = records.Size();
|
||||
rjson::add(ret, "Records", std::move(records));
|
||||
auto ret = rjson::empty_object();
|
||||
auto nrecords = records.Size();
|
||||
rjson::add(ret, "Records", std::move(records));
|
||||
|
||||
if (nrecords != 0) {
|
||||
// #9642. Set next iterators threshold to > last
|
||||
shard_iterator next_iter(iter.table, iter.shard, *timestamp, false);
|
||||
// Note that here we unconditionally return NextShardIterator,
|
||||
// without checking if maybe we reached the end-of-shard. If the
|
||||
// shard did end, then the next read will have nrecords == 0 and
|
||||
// will notice the end of shard and not return NextShardIterator.
|
||||
rjson::add(ret, "NextShardIterator", next_iter);
|
||||
_stats.api_operations.get_records_latency.mark(std::chrono::steady_clock::now() - start_time);
|
||||
co_return rjson::print(std::move(ret));
|
||||
}
|
||||
if (nrecords != 0) {
|
||||
// #9642. Set next iterators threshold to > last
|
||||
shard_iterator next_iter(iter.table, iter.shard, *timestamp, false);
|
||||
// Note that here we unconditionally return NextShardIterator,
|
||||
// without checking if maybe we reached the end-of-shard. If the
|
||||
// shard did end, then the next read will have nrecords == 0 and
|
||||
// will notice the end of shard and not return NextShardIterator.
|
||||
rjson::add(ret, "NextShardIterator", next_iter);
|
||||
_stats.api_operations.get_records_latency.mark(std::chrono::steady_clock::now() - start_time);
|
||||
return make_ready_future<executor::request_return_type>(rjson::print(std::move(ret)));
|
||||
}
|
||||
|
||||
// ugh. figure out if we are and end-of-shard
|
||||
auto normal_token_owners = _proxy.get_token_metadata_ptr()->count_normal_token_owners();
|
||||
// ugh. figure out if we are and end-of-shard
|
||||
auto normal_token_owners = _proxy.get_token_metadata_ptr()->count_normal_token_owners();
|
||||
|
||||
db_clock::time_point ts = co_await _sdks.cdc_current_generation_timestamp({ normal_token_owners });
|
||||
auto& shard = iter.shard;
|
||||
return _sdks.cdc_current_generation_timestamp({ normal_token_owners }).then([this, iter, high_ts, start_time, ret = std::move(ret)](db_clock::time_point ts) mutable {
|
||||
auto& shard = iter.shard;
|
||||
|
||||
if (shard.time < ts && ts < high_ts) {
|
||||
// The DynamoDB documentation states that when a shard is
|
||||
// closed, reading it until the end has NextShardIterator
|
||||
// "set to null". Our test test_streams_closed_read
|
||||
// confirms that by "null" they meant not set at all.
|
||||
} else {
|
||||
// We could have return the same iterator again, but we did
|
||||
// a search from it until high_ts and found nothing, so we
|
||||
// can also start the next search from high_ts.
|
||||
// TODO: but why? It's simpler just to leave the iterator be.
|
||||
shard_iterator next_iter(iter.table, iter.shard, utils::UUID_gen::min_time_UUID(high_ts.time_since_epoch()), true);
|
||||
rjson::add(ret, "NextShardIterator", iter);
|
||||
}
|
||||
_stats.api_operations.get_records_latency.mark(std::chrono::steady_clock::now() - start_time);
|
||||
if (is_big(ret)) {
|
||||
co_return make_streamed(std::move(ret));
|
||||
}
|
||||
co_return rjson::print(std::move(ret));
|
||||
if (shard.time < ts && ts < high_ts) {
|
||||
// The DynamoDB documentation states that when a shard is
|
||||
// closed, reading it until the end has NextShardIterator
|
||||
// "set to null". Our test test_streams_closed_read
|
||||
// confirms that by "null" they meant not set at all.
|
||||
} else {
|
||||
// We could have return the same iterator again, but we did
|
||||
// a search from it until high_ts and found nothing, so we
|
||||
// can also start the next search from high_ts.
|
||||
// TODO: but why? It's simpler just to leave the iterator be.
|
||||
shard_iterator next_iter(iter.table, iter.shard, utils::UUID_gen::min_time_UUID(high_ts.time_since_epoch()), true);
|
||||
rjson::add(ret, "NextShardIterator", iter);
|
||||
}
|
||||
_stats.api_operations.get_records_latency.mark(std::chrono::steady_clock::now() - start_time);
|
||||
if (is_big(ret)) {
|
||||
return make_ready_future<executor::request_return_type>(make_streamed(std::move(ret)));
|
||||
}
|
||||
return make_ready_future<executor::request_return_type>(rjson::print(std::move(ret)));
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
bool executor::add_stream_options(const rjson::value& stream_specification, schema_builder& builder, service::storage_proxy& sp) {
|
||||
|
||||
@@ -3051,7 +3051,7 @@
|
||||
},
|
||||
{
|
||||
"name":"incremental_mode",
|
||||
"description":"Set the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to incremental mode.",
|
||||
"description":"Set the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to 'disabled' mode.",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
|
||||
@@ -2016,14 +2016,12 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
|
||||
auto tag = req->get_query_param("tag");
|
||||
auto column_families = split(req->get_query_param("cf"), ",");
|
||||
auto sfopt = req->get_query_param("sf");
|
||||
db::snapshot_options opts = {
|
||||
.skip_flush = strcasecmp(sfopt.c_str(), "true") == 0,
|
||||
};
|
||||
auto sf = db::snapshot_ctl::skip_flush(strcasecmp(sfopt.c_str(), "true") == 0);
|
||||
|
||||
std::vector<sstring> keynames = split(req->get_query_param("kn"), ",");
|
||||
try {
|
||||
if (column_families.empty()) {
|
||||
co_await snap_ctl.local().take_snapshot(tag, keynames, opts);
|
||||
co_await snap_ctl.local().take_snapshot(tag, keynames, sf);
|
||||
} else {
|
||||
if (keynames.empty()) {
|
||||
throw httpd::bad_param_exception("The keyspace of column families must be specified");
|
||||
@@ -2031,7 +2029,7 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
|
||||
if (keynames.size() > 1) {
|
||||
throw httpd::bad_param_exception("Only one keyspace allowed when specifying a column family");
|
||||
}
|
||||
co_await snap_ctl.local().take_column_family_snapshot(keynames[0], column_families, tag, opts);
|
||||
co_await snap_ctl.local().take_column_family_snapshot(keynames[0], column_families, tag, sf);
|
||||
}
|
||||
co_return json_void();
|
||||
} catch (...) {
|
||||
@@ -2066,8 +2064,7 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
|
||||
auto info = parse_scrub_options(ctx, std::move(req));
|
||||
|
||||
if (!info.snapshot_tag.empty()) {
|
||||
db::snapshot_options opts = {.skip_flush = false};
|
||||
co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, info.snapshot_tag, opts);
|
||||
co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, info.snapshot_tag, db::snapshot_ctl::skip_flush::no);
|
||||
}
|
||||
|
||||
compaction::compaction_stats stats;
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
#include <seastar/core/chunked_fifo.hh>
|
||||
#include <seastar/core/coroutine.hh>
|
||||
#include <seastar/coroutine/exception.hh>
|
||||
#include <seastar/coroutine/maybe_yield.hh>
|
||||
#include <seastar/http/exception.hh>
|
||||
|
||||
#include "task_manager.hh"
|
||||
@@ -265,7 +264,7 @@ void set_task_manager(http_context& ctx, routes& r, sharded<tasks::task_manager>
|
||||
if (id) {
|
||||
module->unregister_task(id);
|
||||
}
|
||||
co_await coroutine::maybe_yield();
|
||||
co_await maybe_yield();
|
||||
}
|
||||
});
|
||||
co_return json_void();
|
||||
|
||||
@@ -146,8 +146,7 @@ void set_tasks_compaction_module(http_context& ctx, routes& r, sharded<service::
|
||||
auto info = parse_scrub_options(ctx, std::move(req));
|
||||
|
||||
if (!info.snapshot_tag.empty()) {
|
||||
db::snapshot_options opts = {.skip_flush = false};
|
||||
co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, info.snapshot_tag, opts);
|
||||
co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, info.snapshot_tag, db::snapshot_ctl::skip_flush::no);
|
||||
}
|
||||
|
||||
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
|
||||
|
||||
@@ -209,11 +209,15 @@ future<> audit::stop_audit() {
|
||||
});
|
||||
}
|
||||
|
||||
audit_info_ptr audit::create_audit_info(statement_category cat, const sstring& keyspace, const sstring& table, bool batch) {
|
||||
audit_info_ptr audit::create_audit_info(statement_category cat, const sstring& keyspace, const sstring& table) {
|
||||
if (!audit_instance().local_is_initialized()) {
|
||||
return nullptr;
|
||||
}
|
||||
return std::make_unique<audit_info>(cat, keyspace, table, batch);
|
||||
return std::make_unique<audit_info>(cat, keyspace, table);
|
||||
}
|
||||
|
||||
audit_info_ptr audit::create_no_audit_info() {
|
||||
return audit_info_ptr();
|
||||
}
|
||||
|
||||
future<> audit::start(const db::config& cfg) {
|
||||
@@ -263,21 +267,18 @@ future<> audit::log_login(const sstring& username, socket_address client_ip, boo
|
||||
}
|
||||
|
||||
future<> inspect(shared_ptr<cql3::cql_statement> statement, service::query_state& query_state, const cql3::query_options& options, bool error) {
|
||||
auto audit_info = statement->get_audit_info();
|
||||
if (!audit_info) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
if (audit_info->batch()) {
|
||||
cql3::statements::batch_statement* batch = static_cast<cql3::statements::batch_statement*>(statement.get());
|
||||
cql3::statements::batch_statement* batch = dynamic_cast<cql3::statements::batch_statement*>(statement.get());
|
||||
if (batch != nullptr) {
|
||||
return do_for_each(batch->statements().begin(), batch->statements().end(), [&query_state, &options, error] (auto&& m) {
|
||||
return inspect(m.statement, query_state, options, error);
|
||||
});
|
||||
} else {
|
||||
if (audit::local_audit_instance().should_log(audit_info)) {
|
||||
auto audit_info = statement->get_audit_info();
|
||||
if (bool(audit_info) && audit::local_audit_instance().should_log(audit_info)) {
|
||||
return audit::local_audit_instance().log(audit_info, query_state, options, error);
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
future<> inspect_login(const sstring& username, socket_address client_ip, bool error) {
|
||||
|
||||
@@ -75,13 +75,11 @@ class audit_info final {
|
||||
sstring _keyspace;
|
||||
sstring _table;
|
||||
sstring _query;
|
||||
bool _batch;
|
||||
public:
|
||||
audit_info(statement_category cat, sstring keyspace, sstring table, bool batch)
|
||||
audit_info(statement_category cat, sstring keyspace, sstring table)
|
||||
: _category(cat)
|
||||
, _keyspace(std::move(keyspace))
|
||||
, _table(std::move(table))
|
||||
, _batch(batch)
|
||||
{ }
|
||||
void set_query_string(const std::string_view& query_string) {
|
||||
_query = sstring(query_string);
|
||||
@@ -91,7 +89,6 @@ public:
|
||||
const sstring& query() const { return _query; }
|
||||
sstring category_string() const;
|
||||
statement_category category() const { return _category; }
|
||||
bool batch() const { return _batch; }
|
||||
};
|
||||
|
||||
using audit_info_ptr = std::unique_ptr<audit_info>;
|
||||
@@ -129,7 +126,8 @@ public:
|
||||
}
|
||||
static future<> start_audit(const db::config& cfg, sharded<locator::shared_token_metadata>& stm, sharded<cql3::query_processor>& qp, sharded<service::migration_manager>& mm);
|
||||
static future<> stop_audit();
|
||||
static audit_info_ptr create_audit_info(statement_category cat, const sstring& keyspace, const sstring& table, bool batch = false);
|
||||
static audit_info_ptr create_audit_info(statement_category cat, const sstring& keyspace, const sstring& table);
|
||||
static audit_info_ptr create_no_audit_info();
|
||||
audit(locator::shared_token_metadata& stm,
|
||||
cql3::query_processor& qp,
|
||||
service::migration_manager& mm,
|
||||
|
||||
@@ -53,10 +53,10 @@ static std::string json_escape(std::string_view str) {
|
||||
|
||||
}
|
||||
|
||||
future<> audit_syslog_storage_helper::syslog_send_helper(temporary_buffer<char> msg) {
|
||||
future<> audit_syslog_storage_helper::syslog_send_helper(const sstring& msg) {
|
||||
try {
|
||||
auto lock = co_await get_units(_semaphore, 1, std::chrono::hours(1));
|
||||
co_await _sender.send(_syslog_address, std::span(&msg, 1));
|
||||
co_await _sender.send(_syslog_address, net::packet{msg.data(), msg.size()});
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
auto error_msg = seastar::format(
|
||||
@@ -90,7 +90,7 @@ future<> audit_syslog_storage_helper::start(const db::config& cfg) {
|
||||
co_return;
|
||||
}
|
||||
|
||||
co_await syslog_send_helper(temporary_buffer<char>::copy_of("Initializing syslog audit backend."));
|
||||
co_await syslog_send_helper("Initializing syslog audit backend.");
|
||||
}
|
||||
|
||||
future<> audit_syslog_storage_helper::stop() {
|
||||
@@ -120,7 +120,7 @@ future<> audit_syslog_storage_helper::write(const audit_info* audit_info,
|
||||
audit_info->table(),
|
||||
username);
|
||||
|
||||
co_await syslog_send_helper(std::move(msg).release());
|
||||
co_await syslog_send_helper(msg);
|
||||
}
|
||||
|
||||
future<> audit_syslog_storage_helper::write_login(const sstring& username,
|
||||
@@ -139,7 +139,7 @@ future<> audit_syslog_storage_helper::write_login(const sstring& username,
|
||||
client_ip,
|
||||
username);
|
||||
|
||||
co_await syslog_send_helper(std::move(msg).release());
|
||||
co_await syslog_send_helper(msg.c_str());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -26,7 +26,7 @@ class audit_syslog_storage_helper : public storage_helper {
|
||||
net::datagram_channel _sender;
|
||||
seastar::semaphore _semaphore;
|
||||
|
||||
future<> syslog_send_helper(seastar::temporary_buffer<char> msg);
|
||||
future<> syslog_send_helper(const sstring& msg);
|
||||
public:
|
||||
explicit audit_syslog_storage_helper(cql3::query_processor&, service::migration_manager&);
|
||||
virtual ~audit_syslog_storage_helper();
|
||||
|
||||
@@ -76,14 +76,11 @@ sstring generate_salt(RandomNumberEngine& g, scheme scheme) {
|
||||
|
||||
///
|
||||
/// Hash a password combined with an implementation-specific salt string.
|
||||
/// Deprecated in favor of `hash_with_salt_async`. This function is still used
|
||||
/// when generating password hashes for storage to ensure that
|
||||
/// `hash_with_salt` and `hash_with_salt_async` produce identical results,
|
||||
/// preserving backward compatibility.
|
||||
/// Deprecated in favor of `hash_with_salt_async`.
|
||||
///
|
||||
/// \throws \ref std::system_error when an unexpected implementation-specific error occurs.
|
||||
///
|
||||
sstring hash_with_salt(const sstring& pass, const sstring& salt);
|
||||
[[deprecated("Use hash_with_salt_async instead")]] sstring hash_with_salt(const sstring& pass, const sstring& salt);
|
||||
|
||||
///
|
||||
/// Async version of `hash_with_salt` that returns a future.
|
||||
|
||||
@@ -876,6 +876,22 @@ future<> migrate_to_auth_v2(db::system_keyspace& sys_ks, ::service::raft_group0_
|
||||
continue; // some tables might not have been created if they were not used
|
||||
}
|
||||
|
||||
// use longer than usual timeout as we scan the whole table
|
||||
// but not infinite or very long as we want to fail reasonably fast
|
||||
const auto t = 5min;
|
||||
const timeout_config tc{t, t, t, t, t, t, t};
|
||||
::service::client_state cs(::service::client_state::internal_tag{}, tc);
|
||||
::service::query_state qs(cs, empty_service_permit());
|
||||
|
||||
auto rows = co_await qp.execute_internal(
|
||||
seastar::format("SELECT * FROM {}.{}", meta::legacy::AUTH_KS, cf_name),
|
||||
db::consistency_level::ALL,
|
||||
qs,
|
||||
{},
|
||||
cql3::query_processor::cache_internal::no);
|
||||
if (rows->empty()) {
|
||||
continue;
|
||||
}
|
||||
std::vector<sstring> col_names;
|
||||
for (const auto& col : schema->all_columns()) {
|
||||
col_names.push_back(col.name_as_cql_string());
|
||||
@@ -884,51 +900,30 @@ future<> migrate_to_auth_v2(db::system_keyspace& sys_ks, ::service::raft_group0_
|
||||
for (size_t i = 1; i < col_names.size(); ++i) {
|
||||
val_binders_str += ", ?";
|
||||
}
|
||||
|
||||
std::vector<mutation> collected;
|
||||
// use longer than usual timeout as we scan the whole table
|
||||
// but not infinite or very long as we want to fail reasonably fast
|
||||
const auto t = 5min;
|
||||
const timeout_config tc{t, t, t, t, t, t, t};
|
||||
::service::client_state cs(::service::client_state::internal_tag{}, tc);
|
||||
::service::query_state qs(cs, empty_service_permit());
|
||||
|
||||
co_await qp.query_internal(
|
||||
seastar::format("SELECT * FROM {}.{}", meta::legacy::AUTH_KS, cf_name),
|
||||
db::consistency_level::ALL,
|
||||
{},
|
||||
1000,
|
||||
[&qp, &cf_name, &col_names, &val_binders_str, &schema, ts, &collected] (const cql3::untyped_result_set::row& row) -> future<stop_iteration> {
|
||||
std::vector<data_value_or_unset> values;
|
||||
for (const auto& col : schema->all_columns()) {
|
||||
if (row.has(col.name_as_text())) {
|
||||
values.push_back(
|
||||
col.type->deserialize(row.get_blob_unfragmented(col.name_as_text())));
|
||||
} else {
|
||||
values.push_back(unset_value{});
|
||||
}
|
||||
for (const auto& row : *rows) {
|
||||
std::vector<data_value_or_unset> values;
|
||||
for (const auto& col : schema->all_columns()) {
|
||||
if (row.has(col.name_as_text())) {
|
||||
values.push_back(
|
||||
col.type->deserialize(row.get_blob_unfragmented(col.name_as_text())));
|
||||
} else {
|
||||
values.push_back(unset_value{});
|
||||
}
|
||||
auto muts = co_await qp.get_mutations_internal(
|
||||
seastar::format("INSERT INTO {}.{} ({}) VALUES ({})",
|
||||
db::system_keyspace::NAME,
|
||||
cf_name,
|
||||
fmt::join(col_names, ", "),
|
||||
val_binders_str),
|
||||
internal_distributed_query_state(),
|
||||
ts,
|
||||
std::move(values));
|
||||
if (muts.size() != 1) {
|
||||
on_internal_error(log,
|
||||
format("expecting single insert mutation, got {}", muts.size()));
|
||||
}
|
||||
|
||||
collected.push_back(std::move(muts[0]));
|
||||
co_return stop_iteration::no;
|
||||
},
|
||||
std::move(qs));
|
||||
|
||||
for (auto& m : collected) {
|
||||
co_yield std::move(m);
|
||||
}
|
||||
auto muts = co_await qp.get_mutations_internal(
|
||||
seastar::format("INSERT INTO {}.{} ({}) VALUES ({})",
|
||||
db::system_keyspace::NAME,
|
||||
cf_name,
|
||||
fmt::join(col_names, ", "),
|
||||
val_binders_str),
|
||||
internal_distributed_query_state(),
|
||||
ts,
|
||||
std::move(values));
|
||||
if (muts.size() != 1) {
|
||||
on_internal_error(log,
|
||||
format("expecting single insert mutation, got {}", muts.size()));
|
||||
}
|
||||
co_yield std::move(muts[0]);
|
||||
}
|
||||
}
|
||||
co_yield co_await sys_ks.make_auth_version_mutation(ts,
|
||||
|
||||
@@ -52,6 +52,13 @@ static const class_registrator<
|
||||
::service::migration_manager&,
|
||||
cache&> registration("org.apache.cassandra.auth.CassandraRoleManager");
|
||||
|
||||
struct record final {
|
||||
sstring name;
|
||||
bool is_superuser;
|
||||
bool can_login;
|
||||
role_set member_of;
|
||||
};
|
||||
|
||||
static db::consistency_level consistency_for_role(std::string_view role_name) noexcept {
|
||||
if (role_name == meta::DEFAULT_SUPERUSER_NAME) {
|
||||
return db::consistency_level::QUORUM;
|
||||
@@ -60,13 +67,13 @@ static db::consistency_level consistency_for_role(std::string_view role_name) no
|
||||
return db::consistency_level::LOCAL_ONE;
|
||||
}
|
||||
|
||||
future<std::optional<standard_role_manager::record>> standard_role_manager::legacy_find_record(std::string_view role_name) {
|
||||
static future<std::optional<record>> find_record(cql3::query_processor& qp, std::string_view role_name) {
|
||||
const sstring query = seastar::format("SELECT * FROM {}.{} WHERE {} = ?",
|
||||
get_auth_ks_name(_qp),
|
||||
get_auth_ks_name(qp),
|
||||
meta::roles_table::name,
|
||||
meta::roles_table::role_col_name);
|
||||
|
||||
const auto results = co_await _qp.execute_internal(
|
||||
const auto results = co_await qp.execute_internal(
|
||||
query,
|
||||
consistency_for_role(role_name),
|
||||
internal_distributed_query_state(),
|
||||
@@ -86,25 +93,8 @@ future<std::optional<standard_role_manager::record>> standard_role_manager::lega
|
||||
: role_set())});
|
||||
}
|
||||
|
||||
future<std::optional<standard_role_manager::record>> standard_role_manager::find_record(std::string_view role_name) {
|
||||
if (legacy_mode(_qp)) {
|
||||
return legacy_find_record(role_name);
|
||||
}
|
||||
auto name = sstring(role_name);
|
||||
auto role = _cache.get(name);
|
||||
if (!role) {
|
||||
return make_ready_future<std::optional<record>>(std::nullopt);
|
||||
}
|
||||
return make_ready_future<std::optional<record>>(std::make_optional(record{
|
||||
.name = std::move(name),
|
||||
.is_superuser = role->is_superuser,
|
||||
.can_login = role->can_login,
|
||||
.member_of = role->member_of
|
||||
}));
|
||||
}
|
||||
|
||||
future<standard_role_manager::record> standard_role_manager::require_record(std::string_view role_name) {
|
||||
return find_record(role_name).then([role_name](std::optional<record> mr) {
|
||||
static future<record> require_record(cql3::query_processor& qp, std::string_view role_name) {
|
||||
return find_record(qp, role_name).then([role_name](std::optional<record> mr) {
|
||||
if (!mr) {
|
||||
throw nonexistant_role(role_name);
|
||||
}
|
||||
@@ -396,7 +386,7 @@ standard_role_manager::alter(std::string_view role_name, const role_config_updat
|
||||
return fmt::to_string(fmt::join(assignments, ", "));
|
||||
};
|
||||
|
||||
return require_record(role_name).then([this, role_name, &u, &mc](record) {
|
||||
return require_record(_qp, role_name).then([this, role_name, &u, &mc](record) {
|
||||
if (!u.is_superuser && !u.can_login) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
@@ -630,17 +620,18 @@ standard_role_manager::revoke(std::string_view revokee_name, std::string_view ro
|
||||
});
|
||||
}
|
||||
|
||||
future<> standard_role_manager::collect_roles(
|
||||
static future<> collect_roles(
|
||||
cql3::query_processor& qp,
|
||||
std::string_view grantee_name,
|
||||
bool recurse,
|
||||
role_set& roles) {
|
||||
return require_record(grantee_name).then([this, &roles, recurse](standard_role_manager::record r) {
|
||||
return do_with(std::move(r.member_of), [this, &roles, recurse](const role_set& memberships) {
|
||||
return do_for_each(memberships.begin(), memberships.end(), [this, &roles, recurse](const sstring& role_name) {
|
||||
return require_record(qp, grantee_name).then([&qp, &roles, recurse](record r) {
|
||||
return do_with(std::move(r.member_of), [&qp, &roles, recurse](const role_set& memberships) {
|
||||
return do_for_each(memberships.begin(), memberships.end(), [&qp, &roles, recurse](const sstring& role_name) {
|
||||
roles.insert(role_name);
|
||||
|
||||
if (recurse) {
|
||||
return collect_roles(role_name, true, roles);
|
||||
return collect_roles(qp, role_name, true, roles);
|
||||
}
|
||||
|
||||
return make_ready_future<>();
|
||||
@@ -655,7 +646,7 @@ future<role_set> standard_role_manager::query_granted(std::string_view grantee_n
|
||||
return do_with(
|
||||
role_set{sstring(grantee_name)},
|
||||
[this, grantee_name, recurse](role_set& roles) {
|
||||
return collect_roles(grantee_name, recurse, roles).then([&roles] { return roles; });
|
||||
return collect_roles(_qp, grantee_name, recurse, roles).then([&roles] { return roles; });
|
||||
});
|
||||
}
|
||||
|
||||
@@ -715,21 +706,27 @@ future<role_set> standard_role_manager::query_all(::service::query_state& qs) {
|
||||
}
|
||||
|
||||
future<bool> standard_role_manager::exists(std::string_view role_name) {
|
||||
return find_record(role_name).then([](std::optional<record> mr) {
|
||||
return find_record(_qp, role_name).then([](std::optional<record> mr) {
|
||||
return static_cast<bool>(mr);
|
||||
});
|
||||
}
|
||||
|
||||
future<bool> standard_role_manager::is_superuser(std::string_view role_name) {
|
||||
return require_record(role_name).then([](record r) {
|
||||
return require_record(_qp, role_name).then([](record r) {
|
||||
return r.is_superuser;
|
||||
});
|
||||
}
|
||||
|
||||
future<bool> standard_role_manager::can_login(std::string_view role_name) {
|
||||
return require_record(role_name).then([](record r) {
|
||||
return r.can_login;
|
||||
});
|
||||
if (legacy_mode(_qp)) {
|
||||
const auto r = co_await require_record(_qp, role_name);
|
||||
co_return r.can_login;
|
||||
}
|
||||
auto role = _cache.get(sstring(role_name));
|
||||
if (!role) {
|
||||
throw nonexistant_role(role_name);
|
||||
}
|
||||
co_return role->can_login;
|
||||
}
|
||||
|
||||
future<std::optional<sstring>> standard_role_manager::get_attribute(std::string_view role_name, std::string_view attribute_name, ::service::query_state& qs) {
|
||||
|
||||
@@ -90,12 +90,6 @@ public:
|
||||
|
||||
private:
|
||||
enum class membership_change { add, remove };
|
||||
struct record final {
|
||||
sstring name;
|
||||
bool is_superuser;
|
||||
bool can_login;
|
||||
role_set member_of;
|
||||
};
|
||||
|
||||
future<> create_legacy_metadata_tables_if_missing() const;
|
||||
|
||||
@@ -113,14 +107,6 @@ private:
|
||||
future<> legacy_modify_membership(std::string_view role_name, std::string_view grantee_name, membership_change);
|
||||
|
||||
future<> modify_membership(std::string_view role_name, std::string_view grantee_name, membership_change, ::service::group0_batch& mc);
|
||||
|
||||
future<std::optional<record>> legacy_find_record(std::string_view role_name);
|
||||
future<std::optional<record>> find_record(std::string_view role_name);
|
||||
future<record> require_record(std::string_view role_name);
|
||||
future<> collect_roles(
|
||||
std::string_view grantee_name,
|
||||
bool recurse,
|
||||
role_set& roles);
|
||||
};
|
||||
|
||||
} // namespace auth
|
||||
|
||||
@@ -204,7 +204,7 @@ future<topology_description> topology_description::clone_async() const {
|
||||
|
||||
for (const auto& entry : _entries) {
|
||||
vec.push_back(entry);
|
||||
co_await coroutine::maybe_yield();
|
||||
co_await seastar::maybe_yield();
|
||||
}
|
||||
|
||||
co_return topology_description{std::move(vec)};
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
#include "mutation/tombstone.hh"
|
||||
#include "schema/schema.hh"
|
||||
|
||||
#include <seastar/core/sstring.hh>
|
||||
#include "seastar/core/sstring.hh"
|
||||
#include "types/concrete_types.hh"
|
||||
#include "types/types.hh"
|
||||
#include "types/user.hh"
|
||||
|
||||
@@ -2296,7 +2296,7 @@ compaction_manager::maybe_split_new_sstable(sstables::shared_sstable sst, compac
|
||||
}
|
||||
// Throw an error if split cannot be performed due to e.g. out of space prevention.
|
||||
// We don't want to prevent split because compaction is temporarily disabled on a view only for synchronization,
|
||||
// which is unneeded against new sstables that aren't part of any set yet, so never use can_proceed(&t) here.
|
||||
// which is uneeded against new sstables that aren't part of any set yet, so never use can_proceed(&t) here.
|
||||
if (is_disabled()) {
|
||||
co_return coroutine::exception(std::make_exception_ptr(std::runtime_error(format("Cannot split {} because manager has compaction disabled, " \
|
||||
"reason might be out of space prevention", sst->get_filename()))));
|
||||
|
||||
@@ -571,10 +571,10 @@ commitlog_total_space_in_mb: -1
|
||||
# - "none": auditing is disabled (default)
|
||||
# - "table": save audited events in audit.audit_log column family
|
||||
# - "syslog": send audited events via syslog (depends on OS, but usually to /dev/log)
|
||||
audit: "table"
|
||||
# audit: "none"
|
||||
#
|
||||
# List of statement categories that should be audited.
|
||||
audit_categories: "DCL,DDL,AUTH,ADMIN"
|
||||
# audit_categories: "DCL,DDL,AUTH"
|
||||
#
|
||||
# List of tables that should be audited.
|
||||
# audit_tables: "<keyspace_name>.<table_name>,<keyspace_name>.<table_name>"
|
||||
|
||||
102
configure.py
102
configure.py
@@ -391,9 +391,9 @@ def find_compiler(name):
|
||||
return None
|
||||
|
||||
|
||||
def resolve_compilers_for_compiler_cache(args, compiler_cache):
|
||||
def resolve_compilers_for_sccache(args, compiler_cache):
|
||||
"""
|
||||
When using a compiler cache, resolve compiler paths to avoid ccache directories.
|
||||
When using sccache, resolve compiler paths to avoid ccache directories.
|
||||
|
||||
This prevents double-caching when ccache symlinks are in PATH.
|
||||
|
||||
@@ -401,7 +401,7 @@ def resolve_compilers_for_compiler_cache(args, compiler_cache):
|
||||
args: The argument namespace with cc and cxx attributes.
|
||||
compiler_cache: Path to the compiler cache binary, or None.
|
||||
"""
|
||||
if not compiler_cache:
|
||||
if not compiler_cache or 'sccache' not in compiler_cache:
|
||||
return
|
||||
if not os.path.isabs(args.cxx):
|
||||
real_cxx = find_compiler(args.cxx)
|
||||
@@ -725,9 +725,7 @@ raft_tests = set([
|
||||
vector_search_tests = set([
|
||||
'test/vector_search/vector_store_client_test',
|
||||
'test/vector_search/load_balancer_test',
|
||||
'test/vector_search/client_test',
|
||||
'test/vector_search/filter_test',
|
||||
'test/vector_search/rescoring_test'
|
||||
'test/vector_search/client_test'
|
||||
])
|
||||
|
||||
vector_search_validator_bin = 'vector-search-validator/bin/vector-search-validator'
|
||||
@@ -817,9 +815,6 @@ arg_parser.add_argument('--c-compiler', action='store', dest='cc', default='clan
|
||||
help='C compiler path')
|
||||
arg_parser.add_argument('--compiler-cache', action='store', dest='compiler_cache', default='auto',
|
||||
help='Compiler cache to use: auto (default, prefers sccache), sccache, ccache, none, or a path to a binary')
|
||||
# Workaround for https://github.com/mozilla/sccache/issues/2575
|
||||
arg_parser.add_argument('--sccache-rust', action=argparse.BooleanOptionalAction, default=False,
|
||||
help='Use sccache for rust code (if sccache is selected as compiler cache). Doesn\'t work with distributed builds.')
|
||||
add_tristate(arg_parser, name='dpdk', dest='dpdk', default=False,
|
||||
help='Use dpdk (from seastar dpdk sources)')
|
||||
arg_parser.add_argument('--dpdk-target', action='store', dest='dpdk_target', default='',
|
||||
@@ -950,7 +945,8 @@ scylla_core = (['message/messaging_service.cc',
|
||||
'utils/crypt_sha512.cc',
|
||||
'utils/logalloc.cc',
|
||||
'utils/large_bitset.cc',
|
||||
'test/lib/limiting_data_source.cc',
|
||||
'utils/buffer_input_stream.cc',
|
||||
'utils/limiting_data_source.cc',
|
||||
'utils/updateable_value.cc',
|
||||
'message/dictionary_service.cc',
|
||||
'utils/directories.cc',
|
||||
@@ -1038,10 +1034,6 @@ scylla_core = (['message/messaging_service.cc',
|
||||
'cql3/functions/aggregate_fcts.cc',
|
||||
'cql3/functions/castas_fcts.cc',
|
||||
'cql3/functions/error_injection_fcts.cc',
|
||||
'cql3/statements/strong_consistency/modification_statement.cc',
|
||||
'cql3/statements/strong_consistency/select_statement.cc',
|
||||
'cql3/statements/strong_consistency/statement_helpers.cc',
|
||||
'cql3/functions/vector_similarity_fcts.cc',
|
||||
'cql3/statements/cf_prop_defs.cc',
|
||||
'cql3/statements/cf_statement.cc',
|
||||
'cql3/statements/authentication_statement.cc',
|
||||
@@ -1066,8 +1058,8 @@ scylla_core = (['message/messaging_service.cc',
|
||||
'cql3/statements/raw/parsed_statement.cc',
|
||||
'cql3/statements/property_definitions.cc',
|
||||
'cql3/statements/update_statement.cc',
|
||||
'cql3/statements/broadcast_modification_statement.cc',
|
||||
'cql3/statements/broadcast_select_statement.cc',
|
||||
'cql3/statements/strongly_consistent_modification_statement.cc',
|
||||
'cql3/statements/strongly_consistent_select_statement.cc',
|
||||
'cql3/statements/delete_statement.cc',
|
||||
'cql3/statements/prune_materialized_view_statement.cc',
|
||||
'cql3/statements/batch_statement.cc',
|
||||
@@ -1099,7 +1091,6 @@ scylla_core = (['message/messaging_service.cc',
|
||||
'cql3/statements/list_service_level_attachments_statement.cc',
|
||||
'cql3/statements/list_effective_service_level_statement.cc',
|
||||
'cql3/statements/describe_statement.cc',
|
||||
'cql3/statements/view_prop_defs.cc',
|
||||
'cql3/update_parameters.cc',
|
||||
'cql3/util.cc',
|
||||
'cql3/ut_name.cc',
|
||||
@@ -1358,9 +1349,6 @@ scylla_core = (['message/messaging_service.cc',
|
||||
'lang/wasm.cc',
|
||||
'lang/wasm_alien_thread_runner.cc',
|
||||
'lang/wasm_instance_cache.cc',
|
||||
'service/strong_consistency/groups_manager.cc',
|
||||
'service/strong_consistency/coordinator.cc',
|
||||
'service/strong_consistency/state_machine.cc',
|
||||
'service/raft/group0_state_id_handler.cc',
|
||||
'service/raft/group0_state_machine.cc',
|
||||
'service/raft/group0_state_machine_merger.cc',
|
||||
@@ -1390,7 +1378,6 @@ scylla_core = (['message/messaging_service.cc',
|
||||
'vector_search/dns.cc',
|
||||
'vector_search/client.cc',
|
||||
'vector_search/clients.cc',
|
||||
'vector_search/filter.cc',
|
||||
'vector_search/truststore.cc'
|
||||
] + [Antlr3Grammar('cql3/Cql.g')] \
|
||||
+ scylla_raft_core
|
||||
@@ -1466,7 +1453,6 @@ alternator = [
|
||||
'alternator/auth.cc',
|
||||
'alternator/streams.cc',
|
||||
'alternator/ttl.cc',
|
||||
'alternator/http_compression.cc'
|
||||
]
|
||||
|
||||
idls = ['idl/gossip_digest.idl.hh',
|
||||
@@ -1500,7 +1486,6 @@ idls = ['idl/gossip_digest.idl.hh',
|
||||
'idl/hinted_handoff.idl.hh',
|
||||
'idl/storage_proxy.idl.hh',
|
||||
'idl/sstables.idl.hh',
|
||||
'idl/strong_consistency/state_machine.idl.hh',
|
||||
'idl/group0_state_machine.idl.hh',
|
||||
'idl/mapreduce_request.idl.hh',
|
||||
'idl/replica_exception.idl.hh',
|
||||
@@ -1559,7 +1544,6 @@ scylla_perfs = ['test/perf/perf_alternator.cc',
|
||||
'test/perf/perf_fast_forward.cc',
|
||||
'test/perf/perf_row_cache_update.cc',
|
||||
'test/perf/perf_simple_query.cc',
|
||||
'test/perf/perf_cql_raw.cc',
|
||||
'test/perf/perf_sstable.cc',
|
||||
'test/perf/perf_tablets.cc',
|
||||
'test/perf/tablet_load_balancing.cc',
|
||||
@@ -1714,7 +1698,6 @@ deps['test/boost/combined_tests'] += [
|
||||
'test/boost/schema_registry_test.cc',
|
||||
'test/boost/secondary_index_test.cc',
|
||||
'test/boost/sessions_test.cc',
|
||||
'test/boost/simple_value_with_expiry_test.cc',
|
||||
'test/boost/sstable_compaction_test.cc',
|
||||
'test/boost/sstable_compressor_factory_test.cc',
|
||||
'test/boost/sstable_compression_config_test.cc',
|
||||
@@ -1797,8 +1780,6 @@ deps['test/raft/discovery_test'] = ['test/raft/discovery_test.cc',
|
||||
deps['test/vector_search/vector_store_client_test'] = ['test/vector_search/vector_store_client_test.cc'] + scylla_tests_dependencies
|
||||
deps['test/vector_search/load_balancer_test'] = ['test/vector_search/load_balancer_test.cc'] + scylla_tests_dependencies
|
||||
deps['test/vector_search/client_test'] = ['test/vector_search/client_test.cc'] + scylla_tests_dependencies
|
||||
deps['test/vector_search/filter_test'] = ['test/vector_search/filter_test.cc'] + scylla_tests_dependencies
|
||||
deps['test/vector_search/rescoring_test'] = ['test/vector_search/rescoring_test.cc'] + scylla_tests_dependencies
|
||||
|
||||
boost_tests_prefixes = ["test/boost/", "test/vector_search/", "test/raft/", "test/manual/", "test/ldap/"]
|
||||
|
||||
@@ -2408,7 +2389,7 @@ def write_build_file(f,
|
||||
# If compiler cache is available, prefix the compiler with it
|
||||
cxx_with_cache = f'{compiler_cache} {args.cxx}' if compiler_cache else args.cxx
|
||||
# For Rust, sccache is used via RUSTC_WRAPPER environment variable
|
||||
rustc_wrapper = f'RUSTC_WRAPPER={compiler_cache} ' if compiler_cache and 'sccache' in compiler_cache and args.sccache_rust else ''
|
||||
rustc_wrapper = f'RUSTC_WRAPPER={compiler_cache} ' if compiler_cache and 'sccache' in compiler_cache else ''
|
||||
f.write(textwrap.dedent('''\
|
||||
configure_args = {configure_args}
|
||||
builddir = {outdir}
|
||||
@@ -2821,35 +2802,38 @@ def write_build_file(f,
|
||||
|
||||
seastar_dep = f'$builddir/{mode}/seastar/libseastar.{seastar_lib_ext}'
|
||||
seastar_testing_dep = f'$builddir/{mode}/seastar/libseastar_testing.{seastar_lib_ext}'
|
||||
f.write(f'build {seastar_dep}: ninja $builddir/{mode}/seastar/build.ninja | always {profile_dep}\n')
|
||||
f.write('build {seastar_dep}: ninja $builddir/{mode}/seastar/build.ninja | always {profile_dep}\n'
|
||||
.format(**locals()))
|
||||
f.write(' pool = submodule_pool\n')
|
||||
f.write(f' subdir = $builddir/{mode}/seastar\n')
|
||||
f.write(' target = seastar\n')
|
||||
f.write(f'build {seastar_testing_dep}: ninja $builddir/{mode}/seastar/build.ninja | always {profile_dep}\n')
|
||||
f.write(' subdir = $builddir/{mode}/seastar\n'.format(**locals()))
|
||||
f.write(' target = seastar\n'.format(**locals()))
|
||||
f.write('build {seastar_testing_dep}: ninja $builddir/{mode}/seastar/build.ninja | always {profile_dep}\n'
|
||||
.format(**locals()))
|
||||
f.write(' pool = submodule_pool\n')
|
||||
f.write(f' subdir = $builddir/{mode}/seastar\n')
|
||||
f.write(' target = seastar_testing\n')
|
||||
f.write(f' profile_dep = {profile_dep}\n')
|
||||
f.write(' subdir = $builddir/{mode}/seastar\n'.format(**locals()))
|
||||
f.write(' target = seastar_testing\n'.format(**locals()))
|
||||
f.write(' profile_dep = {profile_dep}\n'.format(**locals()))
|
||||
|
||||
for lib in abseil_libs:
|
||||
f.write(f'build $builddir/{mode}/abseil/{lib}: ninja $builddir/{mode}/abseil/build.ninja | always {profile_dep}\n')
|
||||
f.write(f' pool = submodule_pool\n')
|
||||
f.write(f' subdir = $builddir/{mode}/abseil\n')
|
||||
f.write(f' target = {lib}\n')
|
||||
f.write(f' profile_dep = {profile_dep}\n')
|
||||
f.write('build $builddir/{mode}/abseil/{lib}: ninja $builddir/{mode}/abseil/build.ninja | always {profile_dep}\n'.format(**locals()))
|
||||
f.write(' pool = submodule_pool\n')
|
||||
f.write(' subdir = $builddir/{mode}/abseil\n'.format(**locals()))
|
||||
f.write(' target = {lib}\n'.format(**locals()))
|
||||
f.write(' profile_dep = {profile_dep}\n'.format(**locals()))
|
||||
|
||||
f.write(f'build $builddir/{mode}/stdafx.hh.pch: cxx_build_precompiled_header.{mode} stdafx.hh | {profile_dep} {seastar_dep} {abseil_dep} {gen_headers_dep} {pch_dep}\n')
|
||||
|
||||
f.write(f'build $builddir/{mode}/seastar/apps/iotune/iotune: ninja $builddir/{mode}/seastar/build.ninja | $builddir/{mode}/seastar/libseastar.{seastar_lib_ext}\n')
|
||||
f.write('build $builddir/{mode}/seastar/apps/iotune/iotune: ninja $builddir/{mode}/seastar/build.ninja | $builddir/{mode}/seastar/libseastar.{seastar_lib_ext}\n'
|
||||
.format(**locals()))
|
||||
f.write(' pool = submodule_pool\n')
|
||||
f.write(f' subdir = $builddir/{mode}/seastar\n')
|
||||
f.write(' target = iotune\n')
|
||||
f.write(f' profile_dep = {profile_dep}\n')
|
||||
f.write(textwrap.dedent(f'''\
|
||||
f.write(' subdir = $builddir/{mode}/seastar\n'.format(**locals()))
|
||||
f.write(' target = iotune\n'.format(**locals()))
|
||||
f.write(' profile_dep = {profile_dep}\n'.format(**locals()))
|
||||
f.write(textwrap.dedent('''\
|
||||
build $builddir/{mode}/iotune: copy $builddir/{mode}/seastar/apps/iotune/iotune
|
||||
build $builddir/{mode}/iotune.stripped: strip $builddir/{mode}/iotune
|
||||
build $builddir/{mode}/iotune.debug: phony $builddir/{mode}/iotune.stripped
|
||||
'''))
|
||||
''').format(**locals()))
|
||||
if args.dist_only:
|
||||
include_scylla_and_iotune = ''
|
||||
include_scylla_and_iotune_stripped = ''
|
||||
@@ -2858,16 +2842,16 @@ def write_build_file(f,
|
||||
include_scylla_and_iotune = f'$builddir/{mode}/scylla $builddir/{mode}/iotune $builddir/{mode}/patchelf'
|
||||
include_scylla_and_iotune_stripped = f'$builddir/{mode}/scylla.stripped $builddir/{mode}/iotune.stripped $builddir/{mode}/patchelf.stripped'
|
||||
include_scylla_and_iotune_debug = f'$builddir/{mode}/scylla.debug $builddir/{mode}/iotune.debug'
|
||||
f.write(f'build $builddir/{mode}/dist/tar/{scylla_product}-unstripped-{scylla_version}-{scylla_release}.{arch}.tar.gz: package {include_scylla_and_iotune} $builddir/SCYLLA-RELEASE-FILE $builddir/SCYLLA-VERSION-FILE $builddir/debian/debian $builddir/node_exporter/node_exporter | always\n')
|
||||
f.write(f' mode = {mode}\n')
|
||||
f.write(f'build $builddir/{mode}/dist/tar/{scylla_product}-{scylla_version}-{scylla_release}.{arch}.tar.gz: stripped_package {include_scylla_and_iotune_stripped} $builddir/SCYLLA-RELEASE-FILE $builddir/SCYLLA-VERSION-FILE $builddir/debian/debian $builddir/node_exporter/node_exporter.stripped | always\n')
|
||||
f.write(f' mode = {mode}\n')
|
||||
f.write(f'build $builddir/{mode}/dist/tar/{scylla_product}-debuginfo-{scylla_version}-{scylla_release}.{arch}.tar.gz: debuginfo_package {include_scylla_and_iotune_debug} $builddir/SCYLLA-RELEASE-FILE $builddir/SCYLLA-VERSION-FILE $builddir/debian/debian $builddir/node_exporter/node_exporter.debug | always\n')
|
||||
f.write(f' mode = {mode}\n')
|
||||
f.write(f'build $builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz: copy $builddir/{mode}/dist/tar/{scylla_product}-{scylla_version}-{scylla_release}.{arch}.tar.gz\n')
|
||||
f.write(f' mode = {mode}\n')
|
||||
f.write(f'build $builddir/{mode}/dist/tar/{scylla_product}-{arch}-package.tar.gz: copy $builddir/{mode}/dist/tar/{scylla_product}-{scylla_version}-{scylla_release}.{arch}.tar.gz\n')
|
||||
f.write(f' mode = {mode}\n')
|
||||
f.write('build $builddir/{mode}/dist/tar/{scylla_product}-unstripped-{scylla_version}-{scylla_release}.{arch}.tar.gz: package {include_scylla_and_iotune} $builddir/SCYLLA-RELEASE-FILE $builddir/SCYLLA-VERSION-FILE $builddir/debian/debian $builddir/node_exporter/node_exporter | always\n'.format(**locals()))
|
||||
f.write(' mode = {mode}\n'.format(**locals()))
|
||||
f.write('build $builddir/{mode}/dist/tar/{scylla_product}-{scylla_version}-{scylla_release}.{arch}.tar.gz: stripped_package {include_scylla_and_iotune_stripped} $builddir/SCYLLA-RELEASE-FILE $builddir/SCYLLA-VERSION-FILE $builddir/debian/debian $builddir/node_exporter/node_exporter.stripped | always\n'.format(**locals()))
|
||||
f.write(' mode = {mode}\n'.format(**locals()))
|
||||
f.write('build $builddir/{mode}/dist/tar/{scylla_product}-debuginfo-{scylla_version}-{scylla_release}.{arch}.tar.gz: debuginfo_package {include_scylla_and_iotune_debug} $builddir/SCYLLA-RELEASE-FILE $builddir/SCYLLA-VERSION-FILE $builddir/debian/debian $builddir/node_exporter/node_exporter.debug | always\n'.format(**locals()))
|
||||
f.write(' mode = {mode}\n'.format(**locals()))
|
||||
f.write('build $builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz: copy $builddir/{mode}/dist/tar/{scylla_product}-{scylla_version}-{scylla_release}.{arch}.tar.gz\n'.format(**locals()))
|
||||
f.write(' mode = {mode}\n'.format(**locals()))
|
||||
f.write('build $builddir/{mode}/dist/tar/{scylla_product}-{arch}-package.tar.gz: copy $builddir/{mode}/dist/tar/{scylla_product}-{scylla_version}-{scylla_release}.{arch}.tar.gz\n'.format(**locals()))
|
||||
f.write(' mode = {mode}\n'.format(**locals()))
|
||||
|
||||
f.write(f'build $builddir/dist/{mode}/redhat: rpmbuild $builddir/{mode}/dist/tar/{scylla_product}-unstripped-{scylla_version}-{scylla_release}.{arch}.tar.gz\n')
|
||||
f.write(f' mode = {mode}\n')
|
||||
@@ -3041,7 +3025,7 @@ def create_build_system(args):
|
||||
os.makedirs(outdir, exist_ok=True)
|
||||
|
||||
compiler_cache = find_compiler_cache(args.compiler_cache)
|
||||
resolve_compilers_for_compiler_cache(args, compiler_cache)
|
||||
resolve_compilers_for_sccache(args, compiler_cache)
|
||||
|
||||
scylla_product, scylla_version, scylla_release = generate_version(args.date_stamp)
|
||||
|
||||
@@ -3128,7 +3112,7 @@ def configure_using_cmake(args):
|
||||
in selected_modes)
|
||||
|
||||
compiler_cache = find_compiler_cache(args.compiler_cache)
|
||||
resolve_compilers_for_compiler_cache(args, compiler_cache)
|
||||
resolve_compilers_for_sccache(args, compiler_cache)
|
||||
|
||||
settings = {
|
||||
'CMAKE_CONFIGURATION_TYPES': selected_configs,
|
||||
@@ -3152,7 +3136,7 @@ def configure_using_cmake(args):
|
||||
settings['CMAKE_CXX_COMPILER_LAUNCHER'] = compiler_cache
|
||||
settings['CMAKE_C_COMPILER_LAUNCHER'] = compiler_cache
|
||||
# For Rust, sccache is used via RUSTC_WRAPPER
|
||||
if 'sccache' in compiler_cache and args.sccache_rust:
|
||||
if 'sccache' in compiler_cache:
|
||||
settings['Scylla_RUSTC_WRAPPER'] = compiler_cache
|
||||
|
||||
if args.date_stamp:
|
||||
|
||||
@@ -47,10 +47,6 @@ target_sources(cql3
|
||||
functions/aggregate_fcts.cc
|
||||
functions/castas_fcts.cc
|
||||
functions/error_injection_fcts.cc
|
||||
statements/strong_consistency/select_statement.cc
|
||||
statements/strong_consistency/modification_statement.cc
|
||||
statements/strong_consistency/statement_helpers.cc
|
||||
functions/vector_similarity_fcts.cc
|
||||
statements/cf_prop_defs.cc
|
||||
statements/cf_statement.cc
|
||||
statements/authentication_statement.cc
|
||||
@@ -75,8 +71,8 @@ target_sources(cql3
|
||||
statements/raw/parsed_statement.cc
|
||||
statements/property_definitions.cc
|
||||
statements/update_statement.cc
|
||||
statements/broadcast_modification_statement.cc
|
||||
statements/broadcast_select_statement.cc
|
||||
statements/strongly_consistent_modification_statement.cc
|
||||
statements/strongly_consistent_select_statement.cc
|
||||
statements/delete_statement.cc
|
||||
statements/prune_materialized_view_statement.cc
|
||||
statements/batch_statement.cc
|
||||
@@ -108,7 +104,6 @@ target_sources(cql3
|
||||
statements/list_service_level_attachments_statement.cc
|
||||
statements/list_effective_service_level_statement.cc
|
||||
statements/describe_statement.cc
|
||||
statements/view_prop_defs.cc
|
||||
update_parameters.cc
|
||||
util.cc
|
||||
ut_name.cc
|
||||
|
||||
50
cql3/Cql.g
50
cql3/Cql.g
@@ -431,7 +431,6 @@ unaliasedSelector returns [uexpression tmp]
|
||||
| K_TTL '(' c=cident ')' { tmp = column_mutation_attribute{column_mutation_attribute::attribute_kind::ttl,
|
||||
unresolved_identifier{std::move(c)}}; }
|
||||
| f=functionName args=selectionFunctionArgs { tmp = function_call{std::move(f), std::move(args)}; }
|
||||
| f=similarityFunctionName args=vectorSimilarityArgs { tmp = function_call{std::move(f), std::move(args)}; }
|
||||
| K_CAST '(' arg=unaliasedSelector K_AS t=native_type ')' { tmp = cast{.style = cast::cast_style::sql, .arg = std::move(arg), .type = std::move(t)}; }
|
||||
)
|
||||
( '.' fi=cident { tmp = field_selection{std::move(tmp), std::move(fi)}; }
|
||||
@@ -446,18 +445,6 @@ selectionFunctionArgs returns [std::vector<expression> a]
|
||||
')'
|
||||
;
|
||||
|
||||
vectorSimilarityArgs returns [std::vector<expression> a]
|
||||
: '(' ')'
|
||||
| '(' v1=vectorSimilarityArg { a.push_back(std::move(v1)); }
|
||||
( ',' vn=vectorSimilarityArg { a.push_back(std::move(vn)); } )*
|
||||
')'
|
||||
;
|
||||
|
||||
vectorSimilarityArg returns [uexpression a]
|
||||
: s=unaliasedSelector { a = std::move(s); }
|
||||
| v=value { a = std::move(v); }
|
||||
;
|
||||
|
||||
countArgument
|
||||
: '*'
|
||||
| i=INTEGER { if (i->getText() != "1") {
|
||||
@@ -898,10 +885,6 @@ pkDef[cql3::statements::create_table_statement::raw_statement& expr]
|
||||
| '(' k1=ident { l.push_back(k1); } ( ',' kn=ident { l.push_back(kn); } )* ')' { $expr.add_key_aliases(l); }
|
||||
;
|
||||
|
||||
cfamProperties[cql3::statements::cf_properties& expr]
|
||||
: cfamProperty[expr] (K_AND cfamProperty[expr])*
|
||||
;
|
||||
|
||||
cfamProperty[cql3::statements::cf_properties& expr]
|
||||
: property[*$expr.properties()]
|
||||
| K_COMPACT K_STORAGE { $expr.set_compact_storage(); }
|
||||
@@ -939,22 +922,16 @@ typeColumns[create_type_statement& expr]
|
||||
*/
|
||||
createIndexStatement returns [std::unique_ptr<create_index_statement> expr]
|
||||
@init {
|
||||
auto idx_props = make_shared<index_specific_prop_defs>();
|
||||
auto props = index_prop_defs();
|
||||
auto props = make_shared<index_prop_defs>();
|
||||
bool if_not_exists = false;
|
||||
auto name = ::make_shared<cql3::index_name>();
|
||||
std::vector<::shared_ptr<index_target::raw>> targets;
|
||||
}
|
||||
: K_CREATE (K_CUSTOM { idx_props->is_custom = true; })? K_INDEX (K_IF K_NOT K_EXISTS { if_not_exists = true; } )?
|
||||
: K_CREATE (K_CUSTOM { props->is_custom = true; })? K_INDEX (K_IF K_NOT K_EXISTS { if_not_exists = true; } )?
|
||||
(idxName[*name])? K_ON cf=columnFamilyName '(' (target1=indexIdent { targets.emplace_back(target1); } (',' target2=indexIdent { targets.emplace_back(target2); } )*)? ')'
|
||||
(K_USING cls=STRING_LITERAL { idx_props->custom_class = sstring{$cls.text}; })?
|
||||
(K_WITH cfamProperties[props])?
|
||||
{
|
||||
props.extract_index_specific_properties_to(*idx_props);
|
||||
view_prop_defs view_props = std::move(props).into_view_prop_defs();
|
||||
|
||||
$expr = std::make_unique<create_index_statement>(cf, name, targets, std::move(idx_props), std::move(view_props), if_not_exists);
|
||||
}
|
||||
(K_USING cls=STRING_LITERAL { props->custom_class = sstring{$cls.text}; })?
|
||||
(K_WITH properties[*props])?
|
||||
{ $expr = std::make_unique<create_index_statement>(cf, name, targets, props, if_not_exists); }
|
||||
;
|
||||
|
||||
indexIdent returns [::shared_ptr<index_target::raw> id]
|
||||
@@ -1102,9 +1079,9 @@ alterTypeStatement returns [std::unique_ptr<alter_type_statement> expr]
|
||||
*/
|
||||
alterViewStatement returns [std::unique_ptr<alter_view_statement> expr]
|
||||
@init {
|
||||
auto props = cql3::statements::view_prop_defs();
|
||||
auto props = cql3::statements::cf_prop_defs();
|
||||
}
|
||||
: K_ALTER K_MATERIALIZED K_VIEW cf=columnFamilyName K_WITH properties[*props.properties()]
|
||||
: K_ALTER K_MATERIALIZED K_VIEW cf=columnFamilyName K_WITH properties[props]
|
||||
{
|
||||
$expr = std::make_unique<alter_view_statement>(std::move(cf), std::move(props));
|
||||
}
|
||||
@@ -1706,10 +1683,6 @@ functionName returns [cql3::functions::function_name s]
|
||||
: (ks=keyspaceName '.')? f=allowedFunctionName { $s.keyspace = std::move(ks); $s.name = std::move(f); }
|
||||
;
|
||||
|
||||
similarityFunctionName returns [cql3::functions::function_name s]
|
||||
: f=allowedSimilarityFunctionName { $s = cql3::functions::function_name::native_function(std::move(f)); }
|
||||
;
|
||||
|
||||
allowedFunctionName returns [sstring s]
|
||||
: f=IDENT { $s = $f.text; std::transform(s.begin(), s.end(), s.begin(), ::tolower); }
|
||||
| f=QUOTED_NAME { $s = $f.text; }
|
||||
@@ -1718,11 +1691,6 @@ allowedFunctionName returns [sstring s]
|
||||
| K_COUNT { $s = "count"; }
|
||||
;
|
||||
|
||||
allowedSimilarityFunctionName returns [sstring s]
|
||||
: f=(K_SIMILARITY_COSINE | K_SIMILARITY_EUCLIDEAN | K_SIMILARITY_DOT_PRODUCT)
|
||||
{ $s = $f.text; std::transform(s.begin(), s.end(), s.begin(), ::tolower); }
|
||||
;
|
||||
|
||||
functionArgs returns [std::vector<expression> a]
|
||||
: '(' ')'
|
||||
| '(' t1=term { a.push_back(std::move(t1)); }
|
||||
@@ -2419,10 +2387,6 @@ K_MUTATION_FRAGMENTS: M U T A T I O N '_' F R A G M E N T S;
|
||||
|
||||
K_VECTOR_SEARCH_INDEXING: V E C T O R '_' S E A R C H '_' I N D E X I N G;
|
||||
|
||||
K_SIMILARITY_EUCLIDEAN: S I M I L A R I T Y '_' E U C L I D E A N;
|
||||
K_SIMILARITY_COSINE: S I M I L A R I T Y '_' C O S I N E;
|
||||
K_SIMILARITY_DOT_PRODUCT: S I M I L A R I T Y '_' D O T '_' P R O D U C T;
|
||||
|
||||
// Case-insensitive alpha characters
|
||||
fragment A: ('a'|'A');
|
||||
fragment B: ('b'|'B');
|
||||
|
||||
@@ -25,11 +25,6 @@ public:
|
||||
NOT_ASSIGNABLE,
|
||||
};
|
||||
|
||||
struct vector_test_result {
|
||||
test_result result;
|
||||
std::optional<size_t> dimension_opt;
|
||||
};
|
||||
|
||||
static bool is_assignable(test_result tr) {
|
||||
return tr != test_result::NOT_ASSIGNABLE;
|
||||
}
|
||||
@@ -49,8 +44,6 @@ public:
|
||||
*/
|
||||
virtual test_result test_assignment(data_dictionary::database db, const sstring& keyspace, const schema* schema_opt, const column_specification& receiver) const = 0;
|
||||
|
||||
virtual vector_test_result test_assignment_any_size_float_vector() const = 0;
|
||||
|
||||
virtual std::optional<data_type> assignment_testable_type_opt() const = 0;
|
||||
|
||||
// for error reporting
|
||||
|
||||
@@ -1434,112 +1434,6 @@ test_assignment(const expression& expr, data_dictionary::database db, const sstr
|
||||
}, expr);
|
||||
}
|
||||
|
||||
template <cql3_type::kind... Kinds>
|
||||
assignment_testable::vector_test_result
|
||||
test_assignment_any_size_float_vector(const expression& expr) {
|
||||
using test_result = assignment_testable::vector_test_result;
|
||||
const test_result NOT_ASSIGNABLE = {assignment_testable::test_result::NOT_ASSIGNABLE, std::nullopt};
|
||||
const test_result WEAKLY_ASSIGNABLE = {assignment_testable::test_result::WEAKLY_ASSIGNABLE, std::nullopt};
|
||||
auto is_float_or_bind = [] (const expression& e) {
|
||||
return expr::visit(overloaded_functor{
|
||||
[] (const bind_variable&) {
|
||||
return true;
|
||||
},
|
||||
[] (const untyped_constant& uc) {
|
||||
return uc.partial_type == untyped_constant::type_class::floating_point
|
||||
|| uc.partial_type == untyped_constant::type_class::integer;
|
||||
},
|
||||
[] (const constant& value) {
|
||||
auto kind = value.type->as_cql3_type().get_kind();
|
||||
return cql3_type::kind_enum_set::frozen<Kinds...>().contains(kind);
|
||||
},
|
||||
[] (const auto&) {
|
||||
return false;
|
||||
},
|
||||
}, e);
|
||||
};
|
||||
auto validate_assignment = [&] (const data_type& dt) -> test_result {
|
||||
auto vt = dynamic_pointer_cast<const vector_type_impl>(dt->underlying_type());
|
||||
if (!vt) {
|
||||
return NOT_ASSIGNABLE;
|
||||
}
|
||||
auto elem_kind = vt->get_elements_type()->as_cql3_type().get_kind();
|
||||
if (cql3_type::kind_enum_set::frozen<Kinds...>().contains(elem_kind)) {
|
||||
return {assignment_testable::test_result::WEAKLY_ASSIGNABLE, vt->get_dimension()};
|
||||
}
|
||||
return NOT_ASSIGNABLE;
|
||||
};
|
||||
return expr::visit(overloaded_functor{
|
||||
[&] (const constant& value) -> test_result {
|
||||
return validate_assignment(value.type);
|
||||
},
|
||||
[&] (const binary_operator&) -> test_result {
|
||||
return NOT_ASSIGNABLE;
|
||||
},
|
||||
[&] (const conjunction&) -> test_result {
|
||||
return NOT_ASSIGNABLE;
|
||||
},
|
||||
[&] (const column_value& col_val) -> test_result {
|
||||
return validate_assignment(col_val.col->type);
|
||||
},
|
||||
[&] (const subscript&) -> test_result {
|
||||
return NOT_ASSIGNABLE;
|
||||
},
|
||||
[&] (const unresolved_identifier& ui) -> test_result {
|
||||
return NOT_ASSIGNABLE;
|
||||
},
|
||||
[&] (const column_mutation_attribute& cma) -> test_result {
|
||||
return NOT_ASSIGNABLE;
|
||||
},
|
||||
[&] (const function_call& fc) -> test_result {
|
||||
return NOT_ASSIGNABLE;
|
||||
},
|
||||
[&] (const cast& c) -> test_result {
|
||||
return NOT_ASSIGNABLE;
|
||||
},
|
||||
[&] (const field_selection& fs) -> test_result {
|
||||
return NOT_ASSIGNABLE;
|
||||
},
|
||||
[&] (const bind_variable& bv) -> test_result {
|
||||
return WEAKLY_ASSIGNABLE;
|
||||
},
|
||||
[&] (const untyped_constant& uc) -> test_result {
|
||||
return uc.partial_type == untyped_constant::type_class::null
|
||||
? WEAKLY_ASSIGNABLE
|
||||
: NOT_ASSIGNABLE;
|
||||
},
|
||||
[&] (const tuple_constructor& tc) -> test_result {
|
||||
return NOT_ASSIGNABLE;
|
||||
},
|
||||
[&] (const collection_constructor& c) -> test_result {
|
||||
switch (c.style) {
|
||||
case collection_constructor::style_type::list_or_vector: {
|
||||
if(std::ranges::all_of(c.elements, is_float_or_bind)) {
|
||||
return {assignment_testable::test_result::WEAKLY_ASSIGNABLE, c.elements.size()};
|
||||
}
|
||||
return NOT_ASSIGNABLE;
|
||||
}
|
||||
case collection_constructor::style_type::set: return NOT_ASSIGNABLE;
|
||||
case collection_constructor::style_type::map: return NOT_ASSIGNABLE;
|
||||
case collection_constructor::style_type::vector:
|
||||
on_internal_error(expr_logger, "vector style type found in test_assignment, should have been introduced post-prepare");
|
||||
}
|
||||
on_internal_error(expr_logger, fmt::format("unexpected collection_constructor style {}", static_cast<unsigned>(c.style)));
|
||||
},
|
||||
[&] (const usertype_constructor& uc) -> test_result {
|
||||
return NOT_ASSIGNABLE;
|
||||
},
|
||||
[&] (const temporary& t) -> test_result {
|
||||
return NOT_ASSIGNABLE;
|
||||
},
|
||||
}, expr);
|
||||
}
|
||||
|
||||
assignment_testable::vector_test_result
|
||||
test_assignment_any_size_float_vector(const expression& expr) {
|
||||
return test_assignment_any_size_float_vector<cql3_type::kind::FLOAT, cql3_type::kind::DOUBLE>(expr);
|
||||
}
|
||||
|
||||
expression
|
||||
prepare_expression(const expression& expr, data_dictionary::database db, const sstring& keyspace, const schema* schema_opt, lw_shared_ptr<column_specification> receiver) {
|
||||
auto e_opt = try_prepare_expression(expr, db, keyspace, schema_opt, std::move(receiver));
|
||||
@@ -1573,9 +1467,6 @@ public:
|
||||
virtual test_result test_assignment(data_dictionary::database db, const sstring& keyspace, const schema* schema_opt, const column_specification& receiver) const override {
|
||||
return expr::test_assignment(_e, db, keyspace, schema_opt, receiver);
|
||||
}
|
||||
virtual vector_test_result test_assignment_any_size_float_vector() const override {
|
||||
return expr::test_assignment_any_size_float_vector(_e);
|
||||
}
|
||||
virtual sstring assignment_testable_source_context() const override {
|
||||
return fmt::format("{}", _e);
|
||||
}
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
#include "cql3/functions/user_function.hh"
|
||||
#include "cql3/functions/user_aggregate.hh"
|
||||
#include "cql3/functions/uuid_fcts.hh"
|
||||
#include "cql3/functions/vector_similarity_fcts.hh"
|
||||
#include "data_dictionary/data_dictionary.hh"
|
||||
#include "as_json_function.hh"
|
||||
#include "cql3/prepare_context.hh"
|
||||
@@ -399,14 +398,6 @@ functions::get(data_dictionary::database db,
|
||||
}
|
||||
});
|
||||
|
||||
const auto func_name = name.has_keyspace() ? name : name.as_native_function();
|
||||
if (SIMILARITY_FUNCTIONS.contains(func_name)) {
|
||||
auto arg_types = retrieve_vector_arg_types(func_name, provided_args);
|
||||
auto fun = ::make_shared<vector_similarity_fct>(func_name.name, arg_types);
|
||||
validate_types(db, keyspace, schema.get(), fun, provided_args, receiver_ks, receiver_cf);
|
||||
return fun;
|
||||
}
|
||||
|
||||
if (name.has_keyspace()
|
||||
? name == TOKEN_FUNCTION_NAME
|
||||
: name.name == TOKEN_FUNCTION_NAME.name) {
|
||||
|
||||
@@ -1,150 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2025-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#include "vector_similarity_fcts.hh"
|
||||
#include "types/types.hh"
|
||||
#include "types/vector.hh"
|
||||
#include "exceptions/exceptions.hh"
|
||||
|
||||
namespace cql3 {
|
||||
namespace functions {
|
||||
namespace {
|
||||
|
||||
// The computations of similarity scores match the exact formulas of Cassandra's (jVector's) implementation to ensure compatibility.
|
||||
// There exist tests checking the compliance of the results.
|
||||
// Reference:
|
||||
// https://github.com/datastax/jvector/blob/f967f1c9249035b63b55a566fac7d4dc38380349/jvector-base/src/main/java/io/github/jbellis/jvector/vector/VectorSimilarityFunction.java#L36-L69
|
||||
|
||||
// You should only use this function if you need to preserve the original vectors and cannot normalize
|
||||
// them in advance.
|
||||
float compute_cosine_similarity(const std::vector<data_value>& v1, const std::vector<data_value>& v2) {
|
||||
double dot_product = 0.0;
|
||||
double squared_norm_a = 0.0;
|
||||
double squared_norm_b = 0.0;
|
||||
|
||||
for (size_t i = 0; i < v1.size(); ++i) {
|
||||
double a = value_cast<float>(v1[i]);
|
||||
double b = value_cast<float>(v2[i]);
|
||||
|
||||
dot_product += a * b;
|
||||
squared_norm_a += a * a;
|
||||
squared_norm_b += b * b;
|
||||
}
|
||||
|
||||
if (squared_norm_a == 0 || squared_norm_b == 0) {
|
||||
throw exceptions::invalid_request_exception("Function system.similarity_cosine doesn't support all-zero vectors");
|
||||
}
|
||||
|
||||
// The cosine similarity is in the range [-1, 1].
|
||||
// It is mapped to a similarity score in the range [0, 1] (-1 -> 0, 1 -> 1)
|
||||
// for consistency with other similarity functions.
|
||||
return (1 + (dot_product / (std::sqrt(squared_norm_a * squared_norm_b)))) / 2;
|
||||
}
|
||||
|
||||
float compute_euclidean_similarity(const std::vector<data_value>& v1, const std::vector<data_value>& v2) {
|
||||
double sum = 0.0;
|
||||
|
||||
for (size_t i = 0; i < v1.size(); ++i) {
|
||||
double a = value_cast<float>(v1[i]);
|
||||
double b = value_cast<float>(v2[i]);
|
||||
|
||||
double diff = a - b;
|
||||
sum += diff * diff;
|
||||
}
|
||||
|
||||
// The squared Euclidean (L2) distance is of range [0, inf).
|
||||
// It is mapped to a similarity score in the range (0, 1] (0 -> 1, inf -> 0)
|
||||
// for consistency with other similarity functions.
|
||||
return (1 / (1 + sum));
|
||||
}
|
||||
|
||||
// Assumes that both vectors are L2-normalized.
|
||||
// This similarity is intended as an optimized way to perform cosine similarity calculation.
|
||||
float compute_dot_product_similarity(const std::vector<data_value>& v1, const std::vector<data_value>& v2) {
|
||||
double dot_product = 0.0;
|
||||
|
||||
for (size_t i = 0; i < v1.size(); ++i) {
|
||||
double a = value_cast<float>(v1[i]);
|
||||
double b = value_cast<float>(v2[i]);
|
||||
dot_product += a * b;
|
||||
}
|
||||
|
||||
// The dot product is in the range [-1, 1] for L2-normalized vectors.
|
||||
// It is mapped to a similarity score in the range [0, 1] (-1 -> 0, 1 -> 1)
|
||||
// for consistency with other similarity functions.
|
||||
return ((1 + dot_product) / 2);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
thread_local const std::unordered_map<function_name, similarity_function_t> SIMILARITY_FUNCTIONS = {
|
||||
{SIMILARITY_COSINE_FUNCTION_NAME, compute_cosine_similarity},
|
||||
{SIMILARITY_EUCLIDEAN_FUNCTION_NAME, compute_euclidean_similarity},
|
||||
{SIMILARITY_DOT_PRODUCT_FUNCTION_NAME, compute_dot_product_similarity},
|
||||
};
|
||||
|
||||
std::vector<data_type> retrieve_vector_arg_types(const function_name& name, const std::vector<shared_ptr<assignment_testable>>& provided_args) {
|
||||
if (provided_args.size() != 2) {
|
||||
throw exceptions::invalid_request_exception(fmt::format("Invalid number of arguments for function {}(vector<float, n>, vector<float, n>)", name));
|
||||
}
|
||||
|
||||
auto [first_result, first_dim_opt] = provided_args[0]->test_assignment_any_size_float_vector();
|
||||
auto [second_result, second_dim_opt] = provided_args[1]->test_assignment_any_size_float_vector();
|
||||
|
||||
auto invalid_type_error_message = [&name](const shared_ptr<assignment_testable>& arg) {
|
||||
auto type = arg->assignment_testable_type_opt();
|
||||
const auto& source_context = arg->assignment_testable_source_context();
|
||||
if (type) {
|
||||
return fmt::format("Function {} requires a float vector argument, but found {} of type {}", name, source_context, type.value()->cql3_type_name());
|
||||
} else {
|
||||
return fmt::format("Function {} requires a float vector argument, but found {}", name, source_context);
|
||||
}
|
||||
};
|
||||
|
||||
if (!is_assignable(first_result)) {
|
||||
throw exceptions::invalid_request_exception(invalid_type_error_message(provided_args[0]));
|
||||
}
|
||||
if (!is_assignable(second_result)) {
|
||||
throw exceptions::invalid_request_exception(invalid_type_error_message(provided_args[1]));
|
||||
}
|
||||
|
||||
if (!first_dim_opt && !second_dim_opt) {
|
||||
throw exceptions::invalid_request_exception(fmt::format("Cannot infer type of argument {} for function {}(vector<float, n>, vector<float, n>)",
|
||||
provided_args[0]->assignment_testable_source_context(), name));
|
||||
}
|
||||
if (first_dim_opt && second_dim_opt) {
|
||||
if (*first_dim_opt != *second_dim_opt) {
|
||||
throw exceptions::invalid_request_exception(fmt::format(
|
||||
"All arguments must have the same vector dimensions, but found vector<float, {}> and vector<float, {}>", *first_dim_opt, *second_dim_opt));
|
||||
}
|
||||
}
|
||||
|
||||
size_t dimension = first_dim_opt ? *first_dim_opt : *second_dim_opt;
|
||||
auto type = vector_type_impl::get_instance(float_type, dimension);
|
||||
return {type, type};
|
||||
}
|
||||
|
||||
/// Evaluates this similarity function on its serialized arguments.
///
/// \param parameters serialized argument values; elements 0 and 1 are the two vectors
/// \return the serialized float result, or std::nullopt when any parameter is null
bytes_opt vector_similarity_fct::execute(std::span<const bytes_opt> parameters) {
    // A null argument yields a null result.
    for (const auto& param : parameters) {
        if (!param) {
            return std::nullopt;
        }
    }

    // Both arguments are deserialized using the first argument's type.
    const auto& vector_type = arg_types()[0];
    data_value lhs = vector_type->deserialize(*parameters[0]);
    data_value rhs = vector_type->deserialize(*parameters[1]);
    const auto& lhs_elements = value_cast<std::vector<data_value>>(lhs);
    const auto& rhs_elements = value_cast<std::vector<data_value>>(rhs);

    // Pick the implementation registered under this function's name.
    auto similarity = SIMILARITY_FUNCTIONS.at(_name);
    float result = similarity(lhs_elements, rhs_elements);
    return float_type->decompose(result);
}
|
||||
|
||||
} // namespace functions
|
||||
} // namespace cql3
|
||||
@@ -1,37 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2025-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "native_scalar_function.hh"
|
||||
#include "cql3/assignment_testable.hh"
|
||||
#include "cql3/functions/function_name.hh"
|
||||
|
||||
namespace cql3 {
|
||||
namespace functions {
|
||||
|
||||
static const function_name SIMILARITY_COSINE_FUNCTION_NAME = function_name::native_function("similarity_cosine");
|
||||
static const function_name SIMILARITY_EUCLIDEAN_FUNCTION_NAME = function_name::native_function("similarity_euclidean");
|
||||
static const function_name SIMILARITY_DOT_PRODUCT_FUNCTION_NAME = function_name::native_function("similarity_dot_product");
|
||||
|
||||
using similarity_function_t = float (*)(const std::vector<data_value>&, const std::vector<data_value>&);
|
||||
extern thread_local const std::unordered_map<function_name, similarity_function_t> SIMILARITY_FUNCTIONS;
|
||||
|
||||
std::vector<data_type> retrieve_vector_arg_types(const function_name& name, const std::vector<shared_ptr<assignment_testable>>& provided_args);
|
||||
|
||||
class vector_similarity_fct : public native_scalar_function {
|
||||
public:
|
||||
vector_similarity_fct(const sstring& name, const std::vector<data_type>& arg_types)
|
||||
: native_scalar_function(name, float_type, arg_types) {
|
||||
}
|
||||
|
||||
virtual bytes_opt execute(std::span<const bytes_opt> parameters) override;
|
||||
};
|
||||
|
||||
} // namespace functions
|
||||
} // namespace cql3
|
||||
@@ -14,7 +14,6 @@
|
||||
#include <seastar/core/shared_ptr.hh>
|
||||
#include <seastar/coroutine/parallel_for_each.hh>
|
||||
#include <seastar/coroutine/as_future.hh>
|
||||
#include <seastar/coroutine/try_future.hh>
|
||||
|
||||
#include "service/storage_proxy.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
@@ -48,10 +47,8 @@ const std::chrono::minutes prepared_statements_cache::entry_expiry = std::chrono
|
||||
|
||||
struct query_processor::remote {
|
||||
remote(service::migration_manager& mm, service::mapreduce_service& fwd,
|
||||
service::storage_service& ss, service::raft_group0_client& group0_client,
|
||||
service::strong_consistency::coordinator& _sc_coordinator)
|
||||
service::storage_service& ss, service::raft_group0_client& group0_client)
|
||||
: mm(mm), mapreducer(fwd), ss(ss), group0_client(group0_client)
|
||||
, sc_coordinator(_sc_coordinator)
|
||||
, gate("query_processor::remote")
|
||||
{}
|
||||
|
||||
@@ -59,7 +56,6 @@ struct query_processor::remote {
|
||||
service::mapreduce_service& mapreducer;
|
||||
service::storage_service& ss;
|
||||
service::raft_group0_client& group0_client;
|
||||
service::strong_consistency::coordinator& sc_coordinator;
|
||||
|
||||
seastar::named_gate gate;
|
||||
};
|
||||
@@ -517,16 +513,9 @@ query_processor::~query_processor() {
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<std::reference_wrapper<service::strong_consistency::coordinator>, gate::holder>
|
||||
query_processor::acquire_strongly_consistent_coordinator() {
|
||||
auto [remote_, holder] = remote();
|
||||
return {remote_.get().sc_coordinator, std::move(holder)};
|
||||
}
|
||||
|
||||
void query_processor::start_remote(service::migration_manager& mm, service::mapreduce_service& mapreducer,
|
||||
service::storage_service& ss, service::raft_group0_client& group0_client,
|
||||
service::strong_consistency::coordinator& sc_coordinator) {
|
||||
_remote = std::make_unique<struct remote>(mm, mapreducer, ss, group0_client, sc_coordinator);
|
||||
service::storage_service& ss, service::raft_group0_client& group0_client) {
|
||||
_remote = std::make_unique<struct remote>(mm, mapreducer, ss, group0_client);
|
||||
}
|
||||
|
||||
future<> query_processor::stop_remote() {
|
||||
@@ -870,7 +859,6 @@ struct internal_query_state {
|
||||
sstring query_string;
|
||||
std::unique_ptr<query_options> opts;
|
||||
statements::prepared_statement::checked_weak_ptr p;
|
||||
std::optional<service::query_state> qs;
|
||||
bool more_results = true;
|
||||
};
|
||||
|
||||
@@ -878,14 +866,10 @@ internal_query_state query_processor::create_paged_state(
|
||||
const sstring& query_string,
|
||||
db::consistency_level cl,
|
||||
const data_value_list& values,
|
||||
int32_t page_size,
|
||||
std::optional<service::query_state> qs) {
|
||||
int32_t page_size) {
|
||||
auto p = prepare_internal(query_string);
|
||||
auto opts = make_internal_options(p, values, cl, page_size);
|
||||
if (!qs) {
|
||||
qs.emplace(query_state_for_internal_call());
|
||||
}
|
||||
return internal_query_state{query_string, std::make_unique<cql3::query_options>(std::move(opts)), std::move(p), std::move(qs), true};
|
||||
return internal_query_state{query_string, std::make_unique<cql3::query_options>(std::move(opts)), std::move(p), true};
|
||||
}
|
||||
|
||||
bool query_processor::has_more_results(cql3::internal_query_state& state) const {
|
||||
@@ -908,8 +892,9 @@ future<> query_processor::for_each_cql_result(
|
||||
future<::shared_ptr<untyped_result_set>>
|
||||
query_processor::execute_paged_internal(internal_query_state& state) {
|
||||
state.p->statement->validate(*this, service::client_state::for_internal_calls());
|
||||
auto qs = query_state_for_internal_call();
|
||||
::shared_ptr<cql_transport::messages::result_message> msg =
|
||||
co_await state.p->statement->execute(*this, *state.qs, *state.opts, std::nullopt);
|
||||
co_await state.p->statement->execute(*this, qs, *state.opts, std::nullopt);
|
||||
|
||||
class visitor : public result_message::visitor_base {
|
||||
internal_query_state& _state;
|
||||
@@ -1008,7 +993,7 @@ query_processor::execute_with_params(
|
||||
auto opts = make_internal_options(p, values, cl);
|
||||
auto statement = p->statement;
|
||||
|
||||
auto msg = co_await coroutine::try_future(execute_maybe_with_guard(query_state, std::move(statement), opts, &query_processor::do_execute_with_params));
|
||||
auto msg = co_await execute_maybe_with_guard(query_state, std::move(statement), opts, &query_processor::do_execute_with_params);
|
||||
co_return ::make_shared<untyped_result_set>(msg);
|
||||
}
|
||||
|
||||
@@ -1018,7 +1003,7 @@ query_processor::do_execute_with_params(
|
||||
shared_ptr<cql_statement> statement,
|
||||
const query_options& options, std::optional<service::group0_guard> guard) {
|
||||
statement->validate(*this, service::client_state::for_internal_calls());
|
||||
co_return co_await coroutine::try_future(statement->execute(*this, query_state, options, std::move(guard)));
|
||||
co_return co_await statement->execute(*this, query_state, options, std::move(guard));
|
||||
}
|
||||
|
||||
|
||||
@@ -1216,9 +1201,8 @@ future<> query_processor::query_internal(
|
||||
db::consistency_level cl,
|
||||
const data_value_list& values,
|
||||
int32_t page_size,
|
||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f,
|
||||
std::optional<service::query_state> qs) {
|
||||
auto query_state = create_paged_state(query_string, cl, values, page_size, std::move(qs));
|
||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f) {
|
||||
auto query_state = create_paged_state(query_string, cl, values, page_size);
|
||||
co_return co_await for_each_cql_result(query_state, std::move(f));
|
||||
}
|
||||
|
||||
|
||||
@@ -44,10 +44,6 @@ class query_state;
|
||||
class mapreduce_service;
|
||||
class raft_group0_client;
|
||||
|
||||
namespace strong_consistency {
|
||||
class coordinator;
|
||||
}
|
||||
|
||||
namespace broadcast_tables {
|
||||
struct query;
|
||||
}
|
||||
@@ -159,8 +155,7 @@ public:
|
||||
~query_processor();
|
||||
|
||||
void start_remote(service::migration_manager&, service::mapreduce_service&,
|
||||
service::storage_service& ss, service::raft_group0_client&,
|
||||
service::strong_consistency::coordinator&);
|
||||
service::storage_service& ss, service::raft_group0_client&);
|
||||
future<> stop_remote();
|
||||
|
||||
data_dictionary::database db() {
|
||||
@@ -179,9 +174,6 @@ public:
|
||||
return _proxy;
|
||||
}
|
||||
|
||||
std::pair<std::reference_wrapper<service::strong_consistency::coordinator>, gate::holder>
|
||||
acquire_strongly_consistent_coordinator();
|
||||
|
||||
cql_stats& get_cql_stats() {
|
||||
return _cql_stats;
|
||||
}
|
||||
@@ -330,7 +322,6 @@ public:
|
||||
* page_size - maximum page size
|
||||
* f - a function to be run on each row of the query result,
|
||||
* if the function returns stop_iteration::yes the iteration will stop
|
||||
* qs - optional query state (default: std::nullopt)
|
||||
*
|
||||
* \note This function is optimized for convenience, not performance.
|
||||
*/
|
||||
@@ -339,8 +330,7 @@ public:
|
||||
db::consistency_level cl,
|
||||
const data_value_list& values,
|
||||
int32_t page_size,
|
||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f,
|
||||
std::optional<service::query_state> qs = std::nullopt);
|
||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f);
|
||||
|
||||
/*
|
||||
* \brief iterate over all cql results using paging
|
||||
@@ -509,8 +499,7 @@ private:
|
||||
const sstring& query_string,
|
||||
db::consistency_level,
|
||||
const data_value_list& values,
|
||||
int32_t page_size,
|
||||
std::optional<service::query_state> qs = std::nullopt);
|
||||
int32_t page_size);
|
||||
|
||||
/*!
|
||||
* \brief run a query using paging
|
||||
|
||||
@@ -46,13 +46,6 @@ void metadata::add_non_serialized_column(lw_shared_ptr<column_specification> nam
|
||||
_column_info->_names.emplace_back(std::move(name));
|
||||
}
|
||||
|
||||
void metadata::hide_last_column() {
|
||||
if (_column_info->_column_count == 0) {
|
||||
utils::on_internal_error("Trying to hide a column when there are no columns visible.");
|
||||
}
|
||||
_column_info->_column_count--;
|
||||
}
|
||||
|
||||
void metadata::set_paging_state(lw_shared_ptr<const service::pager::paging_state> paging_state) {
|
||||
_flags.set<flag::HAS_MORE_PAGES>();
|
||||
_paging_state = std::move(paging_state);
|
||||
|
||||
@@ -73,7 +73,6 @@ public:
|
||||
uint32_t value_count() const;
|
||||
|
||||
void add_non_serialized_column(lw_shared_ptr<column_specification> name);
|
||||
void hide_last_column();
|
||||
|
||||
public:
|
||||
void set_paging_state(lw_shared_ptr<const service::pager::paging_state> paging_state);
|
||||
|
||||
@@ -32,7 +32,7 @@ bool
|
||||
selectable_processes_selection(const expr::expression& selectable) {
|
||||
return expr::visit(overloaded_functor{
|
||||
[&] (const expr::constant&) -> bool {
|
||||
return true;
|
||||
on_internal_error(slogger, "no way to express SELECT constant in the grammar yet");
|
||||
},
|
||||
[&] (const expr::conjunction& conj) -> bool {
|
||||
on_internal_error(slogger, "no way to express 'SELECT a AND b' in the grammar yet");
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
#include "locator/abstract_replication_strategy.hh"
|
||||
#include "mutation/canonical_mutation.hh"
|
||||
#include "prepared_statement.hh"
|
||||
#include <seastar/coroutine/exception.hh>
|
||||
#include "seastar/coroutine/exception.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
#include "service/storage_proxy.hh"
|
||||
#include "service/topology_mutation.hh"
|
||||
@@ -206,9 +206,8 @@ cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_proce
|
||||
locator::replication_strategy_params(ks_md_update->strategy_options(), ks_md_update->initial_tablets(), ks_md_update->consistency_option()),
|
||||
topo);
|
||||
|
||||
// If RF-rack-validity must be enforced for the keyspace according to `enforce_rf_rack_validity_for_keyspace`,
|
||||
// it's forbidden to perform a schema change that would lead to an RF-rack-invalid keyspace.
|
||||
// Verify that this change does not.
|
||||
// If `rf_rack_valid_keyspaces` is enabled, it's forbidden to perform a schema change that
|
||||
// would lead to an RF-rack-invalid keyspace. Verify that this change does not.
|
||||
// For more context, see: scylladb/scylladb#23071.
|
||||
try {
|
||||
// There are two things to note here:
|
||||
@@ -225,13 +224,14 @@ cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_proce
|
||||
// The second hyphen is not really true because currently topological changes can
|
||||
// disturb it (see scylladb/scylladb#23345), but we ignore that.
|
||||
locator::assert_rf_rack_valid_keyspace(_name, tmptr, *rs);
|
||||
} catch (const std::invalid_argument& e) {
|
||||
if (replica::database::enforce_rf_rack_validity_for_keyspace(qp.db().get_config(), *ks_md)) {
|
||||
// wrap the exception manually here in a type that can be passed to the user.
|
||||
} catch (const std::exception& e) {
|
||||
if (qp.db().get_config().rf_rack_valid_keyspaces()) {
|
||||
// There's no guarantee what the type of the exception will be, so we need to
|
||||
// wrap it manually here in a type that can be passed to the user.
|
||||
throw exceptions::invalid_request_exception(e.what());
|
||||
} else {
|
||||
// Even when RF-rack-validity is not enforced for the keyspace, we'd
|
||||
// like to inform the user that the keyspace they're altering will not
|
||||
// Even when the configuration option `rf_rack_valid_keyspaces` is set to false,
|
||||
// we'd like to inform the user that the keyspace they're altering will not
|
||||
// satisfy the restriction after the change--but just as a warning.
|
||||
// For more context, see issue: scylladb/scylladb#23330.
|
||||
warnings.push_back(seastar::format(
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
#include <seastar/core/coroutine.hh>
|
||||
#include "cql3/statements/alter_view_statement.hh"
|
||||
#include "cql3/statements/prepared_statement.hh"
|
||||
#include "cql3/statements/view_prop_defs.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
#include "service/storage_proxy.hh"
|
||||
#include "validation.hh"
|
||||
@@ -23,7 +22,7 @@ namespace cql3 {
|
||||
|
||||
namespace statements {
|
||||
|
||||
alter_view_statement::alter_view_statement(cf_name view_name, std::optional<view_prop_defs> properties)
|
||||
alter_view_statement::alter_view_statement(cf_name view_name, std::optional<cf_prop_defs> properties)
|
||||
: schema_altering_statement{std::move(view_name)}
|
||||
, _properties{std::move(properties)}
|
||||
{
|
||||
@@ -53,8 +52,8 @@ view_ptr alter_view_statement::prepare_view(data_dictionary::database db) const
|
||||
throw exceptions::invalid_request_exception("ALTER MATERIALIZED VIEW WITH invoked, but no parameters found");
|
||||
}
|
||||
|
||||
auto schema_extensions = _properties->properties()->make_schema_extensions(db.extensions());
|
||||
_properties->validate_raw(view_prop_defs::op_type::alter, db, keyspace(), schema_extensions);
|
||||
auto schema_extensions = _properties->make_schema_extensions(db.extensions());
|
||||
_properties->validate(db, keyspace(), schema_extensions);
|
||||
|
||||
bool is_colocated = [&] {
|
||||
if (!db.find_keyspace(keyspace()).get_replication_strategy().uses_tablets()) {
|
||||
@@ -71,15 +70,28 @@ view_ptr alter_view_statement::prepare_view(data_dictionary::database db) const
|
||||
}();
|
||||
|
||||
if (is_colocated) {
|
||||
auto gc_opts = _properties->properties()->get_tombstone_gc_options(schema_extensions);
|
||||
auto gc_opts = _properties->get_tombstone_gc_options(schema_extensions);
|
||||
if (gc_opts && gc_opts->mode() == tombstone_gc_mode::repair) {
|
||||
throw exceptions::invalid_request_exception("The 'repair' mode for tombstone_gc is not allowed on co-located materialized view tables.");
|
||||
}
|
||||
}
|
||||
|
||||
auto builder = schema_builder(schema);
|
||||
_properties->apply_to_builder(view_prop_defs::op_type::alter, builder, std::move(schema_extensions),
|
||||
db, keyspace(), is_colocated);
|
||||
_properties->apply_to_builder(builder, std::move(schema_extensions), db, keyspace(), !is_colocated);
|
||||
|
||||
if (builder.get_gc_grace_seconds() == 0) {
|
||||
throw exceptions::invalid_request_exception(
|
||||
"Cannot alter gc_grace_seconds of a materialized view to 0, since this "
|
||||
"value is used to TTL undelivered updates. Setting gc_grace_seconds too "
|
||||
"low might cause undelivered updates to expire before being replayed.");
|
||||
}
|
||||
|
||||
if (builder.default_time_to_live().count() > 0) {
|
||||
throw exceptions::invalid_request_exception(
|
||||
"Cannot set or alter default_time_to_live for a materialized view. "
|
||||
"Data in a materialized view always expire at the same time than "
|
||||
"the corresponding data in the parent table.");
|
||||
}
|
||||
|
||||
return view_ptr(builder.build());
|
||||
}
|
||||
|
||||
@@ -12,8 +12,8 @@
|
||||
|
||||
#include <seastar/core/shared_ptr.hh>
|
||||
|
||||
#include "cql3/statements/view_prop_defs.hh"
|
||||
#include "data_dictionary/data_dictionary.hh"
|
||||
#include "cql3/statements/cf_prop_defs.hh"
|
||||
#include "cql3/statements/schema_altering_statement.hh"
|
||||
|
||||
namespace cql3 {
|
||||
@@ -26,10 +26,10 @@ namespace statements {
|
||||
/** An <code>ALTER MATERIALIZED VIEW</code> parsed from a CQL query statement. */
|
||||
class alter_view_statement : public schema_altering_statement {
|
||||
private:
|
||||
std::optional<view_prop_defs> _properties;
|
||||
std::optional<cf_prop_defs> _properties;
|
||||
view_ptr prepare_view(data_dictionary::database db) const;
|
||||
public:
|
||||
alter_view_statement(cf_name view_name, std::optional<view_prop_defs> properties);
|
||||
alter_view_statement(cf_name view_name, std::optional<cf_prop_defs> properties);
|
||||
|
||||
virtual future<> check_access(query_processor& qp, const service::client_state& state) const override;
|
||||
|
||||
|
||||
@@ -19,8 +19,7 @@ namespace statements {
|
||||
/**
|
||||
* Class for common statement properties.
|
||||
*/
|
||||
class cf_properties {
|
||||
protected:
|
||||
class cf_properties final {
|
||||
const ::shared_ptr<cf_prop_defs> _properties = ::make_shared<cf_prop_defs>();
|
||||
bool _use_compact_storage = false;
|
||||
std::vector<std::pair<::shared_ptr<column_identifier>, bool>> _defined_ordering; // Insertion ordering is important
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
#include "db/view/view.hh"
|
||||
#include "exceptions/exceptions.hh"
|
||||
#include "index/vector_index.hh"
|
||||
#include "locator/token_metadata_fwd.hh"
|
||||
#include "prepared_statement.hh"
|
||||
#include "replica/database.hh"
|
||||
#include "types/types.hh"
|
||||
@@ -219,24 +218,18 @@ view_ptr create_index_statement::create_view_for_index(const schema_ptr schema,
|
||||
std::map<sstring, sstring> tags_map = {{db::SYNCHRONOUS_VIEW_UPDATES_TAG_KEY, "true"}};
|
||||
builder.add_extension(db::tags_extension::NAME, ::make_shared<db::tags_extension>(tags_map));
|
||||
}
|
||||
|
||||
const schema::extensions_map exts = _view_properties.properties()->make_schema_extensions(db.extensions());
|
||||
_view_properties.apply_to_builder(view_prop_defs::op_type::create, builder, exts, db, keyspace(), is_colocated);
|
||||
|
||||
return view_ptr{builder.build()};
|
||||
}
|
||||
|
||||
create_index_statement::create_index_statement(cf_name name,
|
||||
::shared_ptr<index_name> index_name,
|
||||
std::vector<::shared_ptr<index_target::raw>> raw_targets,
|
||||
::shared_ptr<index_specific_prop_defs> idx_properties,
|
||||
view_prop_defs view_properties,
|
||||
::shared_ptr<index_prop_defs> properties,
|
||||
bool if_not_exists)
|
||||
: schema_altering_statement(name)
|
||||
, _index_name(index_name->get_idx())
|
||||
, _raw_targets(raw_targets)
|
||||
, _idx_properties(std::move(idx_properties))
|
||||
, _view_properties(std::move(view_properties))
|
||||
, _properties(properties)
|
||||
, _if_not_exists(if_not_exists)
|
||||
{
|
||||
}
|
||||
@@ -259,53 +252,14 @@ static sstring target_type_name(index_target::target_type type) {
|
||||
void
|
||||
create_index_statement::validate(query_processor& qp, const service::client_state& state) const
|
||||
{
|
||||
if (_raw_targets.empty() && !_idx_properties->is_custom) {
|
||||
if (_raw_targets.empty() && !_properties->is_custom) {
|
||||
throw exceptions::invalid_request_exception("Only CUSTOM indexes can be created without specifying a target column");
|
||||
}
|
||||
|
||||
_idx_properties->validate();
|
||||
|
||||
// FIXME: This is ugly and can be improved.
|
||||
const bool is_vector_index = _idx_properties->custom_class && *_idx_properties->custom_class == "vector_index";
|
||||
const bool uses_view_properties = _view_properties.properties()->count() > 0
|
||||
|| _view_properties.use_compact_storage()
|
||||
|| _view_properties.defined_ordering().size() > 0;
|
||||
|
||||
if (is_vector_index && uses_view_properties) {
|
||||
throw exceptions::invalid_request_exception("You cannot use view properties with a vector index");
|
||||
}
|
||||
|
||||
const schema::extensions_map exts = _view_properties.properties()->make_schema_extensions(qp.db().extensions());
|
||||
_view_properties.validate_raw(view_prop_defs::op_type::create, qp.db(), keyspace(), exts);
|
||||
|
||||
// These keywords are still accepted by other schema entities, but they don't have effect on them.
|
||||
// Since indexes are not bound by any backward compatibility contract in this regard, let's forbid these.
|
||||
static sstring obsolete_keywords[] = {
|
||||
"index_interval",
|
||||
"replicate_on_write",
|
||||
"populate_io_cache_on_flush",
|
||||
"read_repair_chance",
|
||||
"dclocal_read_repair_chance",
|
||||
};
|
||||
|
||||
for (const sstring& keyword : obsolete_keywords) {
|
||||
if (_view_properties.properties()->has_property(keyword)) {
|
||||
// We use the same type of exception and the same error message as would be thrown for
|
||||
// an invalid property via `_view_properties.validate_raw`.
|
||||
throw exceptions::syntax_exception(seastar::format("Unknown property '{}'", keyword));
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: This is a temporary limitation as it might deserve more attention.
|
||||
if (!_view_properties.defined_ordering().empty()) {
|
||||
throw exceptions::invalid_request_exception("Indexes do not allow for specifying the clustering order");
|
||||
}
|
||||
_properties->validate();
|
||||
}
|
||||
|
||||
std::pair<std::vector<::shared_ptr<index_target>>, cql3::cql_warnings_vec>
|
||||
create_index_statement::validate_while_executing(data_dictionary::database db, locator::token_metadata_ptr tmptr) const {
|
||||
cql3::cql_warnings_vec warnings;
|
||||
|
||||
std::vector<::shared_ptr<index_target>> create_index_statement::validate_while_executing(data_dictionary::database db) const {
|
||||
auto schema = validation::validate_column_family(db, keyspace(), column_family());
|
||||
|
||||
if (schema->is_counter()) {
|
||||
@@ -327,22 +281,13 @@ create_index_statement::validate_while_executing(data_dictionary::database db, l
|
||||
|
||||
// Regular secondary indexes require rf-rack-validity.
|
||||
// Custom indexes need to validate this property themselves, if they need it.
|
||||
if (!_idx_properties || !_idx_properties->custom_class) {
|
||||
if (!_properties || !_properties->custom_class) {
|
||||
try {
|
||||
db::view::validate_view_keyspace(db, keyspace(), tmptr);
|
||||
db::view::validate_view_keyspace(db, keyspace());
|
||||
} catch (const std::exception& e) {
|
||||
// The type of the thrown exception is not specified, so we need to wrap it here.
|
||||
throw exceptions::invalid_request_exception(e.what());
|
||||
}
|
||||
|
||||
if (db.find_keyspace(keyspace()).uses_tablets()) {
|
||||
warnings.emplace_back(
|
||||
"Creating an index in a keyspace that uses tablets requires "
|
||||
"the keyspace to remain RF-rack-valid while the index exists. "
|
||||
"Some operations will be restricted to enforce this: altering the keyspace's replication "
|
||||
"factor, adding a node in a new rack, and removing or decommissioning a node that would "
|
||||
"eliminate a rack.");
|
||||
}
|
||||
}
|
||||
|
||||
validate_for_local_index(*schema);
|
||||
@@ -352,14 +297,14 @@ create_index_statement::validate_while_executing(data_dictionary::database db, l
|
||||
targets.emplace_back(raw_target->prepare(*schema));
|
||||
}
|
||||
|
||||
if (_idx_properties && _idx_properties->custom_class) {
|
||||
auto custom_index_factory = secondary_index::secondary_index_manager::get_custom_class_factory(*_idx_properties->custom_class);
|
||||
if (_properties && _properties->custom_class) {
|
||||
auto custom_index_factory = secondary_index::secondary_index_manager::get_custom_class_factory(*_properties->custom_class);
|
||||
if (!custom_index_factory) {
|
||||
throw exceptions::invalid_request_exception(format("Non-supported custom class \'{}\' provided", *_idx_properties->custom_class));
|
||||
throw exceptions::invalid_request_exception(format("Non-supported custom class \'{}\' provided", *(_properties->custom_class)));
|
||||
}
|
||||
auto custom_index = (*custom_index_factory)();
|
||||
custom_index->validate(*schema, *_idx_properties, targets, db.features(), db);
|
||||
_idx_properties->index_version = custom_index->index_version(*schema);
|
||||
custom_index->validate(*schema, *_properties, targets, db.features(), db);
|
||||
_properties->index_version = custom_index->index_version(*schema);
|
||||
}
|
||||
|
||||
if (targets.size() > 1) {
|
||||
@@ -439,7 +384,7 @@ create_index_statement::validate_while_executing(data_dictionary::database db, l
|
||||
}
|
||||
}
|
||||
|
||||
return std::make_pair(std::move(targets), std::move(warnings));
|
||||
return targets;
|
||||
}
|
||||
|
||||
void create_index_statement::validate_for_local_index(const schema& schema) const {
|
||||
@@ -578,7 +523,7 @@ void create_index_statement::validate_target_column_is_map_if_index_involves_key
|
||||
|
||||
void create_index_statement::validate_targets_for_multi_column_index(std::vector<::shared_ptr<index_target>> targets) const
|
||||
{
|
||||
if (!_idx_properties->is_custom) {
|
||||
if (!_properties->is_custom) {
|
||||
if (targets.size() > 2 || (targets.size() == 2 && std::holds_alternative<index_target::single_column>(targets.front()->value))) {
|
||||
throw exceptions::invalid_request_exception("Only CUSTOM indexes support multiple columns");
|
||||
}
|
||||
@@ -592,9 +537,8 @@ void create_index_statement::validate_targets_for_multi_column_index(std::vector
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<std::optional<create_index_statement::base_schema_with_new_index>, cql3::cql_warnings_vec>
|
||||
create_index_statement::build_index_schema(data_dictionary::database db, locator::token_metadata_ptr tmptr) const {
|
||||
auto [targets, warnings] = validate_while_executing(db, tmptr);
|
||||
std::optional<create_index_statement::base_schema_with_new_index> create_index_statement::build_index_schema(data_dictionary::database db) const {
|
||||
auto targets = validate_while_executing(db);
|
||||
|
||||
auto schema = db.find_schema(keyspace(), column_family());
|
||||
|
||||
@@ -610,8 +554,8 @@ create_index_statement::build_index_schema(data_dictionary::database db, locator
|
||||
}
|
||||
index_metadata_kind kind;
|
||||
index_options_map index_options;
|
||||
if (_idx_properties->custom_class) {
|
||||
index_options = _idx_properties->get_options();
|
||||
if (_properties->custom_class) {
|
||||
index_options = _properties->get_options();
|
||||
kind = index_metadata_kind::custom;
|
||||
} else {
|
||||
kind = schema->is_compound() ? index_metadata_kind::composites : index_metadata_kind::keys;
|
||||
@@ -620,17 +564,17 @@ create_index_statement::build_index_schema(data_dictionary::database db, locator
|
||||
auto existing_index = schema->find_index_noname(index);
|
||||
if (existing_index) {
|
||||
if (_if_not_exists) {
|
||||
return std::make_pair(std::nullopt, std::move(warnings));
|
||||
return {};
|
||||
} else {
|
||||
throw exceptions::invalid_request_exception(
|
||||
format("Index {} is a duplicate of existing index {}", index.name(), existing_index.value().name()));
|
||||
}
|
||||
}
|
||||
bool existing_vector_index = _idx_properties->custom_class && _idx_properties->custom_class == "vector_index" && secondary_index::vector_index::has_vector_index_on_column(*schema, targets[0]->column_name());
|
||||
bool custom_index_with_same_name = _idx_properties->custom_class && db.existing_index_names(keyspace()).contains(_index_name);
|
||||
bool existing_vector_index = _properties->custom_class && _properties->custom_class == "vector_index" && secondary_index::vector_index::has_vector_index_on_column(*schema, targets[0]->column_name());
|
||||
bool custom_index_with_same_name = _properties->custom_class && db.existing_index_names(keyspace()).contains(_index_name);
|
||||
if (existing_vector_index || custom_index_with_same_name) {
|
||||
if (_if_not_exists) {
|
||||
return std::make_pair(std::nullopt, std::move(warnings));
|
||||
return {};
|
||||
} else {
|
||||
throw exceptions::invalid_request_exception("There exists a duplicate custom index");
|
||||
}
|
||||
@@ -646,13 +590,13 @@ create_index_statement::build_index_schema(data_dictionary::database db, locator
|
||||
schema_builder builder{schema};
|
||||
builder.with_index(index);
|
||||
|
||||
return std::make_pair(base_schema_with_new_index{builder.build(), index}, std::move(warnings));
|
||||
return base_schema_with_new_index{builder.build(), index};
|
||||
}
|
||||
|
||||
future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, utils::chunked_vector<mutation>, cql3::cql_warnings_vec>>
|
||||
create_index_statement::prepare_schema_mutations(query_processor& qp, const query_options&, api::timestamp_type ts) const {
|
||||
using namespace cql_transport;
|
||||
auto [res, warnings] = build_index_schema(qp.db(), qp.proxy().get_token_metadata_ptr());
|
||||
auto res = build_index_schema(qp.db());
|
||||
|
||||
::shared_ptr<event::schema_change> ret;
|
||||
utils::chunked_vector<mutation> muts;
|
||||
@@ -682,7 +626,7 @@ create_index_statement::prepare_schema_mutations(query_processor& qp, const quer
|
||||
column_family());
|
||||
}
|
||||
|
||||
co_return std::make_tuple(std::move(ret), std::move(muts), std::move(warnings));
|
||||
co_return std::make_tuple(std::move(ret), std::move(muts), std::vector<sstring>());
|
||||
}
|
||||
|
||||
std::unique_ptr<cql3::statements::prepared_statement>
|
||||
|
||||
@@ -10,8 +10,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cql3/statements/index_prop_defs.hh"
|
||||
#include "cql3/statements/view_prop_defs.hh"
|
||||
#include "schema_altering_statement.hh"
|
||||
#include "index_target.hh"
|
||||
|
||||
@@ -29,25 +27,20 @@ class index_name;
|
||||
|
||||
namespace statements {
|
||||
|
||||
class index_specific_prop_defs;
|
||||
class index_prop_defs;
|
||||
|
||||
/** A <code>CREATE INDEX</code> statement parsed from a CQL query. */
|
||||
class create_index_statement : public schema_altering_statement {
|
||||
const sstring _index_name;
|
||||
const std::vector<::shared_ptr<index_target::raw>> _raw_targets;
|
||||
|
||||
// Options specific to this index.
|
||||
const ::shared_ptr<index_specific_prop_defs> _idx_properties;
|
||||
// Options corresponding to the underlying materialized view.
|
||||
const view_prop_defs _view_properties;
|
||||
|
||||
const ::shared_ptr<index_prop_defs> _properties;
|
||||
const bool _if_not_exists;
|
||||
cql_stats* _cql_stats = nullptr;
|
||||
|
||||
public:
|
||||
create_index_statement(cf_name name, ::shared_ptr<index_name> index_name,
|
||||
std::vector<::shared_ptr<index_target::raw>> raw_targets,
|
||||
::shared_ptr<index_specific_prop_defs> idx_properties, view_prop_defs view_properties, bool if_not_exists);
|
||||
::shared_ptr<index_prop_defs> properties, bool if_not_exists);
|
||||
|
||||
future<> check_access(query_processor& qp, const service::client_state& state) const override;
|
||||
void validate(query_processor&, const service::client_state& state) const override;
|
||||
@@ -60,7 +53,7 @@ public:
|
||||
schema_ptr schema;
|
||||
index_metadata index;
|
||||
};
|
||||
std::pair<std::optional<base_schema_with_new_index>, cql3::cql_warnings_vec> build_index_schema(data_dictionary::database db, locator::token_metadata_ptr tmptr) const;
|
||||
std::optional<base_schema_with_new_index> build_index_schema(data_dictionary::database db) const;
|
||||
view_ptr create_view_for_index(const schema_ptr, const index_metadata& im, const data_dictionary::database&) const;
|
||||
private:
|
||||
void validate_for_local_index(const schema& schema) const;
|
||||
@@ -76,7 +69,7 @@ private:
|
||||
const sstring& name,
|
||||
index_metadata_kind kind,
|
||||
const index_options_map& options);
|
||||
std::pair<std::vector<::shared_ptr<index_target>>, cql3::cql_warnings_vec> validate_while_executing(data_dictionary::database db, locator::token_metadata_ptr tmptr) const;
|
||||
std::vector<::shared_ptr<index_target>> validate_while_executing(data_dictionary::database db) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -116,20 +116,21 @@ future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, utils::chun
|
||||
warnings.push_back("Keyspace `initial` tablets option is deprecated. Use per-table tablet options instead.");
|
||||
}
|
||||
|
||||
// If RF-rack-validity must be enforced for the keyspace according to `enforce_rf_rack_validity_for_keyspace`,
|
||||
// it's forbidden to create an RF-rack-invalid keyspace. Verify that it's RF-rack-valid.
|
||||
// If `rf_rack_valid_keyspaces` is enabled, it's forbidden to create an RF-rack-invalid keyspace.
|
||||
// Verify that it's RF-rack-valid.
|
||||
// For more context, see: scylladb/scylladb#23071.
|
||||
try {
|
||||
// We hold a group0_guard, so it's correct to check this here.
|
||||
// The topology or schema cannot change while we're performing this query.
|
||||
locator::assert_rf_rack_valid_keyspace(_name, tmptr, *rs);
|
||||
} catch (const std::invalid_argument& e) {
|
||||
if (replica::database::enforce_rf_rack_validity_for_keyspace(cfg, *ksm)) {
|
||||
// wrap the exception in a type that can be passed to the user.
|
||||
} catch (const std::exception& e) {
|
||||
if (cfg.rf_rack_valid_keyspaces()) {
|
||||
// There's no guarantee what the type of the exception will be, so we need to
|
||||
// wrap it manually here in a type that can be passed to the user.
|
||||
throw exceptions::invalid_request_exception(e.what());
|
||||
} else {
|
||||
// Even when RF-rack-validity is not enforced for the keyspace, we'd
|
||||
// like to inform the user that the keyspace they're creating does not
|
||||
// Even when the configuration option `rf_rack_valid_keyspaces` is set to false,
|
||||
// we'd like to inform the user that the keyspace they're creating does not
|
||||
// satisfy the restriction--but just as a warning.
|
||||
// For more context, see issue: scylladb/scylladb#23330.
|
||||
warnings.push_back(seastar::format(
|
||||
|
||||
@@ -31,6 +31,8 @@
|
||||
#include "db/config.hh"
|
||||
#include "compaction/time_window_compaction_strategy.hh"
|
||||
|
||||
bool is_internal_keyspace(std::string_view name);
|
||||
|
||||
namespace cql3 {
|
||||
|
||||
namespace statements {
|
||||
@@ -122,6 +124,10 @@ void create_table_statement::apply_properties_to(schema_builder& builder, const
|
||||
addColumnMetadataFromAliases(cfmd, Collections.singletonList(valueAlias), defaultValidator, ColumnDefinition.Kind.COMPACT_VALUE);
|
||||
#endif
|
||||
|
||||
if (!_properties->get_compression_options() && !is_internal_keyspace(keyspace())) {
|
||||
builder.set_compressor_params(db.get_config().sstable_compression_user_table_options());
|
||||
}
|
||||
|
||||
_properties->apply_to_builder(builder, _properties->make_schema_extensions(db.extensions()), db, keyspace(), true);
|
||||
}
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
|
||||
*/
|
||||
|
||||
#include "cql3/statements/view_prop_defs.hh"
|
||||
#include "exceptions/exceptions.hh"
|
||||
#include "utils/assert.hh"
|
||||
#include <unordered_set>
|
||||
@@ -106,7 +105,7 @@ static bool validate_primary_key(
|
||||
return new_non_pk_column;
|
||||
}
|
||||
|
||||
std::pair<view_ptr, cql3::cql_warnings_vec> create_view_statement::prepare_view(data_dictionary::database db, locator::token_metadata_ptr tmptr) const {
|
||||
std::pair<view_ptr, cql3::cql_warnings_vec> create_view_statement::prepare_view(data_dictionary::database db) const {
|
||||
// We need to make sure that:
|
||||
// - materialized view name is valid
|
||||
// - primary key includes all columns in base table's primary key
|
||||
@@ -120,7 +119,15 @@ std::pair<view_ptr, cql3::cql_warnings_vec> create_view_statement::prepare_view(
|
||||
cql3::cql_warnings_vec warnings;
|
||||
|
||||
auto schema_extensions = _properties.properties()->make_schema_extensions(db.extensions());
|
||||
_properties.validate_raw(view_prop_defs::op_type::create, db, keyspace(), schema_extensions);
|
||||
_properties.validate(db, keyspace(), schema_extensions);
|
||||
|
||||
if (_properties.use_compact_storage()) {
|
||||
throw exceptions::invalid_request_exception(format("Cannot use 'COMPACT STORAGE' when defining a materialized view"));
|
||||
}
|
||||
|
||||
if (_properties.properties()->get_cdc_options(schema_extensions)) {
|
||||
throw exceptions::invalid_request_exception("Cannot enable CDC for a materialized view");
|
||||
}
|
||||
|
||||
// View and base tables must be in the same keyspace, to ensure that RF
|
||||
// is the same (because we assign a view replica to each base replica).
|
||||
@@ -146,21 +153,12 @@ std::pair<view_ptr, cql3::cql_warnings_vec> create_view_statement::prepare_view(
|
||||
schema_ptr schema = validation::validate_column_family(db, _base_name.get_keyspace(), _base_name.get_column_family());
|
||||
|
||||
try {
|
||||
db::view::validate_view_keyspace(db, keyspace(), tmptr);
|
||||
db::view::validate_view_keyspace(db, keyspace());
|
||||
} catch (const std::exception& e) {
|
||||
// The type of the thrown exception is not specified, so we need to wrap it here.
|
||||
throw exceptions::invalid_request_exception(e.what());
|
||||
}
|
||||
|
||||
if (db.find_keyspace(keyspace()).uses_tablets()) {
|
||||
warnings.emplace_back(
|
||||
"Creating a materialized view in a keyspaces that uses tablets requires "
|
||||
"the keyspace to remain RF-rack-valid while the materialized view exists. "
|
||||
"Some operations will be restricted to enforce this: altering the keyspace's replication "
|
||||
"factor, adding a node in a new rack, and removing or decommissioning a node that would "
|
||||
"eliminate a rack.");
|
||||
}
|
||||
|
||||
if (schema->is_counter()) {
|
||||
throw exceptions::invalid_request_exception(format("Materialized views are not supported on counter tables"));
|
||||
}
|
||||
@@ -343,7 +341,16 @@ std::pair<view_ptr, cql3::cql_warnings_vec> create_view_statement::prepare_view(
|
||||
warnings.emplace_back(std::move(warning_text));
|
||||
}
|
||||
|
||||
schema_builder builder{keyspace(), column_family()};
|
||||
const auto maybe_id = _properties.properties()->get_id();
|
||||
if (maybe_id && db.try_find_table(*maybe_id)) {
|
||||
const auto schema_ptr = db.find_schema(*maybe_id);
|
||||
const auto& ks_name = schema_ptr->ks_name();
|
||||
const auto& cf_name = schema_ptr->cf_name();
|
||||
|
||||
throw exceptions::invalid_request_exception(seastar::format("Table with ID {} already exists: {}.{}", *maybe_id, ks_name, cf_name));
|
||||
}
|
||||
|
||||
schema_builder builder{keyspace(), column_family(), maybe_id};
|
||||
auto add_columns = [this, &builder] (std::vector<const column_definition*>& defs, column_kind kind) mutable {
|
||||
for (auto* def : defs) {
|
||||
auto&& type = _properties.get_reversable_type(*def->column_specification->name, def->type);
|
||||
@@ -389,8 +396,14 @@ std::pair<view_ptr, cql3::cql_warnings_vec> create_view_statement::prepare_view(
|
||||
}
|
||||
}
|
||||
|
||||
_properties.apply_to_builder(view_prop_defs::op_type::create, builder, std::move(schema_extensions),
|
||||
db, keyspace(), is_colocated);
|
||||
_properties.properties()->apply_to_builder(builder, std::move(schema_extensions), db, keyspace(), !is_colocated);
|
||||
|
||||
if (builder.default_time_to_live().count() > 0) {
|
||||
throw exceptions::invalid_request_exception(
|
||||
"Cannot set or alter default_time_to_live for a materialized view. "
|
||||
"Data in a materialized view always expire at the same time than "
|
||||
"the corresponding data in the parent table.");
|
||||
}
|
||||
|
||||
auto where_clause_text = util::relations_to_where_clause(_where_clause);
|
||||
builder.with_view_info(schema, included.empty(), std::move(where_clause_text));
|
||||
@@ -401,7 +414,7 @@ std::pair<view_ptr, cql3::cql_warnings_vec> create_view_statement::prepare_view(
|
||||
future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, utils::chunked_vector<mutation>, cql3::cql_warnings_vec>>
|
||||
create_view_statement::prepare_schema_mutations(query_processor& qp, const query_options&, api::timestamp_type ts) const {
|
||||
utils::chunked_vector<mutation> m;
|
||||
auto [definition, warnings] = prepare_view(qp.db(), qp.proxy().get_token_metadata_ptr());
|
||||
auto [definition, warnings] = prepare_view(qp.db());
|
||||
try {
|
||||
m = co_await service::prepare_new_view_announcement(qp.proxy(), std::move(definition), ts);
|
||||
} catch (const exceptions::already_exists_exception& e) {
|
||||
|
||||
@@ -7,9 +7,9 @@
|
||||
#pragma once
|
||||
|
||||
#include "cql3/statements/schema_altering_statement.hh"
|
||||
#include "cql3/statements/cf_properties.hh"
|
||||
#include "cql3/cf_name.hh"
|
||||
#include "cql3/expr/expression.hh"
|
||||
#include "cql3/statements/view_prop_defs.hh"
|
||||
|
||||
#include <seastar/core/shared_ptr.hh>
|
||||
|
||||
@@ -35,7 +35,7 @@ private:
|
||||
expr::expression _where_clause;
|
||||
std::vector<::shared_ptr<cql3::column_identifier::raw>> _partition_keys;
|
||||
std::vector<::shared_ptr<cql3::column_identifier::raw>> _clustering_keys;
|
||||
view_prop_defs _properties;
|
||||
cf_properties _properties;
|
||||
bool _if_not_exists;
|
||||
|
||||
public:
|
||||
@@ -48,7 +48,7 @@ public:
|
||||
std::vector<::shared_ptr<cql3::column_identifier::raw>> clustering_keys,
|
||||
bool if_not_exists);
|
||||
|
||||
std::pair<view_ptr, cql3::cql_warnings_vec> prepare_view(data_dictionary::database db, locator::token_metadata_ptr tmptr) const;
|
||||
std::pair<view_ptr, cql3::cql_warnings_vec> prepare_view(data_dictionary::database db) const;
|
||||
|
||||
auto& properties() {
|
||||
return _properties;
|
||||
|
||||
@@ -710,12 +710,11 @@ std::vector<lw_shared_ptr<column_specification>> listing_describe_statement::get
|
||||
|
||||
future<std::vector<std::vector<managed_bytes_opt>>> listing_describe_statement::describe(cql3::query_processor& qp, const service::client_state& client_state) const {
|
||||
auto db = qp.db();
|
||||
auto raw_ks = client_state.get_raw_keyspace();
|
||||
|
||||
std::vector<sstring> keyspaces;
|
||||
// For most describe statements we should limit the results to the USEd
|
||||
// keyspace (client_state.get_raw_keyspace()), if any. However for DESC
|
||||
// KEYSPACES we must list all keyspaces, not just the USEd one.
|
||||
if (_element != element_type::keyspace && !client_state.get_raw_keyspace().empty()) {
|
||||
keyspaces.push_back(client_state.get_raw_keyspace());
|
||||
if (!raw_ks.empty()) {
|
||||
keyspaces.push_back(raw_ks);
|
||||
} else {
|
||||
keyspaces = db.get_all_keyspaces();
|
||||
std::ranges::sort(keyspaces);
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
#include <set>
|
||||
#include <seastar/core/format.hh>
|
||||
#include "index_prop_defs.hh"
|
||||
#include "cql3/statements/view_prop_defs.hh"
|
||||
#include "index/secondary_index.hh"
|
||||
#include "exceptions/exceptions.hh"
|
||||
|
||||
@@ -22,9 +21,7 @@ static void check_system_option_specified(const index_options_map& options, cons
|
||||
}
|
||||
}
|
||||
|
||||
namespace cql3::statements {
|
||||
|
||||
void index_specific_prop_defs::validate() const {
|
||||
void cql3::statements::index_prop_defs::validate() const {
|
||||
static std::set<sstring> keywords({ sstring(KW_OPTIONS) });
|
||||
|
||||
property_definitions::validate(keywords);
|
||||
@@ -43,13 +40,13 @@ void index_specific_prop_defs::validate() const {
|
||||
}
|
||||
|
||||
index_options_map
|
||||
index_specific_prop_defs::get_raw_options() const {
|
||||
cql3::statements::index_prop_defs::get_raw_options() const {
|
||||
auto options = get_map(KW_OPTIONS);
|
||||
return !options ? std::unordered_map<sstring, sstring>() : std::unordered_map<sstring, sstring>(options->begin(), options->end());
|
||||
}
|
||||
|
||||
index_options_map
|
||||
index_specific_prop_defs::get_options() const {
|
||||
cql3::statements::index_prop_defs::get_options() const {
|
||||
auto options = get_raw_options();
|
||||
options.emplace(db::index::secondary_index::custom_class_option_name, *custom_class);
|
||||
if (index_version.has_value()) {
|
||||
@@ -57,25 +54,3 @@ index_specific_prop_defs::get_options() const {
|
||||
}
|
||||
return options;
|
||||
}
|
||||
|
||||
// Transfers the index-specific `options` property (index_specific_prop_defs::KW_OPTIONS)
// from this object's property set into `target`. Does nothing when the property is absent.
void index_prop_defs::extract_index_specific_properties_to(index_specific_prop_defs& target) {
|
||||
if (properties()->has_property(index_specific_prop_defs::KW_OPTIONS)) {
|
||||
// Take the stored value out of this object's property set.
auto value = properties()->extract_property(index_specific_prop_defs::KW_OPTIONS);
|
||||
|
||||
// `value` is visited as a variant: forward whichever alternative it holds
// to `target.add_property` under the same key, moving rather than copying.
std::visit([&target] <typename T> (T&& val) {
|
||||
target.add_property(index_specific_prop_defs::KW_OPTIONS, std::forward<T>(val));
|
||||
}, std::move(value));
|
||||
}
|
||||
}
|
||||
|
||||
// Converts this object into a plain `view_prop_defs` by moving out its base-class part.
// Rvalue-qualified (`&&`): callable only on an expiring object.
// Reports an internal error if the index-specific `options` property is still present.
view_prop_defs index_prop_defs::into_view_prop_defs() && {
|
||||
// Precondition check: the index-specific property must no longer be in the property set.
if (properties()->has_property(index_specific_prop_defs::KW_OPTIONS)) {
|
||||
utils::on_internal_error(seastar::format(
|
||||
"Precondition has been violated. The property '{}' is still present", index_specific_prop_defs::KW_OPTIONS));
|
||||
}
|
||||
|
||||
// Move-construct the result from the `view_prop_defs` base subobject of `*this`.
view_prop_defs result = std::move(static_cast<view_prop_defs&>(*this));
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace cql3::statements
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cql3/statements/view_prop_defs.hh"
|
||||
#include "property_definitions.hh"
|
||||
#include <seastar/core/sstring.hh>
|
||||
#include "schema/schema_fwd.hh"
|
||||
@@ -24,7 +23,7 @@ namespace cql3 {
|
||||
|
||||
namespace statements {
|
||||
|
||||
class index_specific_prop_defs : public property_definitions {
|
||||
class index_prop_defs : public property_definitions {
|
||||
public:
|
||||
static constexpr auto KW_OPTIONS = "options";
|
||||
|
||||
@@ -38,19 +37,6 @@ public:
|
||||
index_options_map get_options() const;
|
||||
};
|
||||
|
||||
/// Property definitions that extend `view_prop_defs` with helpers for splitting off
/// index-specific properties and for converting back to a plain `view_prop_defs`.
/// NOTE(review): presumably the raw property set parsed for CREATE INDEX -- confirm against the parser.
struct index_prop_defs : public view_prop_defs {
|
||||
/// Extract all of the index-specific properties to `target`.
|
||||
///
|
||||
/// If there's a property at an index-specific key, and if `target` already has
|
||||
/// a value at that key, that value will be replaced.
|
||||
void extract_index_specific_properties_to(index_specific_prop_defs& target);
|
||||
|
||||
/// Turns this object into an object of type `view_prop_defs`, as if moved.
|
||||
///
|
||||
/// Precondition: the object MUST NOT contain any index-specific property.
|
||||
view_prop_defs into_view_prop_defs() &&;
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -8,8 +8,8 @@
|
||||
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
|
||||
*/
|
||||
|
||||
#include <seastar/core/format.hh>
|
||||
#include <seastar/core/sstring.hh>
|
||||
#include "seastar/core/format.hh"
|
||||
#include "seastar/core/sstring.hh"
|
||||
#include "utils/assert.hh"
|
||||
#include "cql3/statements/ks_prop_defs.hh"
|
||||
#include "cql3/statements/request_validations.hh"
|
||||
@@ -98,7 +98,6 @@ static locator::replication_strategy_config_options prepare_options(
|
||||
const sstring& strategy_class,
|
||||
const locator::token_metadata& tm,
|
||||
bool rf_rack_valid_keyspaces,
|
||||
bool enforce_rack_list,
|
||||
locator::replication_strategy_config_options options,
|
||||
const locator::replication_strategy_config_options& old_options,
|
||||
bool rack_list_enabled,
|
||||
@@ -108,7 +107,7 @@ static locator::replication_strategy_config_options prepare_options(
|
||||
auto is_nts = locator::abstract_replication_strategy::to_qualified_class_name(strategy_class) == "org.apache.cassandra.locator.NetworkTopologyStrategy";
|
||||
auto is_alter = !old_options.empty();
|
||||
const auto& all_dcs = tm.get_datacenter_racks_token_owners();
|
||||
auto auto_expand_racks = uses_tablets && rack_list_enabled && (rf_rack_valid_keyspaces || enforce_rack_list);
|
||||
auto auto_expand_racks = uses_tablets && rf_rack_valid_keyspaces && rack_list_enabled;
|
||||
|
||||
logger.debug("prepare_options: {}: is_nts={} auto_expand_racks={} rack_list_enabled={} old_options={} new_options={} all_dcs={}",
|
||||
strategy_class, is_nts, auto_expand_racks, rack_list_enabled, old_options, options, all_dcs);
|
||||
@@ -418,7 +417,7 @@ lw_shared_ptr<data_dictionary::keyspace_metadata> ks_prop_defs::as_ks_metadata(s
|
||||
auto initial_tablets = get_initial_tablets(default_initial_tablets, cfg.enforce_tablets());
|
||||
bool uses_tablets = initial_tablets.has_value();
|
||||
bool rack_list_enabled = utils::get_local_injector().enter("create_with_numeric") ? false : feat.rack_list_rf;
|
||||
auto options = prepare_options(sc, tm, cfg.rf_rack_valid_keyspaces(), cfg.enforce_rack_list(), get_replication_options(), {}, rack_list_enabled, uses_tablets);
|
||||
auto options = prepare_options(sc, tm, cfg.rf_rack_valid_keyspaces(), get_replication_options(), {}, rack_list_enabled, uses_tablets);
|
||||
return data_dictionary::keyspace_metadata::new_keyspace(ks_name, sc,
|
||||
std::move(options), initial_tablets, get_consistency_option(), get_boolean(KW_DURABLE_WRITES, true), get_storage_options());
|
||||
}
|
||||
@@ -435,7 +434,7 @@ lw_shared_ptr<data_dictionary::keyspace_metadata> ks_prop_defs::as_ks_metadata_u
|
||||
auto sc = get_replication_strategy_class();
|
||||
bool rack_list_enabled = utils::get_local_injector().enter("create_with_numeric") ? false : feat.rack_list_rf;
|
||||
if (sc) {
|
||||
options = prepare_options(*sc, tm, cfg.rf_rack_valid_keyspaces(), cfg.enforce_rack_list(), get_replication_options(), old_options, rack_list_enabled, uses_tablets);
|
||||
options = prepare_options(*sc, tm, cfg.rf_rack_valid_keyspaces(), get_replication_options(), old_options, rack_list_enabled, uses_tablets);
|
||||
} else {
|
||||
sc = old->strategy_name();
|
||||
options = old_options;
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
#include "utils/assert.hh"
|
||||
#include "cql3/cql_statement.hh"
|
||||
#include "cql3/statements/modification_statement.hh"
|
||||
#include "cql3/statements/broadcast_modification_statement.hh"
|
||||
#include "cql3/statements/strongly_consistent_modification_statement.hh"
|
||||
#include "cql3/statements/raw/modification_statement.hh"
|
||||
#include "cql3/statements/prepared_statement.hh"
|
||||
#include "cql3/expr/expr-utils.hh"
|
||||
@@ -29,8 +29,6 @@
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "service/storage_proxy.hh"
|
||||
#include "service/broadcast_tables/experimental/lang.hh"
|
||||
#include "cql3/statements/strong_consistency/modification_statement.hh"
|
||||
#include "cql3/statements/strong_consistency/statement_helpers.hh"
|
||||
|
||||
#include <boost/lexical_cast.hpp>
|
||||
|
||||
@@ -548,7 +546,7 @@ modification_statement::process_where_clause(data_dictionary::database db, expr:
|
||||
}
|
||||
}
|
||||
|
||||
::shared_ptr<broadcast_modification_statement>
|
||||
::shared_ptr<strongly_consistent_modification_statement>
|
||||
modification_statement::prepare_for_broadcast_tables() const {
|
||||
// FIXME: implement for every type of `modification_statement`.
|
||||
throw service::broadcast_tables::unsupported_operation_error{};
|
||||
@@ -556,27 +554,24 @@ modification_statement::prepare_for_broadcast_tables() const {
|
||||
|
||||
namespace raw {
|
||||
|
||||
// Prepares this raw modification statement and selects the concrete statement object to return:
// the broadcast-tables variant when keyspace()/column_family() name a broadcast table,
// otherwise the regular prepared modification statement.
::shared_ptr<cql_statement_opt_metadata>
|
||||
modification_statement::prepare_statement(data_dictionary::database db, prepare_context& ctx, cql_stats& stats) {
|
||||
// Build the regular statement first; the broadcast variant is derived from it.
::shared_ptr<cql3::statements::modification_statement> statement = prepare(db, ctx, stats);
|
||||
|
||||
if (service::broadcast_tables::is_broadcast_table_statement(keyspace(), column_family())) {
|
||||
// The base implementation of prepare_for_broadcast_tables() throws
// unsupported_operation_error; statement types that support it override it.
return statement->prepare_for_broadcast_tables();
|
||||
} else {
|
||||
return statement;
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<prepared_statement>
|
||||
modification_statement::prepare(data_dictionary::database db, cql_stats& stats) {
|
||||
schema_ptr schema = validation::validate_column_family(db, keyspace(), column_family());
|
||||
auto meta = get_prepare_context();
|
||||
|
||||
auto statement = std::invoke([&] -> shared_ptr<cql_statement> {
|
||||
auto result = prepare(db, meta, stats);
|
||||
|
||||
if (strong_consistency::is_strongly_consistent(db, schema->ks_name())) {
|
||||
return ::make_shared<strong_consistency::modification_statement>(std::move(result));
|
||||
}
|
||||
|
||||
if (service::broadcast_tables::is_broadcast_table_statement(keyspace(), column_family())) {
|
||||
return result->prepare_for_broadcast_tables();
|
||||
}
|
||||
return result;
|
||||
});
|
||||
|
||||
auto statement = prepare_statement(db, meta, stats);
|
||||
auto partition_key_bind_indices = meta.get_partition_key_bind_indexes(*schema);
|
||||
return std::make_unique<prepared_statement>(audit_info(), std::move(statement), meta,
|
||||
std::move(partition_key_bind_indices));
|
||||
return std::make_unique<prepared_statement>(audit_info(), std::move(statement), meta, std::move(partition_key_bind_indices));
|
||||
}
|
||||
|
||||
::shared_ptr<cql3::statements::modification_statement>
|
||||
|
||||
@@ -30,7 +30,7 @@ class operation;
|
||||
|
||||
namespace statements {
|
||||
|
||||
class broadcast_modification_statement;
|
||||
class strongly_consistent_modification_statement;
|
||||
|
||||
namespace raw { class modification_statement; }
|
||||
|
||||
@@ -113,15 +113,15 @@ public:
|
||||
|
||||
virtual void add_update_for_key(mutation& m, const query::clustering_range& range, const update_parameters& params, const json_cache_opt& json_cache) const = 0;
|
||||
|
||||
uint32_t get_bound_terms() const override;
|
||||
virtual uint32_t get_bound_terms() const override;
|
||||
|
||||
const sstring& keyspace() const;
|
||||
virtual const sstring& keyspace() const;
|
||||
|
||||
const sstring& column_family() const;
|
||||
virtual const sstring& column_family() const;
|
||||
|
||||
bool is_counter() const;
|
||||
virtual bool is_counter() const;
|
||||
|
||||
bool is_view() const;
|
||||
virtual bool is_view() const;
|
||||
|
||||
int64_t get_timestamp(int64_t now, const query_options& options) const;
|
||||
|
||||
@@ -129,12 +129,12 @@ public:
|
||||
|
||||
std::optional<gc_clock::duration> get_time_to_live(const query_options& options) const;
|
||||
|
||||
future<> check_access(query_processor& qp, const service::client_state& state) const override;
|
||||
virtual future<> check_access(query_processor& qp, const service::client_state& state) const override;
|
||||
|
||||
// Validate before execute, using client state and current schema
|
||||
void validate(query_processor&, const service::client_state& state) const override;
|
||||
|
||||
bool depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const override;
|
||||
virtual bool depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const override;
|
||||
|
||||
void add_operation(::shared_ptr<operation> op);
|
||||
|
||||
@@ -256,9 +256,7 @@ public:
|
||||
|
||||
virtual json_cache_opt maybe_prepare_json_cache(const query_options& options) const;
|
||||
|
||||
virtual ::shared_ptr<broadcast_modification_statement> prepare_for_broadcast_tables() const;
|
||||
|
||||
db::timeout_clock::duration get_timeout(const service::client_state& state, const query_options& options) const;
|
||||
virtual ::shared_ptr<strongly_consistent_modification_statement> prepare_for_broadcast_tables() const;
|
||||
|
||||
protected:
|
||||
/**
|
||||
@@ -266,7 +264,9 @@ protected:
|
||||
* processed to check that they are compatible.
|
||||
* @throws InvalidRequestException
|
||||
*/
|
||||
void validate_where_clause_for_conditions() const;
|
||||
virtual void validate_where_clause_for_conditions() const;
|
||||
|
||||
db::timeout_clock::duration get_timeout(const service::client_state& state, const query_options& options) const;
|
||||
|
||||
friend class raw::modification_statement;
|
||||
};
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
#include <ranges>
|
||||
|
||||
#include <seastar/core/format.hh>
|
||||
#include <stdexcept>
|
||||
#include "cql3/statements/property_definitions.hh"
|
||||
#include "exceptions/exceptions.hh"
|
||||
#include "utils/overloaded_functor.hh"
|
||||
@@ -103,18 +102,6 @@ bool property_definitions::has_property(const sstring& name) const {
|
||||
return _properties.contains(name);
|
||||
}
|
||||
|
||||
// Removes the property called `name` from this set and returns its value.
// Throws std::out_of_range if no property with that name exists.
property_definitions::value_type property_definitions::extract_property(const sstring& name) {
|
||||
auto it = _properties.find(name);
|
||||
|
||||
if (it == _properties.end()) {
|
||||
throw std::out_of_range{std::format("No property of name '{}'", std::string_view(name))};
|
||||
}
|
||||
|
||||
// Move the value out before erasing so the caller receives it without a copy.
value_type result = std::move(it->second);
|
||||
_properties.erase(it);
|
||||
return result;
|
||||
}
|
||||
|
||||
std::optional<property_definitions::value_type> property_definitions::get(const sstring& name) const {
|
||||
if (auto it = _properties.find(name); it != _properties.end()) {
|
||||
return it->second;
|
||||
|
||||
@@ -59,8 +59,6 @@ protected:
|
||||
public:
|
||||
bool has_property(const sstring& name) const;
|
||||
|
||||
value_type extract_property(const sstring& name);
|
||||
|
||||
std::optional<value_type> get(const sstring& name) const;
|
||||
|
||||
std::optional<extended_map_type> get_extended_map(const sstring& name) const;
|
||||
|
||||
@@ -50,8 +50,8 @@ public:
|
||||
protected:
|
||||
virtual audit::statement_category category() const override;
|
||||
virtual audit::audit_info_ptr audit_info() const override {
|
||||
constexpr bool batch = true;
|
||||
return audit::audit::create_audit_info(category(), sstring(), sstring(), batch);
|
||||
// We don't audit batch statements. Instead we audit statements that are inside the batch.
|
||||
return audit::audit::create_no_audit_info();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -40,6 +40,7 @@ protected:
|
||||
|
||||
public:
|
||||
virtual std::unique_ptr<prepared_statement> prepare(data_dictionary::database db, cql_stats& stats) override;
|
||||
::shared_ptr<cql_statement_opt_metadata> prepare_statement(data_dictionary::database db, prepare_context& ctx, cql_stats& stats);
|
||||
::shared_ptr<cql3::statements::modification_statement> prepare(data_dictionary::database db, prepare_context& ctx, cql_stats& stats) const;
|
||||
void add_raw(sstring&& raw) { _raw_cql = std::move(raw); }
|
||||
const sstring& get_raw_cql() const { return _raw_cql; }
|
||||
|
||||
@@ -131,6 +131,8 @@ private:
|
||||
|
||||
void verify_ordering_is_valid(const prepared_orderings_type&, const schema&, const restrictions::statement_restrictions& restrictions) const;
|
||||
|
||||
prepared_ann_ordering_type prepare_ann_ordering(const schema& schema, prepare_context& ctx, data_dictionary::database db) const;
|
||||
|
||||
// Checks whether this ordering reverses all results.
|
||||
// We only allow leaving select results unchanged or reversing them.
|
||||
bool is_ordering_reversed(const prepared_orderings_type&) const;
|
||||
|
||||
@@ -8,8 +8,6 @@
|
||||
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
|
||||
*/
|
||||
|
||||
#include "cql3/statements/strong_consistency/select_statement.hh"
|
||||
#include "cql3/statements/strong_consistency/statement_helpers.hh"
|
||||
#include "cql3/statements/select_statement.hh"
|
||||
#include "cql3/expr/expression.hh"
|
||||
#include "cql3/expr/evaluate.hh"
|
||||
@@ -18,7 +16,7 @@
|
||||
#include "cql3/statements/raw/select_statement.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "cql3/statements/prune_materialized_view_statement.hh"
|
||||
#include "cql3/statements/broadcast_select_statement.hh"
|
||||
#include "cql3/statements/strongly_consistent_select_statement.hh"
|
||||
|
||||
#include "exceptions/exceptions.hh"
|
||||
#include <seastar/core/future.hh>
|
||||
@@ -27,14 +25,12 @@
|
||||
#include "service/broadcast_tables/experimental/lang.hh"
|
||||
#include "service/qos/qos_common.hh"
|
||||
#include "transport/messages/result_message.hh"
|
||||
#include "cql3/functions/functions.hh"
|
||||
#include "cql3/functions/as_json_function.hh"
|
||||
#include "cql3/selection/selection.hh"
|
||||
#include "cql3/util.hh"
|
||||
#include "cql3/restrictions/statement_restrictions.hh"
|
||||
#include "index/secondary_index.hh"
|
||||
#include "types/vector.hh"
|
||||
#include "vector_search/filter.hh"
|
||||
#include "validation.hh"
|
||||
#include "exceptions/unrecognized_entity_exception.hh"
|
||||
#include <optional>
|
||||
@@ -265,8 +261,7 @@ future<> select_statement::check_access(query_processor& qp, const service::clie
|
||||
auto& cf_name = s->is_view()
|
||||
? s->view_info()->base_name()
|
||||
: (cdc ? cdc->cf_name() : column_family());
|
||||
const schema_ptr& base_schema = cdc ? cdc : _schema;
|
||||
bool is_vector_indexed = secondary_index::vector_index::has_vector_index(*base_schema);
|
||||
bool is_vector_indexed = secondary_index::vector_index::has_vector_index(*_schema);
|
||||
co_await state.has_column_family_access(keyspace(), cf_name, auth::permission::SELECT, auth::command_desc::type::OTHER, is_vector_indexed);
|
||||
} catch (const data_dictionary::no_such_column_family& e) {
|
||||
// Will be validated afterwards.
|
||||
@@ -372,9 +367,8 @@ uint64_t select_statement::get_inner_loop_limit(uint64_t limit, bool is_aggregat
|
||||
}
|
||||
|
||||
bool select_statement::needs_post_query_ordering() const {
|
||||
// We need post-query ordering for queries with IN on the partition key and an ORDER BY
|
||||
// and ANN index queries with rescoring.
|
||||
return static_cast<bool>(_ordering_comparator);
|
||||
// We need post-query ordering only for queries with IN on the partition key and an ORDER BY.
|
||||
return _restrictions->key_is_in_relation() && !_parameters->orderings().empty();
|
||||
}
|
||||
|
||||
struct select_statement_executor {
|
||||
@@ -1963,46 +1957,14 @@ mutation_fragments_select_statement::do_execute(query_processor& qp, service::qu
|
||||
}));
|
||||
}
|
||||
|
||||
// Bundle of data describing an ANN ordering on a SELECT, returned (wrapped in
// std::optional) by get_ann_ordering_info().
struct ann_ordering_info {
|
||||
// NOTE(review): presumably the secondary index used to serve the ANN query -- confirm.
secondary_index::index _index;
|
||||
// get_ann_ordering_info() builds this as a pair of the ordered column's definition
// and the prepared expression for the ANN vector.
raw::select_statement::prepared_ann_ordering_type _prepared_ann_ordering;
|
||||
// NOTE(review): presumably whether results are re-scored after the index lookup -- confirm.
bool is_rescoring_enabled;
|
||||
};
|
||||
|
||||
static std::optional<ann_ordering_info> get_ann_ordering_info(
|
||||
data_dictionary::database db,
|
||||
schema_ptr schema,
|
||||
lw_shared_ptr<const raw::select_statement::parameters> parameters,
|
||||
prepare_context& ctx) {
|
||||
|
||||
if (parameters->orderings().empty()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
auto [column_id, ordering] = parameters->orderings().front();
|
||||
const auto& ann_vector = std::get_if<raw::select_statement::ann_vector>(&ordering);
|
||||
if (!ann_vector) {
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
::shared_ptr<column_identifier> column = column_id->prepare_column_identifier(*schema);
|
||||
const column_definition* def = schema->get_column_definition(column->name());
|
||||
if (!def) {
|
||||
throw exceptions::invalid_request_exception(
|
||||
fmt::format("Undefined column name {}", column->text()));
|
||||
}
|
||||
|
||||
if (!def->type->is_vector() || static_cast<const vector_type_impl*>(def->type.get())->get_elements_type()->get_kind() != abstract_type::kind::float_kind) {
|
||||
throw exceptions::invalid_request_exception("ANN ordering is only supported on float vector indexes");
|
||||
}
|
||||
|
||||
auto e = expr::prepare_expression(*ann_vector, db, schema->ks_name(), nullptr, def->column_specification);
|
||||
expr::fill_prepare_context(e, ctx);
|
||||
|
||||
raw::select_statement::prepared_ann_ordering_type prepared_ann_ordering = std::make_pair(std::move(def), std::move(e));
|
||||
|
||||
::shared_ptr<cql3::statements::select_statement> vector_indexed_table_select_statement::prepare(data_dictionary::database db, schema_ptr schema,
|
||||
uint32_t bound_terms, lw_shared_ptr<const parameters> parameters, ::shared_ptr<selection::selection> selection,
|
||||
::shared_ptr<restrictions::statement_restrictions> restrictions, ::shared_ptr<std::vector<size_t>> group_by_cell_indices, bool is_reversed,
|
||||
ordering_comparator_type ordering_comparator, prepared_ann_ordering_type prepared_ann_ordering, std::optional<expr::expression> limit,
|
||||
std::optional<expr::expression> per_partition_limit, cql_stats& stats, std::unique_ptr<attributes> attrs) {
|
||||
auto cf = db.find_column_family(schema);
|
||||
auto& sim = cf.get_index_manager();
|
||||
auto [index_opt, _] = restrictions->find_idx(sim);
|
||||
|
||||
auto indexes = sim.list_indexes();
|
||||
auto it = std::find_if(indexes.begin(), indexes.end(), [&prepared_ann_ordering](const auto& ind) {
|
||||
@@ -2014,90 +1976,29 @@ static std::optional<ann_ordering_info> get_ann_ordering_info(
|
||||
if (it == indexes.end()) {
|
||||
throw exceptions::invalid_request_exception("ANN ordering by vector requires the column to be indexed using 'vector_index'");
|
||||
}
|
||||
|
||||
return ann_ordering_info{
|
||||
*it,
|
||||
std::move(prepared_ann_ordering),
|
||||
secondary_index::vector_index::is_rescoring_enabled(it->metadata().options())
|
||||
};
|
||||
}
|
||||
|
||||
// Appends a call to the index's similarity function as the last entry of
// `prepared_selectors` and returns the position of that new entry.
//
// The function name is read from the index options; its two arguments are the
// ANN-ordered column and the prepared query-vector expression stored in
// `ann_ordering_info`. The function object is looked up through
// cql3::functions::instance().get() using the schema's keyspace and table names.
//
// NOTE(review): the return value is size() - 1 (a size_t) converted to uint32_t.
static uint32_t add_similarity_function_to_selectors(
|
||||
std::vector<selection::prepared_selector>& prepared_selectors,
|
||||
const ann_ordering_info& ann_ordering_info,
|
||||
data_dictionary::database db,
|
||||
schema_ptr schema) {
|
||||
auto similarity_function_name = secondary_index::vector_index::get_cql_similarity_function_name(ann_ordering_info._index.metadata().options());
|
||||
// Create the function name
|
||||
auto func_name = functions::function_name::native_function(sstring(similarity_function_name));
|
||||
|
||||
// Create the function arguments
|
||||
std::vector<expr::expression> args;
|
||||
args.push_back(expr::column_value(ann_ordering_info._prepared_ann_ordering.first));
|
||||
args.push_back(ann_ordering_info._prepared_ann_ordering.second);
|
||||
|
||||
// Get the function object
|
||||
// The lookup takes assignment_testable wrappers built from each argument and its type.
std::vector<shared_ptr<assignment_testable>> provided_args;
|
||||
provided_args.push_back(expr::as_assignment_testable(args[0], expr::type_of(args[0])));
|
||||
provided_args.push_back(expr::as_assignment_testable(args[1], expr::type_of(args[1])));
|
||||
|
||||
auto func = cql3::functions::instance().get(db, schema->ks_name(), func_name, provided_args, schema->ks_name(), schema->cf_name(), nullptr);
|
||||
|
||||
// Create the function call expression
|
||||
expr::function_call similarity_func_call{
|
||||
.func = func,
|
||||
.args = std::move(args),
|
||||
};
|
||||
|
||||
// Add the similarity function as a prepared selector (last)
|
||||
prepared_selectors.push_back(selection::prepared_selector{
|
||||
.expr = std::move(similarity_func_call),
|
||||
.alias = nullptr,
|
||||
});
|
||||
// Index of the selector that was just appended.
return prepared_selectors.size() - 1;
|
||||
}
|
||||
|
||||
static select_statement::ordering_comparator_type get_similarity_ordering_comparator(std::vector<selection::prepared_selector>& prepared_selectors, uint32_t similarity_column_index) {
|
||||
auto type = expr::type_of(prepared_selectors[similarity_column_index].expr);
|
||||
if (type->get_kind() != abstract_type::kind::float_kind) {
|
||||
seastar::on_internal_error(logger, "Similarity function must return float type.");
|
||||
if (index_opt || parameters->allow_filtering() || !(restrictions->is_empty()) || check_needs_allow_filtering_anyway(*restrictions)) {
|
||||
throw exceptions::invalid_request_exception("ANN ordering by vector does not support filtering");
|
||||
}
|
||||
return [similarity_column_index, type] (const raw::select_statement::result_row_type& r1, const raw::select_statement::result_row_type& r2) {
|
||||
auto& c1 = r1[similarity_column_index];
|
||||
auto& c2 = r2[similarity_column_index];
|
||||
auto f1 = c1 ? value_cast<float>(type->deserialize(*c1)) : std::numeric_limits<float>::quiet_NaN();
|
||||
auto f2 = c2 ? value_cast<float>(type->deserialize(*c2)) : std::numeric_limits<float>::quiet_NaN();
|
||||
if (std::isfinite(f1) && std::isfinite(f2)) {
|
||||
return f1 > f2;
|
||||
}
|
||||
return std::isfinite(f1);
|
||||
};
|
||||
}
|
||||
index_opt = *it;
|
||||
|
||||
::shared_ptr<cql3::statements::select_statement> vector_indexed_table_select_statement::prepare(data_dictionary::database db, schema_ptr schema,
|
||||
uint32_t bound_terms, lw_shared_ptr<const parameters> parameters, ::shared_ptr<selection::selection> selection,
|
||||
::shared_ptr<restrictions::statement_restrictions> restrictions, ::shared_ptr<std::vector<size_t>> group_by_cell_indices, bool is_reversed,
|
||||
ordering_comparator_type ordering_comparator, prepared_ann_ordering_type prepared_ann_ordering, std::optional<expr::expression> limit,
|
||||
std::optional<expr::expression> per_partition_limit, cql_stats& stats, const secondary_index::index& index, std::unique_ptr<attributes> attrs) {
|
||||
|
||||
auto prepared_filter = vector_search::prepare_filter(*restrictions, parameters->allow_filtering());
|
||||
if (!index_opt) {
|
||||
throw std::runtime_error("No index found.");
|
||||
}
|
||||
|
||||
return ::make_shared<cql3::statements::vector_indexed_table_select_statement>(schema, bound_terms, parameters, std::move(selection), std::move(restrictions),
|
||||
std::move(group_by_cell_indices), is_reversed, std::move(ordering_comparator), std::move(prepared_ann_ordering), std::move(limit),
|
||||
std::move(per_partition_limit), stats, index, std::move(prepared_filter), std::move(attrs));
|
||||
std::move(per_partition_limit), stats, *index_opt, std::move(attrs));
|
||||
}
|
||||
|
||||
vector_indexed_table_select_statement::vector_indexed_table_select_statement(schema_ptr schema, uint32_t bound_terms, lw_shared_ptr<const parameters> parameters,
|
||||
::shared_ptr<selection::selection> selection, ::shared_ptr<const restrictions::statement_restrictions> restrictions,
|
||||
::shared_ptr<std::vector<size_t>> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator,
|
||||
prepared_ann_ordering_type prepared_ann_ordering, std::optional<expr::expression> limit,
|
||||
std::optional<expr::expression> per_partition_limit, cql_stats& stats, const secondary_index::index& index,
|
||||
vector_search::prepared_filter prepared_filter, std::unique_ptr<attributes> attrs)
|
||||
std::optional<expr::expression> per_partition_limit, cql_stats& stats, const secondary_index::index& index, std::unique_ptr<attributes> attrs)
|
||||
: select_statement{schema, bound_terms, parameters, selection, restrictions, group_by_cell_indices, is_reversed, ordering_comparator, limit,
|
||||
per_partition_limit, stats, std::move(attrs)}
|
||||
, _index{index}
|
||||
, _prepared_ann_ordering(std::move(prepared_ann_ordering))
|
||||
, _prepared_filter(std::move(prepared_filter)) {
|
||||
, _prepared_ann_ordering(std::move(prepared_ann_ordering)) {
|
||||
|
||||
if (!limit.has_value()) {
|
||||
throw exceptions::invalid_request_exception("Vector ANN queries must have a limit specified");
|
||||
@@ -2132,19 +2033,13 @@ future<shared_ptr<cql_transport::messages::result_message>> vector_indexed_table
|
||||
|
||||
auto timeout = db::timeout_clock::now() + get_timeout(state.get_client_state(), options);
|
||||
auto aoe = abort_on_expiry(timeout);
|
||||
auto filter_json = _prepared_filter.to_json(options);
|
||||
uint64_t fetch = static_cast<uint64_t>(std::ceil(limit * secondary_index::vector_index::get_oversampling(_index.metadata().options())));
|
||||
auto pkeys = co_await qp.vector_store_client().ann(
|
||||
_schema->ks_name(), _index.metadata().name(), _schema, get_ann_ordering_vector(options), fetch, filter_json, aoe.abort_source());
|
||||
_schema->ks_name(), _index.metadata().name(), _schema, get_ann_ordering_vector(options), limit, aoe.abort_source());
|
||||
if (!pkeys.has_value()) {
|
||||
co_await coroutine::return_exception(
|
||||
exceptions::invalid_request_exception(std::visit(vector_search::vector_store_client::ann_error_visitor{}, pkeys.error())));
|
||||
}
|
||||
|
||||
if (pkeys->size() > limit && !secondary_index::vector_index::is_rescoring_enabled(_index.metadata().options())) {
|
||||
pkeys->erase(pkeys->begin() + limit, pkeys->end());
|
||||
}
|
||||
|
||||
co_return co_await query_base_table(qp, state, options, pkeys.value(), timeout);
|
||||
});
|
||||
|
||||
@@ -2161,11 +2056,11 @@ void vector_indexed_table_select_statement::update_stats() const {
|
||||
}
|
||||
|
||||
lw_shared_ptr<query::read_command> vector_indexed_table_select_statement::prepare_command_for_base_query(
|
||||
query_processor& qp, service::query_state& state, const query_options& options, uint64_t fetch_limit) const {
|
||||
query_processor& qp, service::query_state& state, const query_options& options) const {
|
||||
auto slice = make_partition_slice(options);
|
||||
return ::make_lw_shared<query::read_command>(_schema->id(), _schema->version(), std::move(slice), qp.proxy().get_max_result_size(slice),
|
||||
query::tombstone_limit(qp.proxy().get_tombstone_limit()),
|
||||
query::row_limit(get_inner_loop_limit(fetch_limit, _selection->is_aggregate())), query::partition_limit(query::max_partitions),
|
||||
query::row_limit(get_inner_loop_limit(get_limit(options, _limit), _selection->is_aggregate())), query::partition_limit(query::max_partitions),
|
||||
_query_start_time_point, tracing::make_trace_info(state.get_trace_state()), query_id::create_null_id(), query::is_first_page::no,
|
||||
options.get_timestamp(state));
|
||||
}
|
||||
@@ -2183,7 +2078,7 @@ std::vector<float> vector_indexed_table_select_statement::get_ann_ordering_vecto
|
||||
future<::shared_ptr<cql_transport::messages::result_message>> vector_indexed_table_select_statement::query_base_table(query_processor& qp,
|
||||
service::query_state& state, const query_options& options, const std::vector<vector_search::primary_key>& pkeys,
|
||||
lowres_clock::time_point timeout) const {
|
||||
auto command = prepare_command_for_base_query(qp, state, options, pkeys.size());
|
||||
auto command = prepare_command_for_base_query(qp, state, options);
|
||||
|
||||
// For tables without clustering columns, we can optimize by querying
|
||||
// partition ranges instead of individual primary keys, since the
|
||||
@@ -2222,7 +2117,6 @@ future<::shared_ptr<cql_transport::messages::result_message>> vector_indexed_tab
|
||||
query::result_merger{command->get_row_limit(), query::max_partitions});
|
||||
|
||||
co_return co_await wrap_result_to_error_message([this, &command, &options](auto result) {
|
||||
command->set_row_limit(get_limit(options, _limit));
|
||||
return process_results(std::move(result), command, options, _query_start_time_point);
|
||||
})(std::move(result));
|
||||
}
|
||||
@@ -2236,7 +2130,6 @@ future<::shared_ptr<cql_transport::messages::result_message>> vector_indexed_tab
|
||||
{timeout, state.get_permit(), state.get_client_state(), state.get_trace_state(), {}, {}, options.get_specific_options().node_local_only},
|
||||
std::nullopt)
|
||||
.then(wrap_result_to_error_message([this, &options, command](service::storage_proxy::coordinator_query_result qr) {
|
||||
command->set_row_limit(get_limit(options, _limit));
|
||||
return this->process_results(std::move(qr.query_result), command, options, _query_start_time_point);
|
||||
}));
|
||||
}
|
||||
@@ -2331,41 +2224,32 @@ std::unique_ptr<prepared_statement> select_statement::prepare(data_dictionary::d
|
||||
|
||||
prepared_selectors = maybe_jsonize_select_clause(std::move(prepared_selectors), db, schema);
|
||||
|
||||
std::optional<ann_ordering_info> ann_ordering_info_opt = get_ann_ordering_info(db, schema, _parameters, ctx);
|
||||
bool is_ann_query = ann_ordering_info_opt.has_value();
|
||||
auto aggregation_depth = 0u;
|
||||
|
||||
if (prepared_selectors.empty() && (!_group_by_columns.empty() || (is_ann_query && ann_ordering_info_opt->is_rescoring_enabled))) {
|
||||
// We have a "SELECT * GROUP BY" or "SELECT * ORDER BY ANN" with rescoring enabled. If we leave prepared_selectors
|
||||
// empty, below we choose selection::wildcard() for SELECT *, and either:
|
||||
// - forget to do the "levellize" trick needed for the GROUP BY. See #16531.
|
||||
// - forget to add the similarity function needed for ORDER BY ANN with rescoring. See below.
|
||||
// So we need to set prepared_selectors.
|
||||
auto all_columns = selection::selection::wildcard_columns(schema);
|
||||
std::vector<::shared_ptr<selection::raw_selector>> select_all;
|
||||
select_all.reserve(all_columns.size());
|
||||
for (const column_definition *cdef : all_columns) {
|
||||
auto name = ::make_shared<cql3::column_identifier::raw>(cdef->name_as_text(), true);
|
||||
select_all.push_back(::make_shared<selection::raw_selector>(
|
||||
expr::unresolved_identifier(std::move(name)), nullptr));
|
||||
// Force aggregation if GROUP BY is used. This will wrap every column x as first(x).
|
||||
if (!_group_by_columns.empty()) {
|
||||
aggregation_depth = std::max(aggregation_depth, 1u);
|
||||
if (prepared_selectors.empty()) {
|
||||
// We have a "SELECT * GROUP BY". If we leave prepared_selectors
|
||||
// empty, below we choose selection::wildcard() for SELECT *, and
|
||||
// forget to do the "levellize" trick needed for the GROUP BY.
|
||||
// So we need to set prepared_selectors. See #16531.
|
||||
auto all_columns = selection::selection::wildcard_columns(schema);
|
||||
std::vector<::shared_ptr<selection::raw_selector>> select_all;
|
||||
select_all.reserve(all_columns.size());
|
||||
for (const column_definition *cdef : all_columns) {
|
||||
auto name = ::make_shared<cql3::column_identifier::raw>(cdef->name_as_text(), true);
|
||||
select_all.push_back(::make_shared<selection::raw_selector>(
|
||||
expr::unresolved_identifier(std::move(name)), nullptr));
|
||||
}
|
||||
prepared_selectors = selection::raw_selector::to_prepared_selectors(select_all, *schema, db, keyspace());
|
||||
}
|
||||
prepared_selectors = selection::raw_selector::to_prepared_selectors(select_all, *schema, db, keyspace());
|
||||
}
|
||||
|
||||
for (auto& ps : prepared_selectors) {
|
||||
expr::fill_prepare_context(ps.expr, ctx);
|
||||
}
|
||||
|
||||
// Force aggregation if GROUP BY is used. This will wrap every column x as first(x).
|
||||
auto aggregation_depth = _group_by_columns.empty() ? 0u : 1u;
|
||||
|
||||
select_statement::ordering_comparator_type ordering_comparator;
|
||||
bool hide_last_column = false;
|
||||
if (is_ann_query && ann_ordering_info_opt->is_rescoring_enabled) {
|
||||
uint32_t similarity_column_index = add_similarity_function_to_selectors(prepared_selectors, *ann_ordering_info_opt, db, schema);
|
||||
hide_last_column = true;
|
||||
ordering_comparator = get_similarity_ordering_comparator(prepared_selectors, similarity_column_index);
|
||||
}
|
||||
|
||||
for (auto& ps : prepared_selectors) {
|
||||
aggregation_depth = std::max(aggregation_depth, expr::aggregation_depth(ps.expr));
|
||||
}
|
||||
@@ -2383,11 +2267,6 @@ std::unique_ptr<prepared_statement> select_statement::prepare(data_dictionary::d
|
||||
? selection::selection::wildcard(schema)
|
||||
: selection::selection::from_selectors(db, schema, keyspace(), levellized_prepared_selectors);
|
||||
|
||||
if (is_ann_query && hide_last_column) {
|
||||
// Hide the similarity selector from the client by reducing column_count
|
||||
selection->get_result_metadata()->hide_last_column();
|
||||
}
|
||||
|
||||
// Cassandra 5.0.2 disallows PER PARTITION LIMIT with aggregate queries
|
||||
// but only if GROUP BY is not used.
|
||||
// See #9879 for more details.
|
||||
@@ -2395,21 +2274,26 @@ std::unique_ptr<prepared_statement> select_statement::prepare(data_dictionary::d
|
||||
throw exceptions::invalid_request_exception("PER PARTITION LIMIT is not allowed with aggregate queries.");
|
||||
}
|
||||
|
||||
auto restrictions = prepare_restrictions(db, schema, ctx, selection, for_view, _parameters->allow_filtering() || is_ann_query,
|
||||
auto restrictions = prepare_restrictions(db, schema, ctx, selection, for_view, _parameters->allow_filtering(),
|
||||
restrictions::check_indexes(!_parameters->is_mutation_fragments()));
|
||||
|
||||
if (_parameters->is_distinct()) {
|
||||
validate_distinct_selection(*schema, *selection, *restrictions);
|
||||
}
|
||||
|
||||
select_statement::ordering_comparator_type ordering_comparator;
|
||||
bool is_reversed_ = false;
|
||||
|
||||
std::optional<prepared_ann_ordering_type> prepared_ann_ordering;
|
||||
|
||||
auto orderings = _parameters->orderings();
|
||||
|
||||
if (!orderings.empty() && !is_ann_query) {
|
||||
if (!orderings.empty()) {
|
||||
std::visit([&](auto&& ordering) {
|
||||
using T = std::decay_t<decltype(ordering)>;
|
||||
if constexpr (!std::is_same_v<T, select_statement::ann_vector>) {
|
||||
if constexpr (std::is_same_v<T, select_statement::ann_vector>) {
|
||||
prepared_ann_ordering = prepare_ann_ordering(*schema, ctx, db);
|
||||
} else {
|
||||
SCYLLA_ASSERT(!for_view);
|
||||
verify_ordering_is_allowed(*_parameters, *restrictions);
|
||||
prepared_orderings_type prepared_orderings = prepare_orderings(*schema);
|
||||
@@ -2422,7 +2306,7 @@ std::unique_ptr<prepared_statement> select_statement::prepare(data_dictionary::d
|
||||
}
|
||||
|
||||
std::vector<sstring> warnings;
|
||||
if (!is_ann_query) {
|
||||
if (!prepared_ann_ordering.has_value()) {
|
||||
check_needs_filtering(*restrictions, db.get_config().strict_allow_filtering(), warnings);
|
||||
ensure_filtering_columns_retrieval(db, *selection, *restrictions);
|
||||
}
|
||||
@@ -2476,21 +2360,7 @@ std::unique_ptr<prepared_statement> select_statement::prepare(data_dictionary::d
|
||||
&& restrictions->partition_key_restrictions_size() == schema->partition_key_size());
|
||||
};
|
||||
|
||||
if (strong_consistency::is_strongly_consistent(db, schema->ks_name())) {
|
||||
stmt = ::make_shared<strong_consistency::select_statement>(
|
||||
schema,
|
||||
ctx.bound_variables_size(),
|
||||
_parameters,
|
||||
std::move(selection),
|
||||
std::move(restrictions),
|
||||
std::move(group_by_cell_indices),
|
||||
is_reversed_,
|
||||
std::move(ordering_comparator),
|
||||
prepare_limit(db, ctx, _limit),
|
||||
prepare_limit(db, ctx, _per_partition_limit),
|
||||
stats,
|
||||
std::move(prepared_attrs));
|
||||
} else if (_parameters->is_prune_materialized_view()) {
|
||||
if (_parameters->is_prune_materialized_view()) {
|
||||
stmt = ::make_shared<cql3::statements::prune_materialized_view_statement>(
|
||||
schema,
|
||||
ctx.bound_variables_size(),
|
||||
@@ -2519,10 +2389,10 @@ std::unique_ptr<prepared_statement> select_statement::prepare(data_dictionary::d
|
||||
prepare_limit(db, ctx, _per_partition_limit),
|
||||
stats,
|
||||
std::move(prepared_attrs));
|
||||
} else if (is_ann_query) {
|
||||
} else if (prepared_ann_ordering) {
|
||||
stmt = vector_indexed_table_select_statement::prepare(db, schema, ctx.bound_variables_size(), _parameters, std::move(selection), std::move(restrictions),
|
||||
std::move(group_by_cell_indices), is_reversed_, std::move(ordering_comparator), std::move(ann_ordering_info_opt->_prepared_ann_ordering),
|
||||
prepare_limit(db, ctx, _limit), prepare_limit(db, ctx, _per_partition_limit), stats, ann_ordering_info_opt->_index, std::move(prepared_attrs));
|
||||
std::move(group_by_cell_indices), is_reversed_, std::move(ordering_comparator), std::move(*prepared_ann_ordering),
|
||||
prepare_limit(db, ctx, _limit), prepare_limit(db, ctx, _per_partition_limit), stats, std::move(prepared_attrs));
|
||||
} else if (restrictions->uses_secondary_indexing()) {
|
||||
stmt = view_indexed_table_select_statement::prepare(
|
||||
db,
|
||||
@@ -2554,7 +2424,7 @@ std::unique_ptr<prepared_statement> select_statement::prepare(data_dictionary::d
|
||||
std::move(prepared_attrs)
|
||||
);
|
||||
} else if (service::broadcast_tables::is_broadcast_table_statement(keyspace(), column_family())) {
|
||||
stmt = ::make_shared<cql3::statements::broadcast_select_statement>(
|
||||
stmt = ::make_shared<cql3::statements::strongly_consistent_select_statement>(
|
||||
schema,
|
||||
ctx.bound_variables_size(),
|
||||
_parameters,
|
||||
@@ -2744,6 +2614,28 @@ void select_statement::verify_ordering_is_valid(const prepared_orderings_type& o
|
||||
}
|
||||
}
|
||||
|
||||
// Prepares the ANN ordering described by the first entry of the statement's
// orderings and returns it as a (column definition, prepared expression) pair.
//
// Asserts that the first ordering holds an ann_vector. Throws
// invalid_request_exception when the ordered column is not defined in `schema`,
// or when it is not a vector type whose element kind is float. The vector
// expression is prepared against the column's specification and then passed to
// fill_prepare_context() together with `ctx`.
select_statement::prepared_ann_ordering_type select_statement::prepare_ann_ordering(const schema& schema, prepare_context& ctx, data_dictionary::database db) const {
|
||||
auto [column_id, ordering] = _parameters->orderings().front();
|
||||
const auto& ann_vector = std::get_if<select_statement::ann_vector>(&ordering);
|
||||
// The function only handles the ann_vector alternative of the ordering.
SCYLLA_ASSERT(ann_vector);
|
||||
|
||||
::shared_ptr<column_identifier> column = column_id->prepare_column_identifier(schema);
|
||||
const column_definition* def = schema.get_column_definition(column->name());
|
||||
if (!def) {
|
||||
throw exceptions::invalid_request_exception(
|
||||
fmt::format("Undefined column name {}", column->text()));
|
||||
}
|
||||
|
||||
// Only vector columns with float elements are accepted.
if (!def->type->is_vector() || static_cast<const vector_type_impl*>(def->type.get())->get_elements_type()->get_kind() != abstract_type::kind::float_kind) {
|
||||
throw exceptions::invalid_request_exception("ANN ordering is only supported on float vector indexes");
|
||||
}
|
||||
|
||||
auto e = expr::prepare_expression(*ann_vector, db, keyspace(), nullptr, def->column_specification);
|
||||
expr::fill_prepare_context(e, ctx);
|
||||
|
||||
return std::make_pair(std::move(def), std::move(e));
|
||||
}
|
||||
|
||||
select_statement::ordering_comparator_type select_statement::get_ordering_comparator(const prepared_orderings_type& orderings,
|
||||
selection::selection& selection,
|
||||
const restrictions::statement_restrictions& restrictions) {
|
||||
|
||||
@@ -22,7 +22,6 @@
|
||||
#include "locator/host_id.hh"
|
||||
#include "service/cas_shard.hh"
|
||||
#include "vector_search/vector_store_client.hh"
|
||||
#include "vector_search/filter.hh"
|
||||
|
||||
namespace service {
|
||||
class client_state;
|
||||
@@ -363,7 +362,6 @@ private:
|
||||
class vector_indexed_table_select_statement : public select_statement {
|
||||
secondary_index::index _index;
|
||||
prepared_ann_ordering_type _prepared_ann_ordering;
|
||||
vector_search::prepared_filter _prepared_filter;
|
||||
mutable gc_clock::time_point _query_start_time_point;
|
||||
|
||||
public:
|
||||
@@ -373,13 +371,13 @@ public:
|
||||
lw_shared_ptr<const parameters> parameters, ::shared_ptr<selection::selection> selection,
|
||||
::shared_ptr<restrictions::statement_restrictions> restrictions, ::shared_ptr<std::vector<size_t>> group_by_cell_indices, bool is_reversed,
|
||||
ordering_comparator_type ordering_comparator, prepared_ann_ordering_type prepared_ann_ordering, std::optional<expr::expression> limit,
|
||||
std::optional<expr::expression> per_partition_limit, cql_stats& stats, const secondary_index::index& index, std::unique_ptr<cql3::attributes> attrs);
|
||||
std::optional<expr::expression> per_partition_limit, cql_stats& stats, std::unique_ptr<cql3::attributes> attrs);
|
||||
|
||||
vector_indexed_table_select_statement(schema_ptr schema, uint32_t bound_terms, lw_shared_ptr<const parameters> parameters,
|
||||
::shared_ptr<selection::selection> selection, ::shared_ptr<const restrictions::statement_restrictions> restrictions,
|
||||
::shared_ptr<std::vector<size_t>> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator,
|
||||
prepared_ann_ordering_type prepared_ann_ordering, std::optional<expr::expression> limit, std::optional<expr::expression> per_partition_limit,
|
||||
cql_stats& stats, const secondary_index::index& index, vector_search::prepared_filter prepared_filter, std::unique_ptr<cql3::attributes> attrs);
|
||||
cql_stats& stats, const secondary_index::index& index, std::unique_ptr<cql3::attributes> attrs);
|
||||
|
||||
private:
|
||||
future<::shared_ptr<cql_transport::messages::result_message>> do_execute(
|
||||
@@ -387,7 +385,7 @@ private:
|
||||
|
||||
void update_stats() const;
|
||||
|
||||
lw_shared_ptr<query::read_command> prepare_command_for_base_query(query_processor& qp, service::query_state& state, const query_options& options, uint64_t fetch_limit) const;
|
||||
lw_shared_ptr<query::read_command> prepare_command_for_base_query(query_processor& qp, service::query_state& state, const query_options& options) const;
|
||||
|
||||
std::vector<float> get_ann_ordering_vector(const query_options& options) const;
|
||||
|
||||
|
||||
@@ -1,82 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2025-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#include "modification_statement.hh"
|
||||
|
||||
#include "transport/messages/result_message.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "service/strong_consistency/coordinator.hh"
|
||||
#include "cql3/statements/strong_consistency/statement_helpers.hh"
|
||||
|
||||
namespace cql3::statements::strong_consistency {
|
||||
static logging::logger logger("sc_modification_statement");
|
||||
|
||||
// Wraps an existing modification statement. The cql_statement_opt_metadata
// base is constructed with &timeout_config::write_timeout.
modification_statement::modification_statement(shared_ptr<base_statement> statement)
|
||||
: cql_statement_opt_metadata(&timeout_config::write_timeout)
|
||||
, _statement(std::move(statement))
|
||||
{
|
||||
}
|
||||
|
||||
using result_message = cql_transport::messages::result_message;
|
||||
|
||||
// Runs execute_without_checking_exception_message() and chains
// propagate_exception_as_future onto its result.
// NOTE(review): presumably this turns an exception carried inside the result
// message into a failed future - confirm against propagate_exception_as_future.
future<shared_ptr<result_message>> modification_statement::execute(query_processor& qp, service::query_state& qs,
|
||||
const query_options& options, std::optional<service::group0_guard> guard) const
|
||||
{
|
||||
return execute_without_checking_exception_message(qp, qs, options, std::move(guard))
|
||||
.then(cql_transport::messages::propagate_exception_as_future<shared_ptr<result_message>>);
|
||||
}
|
||||
|
||||
// Executes the wrapped modification statement through the strongly consistent
// coordinator obtained from the query processor.
//
// Throws invalid_request_exception when the statement does not resolve to
// exactly one single-partition key range, or when the wrapped statement
// requires a read. The mutation itself is built inside the callback handed to
// coordinator.mutate(), using the timestamp that mutate() supplies.
//
// Returns the result of redirect_statement() when mutate() reports
// need_redirect, otherwise a void_message.
future<shared_ptr<result_message>> modification_statement::execute_without_checking_exception_message(
|
||||
query_processor& qp, service::query_state& qs, const query_options& options,
|
||||
std::optional<service::group0_guard> guard) const
|
||||
{
|
||||
auto json_cache = base_statement::json_cache_opt{};
|
||||
const auto keys = _statement->build_partition_keys(options, json_cache);
|
||||
// Exactly one key range, and it must denote a single partition.
if (keys.size() != 1 || !query::is_single_partition(keys[0])) {
|
||||
throw exceptions::invalid_request_exception("Strongly consistent queries can only target a single partition");
|
||||
}
|
||||
if (_statement->requires_read()) {
|
||||
throw exceptions::invalid_request_exception("Strongly consistent updates don't support data prefetch");
|
||||
}
|
||||
|
||||
auto [coordinator, holder] = qp.acquire_strongly_consistent_coordinator();
|
||||
// The token of the single partition key is passed alongside the schema.
const auto mutate_result = co_await coordinator.get().mutate(_statement->s,
|
||||
keys[0].start()->value().token(),
|
||||
[&](api::timestamp_type ts) {
|
||||
// Prefetch data is constructed from the schema only; statements that
// require a read were rejected above.
const auto prefetch_data = update_parameters::prefetch_data(_statement->s);
|
||||
const auto ttl = _statement->get_time_to_live(options);
|
||||
const auto params = update_parameters(_statement->s, options, ts, ttl, prefetch_data);
|
||||
const auto ranges = _statement->create_clustering_ranges(options, json_cache);
|
||||
auto muts = _statement->apply_updates(keys, ranges, params, json_cache);
|
||||
// Anything other than exactly one mutation is reported as an internal error.
if (muts.size() != 1) {
|
||||
on_internal_error(logger, ::format("statement '{}' has unexpected number of mutations {}",
|
||||
raw_cql_statement, muts.size()));
|
||||
}
|
||||
return std::move(*muts.begin());
|
||||
});
|
||||
|
||||
using namespace service::strong_consistency;
|
||||
// mutate() may ask for the statement to be redirected to another target.
if (const auto* redirect = get_if<need_redirect>(&mutate_result)) {
|
||||
co_return co_await redirect_statement(qp, options, redirect->target);
|
||||
}
|
||||
|
||||
co_return seastar::make_shared<result_message::void_message>();
|
||||
}
|
||||
|
||||
// Forwards the access check to the wrapped statement.
future<> modification_statement::check_access(query_processor& qp, const service::client_state& state) const {
|
||||
return _statement->check_access(qp, state);
|
||||
}
|
||||
|
||||
// Returns the bound-term count reported by the wrapped statement.
uint32_t modification_statement::get_bound_terms() const {
|
||||
return _statement->get_bound_terms();
|
||||
}
|
||||
|
||||
// Forwards the keyspace / table dependency query to the wrapped statement.
bool modification_statement::depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const {
|
||||
return _statement->depends_on(ks_name, cf_name);
|
||||
}
|
||||
}
|
||||
@@ -1,39 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2025-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cql3/cql_statement.hh"
|
||||
#include "cql3/expr/expression.hh"
|
||||
#include "cql3/statements/modification_statement.hh"
|
||||
|
||||
namespace cql3::statements::strong_consistency {
|
||||
|
||||
// CQL statement in the strong_consistency namespace that holds a
// cql3::statements::modification_statement and overrides the
// cql_statement_opt_metadata entry points declared below.
class modification_statement : public cql_statement_opt_metadata {
|
||||
using result_message = cql_transport::messages::result_message;
|
||||
using base_statement = cql3::statements::modification_statement;
|
||||
|
||||
// The wrapped modification statement.
shared_ptr<base_statement> _statement;
|
||||
public:
|
||||
// Takes shared ownership of the statement to wrap.
modification_statement(shared_ptr<base_statement> statement);
|
||||
|
||||
// Executes the statement and yields a result message.
future<shared_ptr<result_message>> execute(query_processor& qp, service::query_state& state,
|
||||
const query_options& options, std::optional<service::group0_guard> guard) const override;
|
||||
|
||||
// Same parameters as execute().
// NOTE(review): per its name, this variant leaves exception messages
// unchecked - confirm against the definition.
future<shared_ptr<result_message>> execute_without_checking_exception_message(query_processor& qp,
|
||||
service::query_state& qs, const query_options& options,
|
||||
std::optional<service::group0_guard> guard) const override;
|
||||
|
||||
// Access check for the given client state.
future<> check_access(query_processor& qp, const service::client_state& state) const override;
|
||||
|
||||
// Number of bound terms of the statement.
uint32_t get_bound_terms() const override;
|
||||
|
||||
// Whether the statement depends on the given keyspace and optional table.
bool depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const override;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,56 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2025-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#include "select_statement.hh"
|
||||
|
||||
#include "query/query-request.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "service/strong_consistency/coordinator.hh"
|
||||
#include "cql3/statements/strong_consistency/statement_helpers.hh"
|
||||
|
||||
namespace cql3::statements::strong_consistency {
|
||||
|
||||
using result_message = cql_transport::messages::result_message;
|
||||
|
||||
/// Executes a SELECT through the strongly consistent coordinator.
///
/// The statement's restrictions must resolve to exactly one key range that is a
/// single partition; anything else is rejected with `invalid_request_exception`.
/// The read command is then passed to the coordinator obtained from `qp`.
/// When the coordinator answers with `need_redirect`, the outcome of
/// `redirect_statement()` for the indicated replica is returned; otherwise the
/// query result is converted by `process_results()`.
future<::shared_ptr<result_message>> select_statement::do_execute(query_processor& qp,
        service::query_state& state,
        const query_options& options) const
{
    const auto partition_ranges = _restrictions->get_partition_key_ranges(options);
    const bool targets_single_partition = partition_ranges.size() == 1
            && query::is_single_partition(partition_ranges[0]);
    if (!targets_single_partition) {
        throw exceptions::invalid_request_exception("Strongly consistent queries can only target a single partition");
    }

    // One timestamp is shared by the read command and by result processing.
    const auto now = gc_clock::now();
    const auto max_rows = query::row_limit(
            get_inner_loop_limit(get_limit(options, _limit), _selection->is_aggregate()));

    auto read_command = make_lw_shared<query::read_command>(
            _query_schema->id(),
            _query_schema->version(),
            make_partition_slice(options),
            query::max_result_size(query::result_memory_limiter::maximum_result_size),
            query::tombstone_limit(query::tombstone_limit::max),
            max_rows,
            query::partition_limit(query::max_partitions),
            now,
            tracing::make_trace_info(state.get_trace_state()),
            query_id::create_null_id(),
            query::is_first_page::no,
            options.get_timestamp(state));

    const auto deadline = db::timeout_clock::now() + get_timeout(state.get_client_state(), options);

    // `gate_holder` is the second element returned alongside the coordinator;
    // it stays alive until the end of this coroutine.
    auto [sc_coordinator, gate_holder] = qp.acquire_strongly_consistent_coordinator();
    auto query_result = co_await sc_coordinator.get().query(_query_schema, *read_command,
            partition_ranges, state.get_trace_state(), deadline);

    using namespace service::strong_consistency;
    const auto* redirect = get_if<need_redirect>(&query_result);
    if (redirect != nullptr) {
        co_return co_await redirect_statement(qp, options, redirect->target);
    }

    co_return co_await process_results(
            get<lw_shared_ptr<query::result>>(std::move(query_result)),
            read_command, options, now);
}
|
||||
|
||||
}
|
||||
@@ -1,26 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2025-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cql3/cql_statement.hh"
|
||||
#include "cql3/statements/select_statement.hh"
|
||||
|
||||
namespace cql3::statements::strong_consistency {
|
||||
|
||||
class select_statement : public cql3::statements::select_statement {
|
||||
using result_message = cql_transport::messages::result_message;
|
||||
|
||||
public:
|
||||
using cql3::statements::select_statement::select_statement;
|
||||
|
||||
future<::shared_ptr<cql_transport::messages::result_message>> do_execute(query_processor& qp,
|
||||
service::query_state& state, const query_options& options) const override;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,37 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2025-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#include "statement_helpers.hh"
|
||||
|
||||
#include "transport/messages/result_message_base.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "replica/database.hh"
|
||||
#include "locator/tablet_replication_strategy.hh"
|
||||
|
||||
namespace cql3::statements::strong_consistency {
|
||||
/// Redirects a statement to the replica `target`.
///
/// Only a redirect within the local node is possible: if `target.host` equals
/// this node's host id, the result of `qp.bounce_to_shard()` for `target.shard`
/// is returned, carrying the cached partition-key function call values taken
/// out of `options`. For any other host an `invalid_request_exception` is
/// raised.
///
/// NOTE(review): the error text speaks of "writes", yet
/// `select_statement::do_execute()` reaches this function for reads as well -
/// confirm whether the wording should be generalized.
future<::shared_ptr<cql_transport::messages::result_message>> redirect_statement(query_processor& qp,
        const query_options& options,
        const locator::tablet_replica& target)
{
    const auto local_host_id = qp.db().real_database().get_token_metadata().get_topology().my_host_id();
    if (target.host == local_host_id) {
        // `options` arrives as const, but the cached values have to be moved out of it.
        auto&& cached_pk_fn_calls = const_cast<cql3::query_options&>(options).take_cached_pk_function_calls();
        co_return qp.bounce_to_shard(target.shard, std::move(cached_pk_fn_calls));
    }

    throw exceptions::invalid_request_exception(format(
            "Strongly consistent writes can be executed only on the leader node, "
            "leader id {}, current host id {}",
            target.host, local_host_id));
}
|
||||
|
||||
/// Returns true when the replication strategy of keyspace `ks_name` is
/// tablet-aware and its consistency option is anything other than `eventual`.
bool is_strongly_consistent(data_dictionary::database db, std::string_view ks_name) {
    const auto* tablet_rs = db.find_keyspace(ks_name).get_replication_strategy().maybe_as_tablet_aware();
    if (tablet_rs == nullptr) {
        // The keyspace does not use a tablet-aware replication strategy.
        return false;
    }
    return tablet_rs->get_consistency() != data_dictionary::consistency_config_option::eventual;
}
|
||||
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2025-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cql3/cql_statement.hh"
|
||||
#include "locator/tablets.hh"
|
||||
|
||||
namespace cql3::statements::strong_consistency {
|
||||
|
||||
/// Redirects a statement to the replica `target`.
///
/// If `target.host` is this node, returns the `bounce_to_shard` result for
/// `target.shard`; the cached partition-key function call values are taken out
/// of `options` (despite it being passed as const). If `target.host` is a
/// different node, throws `exceptions::invalid_request_exception`.
future<::shared_ptr<cql_transport::messages::result_message>> redirect_statement(
|
||||
query_processor& qp,
|
||||
const query_options& options,
|
||||
const locator::tablet_replica& target);
|
||||
|
||||
/// Returns true when keyspace `ks_name` uses a tablet-aware replication strategy
/// whose consistency option is not `eventual`.
/// NOTE(review): behavior for a non-existent keyspace depends on
/// `db.find_keyspace()` - presumably it throws; confirm.
bool is_strongly_consistent(data_dictionary::database db, std::string_view ks_name);
|
||||
|
||||
}
|
||||
@@ -9,7 +9,7 @@
|
||||
*/
|
||||
|
||||
|
||||
#include "cql3/statements/broadcast_modification_statement.hh"
|
||||
#include "cql3/statements/strongly_consistent_modification_statement.hh"
|
||||
|
||||
#include <optional>
|
||||
|
||||
@@ -28,11 +28,11 @@
|
||||
|
||||
namespace cql3 {
|
||||
|
||||
static logging::logger logger("broadcast_modification_statement");
|
||||
static logging::logger logger("strongly_consistent_modification_statement");
|
||||
|
||||
namespace statements {
|
||||
|
||||
broadcast_modification_statement::broadcast_modification_statement(
|
||||
strongly_consistent_modification_statement::strongly_consistent_modification_statement(
|
||||
uint32_t bound_terms,
|
||||
schema_ptr schema,
|
||||
broadcast_tables::prepared_update query)
|
||||
@@ -43,7 +43,7 @@ broadcast_modification_statement::broadcast_modification_statement(
|
||||
{ }
|
||||
|
||||
future<::shared_ptr<cql_transport::messages::result_message>>
|
||||
broadcast_modification_statement::execute(query_processor& qp, service::query_state& qs, const query_options& options, std::optional<service::group0_guard> guard) const {
|
||||
strongly_consistent_modification_statement::execute(query_processor& qp, service::query_state& qs, const query_options& options, std::optional<service::group0_guard> guard) const {
|
||||
return execute_without_checking_exception_message(qp, qs, options, std::move(guard))
|
||||
.then(cql_transport::messages::propagate_exception_as_future<shared_ptr<cql_transport::messages::result_message>>);
|
||||
}
|
||||
@@ -63,7 +63,7 @@ evaluate_prepared(
|
||||
}
|
||||
|
||||
future<::shared_ptr<cql_transport::messages::result_message>>
|
||||
broadcast_modification_statement::execute_without_checking_exception_message(query_processor& qp, service::query_state& qs, const query_options& options, std::optional<service::group0_guard> guard) const {
|
||||
strongly_consistent_modification_statement::execute_without_checking_exception_message(query_processor& qp, service::query_state& qs, const query_options& options, std::optional<service::group0_guard> guard) const {
|
||||
if (this_shard_id() != 0) {
|
||||
co_return ::make_shared<cql_transport::messages::result_message::bounce_to_shard>(0, cql3::computed_function_values{});
|
||||
}
|
||||
@@ -103,11 +103,11 @@ broadcast_modification_statement::execute_without_checking_exception_message(que
|
||||
), result);
|
||||
}
|
||||
|
||||
uint32_t broadcast_modification_statement::get_bound_terms() const {
|
||||
uint32_t strongly_consistent_modification_statement::get_bound_terms() const {
|
||||
return _bound_terms;
|
||||
}
|
||||
|
||||
future<> broadcast_modification_statement::check_access(query_processor& qp, const service::client_state& state) const {
|
||||
future<> strongly_consistent_modification_statement::check_access(query_processor& qp, const service::client_state& state) const {
|
||||
auto f = state.has_column_family_access(_schema->ks_name(), _schema->cf_name(), auth::permission::MODIFY);
|
||||
if (_query.value_condition.has_value()) {
|
||||
f = f.then([this, &state] {
|
||||
@@ -117,7 +117,7 @@ future<> broadcast_modification_statement::check_access(query_processor& qp, con
|
||||
return f;
|
||||
}
|
||||
|
||||
bool broadcast_modification_statement::depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const {
|
||||
bool strongly_consistent_modification_statement::depends_on(std::string_view ks_name, std::optional<std::string_view> cf_name) const {
|
||||
return _schema->ks_name() == ks_name && (!cf_name || _schema->cf_name() == *cf_name);
|
||||
}
|
||||
|
||||
@@ -27,13 +27,13 @@ struct prepared_update {
|
||||
|
||||
}
|
||||
|
||||
class broadcast_modification_statement : public cql_statement_opt_metadata {
|
||||
class strongly_consistent_modification_statement : public cql_statement_opt_metadata {
|
||||
const uint32_t _bound_terms;
|
||||
const schema_ptr _schema;
|
||||
const broadcast_tables::prepared_update _query;
|
||||
|
||||
public:
|
||||
broadcast_modification_statement(uint32_t bound_terms, schema_ptr schema, broadcast_tables::prepared_update query);
|
||||
strongly_consistent_modification_statement(uint32_t bound_terms, schema_ptr schema, broadcast_tables::prepared_update query);
|
||||
|
||||
virtual future<::shared_ptr<cql_transport::messages::result_message>>
|
||||
execute(query_processor& qp, service::query_state& qs, const query_options& options, std::optional<service::group0_guard> guard) const override;
|
||||
@@ -9,7 +9,7 @@
|
||||
*/
|
||||
|
||||
|
||||
#include "cql3/statements/broadcast_select_statement.hh"
|
||||
#include "cql3/statements/strongly_consistent_select_statement.hh"
|
||||
|
||||
#include <seastar/core/future.hh>
|
||||
#include <seastar/core/on_internal_error.hh>
|
||||
@@ -24,7 +24,7 @@ namespace cql3 {
|
||||
|
||||
namespace statements {
|
||||
|
||||
static logging::logger logger("broadcast_select_statement");
|
||||
static logging::logger logger("strongly_consistent_select_statement");
|
||||
|
||||
static
|
||||
expr::expression get_key(const cql3::expr::expression& partition_key_restrictions) {
|
||||
@@ -58,7 +58,7 @@ bool is_selecting_only_value(const cql3::selection::selection& selection) {
|
||||
selection.get_columns()[0]->name() == "value";
|
||||
}
|
||||
|
||||
broadcast_select_statement::broadcast_select_statement(schema_ptr schema, uint32_t bound_terms,
|
||||
strongly_consistent_select_statement::strongly_consistent_select_statement(schema_ptr schema, uint32_t bound_terms,
|
||||
lw_shared_ptr<const parameters> parameters,
|
||||
::shared_ptr<selection::selection> selection,
|
||||
::shared_ptr<const restrictions::statement_restrictions> restrictions,
|
||||
@@ -73,7 +73,7 @@ broadcast_select_statement::broadcast_select_statement(schema_ptr schema, uint32
|
||||
_query{prepare_query()}
|
||||
{ }
|
||||
|
||||
broadcast_tables::prepared_select broadcast_select_statement::prepare_query() const {
|
||||
broadcast_tables::prepared_select strongly_consistent_select_statement::prepare_query() const {
|
||||
if (!is_selecting_only_value(*_selection)) {
|
||||
throw service::broadcast_tables::unsupported_operation_error("only 'value' selector is allowed");
|
||||
}
|
||||
@@ -94,7 +94,7 @@ evaluate_prepared(
|
||||
}
|
||||
|
||||
future<::shared_ptr<cql_transport::messages::result_message>>
|
||||
broadcast_select_statement::execute_without_checking_exception_message(query_processor& qp, service::query_state& qs, const query_options& options, std::optional<service::group0_guard> guard) const {
|
||||
strongly_consistent_select_statement::execute_without_checking_exception_message(query_processor& qp, service::query_state& qs, const query_options& options, std::optional<service::group0_guard> guard) const {
|
||||
if (this_shard_id() != 0) {
|
||||
co_return ::make_shared<cql_transport::messages::result_message::bounce_to_shard>(0, cql3::computed_function_values{});
|
||||
}
|
||||
@@ -25,12 +25,12 @@ struct prepared_select {
|
||||
|
||||
}
|
||||
|
||||
class broadcast_select_statement : public select_statement {
|
||||
class strongly_consistent_select_statement : public select_statement {
|
||||
const broadcast_tables::prepared_select _query;
|
||||
|
||||
broadcast_tables::prepared_select prepare_query() const;
|
||||
public:
|
||||
broadcast_select_statement(schema_ptr schema,
|
||||
strongly_consistent_select_statement(schema_ptr schema,
|
||||
uint32_t bound_terms,
|
||||
lw_shared_ptr<const parameters> parameters,
|
||||
::shared_ptr<selection::selection> selection,
|
||||
@@ -13,7 +13,7 @@
|
||||
#include "cql3/expr/expression.hh"
|
||||
#include "cql3/expr/evaluate.hh"
|
||||
#include "cql3/expr/expr-utils.hh"
|
||||
#include "cql3/statements/broadcast_modification_statement.hh"
|
||||
#include "cql3/statements/strongly_consistent_modification_statement.hh"
|
||||
#include "service/broadcast_tables/experimental/lang.hh"
|
||||
#include "raw/update_statement.hh"
|
||||
|
||||
@@ -333,7 +333,7 @@ std::optional<expr::expression> get_value_condition(const expr::expression& the_
|
||||
return binop->rhs;
|
||||
}
|
||||
|
||||
::shared_ptr<broadcast_modification_statement>
|
||||
::shared_ptr<strongly_consistent_modification_statement>
|
||||
update_statement::prepare_for_broadcast_tables() const {
|
||||
if (attrs) {
|
||||
if (attrs->is_time_to_live_set()) {
|
||||
@@ -359,7 +359,7 @@ update_statement::prepare_for_broadcast_tables() const {
|
||||
.value_condition = get_value_condition(_condition),
|
||||
};
|
||||
|
||||
return ::make_shared<broadcast_modification_statement>(
|
||||
return ::make_shared<strongly_consistent_modification_statement>(
|
||||
get_bound_terms(),
|
||||
s,
|
||||
query
|
||||
|
||||
@@ -45,7 +45,7 @@ private:
|
||||
virtual void execute_operations_for_key(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params, const json_cache_opt& json_cache) const;
|
||||
|
||||
public:
|
||||
virtual ::shared_ptr<broadcast_modification_statement> prepare_for_broadcast_tables() const override;
|
||||
virtual ::shared_ptr<strongly_consistent_modification_statement> prepare_for_broadcast_tables() const override;
|
||||
};
|
||||
|
||||
/*
|
||||
|
||||
@@ -1,67 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2025-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#include "cql3/statements/view_prop_defs.hh"
|
||||
|
||||
namespace cql3::statements {
|
||||
|
||||
/// Validates the materialized-view properties for the schema operation `op`.
///
/// Checks, in this order:
///  1. the generic table-property validation of `cf_properties`,
///  2. COMPACT STORAGE must not be requested,
///  3. CDC must not be enabled,
///  4. for CREATE only: an explicitly requested table ID must not already
///     belong to an existing table.
///
/// Every violation is reported with `exceptions::invalid_request_exception`.
void view_prop_defs::validate_raw(op_type op, const data_dictionary::database db, sstring ks_name,
        const schema::extensions_map& exts) const
{
    cf_properties::validate(db, std::move(ks_name), exts);

    if (use_compact_storage()) {
        throw exceptions::invalid_request_exception(format("Cannot use 'COMPACT STORAGE' when defining a materialized view"));
    }

    if (properties()->get_cdc_options(exts)) {
        throw exceptions::invalid_request_exception("Cannot enable CDC for a materialized view");
    }

    // The ID collision check only applies when a view is being created.
    if (op != op_type::create) {
        return;
    }

    const auto requested_id = properties()->get_id();
    if (!requested_id || !db.try_find_table(*requested_id)) {
        return;
    }

    // Name the table that already owns the requested ID in the error.
    const auto existing = db.find_schema(*requested_id);
    throw exceptions::invalid_request_exception(seastar::format("Table with ID {} already exists: {}.{}",
            *requested_id, existing->ks_name(), existing->cf_name()));
}
|
||||
|
||||
/// Applies the properties to `builder` and validates the outcome for `op`.
///
/// After the underlying properties have been applied:
///  * CREATE: an explicitly requested table ID, if any, is set on the builder;
///  * ALTER: a resulting gc_grace_seconds of 0 is rejected;
///  * both: a positive default_time_to_live is rejected.
///
/// Rejections are reported with `exceptions::invalid_request_exception`. The
/// builder has already been modified by then and must not be used afterwards.
void view_prop_defs::apply_to_builder(op_type op, schema_builder& builder, schema::extensions_map exts,
        const data_dictionary::database db, sstring ks_name, bool is_colocated) const
{
    _properties->apply_to_builder(builder, exts, db, std::move(ks_name), !is_colocated);

    switch (op) {
    case op_type::create:
        if (const auto requested_id = properties()->get_id(); requested_id) {
            builder.set_uuid(*requested_id);
        }
        break;
    case op_type::alter:
        if (builder.get_gc_grace_seconds() == 0) {
            throw exceptions::invalid_request_exception(
                    "Cannot alter gc_grace_seconds of a materialized view to 0, since this "
                    "value is used to TTL undelivered updates. Setting gc_grace_seconds too "
                    "low might cause undelivered updates to expire before being replayed.");
        }
        break;
    }

    const bool has_default_ttl = builder.default_time_to_live().count() > 0;
    if (has_default_ttl) {
        throw exceptions::invalid_request_exception(
                "Cannot set or alter default_time_to_live for a materialized view. "
                "Data in a materialized view always expire at the same time than "
                "the corresponding data in the parent table.");
    }
}
|
||||
|
||||
} // namespace cql3::statements
|
||||
@@ -1,62 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2025-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cql3/statements/cf_properties.hh"
|
||||
|
||||
namespace cql3::statements {
|
||||
|
||||
/// This type represents the possible properties of the following CQL statements:
|
||||
///
|
||||
/// * CREATE MATERIALIZED VIEW,
|
||||
/// * ALTER MATERIALIZED VIEW.
|
||||
///
|
||||
/// Since the sets of the valid properties may differ between those statements, this type
|
||||
/// is supposed to represent a superset of them.
|
||||
///
|
||||
/// This type does NOT guarantee that all of the necessary validation logic will be performed
|
||||
/// by it. It strives to do that, but you should keep this in mind. What does that mean?
|
||||
/// Some parts of validation may require more context that's not accessible from here.
|
||||
///
|
||||
/// As of yet, this type does not cover all of the validation logic that could be here either.
|
||||
class view_prop_defs : public cf_properties {
|
||||
public:
|
||||
/// The type of a schema operation on a materialized view.
|
||||
/// These values will be used to guide the validation logic.
|
||||
enum class op_type {
|
||||
create,
|
||||
alter
|
||||
};
|
||||
|
||||
public:
|
||||
template <typename... Args>
|
||||
view_prop_defs(Args&&... args) : cf_properties(std::forward<Args>(args)...) {}
|
||||
|
||||
// Explicitly delete this method. It's declared in the inherited types.
|
||||
// The user of this interface should use `validate_raw` instead.
|
||||
void validate(const data_dictionary::database, sstring ks_name, const schema::extensions_map&) const = delete;
|
||||
|
||||
/// Validate the properties for the specified schema operation.
|
||||
///
|
||||
/// The validation is *raw* because we mostly validate the properties in their string form (checking if
|
||||
/// a property exists or not for instance) and only focus on the properties on their own, without
|
||||
/// having access to any other information.
|
||||
void validate_raw(op_type, const data_dictionary::database, sstring ks_name, const schema::extensions_map&) const;
|
||||
|
||||
/// Apply the properties to the provided schema_builder and validate them.
|
||||
///
|
||||
/// NOTE: If the validation fails, this function will throw an exception. What's more important,
|
||||
/// however, is that the provided schema_builder might have already been modified by that
|
||||
/// point. Because of that, in presence of an exception, the schema builder should NOT be
|
||||
/// used anymore.
|
||||
void apply_to_builder(op_type, schema_builder&, schema::extensions_map, const data_dictionary::database,
|
||||
sstring ks_name, bool is_colocated) const;
|
||||
};
|
||||
|
||||
} // namespace cql3::statements
|
||||
@@ -16,7 +16,6 @@
|
||||
#include <seastar/core/semaphore.hh>
|
||||
#include <seastar/core/metrics.hh>
|
||||
#include <seastar/core/coroutine.hh>
|
||||
#include <seastar/coroutine/maybe_yield.hh>
|
||||
#include <seastar/core/sleep.hh>
|
||||
#include <seastar/coroutine/parallel_for_each.hh>
|
||||
|
||||
@@ -320,8 +319,8 @@ future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches
|
||||
|
||||
std::unordered_map<int32_t, replay_stats> replay_stats_per_shard;
|
||||
|
||||
// Use a stable `now` across all batches, so skip/replay decisions are the
|
||||
// same across a whole prefix of written_at (across all ids).
|
||||
// Use a stable `now` across all batches, so skip/replay decisions are the
|
||||
// same across a whole prefix of written_at (across all ids).
|
||||
const auto now = db_clock::now();
|
||||
|
||||
auto batch = [this, cleanup, limiter, schema, &all_replayed, &replay_stats_per_shard, now] (const cql3::untyped_result_set::row& row) -> future<stop_iteration> {
|
||||
@@ -378,7 +377,7 @@ future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches
|
||||
|
||||
for (const auto& [fm, s] : fms) {
|
||||
mutations.emplace_back(fm.to_mutation(s));
|
||||
co_await coroutine::maybe_yield();
|
||||
co_await maybe_yield();
|
||||
}
|
||||
|
||||
if (!mutations.empty()) {
|
||||
|
||||
@@ -323,9 +323,6 @@ void cache_mutation_reader::touch_partition() {
|
||||
|
||||
inline
|
||||
future<> cache_mutation_reader::fill_buffer() {
|
||||
if (const auto& ex = get_abort_exception(); ex) {
|
||||
return make_exception_future<>(ex);
|
||||
}
|
||||
if (_state == state::before_static_row) {
|
||||
touch_partition();
|
||||
auto after_static_row = [this] {
|
||||
|
||||
@@ -502,9 +502,6 @@ public:
|
||||
void flush_segments(uint64_t size_to_remove);
|
||||
void check_no_data_older_than_allowed();
|
||||
|
||||
// whitebox testing
|
||||
std::function<future<>()> _oversized_pre_wait_memory_func;
|
||||
|
||||
private:
|
||||
class shutdown_marker{};
|
||||
|
||||
@@ -1600,15 +1597,8 @@ future<> db::commitlog::segment_manager::oversized_allocation(entry_writer& writ
|
||||
|
||||
scope_increment_counter allocating(totals.active_allocations);
|
||||
|
||||
// #27992 - whitebox testing. signal we are trying to lock out
|
||||
// all allocators
|
||||
if (_oversized_pre_wait_memory_func) {
|
||||
co_await _oversized_pre_wait_memory_func();
|
||||
}
|
||||
|
||||
auto permit = co_await std::move(fut);
|
||||
// #27992 - task reordering _can_ force the available units to negative. this is ok.
|
||||
SCYLLA_ASSERT(_request_controller.available_units() <= 0);
|
||||
SCYLLA_ASSERT(_request_controller.available_units() == 0);
|
||||
|
||||
decltype(permit) fake_permit; // can't have allocate+sync release semaphore.
|
||||
bool failed = false;
|
||||
@@ -1869,15 +1859,13 @@ future<> db::commitlog::segment_manager::oversized_allocation(entry_writer& writ
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto avail = _request_controller.available_units();
|
||||
SCYLLA_ASSERT(avail <= 0);
|
||||
SCYLLA_ASSERT(_request_controller.available_units() == 0);
|
||||
SCYLLA_ASSERT(permit.count() == max_request_controller_units());
|
||||
auto nw = _request_controller.waiters();
|
||||
permit.return_all();
|
||||
// #20633 cannot guarantee controller avail is now full, since we could have had waiters when doing
|
||||
// return all -> now will be less avail
|
||||
SCYLLA_ASSERT(nw > 0 || _request_controller.available_units() == (avail + ssize_t(max_request_controller_units())));
|
||||
SCYLLA_ASSERT(nw > 0 || _request_controller.available_units() == ssize_t(max_request_controller_units()));
|
||||
|
||||
if (!failed) {
|
||||
clogger.trace("Oversized allocation succeeded.");
|
||||
@@ -3961,9 +3949,6 @@ void db::commitlog::update_max_data_lifetime(std::optional<uint64_t> commitlog_d
|
||||
_segment_manager->cfg.commitlog_data_max_lifetime_in_seconds = commitlog_data_max_lifetime_in_seconds;
|
||||
}
|
||||
|
||||
void db::commitlog::set_oversized_pre_wait_memory_func(std::function<future<>()> f) {
|
||||
_segment_manager->_oversized_pre_wait_memory_func = std::move(f);
|
||||
}
|
||||
|
||||
future<std::vector<sstring>> db::commitlog::get_segments_to_replay() const {
|
||||
return _segment_manager->get_segments_to_replay();
|
||||
|
||||
@@ -385,9 +385,6 @@ public:
|
||||
// (Re-)set data max lifetime.
|
||||
void update_max_data_lifetime(std::optional<uint64_t> commitlog_data_max_lifetime_in_seconds);
|
||||
|
||||
// Whitebox testing. Do not use for production
|
||||
void set_oversized_pre_wait_memory_func(std::function<future<>()>);
|
||||
|
||||
using commit_load_reader_func = std::function<future<>(buffer_and_replay_position)>;
|
||||
|
||||
class segment_error : public std::exception {};
|
||||
|
||||
89
db/config.cc
89
db/config.cc
@@ -621,6 +621,25 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
||||
* @GroupDescription: Provides an overview of the group.
|
||||
*/
|
||||
/**
|
||||
* @Group Ungrouped properties
|
||||
*/
|
||||
, background_writer_scheduling_quota(this, "background_writer_scheduling_quota", value_status::Deprecated, 1.0,
|
||||
"max cpu usage ratio (between 0 and 1) for compaction process. Not intended for setting in normal operations. Setting it to 1 or higher will disable it, recommended operational setting is 0.5.")
|
||||
, auto_adjust_flush_quota(this, "auto_adjust_flush_quota", value_status::Deprecated, false,
|
||||
"true: auto-adjust memtable shares for flush processes")
|
||||
, memtable_flush_static_shares(this, "memtable_flush_static_shares", liveness::LiveUpdate, value_status::Used, 0,
|
||||
"If set to higher than 0, ignore the controller's output and set the memtable shares statically. Do not set this unless you know what you are doing and suspect a problem in the controller. This option will be retired when the controller reaches more maturity.")
|
||||
, compaction_static_shares(this, "compaction_static_shares", liveness::LiveUpdate, value_status::Used, 0,
|
||||
"If set to higher than 0, ignore the controller's output and set the compaction shares statically. Do not set this unless you know what you are doing and suspect a problem in the controller. This option will be retired when the controller reaches more maturity.")
|
||||
, compaction_max_shares(this, "compaction_max_shares", liveness::LiveUpdate, value_status::Used, default_compaction_maximum_shares,
|
||||
"Set the maximum shares of regular compaction to the specific value. Do not set this unless you know what you are doing and suspect a problem in the controller. This option will be retired when the controller reaches more maturity.")
|
||||
, compaction_enforce_min_threshold(this, "compaction_enforce_min_threshold", liveness::LiveUpdate, value_status::Used, false,
|
||||
"If set to true, enforce the min_threshold option for compactions strictly. If false (default), Scylla may decide to compact even if below min_threshold.")
|
||||
, compaction_flush_all_tables_before_major_seconds(this, "compaction_flush_all_tables_before_major_seconds", value_status::Used, 86400,
|
||||
"Set the minimum interval in seconds between flushing all tables before each major compaction (default is 86400)."
|
||||
"This option is useful for maximizing tombstone garbage collection by releasing all active commitlog segments."
|
||||
"Set to 0 to disable automatic flushing all tables before major compaction.")
|
||||
/**
|
||||
* @Group Initialization properties
|
||||
* @GroupDescription The minimal properties needed for configuring a cluster.
|
||||
*/
|
||||
@@ -1322,7 +1341,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
||||
, sstable_compression_user_table_options(this, "sstable_compression_user_table_options", value_status::Used, compression_parameters{compression_parameters::algorithm::lz4_with_dicts},
|
||||
"Server-global user table compression options. If enabled, all user tables"
|
||||
"will be compressed using the provided options, unless overridden"
|
||||
"by compression options in the table schema. User tables are all tables in non-system keyspaces. The available options are:\n"
|
||||
"by compression options in the table schema. The available options are:\n"
|
||||
"* sstable_compression: The compression algorithm to use. Supported values: LZ4Compressor, LZ4WithDictsCompressor (default), SnappyCompressor, DeflateCompressor, ZstdCompressor, ZstdWithDictsCompressor, '' (empty string; disables compression).\n"
|
||||
"* chunk_length_in_kb: (Default: 4) The size of chunks to compress in kilobytes. Allowed values are powers of two between 1 and 128.\n"
|
||||
"* crc_check_chance: (Default: 1.0) Not implemented (option value is ignored).\n"
|
||||
@@ -1375,10 +1394,6 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
||||
"Start killing reads after their collective memory consumption goes above $normal_limit * $multiplier.")
|
||||
, reader_concurrency_semaphore_cpu_concurrency(this, "reader_concurrency_semaphore_cpu_concurrency", liveness::LiveUpdate, value_status::Used, 2,
|
||||
"Admit new reads while there are less than this number of requests that need CPU.")
|
||||
, reader_concurrency_semaphore_preemptive_abort_factor(this, "reader_concurrency_semaphore_preemptive_abort_factor", liveness::LiveUpdate, value_status::Used, 0.3,
|
||||
"Admit new reads while their remaining time is more than this factor times their timeout times when arrived to a semaphore. Its vale means\n"
|
||||
"* <= 0.0 means new reads will never get rejected during admission\n"
|
||||
"* >= 1.0 means new reads will always get rejected during admission\n")
|
||||
, view_update_reader_concurrency_semaphore_serialize_limit_multiplier(this, "view_update_reader_concurrency_semaphore_serialize_limit_multiplier", liveness::LiveUpdate, value_status::Used, 2,
|
||||
"Start serializing view update reads after their collective memory consumption goes above $normal_limit * $multiplier.")
|
||||
, view_update_reader_concurrency_semaphore_kill_limit_multiplier(this, "view_update_reader_concurrency_semaphore_kill_limit_multiplier", liveness::LiveUpdate, value_status::Used, 4,
|
||||
@@ -1432,10 +1447,6 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
||||
"SELECT statements with aggregation or GROUP BYs or a secondary index may use this page size for their internal reading data, not the page size specified in the query options.")
|
||||
, alternator_port(this, "alternator_port", value_status::Used, 0, "Alternator API port.")
|
||||
, alternator_https_port(this, "alternator_https_port", value_status::Used, 0, "Alternator API HTTPS port.")
|
||||
, alternator_port_proxy_protocol(this, "alternator_port_proxy_protocol", value_status::Used, 0,
|
||||
"Port on which the Alternator API listens for clients using proxy protocol v2. Disabled (0) by default.")
|
||||
, alternator_https_port_proxy_protocol(this, "alternator_https_port_proxy_protocol", value_status::Used, 0,
|
||||
"Port on which the Alternator HTTPS API listens for clients using proxy protocol v2. Disabled (0) by default.")
|
||||
, alternator_address(this, "alternator_address", value_status::Used, "0.0.0.0", "Alternator API listening address.")
|
||||
, alternator_enforce_authorization(this, "alternator_enforce_authorization", liveness::LiveUpdate, value_status::Used, false, "Enforce checking the authorization header for every request in Alternator.")
|
||||
, alternator_warn_authorization(this, "alternator_warn_authorization", liveness::LiveUpdate, value_status::Used, false, "Count and log warnings about failed authentication or authorization")
|
||||
@@ -1467,15 +1478,6 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
||||
, alternator_max_expression_cache_entries_per_shard(this, "alternator_max_expression_cache_entries_per_shard", liveness::LiveUpdate, value_status::Used, 2000, "Maximum number of cached parsed request expressions, per shard.")
|
||||
, alternator_max_users_query_size_in_trace_output(this, "alternator_max_users_query_size_in_trace_output", liveness::LiveUpdate, value_status::Used, uint64_t(4096),
|
||||
"Maximum size of user's command in trace output (`alternator_op` entry). Larger traces will be truncated and have `<truncated>` message appended - which doesn't count to the maximum limit.")
|
||||
, alternator_describe_table_info_cache_validity_in_seconds(this, "alternator_describe_table_info_cache_validity_in_seconds", liveness::LiveUpdate, value_status::Used, 60 * 60 * 6,
|
||||
"The validity of DescribeTable information - table size in bytes. This is how long calculated value will be reused before recalculation.")
|
||||
, alternator_response_gzip_compression_level(this, "alternator_response_gzip_compression_level", liveness::LiveUpdate, value_status::Used, int8_t(6),
|
||||
"Controls gzip and deflate compression level for Alternator response bodies (if the client requests it via Accept-Encoding header) Default of 6 is a compromise between speed and compression.\n"
|
||||
"Valid values:\n"
|
||||
"\t0 : No compression (disables gzip/deflate)\n"
|
||||
"\t1-9: Compression levels (1 = fastest, 9 = best compression)")
|
||||
, alternator_response_compression_threshold_in_bytes(this, "alternator_response_compression_threshold_in_bytes", liveness::LiveUpdate, value_status::Used, uint64_t(4096),
|
||||
"When the compression is enabled, this value indicates the minimum size of data to compress. Smaller responses will not be compressed.")
|
||||
, abort_on_ebadf(this, "abort_on_ebadf", value_status::Used, true, "Abort the server on incorrect file descriptor access. Throws exception when disabled.")
|
||||
, sanitizer_report_backtrace(this, "sanitizer_report_backtrace", value_status::Used, false,
|
||||
"In debug mode, report log-structured allocator sanitizer violations with a backtrace. Slow.")
|
||||
@@ -1555,8 +1557,6 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
||||
"\tdisabled: New keyspaces use vnodes by default, unless enabled by the tablets={'enabled':true} option\n"
|
||||
"\tenabled: New keyspaces use tablets by default, unless disabled by the tablets={'enabled':false} option\n"
|
||||
"\tenforced: New keyspaces must use tablets. Tablets cannot be disabled using the CREATE KEYSPACE option")
|
||||
, auto_repair_enabled_default(this, "auto_repair_enabled_default", liveness::LiveUpdate, value_status::Used, false, "Set true to enable auto repair for tablet tables by default. The value will be overridden by the per keyspace or per table configuration which is not implemented yet.")
|
||||
, auto_repair_threshold_default_in_seconds(this, "auto_repair_threshold_default_in_seconds", liveness::LiveUpdate, value_status::Used, 24 * 3600 , "Set the default time in seconds for the auto repair threshold for tablet tables. If the time since last repair is bigger than the configured time, the tablet is eligible for auto repair. The value will be overridden by the per keyspace or per table configuration which is not implemented yet.")
|
||||
, view_flow_control_delay_limit_in_ms(this, "view_flow_control_delay_limit_in_ms", liveness::LiveUpdate, value_status::Used, 1000,
|
||||
"The maximal amount of time that materialized-view update flow control may delay responses "
|
||||
"to try to slow down the client and prevent buildup of unfinished view updates. "
|
||||
@@ -1569,43 +1569,11 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
||||
, enable_create_table_with_compact_storage(this, "enable_create_table_with_compact_storage", liveness::LiveUpdate, value_status::Used, false, "Enable the deprecated feature of CREATE TABLE WITH COMPACT STORAGE. This feature will eventually be removed in a future version.")
|
||||
, rf_rack_valid_keyspaces(this, "rf_rack_valid_keyspaces", liveness::MustRestart, value_status::Used, false,
|
||||
"Enforce RF-rack-valid keyspaces. Additionally, if there are existing RF-rack-invalid "
|
||||
"keyspaces, attempting to start a node with this option ON will fail. "
|
||||
"DEPRECATED. Use enforce_rack_list instead.")
|
||||
, enforce_rack_list(this, "enforce_rack_list", liveness::MustRestart, value_status::Used, false,
|
||||
"Enforce rack list for tablet keyspaces. "
|
||||
"When the option is on, CREATE STATEMENT expands numeric rfs to rack lists "
|
||||
"and ALTER STATEMENT is allowed only when rack lists are used in all DCs."
|
||||
"Additionally, if there are existing tablet keyspaces with numeric rf in any DC "
|
||||
"attempting to start a node with this option ON will fail.")
|
||||
"keyspaces, attempting to start a node with this option ON will fail.")
|
||||
// FIXME: make frequency per table in order to reduce work in each iteration.
|
||||
// Bigger tables will take longer to be resized. similar-sized tables can be batched into same iteration.
|
||||
, tablet_load_stats_refresh_interval_in_seconds(this, "tablet_load_stats_refresh_interval_in_seconds", liveness::LiveUpdate, value_status::Used, 60,
|
||||
"Tablet load stats refresh rate in seconds.")
|
||||
, force_capacity_based_balancing(this, "force_capacity_based_balancing", liveness::LiveUpdate, value_status::Used, false,
|
||||
"Forces the load balancer to perform capacity based balancing, instead of size based balancing.")
|
||||
, size_based_balance_threshold_percentage(this, "size_based_balance_threshold_percentage", liveness::LiveUpdate, value_status::Used, 1.0,
|
||||
"Sets the maximum difference in percentages between the most loaded and least loaded nodes, below which the load balancer considers nodes balanced.")
|
||||
, minimal_tablet_size_for_balancing(this, "minimal_tablet_size_for_balancing", liveness::LiveUpdate, value_status::Used, service::default_target_tablet_size / 100,
|
||||
"Sets the minimal tablet size for the load balancer. For any tablet smaller than this, the balancer will use this size instead of the actual tablet size.")
|
||||
/**
|
||||
* @Group Ungrouped properties
|
||||
*/
|
||||
, background_writer_scheduling_quota(this, "background_writer_scheduling_quota", value_status::Deprecated, 1.0,
|
||||
"max cpu usage ratio (between 0 and 1) for compaction process. Not intended for setting in normal operations. Setting it to 1 or higher will disable it, recommended operational setting is 0.5.")
|
||||
, auto_adjust_flush_quota(this, "auto_adjust_flush_quota", value_status::Deprecated, false,
|
||||
"true: auto-adjust memtable shares for flush processes")
|
||||
, memtable_flush_static_shares(this, "memtable_flush_static_shares", liveness::LiveUpdate, value_status::Used, 0,
|
||||
"If set to higher than 0, ignore the controller's output and set the memtable shares statically. Do not set this unless you know what you are doing and suspect a problem in the controller. This option will be retired when the controller reaches more maturity.")
|
||||
, compaction_static_shares(this, "compaction_static_shares", liveness::LiveUpdate, value_status::Used, 0,
|
||||
"If set to higher than 0, ignore the controller's output and set the compaction shares statically. Do not set this unless you know what you are doing and suspect a problem in the controller. This option will be retired when the controller reaches more maturity.")
|
||||
, compaction_max_shares(this, "compaction_max_shares", liveness::LiveUpdate, value_status::Used, default_compaction_maximum_shares,
|
||||
"Set the maximum shares of regular compaction to the specific value. Do not set this unless you know what you are doing and suspect a problem in the controller. This option will be retired when the controller reaches more maturity.")
|
||||
, compaction_enforce_min_threshold(this, "compaction_enforce_min_threshold", liveness::LiveUpdate, value_status::Used, false,
|
||||
"If set to true, enforce the min_threshold option for compactions strictly. If false (default), Scylla may decide to compact even if below min_threshold.")
|
||||
, compaction_flush_all_tables_before_major_seconds(this, "compaction_flush_all_tables_before_major_seconds", value_status::Used, 86400,
|
||||
"Set the minimum interval in seconds between flushing all tables before each major compaction (default is 86400)."
|
||||
"This option is useful for maximizing tombstone garbage collection by releasing all active commitlog segments."
|
||||
"Set to 0 to disable automatic flushing all tables before major compaction.")
|
||||
, default_log_level(this, "default_log_level", value_status::Used, seastar::log_level::info, "Default log level for log messages")
|
||||
, logger_log_level(this, "logger_log_level", value_status::Used, {}, "Map of logger name to log level. Valid log levels are 'error', 'warn', 'info', 'debug' and 'trace'")
|
||||
, log_to_stdout(this, "log_to_stdout", value_status::Used, true, "Send log output to stdout")
|
||||
@@ -1796,21 +1764,6 @@ const db::extensions& db::config::extensions() const {
|
||||
return *_extensions;
|
||||
}
|
||||
|
||||
// Returns the compression parameters to apply to user tables.
//
// The configured sstable_compression_user_table_options value is returned
// unchanged when any of the following holds:
//   - the option reports is_set() (presumably: it was explicitly provided
//     rather than left at its default -- confirm against named_value),
//   - dicts_feature_enabled is true,
//   - the configured options do not use a dictionary compressor.
// Otherwise the configured algorithm is replaced with the result of
// compression_parameters::non_dict_equivalent() and a new
// compression_parameters object is built from the adjusted option map.
compression_parameters db::config::get_sstable_compression_user_table_options(bool dicts_feature_enabled) const {
|
||||
if (sstable_compression_user_table_options.is_set()
|
||||
|| dicts_feature_enabled
|
||||
|| !sstable_compression_user_table_options().uses_dictionary_compressor()) {
|
||||
return sstable_compression_user_table_options();
|
||||
} else {
|
||||
// Fall back to non-dict if dictionary compression is not enabled cluster-wide.
|
||||
auto options = sstable_compression_user_table_options();
|
||||
auto params = options.get_options();
|
||||
auto algo = compression_parameters::non_dict_equivalent(options.get_algorithm());
|
||||
// Only the algorithm entry is overwritten; every other entry of the option map is carried over as-is.
params[compression_parameters::SSTABLE_COMPRESSION] = sstring(compression_parameters::algorithm_to_name(algo));
|
||||
return compression_parameters{params};
|
||||
}
|
||||
}
|
||||
|
||||
std::map<sstring, db::experimental_features_t::feature> db::experimental_features_t::map() {
|
||||
// We decided against using the construct-on-first-use idiom here:
|
||||
// https://github.com/scylladb/scylla/pull/5369#discussion_r353614807
|
||||
|
||||
33
db/config.hh
33
db/config.hh
@@ -185,6 +185,13 @@ public:
|
||||
* All values and documentation taken from
|
||||
* http://docs.datastax.com/en/cassandra/2.1/cassandra/configuration/configCassandra_yaml_r.html
|
||||
*/
|
||||
named_value<double> background_writer_scheduling_quota;
|
||||
named_value<bool> auto_adjust_flush_quota;
|
||||
named_value<float> memtable_flush_static_shares;
|
||||
named_value<float> compaction_static_shares;
|
||||
named_value<float> compaction_max_shares;
|
||||
named_value<bool> compaction_enforce_min_threshold;
|
||||
named_value<uint32_t> compaction_flush_all_tables_before_major_seconds;
|
||||
named_value<sstring> cluster_name;
|
||||
named_value<sstring> listen_address;
|
||||
named_value<sstring> listen_interface;
|
||||
@@ -412,13 +419,7 @@ public:
|
||||
named_value<bool> enable_sstables_mc_format;
|
||||
named_value<bool> enable_sstables_md_format;
|
||||
named_value<sstring> sstable_format;
|
||||
|
||||
// NOTE: Do not use this option directly.
|
||||
// Use get_sstable_compression_user_table_options() instead.
|
||||
named_value<compression_parameters> sstable_compression_user_table_options;
|
||||
|
||||
compression_parameters get_sstable_compression_user_table_options(bool dicts_feature_enabled) const;
|
||||
|
||||
named_value<bool> sstable_compression_dictionaries_allow_in_ddl;
|
||||
named_value<bool> sstable_compression_dictionaries_enable_writing;
|
||||
named_value<float> sstable_compression_dictionaries_memory_budget_fraction;
|
||||
@@ -439,7 +440,6 @@ public:
|
||||
named_value<uint32_t> reader_concurrency_semaphore_serialize_limit_multiplier;
|
||||
named_value<uint32_t> reader_concurrency_semaphore_kill_limit_multiplier;
|
||||
named_value<uint32_t> reader_concurrency_semaphore_cpu_concurrency;
|
||||
named_value<float> reader_concurrency_semaphore_preemptive_abort_factor;
|
||||
named_value<uint32_t> view_update_reader_concurrency_semaphore_serialize_limit_multiplier;
|
||||
named_value<uint32_t> view_update_reader_concurrency_semaphore_kill_limit_multiplier;
|
||||
named_value<uint32_t> view_update_reader_concurrency_semaphore_cpu_concurrency;
|
||||
@@ -464,8 +464,6 @@ public:
|
||||
|
||||
named_value<uint16_t> alternator_port;
|
||||
named_value<uint16_t> alternator_https_port;
|
||||
named_value<uint16_t> alternator_port_proxy_protocol;
|
||||
named_value<uint16_t> alternator_https_port_proxy_protocol;
|
||||
named_value<sstring> alternator_address;
|
||||
named_value<bool> alternator_enforce_authorization;
|
||||
named_value<bool> alternator_warn_authorization;
|
||||
@@ -479,9 +477,6 @@ public:
|
||||
named_value<bool> alternator_allow_system_table_write;
|
||||
named_value<uint32_t> alternator_max_expression_cache_entries_per_shard;
|
||||
named_value<uint64_t> alternator_max_users_query_size_in_trace_output;
|
||||
named_value<uint32_t> alternator_describe_table_info_cache_validity_in_seconds;
|
||||
named_value<int> alternator_response_gzip_compression_level;
|
||||
named_value<uint32_t> alternator_response_compression_threshold_in_bytes;
|
||||
|
||||
named_value<bool> abort_on_ebadf;
|
||||
|
||||
@@ -571,8 +566,6 @@ public:
|
||||
named_value<double> topology_barrier_stall_detector_threshold_seconds;
|
||||
named_value<bool> enable_tablets;
|
||||
named_value<enum_option<tablets_mode_t>> tablets_mode_for_new_keyspaces;
|
||||
named_value<bool> auto_repair_enabled_default;
|
||||
named_value<int32_t> auto_repair_threshold_default_in_seconds;
|
||||
|
||||
bool enable_tablets_by_default() const noexcept {
|
||||
switch (tablets_mode_for_new_keyspaces()) {
|
||||
@@ -599,20 +592,8 @@ public:
|
||||
named_value<bool> enable_create_table_with_compact_storage;
|
||||
|
||||
named_value<bool> rf_rack_valid_keyspaces;
|
||||
named_value<bool> enforce_rack_list;
|
||||
|
||||
named_value<uint32_t> tablet_load_stats_refresh_interval_in_seconds;
|
||||
named_value<bool> force_capacity_based_balancing;
|
||||
named_value<float> size_based_balance_threshold_percentage;
|
||||
named_value<uint64_t> minimal_tablet_size_for_balancing;
|
||||
|
||||
named_value<double> background_writer_scheduling_quota;
|
||||
named_value<bool> auto_adjust_flush_quota;
|
||||
named_value<float> memtable_flush_static_shares;
|
||||
named_value<float> compaction_static_shares;
|
||||
named_value<float> compaction_max_shares;
|
||||
named_value<bool> compaction_enforce_min_threshold;
|
||||
named_value<uint32_t> compaction_flush_all_tables_before_major_seconds;
|
||||
|
||||
static const sstring default_tls_priority;
|
||||
private:
|
||||
|
||||
@@ -31,23 +31,19 @@ size_t quorum_for(const locator::effective_replication_map& erm) {
|
||||
return replication_factor ? (replication_factor / 2) + 1 : 0;
|
||||
}
|
||||
|
||||
static size_t get_replication_factor_for_dc(const locator::effective_replication_map& erm, const sstring& dc) {
|
||||
size_t local_quorum_for(const locator::effective_replication_map& erm, const sstring& dc) {
|
||||
using namespace locator;
|
||||
|
||||
const auto& rs = erm.get_replication_strategy();
|
||||
|
||||
if (rs.get_type() == replication_strategy_type::network_topology) {
|
||||
const network_topology_strategy* nts =
|
||||
const network_topology_strategy* nrs =
|
||||
static_cast<const network_topology_strategy*>(&rs);
|
||||
return nts->get_replication_factor(dc);
|
||||
size_t replication_factor = nrs->get_replication_factor(dc);
|
||||
return replication_factor ? (replication_factor / 2) + 1 : 0;
|
||||
}
|
||||
|
||||
return erm.get_replication_factor();
|
||||
}
|
||||
|
||||
size_t local_quorum_for(const locator::effective_replication_map& erm, const sstring& dc) {
|
||||
auto rf = get_replication_factor_for_dc(erm, dc);
|
||||
return rf ? (rf / 2) + 1 : 0;
|
||||
return quorum_for(erm);
|
||||
}
|
||||
|
||||
size_t block_for_local_serial(const locator::effective_replication_map& erm) {
|
||||
@@ -192,30 +188,18 @@ void assure_sufficient_live_nodes(
|
||||
return pending <= live ? live - pending : 0;
|
||||
};
|
||||
|
||||
auto make_rf_zero_error_msg = [cl] (const sstring& local_dc) {
|
||||
return format("Cannot achieve consistency level {} in datacenter '{}' with replication factor 0. "
|
||||
"Ensure the keyspace is replicated to this datacenter or use a non-local consistency level.", cl, local_dc);
|
||||
};
|
||||
|
||||
const auto& topo = erm.get_topology();
|
||||
const sstring& local_dc = topo.get_datacenter();
|
||||
|
||||
switch (cl) {
|
||||
case consistency_level::ANY:
|
||||
// local hint is acceptable, and local node is always live
|
||||
break;
|
||||
case consistency_level::LOCAL_ONE:
|
||||
if (size_t local_rf = get_replication_factor_for_dc(erm, local_dc); local_rf == 0) {
|
||||
throw exceptions::unavailable_exception(make_rf_zero_error_msg(local_dc), cl, 1, 0);
|
||||
}
|
||||
if (topo.count_local_endpoints(live_endpoints) < topo.count_local_endpoints(pending_endpoints) + 1) {
|
||||
throw exceptions::unavailable_exception(cl, 1, 0);
|
||||
}
|
||||
break;
|
||||
case consistency_level::LOCAL_QUORUM: {
|
||||
if (size_t local_rf = get_replication_factor_for_dc(erm, local_dc); local_rf == 0) {
|
||||
throw exceptions::unavailable_exception(make_rf_zero_error_msg(local_dc), cl, need, 0);
|
||||
}
|
||||
size_t local_live = topo.count_local_endpoints(live_endpoints);
|
||||
size_t pending = topo.count_local_endpoints(pending_endpoints);
|
||||
if (local_live < need + pending) {
|
||||
|
||||
@@ -158,7 +158,7 @@ void hint_endpoint_manager::cancel_draining() noexcept {
|
||||
_sender.cancel_draining();
|
||||
}
|
||||
|
||||
hint_endpoint_manager::hint_endpoint_manager(const endpoint_id& key, fs::path hint_directory, manager& shard_manager, scheduling_group send_sg)
|
||||
hint_endpoint_manager::hint_endpoint_manager(const endpoint_id& key, fs::path hint_directory, manager& shard_manager)
|
||||
: _key(key)
|
||||
, _shard_manager(shard_manager)
|
||||
, _store_gate("hint_endpoint_manager")
|
||||
@@ -169,7 +169,7 @@ hint_endpoint_manager::hint_endpoint_manager(const endpoint_id& key, fs::path hi
|
||||
// Approximate the position of the last written hint by using the same formula as for segment id calculation in commitlog
|
||||
// TODO: Should this logic be deduplicated with what is in the commitlog?
|
||||
, _last_written_rp(this_shard_id(), std::chrono::duration_cast<std::chrono::milliseconds>(runtime::get_boot_time().time_since_epoch()).count())
|
||||
, _sender(*this, _shard_manager.local_storage_proxy(), _shard_manager.local_db(), _shard_manager.local_gossiper(), send_sg)
|
||||
, _sender(*this, _shard_manager.local_storage_proxy(), _shard_manager.local_db(), _shard_manager.local_gossiper())
|
||||
{}
|
||||
|
||||
hint_endpoint_manager::hint_endpoint_manager(hint_endpoint_manager&& other)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user