Compare commits

..

1 Commits

Author SHA1 Message Date
Kamil Braun
33901e1681 Fix version numbers in upgrade page title 2022-11-02 15:36:51 +01:00
921 changed files with 20784 additions and 44336 deletions

24
.github/CODEOWNERS vendored
View File

@@ -12,7 +12,7 @@ test/cql/cdc_* @kbr- @elcallio @piodul @jul-stas
test/boost/cdc_* @kbr- @elcallio @piodul @jul-stas
# COMMITLOG / BATCHLOG
db/commitlog/* @elcallio @eliransin
db/commitlog/* @elcallio
db/batch* @elcallio
# COORDINATOR
@@ -25,7 +25,7 @@ compaction/* @raphaelsc @nyh
transport/*
# CQL QUERY LANGUAGE
cql3/* @tgrabiec @cvybhu @nyh
cql3/* @tgrabiec @psarna @cvybhu
# COUNTERS
counters* @jul-stas
@@ -33,7 +33,7 @@ tests/counter_test* @jul-stas
# DOCS
docs/* @annastuchlik @tzach
docs/alternator @annastuchlik @tzach @nyh @havaker @nuivall
docs/alternator @annastuchlik @tzach @nyh @psarna
# GOSSIP
gms/* @tgrabiec @asias
@@ -45,9 +45,9 @@ dist/docker/*
utils/logalloc* @tgrabiec
# MATERIALIZED VIEWS
db/view/* @nyh @cvybhu @piodul
cql3/statements/*view* @nyh @cvybhu @piodul
test/boost/view_* @nyh @cvybhu @piodul
db/view/* @nyh @psarna
cql3/statements/*view* @nyh @psarna
test/boost/view_* @nyh @psarna
# PACKAGING
dist/* @syuu1228
@@ -62,9 +62,9 @@ service/migration* @tgrabiec @nyh
schema* @tgrabiec @nyh
# SECONDARY INDEXES
index/* @nyh @cvybhu @piodul
cql3/statements/*index* @nyh @cvybhu @piodul
test/boost/*index* @nyh @cvybhu @piodul
db/index/* @nyh @psarna
cql3/statements/*index* @nyh @psarna
test/boost/*index* @nyh @psarna
# SSTABLES
sstables/* @tgrabiec @raphaelsc @nyh
@@ -74,11 +74,11 @@ streaming/* @tgrabiec @asias
service/storage_service.* @tgrabiec @asias
# ALTERNATOR
alternator/* @nyh @havaker @nuivall
test/alternator/* @nyh @havaker @nuivall
alternator/* @nyh @psarna
test/alternator/* @nyh @psarna
# HINTED HANDOFF
db/hints/* @piodul @vladzcloudius @eliransin
db/hints/* @piodul @vladzcloudius
# REDIS
redis/* @nyh @syuu1228

View File

@@ -1,17 +0,0 @@
name: "Docs / Amplify enhanced"
on: issue_comment
jobs:
build:
runs-on: ubuntu-latest
if: ${{ github.event.issue.pull_request }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Amplify enhanced
env:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
uses: scylladb/sphinx-scylladb-theme/.github/actions/amplify-enhanced@master

8
.gitmodules vendored
View File

@@ -1,11 +1,17 @@
[submodule "seastar"]
path = seastar
url = ../scylla-seastar
url = ../seastar
ignore = dirty
[submodule "swagger-ui"]
path = swagger-ui
url = ../scylla-swagger-ui
ignore = dirty
[submodule "libdeflate"]
path = libdeflate
url = ../libdeflate
[submodule "abseil"]
path = abseil
url = ../abseil-cpp
[submodule "scylla-jmx"]
path = tools/jmx
url = ../scylla-jmx

View File

@@ -42,13 +42,22 @@ set(Seastar_CXX_FLAGS ${cxx_coro_flag} ${target_arch_flag} CACHE INTERNAL "" FOR
set(Seastar_CXX_DIALECT gnu++20 CACHE INTERNAL "" FORCE)
add_subdirectory(seastar)
add_subdirectory(abseil)
# Exclude absl::strerror from the default "all" target since it's not
# used in Scylla build and, moreover, makes use of deprecated glibc APIs,
# such as sys_nerr, which are not exposed from "stdio.h" since glibc 2.32,
# which happens to be the case for recent Fedora distribution versions.
#
# Need to use the internal "absl_strerror" target name instead of namespaced
# variant because `set_target_properties` does not understand the latter form,
# unfortunately.
set_target_properties(absl_strerror PROPERTIES EXCLUDE_FROM_ALL TRUE)
# System libraries dependencies
find_package(Boost COMPONENTS filesystem program_options system thread regex REQUIRED)
find_package(Lua REQUIRED)
find_package(ZLIB REQUIRED)
find_package(ICU COMPONENTS uc REQUIRED)
find_package(Abseil REQUIRED)
set(scylla_build_dir "${CMAKE_BINARY_DIR}/build/${BUILD_TYPE}")
set(scylla_gen_build_dir "${scylla_build_dir}/gen")
@@ -737,6 +746,7 @@ target_compile_definitions(scylla PRIVATE XXH_PRIVATE_API HAVE_LZ4_COMPRESS_DEFA
target_include_directories(scylla PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}"
libdeflate
abseil
"${scylla_gen_build_dir}")
###

View File

@@ -34,7 +34,7 @@ END
DATE=""
while [ $# -gt 0 ]; do
while [[ $# -gt 0 ]]; do
opt="$1"
case $opt in
-h|--help)
@@ -72,7 +72,7 @@ fi
# Default scylla product/version tags
PRODUCT=scylla
VERSION=5.2.19
VERSION=5.2.0-dev
if test -f version
then

1
abseil Submodule

Submodule abseil added at 7f3c0d7811

View File

@@ -141,7 +141,7 @@ future<std::string> get_key_from_roles(service::storage_proxy& proxy, std::strin
service::storage_proxy::coordinator_query_result qr = co_await proxy.query(schema, std::move(command), std::move(partition_ranges), cl,
service::storage_proxy::coordinator_query_options(executor::default_timeout(), empty_service_permit(), client_state));
cql3::selection::result_set_builder builder(*selection, gc_clock::now());
cql3::selection::result_set_builder builder(*selection, gc_clock::now(), cql_serialization_format::latest());
query::result_view::consume(*qr.query_result, partition_slice, cql3::selection::result_set_builder::visitor(builder, *schema, *selection));
auto result_set = builder.build();

View File

@@ -23,7 +23,7 @@ namespace alternator {
// api_error into a JSON object, and that is returned to the user.
class api_error final : public std::exception {
public:
using status_type = http::reply::status_type;
using status_type = httpd::reply::status_type;
status_type _http_code;
std::string _type;
std::string _msg;
@@ -77,7 +77,7 @@ public:
return api_error("TableNotFoundException", std::move(msg));
}
static api_error internal(std::string msg) {
return api_error("InternalServerError", std::move(msg), http::reply::status_type::internal_server_error);
return api_error("InternalServerError", std::move(msg), reply::status_type::internal_server_error);
}
// Provide the "std::exception" interface, to make it easier to print this

View File

@@ -34,7 +34,6 @@
#include "expressions.hh"
#include "conditions.hh"
#include "cql3/constants.hh"
#include "cql3/util.hh"
#include <optional>
#include "utils/overloaded_functor.hh"
#include <seastar/json/json_elements.hh>
@@ -88,20 +87,17 @@ json::json_return_type make_streamed(rjson::value&& value) {
// move objects to coroutine frame.
auto los = std::move(os);
auto lrs = std::move(rs);
std::exception_ptr ex;
try {
co_await rjson::print(*lrs, los);
co_await los.flush();
co_await los.close();
} catch (...) {
// at this point, we cannot really do anything. HTTP headers and return code are
// already written, and quite potentially a portion of the content data.
// just log + rethrow. It is probably better the HTTP server closes connection
// abruptly or something...
ex = std::current_exception();
elogger.error("Exception during streaming HTTP response: {}", ex);
}
co_await los.close();
if (ex) {
co_await coroutine::return_exception_ptr(std::move(ex));
elogger.error("Unhandled exception in data streaming: {}", std::current_exception());
throw;
}
co_return;
};
@@ -764,6 +760,7 @@ future<executor::request_return_type> executor::tag_resource(client_state& clien
co_return api_error::access_denied("Incorrect resource identifier");
}
schema_ptr schema = get_table_from_arn(_proxy, rjson::to_string_view(*arn));
std::map<sstring, sstring> tags_map = get_tags_of_table_or_throw(schema);
const rjson::value* tags = rjson::find(request, "Tags");
if (!tags || !tags->IsArray()) {
co_return api_error::validation("Cannot parse tags");
@@ -771,9 +768,8 @@ future<executor::request_return_type> executor::tag_resource(client_state& clien
if (tags->Size() < 1) {
co_return api_error::validation("The number of tags must be at least 1") ;
}
co_await db::modify_tags(_mm, schema->ks_name(), schema->cf_name(), [tags](std::map<sstring, sstring>& tags_map) {
update_tags_map(*tags, tags_map, update_tags_action::add_tags);
});
update_tags_map(*tags, tags_map, update_tags_action::add_tags);
co_await db::update_tags(_mm, schema, std::move(tags_map));
co_return json_string("");
}
@@ -791,9 +787,9 @@ future<executor::request_return_type> executor::untag_resource(client_state& cli
schema_ptr schema = get_table_from_arn(_proxy, rjson::to_string_view(*arn));
co_await db::modify_tags(_mm, schema->ks_name(), schema->cf_name(), [tags](std::map<sstring, sstring>& tags_map) {
update_tags_map(*tags, tags_map, update_tags_action::delete_tags);
});
std::map<sstring, sstring> tags_map = get_tags_of_table_or_throw(schema);
update_tags_map(*tags, tags_map, update_tags_action::delete_tags);
co_await db::update_tags(_mm, schema, std::move(tags_map));
co_return json_string("");
}
@@ -931,10 +927,9 @@ static future<executor::request_return_type> create_table_on_shard0(tracing::tra
if (!range_key.empty() && range_key != view_hash_key && range_key != view_range_key) {
add_column(view_builder, range_key, attribute_definitions, column_kind::clustering_key);
}
sstring where_clause = format("{} IS NOT NULL", cql3::util::maybe_quote(view_hash_key));
sstring where_clause = "\"" + view_hash_key + "\" IS NOT NULL";
if (!view_range_key.empty()) {
where_clause = format("{} AND {} IS NOT NULL", where_clause,
cql3::util::maybe_quote(view_range_key));
where_clause = where_clause + " AND \"" + view_hash_key + "\" IS NOT NULL";
}
where_clauses.push_back(std::move(where_clause));
view_builders.emplace_back(std::move(view_builder));
@@ -989,10 +984,9 @@ static future<executor::request_return_type> create_table_on_shard0(tracing::tra
// Note above we don't need to add virtual columns, as all
// base columns were copied to view. TODO: reconsider the need
// for virtual columns when we support Projection.
sstring where_clause = format("{} IS NOT NULL", cql3::util::maybe_quote(view_hash_key));
sstring where_clause = "\"" + view_hash_key + "\" IS NOT NULL";
if (!view_range_key.empty()) {
where_clause = format("{} AND {} IS NOT NULL", where_clause,
cql3::util::maybe_quote(view_range_key));
where_clause = where_clause + " AND \"" + view_range_key + "\" IS NOT NULL";
}
where_clauses.push_back(std::move(where_clause));
view_builders.emplace_back(std::move(view_builder));
@@ -2308,7 +2302,7 @@ void executor::describe_single_item(const cql3::selection::selection& selection,
rjson::add_with_string_name(field, type_to_string((*column_it)->type), json_key_column_value(*cell, **column_it));
}
} else if (cell) {
auto deserialized = attrs_type()->deserialize(*cell);
auto deserialized = attrs_type()->deserialize(*cell, cql_serialization_format::latest());
auto keys_and_values = value_cast<map_type_impl::native_type>(deserialized);
for (auto entry : keys_and_values) {
std::string attr_name = value_cast<sstring>(entry.first);
@@ -2343,7 +2337,7 @@ std::optional<rjson::value> executor::describe_single_item(schema_ptr schema,
const std::optional<attrs_to_get>& attrs_to_get) {
rjson::value item = rjson::empty_object();
cql3::selection::result_set_builder builder(selection, gc_clock::now());
cql3::selection::result_set_builder builder(selection, gc_clock::now(), cql_serialization_format::latest());
query::result_view::consume(query_result, slice, cql3::selection::result_set_builder::visitor(builder, *schema, selection));
auto result_set = builder.build();
@@ -2361,22 +2355,21 @@ std::optional<rjson::value> executor::describe_single_item(schema_ptr schema,
return item;
}
future<std::vector<rjson::value>> executor::describe_multi_item(schema_ptr schema,
const query::partition_slice&& slice,
shared_ptr<cql3::selection::selection> selection,
foreign_ptr<lw_shared_ptr<query::result>> query_result,
shared_ptr<const std::optional<attrs_to_get>> attrs_to_get) {
cql3::selection::result_set_builder builder(*selection, gc_clock::now());
query::result_view::consume(*query_result, slice, cql3::selection::result_set_builder::visitor(builder, *schema, *selection));
std::vector<rjson::value> executor::describe_multi_item(schema_ptr schema,
const query::partition_slice& slice,
const cql3::selection::selection& selection,
const query::result& query_result,
const std::optional<attrs_to_get>& attrs_to_get) {
cql3::selection::result_set_builder builder(selection, gc_clock::now(), cql_serialization_format::latest());
query::result_view::consume(query_result, slice, cql3::selection::result_set_builder::visitor(builder, *schema, selection));
auto result_set = builder.build();
std::vector<rjson::value> ret;
for (auto& result_row : result_set->rows()) {
rjson::value item = rjson::empty_object();
describe_single_item(*selection, result_row, *attrs_to_get, item);
describe_single_item(selection, result_row, attrs_to_get, item);
ret.push_back(std::move(item));
co_await coroutine::maybe_yield();
}
co_return ret;
return ret;
}
static bool check_needs_read_before_write(const parsed::value& v) {
@@ -3258,7 +3251,8 @@ future<executor::request_return_type> executor::batch_get_item(client_state& cli
service::storage_proxy::coordinator_query_options(executor::default_timeout(), permit, client_state, trace_state)).then(
[schema = rs.schema, partition_slice = std::move(partition_slice), selection = std::move(selection), attrs_to_get = rs.attrs_to_get] (service::storage_proxy::coordinator_query_result qr) mutable {
utils::get_local_injector().inject("alternator_batch_get_item", [] { throw std::runtime_error("batch_get_item injection"); });
return describe_multi_item(std::move(schema), std::move(partition_slice), std::move(selection), std::move(qr.query_result), std::move(attrs_to_get));
std::vector<rjson::value> jsons = describe_multi_item(schema, partition_slice, *selection, *qr.query_result, *attrs_to_get);
return make_ready_future<std::vector<rjson::value>>(std::move(jsons));
});
response_futures.push_back(std::move(f));
}
@@ -3514,7 +3508,7 @@ public:
rjson::add_with_string_name(field, type_to_string((*_column_it)->type), json_key_column_value(bv, **_column_it));
}
} else {
auto deserialized = attrs_type()->deserialize(bv);
auto deserialized = attrs_type()->deserialize(bv, cql_serialization_format::latest());
auto keys_and_values = value_cast<map_type_impl::native_type>(deserialized);
for (auto entry : keys_and_values) {
std::string attr_name = value_cast<sstring>(entry.first);
@@ -3648,7 +3642,7 @@ static future<executor::request_return_type> do_query(service::storage_proxy& pr
if (exclusive_start_key) {
partition_key pk = pk_from_json(*exclusive_start_key, schema);
auto pos = position_in_partition::for_partition_start();
auto pos = position_in_partition(position_in_partition::partition_start_tag_t());
if (schema->clustering_key_size() > 0) {
pos = pos_from_json(*exclusive_start_key, schema);
}

View File

@@ -222,11 +222,11 @@ public:
const query::result&,
const std::optional<attrs_to_get>&);
static future<std::vector<rjson::value>> describe_multi_item(schema_ptr schema,
const query::partition_slice&& slice,
shared_ptr<cql3::selection::selection> selection,
foreign_ptr<lw_shared_ptr<query::result>> query_result,
shared_ptr<const std::optional<attrs_to_get>> attrs_to_get);
static std::vector<rjson::value> describe_multi_item(schema_ptr schema,
const query::partition_slice& slice,
const cql3::selection::selection& selection,
const query::result& query_result,
const std::optional<attrs_to_get>& attrs_to_get);
static void describe_single_item(const cql3::selection::selection&,
const std::vector<bytes_opt>&,

View File

@@ -73,7 +73,7 @@ struct from_json_visitor {
}
// default
void operator()(const abstract_type& t) const {
bo.write(from_json_object(t, v));
bo.write(from_json_object(t, v, cql_serialization_format::internal()));
}
};
@@ -279,7 +279,7 @@ position_in_partition pos_from_json(const rjson::value& item, schema_ptr schema)
return position_in_partition(region, weight, region == partition_region::clustered ? std::optional(std::move(ck)) : std::nullopt);
}
if (ck.is_empty()) {
return position_in_partition::for_partition_start();
return position_in_partition(position_in_partition::partition_start_tag_t());
}
return position_in_partition::for_key(std::move(ck));
}

View File

@@ -28,8 +28,6 @@
static logging::logger slogger("alternator-server");
using namespace httpd;
using request = http::request;
using reply = http::reply;
namespace alternator {

View File

@@ -27,7 +27,7 @@ using chunked_content = rjson::chunked_content;
class server {
static constexpr size_t content_length_limit = 16*MB;
using alternator_callback = std::function<future<executor::request_return_type>(executor&, executor::client_state&,
tracing::trace_state_ptr, service_permit, rjson::value, std::unique_ptr<http::request>)>;
tracing::trace_state_ptr, service_permit, rjson::value, std::unique_ptr<request>)>;
using alternator_callbacks_map = std::unordered_map<std::string_view, alternator_callback>;
http_server _http_server;
@@ -76,8 +76,8 @@ public:
private:
void set_routes(seastar::httpd::routes& r);
// If verification succeeds, returns the authenticated user's username
future<std::string> verify_signature(const seastar::http::request&, const chunked_content&);
future<executor::request_return_type> handle_api_request(std::unique_ptr<http::request> req);
future<std::string> verify_signature(const seastar::httpd::request&, const chunked_content&);
future<executor::request_return_type> handle_api_request(std::unique_ptr<request> req);
};
}

View File

@@ -145,24 +145,19 @@ future<alternator::executor::request_return_type> alternator::executor::list_str
auto table = find_table(_proxy, request);
auto db = _proxy.data_dictionary();
auto cfs = db.get_tables();
auto i = cfs.begin();
auto e = cfs.end();
if (limit < 1) {
throw api_error::validation("Limit must be 1 or more");
}
// # 12601 (maybe?) - sort the set of tables on ID. This should ensure we never
// generate duplicates in a paged listing here. Can obviously miss things if they
// are added between paged calls and end up with a "smaller" UUID/ARN, but that
// is to be expected.
std::sort(cfs.begin(), cfs.end(), [](const data_dictionary::table& t1, const data_dictionary::table& t2) {
return t1.schema()->id().uuid() < t2.schema()->id().uuid();
});
auto i = cfs.begin();
auto e = cfs.end();
// TODO: the unordered_map here is not really well suited for partial
// querying - we're sorting on local hash order, and creating a table
// between queries may or may not miss info. But that should be rare,
// and we can probably expect this to be a single call.
if (streams_start) {
i = std::find_if(i, e, [&](const data_dictionary::table& t) {
i = std::find_if(i, e, [&](data_dictionary::table t) {
return t.schema()->id().uuid() == streams_start
&& cdc::get_base_table(db.real_database(), *t.schema())
&& is_alternator_keyspace(t.schema()->ks_name())
@@ -888,7 +883,7 @@ future<executor::request_return_type> executor::get_records(client_state& client
return _proxy.query(schema, std::move(command), std::move(partition_ranges), cl, service::storage_proxy::coordinator_query_options(default_timeout(), std::move(permit), client_state)).then(
[this, schema, partition_slice = std::move(partition_slice), selection = std::move(selection), start_time = std::move(start_time), limit, key_names = std::move(key_names), attr_names = std::move(attr_names), type, iter, high_ts] (service::storage_proxy::coordinator_query_result qr) mutable {
cql3::selection::result_set_builder builder(*selection, gc_clock::now());
cql3::selection::result_set_builder builder(*selection, gc_clock::now(), cql_serialization_format::latest());
query::result_view::consume(*qr.query_result, partition_slice, cql3::selection::result_set_builder::visitor(builder, *schema, *selection));
auto result_set = builder.build();

View File

@@ -8,7 +8,6 @@
#include <chrono>
#include <cstdint>
#include <exception>
#include <optional>
#include <seastar/core/sstring.hh>
#include <seastar/core/coroutine.hh>
@@ -18,7 +17,6 @@
#include <seastar/coroutine/maybe_yield.hh>
#include <boost/multiprecision/cpp_int.hpp>
#include "exceptions/exceptions.hh"
#include "gms/gossiper.hh"
#include "gms/inet_address.hh"
#include "inet_address_vectors.hh"
@@ -94,25 +92,24 @@ future<executor::request_return_type> executor::update_time_to_live(client_state
}
sstring attribute_name(v->GetString(), v->GetStringLength());
co_await db::modify_tags(_mm, schema->ks_name(), schema->cf_name(), [&](std::map<sstring, sstring>& tags_map) {
if (enabled) {
if (tags_map.contains(TTL_TAG_KEY)) {
throw api_error::validation("TTL is already enabled");
}
tags_map[TTL_TAG_KEY] = attribute_name;
} else {
auto i = tags_map.find(TTL_TAG_KEY);
if (i == tags_map.end()) {
throw api_error::validation("TTL is already disabled");
} else if (i->second != attribute_name) {
throw api_error::validation(format(
"Requested to disable TTL on attribute {}, but a different attribute {} is enabled.",
attribute_name, i->second));
}
tags_map.erase(TTL_TAG_KEY);
std::map<sstring, sstring> tags_map = get_tags_of_table_or_throw(schema);
if (enabled) {
if (tags_map.contains(TTL_TAG_KEY)) {
co_return api_error::validation("TTL is already enabled");
}
});
tags_map[TTL_TAG_KEY] = attribute_name;
} else {
auto i = tags_map.find(TTL_TAG_KEY);
if (i == tags_map.end()) {
co_return api_error::validation("TTL is already disabled");
} else if (i->second != attribute_name) {
co_return api_error::validation(format(
"Requested to disable TTL on attribute {}, but a different attribute {} is enabled.",
attribute_name, i->second));
}
tags_map.erase(TTL_TAG_KEY);
}
co_await db::update_tags(_mm, schema, std::move(tags_map));
// Prepare the response, which contains a TimeToLiveSpecification
// basically identical to the request's
rjson::value response = rjson::empty_object();
@@ -551,34 +548,13 @@ static future<> scan_table_ranges(
co_return;
}
auto units = co_await get_units(page_sem, 1);
// We don't need to limit page size in number of rows because there is
// a builtin limit of the page's size in bytes. Setting this limit to
// 1 is useful for debugging the paging code with moderate-size data.
// We don't to limit page size in number of rows because there is a
// builtin limit of the page's size in bytes. Setting this limit to 1
// is useful for debugging the paging code with moderate-size data.
uint32_t limit = std::numeric_limits<uint32_t>::max();
// Read a page, and if that times out, try again after a small sleep.
// If we didn't catch the timeout exception, it would cause the scan
// be aborted and only be restarted at the next scanning period.
// If we retry too many times, give up and restart the scan later.
std::unique_ptr<cql3::result_set> rs;
for (int retries=0; ; retries++) {
try {
// FIXME: which timeout?
rs = co_await p->fetch_page(limit, gc_clock::now(), executor::default_timeout());
break;
} catch(exceptions::read_timeout_exception&) {
tlogger.warn("expiration scanner read timed out, will retry: {}",
std::current_exception());
}
// If we didn't break out of this loop, add a minimal sleep
if (retries >= 10) {
// Don't get stuck forever asking the same page, maybe there's
// a bug or a real problem in several replicas. Give up on
// this scan an retry the scan from a random position later,
// in the next scan period.
throw runtime_exception("scanner thread failed after too many timeouts for the same page");
}
co_await sleep_abortable(std::chrono::seconds(1), abort_source);
}
// FIXME: which timeout?
// FIXME: if read times out, need to retry it.
std::unique_ptr<cql3::result_set> rs = co_await p->fetch_page(limit, gc_clock::now(), executor::default_timeout());
auto rows = rs->rows();
auto meta = rs->get_metadata().get_names();
std::optional<unsigned> expiration_column;

View File

@@ -1,15 +0,0 @@
version: 1
applications:
- frontend:
phases:
build:
commands:
- make setupenv
- make dirhtml
artifacts:
baseDirectory: _build/dirhtml
files:
- '**/*'
cache:
paths: []
appRoot: docs

View File

@@ -1,43 +0,0 @@
{
"apiVersion":"0.0.1",
"swaggerVersion":"1.2",
"basePath":"{{Protocol}}://{{Host}}",
"resourcePath":"/raft",
"produces":[
"application/json"
],
"apis":[
{
"path":"/raft/trigger_snapshot/{group_id}",
"operations":[
{
"method":"POST",
"summary":"Triggers snapshot creation and log truncation for the given Raft group",
"type":"string",
"nickname":"trigger_snapshot",
"produces":[
"application/json"
],
"parameters":[
{
"name":"group_id",
"description":"The ID of the group which should get snapshotted",
"required":true,
"allowMultiple":false,
"type":"string",
"paramType":"path"
},
{
"name":"timeout",
"description":"Timeout in seconds after which the endpoint returns a failure. If not provided, 60s is used.",
"required":false,
"allowMultiple":false,
"type":"long",
"paramType":"query"
}
]
}
]
}
]
}

View File

@@ -1946,7 +1946,7 @@
"operations":[
{
"method":"POST",
"summary":"Forces this node to recalculate versions of schema objects.",
"summary":"Reset local schema",
"type":"void",
"nickname":"reset_local_schema",
"produces":[

View File

@@ -148,34 +148,7 @@
]
}
]
},
{
"path":"/task_manager/task_status_recursive/{task_id}",
"operations":[
{
"method":"GET",
"summary":"Get statuses of the task and all its descendants",
"type":"array",
"items":{
"type":"task_status"
},
"nickname":"get_task_status_recursively",
"produces":[
"application/json"
],
"parameters":[
{
"name":"task_id",
"description":"The uuid of a task to query about",
"required":true,
"allowMultiple":false,
"type":"string",
"paramType":"path"
}
]
}
]
}
}
],
"models":{
"task_stats" :{
@@ -195,26 +168,6 @@
"failed"
],
"description":"The state of a task"
},
"type":{
"type":"string",
"description":"The description of the task"
},
"keyspace":{
"type":"string",
"description":"The keyspace the task is working on (if applicable)"
},
"table":{
"type":"string",
"description":"The table the task is working on (if applicable)"
},
"entity":{
"type":"string",
"description":"Task-specific entity description"
},
"sequence_number":{
"type":"long",
"description":"The running sequence number of the task"
}
}
},
@@ -291,13 +244,6 @@
"progress_completed":{
"type":"double",
"description":"The number of units completed so far"
},
"children_ids":{
"type":"array",
"items":{
"type":"string"
},
"description":"Task IDs of children of this task"
}
}
}

View File

@@ -86,6 +86,14 @@
"type":"string",
"paramType":"query"
},
{
"name":"type",
"description":"The type of the task",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"entity",
"description":"Task-specific entity description",

View File

@@ -31,7 +31,6 @@
#include "api/config.hh"
#include "task_manager.hh"
#include "task_manager_test.hh"
#include "raft.hh"
logging::logger apilog("api");
@@ -278,18 +277,6 @@ future<> set_server_task_manager_test(http_context& ctx, lw_shared_ptr<db::confi
#endif
future<> set_server_raft(http_context& ctx, sharded<service::raft_group_registry>& raft_gr) {
auto rb = std::make_shared<api_registry_builder>(ctx.api_doc);
return ctx.http_server.set_routes([rb, &ctx, &raft_gr] (routes& r) {
rb->register_function(r, "raft", "The Raft API");
set_raft(ctx, r, raft_gr);
});
}
future<> unset_server_raft(http_context& ctx) {
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_raft(ctx, r); });
}
void req_params::process(const request& req) {
// Process mandatory parameters
for (auto& [name, ent] : params) {

View File

@@ -14,15 +14,11 @@
#include "tasks/task_manager.hh"
#include "seastarx.hh"
using request = http::request;
using reply = http::reply;
namespace service {
class load_meter;
class storage_proxy;
class storage_service;
class raft_group_registry;
} // namespace service
@@ -117,7 +113,5 @@ future<> set_server_compaction_manager(http_context& ctx);
future<> set_server_done(http_context& ctx);
future<> set_server_task_manager(http_context& ctx);
future<> set_server_task_manager_test(http_context& ctx, lw_shared_ptr<db::config> cfg);
future<> set_server_raft(http_context&, sharded<service::raft_group_registry>&);
future<> unset_server_raft(http_context&);
}

View File

@@ -334,13 +334,13 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_memtable_columns_count.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, req->param["name"], uint64_t{0}, [](replica::column_family& cf) {
return boost::accumulate(cf.active_memtables() | boost::adaptors::transformed(std::mem_fn(&replica::memtable::partition_count)), uint64_t(0));
return cf.active_memtable().partition_count();
}, std::plus<>());
});
cf::get_all_memtable_columns_count.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, uint64_t{0}, [](replica::column_family& cf) {
return boost::accumulate(cf.active_memtables() | boost::adaptors::transformed(std::mem_fn(&replica::memtable::partition_count)), uint64_t(0));
return cf.active_memtable().partition_count();
}, std::plus<>());
});
@@ -354,33 +354,25 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_memtable_off_heap_size.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](replica::column_family& cf) {
return boost::accumulate(cf.active_memtables() | boost::adaptors::transformed([] (replica::memtable* active_memtable) {
return active_memtable->region().occupancy().total_space();
}), uint64_t(0));
return cf.active_memtable().region().occupancy().total_space();
}, std::plus<int64_t>());
});
cf::get_all_memtable_off_heap_size.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, int64_t(0), [](replica::column_family& cf) {
return boost::accumulate(cf.active_memtables() | boost::adaptors::transformed([] (replica::memtable* active_memtable) {
return active_memtable->region().occupancy().total_space();
}), uint64_t(0));
return cf.active_memtable().region().occupancy().total_space();
}, std::plus<int64_t>());
});
cf::get_memtable_live_data_size.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](replica::column_family& cf) {
return boost::accumulate(cf.active_memtables() | boost::adaptors::transformed([] (replica::memtable* active_memtable) {
return active_memtable->region().occupancy().used_space();
}), uint64_t(0));
return cf.active_memtable().region().occupancy().used_space();
}, std::plus<int64_t>());
});
cf::get_all_memtable_live_data_size.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, int64_t(0), [](replica::column_family& cf) {
return boost::accumulate(cf.active_memtables() | boost::adaptors::transformed([] (replica::memtable* active_memtable) {
return active_memtable->region().occupancy().used_space();
}), uint64_t(0));
return cf.active_memtable().region().occupancy().used_space();
}, std::plus<int64_t>());
});
@@ -418,9 +410,7 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_all_cf_all_memtables_live_data_size.set(r, [&ctx] (std::unique_ptr<request> req) {
warn(unimplemented::cause::INDEXES);
return map_reduce_cf(ctx, int64_t(0), [](replica::column_family& cf) {
return boost::accumulate(cf.active_memtables() | boost::adaptors::transformed([] (replica::memtable* active_memtable) {
return active_memtable->region().occupancy().used_space();
}), uint64_t(0));
return cf.active_memtable().region().occupancy().used_space();
}, std::plus<int64_t>());
});
@@ -539,13 +529,13 @@ void set_column_family(http_context& ctx, routes& r) {
cf::get_pending_compactions.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, req->param["name"], int64_t(0), [](replica::column_family& cf) {
return cf.estimate_pending_compactions();
return cf.get_compaction_strategy().estimated_pending_compactions(cf.as_table_state());
}, std::plus<int64_t>());
});
cf::get_all_pending_compactions.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, int64_t(0), [](replica::column_family& cf) {
return cf.estimate_pending_compactions();
return cf.get_compaction_strategy().estimated_pending_compactions(cf.as_table_state());
}, std::plus<int64_t>());
});

View File

@@ -41,6 +41,7 @@ static std::unordered_map<std::pair<sstring, sstring>, uint64_t, utils::tuple_ha
return std::move(a);
}
void set_compaction_manager(http_context& ctx, routes& r) {
cm::get_compactions.set(r, [&ctx] (std::unique_ptr<request> req) {
return ctx.db.map_reduce0([](replica::database& db) {
@@ -67,9 +68,9 @@ void set_compaction_manager(http_context& ctx, routes& r) {
cm::get_pending_tasks_by_table.set(r, [&ctx] (std::unique_ptr<request> req) {
return ctx.db.map_reduce0([&ctx](replica::database& db) {
return do_with(std::unordered_map<std::pair<sstring, sstring>, uint64_t, utils::tuple_hash>(), [&ctx, &db](std::unordered_map<std::pair<sstring, sstring>, uint64_t, utils::tuple_hash>& tasks) {
return do_for_each(db.get_column_families(), [&tasks](const std::pair<table_id, seastar::lw_shared_ptr<replica::table>>& i) -> future<> {
return do_for_each(db.get_column_families(), [&tasks](const std::pair<table_id, seastar::lw_shared_ptr<replica::table>>& i) {
replica::table& cf = *i.second.get();
tasks[std::make_pair(cf.schema()->ks_name(), cf.schema()->cf_name())] = cf.estimate_pending_compactions();
tasks[std::make_pair(cf.schema()->ks_name(), cf.schema()->cf_name())] = cf.get_compaction_strategy().estimated_pending_compactions(cf.as_table_state());
return make_ready_future<>();
}).then([&tasks] {
return std::move(tasks);
@@ -118,9 +119,7 @@ void set_compaction_manager(http_context& ctx, routes& r) {
auto& cm = db.get_compaction_manager();
return parallel_for_each(table_names, [&db, &cm, &ks_name, type] (sstring& table_name) {
auto& t = db.find_column_family(ks_name, table_name);
return t.parallel_foreach_table_state([&] (compaction::table_state& ts) {
return cm.stop_compaction(type, &ts);
});
return cm.stop_compaction(type, &t.as_table_state());
});
});
co_return json_void();
@@ -128,7 +127,7 @@ void set_compaction_manager(http_context& ctx, routes& r) {
cm::get_pending_tasks.set(r, [&ctx] (std::unique_ptr<request> req) {
return map_reduce_cf(ctx, int64_t(0), [](replica::column_family& cf) {
return cf.estimate_pending_compactions();
return cf.get_compaction_strategy().estimated_pending_compactions(cf.as_table_state());
}, std::plus<int64_t>());
});

View File

@@ -25,7 +25,7 @@ void set_endpoint_snitch(http_context& ctx, routes& r, sharded<locator::snitch_p
httpd::endpoint_snitch_info_json::get_datacenter.set(r, [&ctx](const_req req) {
auto& topology = ctx.shared_token_metadata.local().get()->get_topology();
auto ep = host_or_broadcast(req);
if (!topology.has_endpoint(ep)) {
if (!topology.has_endpoint(ep, locator::topology::pending::yes)) {
// Cannot return error here, nodetool status can race, request
// info about just-left node and not handle it nicely
return sstring(locator::production_snitch_base::default_dc);
@@ -36,7 +36,7 @@ void set_endpoint_snitch(http_context& ctx, routes& r, sharded<locator::snitch_p
httpd::endpoint_snitch_info_json::get_rack.set(r, [&ctx](const_req req) {
auto& topology = ctx.shared_token_metadata.local().get()->get_topology();
auto ep = host_or_broadcast(req);
if (!topology.has_endpoint(ep)) {
if (!topology.has_endpoint(ep, locator::topology::pending::yes)) {
// Cannot return error here, nodetool status can race, request
// info about just-left node and not handle it nicely
return sstring(locator::production_snitch_base::default_rack);

View File

@@ -17,42 +17,36 @@ namespace fd = httpd::failure_detector_json;
void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {
fd::get_all_endpoint_states.set(r, [&g](std::unique_ptr<request> req) {
return g.container().invoke_on(0, [] (gms::gossiper& g) {
std::vector<fd::endpoint_state> res;
for (auto i : g.get_endpoint_states()) {
fd::endpoint_state val;
val.addrs = boost::lexical_cast<std::string>(i.first);
val.is_alive = i.second.is_alive();
val.generation = i.second.get_heart_beat_state().get_generation();
val.version = i.second.get_heart_beat_state().get_heart_beat_version();
val.update_time = i.second.get_update_timestamp().time_since_epoch().count();
for (auto a : i.second.get_application_state_map()) {
fd::version_value version_val;
// We return the enum index and not it's name to stay compatible to origin
// method that the state index are static but the name can be changed.
version_val.application_state = static_cast<std::underlying_type<gms::application_state>::type>(a.first);
version_val.value = a.second.value;
version_val.version = a.second.version;
val.application_state.push(version_val);
}
res.push_back(val);
std::vector<fd::endpoint_state> res;
for (auto i : g.get_endpoint_states()) {
fd::endpoint_state val;
val.addrs = boost::lexical_cast<std::string>(i.first);
val.is_alive = i.second.is_alive();
val.generation = i.second.get_heart_beat_state().get_generation();
val.version = i.second.get_heart_beat_state().get_heart_beat_version();
val.update_time = i.second.get_update_timestamp().time_since_epoch().count();
for (auto a : i.second.get_application_state_map()) {
fd::version_value version_val;
// We return the enum index and not it's name to stay compatible to origin
// method that the state index are static but the name can be changed.
version_val.application_state = static_cast<std::underlying_type<gms::application_state>::type>(a.first);
version_val.value = a.second.value;
version_val.version = a.second.version;
val.application_state.push(version_val);
}
return make_ready_future<json::json_return_type>(res);
});
res.push_back(val);
}
return make_ready_future<json::json_return_type>(res);
});
fd::get_up_endpoint_count.set(r, [&g](std::unique_ptr<request> req) {
return g.container().invoke_on(0, [] (gms::gossiper& g) {
int res = g.get_up_endpoint_count();
return make_ready_future<json::json_return_type>(res);
});
int res = g.get_up_endpoint_count();
return make_ready_future<json::json_return_type>(res);
});
fd::get_down_endpoint_count.set(r, [&g](std::unique_ptr<request> req) {
return g.container().invoke_on(0, [] (gms::gossiper& g) {
int res = g.get_down_endpoint_count();
return make_ready_future<json::json_return_type>(res);
});
int res = g.get_down_endpoint_count();
return make_ready_future<json::json_return_type>(res);
});
fd::get_phi_convict_threshold.set(r, [] (std::unique_ptr<request> req) {
@@ -60,13 +54,11 @@ void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {
});
fd::get_simple_states.set(r, [&g] (std::unique_ptr<request> req) {
return g.container().invoke_on(0, [] (gms::gossiper& g) {
std::map<sstring, sstring> nodes_status;
for (auto& entry : g.get_endpoint_states()) {
nodes_status.emplace(entry.first.to_sstring(), entry.second.is_alive() ? "UP" : "DOWN");
}
return make_ready_future<json::json_return_type>(map_to_key_value<fd::mapper>(nodes_status));
});
std::map<sstring, sstring> nodes_status;
for (auto& entry : g.get_endpoint_states()) {
nodes_status.emplace(entry.first.to_sstring(), entry.second.is_alive() ? "UP" : "DOWN");
}
return make_ready_future<json::json_return_type>(map_to_key_value<fd::mapper>(nodes_status));
});
fd::set_phi_convict_threshold.set(r, [](std::unique_ptr<request> req) {
@@ -75,15 +67,13 @@ void set_failure_detector(http_context& ctx, routes& r, gms::gossiper& g) {
});
fd::get_endpoint_state.set(r, [&g] (std::unique_ptr<request> req) {
return g.container().invoke_on(0, [req = std::move(req)] (gms::gossiper& g) {
auto* state = g.get_endpoint_state_for_endpoint_ptr(gms::inet_address(req->param["addr"]));
if (!state) {
return make_ready_future<json::json_return_type>(format("unknown endpoint {}", req->param["addr"]));
}
std::stringstream ss;
g.append_endpoint_state(ss, *state);
return make_ready_future<json::json_return_type>(sstring(ss.str()));
});
auto* state = g.get_endpoint_state_for_endpoint_ptr(gms::inet_address(req->param["addr"]));
if (!state) {
return make_ready_future<json::json_return_type>(format("unknown endpoint {}", req->param["addr"]));
}
std::stringstream ss;
g.append_endpoint_state(ss, *state);
return make_ready_future<json::json_return_type>(sstring(ss.str()));
});
fd::get_endpoint_phi_values.set(r, [](std::unique_ptr<request> req) {

View File

@@ -6,8 +6,6 @@
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#include <seastar/core/coroutine.hh>
#include "gossiper.hh"
#include "api/api-doc/gossiper.json.hh"
#include "gms/gossiper.hh"
@@ -16,23 +14,19 @@ namespace api {
using namespace json;
void set_gossiper(http_context& ctx, routes& r, gms::gossiper& g) {
httpd::gossiper_json::get_down_endpoint.set(r, [&g] (std::unique_ptr<request> req) -> future<json::json_return_type> {
auto res = co_await g.get_unreachable_members_synchronized();
co_return json::json_return_type(container_to_vec(res));
httpd::gossiper_json::get_down_endpoint.set(r, [&g] (const_req req) {
auto res = g.get_unreachable_members();
return container_to_vec(res);
});
httpd::gossiper_json::get_live_endpoint.set(r, [&g] (std::unique_ptr<request> req) {
return g.get_live_members_synchronized().then([] (auto res) {
return make_ready_future<json::json_return_type>(container_to_vec(res));
});
httpd::gossiper_json::get_live_endpoint.set(r, [&g] (const_req req) {
auto res = g.get_live_members();
return container_to_vec(res);
});
httpd::gossiper_json::get_endpoint_downtime.set(r, [&g] (std::unique_ptr<request> req) -> future<json::json_return_type> {
gms::inet_address ep(req->param["addr"]);
// synchronize unreachable_members on all shards
co_await g.get_unreachable_members_synchronized();
co_return g.get_endpoint_downtime(ep);
httpd::gossiper_json::get_endpoint_downtime.set(r, [&g] (const_req req) {
gms::inet_address ep(req.param["addr"]);
return g.get_endpoint_downtime(ep);
});
httpd::gossiper_json::get_current_generation_number.set(r, [&g] (std::unique_ptr<request> req) {

View File

@@ -1,70 +0,0 @@
/*
* Copyright (C) 2024-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#include <seastar/core/coroutine.hh>
#include "api/api.hh"
#include "api/api-doc/raft.json.hh"
#include "service/raft/raft_group_registry.hh"
using namespace seastar::httpd;
extern logging::logger apilog;
namespace api {
namespace r = httpd::raft_json;
using namespace json;
void set_raft(http_context&, httpd::routes& r, sharded<service::raft_group_registry>& raft_gr) {
r::trigger_snapshot.set(r, [&raft_gr] (std::unique_ptr<http::request> req) -> future<json_return_type> {
raft::group_id gid{utils::UUID{req->param["group_id"]}};
auto timeout_dur = std::invoke([timeout_str = req->get_query_param("timeout")] {
if (timeout_str.empty()) {
return std::chrono::seconds{60};
}
auto dur = std::stoll(timeout_str);
if (dur <= 0) {
throw std::runtime_error{"Timeout must be a positive number."};
}
return std::chrono::seconds{dur};
});
std::atomic<bool> found_srv{false};
co_await raft_gr.invoke_on_all([gid, timeout_dur, &found_srv] (service::raft_group_registry& raft_gr) -> future<> {
auto* srv = raft_gr.find_server(gid);
if (!srv) {
co_return;
}
found_srv = true;
abort_on_expiry aoe(lowres_clock::now() + timeout_dur);
apilog.info("Triggering Raft group {} snapshot", gid);
auto result = co_await srv->trigger_snapshot(&aoe.abort_source());
if (result) {
apilog.info("New snapshot for Raft group {} created", gid);
} else {
apilog.info("Could not create new snapshot for Raft group {}, no new entries applied", gid);
}
});
if (!found_srv) {
throw std::runtime_error{fmt::format("Server for group ID {} not found", gid)};
}
co_return json_void{};
});
}
void unset_raft(http_context&, httpd::routes& r) {
r::trigger_snapshot.unset(r);
}
}

View File

@@ -1,18 +0,0 @@
/*
* Copyright (C) 2023-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#pragma once
#include "api_init.hh"
namespace api {
void set_raft(http_context& ctx, httpd::routes& r, sharded<service::raft_group_registry>& raft_gr);
void unset_raft(http_context& ctx, httpd::routes& r);
}

View File

@@ -49,14 +49,6 @@
extern logging::logger apilog;
namespace std {
std::ostream& operator<<(std::ostream& os, const api::table_info& ti) {
return os << "table{name=" << ti.name << ", id=" << ti.id << "}";
}
} // namespace std
namespace api {
const locator::token_metadata& http_context::get_token_metadata() {
@@ -108,55 +100,6 @@ std::vector<sstring> parse_tables(const sstring& ks_name, http_context& ctx, con
return parse_tables(ks_name, ctx, it->second);
}
std::vector<table_info> parse_table_infos(const sstring& ks_name, http_context& ctx, sstring value) {
std::vector<table_info> res;
try {
if (value.empty()) {
const auto& cf_meta_data = ctx.db.local().find_keyspace(ks_name).metadata().get()->cf_meta_data();
res.reserve(cf_meta_data.size());
for (const auto& [name, schema] : cf_meta_data) {
res.emplace_back(table_info{name, schema->id()});
}
} else {
std::vector<sstring> names = split(value, ",");
res.reserve(names.size());
const auto& db = ctx.db.local();
for (const auto& table_name : names) {
res.emplace_back(table_info{table_name, db.find_uuid(ks_name, table_name)});
}
}
} catch (const replica::no_such_keyspace& e) {
throw bad_param_exception(e.what());
} catch (const replica::no_such_column_family& e) {
throw bad_param_exception(e.what());
}
return res;
}
std::vector<table_info> parse_table_infos(const sstring& ks_name, http_context& ctx, const std::unordered_map<sstring, sstring>& query_params, sstring param_name) {
auto it = query_params.find(param_name);
return parse_table_infos(ks_name, ctx, it != query_params.end() ? it->second : "");
}
// Run on all tables, skipping dropped tables
future<> run_on_existing_tables(sstring op, replica::database& db, std::string_view keyspace, const std::vector<table_info> local_tables, std::function<future<> (replica::table&)> func) {
std::exception_ptr ex;
for (const auto& ti : local_tables) {
apilog.debug("Starting {} on {}.{}", op, keyspace, ti);
try {
co_await func(db.find_column_family(ti.id));
} catch (const replica::no_such_column_family& e) {
apilog.warn("Skipping {} of {}.{}: {}", op, keyspace, ti, e.what());
} catch (...) {
ex = std::current_exception();
apilog.error("Failed {} of {}.{}: {}", op, keyspace, ti, ex);
}
if (ex) {
co_await coroutine::return_exception_ptr(std::move(ex));
}
}
}
static ss::token_range token_range_endpoints_to_json(const dht::token_range_endpoints& d) {
ss::token_range r;
r.start_token = d._start_token;
@@ -175,13 +118,16 @@ static ss::token_range token_range_endpoints_to_json(const dht::token_range_endp
return r;
}
using ks_cf_func = std::function<future<json::json_return_type>(http_context&, std::unique_ptr<request>, sstring, std::vector<table_info>)>;
using ks_cf_func = std::function<future<json::json_return_type>(http_context&, std::unique_ptr<request>, sstring, std::vector<sstring>)>;
static auto wrap_ks_cf(http_context &ctx, ks_cf_func f) {
return [&ctx, f = std::move(f)](std::unique_ptr<request> req) {
auto keyspace = validate_keyspace(ctx, req->param);
auto table_infos = parse_table_infos(keyspace, ctx, req->query_parameters, "cf");
return f(ctx, std::move(req), std::move(keyspace), std::move(table_infos));
auto column_families = parse_tables(keyspace, ctx, req->query_parameters, "cf");
if (column_families.empty()) {
column_families = map_keys(ctx.db.local().find_keyspace(keyspace).metadata().get()->cf_meta_data());
}
return f(ctx, std::move(req), std::move(keyspace), std::move(column_families));
};
}
@@ -243,21 +189,17 @@ future<json::json_return_type> set_tables_autocompaction(http_context& ctx, cons
}
void set_transport_controller(http_context& ctx, routes& r, cql_transport::controller& ctl) {
ss::start_native_transport.set(r, [&ctx, &ctl](std::unique_ptr<request> req) {
ss::start_native_transport.set(r, [&ctl](std::unique_ptr<request> req) {
return smp::submit_to(0, [&] {
return with_scheduling_group(ctx.db.local().get_statement_scheduling_group(), [&ctl] {
return ctl.start_server();
});
return ctl.start_server();
}).then([] {
return make_ready_future<json::json_return_type>(json_void());
});
});
ss::stop_native_transport.set(r, [&ctx, &ctl](std::unique_ptr<request> req) {
ss::stop_native_transport.set(r, [&ctl](std::unique_ptr<request> req) {
return smp::submit_to(0, [&] {
return with_scheduling_group(ctx.db.local().get_statement_scheduling_group(), [&ctl] {
return ctl.request_stop_server();
});
return ctl.request_stop_server();
}).then([] {
return make_ready_future<json::json_return_type>(json_void());
});
@@ -279,21 +221,17 @@ void unset_transport_controller(http_context& ctx, routes& r) {
}
void set_rpc_controller(http_context& ctx, routes& r, thrift_controller& ctl) {
ss::stop_rpc_server.set(r, [&ctx, &ctl] (std::unique_ptr<request> req) {
return smp::submit_to(0, [&ctx, &ctl] {
return with_scheduling_group(ctx.db.local().get_statement_scheduling_group(), [&ctl] () mutable {
return ctl.request_stop_server();
});
ss::stop_rpc_server.set(r, [&ctl](std::unique_ptr<request> req) {
return smp::submit_to(0, [&] {
return ctl.request_stop_server();
}).then([] {
return make_ready_future<json::json_return_type>(json_void());
});
});
ss::start_rpc_server.set(r, [&ctx, &ctl](std::unique_ptr<request> req) {
return smp::submit_to(0, [&ctx, &ctl] {
return with_scheduling_group(ctx.db.local().get_statement_scheduling_group(), [&ctl] () mutable {
return ctl.start_server();
});
ss::start_rpc_server.set(r, [&ctl](std::unique_ptr<request> req) {
return smp::submit_to(0, [&] {
return ctl.start_server();
}).then([] {
return make_ready_future<json::json_return_type>(json_void());
});
@@ -671,114 +609,93 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
});
});
ss::force_keyspace_compaction.set(r, [&ctx](std::unique_ptr<request> req) -> future<json::json_return_type> {
auto& db = ctx.db;
ss::force_keyspace_compaction.set(r, [&ctx](std::unique_ptr<request> req) {
auto keyspace = validate_keyspace(ctx, req->param);
auto table_infos = parse_table_infos(keyspace, ctx, req->query_parameters, "cf");
apilog.debug("force_keyspace_compaction: keyspace={} tables={}", keyspace, table_infos);
try {
co_await db.invoke_on_all([&] (replica::database& db) -> future<> {
auto local_tables = table_infos;
// major compact smaller tables first, to increase chances of success if low on space.
std::ranges::sort(local_tables, std::less<>(), [&] (const table_info& ti) {
try {
return db.find_column_family(ti.id).get_stats().live_disk_space_used;
} catch (const replica::no_such_column_family& e) {
return int64_t(-1);
}
});
co_await run_on_existing_tables("force_keyspace_compaction", db, keyspace, local_tables, [] (replica::table& t) {
return t.compact_all_sstables();
});
});
} catch (...) {
apilog.error("force_keyspace_compaction: keyspace={} tables={} failed: {}", keyspace, table_infos, std::current_exception());
throw;
auto column_families = parse_tables(keyspace, ctx, req->query_parameters, "cf");
if (column_families.empty()) {
column_families = map_keys(ctx.db.local().find_keyspace(keyspace).metadata().get()->cf_meta_data());
}
co_return json_void();
return ctx.db.invoke_on_all([keyspace, column_families] (replica::database& db) -> future<> {
auto table_ids = boost::copy_range<std::vector<table_id>>(column_families | boost::adaptors::transformed([&] (auto& cf_name) {
return db.find_uuid(keyspace, cf_name);
}));
// major compact smaller tables first, to increase chances of success if low on space.
std::ranges::sort(table_ids, std::less<>(), [&] (const table_id& id) {
return db.find_column_family(id).get_stats().live_disk_space_used;
});
// as a table can be dropped during loop below, let's find it before issuing major compaction request.
for (auto& id : table_ids) {
co_await db.find_column_family(id).compact_all_sstables();
}
co_return;
}).then([]{
return make_ready_future<json::json_return_type>(json_void());
});
});
ss::force_keyspace_cleanup.set(r, [&ctx, &ss](std::unique_ptr<request> req) -> future<json::json_return_type> {
auto& db = ctx.db;
ss::force_keyspace_cleanup.set(r, [&ctx, &ss](std::unique_ptr<request> req) {
auto keyspace = validate_keyspace(ctx, req->param);
auto table_infos = parse_table_infos(keyspace, ctx, req->query_parameters, "cf");
apilog.info("force_keyspace_cleanup: keyspace={} tables={}", keyspace, table_infos);
if (!co_await ss.local().is_cleanup_allowed(keyspace)) {
auto msg = "Can not perform cleanup operation when topology changes";
apilog.warn("force_keyspace_cleanup: keyspace={} tables={}: {}", keyspace, table_infos, msg);
co_await coroutine::return_exception(std::runtime_error(msg));
auto column_families = parse_tables(keyspace, ctx, req->query_parameters, "cf");
if (column_families.empty()) {
column_families = map_keys(ctx.db.local().find_keyspace(keyspace).metadata().get()->cf_meta_data());
}
try {
co_await db.invoke_on_all([&] (replica::database& db) -> future<> {
auto local_tables = table_infos;
return ss.local().is_cleanup_allowed(keyspace).then([&ctx, keyspace,
column_families = std::move(column_families)] (bool is_cleanup_allowed) mutable {
if (!is_cleanup_allowed) {
return make_exception_future<json::json_return_type>(
std::runtime_error("Can not perform cleanup operation when topology changes"));
}
return ctx.db.invoke_on_all([keyspace, column_families] (replica::database& db) -> future<> {
auto table_ids = boost::copy_range<std::vector<table_id>>(column_families | boost::adaptors::transformed([&] (auto& table_name) {
return db.find_uuid(keyspace, table_name);
}));
// cleanup smaller tables first, to increase chances of success if low on space.
std::ranges::sort(local_tables, std::less<>(), [&] (const table_info& ti) {
try {
return db.find_column_family(ti.id).get_stats().live_disk_space_used;
} catch (const replica::no_such_column_family& e) {
return int64_t(-1);
}
std::ranges::sort(table_ids, std::less<>(), [&] (const table_id& id) {
return db.find_column_family(id).get_stats().live_disk_space_used;
});
auto& cm = db.get_compaction_manager();
auto owned_ranges_ptr = compaction::make_owned_ranges_ptr(db.get_keyspace_local_ranges(keyspace));
co_await run_on_existing_tables("force_keyspace_cleanup", db, keyspace, local_tables, [&] (replica::table& t) {
return t.perform_cleanup_compaction(owned_ranges_ptr);
});
// as a table can be dropped during loop below, let's find it before issuing the cleanup request.
for (auto& id : table_ids) {
replica::table& t = db.find_column_family(id);
co_await cm.perform_cleanup(owned_ranges_ptr, t.as_table_state());
}
co_return;
}).then([]{
return make_ready_future<json::json_return_type>(0);
});
} catch (...) {
apilog.error("force_keyspace_cleanup: keyspace={} tables={} failed: {}", keyspace, table_infos, std::current_exception());
throw;
}
co_return json::json_return_type(0);
});
});
ss::perform_keyspace_offstrategy_compaction.set(r, wrap_ks_cf(ctx, [] (http_context& ctx, std::unique_ptr<request> req, sstring keyspace, std::vector<table_info> table_infos) -> future<json::json_return_type> {
apilog.info("perform_keyspace_offstrategy_compaction: keyspace={} tables={}", keyspace, table_infos);
bool res = false;
try {
res = co_await ctx.db.map_reduce0([&] (replica::database& db) -> future<bool> {
bool needed = false;
co_await run_on_existing_tables("perform_keyspace_offstrategy_compaction", db, keyspace, table_infos, [&needed] (replica::table& t) -> future<> {
needed |= co_await t.perform_offstrategy_compaction();
});
co_return needed;
}, false, std::plus<bool>());
} catch (...) {
apilog.error("perform_keyspace_offstrategy_compaction: keyspace={} tables={} failed: {}", keyspace, table_infos, std::current_exception());
throw;
}
co_return json::json_return_type(res);
ss::perform_keyspace_offstrategy_compaction.set(r, wrap_ks_cf(ctx, [] (http_context& ctx, std::unique_ptr<request> req, sstring keyspace, std::vector<sstring> tables) -> future<json::json_return_type> {
co_return co_await ctx.db.map_reduce0([&keyspace, &tables] (replica::database& db) -> future<bool> {
bool needed = false;
for (const auto& table : tables) {
auto& t = db.find_column_family(keyspace, table);
needed |= co_await t.perform_offstrategy_compaction();
}
co_return needed;
}, false, std::plus<bool>());
}));
ss::upgrade_sstables.set(r, wrap_ks_cf(ctx, [] (http_context& ctx, std::unique_ptr<request> req, sstring keyspace, std::vector<table_info> table_infos) -> future<json::json_return_type> {
auto& db = ctx.db;
ss::upgrade_sstables.set(r, wrap_ks_cf(ctx, [] (http_context& ctx, std::unique_ptr<request> req, sstring keyspace, std::vector<sstring> column_families) {
bool exclude_current_version = req_param<bool>(*req, "exclude_current_version", false);
apilog.info("upgrade_sstables: keyspace={} tables={} exclude_current_version={}", keyspace, table_infos, exclude_current_version);
try {
co_await db.invoke_on_all([&] (replica::database& db) -> future<> {
auto owned_ranges_ptr = compaction::make_owned_ranges_ptr(db.get_keyspace_local_ranges(keyspace));
co_await run_on_existing_tables("upgrade_sstables", db, keyspace, table_infos, [&] (replica::table& t) {
return t.parallel_foreach_table_state([&] (compaction::table_state& ts) {
return t.get_compaction_manager().perform_sstable_upgrade(owned_ranges_ptr, ts, exclude_current_version);
});
});
return ctx.db.invoke_on_all([=] (replica::database& db) {
auto owned_ranges_ptr = compaction::make_owned_ranges_ptr(db.get_keyspace_local_ranges(keyspace));
return do_for_each(column_families, [=, &db](sstring cfname) {
auto& cm = db.get_compaction_manager();
auto& cf = db.find_column_family(keyspace, cfname);
return cm.perform_sstable_upgrade(owned_ranges_ptr, cf.as_table_state(), exclude_current_version);
});
} catch (...) {
apilog.error("upgrade_sstables: keyspace={} tables={} failed: {}", keyspace, table_infos, std::current_exception());
throw;
}
co_return json::json_return_type(0);
}).then([]{
return make_ready_future<json::json_return_type>(0);
});
}));
ss::force_keyspace_flush.set(r, [&ctx](std::unique_ptr<request> req) -> future<json::json_return_type> {
auto keyspace = validate_keyspace(ctx, req->param);
auto column_families = parse_tables(keyspace, ctx, req->query_parameters, "cf");
apilog.info("perform_keyspace_flush: keyspace={} tables={}", keyspace, column_families);
auto& db = ctx.db;
if (column_families.empty()) {
co_await replica::database::flush_keyspace_on_all_shards(db, keyspace);
@@ -790,7 +707,6 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
ss::decommission.set(r, [&ss](std::unique_ptr<request> req) {
apilog.info("decommission");
return ss.local().decommission().then([] {
return make_ready_future<json::json_return_type>(json_void());
});
@@ -806,7 +722,6 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
ss::remove_node.set(r, [&ss](std::unique_ptr<request> req) {
auto host_id = validate_host_id(req->get_query_param("host_id"));
std::vector<sstring> ignore_nodes_strs= split(req->get_query_param("ignore_nodes"), ",");
apilog.info("remove_node: host_id={} ignore_nodes={}", host_id, ignore_nodes_strs);
auto ignore_nodes = std::list<locator::host_id_or_endpoint>();
for (std::string n : ignore_nodes_strs) {
try {
@@ -882,7 +797,6 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
});
ss::drain.set(r, [&ss](std::unique_ptr<request> req) {
apilog.info("drain");
return ss.local().drain().then([] {
return make_ready_future<json::json_return_type>(json_void());
});
@@ -906,14 +820,12 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
});
ss::stop_gossiping.set(r, [&ss](std::unique_ptr<request> req) {
apilog.info("stop_gossiping");
return ss.local().stop_gossiping().then([] {
return make_ready_future<json::json_return_type>(json_void());
});
});
ss::start_gossiping.set(r, [&ss](std::unique_ptr<request> req) {
apilog.info("start_gossiping");
return ss.local().start_gossiping().then([] {
return make_ready_future<json::json_return_type>(json_void());
});
@@ -1016,7 +928,6 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
ss::rebuild.set(r, [&ss](std::unique_ptr<request> req) {
auto source_dc = req->get_query_param("source_dc");
apilog.info("rebuild: source_dc={}", source_dc);
return ss.local().rebuild(std::move(source_dc)).then([] {
return make_ready_future<json::json_return_type>(json_void());
});
@@ -1049,16 +960,17 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
return make_ready_future<json::json_return_type>(res);
});
ss::reset_local_schema.set(r, [&ss](std::unique_ptr<http::request> req) -> future<json::json_return_type> {
ss::reset_local_schema.set(r, [&sys_ks](std::unique_ptr<request> req) {
// FIXME: We should truncate schema tables if more than one node in the cluster.
apilog.info("reset_local_schema");
co_await ss.local().reload_schema();
co_return json_void();
auto& sp = service::get_storage_proxy();
auto& fs = sp.local().features();
return db::schema_tables::recalculate_schema_version(sys_ks, sp, fs).then([] {
return make_ready_future<json::json_return_type>(json_void());
});
});
ss::set_trace_probability.set(r, [](std::unique_ptr<request> req) {
auto probability = req->get_query_param("probability");
apilog.info("set_trace_probability: probability={}", probability);
return futurize_invoke([probability] {
double real_prob = std::stod(probability.c_str());
return tracing::tracing::tracing_instance().invoke_on_all([real_prob] (auto& local_tracing) {
@@ -1096,7 +1008,6 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
auto ttl = req->get_query_param("ttl");
auto threshold = req->get_query_param("threshold");
auto fast = req->get_query_param("fast");
apilog.info("set_slow_query: enable={} ttl={} threshold={} fast={}", enable, ttl, threshold, fast);
try {
return tracing::tracing::tracing_instance().invoke_on_all([enable, ttl, threshold, fast] (auto& local_tracing) {
if (threshold != "") {
@@ -1123,7 +1034,6 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
auto keyspace = validate_keyspace(ctx, req->param);
auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");
apilog.info("enable_auto_compaction: keyspace={} tables={}", keyspace, tables);
return set_tables_autocompaction(ctx, keyspace, tables, true);
});
@@ -1131,7 +1041,6 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
auto keyspace = validate_keyspace(ctx, req->param);
auto tables = parse_tables(keyspace, ctx, req->query_parameters, "cf");
apilog.info("disable_auto_compaction: keyspace={} tables={}", keyspace, tables);
return set_tables_autocompaction(ctx, keyspace, tables, false);
});
@@ -1457,8 +1366,7 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
});
});
ss::scrub.set(r, [&ctx, &snap_ctl] (std::unique_ptr<request> req) -> future<json::json_return_type> {
auto& db = ctx.db;
ss::scrub.set(r, [&ctx, &snap_ctl] (std::unique_ptr<request> req) {
auto rp = req_params({
{"keyspace", {mandatory::yes}},
{"cf", {""}},
@@ -1494,9 +1402,10 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
}
}
auto f = make_ready_future<>();
if (!req_param<bool>(*req, "disable_snapshot", false)) {
auto tag = format("pre-scrub-{:d}", db_clock::now().time_since_epoch().count());
co_await coroutine::parallel_for_each(column_families, [&snap_ctl, keyspace, tag](sstring cf) {
f = parallel_for_each(column_families, [&snap_ctl, keyspace, tag](sstring cf) {
// We always pass here db::snapshot_ctl::snap_views::no since:
// 1. When scrubbing particular tables, there's no need to auto-snapshot their views.
// 2. When scrubbing the whole keyspace, column_families will contain both base tables and views.
@@ -1525,30 +1434,28 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
return stats;
};
try {
auto opt_stats = co_await db.map_reduce0([&] (replica::database& db) {
return map_reduce(column_families, [&] (sstring cfname) -> future<std::optional<sstables::compaction_stats>> {
return f.then([&ctx, keyspace, column_families, opts, &reduce_compaction_stats] {
return ctx.db.map_reduce0([=] (replica::database& db) {
return map_reduce(column_families, [=, &db] (sstring cfname) {
auto& cm = db.get_compaction_manager();
auto& cf = db.find_column_family(keyspace, cfname);
sstables::compaction_stats stats{};
co_await cf.parallel_foreach_table_state([&] (compaction::table_state& ts) mutable -> future<> {
auto r = co_await cm.perform_sstable_scrub(ts, opts);
stats += r.value_or(sstables::compaction_stats{});
});
co_return stats;
return cm.perform_sstable_scrub(cf.as_table_state(), opts);
}, std::make_optional(sstables::compaction_stats{}), reduce_compaction_stats);
}, std::make_optional(sstables::compaction_stats{}), reduce_compaction_stats);
if (opt_stats && opt_stats->validation_errors) {
co_return json::json_return_type(static_cast<int>(scrub_status::validation_errors));
}).then_wrapped([] (auto f) {
if (f.failed()) {
auto ex = f.get_exception();
if (try_catch<sstables::compaction_aborted_exception>(ex)) {
return make_ready_future<json::json_return_type>(static_cast<int>(scrub_status::aborted));
} else {
return make_exception_future<json::json_return_type>(std::move(ex));
}
} else if (f.get()->validation_errors) {
return make_ready_future<json::json_return_type>(static_cast<int>(scrub_status::validation_errors));
} else {
return make_ready_future<json::json_return_type>(static_cast<int>(scrub_status::successful));
}
} catch (const sstables::compaction_aborted_exception&) {
co_return json::json_return_type(static_cast<int>(scrub_status::aborted));
} catch (...) {
apilog.error("scrub keyspace={} tables={} failed: {}", keyspace, column_families, std::current_exception());
throw;
}
co_return json::json_return_type(static_cast<int>(scrub_status::successful));
});
});
}

View File

@@ -8,8 +8,6 @@
#pragma once
#include <iostream>
#include <seastar/core/sharded.hh>
#include "api.hh"
#include "db/data_listeners.hh"
@@ -43,22 +41,8 @@ sstring validate_keyspace(http_context& ctx, const parameters& param);
// splits a request parameter assumed to hold a comma-separated list of table names
// verify that the tables are found, otherwise a bad_param_exception exception is thrown
// containing the description of the respective no_such_column_family error.
// Returns an empty vector if no parameter was found.
// If the parameter is found and empty, returns a list of all table names in the keyspace.
std::vector<sstring> parse_tables(const sstring& ks_name, http_context& ctx, const std::unordered_map<sstring, sstring>& query_params, sstring param_name);
struct table_info {
sstring name;
table_id id;
};
// splits a request parameter assumed to hold a comma-separated list of table names
// verify that the tables are found, otherwise a bad_param_exception exception is thrown
// containing the description of the respective no_such_column_family error.
// Returns a vector of all table infos given by the parameter, or
// if the parameter is not found or is empty, returns a list of all table infos in the keyspace.
std::vector<table_info> parse_table_infos(const sstring& ks_name, http_context& ctx, const std::unordered_map<sstring, sstring>& query_params, sstring param_name);
void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_service>& ss, gms::gossiper& g, sharded<cdc::generation_service>& cdc_gs, sharded<db::system_keyspace>& sys_ls);
void set_sstables_loader(http_context& ctx, routes& r, sharded<sstables_loader>& sst_loader);
void unset_sstables_loader(http_context& ctx, routes& r);
@@ -74,10 +58,4 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
void unset_snapshot(http_context& ctx, routes& r);
seastar::future<json::json_return_type> run_toppartitions_query(db::toppartitions_query& q, http_context &ctx, bool legacy_request = false);
} // namespace api
namespace std {
std::ostream& operator<<(std::ostream& os, const api::table_info& ti);
} // namespace std
}

View File

@@ -30,32 +30,17 @@ inline bool filter_tasks(tasks::task_manager::task_ptr task, std::unordered_map<
struct full_task_status {
tasks::task_manager::task::status task_status;
std::string type;
tasks::task_manager::task::progress progress;
std::string module;
tasks::task_id parent_id;
tasks::is_abortable abortable;
std::vector<std::string> children_ids;
};
struct task_stats {
task_stats(tasks::task_manager::task_ptr task)
: task_id(task->id().to_sstring())
, state(task->get_status().state)
, type(task->type())
, keyspace(task->get_status().keyspace)
, table(task->get_status().table)
, entity(task->get_status().entity)
, sequence_number(task->get_status().sequence_number)
{ }
task_stats(tasks::task_manager::task_ptr task) : task_id(task->id().to_sstring()), state(task->get_status().state) {}
sstring task_id;
tasks::task_manager::task_state state;
std::string type;
std::string keyspace;
std::string table;
std::string entity;
uint64_t sequence_number;
};
tm::task_status make_status(full_task_status status) {
@@ -67,7 +52,7 @@ tm::task_status make_status(full_task_status status) {
tm::task_status res{};
res.id = status.task_status.id.to_sstring();
res.type = status.type;
res.type = status.task_status.type;
res.state = status.task_status.state;
res.is_abortable = bool(status.abortable);
res.start_time = st;
@@ -82,29 +67,22 @@ tm::task_status make_status(full_task_status status) {
res.progress_units = status.task_status.progress_units;
res.progress_total = status.progress.total;
res.progress_completed = status.progress.completed;
res.children_ids = std::move(status.children_ids);
return res;
}
future<full_task_status> retrieve_status(const tasks::task_manager::foreign_task_ptr& task) {
future<json::json_return_type> retrieve_status(tasks::task_manager::foreign_task_ptr task) {
if (task.get() == nullptr) {
co_return coroutine::return_exception(httpd::bad_param_exception("Task not found"));
}
auto progress = co_await task->get_progress();
full_task_status s;
s.task_status = task->get_status();
s.type = task->type();
s.parent_id = task->get_parent_id();
s.abortable = task->is_abortable();
s.module = task->get_module_name();
s.progress.completed = progress.completed;
s.progress.total = progress.total;
std::vector<std::string> ct{task->get_children().size()};
boost::transform(task->get_children(), ct.begin(), [] (const auto& child) {
return child->id().to_sstring();
});
s.children_ids = std::move(ct);
co_return s;
co_return make_status(s);
}
void set_task_manager(http_context& ctx, routes& r) {
@@ -156,8 +134,7 @@ void set_task_manager(http_context& ctx, routes& r) {
}
co_return std::move(task);
}));
auto s = co_await retrieve_status(task);
co_return make_status(s);
co_return co_await retrieve_status(std::move(task));
});
tm::abort_task.set(r, [&ctx] (std::unique_ptr<request> req) -> future<json::json_return_type> {
@@ -176,55 +153,11 @@ void set_task_manager(http_context& ctx, routes& r) {
auto task = co_await tasks::task_manager::invoke_on_task(ctx.tm, id, std::function([] (tasks::task_manager::task_ptr task) {
return task->done().then_wrapped([task] (auto f) {
task->unregister_task();
// done() is called only because we want the task to be complete before getting its status.
// The future should be ignored here as the result does not matter.
f.ignore_ready_future();
f.get();
return make_foreign(task);
});
}));
auto s = co_await retrieve_status(task);
co_return make_status(s);
});
tm::get_task_status_recursively.set(r, [&ctx] (std::unique_ptr<request> req) -> future<json::json_return_type> {
auto& _ctx = ctx;
auto id = tasks::task_id{utils::UUID{req->param["task_id"]}};
std::queue<tasks::task_manager::foreign_task_ptr> q;
utils::chunked_vector<full_task_status> res;
// Get requested task.
auto task = co_await tasks::task_manager::invoke_on_task(_ctx.tm, id, std::function([] (tasks::task_manager::task_ptr task) -> future<tasks::task_manager::foreign_task_ptr> {
auto state = task->get_status().state;
if (state == tasks::task_manager::task_state::done || state == tasks::task_manager::task_state::failed) {
task->unregister_task();
}
co_return task;
}));
// Push children's statuses in BFS order.
q.push(co_await task.copy()); // Task cannot be moved since we need it to be alive during whole loop execution.
while (!q.empty()) {
auto& current = q.front();
res.push_back(co_await retrieve_status(current));
for (auto& child: current->get_children()) {
q.push(co_await child.copy());
}
q.pop();
}
std::function<future<>(output_stream<char>&&)> f = [r = std::move(res)] (output_stream<char>&& os) -> future<> {
auto s = std::move(os);
auto res = std::move(r);
co_await s.write("[");
std::string delim = "";
for (auto& status: res) {
co_await s.write(std::exchange(delim, ", "));
co_await formatter::write(s, make_status(status));
}
co_await s.write("]");
co_await s.close();
};
co_return f;
co_return co_await retrieve_status(std::move(task));
});
}

View File

@@ -47,6 +47,8 @@ void set_task_manager_test(http_context& ctx, routes& r, db::config& cfg) {
std::string keyspace = it != req->query_parameters.end() ? it->second : "";
it = req->query_parameters.find("table");
std::string table = it != req->query_parameters.end() ? it->second : "";
it = req->query_parameters.find("type");
std::string type = it != req->query_parameters.end() ? it->second : "";
it = req->query_parameters.find("entity");
std::string entity = it != req->query_parameters.end() ? it->second : "";
it = req->query_parameters.find("parent_id");
@@ -58,7 +60,7 @@ void set_task_manager_test(http_context& ctx, routes& r, db::config& cfg) {
}
auto module = tms.local().find_module("test");
id = co_await module->make_task<tasks::test_task_impl>(shard, id, keyspace, table, entity, data);
id = co_await module->make_task<tasks::test_task_impl>(shard, id, keyspace, table, type, entity, data);
co_await tms.invoke_on(shard, [id] (tasks::task_manager& tm) {
auto it = tm.get_all_tasks().find(id);
if (it != tm.get_all_tasks().end()) {
@@ -97,7 +99,7 @@ void set_task_manager_test(http_context& ctx, routes& r, db::config& cfg) {
tmt::get_and_update_ttl.set(r, [&ctx, &cfg] (std::unique_ptr<request> req) -> future<json::json_return_type> {
uint32_t ttl = cfg.task_ttl_seconds();
co_await cfg.task_ttl_seconds.set_value_on_all_shards(req->query_parameters["ttl"], utils::config_file::config_source::API);
cfg.task_ttl_seconds.set(boost::lexical_cast<uint32_t>(req->query_parameters["ttl"]));
co_return json::json_return_type(ttl);
});
}

View File

@@ -66,48 +66,36 @@ atomic_cell::atomic_cell(const abstract_type& type, atomic_cell_view other)
set_view(_data);
}
// Based on Cassandra's resolveRegular function:
// - https://github.com/apache/cassandra/blob/e4f31b73c21b04966269c5ac2d3bd2562e5f6c63/src/java/org/apache/cassandra/db/rows/Cells.java#L79-L119
//
// Note: the ordering algorithm for cell is the same as for rows,
// except that the cell value is used to break a tie in case all other attributes are equal.
// See compare_row_marker_for_merge.
// Based on:
// - org.apache.cassandra.db.AbstractCell#reconcile()
// - org.apache.cassandra.db.BufferExpiringCell#reconcile()
// - org.apache.cassandra.db.BufferDeletedCell#reconcile()
std::strong_ordering
compare_atomic_cell_for_merge(atomic_cell_view left, atomic_cell_view right) {
// Largest write timestamp wins.
if (left.timestamp() != right.timestamp()) {
return left.timestamp() <=> right.timestamp();
}
// Tombstones always win reconciliation with live cells of the same timestamp
if (left.is_live() != right.is_live()) {
return left.is_live() ? std::strong_ordering::less : std::strong_ordering::greater;
}
if (left.is_live()) {
// Prefer expiring cells (which will become tombstones at some future date) over live cells.
// See https://issues.apache.org/jira/browse/CASSANDRA-14592
auto c = compare_unsigned(left.value(), right.value()) <=> 0;
if (c != 0) {
return c;
}
if (left.is_live_and_has_ttl() != right.is_live_and_has_ttl()) {
// prefer expiring cells.
return left.is_live_and_has_ttl() ? std::strong_ordering::greater : std::strong_ordering::less;
}
// If both are expiring, choose the cell with the latest expiry or derived write time.
if (left.is_live_and_has_ttl()) {
// Prefer cell with latest expiry
if (left.expiry() != right.expiry()) {
return left.expiry() <=> right.expiry();
} else if (right.ttl() != left.ttl()) {
// The cell write time is derived by (expiry - ttl).
// Prefer the cell that was written later,
// so it survives longer after it expires, until purged,
// as it become purgeable gc_grace_seconds after it was written.
//
// Note that this is an extension to Cassandra's algorithm
// which stops at the expiration time, and if equal,
// move forward to compare the cell values.
} else {
// prefer the cell that was written later,
// so it survives longer after it expires, until purged.
return right.ttl() <=> left.ttl();
}
}
// The cell with the largest value wins, if all other attributes of the cells are identical.
// This is quite arbitrary, but still required to break the tie in a deterministic way.
return compare_unsigned(left.value(), right.value());
} else {
// Both are deleted

View File

@@ -229,8 +229,6 @@ future<authenticated_user> password_authenticator::authenticate(
std::throw_with_nested(exceptions::authentication_exception(e.what()));
} catch (exceptions::authentication_exception& e) {
std::throw_with_nested(e);
} catch (exceptions::unavailable_exception& e) {
std::throw_with_nested(exceptions::authentication_exception(e.get_message()));
} catch (...) {
std::throw_with_nested(exceptions::authentication_exception("authentication failed"));
}

View File

@@ -55,7 +55,6 @@ future<bool> default_role_row_satisfies(
return qp.execute_internal(
query,
db::consistency_level::ONE,
internal_distributed_query_state(),
{meta::DEFAULT_SUPERUSER_NAME},
cql3::query_processor::cache_internal::yes).then([&qp, &p](::shared_ptr<cql3::untyped_result_set> results) {
if (results->empty()) {

View File

@@ -457,9 +457,7 @@ public:
_begin.ptr->size = _size;
_current = nullptr;
_size = 0;
auto begin_ptr = _begin.ptr;
_begin.ptr = nullptr;
return managed_bytes(begin_ptr);
return managed_bytes(std::exchange(_begin.ptr, {}));
} else {
return managed_bytes();
}

View File

@@ -572,7 +572,7 @@ void cache_flat_mutation_reader::maybe_add_to_cache(const clustering_row& cr) {
_read_context.cache().on_mispopulate();
return;
}
auto rt_opt = _rt_assembler.flush(*_schema, position_in_partition::after_key(*_schema, cr.key()));
auto rt_opt = _rt_assembler.flush(*_schema, position_in_partition::after_key(cr.key()));
clogger.trace("csm {}: populate({})", fmt::ptr(this), clustering_row::printer(*_schema, cr));
_lsa_manager.run_in_update_section_with_allocator([this, &cr, &rt_opt] {
mutation_partition& mp = _snp->version()->partition();
@@ -634,8 +634,8 @@ inline
void cache_flat_mutation_reader::copy_from_cache_to_buffer() {
clogger.trace("csm {}: copy_from_cache, next={}, next_row_in_range={}", fmt::ptr(this), _next_row.position(), _next_row_in_range);
_next_row.touch();
auto next_lower_bound = position_in_partition_view::after_key(table_schema(), _next_row.position());
auto upper_bound = _next_row_in_range ? next_lower_bound.view : _upper_bound;
position_in_partition_view next_lower_bound = _next_row.dummy() ? _next_row.position() : position_in_partition_view::after_key(_next_row.key());
auto upper_bound = _next_row_in_range ? next_lower_bound : _upper_bound;
if (_snp->range_tombstones(_lower_bound, upper_bound, [&] (range_tombstone rts) {
add_range_tombstone_to_buffer(std::move(rts));
return stop_iteration(_lower_bound_changed && is_buffer_full());
@@ -774,14 +774,14 @@ void cache_flat_mutation_reader::move_to_next_entry() {
}
}
void cache_flat_mutation_reader::flush_tombstones(position_in_partition_view pos_, bool end_of_range) {
void cache_flat_mutation_reader::flush_tombstones(position_in_partition_view pos, bool end_of_range) {
// Ensure position is appropriate for range tombstone bound
auto pos = position_in_partition_view::after_key(*_schema, pos_);
clogger.trace("csm {}: flush_tombstones({}) end_of_range: {}", fmt::ptr(this), pos.view, end_of_range);
_rt_gen.flush(pos.view, [this] (range_tombstone_change&& rtc) {
pos = position_in_partition_view::after_key(pos);
clogger.trace("csm {}: flush_tombstones({}) end_of_range: {}", fmt::ptr(this), pos, end_of_range);
_rt_gen.flush(pos, [this] (range_tombstone_change&& rtc) {
add_to_buffer(std::move(rtc), source::cache);
}, end_of_range);
if (auto rtc_opt = _rt_merger.flush(pos.view, end_of_range)) {
if (auto rtc_opt = _rt_merger.flush(pos, end_of_range)) {
do_add_to_buffer(std::move(*rtc_opt));
}
}
@@ -832,7 +832,7 @@ inline
void cache_flat_mutation_reader::add_clustering_row_to_buffer(mutation_fragment_v2&& mf) {
clogger.trace("csm {}: add_clustering_row_to_buffer({})", fmt::ptr(this), mutation_fragment_v2::printer(*_schema, mf));
auto& row = mf.as_clustering_row();
auto new_lower_bound = position_in_partition::after_key(*_schema, row.key());
auto new_lower_bound = position_in_partition::after_key(row.key());
push_mutation_fragment(std::move(mf));
_lower_bound = std::move(new_lower_bound);
_lower_bound_changed = true;

View File

@@ -25,7 +25,6 @@
#include "gms/gossiper.hh"
#include "gms/feature_service.hh"
#include "utils/UUID_gen.hh"
#include "utils/error_injection.hh"
#include "cdc/generation.hh"
#include "cdc/cdc_options.hh"
@@ -45,16 +44,8 @@ static unsigned get_sharding_ignore_msb(const gms::inet_address& endpoint, const
namespace cdc {
api::timestamp_clock::duration get_generation_leeway() {
static thread_local auto generation_leeway =
std::chrono::duration_cast<api::timestamp_clock::duration>(std::chrono::seconds(5));
utils::get_local_injector().inject("increase_cdc_generation_leeway", [&] {
generation_leeway = std::chrono::duration_cast<api::timestamp_clock::duration>(std::chrono::minutes(5));
});
return generation_leeway;
}
extern const api::timestamp_clock::duration generation_leeway =
std::chrono::duration_cast<api::timestamp_clock::duration>(std::chrono::seconds(5));
static void copy_int_to_bytes(int64_t i, size_t offset, bytes& b) {
i = net::hton(i);
@@ -169,18 +160,18 @@ bool token_range_description::operator==(const token_range_description& o) const
&& sharding_ignore_msb == o.sharding_ignore_msb;
}
topology_description::topology_description(utils::chunked_vector<token_range_description> entries)
topology_description::topology_description(std::vector<token_range_description> entries)
: _entries(std::move(entries)) {}
bool topology_description::operator==(const topology_description& o) const {
return _entries == o._entries;
}
const utils::chunked_vector<token_range_description>& topology_description::entries() const& {
const std::vector<token_range_description>& topology_description::entries() const& {
return _entries;
}
utils::chunked_vector<token_range_description>&& topology_description::entries() && {
std::vector<token_range_description>&& topology_description::entries() && {
return std::move(_entries);
}
@@ -272,7 +263,7 @@ public:
topology_description generate() const {
const auto tokens = get_tokens();
utils::chunked_vector<token_range_description> vnode_descriptions;
std::vector<token_range_description> vnode_descriptions;
vnode_descriptions.reserve(tokens.size());
vnode_descriptions.push_back(
@@ -340,7 +331,7 @@ future<cdc::generation_id> generation_service::make_new_generation(const std::un
auto new_generation_timestamp = [add_delay, ring_delay = _cfg.ring_delay] {
auto ts = db_clock::now();
if (add_delay && ring_delay != 0ms) {
ts += 2 * ring_delay + duration_cast<milliseconds>(get_generation_leeway());
ts += 2 * ring_delay + duration_cast<milliseconds>(generation_leeway);
}
return ts;
};

View File

@@ -46,8 +46,6 @@ namespace gms {
namespace cdc {
api::timestamp_clock::duration get_generation_leeway();
class stream_id final {
bytes _value;
public:
@@ -96,13 +94,13 @@ struct token_range_description {
* in the `_entries` vector. See the comment above `token_range_description` for explanation.
*/
class topology_description {
utils::chunked_vector<token_range_description> _entries;
std::vector<token_range_description> _entries;
public:
topology_description(utils::chunked_vector<token_range_description> entries);
topology_description(std::vector<token_range_description> entries);
bool operator==(const topology_description&) const;
const utils::chunked_vector<token_range_description>& entries() const&;
utils::chunked_vector<token_range_description>&& entries() &&;
const std::vector<token_range_description>& entries() const&;
std::vector<token_range_description>&& entries() &&;
};
/**

View File

@@ -605,7 +605,7 @@ private:
public:
collection_iterator(managed_bytes_view_opt v = {})
: _v(v.value_or(managed_bytes_view{}))
, _rem(_v.empty() ? 0 : read_collection_size(_v))
, _rem(_v.empty() ? 0 : read_collection_size(_v, cql_serialization_format::internal()))
{
if (_rem != 0) {
parse();
@@ -650,8 +650,8 @@ template<>
void collection_iterator<std::pair<managed_bytes_view, managed_bytes_view>>::parse() {
assert(_rem > 0);
_next = _v;
auto k = read_collection_value(_next);
auto v = read_collection_value(_next);
auto k = read_collection_value(_next, cql_serialization_format::internal());
auto v = read_collection_value(_next, cql_serialization_format::internal());
_current = std::make_pair(k, v);
}
@@ -659,7 +659,7 @@ template<>
void collection_iterator<managed_bytes_view>::parse() {
assert(_rem > 0);
_next = _v;
auto k = read_collection_value(_next);
auto k = read_collection_value(_next, cql_serialization_format::internal());
_current = k;
}
@@ -728,7 +728,7 @@ auto make_maybe_back_inserter(Container& c, const abstract_type& type, collectio
static size_t collection_size(const managed_bytes_opt& bo) {
if (bo) {
managed_bytes_view mbv(*bo);
return read_collection_size(mbv);
return read_collection_size(mbv, cql_serialization_format::internal());
}
return 0;
}
@@ -750,7 +750,7 @@ static managed_bytes merge(const collection_type_impl& ctype, const managed_byte
// note order: set_union, when finding doubles, use value from first1 (j here). So
// since this is next, it has prio
std::set_union(j, e, i, e, make_maybe_back_inserter(res, *type, collection_iterator<managed_bytes_view>(deleted)), cmp);
return map_type_impl::serialize_partially_deserialized_form_fragmented(res);
return map_type_impl::serialize_partially_deserialized_form_fragmented(res, cql_serialization_format::internal());
}
static managed_bytes merge(const set_type_impl& ctype, const managed_bytes_opt& prev, const managed_bytes_opt& next, const managed_bytes_opt& deleted) {
std::vector<managed_bytes_view> res;
@@ -761,7 +761,7 @@ static managed_bytes merge(const set_type_impl& ctype, const managed_bytes_opt&
};
collection_iterator<managed_bytes_view> e, i(prev), j(next), d(deleted);
std::set_union(j, e, i, e, make_maybe_back_inserter(res, *type, d), cmp);
return set_type_impl::serialize_partially_deserialized_form_fragmented(res);
return set_type_impl::serialize_partially_deserialized_form_fragmented(res, cql_serialization_format::internal());
}
static managed_bytes merge(const user_type_impl& type, const managed_bytes_opt& prev, const managed_bytes_opt& next, const managed_bytes_opt& deleted) {
std::vector<managed_bytes_view_opt> res(type.size());
@@ -812,14 +812,15 @@ static managed_bytes_opt get_preimage_col_value(const column_definition& cdef, c
// flatten set
[&] (const set_type_impl& type) {
auto v = pirow->get_view(cdef.name_as_text());
auto n = read_collection_size(v);
auto f = cql_serialization_format::internal();
auto n = read_collection_size(v, f);
std::vector<managed_bytes> tmp;
tmp.reserve(n);
while (n--) {
tmp.emplace_back(read_collection_value(v)); // key
read_collection_value(v); // value. ignore.
tmp.emplace_back(read_collection_value(v, f)); // key
read_collection_value(v, f); // value. ignore.
}
return set_type_impl::serialize_partially_deserialized_form_fragmented({tmp.begin(), tmp.end()});
return set_type_impl::serialize_partially_deserialized_form_fragmented({tmp.begin(), tmp.end()}, f);
},
[&] (const abstract_type& o) -> managed_bytes {
return pirow->get_blob_fragmented(cdef.name_as_text());
@@ -1121,7 +1122,7 @@ struct process_row_visitor {
visit_collection(v);
managed_bytes_opt added_keys = v._added_keys.empty() ? std::nullopt :
std::optional{set_type_impl::serialize_partially_deserialized_form_fragmented(v._added_keys)};
std::optional{set_type_impl::serialize_partially_deserialized_form_fragmented(v._added_keys, cql_serialization_format::internal())};
return {
v._is_column_delete,
@@ -1177,7 +1178,7 @@ struct process_row_visitor {
visit_collection(v);
managed_bytes_opt added_cells = v._added_cells.empty() ? std::nullopt :
std::optional{map_type_impl::serialize_partially_deserialized_form_fragmented(v._added_cells)};
std::optional{map_type_impl::serialize_partially_deserialized_form_fragmented(v._added_cells, cql_serialization_format::internal())};
return {
v._is_column_delete,
@@ -1197,7 +1198,7 @@ struct process_row_visitor {
// then we deserialize again when merging images below
managed_bytes_opt deleted_elements = std::nullopt;
if (!deleted_keys.empty()) {
deleted_elements = set_type_impl::serialize_partially_deserialized_form_fragmented(deleted_keys);
deleted_elements = set_type_impl::serialize_partially_deserialized_form_fragmented(deleted_keys, cql_serialization_format::internal());
}
// delta

View File

@@ -15,6 +15,10 @@
extern logging::logger cdc_log;
namespace cdc {
extern const api::timestamp_clock::duration generation_leeway;
} // namespace cdc
static api::timestamp_type to_ts(db_clock::time_point tp) {
// This assumes that timestamp_clock and db_clock have the same epochs.
return std::chrono::duration_cast<api::timestamp_clock::duration>(tp.time_since_epoch()).count();
@@ -36,7 +40,7 @@ static cdc::stream_id get_stream(
// non-static for testing
cdc::stream_id get_stream(
const utils::chunked_vector<cdc::token_range_description>& entries,
const std::vector<cdc::token_range_description>& entries,
dht::token tok) {
if (entries.empty()) {
on_internal_error(cdc_log, "get_stream: entries empty");
@@ -69,7 +73,7 @@ bool cdc::metadata::streams_available() const {
cdc::stream_id cdc::metadata::get_stream(api::timestamp_type ts, dht::token tok) {
auto now = api::new_timestamp();
if (ts > now + get_generation_leeway().count()) {
if (ts > now + generation_leeway.count()) {
throw exceptions::invalid_request_exception(format(
"cdc: attempted to get a stream \"from the future\" ({}; current server time: {})."
" With CDC you cannot send writes with timestamps arbitrarily into the future, because we don't"
@@ -82,43 +86,27 @@ cdc::stream_id cdc::metadata::get_stream(api::timestamp_type ts, dht::token tok)
// Nothing protects us from that until we start using transactions for generation switching.
}
auto it = gen_used_at(now - get_generation_leeway().count());
if (it != _gens.end()) {
// Garbage-collect generations that will no longer be used.
it = _gens.erase(_gens.begin(), it);
}
if (ts <= now - get_generation_leeway().count()) {
// We reject the write if `ts <= now - generation_leeway` and the write is not to the current generation, which
// happens iff one of the following is true:
// - the write is to no generation,
// - the write is to a generation older than the generation under `it`,
// - the write is to the generation under `it` and that generation is not the current generation.
// Note that we cannot distinguish the first and second cases because we garbage-collect obsolete generations,
// but we can check if one of them takes place (`it == _gens.end() || ts < it->first`). These three conditions
// are sufficient. The write with `ts <= now - generation_leeway` cannot be to one of the generations following
// the generation under `it` because that generation was operating at `now - generation_leeway`.
bool is_previous_gen = it != _gens.end() && std::next(it) != _gens.end() && std::next(it)->first <= now;
if (it == _gens.end() || ts < it->first || is_previous_gen) {
throw exceptions::invalid_request_exception(format(
"cdc: attempted to get a stream \"from the past\" ({}; current server time: {})."
" With CDC you cannot send writes with timestamps too far into the past, because that would break"
" consistency properties.\n"
"We *do* allow sending writes into the near past, but our ability to do that is limited."
" Are you using client-side timestamps? Make sure your clocks are well-synchronized"
" with the database's clocks.", format_timestamp(ts), format_timestamp(now)));
}
}
it = _gens.begin();
if (it == _gens.end() || ts < it->first) {
auto it = gen_used_at(now);
if (it == _gens.end()) {
throw std::runtime_error(format(
"cdc::metadata::get_stream: could not find any CDC stream for timestamp {}."
" Are we in the middle of a cluster upgrade?", format_timestamp(ts)));
"cdc::metadata::get_stream: could not find any CDC stream (current time: {})."
" Are we in the middle of a cluster upgrade?", format_timestamp(now)));
}
// Find the generation operating at `ts`.
// Garbage-collect generations that will no longer be used.
it = _gens.erase(_gens.begin(), it);
if (it->first > ts) {
throw exceptions::invalid_request_exception(format(
"cdc: attempted to get a stream from an earlier generation than the currently used one."
" With CDC you cannot send writes with timestamps too far into the past, because that would break"
" consistency properties (write timestamp: {}, current generation started at: {})",
format_timestamp(ts), format_timestamp(it->first)));
}
// With `generation_leeway` we allow sending writes to the near future. It might happen
// that `ts` doesn't belong to the current generation ("current" according to our clock),
// but to the next generation. Adjust for this case:
{
auto next_it = std::next(it);
while (next_it != _gens.end() && next_it->first <= ts) {
@@ -159,8 +147,8 @@ bool cdc::metadata::known_or_obsolete(db_clock::time_point tp) const {
++it;
}
// Check if the generation is obsolete.
return it != _gens.end() && it->first <= api::new_timestamp() - get_generation_leeway().count();
// Check if some new generation has already superseded this one.
return it != _gens.end() && it->first <= api::new_timestamp();
}
bool cdc::metadata::insert(db_clock::time_point tp, topology_description&& gen) {
@@ -169,7 +157,7 @@ bool cdc::metadata::insert(db_clock::time_point tp, topology_description&& gen)
}
auto now = api::new_timestamp();
auto it = gen_used_at(now - get_generation_leeway().count());
auto it = gen_used_at(now);
if (it != _gens.end()) {
// Garbage-collect generations that will no longer be used.

View File

@@ -42,9 +42,7 @@ class metadata final {
container_t::const_iterator gen_used_at(api::timestamp_type ts) const;
public:
/* Is a generation with the given timestamp already known or obsolete? It is obsolete if and only if
* it is older than the generation operating at `now - get_generation_leeway()`.
*/
/* Is a generation with the given timestamp already known or superseded by a newer generation? */
bool known_or_obsolete(db_clock::time_point) const;
/* Are there streams available. I.e. valid for time == now. If this is false, any writes to
@@ -56,9 +54,8 @@ public:
*
* If the provided timestamp is too far away "into the future" (where "now" is defined according to our local clock),
* we reject the get_stream query. This is because the resulting stream might belong to a generation which we don't
* yet know about. Similarly, we reject queries to the previous generations if the timestamp is too far away "into
* the past". The amount of leeway (how much "into the future" or "into the past" we allow `ts` to be) is defined by
* `get_generation_leeway()`.
* yet know about. The amount of leeway (how much "into the future" we allow `ts` to be) is defined
* by the `cdc::generation_leeway` constant.
*/
stream_id get_stream(api::timestamp_type ts, dht::token tok);

View File

@@ -21,6 +21,8 @@ class row_tombstone;
class collection_mutation;
class cql_serialization_format;
// An auxiliary struct used to (de)construct collection_mutations.
// Unlike collection_mutation which is a serialized blob, this struct allows to inspect logical units of information
// (tombstone and cells) inside the mutation easily.
@@ -129,4 +131,4 @@ collection_mutation merge(const abstract_type&, collection_mutation_view, collec
collection_mutation difference(const abstract_type&, collection_mutation_view, collection_mutation_view);
// Serializes the given collection of cells to a sequence of bytes ready to be sent over the CQL protocol.
bytes_ostream serialize_for_cql(const abstract_type&, collection_mutation_view);
bytes_ostream serialize_for_cql(const abstract_type&, collection_mutation_view, cql_serialization_format);

View File

@@ -12,11 +12,11 @@
class schema;
class partition_key;
class clustering_row;
struct atomic_cell_view;
struct tombstone;
namespace db::view {
struct clustering_or_static_row;
struct view_key_and_action;
}
@@ -118,7 +118,7 @@ class collection_column_computation final : public column_computation {
using collection_kv = std::pair<bytes_view, atomic_cell_view>;
void operate_on_collection_entries(
std::invocable<collection_kv*, collection_kv*, tombstone> auto&& old_and_new_row_func, const schema& schema,
const partition_key& key, const db::view::clustering_or_static_row& update, const std::optional<db::view::clustering_or_static_row>& existing) const;
const partition_key& key, const clustering_row& update, const std::optional<clustering_row>& existing) const;
public:
static collection_column_computation for_keys(const bytes& collection_name) {
@@ -141,6 +141,5 @@ public:
return true;
}
std::vector<db::view::view_key_and_action> compute_values_with_action(const schema& schema, const partition_key& key,
const db::view::clustering_or_static_row& row, const std::optional<db::view::clustering_or_static_row>& existing) const;
std::vector<db::view::view_key_and_action> compute_values_with_action(const schema& schema, const partition_key& key, const clustering_row& row, const std::optional<clustering_row>& existing) const;
};

View File

@@ -28,7 +28,6 @@
#include <seastar/util/closeable.hh>
#include <seastar/core/shared_ptr.hh>
#include "dht/i_partitioner.hh"
#include "sstables/sstables.hh"
#include "sstables/sstable_writer.hh"
#include "sstables/progress_monitor.hh"
@@ -42,7 +41,6 @@
#include "mutation_compactor.hh"
#include "leveled_manifest.hh"
#include "dht/token.hh"
#include "dht/partition_filter.hh"
#include "mutation_writer/shard_based_splitting_writer.hh"
#include "mutation_writer/partition_based_splitting_writer.hh"
#include "mutation_source_metadata.hh"
@@ -168,7 +166,7 @@ std::ostream& operator<<(std::ostream& os, pretty_printed_throughput tp) {
}
static api::timestamp_type get_max_purgeable_timestamp(const table_state& table_s, sstable_set::incremental_selector& selector,
const std::unordered_set<shared_sstable>& compacting_set, const dht::decorated_key& dk, uint64_t& bloom_filter_checks) {
const std::unordered_set<shared_sstable>& compacting_set, const dht::decorated_key& dk) {
auto timestamp = table_s.min_memtable_timestamp();
std::optional<utils::hashed_key> hk;
for (auto&& sst : boost::range::join(selector.select(dk).sstables, table_s.compacted_undeleted_sstables())) {
@@ -179,7 +177,6 @@ static api::timestamp_type get_max_purgeable_timestamp(const table_state& table_
hk = sstables::sstable::make_hashed_key(*table_s.schema(), dk.key());
}
if (sst->filter_has_key(*hk)) {
bloom_filter_checks++;
timestamp = std::min(timestamp, sst->get_stats_metadata().min_timestamp);
}
}
@@ -223,13 +220,13 @@ public:
~compaction_write_monitor() {
if (_sst) {
_table_s.get_backlog_tracker().revert_charges(_sst);
_table_s.get_compaction_strategy().get_backlog_tracker().revert_charges(_sst);
}
}
virtual void on_write_started(const sstables::writer_offset_tracker& tracker) override {
_tracker = &tracker;
_table_s.get_backlog_tracker().register_partially_written_sstable(_sst, *this);
_table_s.get_compaction_strategy().get_backlog_tracker().register_partially_written_sstable(_sst, *this);
}
virtual void on_data_write_completed() override {
@@ -354,7 +351,7 @@ struct compaction_read_monitor_generator final : public read_monitor_generator {
public:
virtual void on_read_started(const sstables::reader_position_tracker& tracker) override {
_tracker = &tracker;
_table_s.get_backlog_tracker().register_compacting_sstable(_sst, *this);
_table_s.get_compaction_strategy().get_backlog_tracker().register_compacting_sstable(_sst, *this);
}
virtual void on_read_completed() override {
@@ -373,7 +370,7 @@ struct compaction_read_monitor_generator final : public read_monitor_generator {
void remove_sstable() {
if (_sst) {
_table_s.get_backlog_tracker().revert_charges(_sst);
_table_s.get_compaction_strategy().get_backlog_tracker().revert_charges(_sst);
}
_sst = {};
}
@@ -385,7 +382,7 @@ struct compaction_read_monitor_generator final : public read_monitor_generator {
// We failed to finish handling this SSTable, so we have to update the backlog_tracker
// about it.
if (_sst) {
_table_s.get_backlog_tracker().revert_charges(_sst);
_table_s.get_compaction_strategy().get_backlog_tracker().revert_charges(_sst);
}
}
@@ -415,12 +412,9 @@ private:
class formatted_sstables_list {
bool _include_origin = true;
std::vector<std::string> _ssts;
std::vector<sstring> _ssts;
public:
formatted_sstables_list() = default;
void reserve(size_t n) {
_ssts.reserve(n);
}
explicit formatted_sstables_list(const std::vector<shared_sstable>& ssts, bool include_origin) : _include_origin(include_origin) {
_ssts.reserve(ssts.size());
for (const auto& sst : ssts) {
@@ -439,7 +433,9 @@ public:
};
std::ostream& operator<<(std::ostream& os, const formatted_sstables_list& lst) {
fmt::print(os, "[{}]", fmt::join(lst._ssts, ","));
os << "[";
os << boost::algorithm::join(lst._ssts, ",");
os << "]";
return os;
}
@@ -464,8 +460,6 @@ protected:
uint64_t _start_size = 0;
uint64_t _end_size = 0;
uint64_t _estimated_partitions = 0;
double _estimated_droppable_tombstone_ratio = 0;
uint64_t _bloom_filter_checks = 0;
db::replay_position _rp;
encoding_stats_collector _stats_collector;
bool _can_split_large_partition = false;
@@ -522,7 +516,7 @@ protected:
auto max_sstable_size = std::max<uint64_t>(_max_sstable_size, 1);
uint64_t estimated_sstables = std::max(1UL, uint64_t(ceil(double(_start_size) / max_sstable_size)));
return std::min(uint64_t(ceil(double(_estimated_partitions) / estimated_sstables)),
_table_s.get_compaction_strategy().adjust_partition_estimate(_ms_metadata, _estimated_partitions, _schema));
_table_s.get_compaction_strategy().adjust_partition_estimate(_ms_metadata, _estimated_partitions));
}
void setup_new_sstable(shared_sstable& sst) {
@@ -577,15 +571,14 @@ protected:
return bool(_sstable_set);
}
compaction_writer create_gc_compaction_writer(run_id gc_run) const {
compaction_writer create_gc_compaction_writer() const {
auto sst = _sstable_creator(this_shard_id());
auto&& priority = _io_priority;
auto monitor = std::make_unique<compaction_write_monitor>(sst, _table_s, maximum_timestamp(), _sstable_level);
sstable_writer_config cfg = _table_s.configure_writer("garbage_collection");
cfg.run_identifier = gc_run;
cfg.run_identifier = _run_identifier;
cfg.monitor = monitor.get();
uint64_t estimated_partitions = std::max(1UL, uint64_t(ceil(partitions_per_sstable() * _estimated_droppable_tombstone_ratio)));
auto writer = sst->get_writer(*schema(), partitions_per_sstable(), cfg, get_encoding_stats(), priority);
return compaction_writer(std::move(monitor), std::move(writer), std::move(sst));
}
@@ -605,14 +598,8 @@ protected:
// When compaction finishes, all the temporary sstables generated here will be deleted and removed
// from table's sstable set.
compacted_fragments_writer get_gc_compacted_fragments_writer() {
// because the temporary sstable run can overlap with the non-gc sstables run created by
// get_compacted_fragments_writer(), we have to use a different run_id. the gc_run_id is
// created here as:
// 1. it can be shared across all sstables created by this writer
// 2. it is optional, as gc writer is not always used
auto gc_run = run_id::create_random_id();
return compacted_fragments_writer(*this,
[this, gc_run] (const dht::decorated_key&) { return create_gc_compaction_writer(gc_run); },
[this] (const dht::decorated_key&) { return create_gc_compaction_writer(); },
[this] (compaction_writer* cw) { stop_gc_compaction_writer(cw); },
_stop_request_observable);
}
@@ -629,8 +616,8 @@ protected:
return _used_garbage_collected_sstables;
}
virtual bool enable_garbage_collected_sstable_writer() const noexcept {
return _contains_multi_fragment_runs && _max_sstable_size != std::numeric_limits<uint64_t>::max() && bool(_replacer);
bool enable_garbage_collected_sstable_writer() const noexcept {
return _contains_multi_fragment_runs && _max_sstable_size != std::numeric_limits<uint64_t>::max();
}
public:
compaction& operator=(const compaction&) = delete;
@@ -652,11 +639,9 @@ private:
future<> setup() {
auto ssts = make_lw_shared<sstables::sstable_set>(make_sstable_set_for_input());
formatted_sstables_list formatted_msg;
formatted_msg.reserve(_sstables.size());
auto fully_expired = _table_s.fully_expired_sstables(_sstables, gc_clock::now());
min_max_tracker<api::timestamp_type> timestamp_tracker;
double sum_of_estimated_droppable_tombstone_ratio = 0;
_input_sstable_generations.reserve(_sstables.size());
for (auto& sst : _sstables) {
co_await coroutine::maybe_yield();
@@ -691,16 +676,12 @@ private:
// this is kind of ok, esp. since we will hopefully not be trying to recover based on
// compacted sstables anyway (CL should be clean by then).
_rp = std::max(_rp, sst_stats.position);
auto gc_before = sst->get_gc_before_for_drop_estimation(gc_clock::now(), _table_s.get_tombstone_gc_state());
sum_of_estimated_droppable_tombstone_ratio += sst->estimate_droppable_tombstone_ratio(gc_before);
}
log_info("{} {}", report_start_desc(), formatted_msg);
if (ssts->all()->size() < _sstables.size()) {
log_debug("{} out of {} input sstables are fully expired sstables that will not be actually compacted",
_sstables.size() - ssts->all()->size(), _sstables.size());
}
// _estimated_droppable_tombstone_ratio could exceed 1.0 in certain cases, so limit it to 1.0.
_estimated_droppable_tombstone_ratio = std::min(1.0, sum_of_estimated_droppable_tombstone_ratio / ssts->all()->size());
_compacting = std::move(ssts);
@@ -774,7 +755,6 @@ protected:
.ended_at = ended_at,
.start_size = _start_size,
.end_size = _end_size,
.bloom_filter_checks = _bloom_filter_checks,
},
};
@@ -794,7 +774,7 @@ protected:
log_info("{} {} sstables to {}. {} to {} (~{}% of original) in {}ms = {}. ~{} total partitions merged to {}.",
report_finish_desc(),
_input_sstable_generations.size(), new_sstables_msg, pretty_printed_data_size(_start_size), pretty_printed_data_size(_end_size), int(ratio * 100),
std::chrono::duration_cast<std::chrono::milliseconds>(duration).count(), pretty_printed_throughput(_start_size, duration),
std::chrono::duration_cast<std::chrono::milliseconds>(duration).count(), pretty_printed_throughput(_end_size, duration),
_cdata.total_partitions, _cdata.total_keys_written);
return ret;
@@ -815,7 +795,7 @@ private:
};
}
return [this] (const dht::decorated_key& dk) {
return get_max_purgeable_timestamp(_table_s, *_selector, _compacting_for_max_purgeable_func, dk, _bloom_filter_checks);
return get_max_purgeable_timestamp(_table_s, *_selector, _compacting_for_max_purgeable_func, dk);
};
}
@@ -925,7 +905,7 @@ void compacted_fragments_writer::split_large_partition() {
// will result in current fragment storing an inclusive end bound for last pos, and the
// next fragment storing an exclusive start bound for last pos. This is very important
// for not losing information on the range tombstone.
auto after_last_pos = position_in_partition::after_key(*_c.schema(), _current_partition.last_pos.key());
auto after_last_pos = position_in_partition::after_key(_current_partition.last_pos.key());
if (_current_partition.current_emitted_tombstone) {
auto rtc = range_tombstone_change(after_last_pos, tombstone{});
_c.log_debug("Closing active tombstone {} with {} for partition {}", _current_partition.current_emitted_tombstone, rtc, *_current_partition.dk);
@@ -968,7 +948,7 @@ void compacted_fragments_writer::consume_new_partition(const dht::decorated_key&
.dk = dk,
.tombstone = tombstone(),
.current_emitted_tombstone = tombstone(),
.last_pos = position_in_partition::for_partition_start(),
.last_pos = position_in_partition(position_in_partition::partition_start_tag_t()),
.is_splitting_partition = false
};
do_consume_new_partition(dk);
@@ -1015,6 +995,51 @@ void compacted_fragments_writer::consume_end_of_stream() {
}
}
class reshape_compaction : public compaction {
public:
reshape_compaction(table_state& table_s, compaction_descriptor descriptor, compaction_data& cdata)
: compaction(table_s, std::move(descriptor), cdata) {
}
virtual sstables::sstable_set make_sstable_set_for_input() const override {
return sstables::make_partitioned_sstable_set(_schema, false);
}
flat_mutation_reader_v2 make_sstable_reader() const override {
return _compacting->make_local_shard_sstable_reader(_schema,
_permit,
query::full_partition_range,
_schema->full_slice(),
_io_priority,
tracing::trace_state_ptr(),
::streamed_mutation::forwarding::no,
::mutation_reader::forwarding::no,
default_read_monitor_generator());
}
std::string_view report_start_desc() const override {
return "Reshaping";
}
std::string_view report_finish_desc() const override {
return "Reshaped";
}
virtual compaction_writer create_compaction_writer(const dht::decorated_key& dk) override {
auto sst = _sstable_creator(this_shard_id());
setup_new_sstable(sst);
sstable_writer_config cfg = make_sstable_writer_config(compaction_type::Reshape);
return compaction_writer{sst->get_writer(*_schema, partitions_per_sstable(), cfg, get_encoding_stats(), _io_priority), sst};
}
virtual void stop_sstable_writer(compaction_writer* writer) override {
if (writer) {
finish_new_sstable(writer);
}
}
};
class regular_compaction : public compaction {
// keeps track of monitors for input sstable, which are responsible for adjusting backlog as compaction progresses.
mutable compaction_read_monitor_generator _monitor_generator;
@@ -1124,13 +1149,12 @@ private:
}
void update_pending_ranges() {
auto pending_replacements = std::exchange(_cdata.pending_replacements, {});
if (!_sstable_set || _sstable_set->all()->empty() || pending_replacements.empty()) { // set can be empty for testing scenario.
if (!_sstable_set || _sstable_set->all()->empty() || _cdata.pending_replacements.empty()) { // set can be empty for testing scenario.
return;
}
// Releases reference to sstables compacted by this compaction or another, both of which belongs
// to the same column family
for (auto& pending_replacement : pending_replacements) {
for (auto& pending_replacement : _cdata.pending_replacements) {
for (auto& sst : pending_replacement.removed) {
// Set may not contain sstable to be removed because this compaction may have started
// before the creation of that sstable.
@@ -1144,76 +1168,35 @@ private:
}
}
_selector.emplace(_sstable_set->make_incremental_selector());
}
};
class reshape_compaction : public regular_compaction {
private:
bool has_sstable_replacer() const noexcept {
return bool(_replacer);
}
public:
reshape_compaction(table_state& table_s, compaction_descriptor descriptor, compaction_data& cdata)
: regular_compaction(table_s, std::move(descriptor), cdata) {
}
virtual sstables::sstable_set make_sstable_set_for_input() const override {
return sstables::make_partitioned_sstable_set(_schema, false);
}
// Unconditionally enable incremental compaction if the strategy specifies a max output size, e.g. LCS.
virtual bool enable_garbage_collected_sstable_writer() const noexcept override {
return _max_sstable_size != std::numeric_limits<uint64_t>::max() && bool(_replacer);
}
flat_mutation_reader_v2 make_sstable_reader() const override {
return _compacting->make_local_shard_sstable_reader(_schema,
_permit,
query::full_partition_range,
_schema->full_slice(),
_io_priority,
tracing::trace_state_ptr(),
::streamed_mutation::forwarding::no,
::mutation_reader::forwarding::no,
default_read_monitor_generator());
}
std::string_view report_start_desc() const override {
return "Reshaping";
}
std::string_view report_finish_desc() const override {
return "Reshaped";
}
virtual compaction_writer create_compaction_writer(const dht::decorated_key& dk) override {
auto sst = _sstable_creator(this_shard_id());
setup_new_sstable(sst);
sstable_writer_config cfg = make_sstable_writer_config(compaction_type::Reshape);
return compaction_writer{sst->get_writer(*_schema, partitions_per_sstable(), cfg, get_encoding_stats(), _io_priority), sst};
}
virtual void stop_sstable_writer(compaction_writer* writer) override {
if (writer) {
if (has_sstable_replacer()) {
regular_compaction::stop_sstable_writer(writer);
} else {
finish_new_sstable(writer);
}
}
}
virtual void on_end_of_compaction() override {
if (has_sstable_replacer()) {
regular_compaction::on_end_of_compaction();
}
_cdata.pending_replacements.clear();
}
};
class cleanup_compaction final : public regular_compaction {
class incremental_owned_ranges_checker {
const dht::token_range_vector& _sorted_owned_ranges;
mutable dht::token_range_vector::const_iterator _it;
public:
incremental_owned_ranges_checker(const dht::token_range_vector& sorted_owned_ranges)
: _sorted_owned_ranges(sorted_owned_ranges)
, _it(_sorted_owned_ranges.begin()) {
}
// Must be called with increasing token values.
bool belongs_to_current_node(const dht::token& t) const {
// While token T is after a range Rn, advance the iterator.
// iterator will be stopped at a range which either overlaps with T (if T belongs to node),
// or at a range which is after T (if T doesn't belong to this node).
while (_it != _sorted_owned_ranges.end() && _it->after(t, dht::token_comparator())) {
_it++;
}
return _it != _sorted_owned_ranges.end() && _it->contains(t, dht::token_comparator());
}
};
owned_ranges_ptr _owned_ranges;
mutable dht::incremental_owned_ranges_checker _owned_ranges_checker;
incremental_owned_ranges_checker _owned_ranges_checker;
private:
// Called in a seastar thread
dht::partition_range_vector
@@ -1226,8 +1209,21 @@ private:
return dht::partition_range::make({sst->get_first_decorated_key(), true},
{sst->get_last_decorated_key(), true});
}));
// optimize set of potentially overlapping ranges by deoverlapping them.
non_owned_ranges = dht::partition_range::deoverlap(std::move(non_owned_ranges), dht::ring_position_comparator(*_schema));
return dht::subtract_ranges(*_schema, non_owned_ranges, std::move(owned_ranges)).get();
// subtract *each* owned range from the partition range of *each* sstable*,
// such that we'll be left only with a set of non-owned ranges.
for (auto& owned_range : owned_ranges) {
dht::partition_range_vector new_non_owned_ranges;
for (auto& non_owned_range : non_owned_ranges) {
auto ret = non_owned_range.subtract(owned_range, dht::ring_position_comparator(*_schema));
new_non_owned_ranges.insert(new_non_owned_ranges.end(), ret.begin(), ret.end());
seastar::thread::maybe_yield();
}
non_owned_ranges = std::move(new_non_owned_ranges);
}
return non_owned_ranges;
}
protected:
virtual compaction_completion_desc
@@ -1627,7 +1623,7 @@ private:
uint64_t partitions_per_sstable(shard_id s) const {
uint64_t estimated_sstables = std::max(uint64_t(1), uint64_t(ceil(double(_estimation_per_shard[s].estimated_size) / _max_sstable_size)));
return std::min(uint64_t(ceil(double(_estimation_per_shard[s].estimated_partitions) / estimated_sstables)),
_table_s.get_compaction_strategy().adjust_partition_estimate(_ms_metadata, _estimation_per_shard[s].estimated_partitions, _schema));
_table_s.get_compaction_strategy().adjust_partition_estimate(_ms_metadata, _estimation_per_shard[s].estimated_partitions));
}
public:
resharding_compaction(table_state& table_s, sstables::compaction_descriptor descriptor, compaction_data& cdata)

View File

@@ -80,10 +80,8 @@ struct compaction_data {
}
void stop(sstring reason) {
if (!abort.abort_requested()) {
stop_requested = std::move(reason);
abort.request_abort();
}
stop_requested = std::move(reason);
abort.request_abort();
}
};
@@ -92,15 +90,12 @@ struct compaction_stats {
uint64_t start_size = 0;
uint64_t end_size = 0;
uint64_t validation_errors = 0;
// Bloom filter checks during max purgeable calculation
uint64_t bloom_filter_checks = 0;
compaction_stats& operator+=(const compaction_stats& r) {
ended_at = std::max(ended_at, r.ended_at);
start_size += r.start_size;
end_size += r.end_size;
validation_errors += r.validation_errors;
bloom_filter_checks += r.bloom_filter_checks;
return *this;
}
friend compaction_stats operator+(const compaction_stats& l, const compaction_stats& r) {

View File

@@ -66,8 +66,7 @@ public:
};
compaction_backlog_tracker(std::unique_ptr<impl> impl) : _impl(std::move(impl)) {}
compaction_backlog_tracker(compaction_backlog_tracker&&);
compaction_backlog_tracker& operator=(compaction_backlog_tracker&&) noexcept;
compaction_backlog_tracker(compaction_backlog_tracker&&) = default;
compaction_backlog_tracker(const compaction_backlog_tracker&) = delete;
~compaction_backlog_tracker();
@@ -75,7 +74,7 @@ public:
void replace_sstables(const std::vector<sstables::shared_sstable>& old_ssts, const std::vector<sstables::shared_sstable>& new_ssts);
void register_partially_written_sstable(sstables::shared_sstable sst, backlog_write_progress_manager& wp);
void register_compacting_sstable(sstables::shared_sstable sst, backlog_read_progress_manager& rp);
void copy_ongoing_charges(compaction_backlog_tracker& new_bt, bool move_read_charges = true) const;
void transfer_ongoing_charges(compaction_backlog_tracker& new_bt, bool move_read_charges = true);
void revert_charges(sstables::shared_sstable sst);
void disable() {

View File

@@ -7,19 +7,15 @@
*/
#include "compaction_manager.hh"
#include "compaction_descriptor.hh"
#include "compaction_strategy.hh"
#include "compaction_backlog_manager.hh"
#include "sstables/sstables.hh"
#include "sstables/sstables_manager.hh"
#include <memory>
#include <seastar/core/metrics.hh>
#include <seastar/core/coroutine.hh>
#include <seastar/coroutine/switch_to.hh>
#include <seastar/coroutine/parallel_for_each.hh>
#include <seastar/coroutine/maybe_yield.hh>
#include "sstables/exceptions.hh"
#include "sstables/sstable_directory.hh"
#include "locator/abstract_replication_strategy.hh"
#include "utils/fb_utilities.hh"
#include "utils/UUID_gen.hh"
@@ -80,23 +76,6 @@ public:
_compacting.erase(sst);
}
}
class update_me : public compaction_manager::task::on_replacement {
compacting_sstable_registration& _registration;
public:
update_me(compacting_sstable_registration& registration)
: _registration{registration} {}
void on_removal(const std::vector<sstables::shared_sstable>& sstables) override {
_registration.release_compacting(sstables);
}
void on_addition(const std::vector<sstables::shared_sstable>& sstables) override {
_registration.register_compacting(sstables);
}
};
auto update_on_sstable_replacement() {
return update_me(*this);
}
};
sstables::compaction_data compaction_manager::create_compaction_data() {
@@ -298,7 +277,7 @@ compaction_manager::task::task(compaction_manager& mgr, compaction::table_state*
, _description(std::move(desc))
{}
future<compaction_manager::compaction_stats_opt> compaction_manager::perform_task(shared_ptr<compaction_manager::task> task, throw_if_stopping do_throw_if_stopping) {
future<compaction_manager::compaction_stats_opt> compaction_manager::perform_task(shared_ptr<compaction_manager::task> task) {
_tasks.push_back(task);
auto unregister_task = defer([this, task] {
_tasks.remove(task);
@@ -311,9 +290,6 @@ future<compaction_manager::compaction_stats_opt> compaction_manager::perform_tas
co_return res;
} catch (sstables::compaction_stopped_exception& e) {
cmlog.info("{}: stopped, reason: {}", *task, e.what());
if (do_throw_if_stopping) {
throw;
}
} catch (sstables::compaction_aborted_exception& e) {
cmlog.error("{}: aborted, reason: {}", *task, e.what());
_stats.errors++;
@@ -332,14 +308,14 @@ future<compaction_manager::compaction_stats_opt> compaction_manager::perform_tas
co_return std::nullopt;
}
future<sstables::compaction_result> compaction_manager::task::compact_sstables_and_update_history(sstables::compaction_descriptor descriptor, sstables::compaction_data& cdata, on_replacement& on_replace, can_purge_tombstones can_purge) {
future<sstables::compaction_result> compaction_manager::task::compact_sstables_and_update_history(sstables::compaction_descriptor descriptor, sstables::compaction_data& cdata, release_exhausted_func_t release_exhausted, can_purge_tombstones can_purge) {
if (!descriptor.sstables.size()) {
// if there is nothing to compact, just return.
co_return sstables::compaction_result{};
}
bool should_update_history = this->should_update_history(descriptor.options.type());
sstables::compaction_result res = co_await compact_sstables(std::move(descriptor), cdata, on_replace, std::move(can_purge));
sstables::compaction_result res = co_await compact_sstables(std::move(descriptor), cdata, std::move(release_exhausted), std::move(can_purge));
if (should_update_history) {
co_await update_history(*_compacting_table, res, cdata);
@@ -347,11 +323,8 @@ future<sstables::compaction_result> compaction_manager::task::compact_sstables_a
co_return res;
}
future<sstables::compaction_result> compaction_manager::task::compact_sstables(sstables::compaction_descriptor descriptor, sstables::compaction_data& cdata, on_replacement& on_replace, can_purge_tombstones can_purge,
sstables::offstrategy offstrategy) {
future<sstables::compaction_result> compaction_manager::task::compact_sstables(sstables::compaction_descriptor descriptor, sstables::compaction_data& cdata, release_exhausted_func_t release_exhausted, can_purge_tombstones can_purge) {
compaction::table_state& t = *_compacting_table;
if (can_purge) {
descriptor.enable_garbage_collection(t.main_sstable_set());
}
@@ -359,26 +332,15 @@ future<sstables::compaction_result> compaction_manager::task::compact_sstables(s
auto sst = t.make_sstable();
return sst;
};
descriptor.replacer = [this, &t, &on_replace, offstrategy] (sstables::compaction_completion_desc desc) {
descriptor.replacer = [this, &t, release_exhausted] (sstables::compaction_completion_desc desc) {
t.get_compaction_strategy().notify_completion(desc.old_sstables, desc.new_sstables);
_cm.propagate_replacement(t, desc.old_sstables, desc.new_sstables);
// on_replace updates the compacting registration with the old and new
// sstables. while on_compaction_completion() removes the old sstables
// from the table's sstable set, and adds the new ones to the sstable
// set.
// since the regular compactions exclude the sstables in the sstable
// set which are currently being compacted, if we want to ensure the
// exclusive access of compactions to an sstable we should guard it
// with the registration when adding/removing it to/from the sstable
// set. otherwise, the regular compaction would pick it up in the time
// window, where the sstables:
// - are still in the main set
// - are not being compacted.
on_replace.on_addition(desc.new_sstables);
auto old_sstables = desc.old_sstables;
t.on_compaction_completion(std::move(desc), offstrategy).get();
on_replace.on_removal(old_sstables);
t.on_compaction_completion(std::move(desc), sstables::offstrategy::no).get();
// Calls compaction manager's task for this compaction to release reference to exhausted SSTables.
if (release_exhausted) {
release_exhausted(old_sstables);
}
};
co_return co_await sstables::compact_sstables(std::move(descriptor), cdata, t);
@@ -423,7 +385,9 @@ protected:
sstables::compaction_strategy cs = t->get_compaction_strategy();
sstables::compaction_descriptor descriptor = cs.get_major_compaction_job(*t, _cm.get_candidates(*t));
auto compacting = compacting_sstable_registration(_cm, descriptor.sstables);
auto on_replace = compacting.update_on_sstable_replacement();
auto release_exhausted = [&compacting] (const std::vector<sstables::shared_sstable>& exhausted_sstables) {
compacting.release_compacting(exhausted_sstables);
};
setup_new_compaction(descriptor.run_identifier);
cmlog.info0("User initiated compaction started on behalf of {}.{}", t->schema()->ks_name(), t->schema()->cf_name());
@@ -435,7 +399,7 @@ protected:
// the exclusive lock can be freed to let regular compaction run in parallel to major
lock_holder.return_all();
co_await compact_sstables_and_update_history(std::move(descriptor), _compaction_data, on_replace);
co_await compact_sstables_and_update_history(std::move(descriptor), _compaction_data, std::move(release_exhausted));
finish_compaction();
@@ -482,12 +446,12 @@ protected:
}
};
future<> compaction_manager::run_custom_job(compaction::table_state& t, sstables::compaction_type type, const char* desc, noncopyable_function<future<>(sstables::compaction_data&)> job, throw_if_stopping do_throw_if_stopping) {
future<> compaction_manager::run_custom_job(compaction::table_state& t, sstables::compaction_type type, const char* desc, noncopyable_function<future<>(sstables::compaction_data&)> job) {
if (_state != state::enabled) {
return make_ready_future<>();
}
return perform_task(make_shared<custom_compaction_task>(*this, &t, type, desc, std::move(job)), do_throw_if_stopping).discard_result();
return perform_task(make_shared<custom_compaction_task>(*this, &t, type, desc, std::move(job))).discard_result();
}
future<> compaction_manager::update_static_shares(float static_shares) {
@@ -681,7 +645,6 @@ sstables::compaction_stopped_exception compaction_manager::task::make_compaction
compaction_manager::compaction_manager(config cfg, abort_source& as)
: _cfg(std::move(cfg))
, _compaction_submission_timer(compaction_sg().cpu, compaction_submission_callback())
, _compaction_controller(make_compaction_controller(compaction_sg(), static_shares(), [this] () -> float {
_last_backlog = backlog();
auto b = _last_backlog / available_memory();
@@ -716,7 +679,6 @@ compaction_manager::compaction_manager(config cfg, abort_source& as)
compaction_manager::compaction_manager()
: _cfg(config{ .available_memory = 1 })
, _compaction_submission_timer(compaction_sg().cpu, compaction_submission_callback())
, _compaction_controller(make_compaction_controller(compaction_sg(), 1, [] () -> float { return 1.0; }))
, _backlog_manager(_compaction_controller)
, _throughput_updater(serialized_action([this] { return update_throughput(throughput_mbs()); }))
@@ -774,46 +736,38 @@ void compaction_manager::register_metrics() {
void compaction_manager::enable() {
assert(_state == state::none || _state == state::disabled);
_state = state::enabled;
_compaction_submission_timer.arm_periodic(periodic_compaction_submission_interval());
_waiting_reevalution = postponed_compactions_reevaluation();
_compaction_submission_timer.arm(periodic_compaction_submission_interval());
postponed_compactions_reevaluation();
}
std::function<void()> compaction_manager::compaction_submission_callback() {
return [this] () mutable {
for (auto& e: _compaction_state) {
postpone_compaction_for_table(e.first);
submit(*e.first);
}
reevaluate_postponed_compactions();
};
}
future<> compaction_manager::postponed_compactions_reevaluation() {
while (true) {
co_await _postponed_reevaluation.when();
if (_state != state::enabled) {
_postponed.clear();
co_return;
}
// A task_state being reevaluated can re-insert itself into postponed list, which is the reason
// for moving the list to be processed into a local.
auto postponed = std::exchange(_postponed, {});
try {
for (auto it = postponed.begin(); it != postponed.end();) {
compaction::table_state* t = *it;
it = postponed.erase(it);
// skip reevaluation of a table_state that became invalid post its removal
if (!_compaction_state.contains(t)) {
continue;
}
auto s = t->schema();
cmlog.debug("resubmitting postponed compaction for table {}.{} [{}]", s->ks_name(), s->cf_name(), fmt::ptr(t));
submit(*t);
co_await coroutine::maybe_yield();
void compaction_manager::postponed_compactions_reevaluation() {
_waiting_reevalution = repeat([this] {
return _postponed_reevaluation.wait().then([this] {
if (_state != state::enabled) {
_postponed.clear();
return stop_iteration::yes;
}
} catch (...) {
_postponed.insert(postponed.begin(), postponed.end());
}
}
auto postponed = std::move(_postponed);
try {
for (auto& t : postponed) {
auto s = t->schema();
cmlog.debug("resubmitting postponed compaction for table {}.{} [{}]", s->ks_name(), s->cf_name(), fmt::ptr(t));
submit(*t);
}
} catch (...) {
_postponed = std::move(postponed);
}
return stop_iteration::no;
});
});
}
void compaction_manager::reevaluate_postponed_compactions() noexcept {
@@ -1018,7 +972,9 @@ protected:
}
auto compacting = compacting_sstable_registration(_cm, descriptor.sstables);
auto weight_r = compaction_weight_registration(&_cm, weight);
auto on_replace = compacting.update_on_sstable_replacement();
auto release_exhausted = [&compacting] (const std::vector<sstables::shared_sstable>& exhausted_sstables) {
compacting.release_compacting(exhausted_sstables);
};
cmlog.debug("Accepted compaction job: task={} ({} sstable(s)) of weight {} for {}.{}",
fmt::ptr(this), descriptor.sstables.size(), weight, t.schema()->ks_name(), t.schema()->cf_name());
@@ -1027,7 +983,7 @@ protected:
try {
bool should_update_history = this->should_update_history(descriptor.options.type());
sstables::compaction_result res = co_await compact_sstables(std::move(descriptor), _compaction_data, on_replace);
sstables::compaction_result res = co_await compact_sstables(std::move(descriptor), _compaction_data, std::move(release_exhausted));
finish_compaction();
if (should_update_history) {
// update_history can take a long time compared to
@@ -1068,7 +1024,7 @@ void compaction_manager::submit(compaction::table_state& t) {
// OK to drop future.
// waited via task->stop()
(void)perform_task(make_shared<regular_compaction_task>(*this, t)).then_wrapped([] (auto f) { f.ignore_ready_future(); });
(void)perform_task(make_shared<regular_compaction_task>(*this, t));
}
bool compaction_manager::can_perform_regular_compaction(compaction::table_state& t) {
@@ -1089,7 +1045,7 @@ future<> compaction_manager::maybe_wait_for_sstable_count_reduction(compaction::
desc.sstables
| boost::adaptors::transformed(std::mem_fn(&sstables::sstable::run_identifier))).size();
};
const auto threshold = size_t(std::max(schema->max_compaction_threshold(), 32));
const auto threshold = std::max(schema->max_compaction_threshold(), 32);
auto count = num_runs_for_compaction();
if (count <= threshold) {
cmlog.trace("No need to wait for sstable count reduction in {}.{}: {} <= {}",
@@ -1127,40 +1083,49 @@ public:
}
private:
future<> run_offstrategy_compaction(sstables::compaction_data& cdata) {
// Incrementally reshape the SSTables in maintenance set. The output of each reshape
// round is merged into the main set. The common case is that off-strategy input
// is mostly disjoint, e.g. repair-based node ops, then all the input will be
// reshaped in a single round. The incremental approach allows us to be space
// efficient (avoiding a 100% overhead) as we will incrementally replace input
// SSTables from maintenance set by output ones into main set.
// This procedure will reshape sstables in maintenance set until it's ready for
// integration into main set.
// It may require N reshape rounds before the set satisfies the strategy invariant.
// This procedure also only updates maintenance set at the end, on success.
// Otherwise, some overlapping could be introduced in the set after each reshape
// round, progressively degrading read amplification until integration happens.
// The drawback of this approach is the 2x space requirement as the old sstables
// will only be deleted at the end. The impact of this space requirement is reduced
// by the fact that off-strategy is serialized across all tables, meaning that the
// actual requirement is the size of the largest table's maintenance set.
compaction::table_state& t = *_compacting_table;
const auto& maintenance_sstables = t.maintenance_sstable_set();
// Filter out sstables that require view building, to avoid a race between off-strategy
// and view building. Refs: #11882
auto get_reshape_candidates = [&t] () {
auto maintenance_ssts = t.maintenance_sstable_set().all();
return boost::copy_range<std::vector<sstables::shared_sstable>>(*maintenance_ssts
| boost::adaptors::filtered([](const sstables::shared_sstable& sst) {
return !sst->requires_view_building();
}));
};
const auto old_sstables = boost::copy_range<std::vector<sstables::shared_sstable>>(*maintenance_sstables.all());
std::vector<sstables::shared_sstable> reshape_candidates = old_sstables;
std::vector<sstables::shared_sstable> sstables_to_remove;
std::unordered_set<sstables::shared_sstable> new_unused_sstables;
auto cleanup_new_unused_sstables_on_failure = defer([&new_unused_sstables] {
for (auto& sst : new_unused_sstables) {
sst->mark_for_deletion();
}
});
auto get_next_job = [&] () -> std::optional<sstables::compaction_descriptor> {
auto& iop = service::get_local_streaming_priority(); // run reshape in maintenance mode
auto desc = t.get_compaction_strategy().get_reshaping_job(get_reshape_candidates(), t.schema(), iop, sstables::reshape_mode::strict);
auto desc = t.get_compaction_strategy().get_reshaping_job(reshape_candidates, t.schema(), iop, sstables::reshape_mode::strict);
return desc.sstables.size() ? std::make_optional(std::move(desc)) : std::nullopt;
};
std::exception_ptr err;
while (auto desc = get_next_job()) {
auto compacting = compacting_sstable_registration(_cm, desc->sstables);
auto on_replace = compacting.update_on_sstable_replacement();
desc->creator = [this, &new_unused_sstables, &t] (shard_id dummy) {
auto sst = t.make_sstable();
new_unused_sstables.insert(sst);
return sst;
};
auto input = boost::copy_range<std::unordered_set<sstables::shared_sstable>>(desc->sstables);
sstables::compaction_result ret;
try {
sstables::compaction_result _ = co_await compact_sstables(std::move(*desc), _compaction_data, on_replace,
compaction_manager::can_purge_tombstones::no,
sstables::offstrategy::yes);
ret = co_await sstables::compact_sstables(std::move(*desc), cdata, t);
} catch (sstables::compaction_stopped_exception&) {
// If off-strategy compaction stopped on user request, let's not discard the partial work.
// Therefore, both un-reshaped and reshaped data will be integrated into main set, allowing
@@ -1169,20 +1134,41 @@ private:
break;
}
_performed = true;
// update list of reshape candidates without input but with output added to it
auto it = boost::remove_if(reshape_candidates, [&] (auto& s) { return input.contains(s); });
reshape_candidates.erase(it, reshape_candidates.end());
std::move(ret.new_sstables.begin(), ret.new_sstables.end(), std::back_inserter(reshape_candidates));
// If compaction strategy is unable to reshape input data in a single round, it may happen that a SSTable A
// created in round 1 will be compacted in a next round producing SSTable B. As SSTable A is no longer needed,
// it can be removed immediately. Let's remove all such SSTables immediately to reduce off-strategy space requirement.
// Input SSTables from maintenance set can only be removed later, as SSTable sets are only updated on completion.
auto can_remove_now = [&] (const sstables::shared_sstable& s) { return new_unused_sstables.contains(s); };
for (auto&& sst : input) {
if (can_remove_now(sst)) {
co_await sst->unlink();
new_unused_sstables.erase(std::move(sst));
} else {
sstables_to_remove.push_back(std::move(sst));
}
}
}
// There might be some remaining sstables in maintenance set that didn't require reshape, or the
// user has aborted off-strategy. So we can only integrate them into the main set, such that
// they become candidates for regular compaction. We cannot hold them forever in maintenance set,
// as that causes read and space amplification issues.
if (auto sstables = get_reshape_candidates(); sstables.size()) {
auto completion_desc = sstables::compaction_completion_desc{
.old_sstables = sstables, // removes from maintenance set.
.new_sstables = sstables, // adds into main set.
};
co_await t.on_compaction_completion(std::move(completion_desc), sstables::offstrategy::yes);
}
// at this moment reshape_candidates contains a set of sstables ready for integration into main set
auto completion_desc = sstables::compaction_completion_desc{
.old_sstables = std::move(old_sstables),
.new_sstables = std::move(reshape_candidates)
};
co_await t.on_compaction_completion(std::move(completion_desc), sstables::offstrategy::yes);
cleanup_new_unused_sstables_on_failure.cancel();
// By marking input sstables for deletion instead, the ones which require view building will stay in the staging
// directory until they're moved to the main dir when the time comes. Also, that allows view building to resume
// on restart if there's a crash midway.
for (auto& sst : sstables_to_remove) {
sst->mark_for_deletion();
}
if (err) {
co_await coroutine::return_exception_ptr(std::move(err));
}
@@ -1205,11 +1191,9 @@ protected:
std::exception_ptr ex;
try {
compaction::table_state& t = *_compacting_table;
{
auto maintenance_sstables = t.maintenance_sstable_set().all();
cmlog.info("Starting off-strategy compaction for {}.{}, {} candidates were found",
t.schema()->ks_name(), t.schema()->cf_name(), maintenance_sstables->size());
}
auto maintenance_sstables = t.maintenance_sstable_set().all();
cmlog.info("Starting off-strategy compaction for {}.{}, {} candidates were found",
t.schema()->ks_name(), t.schema()->cf_name(), maintenance_sstables->size());
co_await run_offstrategy_compaction(_compaction_data);
finish_compaction();
cmlog.info("Done with off-strategy compaction for {}.{}", t.schema()->ks_name(), t.schema()->cf_name());
@@ -1282,7 +1266,9 @@ private:
sstable_level, sstables::compaction_descriptor::default_max_sstable_bytes, run_identifier, _options);
// Releases reference to cleaned sstable such that respective used disk space can be freed.
auto on_replace = _compacting.update_on_sstable_replacement();
auto release_exhausted = [this] (const std::vector<sstables::shared_sstable>& exhausted_sstables) {
_compacting.release_compacting(exhausted_sstables);
};
setup_new_compaction(descriptor.run_identifier);
@@ -1291,7 +1277,7 @@ private:
std::exception_ptr ex;
try {
sstables::compaction_result res = co_await compact_sstables_and_update_history(std::move(descriptor), _compaction_data, on_replace, _can_purge);
sstables::compaction_result res = co_await compact_sstables_and_update_history(std::move(descriptor), _compaction_data, std::move(release_exhausted), _can_purge);
finish_compaction();
_cm.reevaluate_postponed_compactions();
co_return res; // done with current sstable
@@ -1448,26 +1434,14 @@ protected:
co_return std::nullopt;
}
private:
// Releases reference to cleaned files such that respective used disk space can be freed.
void release_exhausted(std::vector<sstables::shared_sstable> exhausted_sstables) {
_compacting.release_compacting(exhausted_sstables);
}
future<> run_cleanup_job(sstables::compaction_descriptor descriptor) {
co_await coroutine::switch_to(_cm.compaction_sg().cpu);
// Releases reference to cleaned files such that respective used disk space can be freed.
using update_registration = compacting_sstable_registration::update_me;
class release_exhausted : public update_registration {
sstables::compaction_descriptor& _desc;
public:
release_exhausted(compacting_sstable_registration& registration, sstables::compaction_descriptor& desc)
: update_registration{registration}
, _desc{desc} {}
void on_removal(const std::vector<sstables::shared_sstable>& sstables) override {
auto exhausted = boost::copy_range<std::unordered_set<sstables::shared_sstable>>(sstables);
std::erase_if(_desc.sstables, [&] (const sstables::shared_sstable& sst) {
return exhausted.contains(sst);
});
update_registration::on_removal(sstables);
}
};
release_exhausted on_replace{_compacting, descriptor};
for (;;) {
compaction_backlog_tracker user_initiated(std::make_unique<user_initiated_backlog_tracker>(_cm._compaction_controller.backlog_of_shares(200), _cm.available_memory()));
_cm.register_backlog_tracker(user_initiated);
@@ -1475,7 +1449,8 @@ private:
std::exception_ptr ex;
try {
setup_new_compaction(descriptor.run_identifier);
co_await compact_sstables_and_update_history(descriptor, _compaction_data, on_replace);
co_await compact_sstables_and_update_history(descriptor, _compaction_data,
std::bind(&cleanup_sstables_compaction_task::release_exhausted, this, std::placeholders::_1));
finish_compaction();
_cm.reevaluate_postponed_compactions();
co_return; // done with current job
@@ -1495,8 +1470,10 @@ private:
bool needs_cleanup(const sstables::shared_sstable& sst,
const dht::token_range_vector& sorted_owned_ranges,
schema_ptr s) {
auto first_token = sst->get_first_decorated_key().token();
auto last_token = sst->get_last_decorated_key().token();
auto first = sst->get_first_partition_key();
auto last = sst->get_last_partition_key();
auto first_token = dht::get_token(*s, first);
auto last_token = dht::get_token(*s, last);
dht::token_range sst_token_range = dht::token_range::make(first_token, last_token);
auto r = std::lower_bound(sorted_owned_ranges.begin(), sorted_owned_ranges.end(), first_token,
@@ -1596,13 +1573,8 @@ future<compaction_manager::compaction_stats_opt> compaction_manager::perform_sst
}, can_purge_tombstones::no);
}
compaction_manager::compaction_state::compaction_state(table_state& t)
: backlog_tracker(t.get_compaction_strategy().make_backlog_tracker())
{
}
void compaction_manager::add(compaction::table_state& t) {
auto [_, inserted] = _compaction_state.try_emplace(&t, t);
auto [_, inserted] = _compaction_state.insert({&t, compaction_state{}});
if (!inserted) {
auto s = t.schema();
on_internal_error(cmlog, format("compaction_state for table {}.{} [{}] already exists", s->ks_name(), s->cf_name(), fmt::ptr(&t)));
@@ -1610,21 +1582,22 @@ void compaction_manager::add(compaction::table_state& t) {
}
future<> compaction_manager::remove(compaction::table_state& t) noexcept {
auto& c_state = get_compaction_state(&t);
auto handle = _compaction_state.extract(&t);
// We need to guarantee that a task being stopped will not retry to compact
// a table being removed.
// The requirement above is provided by stop_ongoing_compactions().
_postponed.erase(&t);
if (!handle.empty()) {
auto& c_state = handle.mapped();
// Wait for all compaction tasks running under gate to terminate
// and prevent new tasks from entering the gate.
co_await seastar::when_all_succeed(stop_ongoing_compactions("table removal", &t), c_state.gate.close()).discard_result();
// We need to guarantee that a task being stopped will not retry to compact
// a table being removed.
// The requirement above is provided by stop_ongoing_compactions().
_postponed.erase(&t);
c_state.backlog_tracker.disable();
_compaction_state.erase(&t);
// Wait for the termination of an ongoing compaction on table T, if any.
co_await stop_ongoing_compactions("table removal", &t);
// Wait for all functions running under gate to terminate.
co_await c_state.gate.close();
}
#ifdef DEBUG
auto found = false;
sstring msg;
@@ -1783,7 +1756,7 @@ void compaction_backlog_tracker::register_compacting_sstable(sstables::shared_ss
}
}
void compaction_backlog_tracker::copy_ongoing_charges(compaction_backlog_tracker& new_bt, bool move_read_charges) const {
void compaction_backlog_tracker::transfer_ongoing_charges(compaction_backlog_tracker& new_bt, bool move_read_charges) {
for (auto&& w : _ongoing_writes) {
new_bt.register_partially_written_sstable(w.first, *w.second);
}
@@ -1793,6 +1766,8 @@ void compaction_backlog_tracker::copy_ongoing_charges(compaction_backlog_tracker
new_bt.register_compacting_sstable(w.first, *w.second);
}
}
_ongoing_writes = {};
_ongoing_compactions = {};
}
void compaction_backlog_tracker::revert_charges(sstables::shared_sstable sst) {
@@ -1800,26 +1775,6 @@ void compaction_backlog_tracker::revert_charges(sstables::shared_sstable sst) {
_ongoing_compactions.erase(sst);
}
compaction_backlog_tracker::compaction_backlog_tracker(compaction_backlog_tracker&& other)
: _impl(std::move(other._impl))
, _ongoing_writes(std::move(other._ongoing_writes))
, _ongoing_compactions(std::move(other._ongoing_compactions))
, _manager(std::exchange(other._manager, nullptr)) {
}
compaction_backlog_tracker&
compaction_backlog_tracker::operator=(compaction_backlog_tracker&& x) noexcept {
if (this != &x) {
if (auto manager = std::exchange(_manager, x._manager)) {
manager->remove_backlog_tracker(this);
}
_impl = std::move(x._impl);
_ongoing_writes = std::move(x._ongoing_writes);
_ongoing_compactions = std::move(x._ongoing_compactions);
}
return *this;
}
compaction_backlog_tracker::~compaction_backlog_tracker() {
if (_manager) {
_manager->remove_backlog_tracker(this);
@@ -1857,14 +1812,3 @@ compaction_backlog_manager::~compaction_backlog_manager() {
tracker->_manager = nullptr;
}
}
void compaction_manager::register_backlog_tracker(compaction::table_state& t, compaction_backlog_tracker new_backlog_tracker) {
auto& cs = get_compaction_state(&t);
cs.backlog_tracker = std::move(new_backlog_tracker);
register_backlog_tracker(cs.backlog_tracker);
}
compaction_backlog_tracker& compaction_manager::get_backlog_tracker(compaction::table_state& t) {
auto& cs = get_compaction_state(&t);
return cs.backlog_tracker;
}

View File

@@ -32,7 +32,6 @@
#include "compaction.hh"
#include "compaction_weight_registration.hh"
#include "compaction_backlog_manager.hh"
#include "compaction/compaction_descriptor.hh"
#include "strategy_control.hh"
#include "backlog_controller.hh"
#include "seastarx.hh"
@@ -50,8 +49,6 @@ public:
boost::icl::interval_map<dht::token, gc_clock::time_point, boost::icl::partial_absorber, std::less, boost::icl::inplace_max> map;
};
using throw_if_stopping = bool_class<struct throw_if_stopping_tag>;
// Compaction manager provides facilities to submit and track compaction jobs on
// behalf of existing tables.
class compaction_manager {
@@ -86,10 +83,8 @@ private:
// Signaled whenever a compaction task completes.
condition_variable compaction_done;
compaction_backlog_tracker backlog_tracker;
explicit compaction_state(table_state& t);
compaction_state(compaction_state&&) = delete;
compaction_state() = default;
compaction_state(compaction_state&&) = default;
~compaction_state();
bool compaction_disabled() const noexcept {
@@ -140,20 +135,11 @@ public:
virtual ~task();
// called when a compaction replaces the exhausted sstables with the new set
struct on_replacement {
virtual ~on_replacement() {}
// called after the replacement completes
// @param sstables the old sstable which are replaced in this replacement
virtual void on_removal(const std::vector<sstables::shared_sstable>& sstables) = 0;
// called before the replacement happens
// @param sstables the new sstables to be added to the table's sstable set
virtual void on_addition(const std::vector<sstables::shared_sstable>& sstables) = 0;
};
protected:
virtual future<compaction_stats_opt> do_run() = 0;
using throw_if_stopping = bool_class<struct throw_if_stopping_tag>;
state switch_state(state new_state);
future<semaphore_units<named_semaphore_exception_factory>> acquire_semaphore(named_semaphore& sem, size_t units = 1);
@@ -170,10 +156,12 @@ public:
// otherwise, returns stop_iteration::no after sleep for exponential retry.
future<stop_iteration> maybe_retry(std::exception_ptr err, bool throw_on_abort = false);
future<sstables::compaction_result> compact_sstables_and_update_history(sstables::compaction_descriptor descriptor, sstables::compaction_data& cdata, on_replacement&,
// Compacts set of SSTables according to the descriptor.
using release_exhausted_func_t = std::function<void(const std::vector<sstables::shared_sstable>& exhausted_sstables)>;
future<sstables::compaction_result> compact_sstables_and_update_history(sstables::compaction_descriptor descriptor, sstables::compaction_data& cdata, release_exhausted_func_t release_exhausted,
can_purge_tombstones can_purge = can_purge_tombstones::yes);
future<sstables::compaction_result> compact_sstables(sstables::compaction_descriptor descriptor, sstables::compaction_data& cdata, release_exhausted_func_t release_exhausted,
can_purge_tombstones can_purge = can_purge_tombstones::yes);
future<sstables::compaction_result> compact_sstables(sstables::compaction_descriptor descriptor, sstables::compaction_data& cdata, on_replacement&,
can_purge_tombstones can_purge = can_purge_tombstones::yes, sstables::offstrategy offstrategy = sstables::offstrategy::no);
future<> update_history(compaction::table_state& t, const sstables::compaction_result& res, const sstables::compaction_data& cdata);
bool should_update_history(sstables::compaction_type ct) {
return ct == sstables::compaction_type::Compaction;
@@ -306,10 +294,10 @@ private:
std::function<void()> compaction_submission_callback();
// all registered tables are reevaluated at a constant interval.
// Submission is a NO-OP when there's nothing to do, so it's fine to call it regularly.
timer<lowres_clock> _compaction_submission_timer = timer<lowres_clock>(compaction_submission_callback());
static constexpr std::chrono::seconds periodic_compaction_submission_interval() { return std::chrono::seconds(3600); }
config _cfg;
timer<lowres_clock> _compaction_submission_timer;
compaction_controller _compaction_controller;
compaction_backlog_manager _backlog_manager;
optimized_optional<abort_source::subscription> _early_abort_subscription;
@@ -325,7 +313,7 @@ private:
per_table_history_maps _repair_history_maps;
tombstone_gc_state _tombstone_gc_state;
private:
future<compaction_stats_opt> perform_task(shared_ptr<task>, throw_if_stopping do_throw_if_stopping = throw_if_stopping::no);
future<compaction_stats_opt> perform_task(shared_ptr<task>);
future<> stop_tasks(std::vector<shared_ptr<task>> tasks, sstring reason);
future<> update_throughput(uint32_t value_mbs);
@@ -360,7 +348,7 @@ private:
// table still exists and compaction is not disabled for the table.
inline bool can_proceed(compaction::table_state* t) const;
future<> postponed_compactions_reevaluation();
void postponed_compactions_reevaluation();
void reevaluate_postponed_compactions() noexcept;
// Postpone compaction for a table that couldn't be executed due to ongoing
// similar-sized compaction.
@@ -470,7 +458,7 @@ public:
// parameter type is the compaction type the operation can most closely be
// associated with, use compaction_type::Compaction, if none apply.
// parameter job is a function that will carry the operation
future<> run_custom_job(compaction::table_state& s, sstables::compaction_type type, const char *desc, noncopyable_function<future<>(sstables::compaction_data&)> job, throw_if_stopping do_throw_if_stopping);
future<> run_custom_job(compaction::table_state& s, sstables::compaction_type type, const char *desc, noncopyable_function<future<>(sstables::compaction_data&)> job);
class compaction_reenabler {
compaction_manager& _cm;
@@ -536,9 +524,6 @@ public:
void register_backlog_tracker(compaction_backlog_tracker& backlog_tracker) {
_backlog_manager.register_backlog_tracker(backlog_tracker);
}
void register_backlog_tracker(compaction::table_state& t, compaction_backlog_tracker new_backlog_tracker);
compaction_backlog_tracker& get_backlog_tracker(compaction::table_state& t);
static sstables::compaction_data create_compaction_data();

View File

@@ -65,7 +65,7 @@ bool compaction_strategy_impl::worth_dropping_tombstones(const shared_sstable& s
return sst->estimate_droppable_tombstone_ratio(gc_before) >= _tombstone_threshold;
}
uint64_t compaction_strategy_impl::adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate, schema_ptr schema) {
uint64_t compaction_strategy_impl::adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate) {
return partition_estimate;
}
@@ -409,9 +409,7 @@ public:
l0_old_ssts.push_back(std::move(sst));
}
}
if (l0_old_ssts.size() || l0_new_ssts.size()) {
_l0_scts.replace_sstables(std::move(l0_old_ssts), std::move(l0_new_ssts));
}
_l0_scts.replace_sstables(std::move(l0_old_ssts), std::move(l0_new_ssts));
}
};
@@ -429,6 +427,14 @@ struct null_backlog_tracker final : public compaction_backlog_tracker::impl {
virtual void replace_sstables(std::vector<sstables::shared_sstable> old_ssts, std::vector<sstables::shared_sstable> new_ssts) override {}
};
// Just so that if we have more than one CF with NullStrategy, we don't create a lot
// of objects to iterate over for no reason
// Still thread local because of make_unique. But this will disappear soon
static thread_local compaction_backlog_tracker null_backlog_tracker(std::make_unique<null_backlog_tracker>());
compaction_backlog_tracker& get_null_backlog_tracker() {
return null_backlog_tracker;
}
//
// Null compaction strategy is the default compaction strategy.
// As the name implies, it does nothing.
@@ -447,8 +453,8 @@ public:
return compaction_strategy_type::null;
}
virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() override {
return std::make_unique<null_backlog_tracker>();
virtual compaction_backlog_tracker& get_backlog_tracker() override {
return get_null_backlog_tracker();
}
};
@@ -456,14 +462,11 @@ leveled_compaction_strategy::leveled_compaction_strategy(const std::map<sstring,
: compaction_strategy_impl(options)
, _max_sstable_size_in_mb(calculate_max_sstable_size_in_mb(compaction_strategy_impl::get_value(options, SSTABLE_SIZE_OPTION)))
, _stcs_options(options)
, _backlog_tracker(std::make_unique<leveled_compaction_backlog_tracker>(_max_sstable_size_in_mb, _stcs_options))
{
_compaction_counter.resize(leveled_manifest::MAX_LEVELS);
}
std::unique_ptr<compaction_backlog_tracker::impl> leveled_compaction_strategy::make_backlog_tracker() {
return std::make_unique<leveled_compaction_backlog_tracker>(_max_sstable_size_in_mb, _stcs_options);
}
int32_t
leveled_compaction_strategy::calculate_max_sstable_size_in_mb(std::optional<sstring> option_value) const {
using namespace cql3::statements;
@@ -483,6 +486,7 @@ time_window_compaction_strategy::time_window_compaction_strategy(const std::map<
: compaction_strategy_impl(options)
, _options(options)
, _stcs_options(options)
, _backlog_tracker(std::make_unique<time_window_backlog_tracker>(_options, _stcs_options))
{
if (!options.contains(TOMBSTONE_COMPACTION_INTERVAL_OPTION) && !options.contains(TOMBSTONE_THRESHOLD_OPTION)) {
_disable_tombstone_compaction = true;
@@ -493,10 +497,6 @@ time_window_compaction_strategy::time_window_compaction_strategy(const std::map<
_use_clustering_key_filter = true;
}
std::unique_ptr<compaction_backlog_tracker::impl> time_window_compaction_strategy::make_backlog_tracker() {
return std::make_unique<time_window_backlog_tracker>(_options, _stcs_options);
}
} // namespace sstables
std::vector<sstables::shared_sstable>
@@ -640,6 +640,7 @@ namespace sstables {
date_tiered_compaction_strategy::date_tiered_compaction_strategy(const std::map<sstring, sstring>& options)
: compaction_strategy_impl(options)
, _manifest(options)
, _backlog_tracker(std::make_unique<unimplemented_backlog_tracker>())
{
clogger.warn("DateTieredCompactionStrategy is deprecated. Usually cases for which it is used are better handled by TimeWindowCompactionStrategy."
" Please change your compaction strategy to TWCS as DTCS will be retired in the near future");
@@ -684,23 +685,17 @@ compaction_descriptor date_tiered_compaction_strategy::get_sstables_for_compacti
return sstables::compaction_descriptor({ *it }, service::get_local_compaction_priority());
}
std::unique_ptr<compaction_backlog_tracker::impl> date_tiered_compaction_strategy::make_backlog_tracker() {
return std::make_unique<unimplemented_backlog_tracker>();
}
size_tiered_compaction_strategy::size_tiered_compaction_strategy(const std::map<sstring, sstring>& options)
: compaction_strategy_impl(options)
, _options(options)
, _backlog_tracker(std::make_unique<size_tiered_backlog_tracker>(_options))
{}
size_tiered_compaction_strategy::size_tiered_compaction_strategy(const size_tiered_compaction_strategy_options& options)
: _options(options)
, _backlog_tracker(std::make_unique<size_tiered_backlog_tracker>(_options))
{}
std::unique_ptr<compaction_backlog_tracker::impl> size_tiered_compaction_strategy::make_backlog_tracker() {
return std::make_unique<size_tiered_backlog_tracker>(_options);
}
compaction_strategy::compaction_strategy(::shared_ptr<compaction_strategy_impl> impl)
: _compaction_strategy_impl(std::move(impl)) {}
compaction_strategy::compaction_strategy() = default;
@@ -741,8 +736,8 @@ bool compaction_strategy::use_clustering_key_filter() const {
return _compaction_strategy_impl->use_clustering_key_filter();
}
compaction_backlog_tracker compaction_strategy::make_backlog_tracker() {
return compaction_backlog_tracker(_compaction_strategy_impl->make_backlog_tracker());
compaction_backlog_tracker& compaction_strategy::get_backlog_tracker() {
return _compaction_strategy_impl->get_backlog_tracker();
}
sstables::compaction_descriptor
@@ -750,8 +745,8 @@ compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input, schema
return _compaction_strategy_impl->get_reshaping_job(std::move(input), schema, iop, mode);
}
uint64_t compaction_strategy::adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate, schema_ptr schema) {
return _compaction_strategy_impl->adjust_partition_estimate(ms_meta, partition_estimate, std::move(schema));
uint64_t compaction_strategy::adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate) {
return _compaction_strategy_impl->adjust_partition_estimate(ms_meta, partition_estimate);
}
reader_consumer_v2 compaction_strategy::make_interposer_consumer(const mutation_source_metadata& ms_meta, reader_consumer_v2 end_consumer) {

View File

@@ -106,9 +106,9 @@ public:
sstable_set make_sstable_set(schema_ptr schema) const;
compaction_backlog_tracker make_backlog_tracker();
compaction_backlog_tracker& get_backlog_tracker();
uint64_t adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate, schema_ptr);
uint64_t adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate);
reader_consumer_v2 make_interposer_consumer(const mutation_source_metadata& ms_meta, reader_consumer_v2 end_consumer);

View File

@@ -22,6 +22,8 @@ class strategy_control;
namespace sstables {
compaction_backlog_tracker& get_unimplemented_backlog_tracker();
class sstable_set_impl;
class resharding_descriptor;
@@ -68,9 +70,9 @@ public:
// droppable tombstone histogram and gc_before.
bool worth_dropping_tombstones(const shared_sstable& sst, gc_clock::time_point compaction_time, const tombstone_gc_state& gc_state);
virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() = 0;
virtual compaction_backlog_tracker& get_backlog_tracker() = 0;
virtual uint64_t adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate, schema_ptr schema);
virtual uint64_t adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate);
virtual reader_consumer_v2 make_interposer_consumer(const mutation_source_metadata& ms_meta, reader_consumer_v2 end_consumer);

View File

@@ -259,6 +259,7 @@ namespace sstables {
class date_tiered_compaction_strategy : public compaction_strategy_impl {
date_tiered_manifest _manifest;
compaction_backlog_tracker _backlog_tracker;
public:
date_tiered_compaction_strategy(const std::map<sstring, sstring>& options);
virtual compaction_descriptor get_sstables_for_compaction(table_state& table_s, strategy_control& control, std::vector<sstables::shared_sstable> candidates) override;
@@ -271,7 +272,9 @@ public:
return compaction_strategy_type::date_tiered;
}
virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() override;
virtual compaction_backlog_tracker& get_backlog_tracker() override {
return _backlog_tracker;
}
};
}

View File

@@ -144,8 +144,6 @@ leveled_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input
auto max_sstable_size_in_bytes = _max_sstable_size_in_mb * 1024 * 1024;
leveled_manifest::logger.debug("get_reshaping_job: mode={} input.size={} max_sstable_size_in_bytes={}", mode == reshape_mode::relaxed ? "relaxed" : "strict", input.size(), max_sstable_size_in_bytes);
for (auto& sst : input) {
auto sst_level = sst->get_sstable_level();
if (sst_level > leveled_manifest::MAX_LEVELS - 1) {
@@ -202,8 +200,10 @@ leveled_compaction_strategy::get_reshaping_job(std::vector<shared_sstable> input
auto [disjoint, overlapping_sstables] = is_disjoint(level_info[level], tolerance(level));
if (!disjoint) {
leveled_manifest::logger.warn("Turns out that level {} is not disjoint, found {} overlapping SSTables, so the level will be entirely compacted on behalf of {}.{}", level, overlapping_sstables, schema->ks_name(), schema->cf_name());
compaction_descriptor desc(std::move(level_info[level]), iop, level, max_sstable_size_in_bytes);
auto ideal_level = ideal_level_for_input(input, max_sstable_size_in_bytes);
leveled_manifest::logger.warn("Turns out that level {} is not disjoint, found {} overlapping SSTables, so compacting everything on behalf of {}.{}", level, overlapping_sstables, schema->ks_name(), schema->cf_name());
// Unfortunately no good limit to limit input size to max_sstables for LCS major
compaction_descriptor desc(std::move(input), iop, ideal_level, max_sstable_size_in_bytes);
desc.options = compaction_type_options::make_reshape();
return desc;
}
@@ -229,9 +229,6 @@ leveled_compaction_strategy::get_cleanup_compaction_jobs(table_state& table_s, s
}
unsigned leveled_compaction_strategy::ideal_level_for_input(const std::vector<sstables::shared_sstable>& input, uint64_t max_sstable_size) {
if (!max_sstable_size) {
return 1;
}
auto log_fanout = [fanout = leveled_manifest::leveled_fan_out] (double x) {
double inv_log_fanout = 1.0f / std::log(fanout);
return log(x) * inv_log_fanout;

View File

@@ -35,6 +35,7 @@ class leveled_compaction_strategy : public compaction_strategy_impl {
std::optional<std::vector<std::optional<dht::decorated_key>>> _last_compacted_keys;
std::vector<int> _compaction_counter;
size_tiered_compaction_strategy_options _stcs_options;
compaction_backlog_tracker _backlog_tracker;
int32_t calculate_max_sstable_size_in_mb(std::optional<sstring> option_value) const;
public:
static unsigned ideal_level_for_input(const std::vector<sstables::shared_sstable>& input, uint64_t max_sstable_size);
@@ -63,7 +64,9 @@ public:
}
virtual std::unique_ptr<sstable_set_impl> make_sstable_set(schema_ptr schema) const override;
virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() override;
virtual compaction_backlog_tracker& get_backlog_tracker() override {
return _backlog_tracker;
}
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, const ::io_priority_class& iop, reshape_mode mode) override;
};

View File

@@ -6,7 +6,6 @@
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#include "sstables/sstables.hh"
#include "size_tiered_compaction_strategy.hh"
#include <boost/range/adaptor/transformed.hpp>

View File

@@ -10,7 +10,7 @@
#include "compaction_strategy_impl.hh"
#include "compaction.hh"
#include "sstables/shared_sstable.hh"
#include "sstables/sstables.hh"
#include <boost/algorithm/cxx11/any_of.hpp>
class size_tiered_backlog_tracker;
@@ -82,6 +82,7 @@ public:
class size_tiered_compaction_strategy : public compaction_strategy_impl {
size_tiered_compaction_strategy_options _options;
compaction_backlog_tracker _backlog_tracker;
// Return a list of pair of shared_sstable and its respective size.
static std::vector<std::pair<sstables::shared_sstable, uint64_t>> create_sstable_and_length_pairs(const std::vector<sstables::shared_sstable>& sstables);
@@ -127,7 +128,9 @@ public:
most_interesting_bucket(const std::vector<sstables::shared_sstable>& candidates, int min_threshold, int max_threshold,
size_tiered_compaction_strategy_options options = {});
virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() override;
virtual compaction_backlog_tracker& get_backlog_tracker() override {
return _backlog_tracker;
}
virtual compaction_descriptor get_reshaping_job(std::vector<shared_sstable> input, schema_ptr schema, const ::io_priority_class& iop, reshape_mode mode) override;

View File

@@ -10,15 +10,14 @@
#pragma once
#include "schema_fwd.hh"
#include "sstables/sstable_set.hh"
#include "sstables/sstables_manager.hh"
#include "compaction_descriptor.hh"
class reader_permit;
class compaction_backlog_tracker;
namespace sstables {
class sstable_set;
class compaction_strategy;
class sstables_manager;
struct sstable_writer_config;
}
@@ -44,7 +43,6 @@ public:
virtual future<> on_compaction_completion(sstables::compaction_completion_desc desc, sstables::offstrategy offstrategy) = 0;
virtual bool is_auto_compaction_disabled_by_user() const noexcept = 0;
virtual const tombstone_gc_state& get_tombstone_gc_state() const noexcept = 0;
virtual compaction_backlog_tracker& get_backlog_tracker() = 0;
};
}

View File

@@ -100,27 +100,16 @@ public:
};
};
uint64_t time_window_compaction_strategy::adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate, schema_ptr s) {
// If not enough information, we assume the worst
auto estimated_window_count = max_data_segregation_window_count;
auto default_ttl = std::chrono::duration_cast<std::chrono::microseconds>(s->default_time_to_live());
bool min_and_max_ts_available = ms_meta.min_timestamp && ms_meta.max_timestamp;
auto estimate_window_count = [this] (timestamp_type min_window, timestamp_type max_window) {
const auto window_size = get_window_size(_options);
return (max_window + (window_size - 1) - min_window) / window_size;
};
if (!min_and_max_ts_available && default_ttl.count()) {
auto min_window = get_window_for(_options, timestamp_type(0));
auto max_window = get_window_for(_options, timestamp_type(default_ttl.count()));
estimated_window_count = estimate_window_count(min_window, max_window);
} else if (min_and_max_ts_available) {
auto min_window = get_window_for(_options, *ms_meta.min_timestamp);
auto max_window = get_window_for(_options, *ms_meta.max_timestamp);
estimated_window_count = estimate_window_count(min_window, max_window);
uint64_t time_window_compaction_strategy::adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate) {
if (!ms_meta.min_timestamp || !ms_meta.max_timestamp) {
// Not enough information, we assume the worst
return partition_estimate / max_data_segregation_window_count;
}
const auto min_window = get_window_for(_options, *ms_meta.min_timestamp);
const auto max_window = get_window_for(_options, *ms_meta.max_timestamp);
const auto window_size = get_window_size(_options);
auto estimated_window_count = (max_window + (window_size - 1) - min_window) / window_size;
return partition_estimate / std::max(1UL, uint64_t(estimated_window_count));
}

View File

@@ -15,7 +15,7 @@
#include "size_tiered_compaction_strategy.hh"
#include "timestamp.hh"
#include "exceptions/exceptions.hh"
#include "sstables/shared_sstable.hh"
#include "sstables/sstables.hh"
#include "service/priority_manager.hh"
namespace sstables {
@@ -73,6 +73,7 @@ class time_window_compaction_strategy : public compaction_strategy_impl {
// Keep track of all recent active windows that still need to be compacted into a single SSTable
std::unordered_set<timestamp_type> _recent_active_windows;
size_tiered_compaction_strategy_options _stcs_options;
compaction_backlog_tracker _backlog_tracker;
public:
// The maximum amount of buckets we segregate data into when writing into sstables.
// To prevent an explosion in the number of sstables we cap it.
@@ -155,9 +156,11 @@ public:
virtual std::unique_ptr<sstable_set_impl> make_sstable_set(schema_ptr schema) const override;
virtual std::unique_ptr<compaction_backlog_tracker::impl> make_backlog_tracker() override;
virtual compaction_backlog_tracker& get_backlog_tracker() override {
return _backlog_tracker;
}
virtual uint64_t adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate, schema_ptr s) override;
virtual uint64_t adjust_partition_estimate(const mutation_source_metadata& ms_meta, uint64_t partition_estimate) override;
virtual reader_consumer_v2 make_interposer_consumer(const mutation_source_metadata& ms_meta, reader_consumer_v2 end_consumer) override;

View File

@@ -16,6 +16,7 @@
#include <boost/range/adaptor/transformed.hpp>
#include "utils/serialization.hh"
#include <seastar/util/backtrace.hh>
#include "cql_serialization_format.hh"
enum class allow_prefixes { no, yes };
@@ -279,7 +280,7 @@ public:
}
for (size_t i = 0; i != values.size(); ++i) {
//FIXME: is it safe to assume internal serialization-format format?
_types[i]->validate(values[i]);
_types[i]->validate(values[i], cql_serialization_format::internal());
}
}
bool equal(managed_bytes_view v1, managed_bytes_view v2) const {

View File

@@ -560,7 +560,7 @@ public:
auto marker = it->second;
++it;
if (it != e && marker != composite::eoc::none) {
throw runtime_exception(format("non-zero component divider found ({:#02x}) mid", composite::eoc_type(marker) & 0xff));
throw runtime_exception(format("non-zero component divider found ({:d}) mid", format("0x{:02x}", composite::eoc_type(marker) & 0xff)));
}
}
return ret;

View File

@@ -117,8 +117,6 @@ struct date_type_impl final : public concrete_type<db_clock::time_point> {
using timestamp_date_base_class = concrete_type<db_clock::time_point>;
sstring timestamp_to_json_string(const timestamp_date_base_class& t, const bytes_view& bv);
struct timeuuid_type_impl final : public concrete_type<utils::UUID> {
timeuuid_type_impl();
static utils::UUID from_sstring(sstring_view s);

View File

@@ -65,13 +65,6 @@ commitlog_sync_period_in_ms: 10000
# is reasonable.
commitlog_segment_size_in_mb: 32
# The size of the individual schema commitlog file segments.
# The segment size puts a limit on the mutation size that can be
# written at once, and some schema mutation writes are much larger
# than average.
schema_commitlog_segment_size_in_mb: 32
# seed_provider class_name is saved for future use.
# A seed address is mandatory.
seed_provider:
@@ -455,20 +448,20 @@ commitlog_total_space_in_mb: -1
# internode_encryption: none
# certificate: conf/scylla.crt
# keyfile: conf/scylla.key
# truststore: <not set, use system trust>
# certficate_revocation_list: <not set>
# truststore: <none, use system trust>
# certficate_revocation_list: <none>
# require_client_auth: False
# priority_string: <not set, use default>
# priority_string: <none, use default>
# enable or disable client/server encryption.
# client_encryption_options:
# enabled: false
# certificate: conf/scylla.crt
# keyfile: conf/scylla.key
# truststore: <not set, use system trust>
# certficate_revocation_list: <not set>
# truststore: <none, use system trust>
# certficate_revocation_list: <none>
# require_client_auth: False
# priority_string: <not set, use default>
# priority_string: <none, use default>
# internode_compression controls whether traffic between nodes is
# compressed.
@@ -560,16 +553,4 @@ murmur3_partitioner_ignore_msb_bits: 12
# WARNING: It's unsafe to set this to false if the node previously booted
# with the schema commit log enabled. In such case, some schema changes
# may be lost if the node was not cleanly stopped.
force_schema_commit_log: true
# Use Raft to consistently manage schema information in the cluster.
# Refer to https://docs.scylladb.com/master/architecture/raft.html for more details.
# The 'Handling Failures' section is especially important.
#
# Once enabled in a cluster, this cannot be turned off.
# If you want to bootstrap a new cluster without Raft, make sure to set this to `false`
# before starting your nodes for the first time.
#
# A cluster not using Raft can be 'upgraded' to use Raft. Refer to the aforementioned
# documentation, section 'Enabling Raft in ScyllaDB 5.2 and further', for the procedure.
consistent_cluster_management: true
force_schema_commit_log: true

View File

@@ -44,12 +44,16 @@ distro_extra_cflags = ''
distro_extra_ldflags = ''
distro_extra_cmake_args = []
employ_ld_trickery = True
has_wasmtime = False
use_wasmtime_as_library = False
# distro-specific setup
def distro_setup_nix():
global os_ids, employ_ld_trickery
global os_ids, employ_ld_trickery, has_wasmtime, use_wasmtime_as_library
os_ids = ['linux']
employ_ld_trickery = False
has_wasmtime = True
use_wasmtime_as_library = True
if os.environ.get('NIX_CC'):
distro_setup_nix()
@@ -196,7 +200,7 @@ def linker_flags(compiler):
def maybe_static(flag, libs):
if flag:
if flag and not args.static:
libs = '-Wl,-Bstatic {} -Wl,-Bdynamic'.format(libs)
return libs
@@ -285,8 +289,7 @@ modes = {
'cxxflags': '-DDEBUG -DSANITIZE -DDEBUG_LSA_SANITIZER -DSCYLLA_ENABLE_ERROR_INJECTION',
'cxx_ld_flags': '',
'stack-usage-threshold': 1024*40,
# -fasan -Og breaks some coroutines on aarch64, use -O0 instead
'optimization-level': ('0' if platform.machine() == 'aarch64' else 'g'),
'optimization-level': 'g',
'per_src_extra_cxxflags': {},
'cmake_build_type': 'Debug',
'can_have_debug_info': True,
@@ -409,7 +412,6 @@ scylla_tests = set([
'test/boost/limiting_data_source_test',
'test/boost/linearizing_input_stream_test',
'test/boost/loading_cache_test',
'test/boost/locator_topology_test',
'test/boost/log_heap_test',
'test/boost/estimated_histogram_test',
'test/boost/summary_test',
@@ -480,8 +482,6 @@ scylla_tests = set([
'test/boost/virtual_reader_test',
'test/boost/virtual_table_mutation_source_test',
'test/boost/virtual_table_test',
'test/boost/wasm_test',
'test/boost/wasm_alloc_test',
'test/boost/bptree_test',
'test/boost/btree_test',
'test/boost/radix_tree_test',
@@ -573,6 +573,13 @@ all_artifacts = apps | tests | other
arg_parser = argparse.ArgumentParser('Configure scylla')
arg_parser.add_argument('--out', dest='buildfile', action='store', default='build.ninja',
help='Output build-file name (by default build.ninja)')
arg_parser.add_argument('--static', dest='static', action='store_const', default='',
const='-static',
help='Static link (useful for running on hosts outside the build environment')
arg_parser.add_argument('--pie', dest='pie', action='store_true',
help='Build position-independent executable (PIE)')
arg_parser.add_argument('--so', dest='so', action='store_true',
help='Build shared object (SO) instead of executable')
arg_parser.add_argument('--mode', action='append', choices=list(modes.keys()), dest='selected_modes',
help="Build modes to generate ninja files for. The available build modes are:\n{}".format("; ".join(["{} - {}".format(m, cfg['description']) for m, cfg in modes.items()])))
arg_parser.add_argument('--with', dest='artifacts', action='append', default=[],
@@ -663,7 +670,7 @@ scylla_core = (['message/messaging_service.cc',
'replica/distributed_loader.cc',
'replica/memtable.cc',
'replica/exceptions.cc',
'replica/dirty_memory_manager.cc',
'dirty_memory_manager.cc',
'absl-flat_hash_map.cc',
'atomic_cell.cc',
'caching_options.cc',
@@ -698,7 +705,6 @@ scylla_core = (['message/messaging_service.cc',
'mutation_partition.cc',
'mutation_partition_view.cc',
'mutation_partition_serializer.cc',
'utils/on_internal_error.cc',
'converting_mutation_partition_applier.cc',
'readers/combined.cc',
'readers/multishard.cc',
@@ -818,7 +824,6 @@ scylla_core = (['message/messaging_service.cc',
'cql3/statements/detach_service_level_statement.cc',
'cql3/statements/list_service_level_statement.cc',
'cql3/statements/list_service_level_attachments_statement.cc',
'cql3/statements/describe_statement.cc',
'cql3/update_parameters.cc',
'cql3/util.cc',
'cql3/ut_name.cc',
@@ -904,7 +909,6 @@ scylla_core = (['message/messaging_service.cc',
'utils/config_file.cc',
'utils/multiprecision_int.cc',
'utils/gz/crc_combine.cc',
'utils/gz/crc_combine_table.cc',
'gms/version_generator.cc',
'gms/versioned_value.cc',
'gms/gossiper.cc',
@@ -939,8 +943,6 @@ scylla_core = (['message/messaging_service.cc',
'locator/ec2_snitch.cc',
'locator/ec2_multi_region_snitch.cc',
'locator/gce_snitch.cc',
'locator/topology.cc',
'locator/util.cc',
'service/client_state.cc',
'service/storage_service.cc',
'service/misc_services.cc',
@@ -970,7 +972,6 @@ scylla_core = (['message/messaging_service.cc',
'utils/lister.cc',
'repair/repair.cc',
'repair/row_level.cc',
'repair/table_check.cc',
'exceptions/exceptions.cc',
'auth/allow_all_authenticator.cc',
'auth/allow_all_authorizer.cc',
@@ -1032,7 +1033,6 @@ scylla_core = (['message/messaging_service.cc',
'service/raft/raft_group0_client.cc',
'service/broadcast_tables/experimental/lang.cc',
'tasks/task_manager.cc',
'rust/wasmtime_bindings/src/lib.rs',
] + [Antlr3Grammar('cql3/Cql.g')] + [Thrift('interface/cassandra.thrift', 'Cassandra')] \
+ scylla_raft_core
)
@@ -1079,8 +1079,6 @@ api = ['api/api.cc',
Json2Code('api/api-doc/error_injection.json'),
'api/authorization_cache.cc',
Json2Code('api/api-doc/authorization_cache.json'),
'api/raft.cc',
Json2Code('api/api-doc/raft.json'),
]
alternator = [
@@ -1152,6 +1150,10 @@ idls = ['idl/gossip_digest.idl.hh',
'idl/experimental/broadcast_tables_lang.idl.hh',
]
rusts = [
'rust/inc/src/lib.rs',
]
headers = find_headers('.', excluded_dirs=['idl', 'build', 'seastar', '.git'])
scylla_tests_generic_dependencies = [
@@ -1175,7 +1177,7 @@ scylla_tests_dependencies = scylla_core + idls + scylla_tests_generic_dependenci
scylla_raft_dependencies = scylla_raft_core + ['utils/uuid.cc', 'utils/error_injection.cc']
scylla_tools = ['tools/scylla-types.cc', 'tools/scylla-sstable.cc', 'tools/schema_loader.cc', 'tools/utils.cc', 'tools/lua_sstable_consumer.cc']
scylla_tools = ['tools/scylla-types.cc', 'tools/scylla-sstable.cc', 'tools/schema_loader.cc', 'tools/utils.cc']
deps = {
'scylla': idls + ['main.cc'] + scylla_core + api + alternator + redis + scylla_tools,
@@ -1273,7 +1275,7 @@ deps['test/boost/bytes_ostream_test'] = [
"test/lib/log.cc",
]
deps['test/boost/input_stream_test'] = ['test/boost/input_stream_test.cc']
deps['test/boost/UUID_test'] = ['utils/UUID_gen.cc', 'test/boost/UUID_test.cc', 'utils/uuid.cc', 'utils/dynamic_bitset.cc', 'hashers.cc', 'utils/on_internal_error.cc']
deps['test/boost/UUID_test'] = ['utils/UUID_gen.cc', 'test/boost/UUID_test.cc', 'utils/uuid.cc', 'utils/dynamic_bitset.cc', 'hashers.cc']
deps['test/boost/murmur_hash_test'] = ['bytes.cc', 'utils/murmur_hash.cc', 'test/boost/murmur_hash_test.cc']
deps['test/boost/allocation_strategy_test'] = ['test/boost/allocation_strategy_test.cc', 'utils/logalloc.cc', 'utils/dynamic_bitset.cc']
deps['test/boost/log_heap_test'] = ['test/boost/log_heap_test.cc']
@@ -1304,7 +1306,7 @@ deps['test/boost/exceptions_fallback_test'] = ['test/boost/exceptions_fallback_t
deps['test/boost/duration_test'] += ['test/lib/exception_utils.cc']
deps['test/boost/schema_loader_test'] += ['tools/schema_loader.cc']
deps['test/boost/rust_test'] += ['rust/inc/src/lib.rs']
deps['test/boost/rust_test'] += rusts
deps['test/raft/replication_test'] = ['test/raft/replication_test.cc', 'test/raft/replication.cc', 'test/raft/helpers.cc'] + scylla_raft_dependencies
deps['test/raft/raft_server_test'] = ['test/raft/raft_server_test.cc', 'test/raft/replication.cc', 'test/raft/helpers.cc'] + scylla_raft_dependencies
@@ -1321,6 +1323,8 @@ deps['test/raft/discovery_test'] = ['test/raft/discovery_test.cc',
'test/lib/log.cc',
'service/raft/discovery.cc'] + scylla_raft_dependencies
deps['utils/gz/gen_crc_combine_table'] = ['utils/gz/gen_crc_combine_table.cc']
warnings = [
'-Wall',
@@ -1370,7 +1374,7 @@ warnings = [w
warnings = ' '.join(warnings + ['-Wno-error=deprecated-declarations'])
def get_clang_inline_threshold():
def clang_inline_threshold():
if args.clang_inline_threshold != -1:
return args.clang_inline_threshold
elif platform.machine() == 'aarch64':
@@ -1391,7 +1395,7 @@ for mode in modes:
optimization_flags = [
'--param inline-unit-growth=300', # gcc
f'-mllvm -inline-threshold={get_clang_inline_threshold()}', # clang
f'-mllvm -inline-threshold={clang_inline_threshold()}', # clang
# clang generates 16-byte loads that break store-to-load forwarding
# gcc also has some trouble: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103554
'-fno-slp-vectorize',
@@ -1405,6 +1409,19 @@ if flag_supported(flag='-Wstack-usage=4096', compiler=args.cxx):
for mode in modes:
modes[mode]['cxxflags'] += f' -Wstack-usage={modes[mode]["stack-usage-threshold"]} -Wno-error=stack-usage='
if not has_wasmtime:
has_wasmtime = os.path.isfile('/usr/lib64/libwasmtime.a') and os.path.isdir('/usr/local/include/wasmtime')
if has_wasmtime:
if platform.machine() == 'aarch64':
print("wasmtime is temporarily not supported on aarch64. Ref: issue #9387")
has_wasmtime = False
else:
for mode in modes:
modes[mode]['cxxflags'] += ' -DSCYLLA_ENABLE_WASMTIME'
else:
print("wasmtime not found - WASM support will not be enabled in this build")
linker_flags = linker_flags(compiler=args.cxx)
dbgflag = '-g -gz' if args.debuginfo else ''
@@ -1415,6 +1432,16 @@ perf_tests_link_rule = 'link' if args.perf_tests_debuginfo else 'link_stripped'
# debug info from the libraries we static link with
regular_link_rule = 'link' if args.debuginfo else 'link_stripped'
if args.so:
args.pie = '-shared'
args.fpie = '-fpic'
elif args.pie:
args.pie = '-pie'
args.fpie = '-fpie'
else:
args.pie = ''
args.fpie = ''
# a list element means a list of alternative packages to consider
# the first element becomes the HAVE_pkg define
# a string element is a package name with no alternatives
@@ -1571,14 +1598,13 @@ args.user_ldflags = forced_ldflags + ' ' + args.user_ldflags
args.user_cflags += f" -ffile-prefix-map={curdir}=."
if args.target != '':
args.user_cflags += ' -march=' + args.target
seastar_cflags = args.user_cflags
for mode in modes:
# Those flags are passed not only to Scylla objects, but also to libraries
# that we compile ourselves.
modes[mode]['lib_cflags'] = args.user_cflags
modes[mode]['lib_ldflags'] = args.user_ldflags + linker_flags
if args.target != '':
seastar_cflags += ' -march=' + args.target
seastar_ldflags = args.user_ldflags
libdeflate_cflags = seastar_cflags
# cmake likes to separate things with semicolons
def semicolon_separated(*flags):
@@ -1598,8 +1624,8 @@ def configure_seastar(build_dir, mode, mode_config):
'-DCMAKE_C_COMPILER={}'.format(args.cc),
'-DCMAKE_CXX_COMPILER={}'.format(args.cxx),
'-DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON',
'-DSeastar_CXX_FLAGS=SHELL:{}'.format(mode_config['lib_cflags']),
'-DSeastar_LD_FLAGS={}'.format(semicolon_separated(mode_config['lib_ldflags'], mode_config['cxx_ld_flags'])),
'-DSeastar_CXX_FLAGS={}'.format((seastar_cflags).replace(' ', ';')),
'-DSeastar_LD_FLAGS={}'.format(semicolon_separated(seastar_ldflags, modes[mode]['cxx_ld_flags'])),
'-DSeastar_CXX_DIALECT=gnu++20',
'-DSeastar_API_LEVEL=6',
'-DSeastar_UNUSED_RESULT_ERROR=ON',
@@ -1660,16 +1686,52 @@ for mode in build_modes:
seastar_pc_cflags, seastar_pc_libs = query_seastar_flags(pc[mode], link_static_cxx=args.staticcxx)
modes[mode]['seastar_cflags'] = seastar_pc_cflags
modes[mode]['seastar_libs'] = seastar_pc_libs
modes[mode]['seastar_testing_libs'] = pkg_config(pc[mode].replace('seastar.pc', 'seastar-testing.pc'), '--libs', '--static')
abseil_pkgs = [
'absl_raw_hash_set',
'absl_hash',
]
def configure_abseil(build_dir, mode, mode_config):
abseil_build_dir = os.path.join(build_dir, mode, 'abseil')
pkgs += abseil_pkgs
abseil_cflags = seastar_cflags + ' ' + modes[mode]['cxx_ld_flags']
cmake_mode = mode_config['cmake_build_type']
abseil_cmake_args = [
'-DCMAKE_BUILD_TYPE={}'.format(cmake_mode),
'-DCMAKE_INSTALL_PREFIX={}'.format(build_dir + '/inst'), # just to avoid a warning from absl
'-DCMAKE_C_COMPILER={}'.format(args.cc),
'-DCMAKE_CXX_COMPILER={}'.format(args.cxx),
'-DCMAKE_CXX_FLAGS_{}={}'.format(cmake_mode.upper(), abseil_cflags),
'-DCMAKE_EXPORT_COMPILE_COMMANDS=ON',
'-DCMAKE_CXX_STANDARD=20',
'-DABSL_PROPAGATE_CXX_STD=ON',
] + distro_extra_cmake_args
abseil_cmd = ['cmake', '-G', 'Ninja', real_relpath('abseil', abseil_build_dir)] + abseil_cmake_args
os.makedirs(abseil_build_dir, exist_ok=True)
subprocess.check_call(abseil_cmd, shell=False, cwd=abseil_build_dir)
abseil_libs = ['absl/' + lib for lib in [
'container/libabsl_hashtablez_sampler.a',
'container/libabsl_raw_hash_set.a',
'synchronization/libabsl_synchronization.a',
'synchronization/libabsl_graphcycles_internal.a',
'debugging/libabsl_stacktrace.a',
'debugging/libabsl_symbolize.a',
'debugging/libabsl_debugging_internal.a',
'debugging/libabsl_demangle_internal.a',
'time/libabsl_time.a',
'time/libabsl_time_zone.a',
'numeric/libabsl_int128.a',
'hash/libabsl_city.a',
'hash/libabsl_hash.a',
'hash/libabsl_low_level_hash.a',
'base/libabsl_malloc_internal.a',
'base/libabsl_spinlock_wait.a',
'base/libabsl_base.a',
'base/libabsl_raw_logging_internal.a',
'profiling/libabsl_exponential_biased.a',
'base/libabsl_throw_delegate.a']]
args.user_cflags += " " + pkg_config('jsoncpp', '--cflags')
args.user_cflags += ' -march=' + args.target
libs = ' '.join([maybe_static(args.staticyamlcpp, '-lyaml-cpp'), '-latomic', '-llz4', '-lz', '-lsnappy', pkg_config('jsoncpp', '--libs'),
' -lstdc++fs', ' -lcrypt', ' -lcryptopp', ' -lpthread',
# Must link with static version of libzstd, since
@@ -1677,8 +1739,11 @@ libs = ' '.join([maybe_static(args.staticyamlcpp, '-lyaml-cpp'), '-latomic', '-l
maybe_static(True, '-lzstd'),
maybe_static(args.staticboost, '-lboost_date_time -lboost_regex -licuuc -licui18n'),
'-lxxhash',
'-ldeflate',
])
if has_wasmtime:
print("Found wasmtime dependency, linking with libwasmtime")
if use_wasmtime_as_library:
libs += " -lwasmtime"
if not args.staticboost:
args.user_cflags += ' -DBOOST_TEST_DYN_LINK'
@@ -1697,6 +1762,7 @@ if any(filter(thrift_version.startswith, thrift_boost_versions)):
for pkg in pkgs:
args.user_cflags += ' ' + pkg_config(pkg, '--cflags')
libs += ' ' + pkg_config(pkg, '--libs')
args.user_cflags += ' -isystem abseil'
user_cflags = args.user_cflags + ' -fvisibility=hidden'
user_ldflags = args.user_ldflags + ' -fvisibility=hidden'
if args.staticcxx:
@@ -1718,6 +1784,10 @@ if args.ragel_exec:
else:
ragel_exec = "ragel"
if not args.dist_only:
for mode, mode_config in build_modes.items():
configure_abseil(outdir, mode, mode_config)
with open(buildfile, 'w') as f:
f.write(textwrap.dedent('''\
configure_args = {configure_args}
@@ -1770,24 +1840,18 @@ with open(buildfile, 'w') as f:
rule unified
command = unified/build_unified.sh --mode $mode --unified-pkg $out
rule rust_header
command = cxxbridge --include rust/cxx.h --header $in > $out
command = cxxbridge $in > $out
description = RUST_HEADER $out
rule rust_source
command = cxxbridge --include rust/cxx.h $in > $out
description = RUST_SOURCE $out
rule cxxbridge_header
command = cxxbridge --header > $out
''').format(**globals()))
for mode in build_modes:
modeval = modes[mode]
fmt_lib = 'fmt'
f.write(textwrap.dedent('''\
cxx_ld_flags_{mode} = {cxx_ld_flags}
ld_flags_{mode} = $cxx_ld_flags_{mode} {lib_ldflags}
cxxflags_{mode} = $cxx_ld_flags_{mode} {lib_cflags} {cxxflags} -iquote. -iquote $builddir/{mode}/gen
ld_flags_{mode} = $cxx_ld_flags_{mode}
cxxflags_{mode} = $cxx_ld_flags_{mode} {cxxflags} -iquote. -iquote $builddir/{mode}/gen
libs_{mode} = -l{fmt_lib}
seastar_libs_{mode} = {seastar_libs}
seastar_testing_libs_{mode} = {seastar_testing_libs}
rule cxx.{mode}
command = $cxx -MD -MT $out -MF $out.d {seastar_cflags} $cxxflags_{mode} $cxxflags $obj_cxxflags -c -o $out $in
description = CXX $out
@@ -1837,8 +1901,7 @@ with open(buildfile, 'w') as f:
pool = console
description = TEST {mode}
rule rust_lib.{mode}
command = CARGO_BUILD_DEP_INFO_BASEDIR='.' cargo build --locked --manifest-path=rust/Cargo.toml --target-dir=$builddir/{mode} --profile=rust-{mode} $
&& touch $out
command = CARGO_HOME=build/{mode}/rust/.cargo cargo build --release --manifest-path=rust/Cargo.toml --target-dir=build/{mode}/rust -p ${{pkg}}
description = RUST_LIB $out
''').format(mode=mode, antlr3_exec=antlr3_exec, fmt_lib=fmt_lib, test_repeat=test_repeat, test_timeout=test_timeout, **modeval))
f.write(
@@ -1857,6 +1920,7 @@ with open(buildfile, 'w') as f:
ragels = {}
antlr3_grammars = set()
rust_headers = {}
rust_libs = {}
seastar_dep = '$builddir/{}/seastar/libseastar.a'.format(mode)
seastar_testing_dep = '$builddir/{}/seastar/libseastar_testing.a'.format(mode)
for binary in sorted(build_artifacts):
@@ -1867,8 +1931,9 @@ with open(buildfile, 'w') as f:
for src in srcs
if src.endswith('.cc')]
objs.append('$builddir/../utils/arch/powerpc/crc32-vpmsum/crc32.S')
if has_wasmtime and not use_wasmtime_as_library:
objs.append('/usr/lib64/libwasmtime.a')
has_thrift = False
has_rust = False
for dep in deps[binary]:
if isinstance(dep, Thrift):
has_thrift = True
@@ -1877,36 +1942,43 @@ with open(buildfile, 'w') as f:
objs += dep.objects('$builddir/' + mode + '/gen')
if isinstance(dep, Json2Code):
objs += dep.objects('$builddir/' + mode + '/gen')
if dep.endswith('.rs'):
has_rust = True
idx = dep.rindex('/src/')
obj = dep[:idx].replace('rust/','') + '.o'
objs.append('$builddir/' + mode + '/gen/rust/' + obj)
if has_rust:
objs.append('$builddir/' + mode +'/rust-' + mode + '/librust_combined.a')
local_libs = '$seastar_libs_{} $libs'.format(mode)
if has_thrift:
local_libs += ' ' + thrift_libs + ' ' + maybe_static(args.staticboost, '-lboost_system')
if binary in tests:
if binary in pure_boost_tests:
local_libs += ' ' + maybe_static(args.staticboost, '-lboost_unit_test_framework')
if binary not in tests_not_using_seastar_test_framework:
local_libs += ' ' + "$seastar_testing_libs_{}".format(mode)
# Our code's debugging information is huge, and multiplied
# by many tests yields ridiculous amounts of disk space.
# So we strip the tests by default; The user can very
# quickly re-link the test unstripped by adding a "_g"
# to the test name, e.g., "ninja build/release/testname_g"
link_rule = perf_tests_link_rule if binary.startswith('test/perf/') else tests_link_rule
f.write('build $builddir/{}/{}: {}.{} {} | {} {}\n'.format(mode, binary, link_rule, mode, str.join(' ', objs), seastar_dep, seastar_testing_dep))
f.write(' libs = {}\n'.format(local_libs))
f.write('build $builddir/{}/{}_g: {}.{} {} | {} {}\n'.format(mode, binary, regular_link_rule, mode, str.join(' ', objs), seastar_dep, seastar_testing_dep))
f.write(' libs = {}\n'.format(local_libs))
if dep.endswith('/src/lib.rs'):
lib = dep.replace('/src/lib.rs', '.a').replace('rust/','lib')
objs.append('$builddir/' + mode + '/rust/release/' + lib)
if binary.endswith('.a'):
f.write('build $builddir/{}/{}: ar.{} {}\n'.format(mode, binary, mode, str.join(' ', objs)))
else:
f.write('build $builddir/{}/{}: {}.{} {} | {}\n'.format(mode, binary, regular_link_rule, mode, str.join(' ', objs), seastar_dep))
f.write(' libs = {}\n'.format(local_libs))
f.write(f'build $builddir/{mode}/{binary}.stripped: strip $builddir/{mode}/{binary}\n')
f.write(f'build $builddir/{mode}/{binary}.debug: phony $builddir/{mode}/{binary}.stripped\n')
objs.extend(['$builddir/' + mode + '/' + artifact for artifact in [
'libdeflate/libdeflate.a',
] + [
'abseil/' + x for x in abseil_libs
]])
objs.append('$builddir/' + mode + '/gen/utils/gz/crc_combine_table.o')
if binary in tests:
local_libs = '$seastar_libs_{} $libs'.format(mode)
if binary in pure_boost_tests:
local_libs += ' ' + maybe_static(args.staticboost, '-lboost_unit_test_framework')
if binary not in tests_not_using_seastar_test_framework:
pc_path = pc[mode].replace('seastar.pc', 'seastar-testing.pc')
local_libs += ' ' + pkg_config(pc_path, '--libs', '--static')
if has_thrift:
local_libs += ' ' + thrift_libs + ' ' + maybe_static(args.staticboost, '-lboost_system')
# Our code's debugging information is huge, and multiplied
# by many tests yields ridiculous amounts of disk space.
# So we strip the tests by default; The user can very
# quickly re-link the test unstripped by adding a "_g"
# to the test name, e.g., "ninja build/release/testname_g"
link_rule = perf_tests_link_rule if binary.startswith('test/perf/') else tests_link_rule
f.write('build $builddir/{}/{}: {}.{} {} | {} {}\n'.format(mode, binary, link_rule, mode, str.join(' ', objs), seastar_dep, seastar_testing_dep))
f.write(' libs = {}\n'.format(local_libs))
f.write('build $builddir/{}/{}_g: {}.{} {} | {} {}\n'.format(mode, binary, regular_link_rule, mode, str.join(' ', objs), seastar_dep, seastar_testing_dep))
f.write(' libs = {}\n'.format(local_libs))
else:
f.write('build $builddir/{}/{}: {}.{} {} | {}\n'.format(mode, binary, regular_link_rule, mode, str.join(' ', objs), seastar_dep))
if has_thrift:
f.write(' libs = {} {} $seastar_libs_{} $libs\n'.format(thrift_libs, maybe_static(args.staticboost, '-lboost_system'), mode))
f.write(f'build $builddir/{mode}/{binary}.stripped: strip $builddir/{mode}/{binary}\n')
f.write(f'build $builddir/{mode}/{binary}.debug: phony $builddir/{mode}/{binary}.stripped\n')
for src in srcs:
if src.endswith('.cc'):
obj = '$builddir/' + mode + '/' + src.replace('.cc', '.o')
@@ -1923,12 +1995,19 @@ with open(buildfile, 'w') as f:
thrifts.add(src)
elif src.endswith('.g'):
antlr3_grammars.add(src)
elif src.endswith('.rs'):
idx = src.rindex('/src/')
hh = '$builddir/' + mode + '/gen/' + src[:idx] + '.hh'
elif src.endswith('/src/lib.rs'):
hh = '$builddir/' + mode + '/gen/' + src.replace('/src/lib.rs', '.hh')
rust_headers[hh] = src
staticlib = src.replace('rust/', '$builddir/' + mode + '/rust/release/lib').replace('/src/lib.rs', '.a')
rust_libs[staticlib] = src
else:
raise Exception('No rule for ' + src)
compiles['$builddir/' + mode + '/gen/utils/gz/crc_combine_table.o'] = '$builddir/' + mode + '/gen/utils/gz/crc_combine_table.cc'
compiles['$builddir/' + mode + '/utils/gz/gen_crc_combine_table.o'] = 'utils/gz/gen_crc_combine_table.cc'
f.write('build {}: run {}\n'.format('$builddir/' + mode + '/gen/utils/gz/crc_combine_table.cc',
'$builddir/' + mode + '/utils/gz/gen_crc_combine_table'))
f.write('build {}: link_build.{} {}\n'.format('$builddir/' + mode + '/utils/gz/gen_crc_combine_table', mode,
'$builddir/' + mode + '/utils/gz/gen_crc_combine_table.o'))
f.write(' libs = $seastar_libs_{}\n'.format(mode))
f.write(
'build {mode}-objects: phony {objs}\n'.format(
@@ -1966,7 +2045,6 @@ with open(buildfile, 'w') as f:
gen_headers += list(serializers.keys())
gen_headers += list(ragels.keys())
gen_headers += list(rust_headers.keys())
gen_headers.append('$builddir/{}/gen/rust/cxx.h'.format(mode))
gen_headers_dep = ' '.join(gen_headers)
for obj in compiles:
@@ -1990,13 +2068,10 @@ with open(buildfile, 'w') as f:
for hh in rust_headers:
src = rust_headers[hh]
f.write('build {}: rust_header {}\n'.format(hh, src))
cc = hh.replace('.hh', '.cc')
f.write('build {}: rust_source {}\n'.format(cc, src))
obj = cc.replace('.cc', '.o')
f.write('build {}: cxx.{} {} || {}\n'.format(obj, mode, cc, gen_headers_dep))
f.write('build {}: cxxbridge_header\n'.format('$builddir/{}/gen/rust/cxx.h'.format(mode)))
librust = '$builddir/{}/rust-{}/librust_combined'.format(mode, mode)
f.write('build {}.a: rust_lib.{} rust/Cargo.lock\n depfile={}.d\n'.format(librust, mode, librust))
for lib in rust_libs:
src = rust_libs[lib]
package = src.replace('/src/lib.rs', '').replace('rust/','')
f.write('build {}: rust_lib.{} {}\n pkg = {}\n'.format(lib, mode, src, package))
for thrift in thrifts:
outs = ' '.join(thrift.generated('$builddir/{}/gen'.format(mode)))
f.write('build {}: thrift.{} {}\n'.format(outs, mode, thrift.source))
@@ -2012,8 +2087,7 @@ with open(buildfile, 'w') as f:
f.write('build {}: cxx.{} {} || {}\n'.format(obj, mode, cc, ' '.join(serializers)))
if cc.endswith('Parser.cpp'):
# Unoptimized parsers end up using huge amounts of stack space and overflowing their stack
flags = '-O1' if modes[mode]['optimization-level'] in ['0', 'g', 's'] else ''
flags = '-O1'
if has_sanitize_address_use_after_scope:
flags += ' -fno-sanitize-address-use-after-scope'
f.write(' obj_cxxflags = %s\n' % flags)
@@ -2065,16 +2139,30 @@ with open(buildfile, 'w') as f:
f.write(f' mode = {mode}\n')
f.write(f'build $builddir/dist/{mode}/debian: debbuild $builddir/{mode}/dist/tar/{scylla_product}-unstripped-{scylla_version}-{scylla_release}.{arch}.tar.gz\n')
f.write(f' mode = {mode}\n')
f.write(f'build dist-server-{mode}: phony $builddir/dist/{mode}/redhat $builddir/dist/{mode}/debian\n')
f.write(f'build dist-server-{mode}: phony $builddir/dist/{mode}/redhat $builddir/dist/{mode}/debian dist-server-compat-{mode} dist-server-compat-arch-{mode}\n')
f.write(f'build dist-server-compat-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz\n')
f.write(f'build dist-server-compat-arch-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-{arch}-package.tar.gz\n')
f.write(f'build dist-server-debuginfo-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-debuginfo-{scylla_version}-{scylla_release}.{arch}.tar.gz\n')
f.write(f'build dist-jmx-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-jmx-{scylla_version}-{scylla_release}.noarch.tar.gz dist-jmx-rpm dist-jmx-deb\n')
f.write(f'build dist-tools-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-tools-{scylla_version}-{scylla_release}.noarch.tar.gz dist-tools-rpm dist-tools-deb\n')
f.write(f'build dist-python3-{mode}: phony dist-python3-tar dist-python3-rpm dist-python3-deb\n')
f.write(f'build dist-unified-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-unified-{scylla_version}-{scylla_release}.{arch}.tar.gz\n')
f.write(f'build dist-jmx-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-jmx-{scylla_version}-{scylla_release}.noarch.tar.gz dist-jmx-rpm dist-jmx-deb dist-jmx-compat\n')
f.write(f'build dist-tools-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-tools-{scylla_version}-{scylla_release}.noarch.tar.gz dist-tools-rpm dist-tools-deb dist-tools-compat\n')
f.write(f'build dist-python3-{mode}: phony dist-python3-tar dist-python3-rpm dist-python3-deb dist-python3-compat dist-python3-compat-arch\n')
f.write(f'build dist-unified-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-unified-{scylla_version}-{scylla_release}.{arch}.tar.gz dist-unified-compat-{mode} dist-unified-compat-arch-{mode}\n')
f.write(f'build dist-unified-compat-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-unified-package-{scylla_version}-{scylla_release}.tar.gz\n')
f.write(f'build dist-unified-compat-arch-{mode}: phony $builddir/{mode}/dist/tar/{scylla_product}-unified-{arch}-package-{scylla_version}-{scylla_release}.tar.gz\n')
f.write(f'build $builddir/{mode}/dist/tar/{scylla_product}-unified-{scylla_version}-{scylla_release}.{arch}.tar.gz: unified $builddir/{mode}/dist/tar/{scylla_product}-{scylla_version}-{scylla_release}.{arch}.tar.gz $builddir/{mode}/dist/tar/{scylla_product}-python3-{scylla_version}-{scylla_release}.{arch}.tar.gz $builddir/{mode}/dist/tar/{scylla_product}-jmx-{scylla_version}-{scylla_release}.noarch.tar.gz $builddir/{mode}/dist/tar/{scylla_product}-tools-{scylla_version}-{scylla_release}.noarch.tar.gz | always\n')
f.write(f' mode = {mode}\n')
f.write(f'build $builddir/{mode}/dist/tar/{scylla_product}-unified-package-{scylla_version}-{scylla_release}.tar.gz: copy $builddir/{mode}/dist/tar/{scylla_product}-unified-{scylla_version}-{scylla_release}.{arch}.tar.gz\n')
f.write(f'build $builddir/{mode}/dist/tar/{scylla_product}-unified-{arch}-package-{scylla_version}-{scylla_release}.tar.gz: copy $builddir/{mode}/dist/tar/{scylla_product}-unified-{scylla_version}-{scylla_release}.{arch}.tar.gz\n')
f.write('rule libdeflate.{mode}\n'.format(**locals()))
f.write(' command = make -C libdeflate BUILD_DIR=../$builddir/{mode}/libdeflate/ CFLAGS="{libdeflate_cflags}" CC={args.cc} ../$builddir/{mode}/libdeflate//libdeflate.a\n'.format(**locals()))
f.write('build $builddir/{mode}/libdeflate/libdeflate.a: libdeflate.{mode}\n'.format(**locals()))
f.write(' pool = submodule_pool\n')
for lib in abseil_libs:
f.write('build $builddir/{mode}/abseil/{lib}: ninja $builddir/{mode}/abseil/build.ninja\n'.format(**locals()))
f.write(' pool = submodule_pool\n')
f.write(' subdir = $builddir/{mode}/abseil\n'.format(**locals()))
f.write(' target = {lib}\n'.format(**locals()))
checkheaders_mode = 'dev' if 'dev' in modes else modes.keys()[0]
f.write('build checkheaders: phony || {}\n'.format(' '.join(['$builddir/{}/{}.o'.format(checkheaders_mode, hh) for hh in headers])))
@@ -2091,13 +2179,17 @@ with open(buildfile, 'w') as f:
f.write(textwrap.dedent(f'''\
build dist-unified-tar: phony {' '.join([f'$builddir/{mode}/dist/tar/{scylla_product}-unified-{scylla_version}-{scylla_release}.{arch}.tar.gz' for mode in default_modes])}
build dist-unified: phony dist-unified-tar
build dist-unified-compat: phony {' '.join([f'$builddir/{mode}/dist/tar/{scylla_product}-unified-package-{scylla_version}-{scylla_release}.tar.gz' for mode in default_modes])}
build dist-unified-compat-arch: phony {' '.join([f'$builddir/{mode}/dist/tar/{scylla_product}-unified-{arch}-package-{scylla_version}-{scylla_release}.tar.gz' for mode in default_modes])}
build dist-unified: phony dist-unified-tar dist-unified-compat dist-unified-compat-arch
build dist-server-deb: phony {' '.join(['$builddir/dist/{mode}/debian'.format(mode=mode) for mode in build_modes])}
build dist-server-rpm: phony {' '.join(['$builddir/dist/{mode}/redhat'.format(mode=mode) for mode in build_modes])}
build dist-server-tar: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-{scylla_version}-{scylla_release}.{arch}.tar.gz'.format(mode=mode, scylla_product=scylla_product, arch=arch, scylla_version=scylla_version, scylla_release=scylla_release) for mode in default_modes])}
build dist-server-debuginfo: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-debuginfo-{scylla_version}-{scylla_release}.{arch}.tar.gz'.format(mode=mode, scylla_product=scylla_product, arch=arch, scylla_version=scylla_version, scylla_release=scylla_release) for mode in default_modes])}
build dist-server: phony dist-server-tar dist-server-debuginfo dist-server-rpm dist-server-deb
build dist-server-compat: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-package.tar.gz'.format(mode=mode, scylla_product=scylla_product, arch=arch) for mode in default_modes])}
build dist-server-compat-arch: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-{arch}-package.tar.gz'.format(mode=mode, scylla_product=scylla_product, arch=arch) for mode in default_modes])}
build dist-server: phony dist-server-tar dist-server-debuginfo dist-server-compat dist-server-compat-arch dist-server-rpm dist-server-deb
rule build-submodule-reloc
command = cd $reloc_dir && ./reloc/build_reloc.sh --version $$(<../../build/SCYLLA-PRODUCT-FILE)-$$(sed 's/-/~/' <../../build/SCYLLA-VERSION-FILE)-$$(<../../build/SCYLLA-RELEASE-FILE) --nodeps $args
@@ -2115,7 +2207,8 @@ with open(buildfile, 'w') as f:
dir = tools/jmx
artifact = $builddir/{scylla_product}-jmx-{scylla_version}-{scylla_release}.noarch.tar.gz
build dist-jmx-tar: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-jmx-{scylla_version}-{scylla_release}.noarch.tar.gz'.format(mode=mode, scylla_product=scylla_product, scylla_version=scylla_version, scylla_release=scylla_release) for mode in default_modes])}
build dist-jmx: phony dist-jmx-tar dist-jmx-rpm dist-jmx-deb
build dist-jmx-compat: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-jmx-package.tar.gz'.format(mode=mode, scylla_product=scylla_product, arch=arch) for mode in default_modes])}
build dist-jmx: phony dist-jmx-tar dist-jmx-compat dist-jmx-rpm dist-jmx-deb
build tools/java/build/{scylla_product}-tools-{scylla_version}-{scylla_release}.noarch.tar.gz: build-submodule-reloc | build/SCYLLA-PRODUCT-FILE build/SCYLLA-VERSION-FILE build/SCYLLA-RELEASE-FILE
reloc_dir = tools/java
@@ -2126,7 +2219,8 @@ with open(buildfile, 'w') as f:
dir = tools/java
artifact = $builddir/{scylla_product}-tools-{scylla_version}-{scylla_release}.noarch.tar.gz
build dist-tools-tar: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-tools-{scylla_version}-{scylla_release}.noarch.tar.gz'.format(mode=mode, scylla_product=scylla_product, scylla_version=scylla_version, scylla_release=scylla_release) for mode in default_modes])}
build dist-tools: phony dist-tools-tar dist-tools-rpm dist-tools-deb
build dist-tools-compat: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-tools-package.tar.gz'.format(mode=mode, scylla_product=scylla_product, arch=arch) for mode in default_modes])}
build dist-tools: phony dist-tools-tar dist-tools-compat dist-tools-rpm dist-tools-deb
build tools/python3/build/{scylla_product}-python3-{scylla_version}-{scylla_release}.{arch}.tar.gz: build-submodule-reloc | build/SCYLLA-PRODUCT-FILE build/SCYLLA-VERSION-FILE build/SCYLLA-RELEASE-FILE
reloc_dir = tools/python3
@@ -2138,10 +2232,14 @@ with open(buildfile, 'w') as f:
dir = tools/python3
artifact = $builddir/{scylla_product}-python3-{scylla_version}-{scylla_release}.{arch}.tar.gz
build dist-python3-tar: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-python3-{scylla_version}-{scylla_release}.{arch}.tar.gz'.format(mode=mode, scylla_product=scylla_product, arch=arch, scylla_version=scylla_version, scylla_release=scylla_release) for mode in default_modes])}
build dist-python3: phony dist-python3-tar dist-python3-rpm dist-python3-deb
build dist-python3-compat: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-python3-package.tar.gz'.format(mode=mode, scylla_product=scylla_product, arch=arch) for mode in default_modes])}
build dist-python3-compat-arch: phony {' '.join(['$builddir/{mode}/dist/tar/{scylla_product}-python3-{arch}-package.tar.gz'.format(mode=mode, scylla_product=scylla_product, arch=arch) for mode in default_modes])}
build dist-python3: phony dist-python3-tar dist-python3-compat dist-python3-compat-arch dist-python3-rpm dist-python3-deb
build dist-deb: phony dist-server-deb dist-python3-deb dist-jmx-deb dist-tools-deb
build dist-rpm: phony dist-server-rpm dist-python3-rpm dist-jmx-rpm dist-tools-rpm
build dist-tar: phony dist-unified-tar dist-server-tar dist-python3-tar dist-jmx-tar dist-tools-tar
build dist-compat: phony dist-unified-compat dist-server-compat dist-python3-compat
build dist-compat-arch: phony dist-unified-compat-arch dist-server-compat-arch dist-python3-compat-arch
build dist: phony dist-unified dist-server dist-python3 dist-jmx dist-tools
'''))
@@ -2186,7 +2284,7 @@ with open(buildfile, 'w') as f:
description = List configured modes
build mode_list: mode_list
default {modes_list}
''').format(modes_list=' '.join(default_modes), build_ninja_list=' '.join([f'build/{mode}/{dir}/build.ninja' for mode in build_modes for dir in ['seastar']]), **globals()))
''').format(modes_list=' '.join(default_modes), build_ninja_list=' '.join([f'build/{mode}/{dir}/build.ninja' for mode in build_modes for dir in ['seastar', 'abseil']]), **globals()))
unit_test_list = set(test for test in build_artifacts if test in set(tests))
f.write(textwrap.dedent('''\
rule unit_test_list
@@ -2215,7 +2313,7 @@ with open(buildfile, 'w') as f:
compdb = 'compile_commands.json'
# per-mode compdbs are built by taking the relevant entries from the
# output of "ninja -t compdb" and combining them with the CMake-made
# compdbs for Seastar in the relevant mode.
# compdbs for Seastar and Abseil in the relevant mode.
#
# "ninja -t compdb" output has to be filtered because
# - it contains rules for all selected modes, and several entries for
@@ -2230,7 +2328,7 @@ with tempfile.NamedTemporaryFile() as ninja_compdb:
# build mode-specific compdbs
for mode in selected_modes:
mode_out = outdir + '/' + mode
submodule_compdbs = [mode_out + '/' + submodule + '/' + compdb for submodule in ['seastar']]
submodule_compdbs = [mode_out + '/' + submodule + '/' + compdb for submodule in ['abseil', 'seastar']]
with open(mode_out + '/' + compdb, 'w+b') as combined_mode_specific_compdb:
subprocess.run(['./scripts/merge-compdb.py', 'build/' + mode,
ninja_compdb.name] + submodule_compdbs, stdout=combined_mode_specific_compdb)

View File

@@ -51,7 +51,6 @@ options {
#include "cql3/statements/index_prop_defs.hh"
#include "cql3/statements/raw/use_statement.hh"
#include "cql3/statements/raw/batch_statement.hh"
#include "cql3/statements/raw/describe_statement.hh"
#include "cql3/statements/list_users_statement.hh"
#include "cql3/statements/grant_statement.hh"
#include "cql3/statements/revoke_statement.hh"
@@ -359,7 +358,6 @@ cqlStatement returns [std::unique_ptr<raw::parsed_statement> stmt]
| st46=listServiceLevelStatement { $stmt = std::move(st46); }
| st47=listServiceLevelAttachStatement { $stmt = std::move(st47); }
| st48=pruneMaterializedViewStatement { $stmt = std::move(st48); }
| st49=describeStatement { $stmt = std::move(st49); }
;
/*
@@ -463,7 +461,8 @@ orderByClause[raw::select_statement::parameters::orderings_type& orderings]
;
jsonValue returns [expression value]
: s=STRING_LITERAL { $value = untyped_constant{untyped_constant::string, $s.text}; }
:
| s=STRING_LITERAL { $value = untyped_constant{untyped_constant::string, $s.text}; }
| m=marker { $value = std::move(m); }
;
@@ -1369,59 +1368,6 @@ listServiceLevelAttachStatement returns [std::unique_ptr<list_service_level_atta
{ $stmt = std::make_unique<list_service_level_attachments_statement>(); }
;
/**
* (DESCRIBE | DESC) (
* CLUSTER
* [FULL] SCHEMA
* KEYSPACES
* [ONLY] KEYSPACE <name>?
* TABLES
* TABLE <name>
* TYPES
* TYPE <name>
* FUNCTIONS
* FUNCTION <name>
* AGGREGATES
* AGGREGATE <name>
* ) (WITH INTERNALS)?
*/
describeStatement returns [std::unique_ptr<cql3::statements::raw::describe_statement> stmt]
@init {
bool fullSchema = false;
bool pending = false;
bool config = false;
bool only = false;
std::optional<sstring> keyspace;
sstring generic_name = "";
}
: ( K_DESCRIBE | K_DESC )
( (K_CLUSTER) => K_CLUSTER { $stmt = cql3::statements::raw::describe_statement::cluster(); }
| (K_FULL { fullSchema=true; })? K_SCHEMA { $stmt = cql3::statements::raw::describe_statement::schema(fullSchema); }
| (K_KEYSPACES) => K_KEYSPACES { $stmt = cql3::statements::raw::describe_statement::keyspaces(); }
| (K_ONLY { only=true; })? K_KEYSPACE ( ks=keyspaceName { keyspace = ks; })?
{ $stmt = cql3::statements::raw::describe_statement::keyspace(keyspace, only); }
| (K_TABLES) => K_TABLES { $stmt = cql3::statements::raw::describe_statement::tables(); }
| K_COLUMNFAMILY cf=columnFamilyName { $stmt = cql3::statements::raw::describe_statement::table(cf); }
| K_INDEX idx=columnFamilyName { $stmt = cql3::statements::raw::describe_statement::index(idx); }
| K_MATERIALIZED K_VIEW view=columnFamilyName { $stmt = cql3::statements::raw::describe_statement::view(view); }
| (K_TYPES) => K_TYPES { $stmt = cql3::statements::raw::describe_statement::types(); }
| K_TYPE tn=userTypeName { $stmt = cql3::statements::raw::describe_statement::type(tn); }
| (K_FUNCTIONS) => K_FUNCTIONS { $stmt = cql3::statements::raw::describe_statement::functions(); }
| K_FUNCTION fn=functionName { $stmt = cql3::statements::raw::describe_statement::function(fn); }
| (K_AGGREGATES) => K_AGGREGATES { $stmt = cql3::statements::raw::describe_statement::aggregates(); }
| K_AGGREGATE ag=functionName { $stmt = cql3::statements::raw::describe_statement::aggregate(ag); }
| ( ( ksT=IDENT { keyspace = sstring{$ksT.text}; }
| ksT=QUOTED_NAME { keyspace = sstring{$ksT.text}; }
| ksK=unreserved_keyword { keyspace = ksK; } )
'.' )?
( tT=IDENT { generic_name = sstring{$tT.text}; }
| tT=QUOTED_NAME { generic_name = sstring{$tT.text}; }
| tK=unreserved_keyword { generic_name = tK; } )
{ $stmt = cql3::statements::raw::describe_statement::generic(keyspace, generic_name); }
)
( K_WITH K_INTERNALS { $stmt->with_internals_details(); } )?
;
/** DEFINITIONS **/
// Column Identifiers. These need to be treated differently from other
@@ -1473,7 +1419,7 @@ serviceLevelOrRoleName returns [sstring name]
std::transform($name.begin(), $name.end(), $name.begin(), ::tolower); }
| t=STRING_LITERAL { $name = sstring($t.text); }
| t=QUOTED_NAME { $name = sstring($t.text); }
| k=unreserved_keyword { $name = k;
| k=unreserved_keyword { $name = sstring($t.text);
std::transform($name.begin(), $name.end(), $name.begin(), ::tolower);}
| QMARK {add_recognition_error("Bind variables cannot be used for service levels or role names");}
;
@@ -1567,7 +1513,7 @@ value returns [expression value]
| l=collectionLiteral { $value = std::move(l); }
| u=usertypeLiteral { $value = std::move(u); }
| t=tupleLiteral { $value = std::move(t); }
| K_NULL { $value = make_untyped_null(); }
| K_NULL { $value = null(); }
| e=marker { $value = std::move(e); }
;
@@ -1577,7 +1523,8 @@ marker returns [expression value]
;
intValue returns [expression value]
: t=INTEGER { $value = untyped_constant{untyped_constant::integer, $t.text}; }
:
| t=INTEGER { $value = untyped_constant{untyped_constant::integer, $t.text}; }
| e=marker { $value = std::move(e); }
;
@@ -1731,7 +1678,7 @@ relation returns [expression e]
| K_TOKEN l=tupleOfIdentifiers type=relationType t=term
{ $e = binary_operator(token{std::move(l.elements)}, type, std::move(t)); }
| name=cident K_IS K_NOT K_NULL {
$e = binary_operator(unresolved_identifier{std::move(name)}, oper_t::IS_NOT, make_untyped_null()); }
$e = binary_operator(unresolved_identifier{std::move(name)}, oper_t::IS_NOT, null()); }
| name=cident K_IN marker1=marker
{ $e = binary_operator(unresolved_identifier{std::move(name)}, oper_t::IN, std::move(marker1)); }
| name=cident K_IN in_values=singleColumnInValues
@@ -1950,13 +1897,10 @@ unreserved_function_keyword returns [sstring str]
basic_unreserved_keyword returns [sstring str]
: k=( K_KEYS
| K_AS
| K_CLUSTER
| K_CLUSTERING
| K_COMPACT
| K_STORAGE
| K_TABLES
| K_TYPE
| K_TYPES
| K_VALUES
| K_MAP
| K_LIST
@@ -1980,14 +1924,11 @@ basic_unreserved_keyword returns [sstring str]
| K_TRIGGER
| K_DISTINCT
| K_CONTAINS
| K_INTERNALS
| K_STATIC
| K_FROZEN
| K_TUPLE
| K_FUNCTION
| K_FUNCTIONS
| K_AGGREGATE
| K_AGGREGATES
| K_SFUNC
| K_STYPE
| K_REDUCEFUNC
@@ -2015,9 +1956,6 @@ basic_unreserved_keyword returns [sstring str]
| K_LEVEL
| K_LEVELS
| K_PRUNE
| K_ONLY
| K_DESCRIBE
| K_DESC
) { $str = $k.text; }
;
@@ -2075,14 +2013,11 @@ K_TRUNCATE: T R U N C A T E;
K_DELETE: D E L E T E;
K_IN: I N;
K_CREATE: C R E A T E;
K_SCHEMA: S C H E M A;
K_KEYSPACE: ( K E Y S P A C E
| K_SCHEMA );
| S C H E M A );
K_KEYSPACES: K E Y S P A C E S;
K_COLUMNFAMILY:( C O L U M N F A M I L Y
| T A B L E );
K_TABLES: ( C O L U M N F A M I L I E S
| T A B L E S );
K_MATERIALIZED:M A T E R I A L I Z E D;
K_VIEW: V I E W;
K_INDEX: I N D E X;
@@ -2099,7 +2034,6 @@ K_ALTER: A L T E R;
K_RENAME: R E N A M E;
K_ADD: A D D;
K_TYPE: T Y P E;
K_TYPES: T Y P E S;
K_COMPACT: C O M P A C T;
K_STORAGE: S T O R A G E;
K_ORDER: O R D E R;
@@ -2111,8 +2045,6 @@ K_FILTERING: F I L T E R I N G;
K_IF: I F;
K_IS: I S;
K_CONTAINS: C O N T A I N S;
K_INTERNALS: I N T E R N A L S;
K_ONLY: O N L Y;
K_GRANT: G R A N T;
K_ALL: A L L;
@@ -2136,7 +2068,6 @@ K_LOGIN: L O G I N;
K_NOLOGIN: N O L O G I N;
K_OPTIONS: O P T I O N S;
K_CLUSTER: C L U S T E R;
K_CLUSTERING: C L U S T E R I N G;
K_ASCII: A S C I I;
K_BIGINT: B I G I N T;
@@ -2176,9 +2107,7 @@ K_STATIC: S T A T I C;
K_FROZEN: F R O Z E N;
K_FUNCTION: F U N C T I O N;
K_FUNCTIONS: F U N C T I O N S;
K_AGGREGATE: A G G R E G A T E;
K_AGGREGATES: A G G R E G A T E S;
K_SFUNC: S F U N C;
K_STYPE: S T Y P E;
K_REDUCEFUNC: R E D U C E F U N C;

View File

@@ -10,7 +10,6 @@
#include "cql3/attributes.hh"
#include "cql3/column_identifier.hh"
#include <optional>
namespace cql3 {
@@ -21,9 +20,7 @@ std::unique_ptr<attributes> attributes::none() {
attributes::attributes(std::optional<cql3::expr::expression>&& timestamp,
std::optional<cql3::expr::expression>&& time_to_live,
std::optional<cql3::expr::expression>&& timeout)
: _timestamp_unset_guard(timestamp)
, _timestamp{std::move(timestamp)}
, _time_to_live_unset_guard(time_to_live)
: _timestamp{std::move(timestamp)}
, _time_to_live{std::move(time_to_live)}
, _timeout{std::move(timeout)}
{ }
@@ -41,7 +38,7 @@ bool attributes::is_timeout_set() const {
}
int64_t attributes::get_timestamp(int64_t now, const query_options& options) {
if (!_timestamp.has_value() || _timestamp_unset_guard.is_unset(options)) {
if (!_timestamp.has_value()) {
return now;
}
@@ -49,25 +46,31 @@ int64_t attributes::get_timestamp(int64_t now, const query_options& options) {
if (tval.is_null()) {
throw exceptions::invalid_request_exception("Invalid null value of timestamp");
}
if (tval.is_unset_value()) {
return now;
}
try {
return tval.view().validate_and_deserialize<int64_t>(*long_type);
return tval.view().validate_and_deserialize<int64_t>(*long_type, cql_serialization_format::internal());
} catch (marshal_exception& e) {
throw exceptions::invalid_request_exception("Invalid timestamp value");
}
}
std::optional<int32_t> attributes::get_time_to_live(const query_options& options) {
if (!_time_to_live.has_value() || _time_to_live_unset_guard.is_unset(options))
return std::nullopt;
int32_t attributes::get_time_to_live(const query_options& options) {
if (!_time_to_live.has_value())
return 0;
cql3::raw_value tval = expr::evaluate(*_time_to_live, options);
if (tval.is_null()) {
throw exceptions::invalid_request_exception("Invalid null value of TTL");
}
if (tval.is_unset_value()) {
return 0;
}
int32_t ttl;
try {
ttl = tval.view().validate_and_deserialize<int32_t>(*int32_type);
ttl = tval.view().validate_and_deserialize<int32_t>(*int32_type, cql_serialization_format::internal());
}
catch (marshal_exception& e) {
throw exceptions::invalid_request_exception("Invalid TTL value");
@@ -88,8 +91,8 @@ std::optional<int32_t> attributes::get_time_to_live(const query_options& options
db::timeout_clock::duration attributes::get_timeout(const query_options& options) const {
cql3::raw_value timeout = expr::evaluate(*_timeout, options);
if (timeout.is_null()) {
throw exceptions::invalid_request_exception("Timeout value cannot be null");
if (timeout.is_null() || timeout.is_unset_value()) {
throw exceptions::invalid_request_exception("Timeout value cannot be unset/null");
}
cql_duration duration = timeout.view().deserialize<cql_duration>(*duration_type);
if (duration.months || duration.days) {

View File

@@ -11,7 +11,6 @@
#pragma once
#include "cql3/expr/expression.hh"
#include "cql3/expr/unset.hh"
#include "db/timeout_clock.hh"
namespace cql3 {
@@ -25,9 +24,7 @@ class prepare_context;
*/
class attributes final {
private:
expr::unset_bind_variable_guard _timestamp_unset_guard;
std::optional<cql3::expr::expression> _timestamp;
expr::unset_bind_variable_guard _time_to_live_unset_guard;
std::optional<cql3::expr::expression> _time_to_live;
std::optional<cql3::expr::expression> _timeout;
public:
@@ -45,7 +42,7 @@ public:
int64_t get_timestamp(int64_t now, const query_options& options);
std::optional<int32_t> get_time_to_live(const query_options& options);
int32_t get_time_to_live(const query_options& options);
db::timeout_clock::duration get_timeout(const query_options& options) const;

View File

@@ -139,6 +139,10 @@ bool column_condition::applies_to(const data_value* cell_value, const query_opti
cql3::raw_value key_constant = expr::evaluate(*_collection_element, options);
cql3::raw_value_view key = key_constant.view();
if (key.is_unset_value()) {
throw exceptions::invalid_request_exception(
format("Invalid 'unset' value in {} element access", cell_type.cql3_type_name()));
}
if (key.is_null()) {
throw exceptions::invalid_request_exception(
format("Invalid null value for {} element access", cell_type.cql3_type_name()));
@@ -192,6 +196,9 @@ bool column_condition::applies_to(const data_value* cell_value, const query_opti
// <, >, >=, <=, !=
cql3::raw_value param = expr::evaluate(*_value, options);
if (param.is_unset_value()) {
throw exceptions::invalid_request_exception("Invalid 'unset' value in condition");
}
if (param.is_null()) {
if (_op == expr::oper_t::EQ) {
return cell_value == nullptr;
@@ -217,6 +224,9 @@ bool column_condition::applies_to(const data_value* cell_value, const query_opti
return (*_matcher)(bytes_view(cell_value->serialize_nonnull()));
} else {
auto param = expr::evaluate(*_value, options); // LIKE pattern
if (param.is_unset_value()) {
throw exceptions::invalid_request_exception("Invalid 'unset' value in LIKE pattern");
}
if (param.is_null()) {
throw exceptions::invalid_request_exception("Invalid NULL value in LIKE pattern");
}
@@ -299,7 +309,7 @@ column_condition::raw::prepare(data_dictionary::database db, const sstring& keys
if (_op == expr::oper_t::LIKE) {
auto literal_term = expr::as_if<expr::untyped_constant>(&*_value);
if (literal_term && literal_term->partial_type != expr::untyped_constant::type_class::null) {
if (literal_term) {
// Pass matcher object
const sstring& pattern = literal_term->raw_text;
return column_condition::condition(receiver, std::move(collection_element_expression),

View File

@@ -33,9 +33,9 @@ public:
private static final Logger logger = LoggerFactory.getLogger(Constants.class);
#endif
public:
class setter : public operation_skip_if_unset {
class setter : public operation {
public:
using operation_skip_if_unset::operation_skip_if_unset;
using operation::operation;
virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override {
auto value = expr::evaluate(*_e, params._options);
@@ -53,26 +53,30 @@ public:
virtual void prepare_for_broadcast_tables(statements::broadcast_tables::prepared_update& query) const override;
};
struct adder final : operation_skip_if_unset {
using operation_skip_if_unset::operation_skip_if_unset;
struct adder final : operation {
using operation::operation;
virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override {
auto value = expr::evaluate(*_e, params._options);
if (value.is_null()) {
throw exceptions::invalid_request_exception("Invalid null value for counter increment");
} else if (value.is_unset_value()) {
return;
}
auto increment = value.view().deserialize<int64_t>(*long_type);
m.set_cell(prefix, column, params.make_counter_update_cell(increment));
}
};
struct subtracter final : operation_skip_if_unset {
using operation_skip_if_unset::operation_skip_if_unset;
struct subtracter final : operation {
using operation::operation;
virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override {
auto value = expr::evaluate(*_e, params._options);
if (value.is_null()) {
throw exceptions::invalid_request_exception("Invalid null value for counter increment");
} else if (value.is_unset_value()) {
return;
}
auto increment = value.view().deserialize<int64_t>(*long_type);
if (increment == std::numeric_limits<int64_t>::min()) {
@@ -82,10 +86,10 @@ public:
}
};
class deleter : public operation_no_unset_support {
class deleter : public operation {
public:
deleter(const column_definition& column)
: operation_no_unset_support(column, std::nullopt)
: operation(column, std::nullopt)
{ }
virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override;

View File

@@ -473,40 +473,27 @@ sstring maybe_quote(const sstring& identifier) {
return result;
}
template <char C>
static sstring quote_with(const sstring& str) {
static const std::string quote_str{C};
sstring quote(const sstring& identifier) {
// quote empty string
if (str.empty()) {
return make_sstring(quote_str, quote_str);
if (identifier.empty()) {
return "\"\"";
}
size_t num_quotes = 0;
for (char c : str) {
num_quotes += (c == C);
for (char c : identifier) {
num_quotes += (c == '"');
}
if (num_quotes == 0) {
return make_sstring(quote_str, str, quote_str);
return make_sstring("\"", identifier, "\"");
}
static const std::string double_quote_str{C, C};
static const std::regex quote_re(std::string{C});
static const std::regex double_quote_re("\"");
std::string result;
result.reserve(2 + str.size() + num_quotes);
result.push_back(C);
std::regex_replace(std::back_inserter(result), str.begin(), str.end(), quote_re, double_quote_str);
result.push_back(C);
result.reserve(2 + identifier.size() + num_quotes);
result.push_back('"');
std::regex_replace(std::back_inserter(result), identifier.begin(), identifier.end(), double_quote_re, "\"\"");
result.push_back('"');
return result;
}
sstring quote(const sstring& identifier) {
return quote_with<'"'>(identifier);
}
sstring single_quote(const sstring& str) {
return quote_with<'\''>(str);
}
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -76,6 +76,7 @@ struct column_mutation_attribute;
struct function_call;
struct cast;
struct field_selection;
struct null;
struct bind_variable;
struct untyped_constant;
struct constant;
@@ -95,6 +96,7 @@ concept ExpressionElement
|| std::same_as<T, function_call>
|| std::same_as<T, cast>
|| std::same_as<T, field_selection>
|| std::same_as<T, null>
|| std::same_as<T, bind_variable>
|| std::same_as<T, untyped_constant>
|| std::same_as<T, constant>
@@ -115,6 +117,7 @@ concept invocable_on_expression
&& std::invocable<Func, function_call>
&& std::invocable<Func, cast>
&& std::invocable<Func, field_selection>
&& std::invocable<Func, null>
&& std::invocable<Func, bind_variable>
&& std::invocable<Func, untyped_constant>
&& std::invocable<Func, constant>
@@ -135,6 +138,7 @@ concept invocable_on_expression_ref
&& std::invocable<Func, function_call&>
&& std::invocable<Func, cast&>
&& std::invocable<Func, field_selection&>
&& std::invocable<Func, null&>
&& std::invocable<Func, bind_variable&>
&& std::invocable<Func, untyped_constant&>
&& std::invocable<Func, constant&>
@@ -143,7 +147,7 @@ concept invocable_on_expression_ref
&& std::invocable<Func, usertype_constructor&>
;
/// A CQL expression -- union of all possible expression types.
/// A CQL expression -- union of all possible expression types. bool means a Boolean constant.
class expression final {
// 'impl' holds a variant of all expression types, but since
// variants of incomplete types are not allowed, we forward declare it
@@ -194,7 +198,9 @@ bool operator==(const expression& e1, const expression& e2);
// An expression that doesn't contain subexpressions
template <typename E>
concept LeafExpression
= std::same_as<unresolved_identifier, E>
= std::same_as<bool, E>
|| std::same_as<unresolved_identifier, E>
|| std::same_as<null, E>
|| std::same_as<bind_variable, E>
|| std::same_as<untyped_constant, E>
|| std::same_as<constant, E>
@@ -340,6 +346,12 @@ struct field_selection {
friend bool operator==(const field_selection&, const field_selection&) = default;
};
struct null {
data_type type; // may be null before prepare
friend bool operator==(const null&, const null&) = default;
};
struct bind_variable {
int32_t bind_index;
@@ -353,18 +365,17 @@ struct bind_variable {
// A constant which does not yet have a date type. It is partially typed
// (we know if it's floating or int) but not sized.
struct untyped_constant {
enum type_class { integer, floating_point, string, boolean, duration, uuid, hex, null };
enum type_class { integer, floating_point, string, boolean, duration, uuid, hex };
type_class partial_type;
sstring raw_text;
friend bool operator==(const untyped_constant&, const untyped_constant&) = default;
};
untyped_constant make_untyped_null();
// Represents a constant value with known value and type
// For null and unset the type can sometimes be set to empty_type
struct constant {
// A value serialized using the internal (latest) cql_serialization_format
cql3::raw_value value;
// Never nullptr, for NULL and UNSET might be empty_type
@@ -372,6 +383,7 @@ struct constant {
constant(cql3::raw_value value, data_type type);
static constant make_null(data_type val_type = empty_type);
static constant make_unset_value(data_type val_type = empty_type);
static constant make_bool(bool bool_val);
bool is_null() const;
@@ -424,7 +436,7 @@ struct usertype_constructor {
struct expression::impl final {
using variant_type = std::variant<
conjunction, binary_operator, column_value, token, unresolved_identifier,
column_mutation_attribute, function_call, cast, field_selection,
column_mutation_attribute, function_call, cast, field_selection, null,
bind_variable, untyped_constant, constant, tuple_constructor, collection_constructor,
usertype_constructor, subscript>;
variant_type v;

View File

@@ -78,7 +78,7 @@ static
void
usertype_constructor_validate_assignable_to(const usertype_constructor& u, data_dictionary::database db, const sstring& keyspace, const column_specification& receiver) {
if (!receiver.type->is_user_type()) {
throw exceptions::invalid_request_exception(format("Invalid user type literal for {} of type {}", *receiver.name, receiver.type->as_cql3_type()));
throw exceptions::invalid_request_exception(format("Invalid user type literal for {} of type {}", receiver.name, receiver.type->as_cql3_type()));
}
auto ut = static_pointer_cast<const user_type_impl>(receiver.type);
@@ -90,7 +90,7 @@ usertype_constructor_validate_assignable_to(const usertype_constructor& u, data_
const expression& value = u.elements.at(field);
auto&& field_spec = usertype_field_spec_of(receiver, i);
if (!assignment_testable::is_assignable(test_assignment(value, db, keyspace, *field_spec))) {
throw exceptions::invalid_request_exception(format("Invalid user type literal for {}: field {} is not of type {}", *receiver.name, field, field_spec->type->as_cql3_type()));
throw exceptions::invalid_request_exception(format("Invalid user type literal for {}: field {} is not of type {}", receiver.name, field, field_spec->type->as_cql3_type()));
}
}
}
@@ -123,7 +123,7 @@ usertype_constructor_prepare_expression(const usertype_constructor& u, data_dict
auto iraw = u.elements.find(field);
expression raw;
if (iraw == u.elements.end()) {
raw = expr::make_untyped_null();
raw = expr::null();
} else {
raw = iraw->second;
++found_values;
@@ -246,21 +246,6 @@ map_prepare_expression(const collection_constructor& c, data_dictionary::databas
auto key_spec = maps::key_spec_of(*receiver);
auto value_spec = maps::value_spec_of(*receiver);
const map_type_impl* map_type = dynamic_cast<const map_type_impl*>(&receiver->type->without_reversed());
if (map_type == nullptr) {
on_internal_error(expr_logger,
format("map_prepare_expression bad non-map receiver type: {}", receiver->type->name()));
}
data_type map_element_tuple_type = tuple_type_impl::get_instance({map_type->get_keys_type(), map_type->get_values_type()});
// In Cassandra, an empty (unfrozen) map/set/list is equivalent to the column being null. In
// other words a non-frozen collection only exists if it has elements. Return nullptr right
// away to simplify predicate evaluation. See also
// https://issues.apache.org/jira/browse/CASSANDRA-5141
if (map_type->is_multi_cell() && c.elements.empty()) {
return constant::make_null(receiver->type);
}
std::vector<expression> values;
values.reserve(c.elements.size());
bool all_terminal = true;
@@ -279,7 +264,7 @@ map_prepare_expression(const collection_constructor& c, data_dictionary::databas
values.emplace_back(tuple_constructor {
.elements = {std::move(k), std::move(v)},
.type = map_element_tuple_type
.type = entry_tuple.type
});
}
@@ -313,7 +298,7 @@ set_validate_assignable_to(const collection_constructor& c, data_dictionary::dat
return;
}
throw exceptions::invalid_request_exception(format("Invalid set literal for {} of type {}", *receiver.name, receiver.type->as_cql3_type()));
throw exceptions::invalid_request_exception(format("Invalid set literal for {} of type {}", receiver.name, receiver.type->as_cql3_type()));
}
auto&& value_spec = set_value_spec_of(receiver);
@@ -501,18 +486,18 @@ void
tuple_constructor_validate_assignable_to(const tuple_constructor& tc, data_dictionary::database db, const sstring& keyspace, const column_specification& receiver) {
auto tt = dynamic_pointer_cast<const tuple_type_impl>(receiver.type->underlying_type());
if (!tt) {
throw exceptions::invalid_request_exception(format("Invalid tuple type literal for {} of type {}", *receiver.name, receiver.type->as_cql3_type()));
throw exceptions::invalid_request_exception(format("Invalid tuple type literal for {} of type {}", receiver.name, receiver.type->as_cql3_type()));
}
for (size_t i = 0; i < tc.elements.size(); ++i) {
if (i >= tt->size()) {
throw exceptions::invalid_request_exception(format("Invalid tuple literal for {}: too many elements. Type {} expects {:d} but got {:d}",
*receiver.name, tt->as_cql3_type(), tt->size(), tc.elements.size()));
receiver.name, tt->as_cql3_type(), tt->size(), tc.elements.size()));
}
auto&& value = tc.elements[i];
auto&& spec = component_spec_of(receiver, i);
if (!assignment_testable::is_assignable(test_assignment(value, db, keyspace, *spec))) {
throw exceptions::invalid_request_exception(format("Invalid tuple literal for {}: component {:d} is not of type {}", *receiver.name, i, spec->type->as_cql3_type()));
throw exceptions::invalid_request_exception(format("Invalid tuple literal for {}: component {:d} is not of type {}", receiver.name, i, spec->type->as_cql3_type()));
}
}
}
@@ -582,7 +567,6 @@ operator<<(std::ostream&out, untyped_constant::type_class t)
case untyped_constant::type_class::boolean: return out << "BOOLEAN";
case untyped_constant::type_class::hex: return out << "HEX";
case untyped_constant::type_class::duration: return out << "DURATION";
case untyped_constant::type_class::null: return out << "NULL";
}
abort();
}
@@ -610,9 +594,8 @@ static
assignment_testable::test_result
untyped_constant_test_assignment(const untyped_constant& uc, data_dictionary::database db, const sstring& keyspace, const column_specification& receiver)
{
bool uc_is_null = uc.partial_type == untyped_constant::type_class::null;
auto receiver_type = receiver.type->as_cql3_type();
if ((receiver_type.is_collection() || receiver_type.is_user_type()) && !uc_is_null) {
if (receiver_type.is_collection() || receiver_type.is_user_type()) {
return assignment_testable::test_result::NOT_ASSIGNABLE;
}
if (!receiver_type.is_native()) {
@@ -677,10 +660,6 @@ untyped_constant_test_assignment(const untyped_constant& uc, data_dictionary::da
return assignment_testable::test_result::EXACT_MATCH;
}
break;
case untyped_constant::type_class::null:
return receiver.type->is_counter()
? assignment_testable::test_result::NOT_ASSIGNABLE
: assignment_testable::test_result::WEAKLY_ASSIGNABLE;
}
return assignment_testable::test_result::NOT_ASSIGNABLE;
}
@@ -694,18 +673,9 @@ untyped_constant_prepare_expression(const untyped_constant& uc, data_dictionary:
return std::nullopt;
}
if (!is_assignable(untyped_constant_test_assignment(uc, db, keyspace, *receiver))) {
if (uc.partial_type != untyped_constant::type_class::null) {
throw exceptions::invalid_request_exception(format("Invalid {} constant ({}) for \"{}\" of type {}",
uc.partial_type, uc.raw_text, *receiver->name, receiver->type->as_cql3_type().to_string()));
} else {
throw exceptions::invalid_request_exception("Invalid null value for counter increment/decrement");
}
}
if (uc.partial_type == untyped_constant::type_class::null) {
return constant::make_null(receiver->type);
}
raw_value raw_val = cql3::raw_value::make_value(untyped_constant_parsed_value(uc, receiver->type));
return constant(std::move(raw_val), receiver->type);
}
@@ -717,19 +687,38 @@ bind_variable_test_assignment(const bind_variable& bv, data_dictionary::database
}
static
std::optional<bind_variable>
bind_variable
bind_variable_prepare_expression(const bind_variable& bv, data_dictionary::database db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver)
{
if (!receiver) {
return std::nullopt;
}
return bind_variable {
.bind_index = bv.bind_index,
.receiver = receiver
};
}
static
assignment_testable::test_result
null_test_assignment(data_dictionary::database db,
const sstring& keyspace,
const column_specification& receiver) {
return receiver.type->is_counter()
? assignment_testable::test_result::NOT_ASSIGNABLE
: assignment_testable::test_result::WEAKLY_ASSIGNABLE;
}
static
std::optional<expression>
null_prepare_expression(data_dictionary::database db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) {
if (!receiver) {
// TODO: It is not possible to infer the type of NULL, but perhaps we can have a matcing null_type that can be cast to anything
return std::nullopt;
}
if (!is_assignable(null_test_assignment(db, keyspace, *receiver))) {
throw exceptions::invalid_request_exception("Invalid null value for counter increment/decrement");
}
return constant::make_null(receiver->type);
}
static
sstring
cast_display_name(const cast& c) {
@@ -875,53 +864,6 @@ test_assignment_function_call(const cql3::expr::function_call& fc, data_dictiona
}
}
std::optional<expression> prepare_conjunction(const conjunction& conj,
data_dictionary::database db,
const sstring& keyspace,
const schema* schema_opt,
lw_shared_ptr<column_specification> receiver) {
if (receiver.get() != nullptr && receiver->type->without_reversed().get_kind() != abstract_type::kind::boolean) {
throw exceptions::invalid_request_exception(
format("AND conjunction produces a boolean value, which doesn't match the type: {} of {}",
receiver->type->name(), receiver->name->text()));
}
lw_shared_ptr<column_specification> child_receiver;
if (receiver.get() != nullptr) {
::shared_ptr<column_identifier> child_receiver_name =
::make_shared<column_identifier>(format("AND_element({})", receiver->name->text()), true);
child_receiver = make_lw_shared<column_specification>(receiver->ks_name, receiver->cf_name,
std::move(child_receiver_name), boolean_type);
} else {
::shared_ptr<column_identifier> child_receiver_name =
::make_shared<column_identifier>("AND_element(unknown)", true);
sstring cf_name = schema_opt ? schema_opt->cf_name() : "unknown_cf";
child_receiver = make_lw_shared<column_specification>(keyspace, std::move(cf_name),
std::move(child_receiver_name), boolean_type);
}
std::vector<expression> prepared_children;
bool all_terminal = true;
for (const expression& child : conj.children) {
std::optional<expression> prepared_child =
try_prepare_expression(child, db, keyspace, schema_opt, child_receiver);
if (!prepared_child.has_value()) {
throw exceptions::invalid_request_exception(fmt::format("Could not infer type of {}", child));
}
if (!is<constant>(*prepared_child)) {
all_terminal = false;
}
prepared_children.push_back(std::move(*prepared_child));
}
conjunction result = conjunction{std::move(prepared_children)};
if (all_terminal) {
return constant(evaluate(result, evaluation_inputs{}), boolean_type);
}
return result;
}
std::optional<expression>
try_prepare_expression(const expression& expr, data_dictionary::database db, const sstring& keyspace, const schema* schema_opt, lw_shared_ptr<column_specification> receiver) {
return expr::visit(overloaded_functor{
@@ -931,8 +873,8 @@ try_prepare_expression(const expression& expr, data_dictionary::database db, con
[&] (const binary_operator&) -> std::optional<expression> {
on_internal_error(expr_logger, "binary_operators are not yet reachable via prepare_expression()");
},
[&] (const conjunction& conj) -> std::optional<expression> {
return prepare_conjunction(conj, db, keyspace, schema_opt, receiver);
[&] (const conjunction&) -> std::optional<expression> {
on_internal_error(expr_logger, "conjunctions are not yet reachable via prepare_expression()");
},
[] (const column_value& cv) -> std::optional<expression> {
return cv;
@@ -1003,6 +945,9 @@ try_prepare_expression(const expression& expr, data_dictionary::database db, con
[&] (const field_selection&) -> std::optional<expression> {
on_internal_error(expr_logger, "field_selections are not yet reachable via prepare_expression()");
},
[&] (const null&) -> std::optional<expression> {
return null_prepare_expression(db, keyspace, receiver);
},
[&] (const bind_variable& bv) -> std::optional<expression> {
return bind_variable_prepare_expression(bv, db, keyspace, receiver);
},
@@ -1064,6 +1009,9 @@ test_assignment(const expression& expr, data_dictionary::database db, const sstr
[&] (const field_selection&) -> test_result {
on_internal_error(expr_logger, "field_selections are not yet reachable via test_assignment()");
},
[&] (const null&) -> test_result {
return null_test_assignment(db, keyspace, receiver);
},
[&] (const bind_variable& bv) -> test_result {
return bind_variable_test_assignment(bv, db, keyspace, receiver);
},
@@ -1190,7 +1138,7 @@ static lw_shared_ptr<column_specification> get_lhs_receiver(const expression& pr
// Given type of LHS and the operation finds the expected type of RHS.
// The type will be the same as LHS for simple operations like =, but it will be different for more complex ones like IN or CONTAINS.
static lw_shared_ptr<column_specification> get_rhs_receiver(lw_shared_ptr<column_specification>& lhs_receiver, oper_t oper) {
const data_type lhs_type = lhs_receiver->type->underlying_type();
const data_type& lhs_type = lhs_receiver->type->underlying_type();
if (oper == oper_t::IN) {
data_type rhs_receiver_type = list_type_impl::get_instance(std::move(lhs_type), false);

View File

@@ -144,7 +144,7 @@ void preliminary_binop_vaidation_checks(const binary_operator& binop) {
}
if (binop.op == oper_t::IS_NOT) {
bool rhs_is_null = (is<untyped_constant>(binop.rhs) && as<untyped_constant>(binop.rhs).partial_type == untyped_constant::type_class::null)
bool rhs_is_null = is<null>(binop.rhs)
|| (is<constant>(binop.rhs) && as<constant>(binop.rhs).is_null());
if (!rhs_is_null) {
throw exceptions::invalid_request_exception(format("Unsupported \"IS NOT\" relation: {}", pretty_binop_printer));

View File

@@ -1,30 +0,0 @@
// Copyright (C) 2023-present ScyllaDB
// SPDX-License-Identifier: (AGPL-3.0-or-later and Apache-2.0)
#pragma once
#include <optional>
#include "expression.hh"
namespace cql3 {
class query_options;
}
namespace cql3::expr {
// Some expression users can behave differently if the expression is a bind variable
// and if that bind variable is unset. unset_bind_variable_guard encapsulates the two
// conditions.
class unset_bind_variable_guard {
// Disengaged if the operand is not exactly a single bind variable.
std::optional<bind_variable> _var;
public:
explicit unset_bind_variable_guard(const expr::expression& operand);
explicit unset_bind_variable_guard(std::nullopt_t) {}
explicit unset_bind_variable_guard(const std::optional<expr::expression>& operand);
bool is_unset(const query_options& qo) const;
};
}

View File

@@ -12,7 +12,7 @@
#include "types.hh"
#include "types/tuple.hh"
#include "cql3/functions/scalar_function.hh"
#include "cql3/util.hh"
#include "cql_serialization_format.hh"
#include "utils/big_decimal.hh"
#include "aggregate_fcts.hh"
#include "user_aggregate.hh"
@@ -40,10 +40,10 @@ public:
virtual void reset() override {
_count = 0;
}
virtual opt_bytes compute() override {
virtual opt_bytes compute(cql_serialization_format sf) override {
return long_type->decompose(_count);
}
virtual void add_input(const std::vector<opt_bytes>& values) override {
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
++_count;
}
virtual void set_accumulator(const opt_bytes& acc) override {
@@ -56,7 +56,7 @@ public:
virtual opt_bytes get_accumulator() const override {
return long_type->decompose(_count);
}
virtual void reduce(const opt_bytes& acc) override {
virtual void reduce(cql_serialization_format sf, const opt_bytes& acc) override {
if (acc) {
auto other = value_cast<int64_t>(long_type->deserialize(bytes_view(*acc)));
_count += other;
@@ -189,13 +189,13 @@ public:
virtual void reset() override {
_acc = _initcond;
}
virtual opt_bytes compute() override {
return _finalfunc ? _finalfunc->execute(std::vector<bytes_opt>{_acc}) : _acc;
virtual opt_bytes compute(cql_serialization_format sf) override {
return _finalfunc ? _finalfunc->execute(sf, std::vector<bytes_opt>{_acc}) : _acc;
}
virtual void add_input(const std::vector<opt_bytes>& values) override {
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
std::vector<bytes_opt> args{_acc};
args.insert(args.end(), values.begin(), values.end());
_acc = _sfunc->execute(args);
_acc = _sfunc->execute(sf, args);
}
virtual void set_accumulator(const opt_bytes& acc) override {
_acc = acc;
@@ -203,9 +203,9 @@ public:
virtual opt_bytes get_accumulator() const override {
return _acc;
}
virtual void reduce(const opt_bytes& acc) override {
virtual void reduce(cql_serialization_format sf, const opt_bytes& acc) override {
std::vector<bytes_opt> args{_acc, acc};
_acc = _rfunc->execute(args);
_acc = _rfunc->execute(sf, args);
}
};
@@ -218,10 +218,10 @@ public:
virtual void reset() override {
_sum = {};
}
virtual opt_bytes compute() override {
virtual opt_bytes compute(cql_serialization_format sf) override {
return data_type_for<Type>()->decompose(accumulator_for<Type>::narrow(_sum));
}
virtual void add_input(const std::vector<opt_bytes>& values) override {
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
if (!values[0]) {
return;
}
@@ -237,7 +237,7 @@ public:
virtual opt_bytes get_accumulator() const override {
return accumulator_for<Type>::decompose(_sum);
}
virtual void reduce(const opt_bytes& acc) override {
virtual void reduce(cql_serialization_format sf, const opt_bytes& acc) override {
if (acc) {
auto other = accumulator_for<Type>::deserialize(acc);
_sum += other;
@@ -248,7 +248,7 @@ public:
template <typename Type>
class impl_reducible_sum_function final : public impl_sum_function_for<Type> {
public:
virtual bytes_opt compute() override {
virtual bytes_opt compute(cql_serialization_format sf) override {
return this->get_accumulator();
}
};
@@ -316,14 +316,14 @@ public:
_sum = {};
_count = 0;
}
virtual opt_bytes compute() override {
virtual opt_bytes compute(cql_serialization_format sf) override {
Type ret{};
if (_count) {
ret = impl_div_for_avg<Type>::div(_sum, _count);
}
return data_type_for<Type>()->decompose(ret);
}
virtual void add_input(const std::vector<opt_bytes>& values) override {
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
if (!values[0]) {
return;
}
@@ -348,7 +348,7 @@ public:
);
return tuple_val.serialize();
}
virtual void reduce(const opt_bytes& acc) override {
virtual void reduce(cql_serialization_format sf, const opt_bytes& acc) override {
if (acc) {
data_type tuple_type = tuple_type_impl::get_instance({accumulator_for<Type>::data_type(), long_type});
auto tuple = value_cast<tuple_type_impl::native_type>(tuple_type->deserialize(bytes_view(*acc)));
@@ -362,7 +362,7 @@ public:
template <typename Type>
class impl_reducible_avg_function : public impl_avg_function_for<Type> {
public:
virtual bytes_opt compute() override {
virtual bytes_opt compute(cql_serialization_format sf) override {
return this->get_accumulator();
}
};
@@ -457,13 +457,13 @@ public:
virtual void reset() override {
_max = {};
}
virtual opt_bytes compute() override {
virtual opt_bytes compute(cql_serialization_format sf) override {
if (!_max) {
return {};
}
return data_type_for<Type>()->decompose(data_value(Type{*_max}));
}
virtual void add_input(const std::vector<opt_bytes>& values) override {
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
if (!values[0]) {
return;
}
@@ -487,8 +487,8 @@ public:
}
return {};
}
virtual void reduce(const opt_bytes& acc) override {
return add_input({acc});
virtual void reduce(cql_serialization_format sf, const opt_bytes& acc) override {
return add_input(sf, {acc});
}
};
@@ -502,10 +502,10 @@ public:
virtual void reset() override {
_max = {};
}
virtual opt_bytes compute() override {
virtual opt_bytes compute(cql_serialization_format sf) override {
return _max.value_or(bytes{});
}
virtual void add_input(const std::vector<opt_bytes>& values) override {
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
if (values.empty() || !values[0]) {
return;
}
@@ -519,11 +519,11 @@ public:
virtual opt_bytes get_accumulator() const override {
return _max;
}
virtual void reduce(const opt_bytes& acc) override {
virtual void reduce(cql_serialization_format sf, const opt_bytes& acc) override {
if (acc && !acc->length()) {
return;
}
return add_input({acc});
return add_input(sf, {acc});
}
};
@@ -598,13 +598,13 @@ public:
virtual void reset() override {
_min = {};
}
virtual opt_bytes compute() override {
virtual opt_bytes compute(cql_serialization_format sf) override {
if (!_min) {
return {};
}
return data_type_for<Type>()->decompose(data_value(Type{*_min}));
}
virtual void add_input(const std::vector<opt_bytes>& values) override {
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
if (!values[0]) {
return;
}
@@ -628,8 +628,8 @@ public:
}
return {};
}
virtual void reduce(const opt_bytes& acc) override {
return add_input({acc});
virtual void reduce(cql_serialization_format sf, const opt_bytes& acc) override {
return add_input(sf, {acc});
}
};
@@ -643,10 +643,10 @@ public:
virtual void reset() override {
_min = {};
}
virtual opt_bytes compute() override {
virtual opt_bytes compute(cql_serialization_format sf) override {
return _min.value_or(bytes{});
}
virtual void add_input(const std::vector<opt_bytes>& values) override {
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
if (values.empty() || !values[0]) {
return;
}
@@ -660,11 +660,11 @@ public:
virtual opt_bytes get_accumulator() const override {
return _min;
}
virtual void reduce(const opt_bytes& acc) override {
virtual void reduce(cql_serialization_format sf, const opt_bytes& acc) override {
if (acc && !acc->length()) {
return;
}
return add_input({acc});
return add_input(sf, {acc});
}
};
@@ -720,10 +720,10 @@ public:
virtual void reset() override {
_count = 0;
}
virtual opt_bytes compute() override {
virtual opt_bytes compute(cql_serialization_format sf) override {
return long_type->decompose(_count);
}
virtual void add_input(const std::vector<opt_bytes>& values) override {
virtual void add_input(cql_serialization_format sf, const std::vector<opt_bytes>& values) override {
if (!values[0]) {
return;
}
@@ -739,7 +739,7 @@ public:
virtual opt_bytes get_accumulator() const override {
return long_type->decompose(_count);
}
virtual void reduce(const opt_bytes& acc) override {
virtual void reduce(cql_serialization_format sf, const opt_bytes& acc) override {
if (acc) {
auto other = value_cast<int64_t>(long_type->deserialize(bytes_view(*acc)));
_count += other;
@@ -814,35 +814,6 @@ bool user_aggregate::is_reducible() const { return _reducefunc != nullptr; }
bool user_aggregate::requires_thread() const { return _sfunc->requires_thread() || (_finalfunc && _finalfunc->requires_thread()); }
bool user_aggregate::has_finalfunc() const { return _finalfunc != nullptr; }
std::ostream& user_aggregate::describe(std::ostream& os) const {
auto ks = cql3::util::maybe_quote(name().keyspace);
auto na = cql3::util::maybe_quote(name().name);
os << "CREATE AGGREGATE " << ks << "." << na << "(";
for (size_t i = 0; i < _arg_types.size(); i++) {
if (i > 0) {
os << ", ";
}
os << _arg_types[i]->cql3_type_name();
}
os << ")\n";
os << "SFUNC " << cql3::util::maybe_quote(_sfunc->name().name) << "\n"
<< "STYPE " << _sfunc->return_type()->cql3_type_name();
if (is_reducible()) {
os << "\n" << "REDUCEFUNC " << cql3::util::maybe_quote(_reducefunc->name().name);
}
if (has_finalfunc()) {
os << "\n" << "FINALFUNC " << cql3::util::maybe_quote(_finalfunc->name().name);
}
if (_initcond) {
os << "\n" << "INITCOND " << _sfunc->return_type()->deserialize(bytes_view(*_initcond)).to_parsable_string();
}
os << ";";
return os;
}
shared_ptr<aggregate_function>
aggregate_fcts::make_count_rows_function() {
return make_shared<count_rows_function>();

View File

@@ -18,6 +18,7 @@
#include "bytes_ostream.hh"
#include "types.hh"
#include "cql_serialization_format.hh"
#include <boost/algorithm/cxx11/any_of.hpp>
@@ -46,7 +47,7 @@ public:
virtual bool requires_thread() const override;
virtual bytes_opt execute(const std::vector<bytes_opt>& parameters) override {
virtual bytes_opt execute(cql_serialization_format sf, const std::vector<bytes_opt>& parameters) override {
bytes_ostream encoded_row;
encoded_row.write("{", 1);
for (size_t i = 0; i < _selector_names.size(); ++i) {

View File

@@ -14,6 +14,7 @@
#include "exceptions/exceptions.hh"
#include <seastar/core/print.hh>
#include "cql3/cql3_type.hh"
#include "cql_serialization_format.hh"
namespace cql3 {
@@ -27,7 +28,7 @@ shared_ptr<function>
make_to_blob_function(data_type from_type) {
auto name = from_type->as_cql3_type().to_string() + "asblob";
return make_native_scalar_function<true>(name, bytes_type, { from_type },
[] (const std::vector<bytes_opt>& parameters) {
[] (cql_serialization_format sf, const std::vector<bytes_opt>& parameters) {
return parameters[0];
});
}
@@ -37,13 +38,13 @@ shared_ptr<function>
make_from_blob_function(data_type to_type) {
sstring name = sstring("blobas") + to_type->as_cql3_type().to_string();
return make_native_scalar_function<true>(name, to_type, { bytes_type },
[name, to_type] (const std::vector<bytes_opt>& parameters) -> bytes_opt {
[name, to_type] (cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
auto&& val = parameters[0];
if (!val) {
return val;
}
try {
to_type->validate(*val);
to_type->validate(*val, sf);
return val;
} catch (marshal_exception& e) {
using namespace exceptions;
@@ -57,7 +58,7 @@ inline
shared_ptr<function>
make_varchar_as_blob_fct() {
return make_native_scalar_function<true>("varcharasblob", bytes_type, { utf8_type },
[] (const std::vector<bytes_opt>& parameters) -> bytes_opt {
[] (cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
return parameters[0];
});
}
@@ -66,7 +67,7 @@ inline
shared_ptr<function>
make_blob_as_varchar_fct() {
return make_native_scalar_function<true>("blobasvarchar", utf8_type, { bytes_type },
[] (const std::vector<bytes_opt>& parameters) -> bytes_opt {
[] (cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
return parameters[0];
});
}

View File

@@ -35,7 +35,7 @@ public:
virtual void print(std::ostream& os) const override {
os << "cast(" << _arg_types[0]->name() << " as " << _return_type->name() << ")";
}
virtual bytes_opt execute(const std::vector<bytes_opt>& parameters) override {
virtual bytes_opt execute(cql_serialization_format sf, const std::vector<bytes_opt>& parameters) override {
auto from_type = arg_types()[0];
auto to_type = return_type();
@@ -165,6 +165,8 @@ static data_value castas_fctn_from_dv_to_string(data_value from) {
return from.type()->to_string_impl(from);
}
// FIXME: Add conversions for counters, after they are fully implemented...
static constexpr unsigned next_power_of_2(unsigned val) {
unsigned ret = 1;
while (ret <= val) {
@@ -368,26 +370,6 @@ castas_fctn get_castas_fctn(data_type to_type, data_type from_type) {
return castas_fctn_from_dv_to_string;
case cast_switch_case_val(kind::utf8, kind::ascii):
return castas_fctn_simple<sstring, sstring>;
case cast_switch_case_val(kind::byte, kind::counter):
return castas_fctn_simple<int8_t, int64_t>;
case cast_switch_case_val(kind::short_kind, kind::counter):
return castas_fctn_simple<int16_t, int64_t>;
case cast_switch_case_val(kind::int32, kind::counter):
return castas_fctn_simple<int32_t, int64_t>;
case cast_switch_case_val(kind::long_kind, kind::counter):
return castas_fctn_simple<int64_t, int64_t>;
case cast_switch_case_val(kind::float_kind, kind::counter):
return castas_fctn_simple<float, int64_t>;
case cast_switch_case_val(kind::double_kind, kind::counter):
return castas_fctn_simple<double, int64_t>;
case cast_switch_case_val(kind::varint, kind::counter):
return castas_fctn_simple<utils::multiprecision_int, int64_t>;
case cast_switch_case_val(kind::decimal, kind::counter):
return castas_fctn_from_integer_to_decimal<int64_t>;
case cast_switch_case_val(kind::ascii, kind::counter):
case cast_switch_case_val(kind::utf8, kind::counter):
return castas_fctn_to_string<int64_t>;
}
throw exceptions::invalid_request_exception(format("{} cannot be cast to {}", from_type->name(), to_type->name()));
}

View File

@@ -40,8 +40,8 @@ public:
return Pure;
}
bytes_opt execute(const std::vector<bytes_opt>& parameters) override {
return _func(parameters);
bytes_opt execute(cql_serialization_format sf, const std::vector<bytes_opt>& parameters) override {
return _func(sf, parameters);
}
};
@@ -61,7 +61,7 @@ make_failure_injection_function(sstring name,
shared_ptr<function> make_enable_injection_function() {
return make_failure_injection_function<false>("enable_injection", empty_type, { ascii_type, ascii_type },
[] (const std::vector<bytes_opt>& parameters) {
[] (cql_serialization_format, const std::vector<bytes_opt>& parameters) {
sstring injection_name = ascii_type->get_string(parameters[0].value());
const bool one_shot = ascii_type->get_string(parameters[1].value()) == "true";
smp::invoke_on_all([injection_name, one_shot] () mutable {
@@ -73,7 +73,7 @@ shared_ptr<function> make_enable_injection_function() {
shared_ptr<function> make_disable_injection_function() {
return make_failure_injection_function<false>("disable_injection", empty_type, { ascii_type },
[] (const std::vector<bytes_opt>& parameters) {
[] (cql_serialization_format, const std::vector<bytes_opt>& parameters) {
sstring injection_name = ascii_type->get_string(parameters[0].value());
smp::invoke_on_all([injection_name] () mutable {
utils::get_local_injector().disable(injection_name);
@@ -85,7 +85,7 @@ shared_ptr<function> make_disable_injection_function() {
shared_ptr<function> make_enabled_injections_function() {
const auto list_type_inst = list_type_impl::get_instance(ascii_type, false);
return make_failure_injection_function<true>("enabled_injections", list_type_inst, {},
[list_type_inst] (const std::vector<bytes_opt>&) -> bytes {
[list_type_inst] (cql_serialization_format, const std::vector<bytes_opt>&) -> bytes {
return seastar::map_reduce(smp::all_cpus(), [] (unsigned) {
return make_ready_future<std::vector<sstring>>(utils::get_local_injector().enabled_injections());
}, std::vector<data_value>(),

View File

@@ -13,10 +13,7 @@
#include "cql3/lists.hh"
#include "cql3/constants.hh"
#include "cql3/user_types.hh"
#include "cql3/ut_name.hh"
#include "cql3/type_json.hh"
#include "cql3/functions/user_function.hh"
#include "cql3/functions/user_aggregate.hh"
#include "data_dictionary/data_dictionary.hh"
#include "types/map.hh"
#include "types/set.hh"
@@ -27,7 +24,6 @@
#include "cql3/prepare_context.hh"
#include "user_aggregate.hh"
#include "cql3/expr/expression.hh"
#include <boost/range/adaptor/transformed.hpp>
#include <boost/range/adaptors.hpp>
#include "error_injection_fcts.hh"
@@ -53,13 +49,6 @@ bool abstract_function::requires_thread() const { return false; }
bool as_json_function::requires_thread() const { return false; }
static bool same_signature(const shared_ptr<function>& f1, const shared_ptr<function>& f2) {
if (f1 == nullptr || f2 == nullptr) {
return false;
}
return f1->name() == f2->name() && f1->arg_types() == f2->arg_types();
}
thread_local std::unordered_multimap<function_name, shared_ptr<function>> functions::_declared = init();
void functions::clear_functions() noexcept {
@@ -105,6 +94,11 @@ functions::init() noexcept {
if (type == cql3_type::blob) {
continue;
}
// counters are not supported yet
if (type.is_counter()) {
warn(unimplemented::cause::COUNTERS);
continue;
}
declare(make_to_blob_function(type.get_type()));
declare(make_from_blob_function(type.get_type()));
@@ -146,56 +140,22 @@ void functions::replace_function(shared_ptr<function> func) {
with_udf_iter(func->name(), func->arg_types(), [func] (functions::declared_t::iterator i) {
i->second = std::move(func);
});
auto scalar_func = dynamic_pointer_cast<scalar_function>(func);
if (!scalar_func) {
return;
}
for (auto& fit : _declared) {
auto aggregate = dynamic_pointer_cast<user_aggregate>(fit.second);
if (aggregate && (same_signature(aggregate->sfunc(), scalar_func)
|| (same_signature(aggregate->finalfunc(), scalar_func))
|| (same_signature(aggregate->reducefunc(), scalar_func))))
{
// we need to replace at least one underlying function
shared_ptr<scalar_function> sfunc = same_signature(aggregate->sfunc(), scalar_func) ? scalar_func : aggregate->sfunc();
shared_ptr<scalar_function> finalfunc = same_signature(aggregate->finalfunc(), scalar_func) ? scalar_func : aggregate->finalfunc();
shared_ptr<scalar_function> reducefunc = same_signature(aggregate->reducefunc(), scalar_func) ? scalar_func : aggregate->reducefunc();
fit.second = ::make_shared<user_aggregate>(aggregate->name(), aggregate->initcond(), sfunc, reducefunc, finalfunc);
}
}
}
void functions::remove_function(const function_name& name, const std::vector<data_type>& arg_types) {
with_udf_iter(name, arg_types, [] (functions::declared_t::iterator i) { _declared.erase(i); });
}
std::optional<function_name> functions::used_by_user_aggregate(shared_ptr<user_function> func) {
std::optional<function_name> functions::used_by_user_aggregate(const function_name& name) {
for (const shared_ptr<function>& fptr : _declared | boost::adaptors::map_values) {
auto aggregate = dynamic_pointer_cast<user_aggregate>(fptr);
if (aggregate && (same_signature(aggregate->sfunc(), func)
|| (same_signature(aggregate->finalfunc(), func))
|| (same_signature(aggregate->reducefunc(), func))))
{
if (aggregate && (aggregate->sfunc().name() == name || (aggregate->has_finalfunc() && aggregate->finalfunc().name() == name))) {
return aggregate->name();
}
}
return {};
}
std::optional<function_name> functions::used_by_user_function(const ut_name& user_type) {
for (const shared_ptr<function>& fptr : _declared | boost::adaptors::map_values) {
for (auto& arg_type : fptr->arg_types()) {
if (arg_type->references_user_type(user_type.get_keyspace(), user_type.get_user_type_name())) {
return fptr->name();
}
}
if (fptr->return_type()->references_user_type(user_type.get_keyspace(), user_type.get_user_type_name())) {
return fptr->name();
}
}
return {};
}
lw_shared_ptr<column_specification>
functions::make_arg_spec(const sstring& receiver_ks, const sstring& receiver_cf,
const function& fun, size_t i) {
@@ -211,7 +171,7 @@ inline
shared_ptr<function>
make_to_json_function(data_type t) {
return make_native_scalar_function<true>("tojson", utf8_type, {t},
[t](const std::vector<bytes_opt>& parameters) -> bytes_opt {
[t](cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
return utf8_type->decompose(to_json_string(*t, parameters[0]));
});
}
@@ -220,12 +180,12 @@ inline
shared_ptr<function>
make_from_json_function(data_dictionary::database db, const sstring& keyspace, data_type t) {
return make_native_scalar_function<true>("fromjson", t, {utf8_type},
[&db, keyspace, t](const std::vector<bytes_opt>& parameters) -> bytes_opt {
[&db, keyspace, t](cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
try {
rjson::value json_value = rjson::parse(utf8_type->to_string(parameters[0].value()));
bytes_opt parsed_json_value;
if (!json_value.IsNull()) {
parsed_json_value.emplace(from_json_object(*t, json_value));
parsed_json_value.emplace(from_json_object(*t, json_value, sf));
}
return parsed_json_value;
} catch(rjson::error& e) {
@@ -422,32 +382,6 @@ functions::get(data_dictionary::database db,
return std::move(compatibles[0]);
}
template<typename F>
std::vector<shared_ptr<F>> functions::get_filtered_transformed(const sstring& keyspace) {
auto filter = [&] (const std::pair<const function_name, shared_ptr<function>>& d) -> bool {
return d.first.keyspace == keyspace && dynamic_cast<F*>(d.second.get());
};
auto transformer = [] (const std::pair<const function_name, shared_ptr<function>>& d) -> shared_ptr<F> {
return dynamic_pointer_cast<F>(d.second);
};
return boost::copy_range<std::vector<shared_ptr<F>>>(
_declared
| boost::adaptors::filtered(filter)
| boost::adaptors::transformed(transformer)
);
}
std::vector<shared_ptr<user_function>>
functions::get_user_functions(const sstring& keyspace) {
return get_filtered_transformed<user_function>(keyspace);
}
std::vector<shared_ptr<user_aggregate>>
functions::get_user_aggregates(const sstring& keyspace) {
return get_filtered_transformed<user_aggregate>(keyspace);
}
boost::iterator_range<functions::declared_t::iterator>
functions::find(const function_name& name) {
assert(name.has_keyspace()); // : "function name not fully qualified";

View File

@@ -27,10 +27,6 @@
namespace cql3 {
namespace functions {
//forward declarations
class user_function;
class user_aggregate;
using declared_t = std::unordered_multimap<function_name, shared_ptr<function>>;
void add_agg_functions(declared_t& funcs);
@@ -61,8 +57,6 @@ public:
const std::vector<shared_ptr<assignment_testable>> args(std::begin(provided_args), std::end(provided_args));
return get(db, keyspace, name, args, receiver_ks, receiver_cf, receiver);
}
static std::vector<shared_ptr<user_function>> get_user_functions(const sstring& keyspace);
static std::vector<shared_ptr<user_aggregate>> get_user_aggregates(const sstring& keyspace);
static boost::iterator_range<declared_t::iterator> find(const function_name& name);
static declared_t::iterator find_iter(const function_name& name, const std::vector<data_type>& arg_types);
static shared_ptr<function> find(const function_name& name, const std::vector<data_type>& arg_types);
@@ -71,15 +65,11 @@ public:
static void add_function(shared_ptr<function>);
static void replace_function(shared_ptr<function>);
static void remove_function(const function_name& name, const std::vector<data_type>& arg_types);
static std::optional<function_name> used_by_user_aggregate(shared_ptr<user_function>);
static std::optional<function_name> used_by_user_function(const ut_name& user_type);
static std::optional<function_name> used_by_user_aggregate(const function_name& name);
private:
template <typename F>
static void with_udf_iter(const function_name& name, const std::vector<data_type>& arg_types, F&& f);
template <typename F>
static std::vector<shared_ptr<F>> get_filtered_transformed(const sstring& keyspace);
// This method and matchArguments are somewhat duplicate, but this method allows us to provide more precise errors in the common
// case where there is no override for a given function. This is thus probably worth the minor code duplication.
static void validate_types(data_dictionary::database db,

View File

@@ -12,6 +12,7 @@
#include "native_function.hh"
#include "scalar_function.hh"
#include "cql_serialization_format.hh"
#include "log.hh"
#include <seastar/core/shared_ptr.hh>
@@ -47,9 +48,9 @@ public:
virtual bool is_pure() const override {
return Pure;
}
virtual bytes_opt execute(const std::vector<bytes_opt>& parameters) override {
virtual bytes_opt execute(cql_serialization_format sf, const std::vector<bytes_opt>& parameters) override {
try {
return _func(parameters);
return _func(sf, parameters);
} catch(exceptions::cassandra_exception&) {
// If the function's code took the time to produce an official
// cassandra_exception, pass it through. Otherwise, below we will

View File

@@ -23,11 +23,12 @@ public:
/**
* Applies this function to the specified parameter.
*
* @param protocolVersion protocol version used for parameters and return value
* @param parameters the input parameters
* @return the result of applying this function to the parameter
* @throws InvalidRequestException if this function cannot not be applied to the parameter
*/
virtual bytes_opt execute(const std::vector<bytes_opt>& parameters) = 0;
virtual bytes_opt execute(cql_serialization_format sf, const std::vector<bytes_opt>& parameters) = 0;
};

View File

@@ -24,7 +24,7 @@ inline
shared_ptr<function>
make_now_fct() {
return make_native_scalar_function<false>("now", timeuuid_type, {},
[] (const std::vector<bytes_opt>& values) -> bytes_opt {
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
return {to_bytes(utils::UUID_gen::get_time_UUID())};
});
}
@@ -42,7 +42,7 @@ inline
shared_ptr<function>
make_min_timeuuid_fct() {
return make_native_scalar_function<true>("mintimeuuid", timeuuid_type, { timestamp_type },
[] (const std::vector<bytes_opt>& values) -> bytes_opt {
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
auto& bb = values[0];
if (!bb) {
return {};
@@ -60,7 +60,7 @@ inline
shared_ptr<function>
make_max_timeuuid_fct() {
return make_native_scalar_function<true>("maxtimeuuid", timeuuid_type, { timestamp_type },
[] (const std::vector<bytes_opt>& values) -> bytes_opt {
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
auto& bb = values[0];
if (!bb) {
return {};
@@ -89,7 +89,7 @@ inline
shared_ptr<function>
make_date_of_fct() {
return make_native_scalar_function<true>("dateof", timestamp_type, { timeuuid_type },
[] (const std::vector<bytes_opt>& values) -> bytes_opt {
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
using namespace utils;
auto& bb = values[0];
if (!bb) {
@@ -104,7 +104,7 @@ inline
shared_ptr<function>
make_unix_timestamp_of_fct() {
return make_native_scalar_function<true>("unixtimestampof", long_type, { timeuuid_type },
[] (const std::vector<bytes_opt>& values) -> bytes_opt {
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
using namespace utils;
auto& bb = values[0];
if (!bb) {
@@ -117,7 +117,7 @@ make_unix_timestamp_of_fct() {
inline shared_ptr<function>
make_currenttimestamp_fct() {
return make_native_scalar_function<false>("currenttimestamp", timestamp_type, {},
[] (const std::vector<bytes_opt>& values) -> bytes_opt {
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
return {timestamp_type->decompose(db_clock::now())};
});
}
@@ -125,7 +125,7 @@ make_currenttimestamp_fct() {
inline shared_ptr<function>
make_currenttime_fct() {
return make_native_scalar_function<false>("currenttime", time_type, {},
[] (const std::vector<bytes_opt>& values) -> bytes_opt {
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
constexpr int64_t milliseconds_in_day = 3600 * 24 * 1000;
int64_t milliseconds_since_epoch = std::chrono::duration_cast<std::chrono::milliseconds>(db_clock::now().time_since_epoch()).count();
int64_t nanoseconds_today = (milliseconds_since_epoch % milliseconds_in_day) * 1000 * 1000;
@@ -136,7 +136,7 @@ make_currenttime_fct() {
inline shared_ptr<function>
make_currentdate_fct() {
return make_native_scalar_function<false>("currentdate", simple_date_type, {},
[] (const std::vector<bytes_opt>& values) -> bytes_opt {
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
auto to_simple_date = get_castas_fctn(simple_date_type, timestamp_type);
return {simple_date_type->decompose(to_simple_date(db_clock::now()))};
});
@@ -146,7 +146,7 @@ inline
shared_ptr<function>
make_currenttimeuuid_fct() {
return make_native_scalar_function<false>("currenttimeuuid", timeuuid_type, {},
[] (const std::vector<bytes_opt>& values) -> bytes_opt {
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
return {timeuuid_type->decompose(timeuuid_native_type{utils::UUID_gen::get_time_UUID()})};
});
}
@@ -155,7 +155,7 @@ inline
shared_ptr<function>
make_timeuuidtodate_fct() {
return make_native_scalar_function<true>("todate", simple_date_type, { timeuuid_type },
[] (const std::vector<bytes_opt>& values) -> bytes_opt {
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
using namespace utils;
auto& bb = values[0];
if (!bb) {
@@ -171,7 +171,7 @@ inline
shared_ptr<function>
make_timestamptodate_fct() {
return make_native_scalar_function<true>("todate", simple_date_type, { timestamp_type },
[] (const std::vector<bytes_opt>& values) -> bytes_opt {
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
using namespace utils;
auto& bb = values[0];
if (!bb) {
@@ -190,7 +190,7 @@ inline
shared_ptr<function>
make_timeuuidtotimestamp_fct() {
return make_native_scalar_function<true>("totimestamp", timestamp_type, { timeuuid_type },
[] (const std::vector<bytes_opt>& values) -> bytes_opt {
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
using namespace utils;
auto& bb = values[0];
if (!bb) {
@@ -205,7 +205,7 @@ inline
shared_ptr<function>
make_datetotimestamp_fct() {
return make_native_scalar_function<true>("totimestamp", timestamp_type, { simple_date_type },
[] (const std::vector<bytes_opt>& values) -> bytes_opt {
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
using namespace utils;
auto& bb = values[0];
if (!bb) {
@@ -224,7 +224,7 @@ inline
shared_ptr<function>
make_timeuuidtounixtimestamp_fct() {
return make_native_scalar_function<true>("tounixtimestamp", long_type, { timeuuid_type },
[] (const std::vector<bytes_opt>& values) -> bytes_opt {
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
using namespace utils;
auto& bb = values[0];
if (!bb) {
@@ -242,7 +242,7 @@ inline
shared_ptr<function>
make_timestamptounixtimestamp_fct() {
return make_native_scalar_function<true>("tounixtimestamp", long_type, { timestamp_type },
[] (const std::vector<bytes_opt>& values) -> bytes_opt {
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
using namespace utils;
auto& bb = values[0];
if (!bb) {
@@ -260,7 +260,7 @@ inline
shared_ptr<function>
make_datetounixtimestamp_fct() {
return make_native_scalar_function<true>("tounixtimestamp", long_type, { simple_date_type },
[] (const std::vector<bytes_opt>& values) -> bytes_opt {
[] (cql_serialization_format sf, const std::vector<bytes_opt>& values) -> bytes_opt {
using namespace utils;
auto& bb = values[0];
if (!bb) {

View File

@@ -31,7 +31,7 @@ public:
, _schema(s) {
}
bytes_opt execute(const std::vector<bytes_opt>& parameters) override {
bytes_opt execute(cql_serialization_format sf, const std::vector<bytes_opt>& parameters) override {
if (std::any_of(parameters.cbegin(), parameters.cend(), [](const auto& param){ return !param; })) {
return std::nullopt;
}

View File

@@ -11,12 +11,11 @@
#include "abstract_function.hh"
#include "scalar_function.hh"
#include "aggregate_function.hh"
#include "data_dictionary/keyspace_element.hh"
namespace cql3 {
namespace functions {
class user_aggregate : public abstract_function, public aggregate_function, public data_dictionary::keyspace_element {
class user_aggregate : public abstract_function, public aggregate_function{
bytes_opt _initcond;
::shared_ptr<scalar_function> _sfunc;
::shared_ptr<scalar_function> _reducefunc;
@@ -32,19 +31,14 @@ public:
virtual bool requires_thread() const override;
bool has_finalfunc() const;
virtual sstring keypace_name() const override { return name().keyspace; }
virtual sstring element_name() const override { return name().name; }
virtual sstring element_type() const override { return "aggregate"; }
virtual std::ostream& describe(std::ostream& os) const override;
seastar::shared_ptr<scalar_function> sfunc() const {
return _sfunc;
const scalar_function& sfunc() const {
return *_sfunc;
}
seastar::shared_ptr<scalar_function> reducefunc() const {
return _reducefunc;
const scalar_function& reducefunc() const {
return *_reducefunc;
}
seastar::shared_ptr<scalar_function> finalfunc() const {
return _finalfunc;
const scalar_function& finalfunc() const {
return *_finalfunc;
}
const bytes_opt& initcond() const {
return _initcond;

View File

@@ -7,8 +7,8 @@
*/
#include "user_function.hh"
#include "cql3/util.hh"
#include "log.hh"
#include "cql_serialization_format.hh"
#include "lang/wasm.hh"
#include <seastar/core/thread.hh>
@@ -32,7 +32,7 @@ bool user_function::is_aggregate() const { return false; }
bool user_function::requires_thread() const { return true; }
bytes_opt user_function::execute(const std::vector<bytes_opt>& parameters) {
bytes_opt user_function::execute(cql_serialization_format sf, const std::vector<bytes_opt>& parameters) {
const auto& types = arg_types();
if (parameters.size() != types.size()) {
throw std::logic_error("Wrong number of parameters");
@@ -66,33 +66,5 @@ bytes_opt user_function::execute(const std::vector<bytes_opt>& parameters) {
});
}
std::ostream& user_function::describe(std::ostream& os) const {
auto ks = cql3::util::maybe_quote(name().keyspace);
auto na = cql3::util::maybe_quote(name().name);
os << "CREATE FUNCTION " << ks << "." << na << "(";
for (size_t i = 0; i < _arg_names.size(); i++) {
if (i > 0) {
os << ", ";
}
os << _arg_names[i] << " " << _arg_types[i]->cql3_type_name();
}
os << ")\n";
if (_called_on_null_input) {
os << "CALLED";
} else {
os << "RETURNS NULL";
}
os << " ON NULL INPUT\n"
<< "RETURNS " << _return_type->cql3_type_name() << "\n"
<< "LANGUAGE " << _language << "\n"
<< "AS $$\n"
<< _body << "\n"
<< "$$;";
return os;
}
}
}

View File

@@ -14,19 +14,18 @@
#include "scalar_function.hh"
#include "lang/lua.hh"
#include "lang/wasm.hh"
#include "data_dictionary/keyspace_element.hh"
namespace cql3 {
namespace functions {
class user_function final : public abstract_function, public scalar_function, public data_dictionary::keyspace_element {
class user_function final : public abstract_function, public scalar_function {
public:
struct lua_context {
sstring bitcode;
// FIXME: We should not need a copy in each function. It is here
// because user_function::execute is only passed the
// the runtime arguments. We could
// cql_serialization_format and the runtime arguments. We could
// avoid it by having a runtime->execute(user_function) instead,
// but that is a large refactoring. We could also store a
// lua_runtime in a thread_local variable, but that is one extra
@@ -59,12 +58,7 @@ public:
virtual bool is_native() const override;
virtual bool is_aggregate() const override;
virtual bool requires_thread() const override;
virtual bytes_opt execute(const std::vector<bytes_opt>& parameters) override;
virtual sstring keypace_name() const override { return name().keyspace; }
virtual sstring element_name() const override { return name().name; }
virtual sstring element_type() const override { return "function"; }
virtual std::ostream& describe(std::ostream& os) const override;
virtual bytes_opt execute(cql_serialization_format sf, const std::vector<bytes_opt>& parameters) override;
};
}

View File

@@ -22,7 +22,7 @@ inline
shared_ptr<function>
make_uuid_fct() {
return make_native_scalar_function<false>("uuid", uuid_type, {},
[] (const std::vector<bytes_opt>& parameters) -> bytes_opt {
[] (cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
return {uuid_type->decompose(utils::make_random_uuid())};
});
}

View File

@@ -37,6 +37,9 @@ lists::setter::execute(mutation& m, const clustering_key_prefix& prefix, const u
void
lists::setter::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params, const column_definition& column, const cql3::raw_value& value) {
if (value.is_unset_value()) {
return;
}
if (column.type->is_multi_cell()) {
// Delete all cells first, then append new ones
collection_mutation_view_description mut;
@@ -67,7 +70,13 @@ lists::setter_by_index::execute(mutation& m, const clustering_key_prefix& prefix
if (index.is_null()) {
throw exceptions::invalid_request_exception("Invalid null value for list index");
}
if (index.is_unset_value()) {
throw exceptions::invalid_request_exception("Invalid unset value for list index");
}
auto value = expr::evaluate(*_e, params._options);
if (value.is_unset_value()) {
return;
}
auto idx = index.view().deserialize<int32_t>(*int32_type);
auto&& existing_list_opt = params.get_prefetched_list(m.key(), prefix, column);
@@ -113,6 +122,10 @@ lists::setter_by_uuid::execute(mutation& m, const clustering_key_prefix& prefix,
throw exceptions::invalid_request_exception("Invalid null value for list index");
}
if (index.is_unset_value()) {
throw exceptions::invalid_request_exception("Invalid unset value for list index");
}
auto ltype = static_cast<const list_type_impl*>(column.type.get());
collection_mutation_description mut;
@@ -132,6 +145,9 @@ lists::setter_by_uuid::execute(mutation& m, const clustering_key_prefix& prefix,
void
lists::appender::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
const cql3::raw_value value = expr::evaluate(*_e, params._options);
if (value.is_unset_value()) {
return;
}
assert(column.type->is_multi_cell()); // "Attempted to append to a frozen list";
do_append(value, m, prefix, column, params);
}
@@ -145,7 +161,7 @@ lists::do_append(const cql3::raw_value& list_value,
if (column.type->is_multi_cell()) {
// If we append null, do nothing. Note that for Setter, we've
// already removed the previous value so we're good here too
if (list_value.is_null()) {
if (list_value.is_null_or_unset()) {
return;
}
@@ -183,7 +199,7 @@ void
lists::prepender::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
assert(column.type->is_multi_cell()); // "Attempted to prepend to a frozen list";
cql3::raw_value lvalue = expr::evaluate(*_e, params._options);
if (lvalue.is_null()) {
if (lvalue.is_null_or_unset()) {
return;
}
@@ -249,7 +265,7 @@ lists::discarder::execute(mutation& m, const clustering_key_prefix& prefix, cons
return;
}
if (lvalue.is_null()) {
if (lvalue.is_null_or_unset()) {
return;
}
@@ -288,6 +304,9 @@ lists::discarder_by_index::execute(mutation& m, const clustering_key_prefix& pre
if (index.is_null()) {
throw exceptions::invalid_request_exception("Invalid null value for list index");
}
if (index.is_unset_value()) {
return;
}
auto&& existing_list_opt = params.get_prefetched_list(m.key(), prefix, column);
int32_t idx = index.view().deserialize<int32_t>(*int32_type);

View File

@@ -27,21 +27,21 @@ public:
static lw_shared_ptr<column_specification> value_spec_of(const column_specification&);
static lw_shared_ptr<column_specification> uuid_index_spec_of(const column_specification&);
public:
class setter : public operation_skip_if_unset {
class setter : public operation {
public:
setter(const column_definition& column, expr::expression e)
: operation_skip_if_unset(column, std::move(e)) {
: operation(column, std::move(e)) {
}
virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override;
static void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params, const column_definition& column, const cql3::raw_value& value);
};
class setter_by_index : public operation_skip_if_unset {
class setter_by_index : public operation {
protected:
expr::expression _idx;
public:
setter_by_index(const column_definition& column, expr::expression idx, expr::expression e)
: operation_skip_if_unset(column, std::move(e)), _idx(std::move(idx)) {
: operation(column, std::move(e)), _idx(std::move(idx)) {
}
virtual bool requires_read() const override;
virtual void fill_prepare_context(prepare_context& ctx) override;
@@ -57,9 +57,9 @@ public:
virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override;
};
class appender : public operation_skip_if_unset {
class appender : public operation {
public:
using operation_skip_if_unset::operation_skip_if_unset;
using operation::operation;
virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override;
};
@@ -69,25 +69,25 @@ public:
const column_definition& column,
const update_parameters& params);
class prepender : public operation_skip_if_unset {
class prepender : public operation {
public:
using operation_skip_if_unset::operation_skip_if_unset;
using operation::operation;
virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override;
};
class discarder : public operation_skip_if_unset {
class discarder : public operation {
public:
discarder(const column_definition& column, expr::expression e)
: operation_skip_if_unset(column, std::move(e)) {
: operation(column, std::move(e)) {
}
virtual bool requires_read() const override;
virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override;
};
class discarder_by_index : public operation_skip_if_unset {
class discarder_by_index : public operation {
public:
discarder_by_index(const column_definition& column, expr::expression idx)
: operation_skip_if_unset(column, std::move(idx)) {
: operation(column, std::move(idx)) {
}
virtual bool requires_read() const override;
virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override;

View File

@@ -26,6 +26,9 @@ maps::setter::execute(mutation& m, const clustering_key_prefix& row_key, const u
void
maps::setter::execute(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params, const column_definition& column, const cql3::raw_value& value) {
if (value.is_unset_value()) {
return;
}
if (column.type->is_multi_cell()) {
// Delete all cells first, then put new ones
collection_mutation_description mut;
@@ -47,6 +50,12 @@ maps::setter_by_key::execute(mutation& m, const clustering_key_prefix& prefix, c
assert(column.type->is_multi_cell()); // "Attempted to set a value for a single key on a frozen map"m
auto key = expr::evaluate(_k, params._options);
auto value = expr::evaluate(*_e, params._options);
if (value.is_unset_value()) {
return;
}
if (key.is_unset_value()) {
throw invalid_request_exception("Invalid unset map key");
}
if (key.is_null()) {
throw invalid_request_exception("Invalid null map key");
}
@@ -64,7 +73,9 @@ void
maps::putter::execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) {
assert(column.type->is_multi_cell()); // "Attempted to add items to a frozen map";
cql3::raw_value value = expr::evaluate(*_e, params._options);
do_put(m, prefix, params, value, column);
if (!value.is_unset_value()) {
do_put(m, prefix, params, value, column);
}
}
void
@@ -100,6 +111,9 @@ maps::discarder_by_key::execute(mutation& m, const clustering_key_prefix& prefix
if (key.is_null()) {
throw exceptions::invalid_request_exception("Invalid null map key");
}
if (key.is_unset_value()) {
throw exceptions::invalid_request_exception("Invalid unset map key");
}
collection_mutation_description mut;
mut.cells.emplace_back(std::move(key).to_bytes(), params.make_dead_cell());

View File

@@ -27,30 +27,30 @@ public:
static lw_shared_ptr<column_specification> key_spec_of(const column_specification& column);
static lw_shared_ptr<column_specification> value_spec_of(const column_specification& column);
class setter : public operation_skip_if_unset {
class setter : public operation {
public:
setter(const column_definition& column, expr::expression e)
: operation_skip_if_unset(column, std::move(e)) {
: operation(column, std::move(e)) {
}
virtual void execute(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params) override;
static void execute(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params, const column_definition& column, const cql3::raw_value& value);
};
class setter_by_key : public operation_skip_if_unset {
class setter_by_key : public operation {
expr::expression _k;
public:
setter_by_key(const column_definition& column, expr::expression k, expr::expression e)
: operation_skip_if_unset(column, std::move(e)), _k(std::move(k)) {
: operation(column, std::move(e)), _k(std::move(k)) {
}
virtual void fill_prepare_context(prepare_context& ctx) override;
virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override;
};
class putter : public operation_skip_if_unset {
class putter : public operation {
public:
putter(const column_definition& column, expr::expression e)
: operation_skip_if_unset(column, std::move(e)) {
: operation(column, std::move(e)) {
}
virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override;
};
@@ -58,10 +58,10 @@ public:
static void do_put(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params,
const cql3::raw_value& value, const column_definition& column);
class discarder_by_key : public operation_no_unset_support {
class discarder_by_key : public operation {
public:
discarder_by_key(const column_definition& column, expr::expression k)
: operation_no_unset_support(column, std::move(k)) {
: operation(column, std::move(k)) {
}
virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) override;
};

View File

@@ -32,9 +32,9 @@ operation::set_element::prepare(data_dictionary::database db, const sstring& key
using exceptions::invalid_request_exception;
auto rtype = dynamic_pointer_cast<const collection_type_impl>(receiver.type);
if (!rtype) {
throw invalid_request_exception(format("Invalid operation ({}) for non collection column {}", to_string(receiver), receiver.name_as_text()));
throw invalid_request_exception(format("Invalid operation ({}) for non collection column {}", to_string(receiver), receiver.name()));
} else if (!rtype->is_multi_cell()) {
throw invalid_request_exception(format("Invalid operation ({}) for frozen collection column {}", to_string(receiver), receiver.name_as_text()));
throw invalid_request_exception(format("Invalid operation ({}) for frozen collection column {}", to_string(receiver), receiver.name()));
}
if (rtype->get_kind() == abstract_type::kind::list) {
@@ -47,7 +47,7 @@ operation::set_element::prepare(data_dictionary::database db, const sstring& key
return make_shared<lists::setter_by_index>(receiver, std::move(idx), std::move(lval));
}
} else if (rtype->get_kind() == abstract_type::kind::set) {
throw invalid_request_exception(format("Invalid operation ({}) for set column {}", to_string(receiver), receiver.name_as_text()));
throw invalid_request_exception(format("Invalid operation ({}) for set column {}", to_string(receiver), receiver.name()));
} else if (rtype->get_kind() == abstract_type::kind::map) {
auto key = prepare_expression(_selector, db, keyspace, nullptr, maps::key_spec_of(*receiver.column_specification));
auto mval = prepare_expression(_value, db, keyspace, nullptr, maps::value_spec_of(*receiver.column_specification));
@@ -136,11 +136,11 @@ operation::addition::prepare(data_dictionary::database db, const sstring& keyspa
auto ctype = dynamic_pointer_cast<const collection_type_impl>(receiver.type);
if (!ctype) {
if (!receiver.is_counter()) {
throw exceptions::invalid_request_exception(format("Invalid operation ({}) for non counter column {}", to_string(receiver), receiver.name_as_text()));
throw exceptions::invalid_request_exception(format("Invalid operation ({}) for non counter column {}", to_string(receiver), receiver.name()));
}
return make_shared<constants::adder>(receiver, std::move(v));
} else if (!ctype->is_multi_cell()) {
throw exceptions::invalid_request_exception(format("Invalid operation ({}) for frozen collection column {}", to_string(receiver), receiver.name_as_text()));
throw exceptions::invalid_request_exception(format("Invalid operation ({}) for frozen collection column {}", to_string(receiver), receiver.name()));
}
if (ctype->get_kind() == abstract_type::kind::list) {
@@ -169,14 +169,14 @@ operation::subtraction::prepare(data_dictionary::database db, const sstring& key
auto ctype = dynamic_pointer_cast<const collection_type_impl>(receiver.type);
if (!ctype) {
if (!receiver.is_counter()) {
throw exceptions::invalid_request_exception(format("Invalid operation ({}) for non counter column {}", to_string(receiver), receiver.name_as_text()));
throw exceptions::invalid_request_exception(format("Invalid operation ({}) for non counter column {}", to_string(receiver), receiver.name()));
}
auto v = prepare_expression(_value, db, keyspace, nullptr, receiver.column_specification);
return make_shared<constants::subtracter>(receiver, std::move(v));
}
if (!ctype->is_multi_cell()) {
throw exceptions::invalid_request_exception(
format("Invalid operation ({}) for frozen collection column {}", to_string(receiver), receiver.name_as_text()));
format("Invalid operation ({}) for frozen collection column {}", to_string(receiver), receiver.name()));
}
if (ctype->get_kind() == abstract_type::kind::list) {
@@ -211,9 +211,9 @@ operation::prepend::prepare(data_dictionary::database db, const sstring& keyspac
auto v = prepare_expression(_value, db, keyspace, nullptr, receiver.column_specification);
if (!dynamic_cast<const list_type_impl*>(receiver.type.get())) {
throw exceptions::invalid_request_exception(format("Invalid operation ({}) for non list column {}", to_string(receiver), receiver.name_as_text()));
throw exceptions::invalid_request_exception(format("Invalid operation ({}) for non list column {}", to_string(receiver), receiver.name()));
} else if (!receiver.type->is_multi_cell()) {
throw exceptions::invalid_request_exception(format("Invalid operation ({}) for frozen list column {}", to_string(receiver), receiver.name_as_text()));
throw exceptions::invalid_request_exception(format("Invalid operation ({}) for frozen list column {}", to_string(receiver), receiver.name()));
}
return make_shared<lists::prepender>(receiver, std::move(v));
@@ -268,9 +268,9 @@ operation::set_counter_value_from_tuple_list::prepare(data_dictionary::database
auto v = prepare_expression(_value, db, keyspace, nullptr, spec);
// Will not be used elsewhere, so make it local.
class counter_setter : public operation_no_unset_support {
class counter_setter : public operation {
public:
using operation_no_unset_support::operation_no_unset_support;
using operation::operation;
bool is_raw_counter_shard_write() const override {
return true;
@@ -340,9 +340,9 @@ operation::element_deletion::affected_column() const {
shared_ptr<operation>
operation::element_deletion::prepare(data_dictionary::database db, const sstring& keyspace, const column_definition& receiver) const {
if (!receiver.type->is_collection()) {
throw exceptions::invalid_request_exception(format("Invalid deletion operation for non collection column {}", receiver.name_as_text()));
throw exceptions::invalid_request_exception(format("Invalid deletion operation for non collection column {}", receiver.name()));
} else if (!receiver.type->is_multi_cell()) {
throw exceptions::invalid_request_exception(format("Invalid deletion operation for frozen collection column {}", receiver.name_as_text()));
throw exceptions::invalid_request_exception(format("Invalid deletion operation for frozen collection column {}", receiver.name()));
}
auto ctype = static_pointer_cast<const collection_type_impl>(receiver.type);
if (ctype->get_kind() == abstract_type::kind::list) {

View File

@@ -17,7 +17,6 @@
#include "update_parameters.hh"
#include "cql3/column_identifier.hh"
#include "cql3/expr/expression.hh"
#include "cql3/expr/unset.hh"
#include <optional>
@@ -55,13 +54,10 @@ protected:
// may require none of more than one expression, but most need 1 so it simplify things a bit.
std::optional<expr::expression> _e;
// A guard to check if the operation should be skipped due to unset operand.
expr::unset_bind_variable_guard _unset_guard;
public:
operation(const column_definition& column_, std::optional<expr::expression> e, expr::unset_bind_variable_guard ubvg)
operation(const column_definition& column_, std::optional<expr::expression> e)
: column{column_}
, _e(std::move(e))
, _unset_guard(std::move(ubvg))
{ }
virtual ~operation() {}
@@ -91,14 +87,10 @@ public:
}
/**
* Execute the operation. Check should_skip_operation() first.
* Execute the operation.
*/
virtual void execute(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params) = 0;
bool should_skip_operation(const query_options& qo) const {
return _unset_guard.is_unset(qo);
}
virtual void prepare_for_broadcast_tables(statements::broadcast_tables::prepared_update&) const;
/**
@@ -273,18 +265,4 @@ public:
};
};
class operation_skip_if_unset : public operation {
public:
operation_skip_if_unset(const column_definition& column, expr::expression e)
: operation(column, e, expr::unset_bind_variable_guard(e)) {
}
};
class operation_no_unset_support : public operation {
public:
operation_no_unset_support(const column_definition& column, std::optional<expr::expression> e)
: operation(column, std::move(e), expr::unset_bind_variable_guard(std::nullopt)) {
}
};
}

View File

@@ -23,43 +23,42 @@ thread_local const query_options::specific_options query_options::specific_optio
thread_local query_options query_options::DEFAULT{default_cql_config,
db::consistency_level::ONE, std::nullopt,
std::vector<cql3::raw_value_view>(), false, query_options::specific_options::DEFAULT};
std::vector<cql3::raw_value_view>(), false, query_options::specific_options::DEFAULT, cql_serialization_format::latest()};
query_options::query_options(const cql_config& cfg,
db::consistency_level consistency,
std::optional<std::vector<sstring_view>> names,
std::vector<cql3::raw_value> values,
std::vector<cql3::raw_value_view> value_views,
cql3::unset_bind_variable_vector unset,
bool skip_metadata,
specific_options options
)
specific_options options,
cql_serialization_format sf)
: _cql_config(cfg)
, _consistency(consistency)
, _names(std::move(names))
, _values(std::move(values))
, _value_views(value_views)
, _unset(unset)
, _skip_metadata(skip_metadata)
, _options(std::move(options))
, _cql_serialization_format(sf)
{
}
query_options::query_options(const cql_config& cfg,
db::consistency_level consistency,
std::optional<std::vector<sstring_view>> names,
cql3::raw_value_vector_with_unset values,
std::vector<cql3::raw_value> values,
bool skip_metadata,
specific_options options
)
specific_options options,
cql_serialization_format sf)
: _cql_config(cfg)
, _consistency(consistency)
, _names(std::move(names))
, _values(std::move(values.values))
, _values(std::move(values))
, _value_views()
, _unset(std::move(values.unset))
, _skip_metadata(skip_metadata)
, _options(std::move(options))
, _cql_serialization_format(sf)
{
fill_value_views();
}
@@ -67,22 +66,22 @@ query_options::query_options(const cql_config& cfg,
query_options::query_options(const cql_config& cfg,
db::consistency_level consistency,
std::optional<std::vector<sstring_view>> names,
cql3::raw_value_view_vector_with_unset value_views,
std::vector<cql3::raw_value_view> value_views,
bool skip_metadata,
specific_options options
)
specific_options options,
cql_serialization_format sf)
: _cql_config(cfg)
, _consistency(consistency)
, _names(std::move(names))
, _values()
, _value_views(std::move(value_views.values))
, _unset(std::move(value_views.unset))
, _value_views(std::move(value_views))
, _skip_metadata(skip_metadata)
, _options(std::move(options))
, _cql_serialization_format(sf)
{
}
query_options::query_options(db::consistency_level cl, cql3::raw_value_vector_with_unset values,
query_options::query_options(db::consistency_level cl, std::vector<cql3::raw_value> values,
specific_options options)
: query_options(
default_cql_config,
@@ -90,7 +89,8 @@ query_options::query_options(db::consistency_level cl, cql3::raw_value_vector_wi
{},
std::move(values),
false,
std::move(options)
std::move(options),
cql_serialization_format::latest()
)
{
}
@@ -101,9 +101,9 @@ query_options::query_options(std::unique_ptr<query_options> qo, lw_shared_ptr<se
std::move(qo->_names),
std::move(qo->_values),
std::move(qo->_value_views),
std::move(qo->_unset),
qo->_skip_metadata,
query_options::specific_options{qo->_options.page_size, paging_state, qo->_options.serial_consistency, qo->_options.timestamp}) {
query_options::specific_options{qo->_options.page_size, paging_state, qo->_options.serial_consistency, qo->_options.timestamp},
qo->_cql_serialization_format) {
}
@@ -113,13 +113,13 @@ query_options::query_options(std::unique_ptr<query_options> qo, lw_shared_ptr<se
std::move(qo->_names),
std::move(qo->_values),
std::move(qo->_value_views),
std::move(qo->_unset),
qo->_skip_metadata,
query_options::specific_options{page_size, paging_state, qo->_options.serial_consistency, qo->_options.timestamp}) {
query_options::specific_options{page_size, paging_state, qo->_options.serial_consistency, qo->_options.timestamp},
qo->_cql_serialization_format) {
}
query_options::query_options(cql3::raw_value_vector_with_unset values)
query_options::query_options(std::vector<cql3::raw_value> values)
: query_options(
db::consistency_level::ONE, std::move(values))
{}
@@ -135,21 +135,12 @@ void query_options::prepare(const std::vector<lw_shared_ptr<column_specification
ordered_values.reserve(specs.size());
for (auto&& spec : specs) {
auto& spec_name = spec->name->text();
bool found_value_for_name = false;
for (size_t j = 0; j < names.size(); j++) {
if (names[j] == spec_name) {
ordered_values.emplace_back(_value_views[j]);
found_value_for_name = true;
break;
}
}
// No bound value was found with the name `spec_name`.
// This means that the user forgot to include a bound value with such name.
if (!found_value_for_name) {
throw exceptions::invalid_request_exception(
format("Missing value for bind marker with name: {}", spec_name));
}
}
_value_views = std::move(ordered_values);
}

View File

@@ -11,14 +11,13 @@
#pragma once
#include <concepts>
#include <initializer_list>
#include "timestamp.hh"
#include "bytes.hh"
#include "db/consistency_level_type.hh"
#include "service/query_state.hh"
#include "service/pager/paging_state.hh"
#include "cql3/values.hh"
#include "utils/small_vector.hh"
#include "cql_serialization_format.hh"
namespace cql3 {
@@ -29,38 +28,6 @@ class column_specification;
using computed_function_values = std::unordered_map<uint8_t, bytes_opt>;
using unset_bind_variable_vector = utils::small_vector<bool, 16>;
// Matches a raw_value_view with an unset vector to support CQL binary protocol
// "unset" values.
struct raw_value_view_vector_with_unset {
std::vector<raw_value_view> values;
unset_bind_variable_vector unset;
raw_value_view_vector_with_unset(std::vector<raw_value_view> values_, unset_bind_variable_vector unset_) : values(std::move(values_)), unset(std::move(unset_)) {}
// Constructor with no unset support, for tests and internal queries
raw_value_view_vector_with_unset(std::vector<raw_value_view> values_) : values(std::move(values_)) {
unset.resize(values.size());
}
raw_value_view_vector_with_unset() = default;
};
// Matches a raw_value with an unset vector to support CQL binary protocol
// "unset" values.
struct raw_value_vector_with_unset {
std::vector<raw_value> values;
unset_bind_variable_vector unset;
raw_value_vector_with_unset(std::vector<raw_value> values_, unset_bind_variable_vector unset_) : values(std::move(values_)), unset(std::move(unset_)) {}
// Constructor with no unset support, for tests and internal queries
raw_value_vector_with_unset(std::vector<raw_value> values_) : values(std::move(values_)) {
unset.resize(values.size());
}
// Mostly for testing.
raw_value_vector_with_unset(std::initializer_list<raw_value> values_) : raw_value_vector_with_unset(std::vector(values_)) {}
raw_value_vector_with_unset() = default;
};
/**
* Options for a query.
*/
@@ -81,9 +48,9 @@ private:
const std::optional<std::vector<sstring_view>> _names;
std::vector<cql3::raw_value> _values;
std::vector<cql3::raw_value_view> _value_views;
unset_bind_variable_vector _unset;
const bool _skip_metadata;
const specific_options _options;
cql_serialization_format _cql_serialization_format;
std::optional<std::vector<query_options>> _batch_options;
// We must use the same microsecond-precision timestamp for
// all cells created by an LWT statement or when a statement
@@ -116,10 +83,23 @@ private:
// evaluation sites and we only have a const reference to `query_options`.
mutable computed_function_values _cached_pk_fn_calls;
private:
// Batch constructor.
template <typename Values>
requires std::same_as<Values, raw_value_vector_with_unset> || std::same_as<Values, raw_value_view_vector_with_unset>
explicit query_options(query_options&& o, std::vector<Values> values_ranges);
/**
* @brief Batch query_options constructor.
*
* Requirements:
* - @tparam OneMutationDataRange has a begin() and end() iterators.
* - The values of @tparam OneMutationDataRange are of either raw_value_view or raw_value types.
*
* @param o Base query_options object. query_options objects for each statement in the batch will derive the values from it.
* @param values_ranges a vector of values ranges for each statement in the batch.
*/
template<typename OneMutationDataRange>
requires requires (OneMutationDataRange range) {
std::begin(range);
std::end(range);
} && ( requires (OneMutationDataRange range) { { *range.begin() } -> std::convertible_to<raw_value_view>; } ||
requires (OneMutationDataRange range) { { *range.begin() } -> std::convertible_to<raw_value>; } )
explicit query_options(query_options&& o, std::vector<OneMutationDataRange> values_ranges);
public:
query_options(query_options&&) = default;
@@ -128,30 +108,43 @@ public:
explicit query_options(const cql_config& cfg,
db::consistency_level consistency,
std::optional<std::vector<sstring_view>> names,
raw_value_vector_with_unset values,
std::vector<cql3::raw_value> values,
bool skip_metadata,
specific_options options
);
specific_options options,
cql_serialization_format sf);
explicit query_options(const cql_config& cfg,
db::consistency_level consistency,
std::optional<std::vector<sstring_view>> names,
std::vector<cql3::raw_value> values,
std::vector<cql3::raw_value_view> value_views,
unset_bind_variable_vector unset,
bool skip_metadata,
specific_options options
);
specific_options options,
cql_serialization_format sf);
explicit query_options(const cql_config& cfg,
db::consistency_level consistency,
std::optional<std::vector<sstring_view>> names,
raw_value_view_vector_with_unset value_views,
std::vector<cql3::raw_value_view> value_views,
bool skip_metadata,
specific_options options
);
specific_options options,
cql_serialization_format sf);
template <typename Values>
requires std::same_as<Values, raw_value_vector_with_unset> || std::same_as<Values, raw_value_view_vector_with_unset>
static query_options make_batch_options(query_options&& o, std::vector<Values> values_ranges) {
/**
* @brief Batch query_options factory.
*
* Requirements:
* - @tparam OneMutationDataRange has a begin() and end() iterators.
* - The values of @tparam OneMutationDataRange are of either raw_value_view or raw_value types.
*
* @param o Base query_options object. query_options objects for each statement in the batch will derive the values from it.
* @param values_ranges a vector of values ranges for each statement in the batch.
*/
template<typename OneMutationDataRange>
requires requires (OneMutationDataRange range) {
std::begin(range);
std::end(range);
} && ( requires (OneMutationDataRange range) { { *range.begin() } -> std::convertible_to<raw_value_view>; } ||
requires (OneMutationDataRange range) { { *range.begin() } -> std::convertible_to<raw_value>; } )
static query_options make_batch_options(query_options&& o, std::vector<OneMutationDataRange> values_ranges) {
return query_options(std::move(o), std::move(values_ranges));
}
@@ -159,8 +152,8 @@ public:
static thread_local query_options DEFAULT;
// forInternalUse
explicit query_options(raw_value_vector_with_unset values);
explicit query_options(db::consistency_level, raw_value_vector_with_unset values, specific_options options = specific_options::DEFAULT);
explicit query_options(std::vector<cql3::raw_value> values);
explicit query_options(db::consistency_level, std::vector<cql3::raw_value> values, specific_options options = specific_options::DEFAULT);
explicit query_options(std::unique_ptr<query_options>, lw_shared_ptr<service::pager::paging_state> paging_state);
explicit query_options(std::unique_ptr<query_options>, lw_shared_ptr<service::pager::paging_state> paging_state, int32_t page_size);
@@ -169,14 +162,7 @@ public:
}
cql3::raw_value_view get_value_at(size_t idx) const {
if (_unset.at(idx)) {
throw exceptions::invalid_request_exception(fmt::format("Unexpected unset value for bind variable {}", idx));
}
return _value_views[idx];
}
bool is_unset(size_t idx) const {
return _unset.at(idx);
return _value_views.at(idx);
}
size_t get_values_count() const {
@@ -209,6 +195,18 @@ public:
return tstamp != api::missing_timestamp ? tstamp : state.get_timestamp();
}
/**
* The protocol version for the query. Will be 3 if the object don't come from
* a native protocol request (i.e. it's been allocated locally or by CQL-over-thrift).
*/
int get_protocol_version() const {
return _cql_serialization_format.protocol_version();
}
cql_serialization_format get_cql_serialization_format() const {
return _cql_serialization_format;
}
const query_options::specific_options& get_specific_options() const {
return _options;
}
@@ -280,15 +278,19 @@ private:
void fill_value_views();
};
template <typename Values>
requires std::same_as<Values, raw_value_vector_with_unset> || std::same_as<Values, raw_value_view_vector_with_unset>
query_options::query_options(query_options&& o, std::vector<Values> values_ranges)
template<typename OneMutationDataRange>
requires requires (OneMutationDataRange range) {
std::begin(range);
std::end(range);
} && ( requires (OneMutationDataRange range) { { *range.begin() } -> std::convertible_to<raw_value_view>; } ||
requires (OneMutationDataRange range) { { *range.begin() } -> std::convertible_to<raw_value>; } )
query_options::query_options(query_options&& o, std::vector<OneMutationDataRange> values_ranges)
: query_options(std::move(o))
{
std::vector<query_options> tmp;
tmp.reserve(values_ranges.size());
std::transform(values_ranges.begin(), values_ranges.end(), std::back_inserter(tmp), [this](auto& values_range) {
return query_options(_cql_config, _consistency, {}, std::move(values_range), _skip_metadata, _options);
return query_options(_cql_config, _consistency, {}, std::move(values_range), _skip_metadata, _options, _cql_serialization_format);
});
_batch_options = std::move(tmp);
}

Some files were not shown because too many files have changed in this diff Show More