Compare commits
59 Commits
scylla-4.4
...
next-4.4
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c37f5938fd | ||
|
|
fa90112787 | ||
|
|
f5895e5c04 | ||
|
|
ce944911f2 | ||
|
|
b220130e4a | ||
|
|
de4f5b3b1f | ||
|
|
84a42570ec | ||
|
|
001f57ec0c | ||
|
|
3279718d52 | ||
|
|
c128994f90 | ||
|
|
9af2e5ead1 | ||
|
|
be695a7353 | ||
|
|
cc9285697d | ||
|
|
21d140febc | ||
|
|
77e05ca482 | ||
|
|
5375b8f1a1 | ||
|
|
7a82432e38 | ||
|
|
146f7b5421 | ||
|
|
e1c7a906f0 | ||
|
|
c5d6e75db8 | ||
|
|
da630e80ea | ||
|
|
8ea1cbe78d | ||
|
|
03b04d40f2 | ||
|
|
175d004513 | ||
|
|
091b794742 | ||
|
|
8be87bb0b1 | ||
|
|
a84142705a | ||
|
|
fc32534aee | ||
|
|
4e526ad88a | ||
|
|
176f253aa3 | ||
|
|
c49cd5d9b6 | ||
|
|
5d4abb521b | ||
|
|
cfc2562dec | ||
|
|
4a1171e2fa | ||
|
|
542a508c50 | ||
|
|
dd018d4de4 | ||
|
|
70098a1991 | ||
|
|
008f2ff370 | ||
|
|
f71cdede5e | ||
|
|
0fd17af2ee | ||
|
|
77cb6596c4 | ||
|
|
c81c7d2d89 | ||
|
|
b3a762f179 | ||
|
|
2bba07bdf4 | ||
|
|
87bfb57ccf | ||
|
|
6ca8590540 | ||
|
|
da57d6c7cd | ||
|
|
61469d62b8 | ||
|
|
c63092038e | ||
|
|
cb7fbb859b | ||
|
|
01920c1293 | ||
|
|
fd64cae856 | ||
|
|
b1032a2699 | ||
|
|
90941622df | ||
|
|
4250ab27d8 | ||
|
|
475e0d0893 | ||
|
|
27333587a8 | ||
|
|
0cfe0e8c8e | ||
|
|
cb3225f2de |
@@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
|
||||
PRODUCT=scylla
|
||||
VERSION=4.4.4
|
||||
VERSION=4.4.9
|
||||
|
||||
if test -f version
|
||||
then
|
||||
|
||||
@@ -2509,7 +2509,7 @@ update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::t
|
||||
const attribute_path_map_node<parsed::update_expression::action>* h = nullptr) {
|
||||
any_updates = true;
|
||||
if (_returnvalues == returnvalues::ALL_NEW) {
|
||||
rjson::set_with_string_name(_return_attributes,
|
||||
rjson::replace_with_string_name(_return_attributes,
|
||||
to_sstring_view(column_name), rjson::copy(json_value));
|
||||
} else if (_returnvalues == returnvalues::UPDATED_NEW) {
|
||||
rjson::value&& v = rjson::copy(json_value);
|
||||
|
||||
@@ -93,6 +93,10 @@ public:
|
||||
[&] (const json::json_return_type& json_return_value) {
|
||||
slogger.trace("api_handler success case");
|
||||
if (json_return_value._body_writer) {
|
||||
// Unfortunately, write_body() forces us to choose
|
||||
// from a fixed and irrelevant list of "mime-types"
|
||||
// at this point. But we'll override it with the
|
||||
// one (application/x-amz-json-1.0) below.
|
||||
rep->write_body("json", std::move(json_return_value._body_writer));
|
||||
} else {
|
||||
rep->_content += json_return_value._res;
|
||||
@@ -105,14 +109,15 @@ public:
|
||||
|
||||
return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
|
||||
});
|
||||
}), _type("json") { }
|
||||
}) { }
|
||||
|
||||
api_handler(const api_handler&) = default;
|
||||
future<std::unique_ptr<reply>> handle(const sstring& path,
|
||||
std::unique_ptr<request> req, std::unique_ptr<reply> rep) override {
|
||||
return _f_handle(std::move(req), std::move(rep)).then(
|
||||
[this](std::unique_ptr<reply> rep) {
|
||||
rep->done(_type);
|
||||
rep->set_mime_type("application/x-amz-json-1.0");
|
||||
rep->done();
|
||||
return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
|
||||
});
|
||||
}
|
||||
@@ -126,7 +131,6 @@ protected:
|
||||
}
|
||||
|
||||
future_handler_function _f_handle;
|
||||
sstring _type;
|
||||
};
|
||||
|
||||
class gated_handler : public handler_base {
|
||||
@@ -192,24 +196,31 @@ future<> server::verify_signature(const request& req) {
|
||||
throw api_error::missing_authentication_token("Authorization header is mandatory for signature verification");
|
||||
}
|
||||
std::string host = host_it->second;
|
||||
std::vector<std::string_view> credentials_raw = split(authorization_it->second, ' ');
|
||||
std::string_view authorization_header = authorization_it->second;
|
||||
auto pos = authorization_header.find_first_of(' ');
|
||||
if (pos == std::string_view::npos || authorization_header.substr(0, pos) != "AWS4-HMAC-SHA256") {
|
||||
throw api_error::invalid_signature(format("Authorization header must use AWS4-HMAC-SHA256 algorithm: {}", authorization_header));
|
||||
}
|
||||
authorization_header.remove_prefix(pos+1);
|
||||
std::string credential;
|
||||
std::string user_signature;
|
||||
std::string signed_headers_str;
|
||||
std::vector<std::string_view> signed_headers;
|
||||
for (std::string_view entry : credentials_raw) {
|
||||
do {
|
||||
// Either one of a comma or space can mark the end of an entry
|
||||
pos = authorization_header.find_first_of(" ,");
|
||||
std::string_view entry = authorization_header.substr(0, pos);
|
||||
if (pos != std::string_view::npos) {
|
||||
authorization_header.remove_prefix(pos + 1);
|
||||
}
|
||||
if (entry.empty()) {
|
||||
continue;
|
||||
}
|
||||
std::vector<std::string_view> entry_split = split(entry, '=');
|
||||
if (entry_split.size() != 2) {
|
||||
if (entry != "AWS4-HMAC-SHA256") {
|
||||
throw api_error::invalid_signature(format("Only AWS4-HMAC-SHA256 algorithm is supported. Found: {}", entry));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
std::string_view auth_value = entry_split[1];
|
||||
// Commas appear as an additional (quite redundant) delimiter
|
||||
if (auth_value.back() == ',') {
|
||||
auth_value.remove_suffix(1);
|
||||
}
|
||||
if (entry_split[0] == "Credential") {
|
||||
credential = std::string(auth_value);
|
||||
} else if (entry_split[0] == "Signature") {
|
||||
@@ -219,7 +230,8 @@ future<> server::verify_signature(const request& req) {
|
||||
signed_headers = split(auth_value, ';');
|
||||
std::sort(signed_headers.begin(), signed_headers.end());
|
||||
}
|
||||
}
|
||||
} while (pos != std::string_view::npos);
|
||||
|
||||
std::vector<std::string_view> credential_split = split(credential, '/');
|
||||
if (credential_split.size() != 5) {
|
||||
throw api_error::validation(format("Incorrect credential information format: {}", credential));
|
||||
|
||||
@@ -38,6 +38,7 @@ stats::stats() : api_operations{} {
|
||||
#define OPERATION_LATENCY(name, CamelCaseName) \
|
||||
seastar::metrics::make_histogram("op_latency", \
|
||||
seastar::metrics::description("Latency histogram of an operation via Alternator API"), {op(CamelCaseName)}, [this]{return to_metrics_histogram(api_operations.name);}),
|
||||
OPERATION(batch_get_item, "BatchGetItem")
|
||||
OPERATION(batch_write_item, "BatchWriteItem")
|
||||
OPERATION(create_backup, "CreateBackup")
|
||||
OPERATION(create_global_table, "CreateGlobalTable")
|
||||
|
||||
@@ -225,7 +225,7 @@ void set_repair(http_context& ctx, routes& r, sharded<netw::messaging_service>&
|
||||
try {
|
||||
res = fut.get0();
|
||||
} catch (std::exception& e) {
|
||||
return make_exception_future<json::json_return_type>(httpd::server_error_exception(e.what()));
|
||||
return make_exception_future<json::json_return_type>(httpd::bad_param_exception(e.what()));
|
||||
}
|
||||
return make_ready_future<json::json_return_type>(json::json_return_type(res));
|
||||
});
|
||||
|
||||
@@ -39,7 +39,7 @@ public:
|
||||
using size_type = bytes::size_type;
|
||||
using value_type = bytes::value_type;
|
||||
using fragment_type = bytes_view;
|
||||
static constexpr size_type max_chunk_size() { return 128 * 1024; }
|
||||
static constexpr size_type max_chunk_size() { return max_alloc_size() - sizeof(chunk); }
|
||||
private:
|
||||
static_assert(sizeof(value_type) == 1, "value_type is assumed to be one byte long");
|
||||
struct chunk {
|
||||
@@ -59,6 +59,7 @@ private:
|
||||
void operator delete(void* ptr) { free(ptr); }
|
||||
};
|
||||
static constexpr size_type default_chunk_size{512};
|
||||
static constexpr size_type max_alloc_size() { return 128 * 1024; }
|
||||
private:
|
||||
std::unique_ptr<chunk> _begin;
|
||||
chunk* _current;
|
||||
@@ -132,16 +133,15 @@ private:
|
||||
return _current->size - _current->offset;
|
||||
}
|
||||
// Figure out next chunk size.
|
||||
// - must be enough for data_size
|
||||
// - must be enough for data_size + sizeof(chunk)
|
||||
// - must be at least _initial_chunk_size
|
||||
// - try to double each time to prevent too many allocations
|
||||
// - do not exceed max_chunk_size
|
||||
// - should not exceed max_alloc_size, unless data_size requires so
|
||||
size_type next_alloc_size(size_t data_size) const {
|
||||
auto next_size = _current
|
||||
? _current->size * 2
|
||||
: _initial_chunk_size;
|
||||
next_size = std::min(next_size, max_chunk_size());
|
||||
// FIXME: check for overflow?
|
||||
next_size = std::min(next_size, max_alloc_size());
|
||||
return std::max<size_type>(next_size, data_size + sizeof(chunk));
|
||||
}
|
||||
// Makes room for a contiguous region of given size.
|
||||
|
||||
@@ -709,16 +709,16 @@ private:
|
||||
}
|
||||
return false;
|
||||
}
|
||||
bool compare(const T&, const value_type& v);
|
||||
int32_t compare(const T&, const value_type& v);
|
||||
};
|
||||
|
||||
template<>
|
||||
bool maybe_back_insert_iterator<std::vector<std::pair<bytes_view, bytes_view>>, bytes_view>::compare(const bytes_view& t, const value_type& v) {
|
||||
int32_t maybe_back_insert_iterator<std::vector<std::pair<bytes_view, bytes_view>>, bytes_view>::compare(const bytes_view& t, const value_type& v) {
|
||||
return _type.compare(t, v.first);
|
||||
}
|
||||
|
||||
template<>
|
||||
bool maybe_back_insert_iterator<std::vector<bytes_view>, bytes_view>::compare(const bytes_view& t, const value_type& v) {
|
||||
int32_t maybe_back_insert_iterator<std::vector<bytes_view>, bytes_view>::compare(const bytes_view& t, const value_type& v) {
|
||||
return _type.compare(t, v);
|
||||
}
|
||||
|
||||
|
||||
@@ -99,8 +99,8 @@ listen_address: localhost
|
||||
# listen_on_broadcast_address: false
|
||||
|
||||
# port for the CQL native transport to listen for clients on
|
||||
# For security reasons, you should not expose this port to the internet. Firewall it if needed.
|
||||
# To disable the CQL native transport, set this option to 0.
|
||||
# For security reasons, you should not expose this port to the internet. Firewall it if needed.
|
||||
# To disable the CQL native transport, remove this option and configure native_transport_port_ssl.
|
||||
native_transport_port: 9042
|
||||
|
||||
# Like native_transport_port, but clients are forwarded to specific shards, based on the
|
||||
|
||||
@@ -281,7 +281,7 @@ scylla_tests = set([
|
||||
'test/boost/cdc_generation_test',
|
||||
'test/boost/aggregate_fcts_test',
|
||||
'test/boost/allocation_strategy_test',
|
||||
'test/boost/alternator_base64_test',
|
||||
'test/boost/alternator_unit_test',
|
||||
'test/boost/anchorless_list_test',
|
||||
'test/boost/auth_passwords_test',
|
||||
'test/boost/auth_resource_test',
|
||||
@@ -1033,7 +1033,7 @@ pure_boost_tests = set([
|
||||
])
|
||||
|
||||
tests_not_using_seastar_test_framework = set([
|
||||
'test/boost/alternator_base64_test',
|
||||
'test/boost/alternator_unit_test',
|
||||
'test/boost/small_vector_test',
|
||||
'test/manual/gossip',
|
||||
'test/manual/message',
|
||||
@@ -1107,7 +1107,7 @@ deps['test/boost/linearizing_input_stream_test'] = [
|
||||
]
|
||||
|
||||
deps['test/boost/duration_test'] += ['test/lib/exception_utils.cc']
|
||||
deps['test/boost/alternator_base64_test'] += ['alternator/base64.cc']
|
||||
deps['test/boost/alternator_unit_test'] += ['alternator/base64.cc']
|
||||
|
||||
deps['test/raft/replication_test'] = ['test/raft/replication_test.cc'] + scylla_raft_dependencies
|
||||
deps['test/boost/raft_fsm_test'] = ['test/boost/raft_fsm_test.cc', 'test/lib/log.cc'] + scylla_raft_dependencies
|
||||
@@ -1969,7 +1969,7 @@ with open(buildfile_tmp, 'w') as f:
|
||||
command = ./dist/debian/debian_files_gen.py
|
||||
build $builddir/debian/debian: debian_files_gen | always
|
||||
rule extract_node_exporter
|
||||
command = tar -C build -xvpf {node_exporter_filename} && rm -rfv build/node_exporter && mv -v build/{node_exporter_dirname} build/node_exporter
|
||||
command = tar -C build -xvpf {node_exporter_filename} --no-same-owner && rm -rfv build/node_exporter && mv -v build/{node_exporter_dirname} build/node_exporter
|
||||
build $builddir/node_exporter: extract_node_exporter | always
|
||||
''').format(**globals()))
|
||||
|
||||
|
||||
@@ -181,13 +181,18 @@ inline
|
||||
shared_ptr<function>
|
||||
make_from_json_function(database& db, const sstring& keyspace, data_type t) {
|
||||
return make_native_scalar_function<true>("fromjson", t, {utf8_type},
|
||||
[&db, &keyspace, t](cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
|
||||
rjson::value json_value = rjson::parse(utf8_type->to_string(parameters[0].value()));
|
||||
bytes_opt parsed_json_value;
|
||||
if (!json_value.IsNull()) {
|
||||
parsed_json_value.emplace(from_json_object(*t, json_value, sf));
|
||||
[&db, keyspace, t](cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
|
||||
try {
|
||||
rjson::value json_value = rjson::parse(utf8_type->to_string(parameters[0].value()));
|
||||
bytes_opt parsed_json_value;
|
||||
if (!json_value.IsNull()) {
|
||||
parsed_json_value.emplace(from_json_object(*t, json_value, sf));
|
||||
}
|
||||
return parsed_json_value;
|
||||
} catch(rjson::error& e) {
|
||||
throw exceptions::function_execution_exception("fromJson",
|
||||
format("Failed parsing fromJson parameter: {}", e.what()), keyspace, {t->name()});
|
||||
}
|
||||
return parsed_json_value;
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -78,7 +78,22 @@ public:
|
||||
return Pure;
|
||||
}
|
||||
virtual bytes_opt execute(cql_serialization_format sf, const std::vector<bytes_opt>& parameters) override {
|
||||
return _func(sf, parameters);
|
||||
try {
|
||||
return _func(sf, parameters);
|
||||
} catch(exceptions::cassandra_exception&) {
|
||||
// If the function's code took the time to produce an official
|
||||
// cassandra_exception, pass it through. Otherwise, below we will
|
||||
// wrap the unknown exception in a function_execution_exception.
|
||||
throw;
|
||||
} catch(...) {
|
||||
std::vector<sstring> args;
|
||||
args.reserve(arg_types().size());
|
||||
for (const data_type& a : arg_types()) {
|
||||
args.push_back(a->name());
|
||||
}
|
||||
throw exceptions::function_execution_exception(name().name,
|
||||
format("Failed execution of function {}: {}", name(), std::current_exception()), name().keyspace, std::move(args));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -551,16 +551,27 @@ bool statement_restrictions::need_filtering() const {
|
||||
// clustering restrictions. Therefore, a continuous clustering range is guaranteed.
|
||||
return false;
|
||||
}
|
||||
if (!_clustering_columns_restrictions->needs_filtering(*_schema)) { // Guaranteed continuous clustering range.
|
||||
return false;
|
||||
}
|
||||
// Now we know there are some clustering-column restrictions that are out-of-order or not EQ. A naive base-table
|
||||
// query must be filtered. What about an index-table query? That can only avoid filtering if there is exactly one
|
||||
// EQ supported by an index.
|
||||
return !(_clustering_columns_restrictions->size() == 1 && _has_queriable_ck_index);
|
||||
|
||||
// TODO: it is also possible to avoid filtering here if a non-empty CK prefix is specified and token_known, plus
|
||||
// there's exactly one out-of-order-but-index-supported clustering-column restriction.
|
||||
if (_has_queriable_ck_index && _uses_secondary_indexing) {
|
||||
// In cases where we use an index, clustering column restrictions might cause the need for filtering.
|
||||
// TODO: This is overly conservative, there are some cases when this returns true but filtering
|
||||
// is not needed. Because of that the database will sometimes perform filtering when it's not actually needed.
|
||||
// Query performance shouldn't be affected much, at most we will filter rows that are all correct.
|
||||
// Here are some cases to consider:
|
||||
// On a table with primary key (p, c1, c2, c3) with an index on c3
|
||||
// WHERE c3 = ? - doesn't require filtering
|
||||
// WHERE c1 = ? AND c2 = ? AND c3 = ? - requires filtering
|
||||
// WHERE p = ? AND c1 = ? AND c3 = ? - doesn't require filtering, but we conservatively report it does
|
||||
// WHERE p = ? AND c1 LIKE ? AND c3 = ? - requires filtering
|
||||
// WHERE p = ? AND c1 = ? AND c2 LIKE ? AND c3 = ? - requires filtering
|
||||
// WHERE p = ? AND c1 = ? AND c2 = ? AND c3 = ? - doesn't use an index
|
||||
// WHERE p = ? AND c1 = ? AND c2 < ? AND c3 = ? - doesn't require filtering, but we report it does
|
||||
return _clustering_columns_restrictions->size() > 1;
|
||||
}
|
||||
// Now we know that the query doesn't use an index.
|
||||
|
||||
// The only thing that can cause filtering now are the clustering columns.
|
||||
return _clustering_columns_restrictions->needs_filtering(*_schema);
|
||||
}
|
||||
|
||||
void statement_restrictions::validate_secondary_index_selections(bool selects_only_static_columns) {
|
||||
|
||||
@@ -306,6 +306,13 @@ create_index_statement::announce_migration(service::storage_proxy& proxy) const
|
||||
format("Index {} is a duplicate of existing index {}", index.name(), existing_index.value().name()));
|
||||
}
|
||||
}
|
||||
auto index_table_name = secondary_index::index_table_name(accepted_name);
|
||||
if (db.has_schema(keyspace(), index_table_name)) {
|
||||
return make_exception_future<::shared_ptr<cql_transport::event::schema_change>>(
|
||||
exceptions::invalid_request_exception(format("Index {} cannot be created, because table {} already exists",
|
||||
accepted_name, index_table_name))
|
||||
);
|
||||
}
|
||||
++_cql_stats->secondary_index_creates;
|
||||
schema_builder builder{schema};
|
||||
builder.with_index(index);
|
||||
|
||||
@@ -964,6 +964,7 @@ lw_shared_ptr<const service::pager::paging_state> indexed_table_select_statement
|
||||
}
|
||||
|
||||
auto paging_state_copy = make_lw_shared<service::pager::paging_state>(service::pager::paging_state(*paging_state));
|
||||
paging_state_copy->set_remaining(internal_paging_size);
|
||||
paging_state_copy->set_partition_key(std::move(index_pk));
|
||||
paging_state_copy->set_clustering_key(std::move(index_ck));
|
||||
return std::move(paging_state_copy);
|
||||
|
||||
@@ -1948,7 +1948,11 @@ sstring database::get_available_index_name(const sstring &ks_name, const sstring
|
||||
auto base_name = index_metadata::get_default_index_name(cf_name, index_name_root);
|
||||
sstring accepted_name = base_name;
|
||||
int i = 0;
|
||||
while (existing_names.contains(accepted_name)) {
|
||||
auto name_accepted = [&] {
|
||||
auto index_table_name = secondary_index::index_table_name(accepted_name);
|
||||
return !has_schema(ks_name, index_table_name) && !existing_names.contains(accepted_name);
|
||||
};
|
||||
while (!name_accepted()) {
|
||||
accepted_name = base_name + "_" + std::to_string(++i);
|
||||
}
|
||||
return accepted_name;
|
||||
|
||||
18
database.hh
18
database.hh
@@ -240,9 +240,13 @@ public:
|
||||
return _memtables.back();
|
||||
}
|
||||
|
||||
// The caller has to make sure the element exist before calling this.
|
||||
// # 8904 - this method is akin to std::set::erase(key_type), not
|
||||
// erase(iterator). Should be tolerant against non-existing.
|
||||
void erase(const shared_memtable& element) {
|
||||
_memtables.erase(boost::range::find(_memtables, element));
|
||||
auto i = boost::range::find(_memtables, element);
|
||||
if (i != _memtables.end()) {
|
||||
_memtables.erase(i);
|
||||
}
|
||||
}
|
||||
void clear() {
|
||||
_memtables.clear();
|
||||
@@ -893,7 +897,7 @@ public:
|
||||
return _pending_writes_phaser.start();
|
||||
}
|
||||
|
||||
future<> await_pending_writes() {
|
||||
future<> await_pending_writes() noexcept {
|
||||
return _pending_writes_phaser.advance_and_await();
|
||||
}
|
||||
|
||||
@@ -905,7 +909,7 @@ public:
|
||||
return _pending_reads_phaser.start();
|
||||
}
|
||||
|
||||
future<> await_pending_reads() {
|
||||
future<> await_pending_reads() noexcept {
|
||||
return _pending_reads_phaser.advance_and_await();
|
||||
}
|
||||
|
||||
@@ -917,7 +921,7 @@ public:
|
||||
return _pending_streams_phaser.start();
|
||||
}
|
||||
|
||||
future<> await_pending_streams() {
|
||||
future<> await_pending_streams() noexcept {
|
||||
return _pending_streams_phaser.advance_and_await();
|
||||
}
|
||||
|
||||
@@ -925,11 +929,11 @@ public:
|
||||
return _pending_streams_phaser.operations_in_progress();
|
||||
}
|
||||
|
||||
future<> await_pending_flushes() {
|
||||
future<> await_pending_flushes() noexcept {
|
||||
return _pending_flushes_phaser.advance_and_await();
|
||||
}
|
||||
|
||||
future<> await_pending_ops() {
|
||||
future<> await_pending_ops() noexcept {
|
||||
return when_all(await_pending_reads(), await_pending_writes(), await_pending_streams(), await_pending_flushes()).discard_result();
|
||||
}
|
||||
|
||||
|
||||
@@ -124,7 +124,7 @@ static future<> try_record(std::string_view large_table, const sstables::sstable
|
||||
const auto sstable_name = sst.get_filename();
|
||||
std::string pk_str = key_to_str(partition_key.to_partition_key(s), s);
|
||||
auto timestamp = db_clock::now();
|
||||
large_data_logger.warn("Writing large {} {}/{}: {}{} ({} bytes)", desc, ks_name, cf_name, pk_str, extra_path, size);
|
||||
large_data_logger.warn("Writing large {} {}/{}: {}{} ({} bytes) to {}", desc, ks_name, cf_name, pk_str, extra_path, size, sstable_name);
|
||||
return db::qctx->execute_cql(req, ks_name, cf_name, sstable_name, size, pk_str, timestamp, args...)
|
||||
.discard_result()
|
||||
.handle_exception([ks_name, cf_name, large_table, sstable_name] (std::exception_ptr ep) {
|
||||
@@ -140,9 +140,10 @@ future<> cql_table_large_data_handler::record_large_partitions(const sstables::s
|
||||
void cql_table_large_data_handler::log_too_many_rows(const sstables::sstable& sst, const sstables::key& partition_key,
|
||||
uint64_t rows_count) const {
|
||||
const schema& s = *sst.get_schema();
|
||||
large_data_logger.warn("Writing a partition with too many rows [{}/{}:{}] ({} rows)",
|
||||
const auto sstable_name = sst.get_filename();
|
||||
large_data_logger.warn("Writing a partition with too many rows [{}/{}:{}] ({} rows) to {}",
|
||||
s.ks_name(), s.cf_name(), partition_key.to_partition_key(s).with_schema(s),
|
||||
rows_count);
|
||||
rows_count, sstable_name);
|
||||
}
|
||||
|
||||
future<> cql_table_large_data_handler::record_large_cells(const sstables::sstable& sst, const sstables::key& partition_key,
|
||||
|
||||
@@ -43,9 +43,13 @@
|
||||
|
||||
namespace db {
|
||||
|
||||
future<> snapshot_ctl::check_snapshot_not_exist(sstring ks_name, sstring name) {
|
||||
future<> snapshot_ctl::check_snapshot_not_exist(sstring ks_name, sstring name, std::optional<std::vector<sstring>> filter) {
|
||||
auto& ks = _db.local().find_keyspace(ks_name);
|
||||
return parallel_for_each(ks.metadata()->cf_meta_data(), [this, ks_name = std::move(ks_name), name = std::move(name)] (auto& pair) {
|
||||
return parallel_for_each(ks.metadata()->cf_meta_data(), [this, ks_name = std::move(ks_name), name = std::move(name), filter = std::move(filter)] (auto& pair) {
|
||||
auto& cf_name = pair.first;
|
||||
if (filter && std::find(filter->begin(), filter->end(), cf_name) == filter->end()) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
auto& cf = _db.local().find_column_family(pair.second);
|
||||
return cf.snapshot_exists(name).then([ks_name = std::move(ks_name), name] (bool exists) {
|
||||
if (exists) {
|
||||
@@ -111,7 +115,7 @@ future<> snapshot_ctl::take_column_family_snapshot(sstring ks_name, std::vector<
|
||||
}
|
||||
|
||||
return run_snapshot_modify_operation([this, ks_name = std::move(ks_name), tables = std::move(tables), tag = std::move(tag)] {
|
||||
return check_snapshot_not_exist(ks_name, tag).then([this, ks_name, tables = std::move(tables), tag] {
|
||||
return check_snapshot_not_exist(ks_name, tag, tables).then([this, ks_name, tables, tag] {
|
||||
return do_with(std::vector<sstring>(std::move(tables)),[this, ks_name, tag](const std::vector<sstring>& tables) {
|
||||
return do_for_each(tables, [ks_name, tag, this] (const sstring& table_name) {
|
||||
if (table_name.find(".") != sstring::npos) {
|
||||
|
||||
@@ -40,6 +40,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <seastar/core/sharded.hh>
|
||||
#include <seastar/core/future.hh>
|
||||
#include "database.hh"
|
||||
@@ -112,7 +114,7 @@ private:
|
||||
seastar::rwlock _lock;
|
||||
seastar::gate _ops;
|
||||
|
||||
future<> check_snapshot_not_exist(sstring ks_name, sstring name);
|
||||
future<> check_snapshot_not_exist(sstring ks_name, sstring name, std::optional<std::vector<sstring>> filter = {});
|
||||
|
||||
template <typename Func>
|
||||
std::result_of_t<Func()> run_snapshot_modify_operation(Func&&);
|
||||
|
||||
13
dist/common/scripts/scylla_cpuscaling_setup
vendored
13
dist/common/scripts/scylla_cpuscaling_setup
vendored
@@ -22,6 +22,7 @@
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import shlex
|
||||
import distro
|
||||
from scylla_util import *
|
||||
@@ -46,7 +47,12 @@ if __name__ == '__main__':
|
||||
if os.getuid() > 0:
|
||||
print('Requires root permission.')
|
||||
sys.exit(1)
|
||||
if not os.path.exists('/sys/devices/system/cpu/cpufreq/policy0/scaling_governor'):
|
||||
parser = argparse.ArgumentParser(description='CPU scaling setup script for Scylla.')
|
||||
parser.add_argument('--force', dest='force', action='store_true',
|
||||
help='force running setup even CPU scaling unsupported')
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.force and not os.path.exists('/sys/devices/system/cpu/cpu0/cpufreq/scaling_governor'):
|
||||
print('This computer doesn\'t supported CPU scaling configuration.')
|
||||
sys.exit(0)
|
||||
if not is_debian_variant():
|
||||
@@ -56,6 +62,11 @@ if __name__ == '__main__':
|
||||
if not shutil.which('cpufreq-set'):
|
||||
pkg_install('cpufrequtils')
|
||||
if is_debian_variant():
|
||||
try:
|
||||
ondemand = systemd_unit('ondemand')
|
||||
ondemand.disable()
|
||||
except:
|
||||
pass
|
||||
cfg = sysconfig_parser('/etc/default/cpufrequtils')
|
||||
cfg.set('GOVERNOR', 'performance')
|
||||
cfg.commit()
|
||||
|
||||
4
dist/common/scripts/scylla_io_setup
vendored
4
dist/common/scripts/scylla_io_setup
vendored
@@ -254,7 +254,7 @@ if __name__ == "__main__":
|
||||
disk_properties["read_bandwidth"] = 2650 * mbs
|
||||
disk_properties["write_iops"] = 360000
|
||||
disk_properties["write_bandwidth"] = 1400 * mbs
|
||||
elif nr_disks == "16":
|
||||
elif nr_disks == 16:
|
||||
disk_properties["read_iops"] = 1600000
|
||||
disk_properties["read_bandwidth"] = 4521251328
|
||||
#below is google, above is our measured
|
||||
@@ -263,7 +263,7 @@ if __name__ == "__main__":
|
||||
disk_properties["write_bandwidth"] = 2759452672
|
||||
#below is google, above is our measured
|
||||
#disk_properties["write_bandwidth"] = 3120 * mbs
|
||||
elif nr_disks == "24":
|
||||
elif nr_disks == 24:
|
||||
disk_properties["read_iops"] = 2400000
|
||||
disk_properties["read_bandwidth"] = 5921532416
|
||||
#below is google, above is our measured
|
||||
|
||||
6
dist/common/scripts/scylla_ntp_setup
vendored
6
dist/common/scripts/scylla_ntp_setup
vendored
@@ -90,12 +90,12 @@ if __name__ == '__main__':
|
||||
with open('/etc/ntp.conf') as f:
|
||||
conf = f.read()
|
||||
if args.subdomain:
|
||||
conf2 = re.sub(r'server\s+([0-9]+)\.(\S+)\.pool\.ntp\.org', 'server \\1.{}.pool.ntp.org'.format(args.subdomain), conf, flags=re.MULTILINE)
|
||||
conf2 = re.sub(r'(server|pool)\s+([0-9]+)\.(\S+)\.pool\.ntp\.org', '\\1 \\2.{}.pool.ntp.org'.format(args.subdomain), conf, flags=re.MULTILINE)
|
||||
with open('/etc/ntp.conf', 'w') as f:
|
||||
f.write(conf2)
|
||||
conf = conf2
|
||||
match = re.search(r'^server\s+(\S*)(\s+\S+)?', conf, flags=re.MULTILINE)
|
||||
server = match.group(1)
|
||||
match = re.search(r'^(server|pool)\s+(\S*)(\s+\S+)?', conf, flags=re.MULTILINE)
|
||||
server = match.group(2)
|
||||
ntpd = systemd_unit('ntpd.service')
|
||||
ntpd.stop()
|
||||
# ignore error, ntpd may able to adjust clock later
|
||||
|
||||
21
dist/common/scripts/scylla_raid_setup
vendored
21
dist/common/scripts/scylla_raid_setup
vendored
@@ -30,6 +30,8 @@ import distro
|
||||
from pathlib import Path
|
||||
from scylla_util import *
|
||||
from subprocess import run
|
||||
import distro
|
||||
from pkg_resources import parse_version
|
||||
|
||||
if __name__ == '__main__':
|
||||
if os.getuid() > 0:
|
||||
@@ -115,6 +117,25 @@ if __name__ == '__main__':
|
||||
pkg_install('xfsprogs')
|
||||
if not shutil.which('mdadm'):
|
||||
pkg_install('mdadm')
|
||||
# XXX: Workaround for mdmonitor.service issue on CentOS8
|
||||
if is_redhat_variant() and distro.version() == '8':
|
||||
mdadm_rpm = run('rpm -q mdadm', shell=True, check=True, capture_output=True, encoding='utf-8').stdout.strip()
|
||||
match = re.match(r'^mdadm-([0-9]+\.[0-9]+-[a-zA-Z0-9]+)\.', mdadm_rpm)
|
||||
mdadm_version = match.group(1)
|
||||
if parse_version('4.1-14') < parse_version(mdadm_version):
|
||||
repo_data = '''
|
||||
[BaseOS_8_3_2011]
|
||||
name=CentOS8.3.2011 - Base
|
||||
baseurl=http://vault.centos.org/8.3.2011/BaseOS/$basearch/os/
|
||||
gpgcheck=1
|
||||
enabled=0
|
||||
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-centosofficial
|
||||
'''[1:-1]
|
||||
with open('/etc/yum.repos.d/CentOS-Vault-8.3.repo', 'w') as f:
|
||||
f.write(repo_data)
|
||||
run('dnf downgrade --enablerepo=BaseOS_8_3_2011 -y mdadm', shell=True, check=True)
|
||||
run('dnf install -y python3-dnf-plugin-versionlock', shell=True, check=True)
|
||||
run('dnf versionlock add mdadm', shell=True, check=True)
|
||||
try:
|
||||
md_service = systemd_unit('mdmonitor.service')
|
||||
except SystemdException:
|
||||
|
||||
5
dist/common/scripts/scylla_util.py
vendored
5
dist/common/scripts/scylla_util.py
vendored
@@ -147,6 +147,11 @@ class gcp_instance:
|
||||
if af == socket.AF_INET:
|
||||
addr, port = sa
|
||||
if addr == "169.254.169.254":
|
||||
# Make sure it is not on GKE
|
||||
try:
|
||||
gcp_instance().__instance_metadata("machine-type")
|
||||
except urllib.error.HTTPError:
|
||||
return False
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
2
dist/docker/redhat/Dockerfile
vendored
2
dist/docker/redhat/Dockerfile
vendored
@@ -6,7 +6,7 @@ ENV container docker
|
||||
|
||||
# The SCYLLA_REPO_URL argument specifies the URL to the RPM repository this Docker image uses to install Scylla. The default value is the Scylla's unstable RPM repository, which contains the daily build.
|
||||
ARG SCYLLA_REPO_URL=http://downloads.scylladb.com/rpm/unstable/centos/branch-4.4/latest/scylla.repo
|
||||
ARG VERSION=4.4.4
|
||||
ARG VERSION=4.4.9
|
||||
|
||||
ADD scylla_bashrc /scylla_bashrc
|
||||
|
||||
|
||||
@@ -4,3 +4,4 @@ stdout_logfile=/dev/stdout
|
||||
stdout_logfile_maxbytes=0
|
||||
stderr_logfile=/dev/stderr
|
||||
stderr_logfile_maxbytes=0
|
||||
stopwaitsecs=900
|
||||
|
||||
5
dist/docker/redhat/scyllasetup.py
vendored
5
dist/docker/redhat/scyllasetup.py
vendored
@@ -121,12 +121,13 @@ class ScyllaSetup:
|
||||
if self._apiAddress is not None:
|
||||
args += ["--api-address %s" % self._apiAddress]
|
||||
|
||||
if self._alternatorPort is not None:
|
||||
if self._alternatorAddress is not None:
|
||||
args += ["--alternator-address %s" % self._alternatorAddress]
|
||||
|
||||
if self._alternatorPort is not None:
|
||||
args += ["--alternator-port %s" % self._alternatorPort]
|
||||
|
||||
if self._alternatorHttpsPort is not None:
|
||||
args += ["--alternator-address %s" % self._alternatorAddress]
|
||||
args += ["--alternator-https-port %s" % self._alternatorHttpsPort]
|
||||
|
||||
if self._alternatorWriteIsolation is not None:
|
||||
|
||||
4
dist/redhat/scylla.spec
vendored
4
dist/redhat/scylla.spec
vendored
@@ -7,7 +7,7 @@ Group: Applications/Databases
|
||||
License: AGPLv3
|
||||
URL: http://www.scylladb.com/
|
||||
Source0: %{reloc_pkg}
|
||||
Requires: %{product}-server = %{version} %{product}-conf = %{version} %{product}-kernel-conf = %{version} %{product}-jmx = %{version} %{product}-tools = %{version} %{product}-tools-core = %{version} %{product}-node-exporter = %{version}
|
||||
Requires: %{product}-server = %{version} %{product}-conf = %{version} %{product}-python3 = %{version} %{product}-kernel-conf = %{version} %{product}-jmx = %{version} %{product}-tools = %{version} %{product}-tools-core = %{version} %{product}-node-exporter = %{version}
|
||||
Obsoletes: scylla-server < 1.1
|
||||
|
||||
%global _debugsource_template %{nil}
|
||||
@@ -54,7 +54,7 @@ Group: Applications/Databases
|
||||
Summary: The Scylla database server
|
||||
License: AGPLv3
|
||||
URL: http://www.scylladb.com/
|
||||
Requires: %{product}-conf %{product}-python3
|
||||
Requires: %{product}-conf = %{version} %{product}-python3 = %{version}
|
||||
Conflicts: abrt
|
||||
AutoReqProv: no
|
||||
|
||||
|
||||
@@ -340,4 +340,18 @@ public:
|
||||
unsupported_operation_exception(const sstring& msg) : std::runtime_error("unsupported operation: " + msg) {}
|
||||
};
|
||||
|
||||
class function_execution_exception : public cassandra_exception {
|
||||
public:
|
||||
const sstring ks_name;
|
||||
const sstring func_name;
|
||||
const std::vector<sstring> args;
|
||||
function_execution_exception(sstring func_name_, sstring detail, sstring ks_name_, std::vector<sstring> args_) noexcept
|
||||
: cassandra_exception{exception_code::FUNCTION_FAILURE,
|
||||
format("execution of {} failed: {}", func_name_, detail)}
|
||||
, ks_name(std::move(ks_name_))
|
||||
, func_name(std::move(func_name_))
|
||||
, args(std::move(args_))
|
||||
{ }
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -1445,7 +1445,7 @@ void gossiper::real_mark_alive(inet_address addr, endpoint_state& local_state) {
|
||||
logger.trace("marking as alive {}", addr);
|
||||
|
||||
// Do not mark a node with status shutdown as UP.
|
||||
auto status = get_gossip_status(local_state);
|
||||
auto status = sstring(get_gossip_status(local_state));
|
||||
if (status == sstring(versioned_value::SHUTDOWN)) {
|
||||
logger.warn("Skip marking node {} with status = {} as UP", addr, status);
|
||||
return;
|
||||
@@ -1464,6 +1464,8 @@ void gossiper::real_mark_alive(inet_address addr, endpoint_state& local_state) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Make a copy for endpoint_state because the code below can yield
|
||||
endpoint_state state = local_state;
|
||||
_live_endpoints.push_back(addr);
|
||||
if (_endpoints_to_talk_with.empty()) {
|
||||
_endpoints_to_talk_with.push_back({addr});
|
||||
@@ -1475,8 +1477,8 @@ void gossiper::real_mark_alive(inet_address addr, endpoint_state& local_state) {
|
||||
logger.info("InetAddress {} is now UP, status = {}", addr, status);
|
||||
}
|
||||
|
||||
_subscribers.for_each([addr, local_state] (shared_ptr<i_endpoint_state_change_subscriber> subscriber) {
|
||||
subscriber->on_alive(addr, local_state);
|
||||
_subscribers.for_each([addr, state] (shared_ptr<i_endpoint_state_change_subscriber> subscriber) {
|
||||
subscriber->on_alive(addr, state);
|
||||
logger.trace("Notified {}", fmt::ptr(subscriber.get()));
|
||||
});
|
||||
}
|
||||
@@ -1485,11 +1487,12 @@ void gossiper::real_mark_alive(inet_address addr, endpoint_state& local_state) {
|
||||
void gossiper::mark_dead(inet_address addr, endpoint_state& local_state) {
|
||||
logger.trace("marking as down {}", addr);
|
||||
local_state.mark_dead();
|
||||
endpoint_state state = local_state;
|
||||
_live_endpoints.resize(std::distance(_live_endpoints.begin(), std::remove(_live_endpoints.begin(), _live_endpoints.end(), addr)));
|
||||
_unreachable_endpoints[addr] = now();
|
||||
logger.info("InetAddress {} is now DOWN, status = {}", addr, get_gossip_status(local_state));
|
||||
_subscribers.for_each([addr, local_state] (shared_ptr<i_endpoint_state_change_subscriber> subscriber) {
|
||||
subscriber->on_dead(addr, local_state);
|
||||
logger.info("InetAddress {} is now DOWN, status = {}", addr, get_gossip_status(state));
|
||||
_subscribers.for_each([addr, state] (shared_ptr<i_endpoint_state_change_subscriber> subscriber) {
|
||||
subscriber->on_dead(addr, state);
|
||||
logger.trace("Notified {}", fmt::ptr(subscriber.get()));
|
||||
});
|
||||
}
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
|
||||
#include "mutation_reader.hh"
|
||||
#include <seastar/core/future-util.hh>
|
||||
#include <seastar/core/coroutine.hh>
|
||||
#include "flat_mutation_reader.hh"
|
||||
#include "schema_registry.hh"
|
||||
#include "mutation_compactor.hh"
|
||||
@@ -1176,6 +1177,9 @@ flat_mutation_reader evictable_reader::recreate_reader() {
|
||||
_range_override.reset();
|
||||
_slice_override.reset();
|
||||
|
||||
_drop_partition_start = false;
|
||||
_drop_static_row = false;
|
||||
|
||||
if (_last_pkey) {
|
||||
bool partition_range_is_inclusive = true;
|
||||
|
||||
@@ -1261,13 +1265,25 @@ void evictable_reader::maybe_validate_partition_start(const flat_mutation_reader
|
||||
// is in range.
|
||||
if (_last_pkey) {
|
||||
const auto cmp_res = tri_cmp(*_last_pkey, ps.key());
|
||||
if (_drop_partition_start) { // should be the same partition
|
||||
if (_drop_partition_start) { // we expect to continue from the same partition
|
||||
// We cannot assume the partition we stopped the read at is still alive
|
||||
// when we recreate the reader. It might have been compacted away in the
|
||||
// meanwhile, so allow for a larger partition too.
|
||||
require(
|
||||
cmp_res == 0,
|
||||
"{}(): validation failed, expected partition with key equal to _last_pkey {} due to _drop_partition_start being set, but got {}",
|
||||
cmp_res <= 0,
|
||||
"{}(): validation failed, expected partition with key larger or equal to _last_pkey {} due to _drop_partition_start being set, but got {}",
|
||||
__FUNCTION__,
|
||||
*_last_pkey,
|
||||
ps.key());
|
||||
// Reset drop flags and next pos if we are not continuing from the same partition
|
||||
if (cmp_res < 0) {
|
||||
// Close previous partition, we are not going to continue it.
|
||||
push_mutation_fragment(*_schema, _permit, partition_end{});
|
||||
_drop_partition_start = false;
|
||||
_drop_static_row = false;
|
||||
_next_position_in_partition = position_in_partition::for_partition_start();
|
||||
_trim_range_tombstones = false;
|
||||
}
|
||||
} else { // should be a larger partition
|
||||
require(
|
||||
cmp_res < 0,
|
||||
@@ -1318,9 +1334,14 @@ bool evictable_reader::should_drop_fragment(const mutation_fragment& mf) {
|
||||
_drop_partition_start = false;
|
||||
return true;
|
||||
}
|
||||
if (_drop_static_row && mf.is_static_row()) {
|
||||
_drop_static_row = false;
|
||||
return true;
|
||||
// Unlike partition-start above, a partition is not guaranteed to have a
|
||||
// static row fragment. So reset the flag regardless of whether we could
|
||||
// drop one or not.
|
||||
// We are guaranteed to get here only right after dropping a partition-start,
|
||||
// so if we are not seeing a static row here, the partition doesn't have one.
|
||||
if (_drop_static_row) {
|
||||
_drop_static_row = false;
|
||||
return mf.is_static_row();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@@ -1505,18 +1526,18 @@ future<> evictable_reader::fast_forward_to(const dht::partition_range& pr, db::t
|
||||
_end_of_stream = false;
|
||||
|
||||
if (_reader) {
|
||||
return _reader->fast_forward_to(pr, timeout);
|
||||
co_await _reader->fast_forward_to(pr, timeout);
|
||||
_range_override.reset();
|
||||
co_return;
|
||||
}
|
||||
if (!_reader_created || !_irh) {
|
||||
return make_ready_future<>();
|
||||
co_return;
|
||||
}
|
||||
if (auto reader_opt = try_resume()) {
|
||||
auto f = reader_opt->fast_forward_to(pr, timeout);
|
||||
return f.then([this, reader = std::move(*reader_opt)] () mutable {
|
||||
maybe_pause(std::move(reader));
|
||||
});
|
||||
co_await reader_opt->fast_forward_to(pr, timeout);
|
||||
_range_override.reset();
|
||||
maybe_pause(std::move(*reader_opt));
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
evictable_reader_handle::evictable_reader_handle(evictable_reader& r) : _r(&r)
|
||||
@@ -1569,8 +1590,8 @@ class shard_reader : public enable_lw_shared_from_this<shard_reader>, public fla
|
||||
private:
|
||||
shared_ptr<reader_lifecycle_policy> _lifecycle_policy;
|
||||
const unsigned _shard;
|
||||
const dht::partition_range* _pr;
|
||||
const query::partition_slice& _ps;
|
||||
dht::partition_range _pr;
|
||||
query::partition_slice _ps;
|
||||
const io_priority_class& _pc;
|
||||
tracing::global_trace_state_ptr _trace_state;
|
||||
const mutation_reader::forwarding _fwd_mr;
|
||||
@@ -1596,7 +1617,7 @@ public:
|
||||
: impl(std::move(schema), std::move(permit))
|
||||
, _lifecycle_policy(std::move(lifecycle_policy))
|
||||
, _shard(shard)
|
||||
, _pr(&pr)
|
||||
, _pr(pr)
|
||||
, _ps(ps)
|
||||
, _pc(pc)
|
||||
, _trace_state(std::move(trace_state))
|
||||
@@ -1681,7 +1702,7 @@ future<> shard_reader::do_fill_buffer(db::timeout_clock::time_point timeout) {
|
||||
});
|
||||
auto s = gs.get();
|
||||
auto rreader = make_foreign(std::make_unique<evictable_reader>(evictable_reader::auto_pause::yes, std::move(ms),
|
||||
s, _lifecycle_policy->semaphore().make_permit(s.get(), "shard-reader"), *_pr, _ps, _pc, _trace_state, _fwd_mr));
|
||||
s, _lifecycle_policy->semaphore().make_permit(s.get(), "shard-reader"), _pr, _ps, _pc, _trace_state, _fwd_mr));
|
||||
tracing::trace(_trace_state, "Creating shard reader on shard: {}", this_shard_id());
|
||||
auto f = rreader->fill_buffer(timeout);
|
||||
return f.then([rreader = std::move(rreader)] () mutable {
|
||||
@@ -1730,7 +1751,7 @@ void shard_reader::next_partition() {
|
||||
}
|
||||
|
||||
future<> shard_reader::fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) {
|
||||
_pr = ≺
|
||||
_pr = pr;
|
||||
|
||||
if (!_reader && !_read_ahead) {
|
||||
// No need to fast-forward uncreated readers, they will be passed the new
|
||||
@@ -1739,12 +1760,12 @@ future<> shard_reader::fast_forward_to(const dht::partition_range& pr, db::timeo
|
||||
}
|
||||
|
||||
auto f = _read_ahead ? *std::exchange(_read_ahead, std::nullopt) : make_ready_future<>();
|
||||
return f.then([this, &pr, timeout] {
|
||||
return f.then([this, timeout] {
|
||||
_end_of_stream = false;
|
||||
clear_buffer();
|
||||
|
||||
return smp::submit_to(_shard, [this, &pr, timeout] {
|
||||
return _reader->fast_forward_to(pr, timeout);
|
||||
return smp::submit_to(_shard, [this, timeout] {
|
||||
return _reader->fast_forward_to(_pr, timeout);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -57,6 +57,8 @@ future<> feed_writer(flat_mutation_reader&& rd, Writer&& wr) {
|
||||
auto f2 = rd.is_buffer_empty() ? rd.fill_buffer(db::no_timeout) : make_ready_future<>();
|
||||
return when_all_succeed(std::move(f1), std::move(f2)).discard_result();
|
||||
});
|
||||
}).then([&wr] {
|
||||
wr.consume_end_of_stream();
|
||||
}).then_wrapped([&wr] (future<> f) {
|
||||
if (f.failed()) {
|
||||
auto ex = f.get_exception();
|
||||
@@ -70,7 +72,6 @@ future<> feed_writer(flat_mutation_reader&& rd, Writer&& wr) {
|
||||
return make_exception_future<>(std::move(ex));
|
||||
});
|
||||
} else {
|
||||
wr.consume_end_of_stream();
|
||||
return wr.close();
|
||||
}
|
||||
});
|
||||
|
||||
@@ -267,9 +267,14 @@ public:
|
||||
return _current_tombstone;
|
||||
}
|
||||
|
||||
const std::deque<range_tombstone>& range_tombstones_for_row(const clustering_key_prefix& ck) {
|
||||
std::vector<range_tombstone> range_tombstones_for_row(const clustering_key_prefix& ck) {
|
||||
drop_unneeded_tombstones(ck);
|
||||
return _range_tombstones;
|
||||
std::vector<range_tombstone> result(_range_tombstones.begin(), _range_tombstones.end());
|
||||
auto cmp = [&] (const range_tombstone& rt1, const range_tombstone& rt2) {
|
||||
return _cmp(rt1.start_bound(), rt2.start_bound());
|
||||
};
|
||||
std::sort(result.begin(), result.end(), cmp);
|
||||
return result;
|
||||
}
|
||||
|
||||
std::deque<range_tombstone> range_tombstones() && {
|
||||
|
||||
@@ -1783,6 +1783,7 @@ future<> bootstrap_with_repair(seastar::sharded<database>& db, seastar::sharded<
|
||||
auto& strat = ks.get_replication_strategy();
|
||||
dht::token_range_vector desired_ranges = strat.get_pending_address_ranges(tmptr, tokens, myip, utils::can_yield::yes);
|
||||
bool find_node_in_local_dc_only = strat.get_type() == locator::replication_strategy_type::network_topology;
|
||||
bool everywhere_topology = strat.get_type() == locator::replication_strategy_type::everywhere_topology;
|
||||
|
||||
//Active ranges
|
||||
auto metadata_clone = tmptr->clone_only_token_map().get0();
|
||||
@@ -1860,7 +1861,9 @@ future<> bootstrap_with_repair(seastar::sharded<database>& db, seastar::sharded<
|
||||
};
|
||||
auto old_endpoints_in_local_dc = get_old_endpoints_in_local_dc();
|
||||
auto rf_in_local_dc = get_rf_in_local_dc();
|
||||
if (old_endpoints.size() == strat.get_replication_factor()) {
|
||||
if (everywhere_topology) {
|
||||
neighbors = old_endpoints_in_local_dc;
|
||||
} else if (old_endpoints.size() == strat.get_replication_factor()) {
|
||||
// For example, with RF = 3 and 3 nodes n1, n2, n3
|
||||
// in the cluster, n4 is bootstrapped, old_replicas
|
||||
// = {n1, n2, n3}, new_replicas = {n1, n2, n4}, n3
|
||||
|
||||
2
seastar
2
seastar
Submodule seastar updated: 4b7d434965...1fb2187322
@@ -2532,7 +2532,13 @@ future<> storage_service::restore_replica_count(inet_address endpoint, inet_addr
|
||||
}
|
||||
return seastar::async([this, endpoint, notify_endpoint] {
|
||||
auto tmptr = get_token_metadata_ptr();
|
||||
auto streamer = make_lw_shared<dht::range_streamer>(_db, tmptr, _abort_source, get_broadcast_address(), "Restore_replica_count", streaming::stream_reason::removenode);
|
||||
abort_source as;
|
||||
auto sub = _abort_source.subscribe([&as] () noexcept {
|
||||
if (!as.abort_requested()) {
|
||||
as.request_abort();
|
||||
}
|
||||
});
|
||||
auto streamer = make_lw_shared<dht::range_streamer>(_db, tmptr, as, get_broadcast_address(), "Restore_replica_count", streaming::stream_reason::removenode);
|
||||
auto my_address = get_broadcast_address();
|
||||
auto non_system_keyspaces = _db.local().get_non_system_keyspaces();
|
||||
for (const auto& keyspace_name : non_system_keyspaces) {
|
||||
@@ -2550,6 +2556,42 @@ future<> storage_service::restore_replica_count(inet_address endpoint, inet_addr
|
||||
}
|
||||
streamer->add_rx_ranges(keyspace_name, std::move(ranges_per_endpoint));
|
||||
}
|
||||
auto status_checker = seastar::async([this, endpoint, &as] {
|
||||
slogger.info("restore_replica_count: Started status checker for removing node {}", endpoint);
|
||||
while (!as.abort_requested()) {
|
||||
auto status = _gossiper.get_gossip_status(endpoint);
|
||||
// If the node to be removed is already in removed status, it has
|
||||
// probably been removed forcely with `nodetool removenode force`.
|
||||
// Abort the restore_replica_count in such case to avoid streaming
|
||||
// attempt since the user has removed the node forcely.
|
||||
if (status == sstring(versioned_value::REMOVED_TOKEN)) {
|
||||
slogger.info("restore_replica_count: Detected node {} has left the cluster, status={}, abort restore_replica_count for removing node {}",
|
||||
endpoint, status, endpoint);
|
||||
if (!as.abort_requested()) {
|
||||
as.request_abort();
|
||||
}
|
||||
return;
|
||||
}
|
||||
slogger.debug("restore_replica_count: Sleep and detect removing node {}, status={}", endpoint, status);
|
||||
sleep_abortable(std::chrono::seconds(10), as).get();
|
||||
}
|
||||
});
|
||||
auto stop_status_checker = defer([endpoint, &status_checker, &as] () mutable {
|
||||
try {
|
||||
slogger.info("restore_replica_count: Started to stop status checker for removing node {}", endpoint);
|
||||
if (!as.abort_requested()) {
|
||||
as.request_abort();
|
||||
}
|
||||
status_checker.get();
|
||||
} catch (const seastar::sleep_aborted& ignored) {
|
||||
slogger.debug("restore_replica_count: Got sleep_abort to stop status checker for removing node {}: {}", endpoint, ignored);
|
||||
} catch (...) {
|
||||
slogger.warn("restore_replica_count: Found error in status checker for removing node {}: {}",
|
||||
endpoint, std::current_exception());
|
||||
}
|
||||
slogger.info("restore_replica_count: Finished to stop status checker for removing node {}", endpoint);
|
||||
});
|
||||
|
||||
streamer->stream_async().then_wrapped([this, streamer, notify_endpoint] (auto&& f) {
|
||||
try {
|
||||
f.get();
|
||||
|
||||
@@ -440,7 +440,6 @@ protected:
|
||||
mutation_source_metadata _ms_metadata = {};
|
||||
garbage_collected_sstable_writer::data _gc_sstable_writer_data;
|
||||
compaction_sstable_replacer_fn _replacer;
|
||||
std::optional<compaction_weight_registration> _weight_registration;
|
||||
utils::UUID _run_identifier;
|
||||
::io_priority_class _io_priority;
|
||||
// optional clone of sstable set to be used for expiration purposes, so it will be set if expiration is enabled.
|
||||
@@ -459,7 +458,6 @@ protected:
|
||||
, _sstable_level(descriptor.level)
|
||||
, _gc_sstable_writer_data(*this)
|
||||
, _replacer(std::move(descriptor.replacer))
|
||||
, _weight_registration(std::move(descriptor.weight_registration))
|
||||
, _run_identifier(descriptor.run_identifier)
|
||||
, _io_priority(descriptor.io_priority)
|
||||
, _sstable_set(std::move(descriptor.all_sstables_snapshot))
|
||||
@@ -919,9 +917,6 @@ public:
|
||||
}
|
||||
|
||||
virtual void on_end_of_compaction() override {
|
||||
if (_weight_registration) {
|
||||
_cf.get_compaction_manager().on_compaction_complete(*_weight_registration);
|
||||
}
|
||||
replace_remaining_exhausted_sstables();
|
||||
}
|
||||
|
||||
|
||||
@@ -134,8 +134,6 @@ struct compaction_descriptor {
|
||||
uint64_t max_sstable_bytes;
|
||||
// Run identifier of output sstables.
|
||||
utils::UUID run_identifier;
|
||||
// Holds ownership of a weight assigned to this compaction iff it's a regular one.
|
||||
std::optional<compaction_weight_registration> weight_registration;
|
||||
// Calls compaction manager's task for this compaction to release reference to exhausted sstables.
|
||||
std::function<void(const std::vector<shared_sstable>& exhausted_sstables)> release_exhausted;
|
||||
// The options passed down to the compaction code.
|
||||
|
||||
@@ -439,7 +439,7 @@ void compaction_manager::reevaluate_postponed_compactions() {
|
||||
}
|
||||
|
||||
void compaction_manager::postpone_compaction_for_column_family(column_family* cf) {
|
||||
_postponed.push_back(cf);
|
||||
_postponed.insert(cf);
|
||||
}
|
||||
|
||||
future<> compaction_manager::stop_ongoing_compactions(sstring reason) {
|
||||
@@ -579,7 +579,7 @@ void compaction_manager::submit(column_family* cf) {
|
||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
||||
}
|
||||
auto compacting = make_lw_shared<compacting_sstable_registration>(this, descriptor.sstables);
|
||||
descriptor.weight_registration = compaction_weight_registration(this, weight);
|
||||
auto weight_r = compaction_weight_registration(this, weight);
|
||||
descriptor.release_exhausted = [compacting] (const std::vector<sstables::shared_sstable>& exhausted_sstables) {
|
||||
compacting->release_compacting(exhausted_sstables);
|
||||
};
|
||||
@@ -589,7 +589,7 @@ void compaction_manager::submit(column_family* cf) {
|
||||
_stats.pending_tasks--;
|
||||
_stats.active_tasks++;
|
||||
task->compaction_running = true;
|
||||
return cf.run_compaction(std::move(descriptor)).then_wrapped([this, task, compacting = std::move(compacting)] (future<> f) mutable {
|
||||
return cf.run_compaction(std::move(descriptor)).then_wrapped([this, task, compacting = std::move(compacting), weight_r = std::move(weight_r)] (future<> f) mutable {
|
||||
_stats.active_tasks--;
|
||||
task->compaction_running = false;
|
||||
|
||||
@@ -802,12 +802,15 @@ future<> compaction_manager::remove(column_family* cf) {
|
||||
task->stopping = true;
|
||||
}
|
||||
}
|
||||
_postponed.erase(boost::remove(_postponed, cf), _postponed.end());
|
||||
_postponed.erase(cf);
|
||||
|
||||
// Wait for the termination of an ongoing compaction on cf, if any.
|
||||
return do_for_each(*tasks_to_stop, [this, cf] (auto& task) {
|
||||
return this->task_stop(task);
|
||||
}).then([this, cf, tasks_to_stop] {
|
||||
#ifdef DEBUG
|
||||
assert(std::find_if(_tasks.begin(), _tasks.end(), [cf] (auto& task) { return task->compacting_cf == cf; }) == _tasks.end());
|
||||
#endif
|
||||
_compaction_locks.erase(cf);
|
||||
});
|
||||
}
|
||||
@@ -838,11 +841,6 @@ void compaction_manager::stop_compaction(sstring type) {
|
||||
}
|
||||
}
|
||||
|
||||
void compaction_manager::on_compaction_complete(compaction_weight_registration& weight_registration) {
|
||||
weight_registration.deregister();
|
||||
reevaluate_postponed_compactions();
|
||||
}
|
||||
|
||||
void compaction_manager::propagate_replacement(column_family* cf,
|
||||
const std::vector<sstables::shared_sstable>& removed, const std::vector<sstables::shared_sstable>& added) {
|
||||
for (auto& info : _compactions) {
|
||||
|
||||
@@ -99,7 +99,7 @@ private:
|
||||
future<> _waiting_reevalution = make_ready_future<>();
|
||||
condition_variable _postponed_reevaluation;
|
||||
// column families that wait for compaction but had its submission postponed due to ongoing compaction.
|
||||
std::vector<column_family*> _postponed;
|
||||
std::unordered_set<column_family*> _postponed;
|
||||
// tracks taken weights of ongoing compactions, only one compaction per weight is allowed.
|
||||
// weight is value assigned to a compaction job that is log base N of total size of all input sstables.
|
||||
std::unordered_set<int> _weight_tracker;
|
||||
@@ -256,11 +256,6 @@ public:
|
||||
// Stops ongoing compaction of a given type.
|
||||
void stop_compaction(sstring type);
|
||||
|
||||
// Called by compaction procedure to release the weight lock assigned to it, such that
|
||||
// another compaction waiting on same weight can start as soon as possible. That's usually
|
||||
// called before compaction seals sstable and such and after all compaction work is done.
|
||||
void on_compaction_complete(compaction_weight_registration& weight_registration);
|
||||
|
||||
double backlog() {
|
||||
return _backlog_manager.backlog();
|
||||
}
|
||||
|
||||
@@ -503,7 +503,8 @@ date_tiered_manifest::get_compaction_candidates(column_family& cf, std::vector<s
|
||||
|
||||
int64_t date_tiered_manifest::get_now(column_family& cf) {
|
||||
int64_t max_timestamp = 0;
|
||||
for (auto& sst : *cf.get_sstables()) {
|
||||
auto shared_set = cf.get_sstables();
|
||||
for (auto& sst : *shared_set) {
|
||||
int64_t candidate = sst->get_stats_metadata().max_timestamp;
|
||||
max_timestamp = candidate > max_timestamp ? candidate : max_timestamp;
|
||||
}
|
||||
|
||||
@@ -129,7 +129,7 @@ void sstable_writer_k_l::maybe_flush_pi_block(file_writer& out,
|
||||
// block includes them), but we set block_next_start_offset after - so
|
||||
// even if we wrote a lot of open tombstones, we still get a full
|
||||
// block size of new data.
|
||||
auto& rts = _pi_write.tombstone_accumulator->range_tombstones_for_row(
|
||||
auto rts = _pi_write.tombstone_accumulator->range_tombstones_for_row(
|
||||
clustering_key_prefix::from_range(clustering_key.values()));
|
||||
for (const auto& rt : rts) {
|
||||
auto start = composite::from_clustering_element(*_pi_write.schemap, rt.start);
|
||||
|
||||
@@ -380,7 +380,7 @@ future<prepare_message> stream_session::prepare(std::vector<stream_request> requ
|
||||
try {
|
||||
db.find_column_family(ks, cf);
|
||||
} catch (no_such_column_family&) {
|
||||
auto err = format("[Stream #{{}}] prepare requested ks={{}} cf={{}} does not exist", plan_id, ks, cf);
|
||||
auto err = format("[Stream #{}] prepare requested ks={} cf={} does not exist", plan_id, ks, cf);
|
||||
sslog.warn(err.c_str());
|
||||
throw std::runtime_error(err);
|
||||
}
|
||||
@@ -394,7 +394,7 @@ future<prepare_message> stream_session::prepare(std::vector<stream_request> requ
|
||||
try {
|
||||
db.find_column_family(cf_id);
|
||||
} catch (no_such_column_family&) {
|
||||
auto err = format("[Stream #{{}}] prepare cf_id={} does not exist", plan_id, cf_id);
|
||||
auto err = format("[Stream #{}] prepare cf_id={} does not exist", plan_id, cf_id);
|
||||
sslog.warn(err.c_str());
|
||||
throw std::runtime_error(err);
|
||||
}
|
||||
|
||||
4
table.cc
4
table.cc
@@ -864,8 +864,8 @@ void table::try_trigger_compaction() noexcept {
|
||||
}
|
||||
|
||||
void table::do_trigger_compaction() {
|
||||
// But only submit if we're not locked out
|
||||
if (!_compaction_disabled) {
|
||||
// But not if we're locked out or stopping
|
||||
if (!_compaction_disabled && !_async_gate.is_closed()) {
|
||||
_compaction_manager.submit(this);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -85,3 +85,20 @@ def test_signature_too_futuristic(dynamodb, test_table):
|
||||
response = requests.post(url, headers=headers, verify=False)
|
||||
assert not response.ok
|
||||
assert "InvalidSignatureException" in response.text and "Signature not yet current" in response.text
|
||||
|
||||
# A test that commas can be uses instead of whitespace to separate components
|
||||
# of the Authorization headers - reproducing issue #9568.
|
||||
def test_authorization_no_whitespace(dynamodb, test_table):
|
||||
# Unlike the above tests which checked error cases so didn't need to
|
||||
# calculate a real signature, in this test we really a correct signature,
|
||||
# so we use a function we already have in test_manual_requests.py.
|
||||
from test_manual_requests import get_signed_request
|
||||
payload = '{"TableName": "' + test_table.name + '", "Item": {"p": {"S": "x"}, "c": {"S": "x"}}}'
|
||||
req = get_signed_request(dynamodb, 'PutItem', payload)
|
||||
# Boto3 separates the components of the Authorization header by spaces.
|
||||
# Let's remove all of them except the first one (which separates the
|
||||
# signature algorithm name from the rest) and check the result still works:
|
||||
a = req.headers['Authorization'].split()
|
||||
req.headers['Authorization'] = a[0] + ' ' + ''.join(a[1:])
|
||||
response = requests.post(req.url, headers=req.headers, data=req.body, verify=False)
|
||||
assert response.ok
|
||||
|
||||
@@ -154,3 +154,25 @@ def test_incorrect_numbers(dynamodb, test_table):
|
||||
req = get_signed_request(dynamodb, 'PutItem', payload)
|
||||
response = requests.post(req.url, headers=req.headers, data=req.body, verify=False)
|
||||
assert "ValidationException" in response.text and "numeric" in response.text
|
||||
|
||||
# Although the DynamoDB API responses are JSON, additional conventions apply
|
||||
# to these responses - such as how error codes are encoded in JSON. For this
|
||||
# reason, DynamoDB uses the content type 'application/x-amz-json-1.0' instead
|
||||
# of the standard 'application/json'. This test verifies that we return the
|
||||
# correct content type header.
|
||||
# While most DynamoDB libraries we tried do not care about an unexpected
|
||||
# content-type, it turns out that one (aiodynamo) does. Moreover, AWS already
|
||||
# defined x-amz-json-1.1 - see
|
||||
# https://awslabs.github.io/smithy/1.0/spec/aws/aws-json-1_1-protocol.html
|
||||
# which differs (only) in how it encodes error replies.
|
||||
# So in the future it may become even more important that Scylla return the
|
||||
# correct content type.
|
||||
def test_content_type(dynamodb, test_table):
|
||||
payload = '{"TableName": "' + test_table.name + '", "Item": {"p": {"S": "x"}, "c": {"S": "x"}}}'
|
||||
# Note that get_signed_request() uses x-amz-json-1.0 to encode the
|
||||
# *request*. In the future this may or may not effect the content type
|
||||
# in the response (today, DynamoDB doesn't allow any other content type
|
||||
# in the request anyway).
|
||||
req = get_signed_request(dynamodb, 'PutItem', payload)
|
||||
response = requests.post(req.url, headers=req.headers, data=req.body, verify=False)
|
||||
assert response.headers['Content-Type'] == 'application/x-amz-json-1.0'
|
||||
|
||||
113
test/alternator/test_metrics.py
Normal file
113
test/alternator/test_metrics.py
Normal file
@@ -0,0 +1,113 @@
|
||||
# Copyright 2021-present ScyllaDB
|
||||
#
|
||||
# This file is part of Scylla.
|
||||
#
|
||||
# Scylla is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Scylla is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
##############################################################################
|
||||
# Tests for Scylla's metrics (see docs/design-notes/metrics.md) for Alternator
|
||||
# queries. Reproduces issue #9406, where although metrics was implemented for
|
||||
# Alternator requests, they were missing for some operations (BatchGetItem).
|
||||
# In the tests here we attempt to ensure that the metrics continue to work
|
||||
# for the relevant operations as the code evolves.
|
||||
#
|
||||
# Note that all tests in this file test Scylla-specific features, and are
|
||||
# "skipped" when not running against Scylla, or when unable to retrieve
|
||||
# metrics through out-of-band HTTP requests to Scylla's Prometheus port (9180).
|
||||
#
|
||||
# IMPORTANT: we do not want these tests to assume that are not running in
|
||||
# parallel with any other tests or workload - because such an assumption
|
||||
# would limit our test deployment options in the future. NOT making this
|
||||
# assumption means that these tests can't check that a certain operation
|
||||
# increases a certain counter by exactly 1 - because other concurrent
|
||||
# operations might increase it further! So our test can only check that the
|
||||
# counter increases.
|
||||
##############################################################################
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
import re
|
||||
|
||||
from util import random_string
|
||||
|
||||
# Fixture for checking if we are able to test Scylla metrics. Scylla metrics
|
||||
# are not available on AWS (of course), but may also not be available for
|
||||
# Scylla if for some reason we have only access to the Alternator protocol
|
||||
# port but no access to the metrics port (9180).
|
||||
# If metrics are *not* available, tests using this fixture will be skipped.
|
||||
# Tests using this fixture may call get_metrics(metrics).
|
||||
@pytest.fixture(scope="module")
|
||||
def metrics(dynamodb):
|
||||
if dynamodb.meta.client._endpoint.host.endswith('.amazonaws.com'):
|
||||
pytest.skip('Scylla-only feature not supported by AWS')
|
||||
url = dynamodb.meta.client._endpoint.host
|
||||
# The Prometheus API is on port 9180, and always http, not https.
|
||||
url = re.sub(r':[0-9]+(/|$)', ':9180', url)
|
||||
url = re.sub(r'^https:', 'http:', url)
|
||||
url = url + '/metrics'
|
||||
resp = requests.get(url)
|
||||
if resp.status_code != 200:
|
||||
pytest.skip('Metrics port 9180 is not available')
|
||||
yield url
|
||||
|
||||
# Utility function for fetching all metrics from Scylla, using an HTTP request
|
||||
# to port 9180. The response format is defined by the Prometheus protocol.
|
||||
# Only use get_metrics() in a test using the metrics_available fixture.
|
||||
def get_metrics(metrics):
|
||||
response = requests.get(metrics)
|
||||
assert response.status_code == 200
|
||||
return response.text
|
||||
|
||||
# Utility function for fetching a metric with a given name and optionally a
|
||||
# given sub-metric label (which should be a name-value map). If multiple
|
||||
# matches are found, they are summed - this is useful for summing up the
|
||||
# counts from multiple shards.
|
||||
def get_metric(metrics, name, requested_labels=None):
|
||||
total = 0.0
|
||||
lines = re.compile('^'+name+'{.*$', re.MULTILINE)
|
||||
for match in re.findall(lines, get_metrics(metrics)):
|
||||
a = match.split()
|
||||
metric = a[0]
|
||||
val = float(a[1])
|
||||
# Check if match also matches the requested labels
|
||||
if requested_labels:
|
||||
# we know metric begins with name{ and ends with } - the labels
|
||||
# are what we have between those
|
||||
got_labels = metric[len(name)+1:-1].split(',')
|
||||
# Check that every one of the requested labels is in got_labels:
|
||||
for k, v in requested_labels.items():
|
||||
if not f'{k}="{v}"' in got_labels:
|
||||
# No match for requested label, skip this metric (python
|
||||
# doesn't have "continue 2" so let's just set val to 0...
|
||||
val = 0
|
||||
break
|
||||
total += float(val)
|
||||
return total
|
||||
|
||||
def test_batch_write_item(test_table_s, metrics):
|
||||
n1 = get_metric(metrics, 'scylla_alternator_operation', {'op': 'BatchWriteItem'})
|
||||
test_table_s.meta.client.batch_write_item(RequestItems = {
|
||||
test_table_s.name: [{'PutRequest': {'Item': {'p': random_string(), 'a': 'hi'}}}]})
|
||||
n2 = get_metric(metrics, 'scylla_alternator_operation', {'op': 'BatchWriteItem'})
|
||||
assert n2 > n1
|
||||
|
||||
# Reproduces issue #9406:
|
||||
def test_batch_get_item(test_table_s, metrics):
|
||||
n1 = get_metric(metrics, 'scylla_alternator_operation', {'op': 'BatchGetItem'})
|
||||
test_table_s.meta.client.batch_get_item(RequestItems = {
|
||||
test_table_s.name: {'Keys': [{'p': random_string()}], 'ConsistentRead': True}})
|
||||
n2 = get_metric(metrics, 'scylla_alternator_operation', {'op': 'BatchGetItem'})
|
||||
assert n2 > n1
|
||||
|
||||
# TODO: check the rest of the operations
|
||||
@@ -431,3 +431,14 @@ def test_update_item_returnvalues_nested(test_table_s):
|
||||
ret=test_table_s.update_item(Key={'p': p}, ReturnValues='UPDATED_NEW',
|
||||
UpdateExpression='REMOVE a.c[1]')
|
||||
assert ret['Attributes'] == {'a': {'c': [70]}}
|
||||
|
||||
# A reproducer for issue #9542 - when UpdateExpression's REMOVE operation
|
||||
# actually deletes an existing attribute, it breaks the ALL_NEW ReturnValues
|
||||
# for other attributes set in the same command.
|
||||
def test_update_item_returnvalues_all_new_remove_etc(test_table_s):
|
||||
p = random_string()
|
||||
test_table_s.put_item(Item={'p': p, 's': 'dog', 'd': 'foo'})
|
||||
ret=test_table_s.update_item(Key={'p': p}, ReturnValues='ALL_NEW',
|
||||
UpdateExpression='REMOVE d SET s = :v',
|
||||
ExpressionAttributeValues={':v': 'cat'})
|
||||
assert ret['Attributes']['s'] == 'cat'
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
#define BOOST_TEST_MODULE alternator
|
||||
#include <boost/test/included/unit_test.hpp>
|
||||
|
||||
#include <seastar/util/defer.hh>
|
||||
#include "alternator/base64.hh"
|
||||
|
||||
static bytes_view to_bytes_view(const std::string& s) {
|
||||
@@ -78,3 +79,22 @@ BOOST_AUTO_TEST_CASE(test_base64_begins_with) {
|
||||
BOOST_REQUIRE(!base64_begins_with(encoded_str3, encoded_non_prefix));
|
||||
}
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_allocator_fail_gracefully) {
|
||||
// Unfortunately the address sanitizer fails if the allocator is not able
|
||||
// to allocate the requested memory. The test is therefore skipped for debug mode
|
||||
#ifndef DEBUG
|
||||
static constexpr size_t too_large_alloc_size = 0xffffffffff;
|
||||
rjson::allocator allocator;
|
||||
// Impossible allocation should throw
|
||||
BOOST_REQUIRE_THROW(allocator.Malloc(too_large_alloc_size), rjson::error);
|
||||
// So should impossible reallocation
|
||||
void* memory = allocator.Malloc(1);
|
||||
auto release = defer([memory] { rjson::allocator::Free(memory); });
|
||||
BOOST_REQUIRE_THROW(allocator.Realloc(memory, 1, too_large_alloc_size), rjson::error);
|
||||
// Internal rapidjson stack should also throw
|
||||
// and also be destroyed gracefully later
|
||||
rapidjson::internal::Stack stack(&allocator, 0);
|
||||
BOOST_REQUIRE_THROW(stack.Push<char>(too_large_alloc_size), rjson::error);
|
||||
#endif
|
||||
}
|
||||
@@ -269,6 +269,21 @@ BOOST_AUTO_TEST_CASE(test_writing_placeholders) {
|
||||
BOOST_REQUIRE(in.size() == 0);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_large_placeholder) {
|
||||
bytes_ostream::size_type size;
|
||||
try {
|
||||
for (size = 1; (int32_t)size > 0; size *= 2) {
|
||||
bytes_ostream buf;
|
||||
int8_t* ph;
|
||||
BOOST_TEST_MESSAGE(fmt::format("try size={}", size));
|
||||
ph = buf.write_place_holder(size);
|
||||
std::fill(ph, ph + size, 0);
|
||||
}
|
||||
} catch (const std::bad_alloc&) {
|
||||
}
|
||||
BOOST_REQUIRE(size >= bytes_ostream::max_chunk_size());
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_append_big_and_small_chunks) {
|
||||
bytes_ostream small;
|
||||
append_sequence(small, 12);
|
||||
|
||||
@@ -22,6 +22,8 @@
|
||||
#include <seastar/testing/test_case.hh>
|
||||
#include "test/lib/cql_test_env.hh"
|
||||
#include "test/lib/cql_assertions.hh"
|
||||
#include "cql3/untyped_result_set.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "transport/messages/result_message.hh"
|
||||
|
||||
SEASTAR_TEST_CASE(test_index_with_paging) {
|
||||
@@ -48,3 +50,51 @@ SEASTAR_TEST_CASE(test_index_with_paging) {
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_index_with_paging_with_base_short_read) {
|
||||
return do_with_cql_env_thread([] (auto& e) {
|
||||
e.execute_cql("CREATE TABLE tab (pk int, ck text, v int, v2 int, v3 text, PRIMARY KEY (pk, ck))").get();
|
||||
e.execute_cql("CREATE INDEX ON tab (v)").get();
|
||||
|
||||
// Enough to trigger a short read on the base table during scan
|
||||
sstring big_string(2 * query::result_memory_limiter::maximum_result_size, 'j');
|
||||
|
||||
const int row_count = 67;
|
||||
for (int i = 0; i < row_count; ++i) {
|
||||
e.execute_cql(format("INSERT INTO tab (pk, ck, v, v2, v3) VALUES ({}, 'hello{}', 1, {}, '{}')", i % 3, i, i, big_string)).get();
|
||||
}
|
||||
|
||||
eventually([&] {
|
||||
uint64_t count = 0;
|
||||
e.qp().local().query_internal("SELECT * FROM ks.tab WHERE v = 1", [&] (const cql3::untyped_result_set_row&) {
|
||||
++count;
|
||||
return make_ready_future<stop_iteration>(stop_iteration::no);
|
||||
}).get();
|
||||
BOOST_REQUIRE_EQUAL(count, row_count);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_index_with_paging_with_base_short_read_no_ck) {
|
||||
return do_with_cql_env_thread([] (auto& e) {
|
||||
e.execute_cql("CREATE TABLE tab (pk int, v int, v2 int, v3 text, PRIMARY KEY (pk))").get();
|
||||
e.execute_cql("CREATE INDEX ON tab (v)").get();
|
||||
|
||||
// Enough to trigger a short read on the base table during scan
|
||||
sstring big_string(2 * query::result_memory_limiter::maximum_result_size, 'j');
|
||||
|
||||
const int row_count = 67;
|
||||
for (int i = 0; i < row_count; ++i) {
|
||||
e.execute_cql(format("INSERT INTO tab (pk, v, v2, v3) VALUES ({}, 1, {}, '{}')", i, i, big_string)).get();
|
||||
}
|
||||
|
||||
eventually([&] {
|
||||
uint64_t count = 0;
|
||||
e.qp().local().query_internal("SELECT * FROM ks.tab WHERE v = 1", [&] (const cql3::untyped_result_set_row&) {
|
||||
++count;
|
||||
return make_ready_future<stop_iteration>(stop_iteration::no);
|
||||
}).get();
|
||||
BOOST_REQUIRE_EQUAL(count, row_count);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -3258,39 +3258,30 @@ flat_mutation_reader create_evictable_reader_and_evict_after_first_buffer(
|
||||
reader_permit permit,
|
||||
const dht::partition_range& prange,
|
||||
const query::partition_slice& slice,
|
||||
std::deque<mutation_fragment> first_buffer,
|
||||
position_in_partition_view last_fragment_position,
|
||||
std::deque<mutation_fragment> second_buffer,
|
||||
size_t max_buffer_size) {
|
||||
std::list<std::deque<mutation_fragment>> buffers,
|
||||
position_in_partition_view first_buf_last_fragment_position,
|
||||
size_t max_buffer_size,
|
||||
bool detach_buffer = true) {
|
||||
class factory {
|
||||
schema_ptr _schema;
|
||||
reader_permit _permit;
|
||||
std::optional<std::deque<mutation_fragment>> _first_buffer;
|
||||
std::optional<std::deque<mutation_fragment>> _second_buffer;
|
||||
std::list<std::deque<mutation_fragment>> _buffers;
|
||||
size_t _max_buffer_size;
|
||||
|
||||
private:
|
||||
std::optional<std::deque<mutation_fragment>> copy_buffer(const std::optional<std::deque<mutation_fragment>>& o) {
|
||||
if (!o) {
|
||||
return {};
|
||||
}
|
||||
return copy_fragments(*_schema, _permit, *o);
|
||||
}
|
||||
|
||||
public:
|
||||
factory(schema_ptr schema, reader_permit permit, std::deque<mutation_fragment> first_buffer, std::deque<mutation_fragment> second_buffer, size_t max_buffer_size)
|
||||
factory(schema_ptr schema, reader_permit permit, std::list<std::deque<mutation_fragment>> buffers, size_t max_buffer_size)
|
||||
: _schema(std::move(schema))
|
||||
, _permit(std::move(permit))
|
||||
, _first_buffer(std::move(first_buffer))
|
||||
, _second_buffer(std::move(second_buffer))
|
||||
, _buffers(std::move(buffers))
|
||||
, _max_buffer_size(max_buffer_size) {
|
||||
}
|
||||
|
||||
factory(const factory& o)
|
||||
: _schema(o._schema)
|
||||
, _permit(o._permit)
|
||||
, _first_buffer(copy_buffer(o._first_buffer))
|
||||
, _second_buffer(copy_buffer(o._second_buffer)) {
|
||||
, _permit(o._permit) {
|
||||
for (const auto& buf : o._buffers) {
|
||||
_buffers.emplace_back(copy_fragments(*_schema, _permit, buf));
|
||||
}
|
||||
}
|
||||
factory(factory&& o) = default;
|
||||
|
||||
@@ -3304,14 +3295,9 @@ flat_mutation_reader create_evictable_reader_and_evict_after_first_buffer(
|
||||
streamed_mutation::forwarding fwd_sm,
|
||||
mutation_reader::forwarding fwd_mr) {
|
||||
BOOST_REQUIRE(s == _schema);
|
||||
if (_first_buffer) {
|
||||
auto buf = *std::exchange(_first_buffer, {});
|
||||
auto rd = make_flat_mutation_reader_from_fragments(_schema, std::move(permit), std::move(buf));
|
||||
rd.set_max_buffer_size(_max_buffer_size);
|
||||
return rd;
|
||||
}
|
||||
if (_second_buffer) {
|
||||
auto buf = *std::exchange(_second_buffer, {});
|
||||
if (!_buffers.empty()) {
|
||||
auto buf = std::move(_buffers.front());
|
||||
_buffers.pop_front();
|
||||
auto rd = make_flat_mutation_reader_from_fragments(_schema, std::move(permit), std::move(buf));
|
||||
rd.set_max_buffer_size(_max_buffer_size);
|
||||
return rd;
|
||||
@@ -3319,9 +3305,9 @@ flat_mutation_reader create_evictable_reader_and_evict_after_first_buffer(
|
||||
return make_empty_flat_reader(_schema, std::move(permit));
|
||||
}
|
||||
};
|
||||
auto ms = mutation_source(factory(schema, permit, std::move(first_buffer), std::move(second_buffer), max_buffer_size));
|
||||
auto ms = mutation_source(factory(schema, permit, std::move(buffers), max_buffer_size));
|
||||
|
||||
auto [rd, handle] = make_manually_paused_evictable_reader(
|
||||
auto rd = make_auto_paused_evictable_reader(
|
||||
std::move(ms),
|
||||
schema,
|
||||
permit,
|
||||
@@ -3337,18 +3323,42 @@ flat_mutation_reader create_evictable_reader_and_evict_after_first_buffer(
|
||||
|
||||
const auto eq_cmp = position_in_partition::equal_compare(*schema);
|
||||
BOOST_REQUIRE(rd.is_buffer_full());
|
||||
BOOST_REQUIRE(eq_cmp(rd.buffer().back().position(), last_fragment_position));
|
||||
BOOST_REQUIRE(eq_cmp(rd.buffer().back().position(), first_buf_last_fragment_position));
|
||||
BOOST_REQUIRE(!rd.is_end_of_stream());
|
||||
|
||||
rd.detach_buffer();
|
||||
|
||||
handle.pause();
|
||||
if (detach_buffer) {
|
||||
rd.detach_buffer();
|
||||
}
|
||||
|
||||
while(permit.semaphore().try_evict_one_inactive_read());
|
||||
|
||||
return std::move(rd);
|
||||
}
|
||||
|
||||
flat_mutation_reader create_evictable_reader_and_evict_after_first_buffer(
|
||||
schema_ptr schema,
|
||||
reader_permit permit,
|
||||
const dht::partition_range& prange,
|
||||
const query::partition_slice& slice,
|
||||
std::deque<mutation_fragment> first_buffer,
|
||||
position_in_partition_view last_fragment_position,
|
||||
std::deque<mutation_fragment> last_buffer,
|
||||
size_t max_buffer_size,
|
||||
bool detach_buffer = true) {
|
||||
std::list<std::deque<mutation_fragment>> list;
|
||||
list.emplace_back(std::move(first_buffer));
|
||||
list.emplace_back(std::move(last_buffer));
|
||||
return create_evictable_reader_and_evict_after_first_buffer(
|
||||
std::move(schema),
|
||||
std::move(permit),
|
||||
prange,
|
||||
slice,
|
||||
std::move(list),
|
||||
last_fragment_position,
|
||||
max_buffer_size,
|
||||
detach_buffer);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_evictable_reader_trim_range_tombstones) {
|
||||
@@ -3650,7 +3660,7 @@ SEASTAR_THREAD_TEST_CASE(test_evictable_reader_self_validation) {
|
||||
|
||||
check_evictable_reader_validation_is_triggered(
|
||||
"pkey > _last_pkey; pkey ∈ pkrange",
|
||||
partition_error_prefix,
|
||||
"",
|
||||
s.schema(),
|
||||
permit,
|
||||
prange,
|
||||
@@ -3739,6 +3749,317 @@ SEASTAR_THREAD_TEST_CASE(test_evictable_reader_self_validation) {
|
||||
max_buffer_size);
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_evictable_reader_recreate_before_fast_forward_to) {
|
||||
class test_reader : public flat_mutation_reader::impl {
|
||||
simple_schema _s;
|
||||
const std::vector<dht::decorated_key> _pkeys;
|
||||
std::vector<dht::decorated_key>::const_iterator _it;
|
||||
std::vector<dht::decorated_key>::const_iterator _end;
|
||||
private:
|
||||
void on_range_change(const dht::partition_range& pr) {
|
||||
dht::ring_position_comparator cmp(*_schema);
|
||||
_it = _pkeys.begin();
|
||||
while (_it != _pkeys.end() && !pr.contains(*_it, cmp)) {
|
||||
++_it;
|
||||
}
|
||||
_end = _it;
|
||||
while (_end != _pkeys.end() && pr.contains(*_end, cmp)) {
|
||||
++_end;
|
||||
}
|
||||
}
|
||||
public:
|
||||
test_reader(simple_schema s, reader_permit permit, const dht::partition_range& pr, std::vector<dht::decorated_key> pkeys)
|
||||
: impl(s.schema(), std::move(permit))
|
||||
, _s(std::move(s))
|
||||
, _pkeys(std::move(pkeys)) {
|
||||
on_range_change(pr);
|
||||
}
|
||||
|
||||
virtual future<> fill_buffer(db::timeout_clock::time_point) override {
|
||||
if (_it == _end) {
|
||||
_end_of_stream = true;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
push_mutation_fragment(*_schema, _permit, partition_start(*_it++, {}));
|
||||
|
||||
uint32_t ck = 0;
|
||||
while (!is_buffer_full()) {
|
||||
auto ckey = _s.make_ckey(ck);
|
||||
push_mutation_fragment(*_schema, _permit, _s.make_row(_s.make_ckey(ck++), make_random_string(1024)));
|
||||
++ck;
|
||||
}
|
||||
|
||||
push_mutation_fragment(*_schema, _permit, partition_end());
|
||||
return make_ready_future<>();
|
||||
}
|
||||
virtual void next_partition() override {
|
||||
}
|
||||
virtual future<> fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point) override {
|
||||
on_range_change(pr);
|
||||
clear_buffer();
|
||||
_end_of_stream = false;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
virtual future<> fast_forward_to(position_range, db::timeout_clock::time_point) override {
|
||||
return make_exception_future<>(make_backtraced_exception_ptr<std::bad_function_call>());
|
||||
}
|
||||
};
|
||||
|
||||
reader_concurrency_semaphore semaphore(reader_concurrency_semaphore::no_limits{}, get_name());
|
||||
simple_schema s;
|
||||
auto permit = semaphore.make_permit(s.schema().get(), get_name());
|
||||
auto pkeys = s.make_pkeys(6);
|
||||
boost::sort(pkeys, dht::decorated_key::less_comparator(s.schema()));
|
||||
|
||||
auto ms = mutation_source([&] (schema_ptr schema,
|
||||
reader_permit permit,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc,
|
||||
tracing::trace_state_ptr tr,
|
||||
streamed_mutation::forwarding fwd,
|
||||
mutation_reader::forwarding fwd_mr) {
|
||||
std::vector<dht::decorated_key> pkeys_with_data;
|
||||
bool empty = false;
|
||||
for (const auto& pkey : pkeys) {
|
||||
empty = !empty;
|
||||
if (empty) {
|
||||
pkeys_with_data.push_back(pkey);
|
||||
}
|
||||
}
|
||||
return make_flat_mutation_reader<test_reader>(
|
||||
s,
|
||||
std::move(permit),
|
||||
range,
|
||||
std::move(pkeys_with_data));
|
||||
});
|
||||
|
||||
auto pr0 = dht::partition_range::make({pkeys[0], true}, {pkeys[3], true});
|
||||
auto [reader, handle] = make_manually_paused_evictable_reader(std::move(ms), s.schema(), permit, pr0, s.schema()->full_slice(),
|
||||
seastar::default_priority_class(), {}, mutation_reader::forwarding::yes);
|
||||
|
||||
auto reader_assert = assert_that(std::move(reader));
|
||||
reader_assert.produces(pkeys[0]);
|
||||
reader_assert.produces(pkeys[2]);
|
||||
|
||||
handle.pause();
|
||||
BOOST_REQUIRE(semaphore.try_evict_one_inactive_read());
|
||||
|
||||
reader_assert.produces_end_of_stream();
|
||||
|
||||
auto pr1 = dht::partition_range::make({pkeys[4], true}, {pkeys[5], true});
|
||||
reader_assert.fast_forward_to(pr1);
|
||||
|
||||
// Failure will happen in the form of `on_internal_error()`.
|
||||
reader_assert.produces(pkeys[4]);
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_evictable_reader_drop_flags) {
|
||||
reader_concurrency_semaphore semaphore(1, 0, get_name());
|
||||
simple_schema s;
|
||||
auto permit = semaphore.make_permit(s.schema().get(), get_name());
|
||||
|
||||
auto pkeys = s.make_pkeys(2);
|
||||
std::sort(pkeys.begin(), pkeys.end(), [&s] (const auto& pk1, const auto& pk2) {
|
||||
return pk1.less_compare(*s.schema(), pk2);
|
||||
});
|
||||
const auto& pkey1 = pkeys[0];
|
||||
const auto& pkey2 = pkeys[1];
|
||||
const int second_buffer_ck = 10;
|
||||
|
||||
struct buffer {
|
||||
simple_schema& s;
|
||||
reader_permit permit;
|
||||
std::deque<mutation_fragment> frags;
|
||||
std::vector<mutation> muts;
|
||||
size_t size = 0;
|
||||
std::optional<position_in_partition_view> last_pos;
|
||||
|
||||
buffer(simple_schema& s_, reader_permit permit_, dht::decorated_key key)
|
||||
: s(s_), permit(std::move(permit_)) {
|
||||
add_partition(key);
|
||||
}
|
||||
size_t add_partition(dht::decorated_key key) {
|
||||
size += frags.emplace_back(*s.schema(), permit, partition_start{key, {}}).memory_usage();
|
||||
muts.emplace_back(s.schema(), key);
|
||||
return size;
|
||||
}
|
||||
size_t add_mutation_fragment(mutation_fragment&& mf, bool only_to_frags = false) {
|
||||
if (!only_to_frags) {
|
||||
muts.back().apply(mf);
|
||||
}
|
||||
size += frags.emplace_back(*s.schema(), permit, std::move(mf)).memory_usage();
|
||||
return size;
|
||||
}
|
||||
size_t add_static_row(std::optional<mutation_fragment> sr = {}) {
|
||||
auto srow = sr ? std::move(*sr) : s.make_static_row("s");
|
||||
return add_mutation_fragment(std::move(srow));
|
||||
}
|
||||
size_t add_clustering_row(int i, bool only_to_frags = false) {
|
||||
return add_mutation_fragment(mutation_fragment(*s.schema(), permit, s.make_row(s.make_ckey(i), "v")), only_to_frags);
|
||||
}
|
||||
size_t add_clustering_rows(int start, int end) {
|
||||
for (int i = start; i < end; ++i) {
|
||||
add_clustering_row(i);
|
||||
}
|
||||
return size;
|
||||
}
|
||||
size_t add_partition_end() {
|
||||
size += frags.emplace_back(*s.schema(), permit, partition_end{}).memory_usage();
|
||||
return size;
|
||||
}
|
||||
void save_position() { last_pos = frags.back().position(); }
|
||||
void find_position(size_t buf_size) {
|
||||
size_t s = 0;
|
||||
for (const auto& frag : frags) {
|
||||
s += frag.memory_usage();
|
||||
if (s >= buf_size) {
|
||||
last_pos = frag.position();
|
||||
break;
|
||||
}
|
||||
}
|
||||
BOOST_REQUIRE(last_pos);
|
||||
}
|
||||
};
|
||||
|
||||
auto make_reader = [&] (const buffer& first_buffer, const buffer& second_buffer, const buffer* const third_buffer, size_t max_buffer_size) {
|
||||
std::list<std::deque<mutation_fragment>> buffers;
|
||||
buffers.emplace_back(copy_fragments(*s.schema(), permit, first_buffer.frags));
|
||||
buffers.emplace_back(copy_fragments(*s.schema(), permit, second_buffer.frags));
|
||||
if (third_buffer) {
|
||||
buffers.emplace_back(copy_fragments(*s.schema(), permit, third_buffer->frags));
|
||||
}
|
||||
return create_evictable_reader_and_evict_after_first_buffer(
|
||||
s.schema(),
|
||||
permit,
|
||||
query::full_partition_range,
|
||||
s.schema()->full_slice(),
|
||||
std::move(buffers),
|
||||
*first_buffer.last_pos,
|
||||
max_buffer_size,
|
||||
false);
|
||||
};
|
||||
|
||||
testlog.info("Same partition, with static row");
|
||||
{
|
||||
buffer first_buffer(s, permit, pkey1);
|
||||
first_buffer.add_static_row();
|
||||
auto srow = mutation_fragment(*s.schema(), permit, first_buffer.frags.back());
|
||||
const auto buf_size = first_buffer.add_clustering_rows(0, second_buffer_ck);
|
||||
first_buffer.save_position();
|
||||
first_buffer.add_clustering_row(second_buffer_ck);
|
||||
|
||||
buffer second_buffer(s, permit, pkey1);
|
||||
second_buffer.add_static_row(std::move(srow));
|
||||
second_buffer.add_clustering_row(second_buffer_ck);
|
||||
second_buffer.add_clustering_row(second_buffer_ck + 1);
|
||||
second_buffer.add_partition_end();
|
||||
|
||||
assert_that(make_reader(first_buffer, second_buffer, nullptr, buf_size))
|
||||
.has_monotonic_positions();
|
||||
|
||||
assert_that(make_reader(first_buffer, second_buffer, nullptr, buf_size))
|
||||
.produces(first_buffer.muts[0] + second_buffer.muts[0])
|
||||
.produces_end_of_stream();
|
||||
}
|
||||
|
||||
testlog.info("Same partition, no static row");
|
||||
{
|
||||
buffer first_buffer(s, permit, pkey1);
|
||||
const auto buf_size = first_buffer.add_clustering_rows(0, second_buffer_ck);
|
||||
first_buffer.save_position();
|
||||
first_buffer.add_clustering_row(second_buffer_ck);
|
||||
|
||||
buffer second_buffer(s, permit, pkey1);
|
||||
second_buffer.add_clustering_row(second_buffer_ck);
|
||||
second_buffer.add_clustering_row(second_buffer_ck + 1);
|
||||
second_buffer.add_partition_end();
|
||||
|
||||
assert_that(make_reader(first_buffer, second_buffer, nullptr, buf_size))
|
||||
.has_monotonic_positions();
|
||||
|
||||
assert_that(make_reader(first_buffer, second_buffer, nullptr, buf_size))
|
||||
.produces(first_buffer.muts[0] + second_buffer.muts[0])
|
||||
.produces_end_of_stream();
|
||||
}
|
||||
|
||||
testlog.info("Same partition as expected, no static row, next partition has static row (#8923)");
|
||||
{
|
||||
buffer second_buffer(s, permit, pkey1);
|
||||
second_buffer.add_clustering_rows(second_buffer_ck, second_buffer_ck + second_buffer_ck / 2);
|
||||
// We want to end the buffer on the partition-start below, but since a
|
||||
// partition start will be dropped from it, we have to use the size
|
||||
// without it.
|
||||
const auto buf_size = second_buffer.add_partition_end();
|
||||
second_buffer.add_partition(pkey2);
|
||||
second_buffer.add_static_row();
|
||||
auto srow = mutation_fragment(*s.schema(), permit, second_buffer.frags.back());
|
||||
second_buffer.add_clustering_rows(0, 2);
|
||||
|
||||
buffer first_buffer(s, permit, pkey1);
|
||||
for (int i = 0; first_buffer.add_clustering_row(i) < buf_size; ++i);
|
||||
first_buffer.save_position();
|
||||
first_buffer.add_mutation_fragment(mutation_fragment(*s.schema(), permit, second_buffer.frags[1]));
|
||||
|
||||
buffer third_buffer(s, permit, pkey2);
|
||||
third_buffer.add_static_row(std::move(srow));
|
||||
third_buffer.add_clustering_rows(0, 2);
|
||||
third_buffer.add_partition_end();
|
||||
|
||||
first_buffer.find_position(buf_size);
|
||||
|
||||
assert_that(make_reader(first_buffer, second_buffer, &third_buffer, buf_size))
|
||||
.has_monotonic_positions();
|
||||
|
||||
assert_that(make_reader(first_buffer, second_buffer, &third_buffer, buf_size))
|
||||
.produces(first_buffer.muts[0] + second_buffer.muts[0])
|
||||
.produces(second_buffer.muts[1] + third_buffer.muts[0])
|
||||
.produces_end_of_stream();
|
||||
}
|
||||
|
||||
testlog.info("Next partition, with no static row");
|
||||
{
|
||||
buffer first_buffer(s, permit, pkey1);
|
||||
const auto buf_size = first_buffer.add_clustering_rows(0, second_buffer_ck);
|
||||
first_buffer.save_position();
|
||||
first_buffer.add_clustering_row(second_buffer_ck + 1, true);
|
||||
|
||||
buffer second_buffer(s, permit, pkey2);
|
||||
second_buffer.add_clustering_rows(0, second_buffer_ck / 2);
|
||||
second_buffer.add_partition_end();
|
||||
|
||||
assert_that(make_reader(first_buffer, second_buffer, nullptr, buf_size))
|
||||
.has_monotonic_positions();
|
||||
|
||||
assert_that(make_reader(first_buffer, second_buffer, nullptr, buf_size))
|
||||
.produces(first_buffer.muts[0])
|
||||
.produces(second_buffer.muts[0])
|
||||
.produces_end_of_stream();
|
||||
}
|
||||
|
||||
testlog.info("Next partition, with static row");
|
||||
{
|
||||
buffer first_buffer(s, permit, pkey1);
|
||||
const auto buf_size = first_buffer.add_clustering_rows(0, second_buffer_ck);
|
||||
first_buffer.save_position();
|
||||
first_buffer.add_clustering_row(second_buffer_ck + 1, true);
|
||||
|
||||
buffer second_buffer(s, permit, pkey2);
|
||||
second_buffer.add_static_row();
|
||||
second_buffer.add_clustering_rows(0, second_buffer_ck / 2);
|
||||
second_buffer.add_partition_end();
|
||||
|
||||
assert_that(make_reader(first_buffer, second_buffer, nullptr, buf_size))
|
||||
.has_monotonic_positions();
|
||||
|
||||
assert_that(make_reader(first_buffer, second_buffer, nullptr, buf_size))
|
||||
.produces(first_buffer.muts[0])
|
||||
.produces(second_buffer.muts[0])
|
||||
.produces_end_of_stream();
|
||||
}
|
||||
}
|
||||
|
||||
struct mutation_bounds {
|
||||
std::optional<mutation> m;
|
||||
position_in_partition lower;
|
||||
|
||||
@@ -28,6 +28,8 @@
|
||||
#include "sstables/sstables.hh"
|
||||
#include "test/lib/mutation_source_test.hh"
|
||||
#include "test/lib/sstable_utils.hh"
|
||||
#include "test/lib/mutation_assertions.hh"
|
||||
#include "partition_slice_builder.hh"
|
||||
|
||||
using namespace sstables;
|
||||
using namespace std::chrono_literals;
|
||||
@@ -62,3 +64,69 @@ SEASTAR_TEST_CASE(test_sstable_conforms_to_mutation_source) {
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Regression test for scylladb/scylla-enterprise#2016
|
||||
SEASTAR_THREAD_TEST_CASE(test_produces_range_tombstone) {
|
||||
auto s = schema_builder("ks", "cf")
|
||||
.with_column("pk", int32_type, column_kind::partition_key)
|
||||
.with_column("ck", int32_type, column_kind::clustering_key)
|
||||
.with_column("v", int32_type, column_kind::regular_column)
|
||||
.build();
|
||||
|
||||
mutation m(s, partition_key::from_single_value(*s, int32_type->decompose(0)));
|
||||
m.partition().apply_row_tombstone(*s, range_tombstone{
|
||||
clustering_key::from_exploded(*s, {int32_type->decompose(6)}), bound_kind::excl_start,
|
||||
clustering_key::from_exploded(*s, {int32_type->decompose(10)}), bound_kind::incl_end,
|
||||
tombstone(0, gc_clock::time_point())
|
||||
});
|
||||
|
||||
{
|
||||
auto ckey = clustering_key::from_exploded(*s, {int32_type->decompose(6)});
|
||||
deletable_row& row = m.partition().clustered_row(*s, ckey, is_dummy::no, is_continuous(false));
|
||||
row.marker() = row_marker(4);
|
||||
}
|
||||
{
|
||||
auto ckey = clustering_key::from_exploded(*s, {int32_type->decompose(8)});
|
||||
deletable_row& row = m.partition().clustered_row(*s, ckey, is_dummy::no, is_continuous(false));
|
||||
row.apply(tombstone(2, gc_clock::time_point()));
|
||||
row.marker() = row_marker(5);
|
||||
}
|
||||
|
||||
testlog.info("m: {}", m);
|
||||
|
||||
auto slice = partition_slice_builder(*s)
|
||||
.with_range(query::clustering_range::make(
|
||||
{clustering_key::from_exploded(*s, {int32_type->decompose(8)}), false},
|
||||
{clustering_key::from_exploded(*s, {int32_type->decompose(10)}), true}
|
||||
))
|
||||
.build();
|
||||
|
||||
auto pr = dht::partition_range::make_singular(m.decorated_key());
|
||||
|
||||
std::vector<tmpdir> dirs;
|
||||
dirs.emplace_back();
|
||||
sstables::test_env::do_with_async([&] (sstables::test_env& env) {
|
||||
storage_service_for_tests ssft;
|
||||
auto version = sstable_version_types::la;
|
||||
auto index_block_size = 1;
|
||||
sstable_writer_config cfg = env.manager().configure_writer();
|
||||
cfg.promoted_index_block_size = index_block_size;
|
||||
|
||||
auto source = make_sstable_mutation_source(env, s, dirs.back().path().string(), {m}, cfg, version, gc_clock::now());
|
||||
|
||||
{
|
||||
auto rd = source.make_reader(s, tests::make_permit(), pr, slice);
|
||||
while (auto mf = rd(db::no_timeout).get0()) {
|
||||
testlog.info("produced {}", mutation_fragment::printer(*s, *mf));
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
auto rd = source.make_reader(s, tests::make_permit(), pr, slice);
|
||||
mutation_opt sliced_m = read_mutation_from_flat_mutation_reader(rd, db::no_timeout).get0();
|
||||
BOOST_REQUIRE(bool(sliced_m));
|
||||
|
||||
assert_that(*sliced_m).is_equal_to(m, slice.row_ranges(*m.schema(), m.key()));
|
||||
}
|
||||
}).get();
|
||||
}
|
||||
|
||||
@@ -6886,3 +6886,115 @@ SEASTAR_TEST_CASE(test_twcs_single_key_reader_filtering) {
|
||||
cf_stats.sstables_checked_by_clustering_filter - checked_by_ck);
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(max_ongoing_compaction_test) {
|
||||
return test_env::do_with_async([] (test_env& env) {
|
||||
BOOST_REQUIRE(smp::count == 1);
|
||||
|
||||
auto make_schema = [] (auto idx) {
|
||||
auto builder = schema_builder("tests", std::to_string(idx))
|
||||
.with_column("id", utf8_type, column_kind::partition_key)
|
||||
.with_column("cl", int32_type, column_kind::clustering_key)
|
||||
.with_column("value", int32_type);
|
||||
builder.set_compaction_strategy(sstables::compaction_strategy_type::time_window);
|
||||
std::map <sstring, sstring> opts = {
|
||||
{time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY, "HOURS"},
|
||||
{time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, "1"},
|
||||
{time_window_compaction_strategy_options::EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS_KEY, "0"},
|
||||
};
|
||||
builder.set_compaction_strategy_options(std::move(opts));
|
||||
builder.set_gc_grace_seconds(0);
|
||||
return builder.build();
|
||||
};
|
||||
|
||||
auto cm = make_lw_shared<compaction_manager>();
|
||||
cm->enable();
|
||||
auto stop_cm = defer([&cm] {
|
||||
cm->stop().get();
|
||||
});
|
||||
|
||||
auto tmp = tmpdir();
|
||||
auto cl_stats = make_lw_shared<cell_locker_stats>();
|
||||
auto tracker = make_lw_shared<cache_tracker>();
|
||||
auto tokens = token_generation_for_shard(1, this_shard_id(), test_db_config.murmur3_partitioner_ignore_msb_bits(), smp::count);
|
||||
|
||||
auto next_timestamp = [] (auto step) {
|
||||
using namespace std::chrono;
|
||||
return (gc_clock::now().time_since_epoch() - duration_cast<microseconds>(step)).count();
|
||||
};
|
||||
auto make_expiring_cell = [&] (schema_ptr s, std::chrono::hours step) {
|
||||
static thread_local int32_t value = 1;
|
||||
|
||||
auto key_str = tokens[0].first;
|
||||
auto key = partition_key::from_exploded(*s, {to_bytes(key_str)});
|
||||
|
||||
mutation m(s, key);
|
||||
auto c_key = clustering_key::from_exploded(*s, {int32_type->decompose(value++)});
|
||||
m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(value)), next_timestamp(step), gc_clock::duration(step + 5s));
|
||||
return m;
|
||||
};
|
||||
|
||||
auto make_table_with_single_fully_expired_sstable = [&] (auto idx) {
|
||||
auto s = make_schema(idx);
|
||||
column_family::config cfg = column_family_test_config(env.manager());
|
||||
cfg.datadir = tmp.path().string() + "/" + std::to_string(idx);
|
||||
touch_directory(cfg.datadir).get();
|
||||
cfg.enable_commitlog = false;
|
||||
cfg.enable_incremental_backups = false;
|
||||
|
||||
auto sst_gen = [&env, s, dir = cfg.datadir, gen = make_lw_shared<unsigned>(1)] () mutable {
|
||||
return env.make_sstable(s, dir, (*gen)++, sstables::sstable::version_types::md, big);
|
||||
};
|
||||
|
||||
auto cf = make_lw_shared<column_family>(s, cfg, column_family::no_commitlog(), *cm, *cl_stats, *tracker);
|
||||
cf->start();
|
||||
cf->mark_ready_for_writes();
|
||||
|
||||
auto muts = { make_expiring_cell(s, std::chrono::hours(1)) };
|
||||
auto sst = make_sstable_containing(sst_gen, muts);
|
||||
column_family_test(cf).add_sstable(sst);
|
||||
return cf;
|
||||
};
|
||||
|
||||
std::vector<lw_shared_ptr<column_family>> tables;
|
||||
auto stop_tables = defer([&tables] {
|
||||
for (auto& t : tables) {
|
||||
t->stop().get();
|
||||
}
|
||||
});
|
||||
for (auto i = 0; i < 100; i++) {
|
||||
tables.push_back(make_table_with_single_fully_expired_sstable(i));
|
||||
}
|
||||
|
||||
// Make sure everything is expired
|
||||
forward_jump_clocks(std::chrono::hours(100));
|
||||
|
||||
for (auto& t : tables) {
|
||||
BOOST_REQUIRE(t->sstables_count() == 1);
|
||||
t->trigger_compaction();
|
||||
}
|
||||
|
||||
BOOST_REQUIRE(cm->get_stats().pending_tasks >= 1 || cm->get_stats().active_tasks >= 1);
|
||||
|
||||
size_t max_ongoing_compaction = 0;
|
||||
|
||||
// wait for submitted jobs to finish.
|
||||
auto end = [cm, &tables] {
|
||||
return cm->get_stats().pending_tasks == 0 && cm->get_stats().active_tasks == 0
|
||||
&& boost::algorithm::all_of(tables, [] (auto& t) { return t->sstables_count() == 0; });
|
||||
};
|
||||
while (!end()) {
|
||||
if (!cm->get_stats().pending_tasks && !cm->get_stats().active_tasks) {
|
||||
for (auto& t : tables) {
|
||||
if (t->sstables_count()) {
|
||||
t->trigger_compaction();
|
||||
}
|
||||
}
|
||||
}
|
||||
max_ongoing_compaction = std::max(cm->get_stats().active_tasks, max_ongoing_compaction);
|
||||
later().get();
|
||||
}
|
||||
BOOST_REQUIRE(cm->get_stats().errors == 0);
|
||||
BOOST_REQUIRE(max_ongoing_compaction == 1);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -440,7 +440,6 @@ def testNestedClusteringKeyUsage(cql, test_keyspace):
|
||||
)
|
||||
|
||||
# Reproduces issue #7868 and #7902
|
||||
@pytest.mark.xfail(reason="fails because of issue #7902")
|
||||
def testNestedClusteringKeyUsageWithReverseOrder(cql, test_keyspace):
|
||||
with create_table(cql, test_keyspace, "(a int, b frozen<map<set<int>, list<int>>>, c frozen<set<int>>, d int, PRIMARY KEY (a, b, c)) WITH CLUSTERING ORDER BY (b DESC)") as table:
|
||||
execute(cql, table, "INSERT INTO %s (a, b, c, d) VALUES (?, ?, ?, ?)", 0, {}, set(), 0)
|
||||
|
||||
@@ -175,9 +175,12 @@ def wait_for_index(cql, table, column, everything):
|
||||
results = []
|
||||
for v in column_values:
|
||||
results.extend(list(cql.execute(f'SELECT * FROM {table} WHERE {column}={v}')))
|
||||
if set(results) == set(everything):
|
||||
|
||||
if sorted(results) == sorted(everything):
|
||||
return
|
||||
|
||||
time.sleep(0.1)
|
||||
|
||||
pytest.fail('Timeout waiting for index to become up to date.')
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
@@ -291,3 +294,46 @@ def test_contains_frozen_collection_ck(cql, test_keyspace):
|
||||
"SELECT * FROM " + table + " WHERE a=0 AND c=0 AND b CONTAINS 0 ALLOW FILTERING")))
|
||||
assert 1 == len(list(cql.execute(
|
||||
"SELECT * FROM " + table + " WHERE a=0 AND c=0 AND b CONTAINS KEY 0 ALLOW FILTERING")))
|
||||
|
||||
# table5 contains an indexed table with 3 clustering columns.
|
||||
# used to test correct filtering of rows fetched from an index table.
|
||||
@pytest.fixture(scope="module")
|
||||
def table5(cql, test_keyspace):
|
||||
table = test_keyspace + "." + unique_name()
|
||||
cql.execute(f"CREATE TABLE {table} (p int, c1 frozen<list<int>>, c2 frozen<list<int>>, c3 int, PRIMARY KEY (p,c1,c2,c3))")
|
||||
cql.execute(f"CREATE INDEX ON {table} (c3)")
|
||||
cql.execute(f"INSERT INTO {table} (p, c1, c2, c3) VALUES (0, [1], [2], 0)")
|
||||
cql.execute(f"INSERT INTO {table} (p, c1, c2, c3) VALUES (0, [2], [2], 0)")
|
||||
cql.execute(f"INSERT INTO {table} (p, c1, c2, c3) VALUES (0, [1], [3], 0)")
|
||||
cql.execute(f"INSERT INTO {table} (p, c1, c2, c3) VALUES (0, [1], [2], 1)")
|
||||
|
||||
everything = list(cql.execute(f"SELECT * FROM {table}"))
|
||||
wait_for_index(cql, table, 'c3', everything)
|
||||
yield (table, everything)
|
||||
cql.execute(f"DROP TABLE {table}")
|
||||
|
||||
# Test that implementation of filtering for indexes works ok.
|
||||
# Current implementation is a bit conservative - it might sometimes state
|
||||
# that filtering is needed when it isn't actually required, but at least it's safe.
|
||||
def test_select_indexed_cluster_three_keys(cql, table5):
|
||||
def check_good_row(row):
|
||||
return row.p == 0 and row.c1 == [1] and row.c2 == [2] and row.c3 == 0
|
||||
|
||||
check_af_optional(cql, table5, "c3 = 0", lambda r : r.c3 == 0)
|
||||
check_af_mandatory(cql, table5, "c1 = [1] AND c2 = [2] AND c3 = 0", check_good_row)
|
||||
check_af_mandatory(cql, table5, "p = 0 AND c1 CONTAINS 1 AND c3 = 0", lambda r : r.p == 0 and r.c1 == [1] and r.c3 == 0)
|
||||
check_af_mandatory(cql, table5, "p = 0 AND c1 = [1] AND c2 CONTAINS 2 AND c3 = 0", check_good_row)
|
||||
|
||||
# Doesn't use an index - shouldn't be affected
|
||||
check_af_optional(cql, table5, "p = 0 AND c1 = [1] AND c2 = [2] AND c3 = 0", check_good_row)
|
||||
|
||||
# Here are the cases where current implementation of need_filtering() fails
|
||||
# By coincidence they also fail on cassandra, it looks like cassandra is buggy
|
||||
@pytest.mark.xfail(reason="Too conservative need_filtering() implementation")
|
||||
def test_select_indexed_cluster_three_keys_conservative(cql, table5, cassandra_bug):
|
||||
def check_good_row(row):
|
||||
return row.p == 0 and row.c1 == [1] and row.c3 == 0
|
||||
|
||||
# Don't require filtering, but for now we report they do
|
||||
check_af_optional(cql, table5, "p = 0 AND c1 = [1] AND c3 = 0", check_good_row)
|
||||
check_af_optional(cql, table5, "p = 0 AND c1 = [1] AND c2 < [3] AND c3 = 0", lambda r : check_good_row(r) and r.c2 < [3])
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
|
||||
from util import unique_name, new_test_table
|
||||
|
||||
from cassandra.protocol import FunctionFailure
|
||||
from cassandra.protocol import FunctionFailure, InvalidRequest
|
||||
|
||||
import pytest
|
||||
import random
|
||||
@@ -34,58 +34,62 @@ import random
|
||||
@pytest.fixture(scope="session")
|
||||
def table1(cql, test_keyspace):
|
||||
table = test_keyspace + "." + unique_name()
|
||||
cql.execute(f"CREATE TABLE {table} (p int PRIMARY KEY, v int, a ascii)")
|
||||
cql.execute(f"CREATE TABLE {table} (p int PRIMARY KEY, v int, a ascii, b boolean)")
|
||||
yield table
|
||||
cql.execute("DROP TABLE " + table)
|
||||
|
||||
# Test that failed fromJson() parsing an invalid JSON results in the expected
|
||||
# error - FunctionFailure - and not some weird internal error.
|
||||
# Reproduces issue #7911.
|
||||
@pytest.mark.xfail(reason="issue #7911")
|
||||
def test_failed_json_parsing_unprepared(cql, table1):
|
||||
p = random.randint(1,1000000000)
|
||||
with pytest.raises(FunctionFailure):
|
||||
cql.execute(f"INSERT INTO {table1} (p, v) VALUES ({p}, fromJson('dog'))")
|
||||
@pytest.mark.xfail(reason="issue #7911")
|
||||
def test_failed_json_parsing_prepared(cql, table1):
|
||||
p = random.randint(1,1000000000)
|
||||
stmt = cql.prepare(f"INSERT INTO {table1} (p, v) VALUES (?, fromJson(?))")
|
||||
with pytest.raises(FunctionFailure):
|
||||
cql.execute(stmt, [0, 'dog'])
|
||||
cql.execute(stmt, [p, 'dog'])
|
||||
|
||||
# Similarly, if the JSON parsing did not fail, but yielded a type which is
|
||||
# incompatible with the type we want it to yield, we should get a clean
|
||||
# FunctionFailure, not some internal server error.
|
||||
# We have here examples of returning a string where a number was expected,
|
||||
# and returning a unicode string where ASCII was expected.
|
||||
# and returning a unicode string where ASCII was expected, and returning
|
||||
# a number of the wrong type
|
||||
# Reproduces issue #7911.
|
||||
@pytest.mark.xfail(reason="issue #7911")
|
||||
def test_fromjson_wrong_type_unprepared(cql, table1):
|
||||
p = random.randint(1,1000000000)
|
||||
with pytest.raises(FunctionFailure):
|
||||
cql.execute(f"INSERT INTO {table1} (p, v) VALUES ({p}, fromJson('\"dog\"'))")
|
||||
with pytest.raises(FunctionFailure):
|
||||
cql.execute(f"INSERT INTO {table1} (p, a) VALUES ({p}, fromJson('3'))")
|
||||
@pytest.mark.xfail(reason="issue #7911")
|
||||
def test_fromjson_wrong_type_prepared(cql, table1):
|
||||
p = random.randint(1,1000000000)
|
||||
stmt = cql.prepare(f"INSERT INTO {table1} (p, v) VALUES (?, fromJson(?))")
|
||||
with pytest.raises(FunctionFailure):
|
||||
cql.execute(stmt, [0, '"dog"'])
|
||||
cql.execute(stmt, [p, '"dog"'])
|
||||
stmt = cql.prepare(f"INSERT INTO {table1} (p, a) VALUES (?, fromJson(?))")
|
||||
with pytest.raises(FunctionFailure):
|
||||
cql.execute(stmt, [0, '3'])
|
||||
@pytest.mark.xfail(reason="issue #7911")
|
||||
cql.execute(stmt, [p, '3'])
|
||||
def test_fromjson_bad_ascii_unprepared(cql, table1):
|
||||
p = random.randint(1,1000000000)
|
||||
with pytest.raises(FunctionFailure):
|
||||
cql.execute(f"INSERT INTO {table1} (p, a) VALUES ({p}, fromJson('\"שלום\"'))")
|
||||
@pytest.mark.xfail(reason="issue #7911")
|
||||
def test_fromjson_bad_ascii_prepared(cql, table1):
|
||||
p = random.randint(1,1000000000)
|
||||
stmt = cql.prepare(f"INSERT INTO {table1} (p, a) VALUES (?, fromJson(?))")
|
||||
with pytest.raises(FunctionFailure):
|
||||
cql.execute(stmt, [0, '"שלום"'])
|
||||
cql.execute(stmt, [p, '"שלום"'])
|
||||
def test_fromjson_nonint_unprepared(cql, table1):
|
||||
p = random.randint(1,1000000000)
|
||||
with pytest.raises(FunctionFailure):
|
||||
cql.execute(f"INSERT INTO {table1} (p, v) VALUES ({p}, fromJson('1.2'))")
|
||||
def test_fromjson_nonint_prepared(cql, table1):
|
||||
p = random.randint(1,1000000000)
|
||||
stmt = cql.prepare(f"INSERT INTO {table1} (p, v) VALUES (?, fromJson(?))")
|
||||
with pytest.raises(FunctionFailure):
|
||||
cql.execute(stmt, [p, '1.2'])
|
||||
|
||||
# The JSON standard does not define or limit the range or precision of
|
||||
# numbers. However, if a number is assigned to a Scylla number type, the
|
||||
@@ -105,7 +109,27 @@ def test_fromjson_int_overflow_prepared(cql, table1):
|
||||
p = random.randint(1,1000000000)
|
||||
stmt = cql.prepare(f"INSERT INTO {table1} (p, v) VALUES (?, fromJson(?))")
|
||||
with pytest.raises(FunctionFailure):
|
||||
cql.execute(stmt, [0, '2147483648'])
|
||||
cql.execute(stmt, [p, '2147483648'])
|
||||
|
||||
# Cassandra allows the strings "true" and "false", not just the JSON constants
|
||||
# true and false, to be assigned to a boolean column. However, very strangely,
|
||||
# it only allows this for prepared statements, and *not* for unprepared
|
||||
# statements - which result in an InvalidRequest!
|
||||
# Reproduces #7915.
|
||||
def test_fromjson_boolean_string_unprepared(cql, table1):
|
||||
p = random.randint(1,1000000000)
|
||||
with pytest.raises(InvalidRequest):
|
||||
cql.execute(f"INSERT INTO {table1} (p, b) VALUES ({p}, '\"true\"')")
|
||||
with pytest.raises(InvalidRequest):
|
||||
cql.execute(f"INSERT INTO {table1} (p, b) VALUES ({p}, '\"false\"')")
|
||||
@pytest.mark.xfail(reason="issue #7915")
|
||||
def test_fromjson_boolean_string_prepared(cql, table1):
|
||||
p = random.randint(1,1000000000)
|
||||
stmt = cql.prepare(f"INSERT INTO {table1} (p, b) VALUES (?, fromJson(?))")
|
||||
cql.execute(stmt, [p, '"true"'])
|
||||
assert list(cql.execute(f"SELECT p, b from {table1} where p = {p}")) == [(p, True)]
|
||||
cql.execute(stmt, [p, '"false"'])
|
||||
assert list(cql.execute(f"SELECT p, b from {table1} where p = {p}")) == [(p, False)]
|
||||
|
||||
# Test that null argument is allowed for fromJson(), with unprepared statement
|
||||
# Reproduces issue #7912.
|
||||
|
||||
@@ -22,7 +22,7 @@ import pytest
|
||||
from cassandra.protocol import SyntaxException, AlreadyExists, InvalidRequest, ConfigurationException, ReadFailure
|
||||
from cassandra.query import SimpleStatement
|
||||
|
||||
from util import new_test_table
|
||||
from util import new_test_table, unique_name
|
||||
|
||||
# A reproducer for issue #7443: Normally, when the entire table is SELECTed,
|
||||
# the partitions are returned sorted by the partitions' token. When there
|
||||
@@ -81,3 +81,107 @@ def test_paging_with_desc_clustering_order(cql, test_keyspace):
|
||||
cql.execute(f"INSERT INTO {table}(p,c) VALUES ({i}, 42)")
|
||||
stmt = SimpleStatement(f"SELECT * FROM {table} WHERE c = 42", fetch_size=1)
|
||||
assert len([row for row in cql.execute(stmt)]) == 3
|
||||
|
||||
# Test which ensures that indexes for a query are picked by the order in which
|
||||
# they appear in restrictions. That way, users can deterministically pick
|
||||
# which indexes are used for which queries.
|
||||
# Note that the order of picking indexing is not set in stone and may be
|
||||
# subject to change - in which case this test case should be amended as well.
|
||||
# The order tested in this case was decided as a good first step in issue
|
||||
# #7969, but it's possible that it will eventually be implemented another
|
||||
# way, e.g. dynamically based on estimated query selectivity statistics.
|
||||
# Ref: #7969
|
||||
@pytest.mark.xfail(reason="The order of picking indexes is currently arbitrary. Issue #7969")
|
||||
def test_order_of_indexes(scylla_only, cql, test_keyspace):
|
||||
schema = 'p int primary key, v1 int, v2 int, v3 int'
|
||||
with new_test_table(cql, test_keyspace, schema) as table:
|
||||
cql.execute(f"CREATE INDEX my_v3_idx ON {table}(v3)")
|
||||
cql.execute(f"CREATE INDEX my_v1_idx ON {table}(v1)")
|
||||
cql.execute(f"CREATE INDEX my_v2_idx ON {table}((p),v2)")
|
||||
# All queries below should use the first index they find in the list
|
||||
# of restrictions. Tracing information will be consulted to ensure
|
||||
# it's true. Currently some of the cases below succeed, because the
|
||||
# order is not well defined (and may, for instance, change upon
|
||||
# server restart), but some of them fail. Once a proper ordering
|
||||
# is implemented, all cases below should succeed.
|
||||
def index_used(query, index_name):
|
||||
assert any([index_name in event.description for event in cql.execute(query, trace=True).get_query_trace().events])
|
||||
index_used(f"SELECT * FROM {table} WHERE v3 = 1", "my_v3_idx")
|
||||
index_used(f"SELECT * FROM {table} WHERE v3 = 1 and v1 = 2 allow filtering", "my_v3_idx")
|
||||
index_used(f"SELECT * FROM {table} WHERE p = 1 and v1 = 1 and v3 = 2 allow filtering", "my_v1_idx")
|
||||
index_used(f"SELECT * FROM {table} WHERE p = 1 and v3 = 1 and v1 = 2 allow filtering", "my_v3_idx")
|
||||
# Local indexes are still skipped if they cannot be used
|
||||
index_used(f"SELECT * FROM {table} WHERE v2 = 1 and v1 = 2 allow filtering", "my_v1_idx")
|
||||
index_used(f"SELECT * FROM {table} WHERE v2 = 1 and v3 = 2 and v1 = 3 allow filtering", "my_v3_idx")
|
||||
index_used(f"SELECT * FROM {table} WHERE v1 = 1 and v2 = 2 and v3 = 3 allow filtering", "my_v1_idx")
|
||||
# Local indexes are still preferred over global ones, if they can be used
|
||||
index_used(f"SELECT * FROM {table} WHERE p = 1 and v1 = 1 and v3 = 2 and v2 = 2 allow filtering", "my_v2_idx")
|
||||
index_used(f"SELECT * FROM {table} WHERE p = 1 and v2 = 1 and v1 = 2 allow filtering", "my_v2_idx")
|
||||
|
||||
# Indexes can be created without an explicit name, in which case a default name is chosen.
|
||||
# However, due to #8620 it was possible to break the index creation mechanism by creating
|
||||
# a properly named regular table, which conflicts with the generated index name.
|
||||
def test_create_unnamed_index_when_its_name_is_taken(cql, test_keyspace):
|
||||
schema = 'p int primary key, v int'
|
||||
with new_test_table(cql, test_keyspace, schema) as table:
|
||||
try:
|
||||
cql.execute(f"CREATE TABLE {table}_v_idx_index (i_do_not_exist_in_the_base_table int primary key)")
|
||||
# Creating an index should succeed, even though its default name is taken
|
||||
# by the table above
|
||||
cql.execute(f"CREATE INDEX ON {table}(v)")
|
||||
finally:
|
||||
cql.execute(f"DROP TABLE {table}_v_idx_index")
|
||||
|
||||
# Indexed created with an explicit name cause a materialized view to be created,
|
||||
# and this view has a specific name - <index-name>_index. If there happens to be
|
||||
# a regular table (or another view) named just like that, index creation should fail.
|
||||
def test_create_named_index_when_its_name_is_taken(scylla_only, cql, test_keyspace):
|
||||
schema = 'p int primary key, v int'
|
||||
with new_test_table(cql, test_keyspace, schema) as table:
|
||||
index_name = unique_name()
|
||||
try:
|
||||
cql.execute(f"CREATE TABLE {test_keyspace}.{index_name}_index (i_do_not_exist_in_the_base_table int primary key)")
|
||||
# Creating an index should fail, because it's impossible to create
|
||||
# its underlying materialized view, because its name is taken by a regular table
|
||||
with pytest.raises(InvalidRequest, match="already exists"):
|
||||
cql.execute(f"CREATE INDEX {index_name} ON {table}(v)")
|
||||
finally:
|
||||
cql.execute(f"DROP TABLE {test_keyspace}.{index_name}_index")
|
||||
|
||||
# Tests for CREATE INDEX IF NOT EXISTS
|
||||
# Reproduces issue #8717.
|
||||
def test_create_index_if_not_exists(cql, test_keyspace):
|
||||
with new_test_table(cql, test_keyspace, 'p int primary key, v int') as table:
|
||||
cql.execute(f"CREATE INDEX ON {table}(v)")
|
||||
# Can't create the same index again without "IF NOT EXISTS", but can
|
||||
# do it with "IF NOT EXISTS":
|
||||
with pytest.raises(InvalidRequest, match="duplicate"):
|
||||
cql.execute(f"CREATE INDEX ON {table}(v)")
|
||||
cql.execute(f"CREATE INDEX IF NOT EXISTS ON {table}(v)")
|
||||
cql.execute(f"DROP INDEX {test_keyspace}.{table.split('.')[1]}_v_idx")
|
||||
|
||||
# Now test the same thing for named indexes. This is what broke in #8717:
|
||||
cql.execute(f"CREATE INDEX xyz ON {table}(v)")
|
||||
with pytest.raises(InvalidRequest, match="already exists"):
|
||||
cql.execute(f"CREATE INDEX xyz ON {table}(v)")
|
||||
cql.execute(f"CREATE INDEX IF NOT EXISTS xyz ON {table}(v)")
|
||||
cql.execute(f"DROP INDEX {test_keyspace}.xyz")
|
||||
|
||||
# Exactly the same with non-lower case name.
|
||||
cql.execute(f'CREATE INDEX "CamelCase" ON {table}(v)')
|
||||
with pytest.raises(InvalidRequest, match="already exists"):
|
||||
cql.execute(f'CREATE INDEX "CamelCase" ON {table}(v)')
|
||||
cql.execute(f'CREATE INDEX IF NOT EXISTS "CamelCase" ON {table}(v)')
|
||||
cql.execute(f'DROP INDEX {test_keyspace}."CamelCase"')
|
||||
|
||||
# Trying to create an index for an attribute that's already indexed,
|
||||
# but with a different name. The "IF NOT EXISTS" appears to succeed
|
||||
# in this case, but does not actually create the new index name -
|
||||
# only the old one remains.
|
||||
cql.execute(f"CREATE INDEX xyz ON {table}(v)")
|
||||
with pytest.raises(InvalidRequest, match="duplicate"):
|
||||
cql.execute(f"CREATE INDEX abc ON {table}(v)")
|
||||
cql.execute(f"CREATE INDEX IF NOT EXISTS abc ON {table}(v)")
|
||||
with pytest.raises(InvalidRequest):
|
||||
cql.execute(f"DROP INDEX {test_keyspace}.abc")
|
||||
cql.execute(f"DROP INDEX {test_keyspace}.xyz")
|
||||
|
||||
@@ -118,6 +118,8 @@ public:
|
||||
return stop_iteration::no;
|
||||
});
|
||||
});
|
||||
}).finally([&ir] () {
|
||||
return ir->close();
|
||||
});
|
||||
}).then([l] {
|
||||
return std::move(*l);
|
||||
|
||||
Submodule tools/java updated: 14e635e5de...e8accfbf45
@@ -97,12 +97,18 @@ future<> controller::do_start_server() {
|
||||
};
|
||||
|
||||
std::vector<listen_cfg> configs;
|
||||
int native_port_idx = -1, native_shard_aware_port_idx = -1;
|
||||
|
||||
if (cfg.native_transport_port() != 0) {
|
||||
configs.push_back(listen_cfg{ socket_address{ip, cfg.native_transport_port()}, false });
|
||||
if (cfg.native_transport_port.is_set() ||
|
||||
(!cfg.native_transport_port_ssl.is_set() && !cfg.native_transport_port.is_set())) {
|
||||
// Non-SSL port is specified || neither SSL nor non-SSL ports are specified
|
||||
configs.emplace_back(listen_cfg{ socket_address{ip, cfg.native_transport_port()}, false });
|
||||
native_port_idx = 0;
|
||||
}
|
||||
if (cfg.native_shard_aware_transport_port.is_set()) {
|
||||
configs.push_back(listen_cfg{ socket_address{ip, cfg.native_shard_aware_transport_port()}, true });
|
||||
if (cfg.native_shard_aware_transport_port.is_set() ||
|
||||
(!cfg.native_shard_aware_transport_port_ssl.is_set() && !cfg.native_shard_aware_transport_port.is_set())) {
|
||||
configs.emplace_back(listen_cfg{ socket_address{ip, cfg.native_shard_aware_transport_port()}, true });
|
||||
native_shard_aware_port_idx = native_port_idx + 1;
|
||||
}
|
||||
|
||||
// main should have made sure values are clean and neatish
|
||||
@@ -127,15 +133,20 @@ future<> controller::do_start_server() {
|
||||
|
||||
logger.info("Enabling encrypted CQL connections between client and server");
|
||||
|
||||
if (cfg.native_transport_port_ssl.is_set() && cfg.native_transport_port_ssl() != cfg.native_transport_port()) {
|
||||
if (cfg.native_transport_port_ssl.is_set() &&
|
||||
(!cfg.native_transport_port.is_set() ||
|
||||
cfg.native_transport_port_ssl() != cfg.native_transport_port())) {
|
||||
// SSL port is specified && non-SSL port is either left out or set to a different value
|
||||
configs.emplace_back(listen_cfg{{ip, cfg.native_transport_port_ssl()}, false, cred});
|
||||
} else {
|
||||
configs[0].cred = cred;
|
||||
} else if (native_port_idx >= 0) {
|
||||
configs[native_port_idx].cred = cred;
|
||||
}
|
||||
if (cfg.native_shard_aware_transport_port_ssl.is_set() && cfg.native_shard_aware_transport_port_ssl() != cfg.native_shard_aware_transport_port()) {
|
||||
if (cfg.native_shard_aware_transport_port_ssl.is_set() &&
|
||||
(!cfg.native_shard_aware_transport_port.is_set() ||
|
||||
cfg.native_shard_aware_transport_port_ssl() != cfg.native_shard_aware_transport_port())) {
|
||||
configs.emplace_back(listen_cfg{{ip, cfg.native_shard_aware_transport_port_ssl()}, true, std::move(cred)});
|
||||
} else if (cfg.native_shard_aware_transport_port.is_set()) {
|
||||
configs[1].cred = std::move(cred);
|
||||
} else if (native_shard_aware_port_idx >= 0) {
|
||||
configs[native_shard_aware_port_idx].cred = std::move(cred);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -572,7 +572,17 @@ future<foreign_ptr<std::unique_ptr<cql_server::response>>>
|
||||
} catch (const exceptions::prepared_query_not_found_exception& ex) {
|
||||
try { ++_server._stats.errors[ex.code()]; } catch(...) {}
|
||||
return make_unprepared_error(stream, ex.code(), ex.what(), ex.id, trace_state);
|
||||
} catch (const exceptions::function_execution_exception& ex) {
|
||||
try { ++_server._stats.errors[ex.code()]; } catch(...) {}
|
||||
return make_function_failure_error(stream, ex.code(), ex.what(), ex.ks_name, ex.func_name, ex.args, trace_state);
|
||||
} catch (const exceptions::cassandra_exception& ex) {
|
||||
// Note: the CQL protocol specifies that many types of errors have
|
||||
// mandatory parameters. These cassandra_exception subclasses MUST
|
||||
// be handled above. This default "cassandra_exception" case is
|
||||
// only appropriate for the specific types of errors which do not have
|
||||
// additional information, such as invalid_request_exception.
|
||||
// TODO: consider listing those types explicitly, instead of the
|
||||
// catch-all type cassandra_exception.
|
||||
try { ++_server._stats.errors[ex.code()]; } catch(...) {}
|
||||
return make_error(stream, ex.code(), ex.what(), trace_state);
|
||||
} catch (std::exception& ex) {
|
||||
@@ -1334,6 +1344,17 @@ std::unique_ptr<cql_server::response> cql_server::connection::make_unprepared_er
|
||||
return response;
|
||||
}
|
||||
|
||||
std::unique_ptr<cql_server::response> cql_server::connection::make_function_failure_error(int16_t stream, exceptions::exception_code err, sstring msg, sstring ks_name, sstring func_name, std::vector<sstring> args, const tracing::trace_state_ptr& tr_state) const
|
||||
{
|
||||
auto response = std::make_unique<cql_server::response>(stream, cql_binary_opcode::ERROR, tr_state);
|
||||
response->write_int(static_cast<int32_t>(err));
|
||||
response->write_string(msg);
|
||||
response->write_string(ks_name);
|
||||
response->write_string(func_name);
|
||||
response->write_string_list(args);
|
||||
return response;
|
||||
}
|
||||
|
||||
std::unique_ptr<cql_server::response> cql_server::connection::make_error(int16_t stream, exceptions::exception_code err, sstring msg, const tracing::trace_state_ptr& tr_state) const
|
||||
{
|
||||
auto response = std::make_unique<cql_server::response>(stream, cql_binary_opcode::ERROR, tr_state);
|
||||
|
||||
@@ -235,6 +235,7 @@ private:
|
||||
std::unique_ptr<cql_server::response> make_mutation_write_failure_error(int16_t stream, exceptions::exception_code err, sstring msg, db::consistency_level cl, int32_t received, int32_t numfailures, int32_t blockfor, db::write_type type, const tracing::trace_state_ptr& tr_state) const;
|
||||
std::unique_ptr<cql_server::response> make_already_exists_error(int16_t stream, exceptions::exception_code err, sstring msg, sstring ks_name, sstring cf_name, const tracing::trace_state_ptr& tr_state) const;
|
||||
std::unique_ptr<cql_server::response> make_unprepared_error(int16_t stream, exceptions::exception_code err, sstring msg, bytes id, const tracing::trace_state_ptr& tr_state) const;
|
||||
std::unique_ptr<cql_server::response> make_function_failure_error(int16_t stream, exceptions::exception_code err, sstring msg, sstring ks_name, sstring func_name, std::vector<sstring> args, const tracing::trace_state_ptr& tr_state) const;
|
||||
std::unique_ptr<cql_server::response> make_error(int16_t stream, exceptions::exception_code err, sstring msg, const tracing::trace_state_ptr& tr_state) const;
|
||||
std::unique_ptr<cql_server::response> make_ready(int16_t stream, const tracing::trace_state_ptr& tr_state) const;
|
||||
std::unique_ptr<cql_server::response> make_supported(int16_t stream, const tracing::trace_state_ptr& tr_state) const;
|
||||
|
||||
6
types.cc
6
types.cc
@@ -1637,10 +1637,10 @@ static void serialize_aux(const tuple_type_impl& type, const tuple_type_impl::na
|
||||
assert(elems.size() <= type.size());
|
||||
|
||||
for (size_t i = 0; i < elems.size(); ++i) {
|
||||
const data_type& t = type.type(i);
|
||||
const abstract_type& t = type.type(i)->without_reversed();
|
||||
const data_value& v = elems[i];
|
||||
if (!v.is_null() && t != v.type()) {
|
||||
throw std::runtime_error(format("tuple element type mismatch: expected {}, got {}", t->name(), v.type()->name()));
|
||||
if (!v.is_null() && t != *v.type()) {
|
||||
throw std::runtime_error(format("tuple element type mismatch: expected {}, got {}", t.name(), v.type()->name()));
|
||||
}
|
||||
|
||||
if (v.is_null()) {
|
||||
|
||||
@@ -263,6 +263,13 @@ decltype(auto) with_simplified(const View& v, Function&& fn)
|
||||
}
|
||||
}
|
||||
|
||||
template<FragmentedView View>
|
||||
void skip_empty_fragments(View& v) {
|
||||
while (!v.empty() && v.current_fragment().empty()) {
|
||||
v.remove_current();
|
||||
}
|
||||
}
|
||||
|
||||
template<FragmentedView V1, FragmentedView V2>
|
||||
int compare_unsigned(V1 v1, V2 v2) {
|
||||
while (!v1.empty() && !v2.empty()) {
|
||||
@@ -272,6 +279,8 @@ int compare_unsigned(V1 v1, V2 v2) {
|
||||
}
|
||||
v1.remove_prefix(n);
|
||||
v2.remove_prefix(n);
|
||||
skip_empty_fragments(v1);
|
||||
skip_empty_fragments(v2);
|
||||
}
|
||||
return v1.size_bytes() - v2.size_bytes();
|
||||
}
|
||||
@@ -286,5 +295,7 @@ void write_fragmented(Dest& dest, Src src) {
|
||||
memcpy(dest.current_fragment().data(), src.current_fragment().data(), n);
|
||||
dest.remove_prefix(n);
|
||||
src.remove_prefix(n);
|
||||
skip_empty_fragments(dest);
|
||||
skip_empty_fragments(src);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,11 +69,15 @@ public:
|
||||
// Starts a new phase and waits for all operations started in any of the earlier phases.
|
||||
// It is fine to start multiple awaits in parallel.
|
||||
// Strong exception guarantees.
|
||||
future<> advance_and_await() {
|
||||
future<> advance_and_await() noexcept {
|
||||
try {
|
||||
auto new_gate = make_lw_shared<gate>();
|
||||
++_phase;
|
||||
auto old_gate = std::exchange(_gate, std::move(new_gate));
|
||||
return old_gate->close().then([old_gate, op = start()] {});
|
||||
} catch (...) {
|
||||
return current_exception_as_future();
|
||||
}
|
||||
}
|
||||
|
||||
// Returns current phase number. The smallest value returned is 0.
|
||||
|
||||
@@ -120,6 +120,26 @@ protected:
|
||||
}
|
||||
};
|
||||
|
||||
void* internal::throwing_allocator::Malloc(size_t size) {
|
||||
void* ret = base::Malloc(size);
|
||||
if (size > 0 && !ret) {
|
||||
throw rjson::error(format("Failed to allocate {} bytes", size));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void* internal::throwing_allocator::Realloc(void* orig_ptr, size_t orig_size, size_t new_size) {
|
||||
void* ret = base::Realloc(orig_ptr, orig_size, new_size);
|
||||
if (new_size > 0 && !ret) {
|
||||
throw rjson::error(format("Failed to reallocate {} bytes to {} bytes from {}", orig_size, new_size, orig_ptr));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void internal::throwing_allocator::Free(void* ptr) {
|
||||
base::Free(ptr);
|
||||
}
|
||||
|
||||
std::string print(const rjson::value& value) {
|
||||
string_buffer buffer;
|
||||
guarded_yieldable_json_handler<writer, false> writer(buffer, 78);
|
||||
@@ -262,6 +282,15 @@ void set(rjson::value& base, rjson::string_ref_type name, rjson::string_ref_type
|
||||
base.AddMember(name, rjson::value(member), the_allocator);
|
||||
}
|
||||
|
||||
void replace_with_string_name(rjson::value& base, const std::string_view name, rjson::value&& member) {
|
||||
rjson::value *m = rjson::find(base, name);
|
||||
if (m) {
|
||||
*m = std::move(member);
|
||||
} else {
|
||||
set_with_string_name(base, name, std::move(member));
|
||||
}
|
||||
}
|
||||
|
||||
void push_back(rjson::value& base_array, rjson::value&& item) {
|
||||
base_array.PushBack(std::move(item), the_allocator);
|
||||
|
||||
|
||||
@@ -66,18 +66,35 @@ public:
|
||||
#include <rapidjson/writer.h>
|
||||
#include <rapidjson/stringbuffer.h>
|
||||
#include <rapidjson/error/en.h>
|
||||
#include <rapidjson/allocators.h>
|
||||
#include <seastar/core/sstring.hh>
|
||||
#include "seastarx.hh"
|
||||
|
||||
namespace rjson {
|
||||
|
||||
using allocator = rapidjson::CrtAllocator;
|
||||
// The internal namespace is a workaround for the fact that fmt::format
|
||||
// also has a to_string_view function and erroneously looks up our rjson::to_string_view
|
||||
// if this allocator is in the rjson namespace.
|
||||
namespace internal {
|
||||
// Implements an interface conforming to the one in rapidjson/allocators.h,
|
||||
// but throws rjson::error on allocation failures
|
||||
class throwing_allocator : public rapidjson::CrtAllocator {
|
||||
using base = rapidjson::CrtAllocator;
|
||||
public:
|
||||
static const bool kNeedFree = base::kNeedFree;
|
||||
void* Malloc(size_t size);
|
||||
void* Realloc(void* orig_ptr, size_t orig_size, size_t new_size);
|
||||
static void Free(void* ptr);
|
||||
};
|
||||
}
|
||||
|
||||
using allocator = internal::throwing_allocator;
|
||||
using encoding = rapidjson::UTF8<>;
|
||||
using document = rapidjson::GenericDocument<encoding, allocator>;
|
||||
using document = rapidjson::GenericDocument<encoding, allocator, allocator>;
|
||||
using value = rapidjson::GenericValue<encoding, allocator>;
|
||||
using string_ref_type = value::StringRefType;
|
||||
using string_buffer = rapidjson::GenericStringBuffer<encoding>;
|
||||
using writer = rapidjson::Writer<string_buffer, encoding>;
|
||||
using string_buffer = rapidjson::GenericStringBuffer<encoding, allocator>;
|
||||
using writer = rapidjson::Writer<string_buffer, encoding, encoding, allocator>;
|
||||
using type = rapidjson::Type;
|
||||
|
||||
/**
|
||||
@@ -186,24 +203,37 @@ std::optional<T> get_opt(const rjson::value& value, std::string_view name) {
|
||||
}
|
||||
}
|
||||
|
||||
// Sets a member in given JSON object by moving the member - allocates the name.
|
||||
// The various set*() functions below *add* a new member to a JSON object.
|
||||
// They all assume that a member with the same key (name) doesn't already
|
||||
// exist in that object, so they are meant to be used just to build a new
|
||||
// object from scratch. If a member with the same name *may* exist, and
|
||||
// might need to be replaced, use the replace*() functions instead.
|
||||
// The benefit of the set*() functions is that they are faster (O(1),
// compared to O(n) for the replace*() functions, which need to inspect
// the existing members).
|
||||
|
||||
// Adds a member to a given JSON object by moving the member - allocates the name.
|
||||
// Throws if base is not a JSON object.
|
||||
// Assumes a member with the same name does not yet exist in base.
|
||||
void set_with_string_name(rjson::value& base, std::string_view name, rjson::value&& member);
|
||||
|
||||
// Sets a string member in given JSON object by assigning its reference - allocates the name.
|
||||
// Adds a string member to a given JSON object by assigning its reference - allocates the name.
|
||||
// NOTICE: member string liveness must be ensured to be at least as long as base's.
|
||||
// Throws if base is not a JSON object.
|
||||
// Assumes a member with the same name does not yet exist in base.
|
||||
void set_with_string_name(rjson::value& base, std::string_view name, rjson::string_ref_type member);
|
||||
|
||||
// Sets a member in given JSON object by moving the member.
|
||||
// Adds a member to a given JSON object by moving the member.
|
||||
// NOTICE: name liveness must be ensured to be at least as long as base's.
|
||||
// Throws if base is not a JSON object.
|
||||
// Assumes a member with the same name does not yet exist in base.
|
||||
void set(rjson::value& base, rjson::string_ref_type name, rjson::value&& member);
|
||||
|
||||
// Sets a string member in given JSON object by assigning its reference.
|
||||
// Adds a string member to a given JSON object by assigning its reference.
|
||||
// NOTICE: name liveness must be ensured to be at least as long as base's.
|
||||
// NOTICE: member liveness must be ensured to be at least as long as base's.
|
||||
// Throws if base is not a JSON object.
|
||||
// Assumes a member with the same name does not yet exist in base.
|
||||
void set(rjson::value& base, rjson::string_ref_type name, rjson::string_ref_type member);
|
||||
|
||||
/**
|
||||
@@ -224,6 +254,12 @@ set(rjson::value& base, rjson::string_ref_type name, T&& member) {
|
||||
set(base, std::move(name), std::move(v));
|
||||
}
|
||||
|
||||
// Sets a member in a given JSON object by moving the member - allocates the name.
// If a member with the same name already exists in base, it is replaced.
// Throws if base is not a JSON object.
|
||||
void replace_with_string_name(rjson::value& base, std::string_view name, rjson::value&& member);
|
||||
|
||||
|
||||
// Adds a value to a JSON list by moving the item to its end.
|
||||
// Throws if base_array is not a JSON array.
|
||||
void push_back(rjson::value& base_array, rjson::value&& item);
|
||||
|
||||
@@ -44,8 +44,9 @@ void merge_to_gently(std::list<T>& list1, const std::list<T>& list2, Compare com
|
||||
seastar::thread::maybe_yield();
|
||||
if (first1 == last1) {
|
||||
// Copy remaining items of list2 into list1
|
||||
std::copy_if(first2, last2, std::back_inserter(list1), [] (const auto&) { return true; });
|
||||
return;
|
||||
list1.insert(last1, *first2);
|
||||
++first2;
|
||||
continue;
|
||||
}
|
||||
if (comp(*first2, *first1)) {
|
||||
first1 = list1.insert(first1, *first2);
|
||||
|
||||
Reference in New Issue
Block a user