Compare commits
130 Commits
scylla-2.3
...
next-2.3
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b252bba4a2 | ||
|
|
a0b9fcc041 | ||
|
|
35c9b675c1 | ||
|
|
d71836fef7 | ||
|
|
f8e150e97c | ||
|
|
10c300f894 | ||
|
|
de1d3e5c6b | ||
|
|
69810c13ca | ||
|
|
9b025a5742 | ||
|
|
74eebc4cab | ||
|
|
9b2ca4ee44 | ||
|
|
773bf45774 | ||
|
|
c6705b4335 | ||
|
|
3997871b4d | ||
|
|
4ff1d731bd | ||
|
|
0e0f9143c9 | ||
|
|
9d809d6ea4 | ||
|
|
630d599c34 | ||
|
|
0933c1a00a | ||
|
|
7a7099fcfb | ||
|
|
50235aacb4 | ||
|
|
e888009f12 | ||
|
|
a19615ee9b | ||
|
|
357ca67fda | ||
|
|
7818c63eb1 | ||
|
|
da10eae18c | ||
|
|
d5292cd3ec | ||
|
|
9cb35361d9 | ||
|
|
3e285248be | ||
|
|
6f10ccb441 | ||
|
|
df420499bc | ||
|
|
d29527b4e1 | ||
|
|
8a90e242e4 | ||
|
|
8a78c0aba9 | ||
|
|
8a2bbcf138 | ||
|
|
22c891e6df | ||
|
|
1841d0c2d9 | ||
|
|
e10107fe5a | ||
|
|
0b3a4679db | ||
|
|
ba60d666a9 | ||
|
|
6ea4d0b75c | ||
|
|
8c5911f312 | ||
|
|
de00d7f5a1 | ||
|
|
e5f9dae4bb | ||
|
|
e13e796290 | ||
|
|
336c771663 | ||
|
|
82968afc25 | ||
|
|
383dcffb53 | ||
|
|
0c2abc007c | ||
|
|
1498c4f150 | ||
|
|
f388992a94 | ||
|
|
310540c11f | ||
|
|
7d833023cc | ||
|
|
d94ac196e0 | ||
|
|
1d7430995e | ||
|
|
b662a7f8a4 | ||
|
|
447ad72882 | ||
|
|
b8485d3bce | ||
|
|
034b0f50db | ||
|
|
12ec0becf3 | ||
|
|
666b19552d | ||
|
|
178f870a03 | ||
|
|
1b18f16dc1 | ||
|
|
28934575e4 | ||
|
|
182cbeefb0 | ||
|
|
b70fc41a90 | ||
|
|
debfc795b2 | ||
|
|
0d094575ec | ||
|
|
20baef69a9 | ||
|
|
1bac88601d | ||
|
|
e581fd1463 | ||
|
|
b366bff998 | ||
|
|
38e6984ba5 | ||
|
|
332f76579e | ||
|
|
315a03cf6c | ||
|
|
1847dc7a6a | ||
|
|
dd11b5987e | ||
|
|
a134e8699a | ||
|
|
bd7dcbb8d2 | ||
|
|
74e61528a6 | ||
|
|
5eb4fde2d5 | ||
|
|
cc0703f8ca | ||
|
|
678283a5bb | ||
|
|
552c0d7641 | ||
|
|
860c06660b | ||
|
|
db733ba075 | ||
|
|
88677d39c8 | ||
|
|
d767dee5ec | ||
|
|
702f6ee1b7 | ||
|
|
473b9aec65 | ||
|
|
b548061257 | ||
|
|
01165a9ae7 | ||
|
|
5cdb963768 | ||
|
|
7c9b9a4e24 | ||
|
|
f475c65ae6 | ||
|
|
687372bc48 | ||
|
|
65c140121c | ||
|
|
ed68ad220f | ||
|
|
35f4b8fbbe | ||
|
|
48012fe418 | ||
|
|
c862ccda91 | ||
|
|
83b1057c4b | ||
|
|
c1cb779dd2 | ||
|
|
b47d18f9fd | ||
|
|
f8713b019e | ||
|
|
cd5e4eace5 | ||
|
|
4fb5403670 | ||
|
|
e9df6c42ce | ||
|
|
5fdf492ccc | ||
|
|
fd2b02a12c | ||
|
|
f8cec2f891 | ||
|
|
e4d6577ef2 | ||
|
|
346027248d | ||
|
|
2cf6191353 | ||
|
|
b52d647de2 | ||
|
|
f7c96a37f1 | ||
|
|
ae71ffdcfd | ||
|
|
a235900388 | ||
|
|
be9f150341 | ||
|
|
2478fa1f6e | ||
|
|
d95ac1826e | ||
|
|
6fc17345e9 | ||
|
|
4bfa0ae247 | ||
|
|
174b7870e6 | ||
|
|
e95b4ee825 | ||
|
|
464305de1c | ||
|
|
3a1a9e1a11 | ||
|
|
90dac5d944 | ||
|
|
e5a83d105c | ||
|
|
9b4a0a2879 |
@@ -1,6 +1,6 @@
|
||||
#!/bin/sh
|
||||
|
||||
VERSION=2.3.rc1
|
||||
VERSION=2.3.6
|
||||
|
||||
if test -f version
|
||||
then
|
||||
|
||||
@@ -2228,11 +2228,11 @@
|
||||
"description":"The column family"
|
||||
},
|
||||
"total":{
|
||||
"type":"int",
|
||||
"type":"long",
|
||||
"description":"The total snapshot size"
|
||||
},
|
||||
"live":{
|
||||
"type":"int",
|
||||
"type":"long",
|
||||
"description":"The live snapshot size"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
#include "database.hh"
|
||||
#include "schema_builder.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
#include "timeout_config.hh"
|
||||
|
||||
namespace auth {
|
||||
|
||||
@@ -94,4 +95,10 @@ future<> wait_for_schema_agreement(::service::migration_manager& mm, const datab
|
||||
});
|
||||
}
|
||||
|
||||
const timeout_config& internal_distributed_timeout_config() noexcept {
|
||||
static const auto t = 5s;
|
||||
static const timeout_config tc{t, t, t, t, t, t, t};
|
||||
return tc;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
class database;
|
||||
class timeout_config;
|
||||
|
||||
namespace service {
|
||||
class migration_manager;
|
||||
@@ -82,4 +83,9 @@ future<> create_metadata_table_if_missing(
|
||||
|
||||
future<> wait_for_schema_agreement(::service::migration_manager&, const database&);
|
||||
|
||||
///
|
||||
/// Time-outs for internal, non-local CQL queries.
|
||||
///
|
||||
const timeout_config& internal_distributed_timeout_config() noexcept;
|
||||
|
||||
}
|
||||
|
||||
@@ -228,7 +228,7 @@ default_authorizer::modify(
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::ONE,
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{permissions::to_strings(set), sstring(role_name), resource.name()}).discard_result();
|
||||
});
|
||||
}
|
||||
@@ -254,7 +254,7 @@ future<std::vector<permission_details>> default_authorizer::list_all() const {
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::ONE,
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{},
|
||||
true).then([](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
std::vector<permission_details> all_details;
|
||||
@@ -282,7 +282,7 @@ future<> default_authorizer::revoke_all(stdx::string_view role_name) const {
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::ONE,
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name)}).discard_result().handle_exception([role_name](auto ep) {
|
||||
try {
|
||||
std::rethrow_exception(ep);
|
||||
|
||||
@@ -149,7 +149,9 @@ static sstring gensalt() {
|
||||
// blowfish 2011 fix, blowfish, sha512, sha256, md5
|
||||
for (sstring pfx : { "$2y$", "$2a$", "$6$", "$5$", "$1$" }) {
|
||||
salt = pfx + input;
|
||||
if (crypt_r("fisk", salt.c_str(), &tlcrypt)) {
|
||||
const char* e = crypt_r("fisk", salt.c_str(), &tlcrypt);
|
||||
|
||||
if (e && (e[0] != '*')) {
|
||||
prefix = pfx;
|
||||
return salt;
|
||||
}
|
||||
@@ -184,7 +186,7 @@ future<> password_authenticator::migrate_legacy_metadata() const {
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
infinite_timeout_config).then([this](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
internal_distributed_timeout_config()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
|
||||
auto username = row.get_as<sstring>("username");
|
||||
auto salted_hash = row.get_as<sstring>(SALTED_HASH);
|
||||
@@ -192,7 +194,7 @@ future<> password_authenticator::migrate_legacy_metadata() const {
|
||||
return _qp.process(
|
||||
update_row_query,
|
||||
consistency_for_user(username),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{std::move(salted_hash), username}).discard_result();
|
||||
}).finally([results] {});
|
||||
}).then([] {
|
||||
@@ -209,7 +211,7 @@ future<> password_authenticator::create_default_if_missing() const {
|
||||
return _qp.process(
|
||||
update_row_query,
|
||||
db::consistency_level::QUORUM,
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{hashpw(DEFAULT_USER_PASSWORD), DEFAULT_USER_NAME}).then([](auto&&) {
|
||||
plogger.info("Created default superuser authentication record.");
|
||||
});
|
||||
@@ -309,13 +311,17 @@ future<authenticated_user> password_authenticator::authenticate(
|
||||
return _qp.process(
|
||||
query,
|
||||
consistency_for_user(username),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{username},
|
||||
true);
|
||||
}).then_wrapped([=](future<::shared_ptr<cql3::untyped_result_set>> f) {
|
||||
try {
|
||||
auto res = f.get0();
|
||||
if (res->empty() || !checkpw(password, res->one().get_as<sstring>(SALTED_HASH))) {
|
||||
auto salted_hash = std::experimental::optional<sstring>();
|
||||
if (!res->empty()) {
|
||||
salted_hash = res->one().get_opt<sstring>(SALTED_HASH);
|
||||
}
|
||||
if (!salted_hash || !checkpw(password, *salted_hash)) {
|
||||
throw exceptions::authentication_exception("Username and/or password are incorrect");
|
||||
}
|
||||
return make_ready_future<authenticated_user>(username);
|
||||
@@ -337,7 +343,7 @@ future<> password_authenticator::create(stdx::string_view role_name, const authe
|
||||
return _qp.process(
|
||||
update_row_query,
|
||||
consistency_for_user(role_name),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{hashpw(*options.password), sstring(role_name)}).discard_result();
|
||||
}
|
||||
|
||||
@@ -355,7 +361,7 @@ future<> password_authenticator::alter(stdx::string_view role_name, const authen
|
||||
return _qp.process(
|
||||
query,
|
||||
consistency_for_user(role_name),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{hashpw(*options.password), sstring(role_name)}).discard_result();
|
||||
}
|
||||
|
||||
@@ -366,7 +372,10 @@ future<> password_authenticator::drop(stdx::string_view name) const {
|
||||
meta::roles_table::qualified_name(),
|
||||
meta::roles_table::role_col_name);
|
||||
|
||||
return _qp.process(query, consistency_for_user(name), infinite_timeout_config, {sstring(name)}).discard_result();
|
||||
return _qp.process(
|
||||
query, consistency_for_user(name),
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(name)}).discard_result();
|
||||
}
|
||||
|
||||
future<custom_options> password_authenticator::query_custom_options(stdx::string_view role_name) const {
|
||||
|
||||
@@ -79,7 +79,7 @@ future<bool> default_role_row_satisfies(
|
||||
return qp.process(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{meta::DEFAULT_SUPERUSER_NAME},
|
||||
true).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
if (results->empty()) {
|
||||
@@ -104,7 +104,7 @@ future<bool> any_nondefault_role_row_satisfies(
|
||||
return qp.process(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
infinite_timeout_config).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
internal_distributed_timeout_config()).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
if (results->empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -196,6 +196,10 @@ future<> service::start() {
|
||||
}
|
||||
|
||||
future<> service::stop() {
|
||||
// Only one of the shards has the listener registered, but let's try to
|
||||
// unregister on each one just to make sure.
|
||||
_migration_manager.unregister_listener(_migration_listener.get());
|
||||
|
||||
return _permissions_cache->stop().then([this] {
|
||||
return when_all_succeed(_role_manager->stop(), _authorizer->stop(), _authenticator->stop());
|
||||
});
|
||||
|
||||
@@ -89,7 +89,7 @@ static future<stdx::optional<record>> find_record(cql3::query_processor& qp, std
|
||||
return qp.process(
|
||||
query,
|
||||
consistency_for_role(role_name),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name)},
|
||||
true).then([](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
if (results->empty()) {
|
||||
@@ -174,7 +174,7 @@ future<> standard_role_manager::create_default_role_if_missing() const {
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{meta::DEFAULT_SUPERUSER_NAME}).then([](auto&&) {
|
||||
log.info("Created default superuser role '{}'.", meta::DEFAULT_SUPERUSER_NAME);
|
||||
return make_ready_future<>();
|
||||
@@ -201,7 +201,7 @@ future<> standard_role_manager::migrate_legacy_metadata() const {
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
infinite_timeout_config).then([this](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
internal_distributed_timeout_config()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
|
||||
role_config config;
|
||||
config.is_superuser = row.get_as<bool>("super");
|
||||
@@ -263,7 +263,7 @@ future<> standard_role_manager::create_or_replace(stdx::string_view role_name, c
|
||||
return _qp.process(
|
||||
query,
|
||||
consistency_for_role(role_name),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name), c.is_superuser, c.can_login},
|
||||
true).discard_result();
|
||||
}
|
||||
@@ -307,7 +307,7 @@ standard_role_manager::alter(stdx::string_view role_name, const role_config_upda
|
||||
build_column_assignments(u),
|
||||
meta::roles_table::role_col_name),
|
||||
consistency_for_role(role_name),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name)}).discard_result();
|
||||
});
|
||||
}
|
||||
@@ -327,7 +327,7 @@ future<> standard_role_manager::drop(stdx::string_view role_name) const {
|
||||
return _qp.process(
|
||||
query,
|
||||
consistency_for_role(role_name),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name)}).then([this, role_name](::shared_ptr<cql3::untyped_result_set> members) {
|
||||
return parallel_for_each(
|
||||
members->begin(),
|
||||
@@ -367,7 +367,7 @@ future<> standard_role_manager::drop(stdx::string_view role_name) const {
|
||||
return _qp.process(
|
||||
query,
|
||||
consistency_for_role(role_name),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name)}).discard_result();
|
||||
};
|
||||
|
||||
@@ -394,7 +394,7 @@ standard_role_manager::modify_membership(
|
||||
return _qp.process(
|
||||
query,
|
||||
consistency_for_role(grantee_name),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{role_set{sstring(role_name)}, sstring(grantee_name)}).discard_result();
|
||||
};
|
||||
|
||||
@@ -406,7 +406,7 @@ standard_role_manager::modify_membership(
|
||||
"INSERT INTO %s (role, member) VALUES (?, ?)",
|
||||
meta::role_members_table::qualified_name()),
|
||||
consistency_for_role(role_name),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name), sstring(grantee_name)}).discard_result();
|
||||
|
||||
case membership_change::remove:
|
||||
@@ -415,7 +415,7 @@ standard_role_manager::modify_membership(
|
||||
"DELETE FROM %s WHERE role = ? AND member = ?",
|
||||
meta::role_members_table::qualified_name()),
|
||||
consistency_for_role(role_name),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name), sstring(grantee_name)}).discard_result();
|
||||
}
|
||||
|
||||
@@ -516,7 +516,10 @@ future<role_set> standard_role_manager::query_all() const {
|
||||
// To avoid many copies of a view.
|
||||
static const auto role_col_name_string = sstring(meta::roles_table::role_col_name);
|
||||
|
||||
return _qp.process(query, db::consistency_level::QUORUM, infinite_timeout_config).then([](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
internal_distributed_timeout_config()).then([](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
role_set roles;
|
||||
|
||||
std::transform(
|
||||
|
||||
@@ -364,7 +364,7 @@ future<> cache_flat_mutation_reader::read_from_underlying(db::timeout_clock::tim
|
||||
}
|
||||
});
|
||||
return make_ready_future<>();
|
||||
});
|
||||
}, timeout);
|
||||
}
|
||||
|
||||
inline
|
||||
|
||||
@@ -303,6 +303,7 @@ scylla_tests = [
|
||||
'tests/imr_test',
|
||||
'tests/partition_data_test',
|
||||
'tests/reusable_buffer_test',
|
||||
'tests/json_test'
|
||||
]
|
||||
|
||||
perf_tests = [
|
||||
@@ -406,6 +407,7 @@ scylla_core = (['database.cc',
|
||||
'mutation_reader.cc',
|
||||
'flat_mutation_reader.cc',
|
||||
'mutation_query.cc',
|
||||
'json.cc',
|
||||
'keys.cc',
|
||||
'counters.cc',
|
||||
'compress.cc',
|
||||
@@ -514,6 +516,7 @@ scylla_core = (['database.cc',
|
||||
'db/consistency_level.cc',
|
||||
'db/system_keyspace.cc',
|
||||
'db/system_distributed_keyspace.cc',
|
||||
'db/size_estimates_virtual_reader.cc',
|
||||
'db/schema_tables.cc',
|
||||
'db/cql_type_parser.cc',
|
||||
'db/legacy_schema_migrator.cc',
|
||||
@@ -740,6 +743,7 @@ pure_boost_tests = set([
|
||||
'tests/imr_test',
|
||||
'tests/partition_data_test',
|
||||
'tests/reusable_buffer_test',
|
||||
'tests/json_test',
|
||||
])
|
||||
|
||||
tests_not_using_seastar_test_framework = set([
|
||||
@@ -791,7 +795,7 @@ deps['tests/log_heap_test'] = ['tests/log_heap_test.cc']
|
||||
deps['tests/anchorless_list_test'] = ['tests/anchorless_list_test.cc']
|
||||
deps['tests/perf/perf_fast_forward'] += ['release.cc']
|
||||
deps['tests/meta_test'] = ['tests/meta_test.cc']
|
||||
deps['tests/imr_test'] = ['tests/imr_test.cc']
|
||||
deps['tests/imr_test'] = ['tests/imr_test.cc', 'utils/logalloc.cc', 'utils/dynamic_bitset.cc']
|
||||
deps['tests/reusable_buffer_test'] = ['tests/reusable_buffer_test.cc']
|
||||
|
||||
warnings = [
|
||||
|
||||
@@ -473,9 +473,9 @@ insertStatement returns [::shared_ptr<raw::modification_statement> expr]
|
||||
::shared_ptr<cql3::term::raw> json_value;
|
||||
}
|
||||
: K_INSERT K_INTO cf=columnFamilyName
|
||||
'(' c1=cident { column_names.push_back(c1); } ( ',' cn=cident { column_names.push_back(cn); } )* ')'
|
||||
( K_VALUES
|
||||
'(' v1=term { values.push_back(v1); } ( ',' vn=term { values.push_back(vn); } )* ')'
|
||||
('(' c1=cident { column_names.push_back(c1); } ( ',' cn=cident { column_names.push_back(cn); } )* ')'
|
||||
K_VALUES
|
||||
'(' v1=term { values.push_back(v1); } ( ',' vn=term { values.push_back(vn); } )* ')'
|
||||
( K_IF K_NOT K_EXISTS { if_not_exists = true; } )?
|
||||
( usingClause[attrs] )?
|
||||
{
|
||||
|
||||
@@ -67,6 +67,12 @@ class error_collector : public error_listener<RecognizerType, ExceptionBaseType>
|
||||
*/
|
||||
const sstring_view _query;
|
||||
|
||||
/**
|
||||
* An empty bitset to be used as a workaround for AntLR null dereference
|
||||
* bug.
|
||||
*/
|
||||
static typename ExceptionBaseType::BitsetListType _empty_bit_list;
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
@@ -144,6 +150,14 @@ private:
|
||||
break;
|
||||
}
|
||||
default:
|
||||
// AntLR Exception class has a bug of dereferencing a null
|
||||
// pointer in the displayRecognitionError. The following
|
||||
// if statement makes sure it will not be null before the
|
||||
// call to that function (displayRecognitionError).
|
||||
// bug reference: https://github.com/antlr/antlr3/issues/191
|
||||
if (!ex->get_expectingSet()) {
|
||||
ex->set_expectingSet(&_empty_bit_list);
|
||||
}
|
||||
ex->displayRecognitionError(token_names, msg);
|
||||
}
|
||||
return msg.str();
|
||||
@@ -345,4 +359,8 @@ private:
|
||||
#endif
|
||||
};
|
||||
|
||||
template<typename RecognizerType, typename TokenType, typename ExceptionBaseType>
|
||||
typename ExceptionBaseType::BitsetListType
|
||||
error_collector<RecognizerType,TokenType,ExceptionBaseType>::_empty_bit_list = typename ExceptionBaseType::BitsetListType();
|
||||
|
||||
}
|
||||
|
||||
@@ -177,7 +177,7 @@ shared_ptr<function>
|
||||
make_to_json_function(data_type t) {
|
||||
return make_native_scalar_function<true>("tojson", utf8_type, {t},
|
||||
[t](cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
|
||||
return utf8_type->decompose(t->to_json_string(parameters[0].value()));
|
||||
return utf8_type->decompose(t->to_json_string(parameters[0]));
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -217,19 +217,18 @@ void query_options::prepare(const std::vector<::shared_ptr<column_specification>
|
||||
}
|
||||
|
||||
auto& names = *_names;
|
||||
std::vector<cql3::raw_value> ordered_values;
|
||||
std::vector<cql3::raw_value_view> ordered_values;
|
||||
ordered_values.reserve(specs.size());
|
||||
for (auto&& spec : specs) {
|
||||
auto& spec_name = spec->name->text();
|
||||
for (size_t j = 0; j < names.size(); j++) {
|
||||
if (names[j] == spec_name) {
|
||||
ordered_values.emplace_back(_values[j]);
|
||||
ordered_values.emplace_back(_value_views[j]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
_values = std::move(ordered_values);
|
||||
fill_value_views();
|
||||
_value_views = std::move(ordered_values);
|
||||
}
|
||||
|
||||
void query_options::fill_value_views()
|
||||
|
||||
@@ -239,11 +239,11 @@ query_processor::process(const sstring_view& query_string, service::query_state&
|
||||
log.trace("process: \"{}\"", query_string);
|
||||
tracing::trace(query_state.get_trace_state(), "Parsing a statement");
|
||||
auto p = get_statement(query_string, query_state.get_client_state());
|
||||
options.prepare(p->bound_names);
|
||||
auto cql_statement = p->statement;
|
||||
if (cql_statement->get_bound_terms() != options.get_values_count()) {
|
||||
throw exceptions::invalid_request_exception("Invalid amount of bind variables");
|
||||
}
|
||||
options.prepare(p->bound_names);
|
||||
|
||||
warn(unimplemented::cause::METRICS);
|
||||
#if 0
|
||||
|
||||
@@ -202,6 +202,14 @@ public:
|
||||
const query_options& options,
|
||||
gc_clock::time_point now) const override;
|
||||
|
||||
virtual std::vector<bytes_opt> values_raw(const query_options& options) const = 0;
|
||||
|
||||
virtual std::vector<bytes_opt> values(const query_options& options) const override {
|
||||
std::vector<bytes_opt> ret = values_raw(options);
|
||||
std::sort(ret.begin(),ret.end());
|
||||
ret.erase(std::unique(ret.begin(),ret.end()),ret.end());
|
||||
return ret;
|
||||
}
|
||||
#if 0
|
||||
@Override
|
||||
protected final boolean isSupportedBy(SecondaryIndex index)
|
||||
@@ -224,7 +232,7 @@ public:
|
||||
return abstract_restriction::term_uses_function(_values, ks_name, function_name);
|
||||
}
|
||||
|
||||
virtual std::vector<bytes_opt> values(const query_options& options) const override {
|
||||
virtual std::vector<bytes_opt> values_raw(const query_options& options) const override {
|
||||
std::vector<bytes_opt> ret;
|
||||
for (auto&& v : _values) {
|
||||
ret.emplace_back(to_bytes_opt(v->bind_and_get(options)));
|
||||
@@ -249,7 +257,7 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual std::vector<bytes_opt> values(const query_options& options) const override {
|
||||
virtual std::vector<bytes_opt> values_raw(const query_options& options) const override {
|
||||
auto&& lval = dynamic_pointer_cast<multi_item_terminal>(_marker->bind(options));
|
||||
if (!lval) {
|
||||
throw exceptions::invalid_request_exception("Invalid null value for IN restriction");
|
||||
|
||||
@@ -105,9 +105,11 @@ public:
|
||||
virtual void reset() = 0;
|
||||
|
||||
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) override {
|
||||
if (receiver->type == get_type()) {
|
||||
auto t1 = receiver->type->underlying_type();
|
||||
auto t2 = get_type()->underlying_type();
|
||||
if (t1 == t2) {
|
||||
return assignment_testable::test_result::EXACT_MATCH;
|
||||
} else if (receiver->type->is_value_compatible_with(*get_type())) {
|
||||
} else if (t1->is_value_compatible_with(*t2)) {
|
||||
return assignment_testable::test_result::WEAKLY_ASSIGNABLE;
|
||||
} else {
|
||||
return assignment_testable::test_result::NOT_ASSIGNABLE;
|
||||
|
||||
@@ -96,12 +96,8 @@ public:
|
||||
encoded_row.write("\\\"", 2);
|
||||
}
|
||||
encoded_row.write("\": ", 3);
|
||||
if (parameters[i]) {
|
||||
sstring row_sstring = _selector_types[i]->to_json_string(parameters[i].value());
|
||||
encoded_row.write(row_sstring.c_str(), row_sstring.size());
|
||||
} else {
|
||||
encoded_row.write("null", 4);
|
||||
}
|
||||
sstring row_sstring = _selector_types[i]->to_json_string(parameters[i]);
|
||||
encoded_row.write(row_sstring.c_str(), row_sstring.size());
|
||||
}
|
||||
encoded_row.write("}", 1);
|
||||
return encoded_row.linearize().to_string();
|
||||
@@ -974,6 +970,10 @@ std::unique_ptr<prepared_statement> select_statement::prepare(database& db, cql_
|
||||
}
|
||||
|
||||
check_needs_filtering(restrictions);
|
||||
size_t restrictions_size = restrictions->get_partition_key_restrictions()->size() + restrictions->get_clustering_columns_restrictions()->size() + restrictions->get_non_pk_restriction().size();
|
||||
if (restrictions->uses_secondary_indexing() && restrictions_size > 1) {
|
||||
throw exceptions::invalid_request_exception("Indexed query may not contain multiple restrictions in 2.3");
|
||||
}
|
||||
|
||||
::shared_ptr<cql3::statements::select_statement> stmt;
|
||||
if (restrictions->uses_secondary_indexing()) {
|
||||
|
||||
@@ -179,7 +179,21 @@ modification_statement::json_cache_opt insert_prepared_json_statement::maybe_pre
|
||||
void
|
||||
insert_prepared_json_statement::execute_set_value(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params, const column_definition& column, const bytes_opt& value) {
|
||||
if (!value) {
|
||||
if (column.type->is_collection()) {
|
||||
auto& k = static_pointer_cast<const collection_type_impl>(column.type)->_kind;
|
||||
if (&k == &collection_type_impl::kind::list) {
|
||||
lists::setter::execute(m, prefix, params, column, make_shared<lists::value>(lists::value(std::vector<bytes_opt>())));
|
||||
} else if (&k == &collection_type_impl::kind::set) {
|
||||
sets::setter::execute(m, prefix, params, column, make_shared<sets::value>(sets::value(std::set<bytes, serialized_compare>(serialized_compare(empty_type)))));
|
||||
} else if (&k == &collection_type_impl::kind::map) {
|
||||
maps::setter::execute(m, prefix, params, column, make_shared<maps::value>(maps::value(std::map<bytes, bytes, serialized_compare>(serialized_compare(empty_type)))));
|
||||
} else {
|
||||
throw exceptions::invalid_request_exception("Incorrect value kind in JSON INSERT statement");
|
||||
}
|
||||
return;
|
||||
}
|
||||
m.set_cell(prefix, column, std::move(operation::make_dead_cell(params)));
|
||||
return;
|
||||
} else if (!column.type->is_collection()) {
|
||||
constants::setter::execute(m, prefix, params, column, raw_value_view::make_value(bytes_view(*value)));
|
||||
return;
|
||||
@@ -204,15 +218,17 @@ insert_prepared_json_statement::execute_set_value(mutation& m, const clustering_
|
||||
dht::partition_range_vector
|
||||
insert_prepared_json_statement::build_partition_keys(const query_options& options, const json_cache_opt& json_cache) {
|
||||
dht::partition_range_vector ranges;
|
||||
std::vector<bytes_opt> exploded;
|
||||
for (const auto& def : s->partition_key_columns()) {
|
||||
auto json_value = json_cache->at(def.name_as_text());
|
||||
auto k = query::range<partition_key>::make_singular(partition_key::from_single_value(*s, json_value.value()));
|
||||
ranges.emplace_back(std::move(k).transform(
|
||||
[this] (partition_key&& k) -> query::ring_position {
|
||||
auto token = dht::global_partitioner().get_token(*s, k);
|
||||
return { std::move(token), std::move(k) };
|
||||
}));
|
||||
if (!json_value) {
|
||||
throw exceptions::invalid_request_exception(sprint("Missing mandatory PRIMARY KEY part %s", def.name_as_text()));
|
||||
}
|
||||
exploded.emplace_back(*json_value);
|
||||
}
|
||||
auto pkey = partition_key::from_optional_exploded(*s, std::move(exploded));
|
||||
auto k = query::range<query::ring_position>::make_singular(dht::global_partitioner().decorate_key(*s, std::move(pkey)));
|
||||
ranges.emplace_back(std::move(k));
|
||||
return ranges;
|
||||
}
|
||||
|
||||
@@ -221,7 +237,10 @@ query::clustering_row_ranges insert_prepared_json_statement::create_clustering_r
|
||||
std::vector<bytes_opt> exploded;
|
||||
for (const auto& def : s->clustering_key_columns()) {
|
||||
auto json_value = json_cache->at(def.name_as_text());
|
||||
exploded.emplace_back(json_value.value());
|
||||
if (!json_value) {
|
||||
throw exceptions::invalid_request_exception(sprint("Missing mandatory PRIMARY KEY part %s", def.name_as_text()));
|
||||
}
|
||||
exploded.emplace_back(*json_value);
|
||||
}
|
||||
auto k = query::range<clustering_key_prefix>::make_singular(clustering_key_prefix::from_optional_exploded(*s, std::move(exploded)));
|
||||
ranges.emplace_back(query::clustering_range(std::move(k)));
|
||||
|
||||
@@ -405,7 +405,7 @@ public:
|
||||
in_marker(int32_t bind_index, ::shared_ptr<column_specification> receiver)
|
||||
: abstract_marker(bind_index, std::move(receiver))
|
||||
{
|
||||
assert(dynamic_pointer_cast<const list_type_impl>(receiver->type));
|
||||
assert(dynamic_pointer_cast<const list_type_impl>(_receiver->type));
|
||||
}
|
||||
|
||||
virtual shared_ptr<terminal> bind(const query_options& options) override {
|
||||
|
||||
@@ -53,6 +53,9 @@ update_parameters::get_prefetched_list(
|
||||
return {};
|
||||
}
|
||||
|
||||
if (column.is_static()) {
|
||||
ckey = clustering_key_view::make_empty();
|
||||
}
|
||||
auto i = _prefetched->rows.find(std::make_pair(std::move(pkey), std::move(ckey)));
|
||||
if (i == _prefetched->rows.end()) {
|
||||
return {};
|
||||
|
||||
26
database.cc
26
database.cc
@@ -961,6 +961,11 @@ table::seal_active_memtable(flush_permit&& permit) {
|
||||
}
|
||||
_memtables->add_memtable();
|
||||
_stats.memtable_switch_count++;
|
||||
// This will set evictable occupancy of the old memtable region to zero, so that
|
||||
// this region is considered last for flushing by dirty_memory_manager::flush_when_needed().
|
||||
// If we don't do that, the flusher may keep picking up this memtable list for flushing after
|
||||
// the permit is released even though there is not much to flush in the active memtable of this list.
|
||||
old->region().ground_evictable_occupancy();
|
||||
auto previous_flush = _flush_barrier.advance_and_await();
|
||||
auto op = _flush_barrier.start();
|
||||
|
||||
@@ -1329,6 +1334,7 @@ table::on_compaction_completion(const std::vector<sstables::shared_sstable>& new
|
||||
|
||||
// This is done in the background, so we can consider this compaction completed.
|
||||
seastar::with_gate(_sstable_deletion_gate, [this, sstables_to_remove] {
|
||||
return with_semaphore(_sstable_deletion_sem, 1, [this, sstables_to_remove = std::move(sstables_to_remove)] {
|
||||
return sstables::delete_atomically(sstables_to_remove, *get_large_partition_handler()).then_wrapped([this, sstables_to_remove] (future<> f) {
|
||||
std::exception_ptr eptr;
|
||||
try {
|
||||
@@ -1352,6 +1358,7 @@ table::on_compaction_completion(const std::vector<sstables::shared_sstable>& new
|
||||
return make_exception_future<>(eptr);
|
||||
}
|
||||
return make_ready_future<>();
|
||||
});
|
||||
}).then([this] {
|
||||
// refresh underlying data source in row cache to prevent it from holding reference
|
||||
// to sstables files which were previously deleted.
|
||||
@@ -1473,7 +1480,10 @@ future<> table::cleanup_sstables(sstables::compaction_descriptor descriptor) {
|
||||
static thread_local semaphore sem(1);
|
||||
|
||||
return with_semaphore(sem, 1, [this, &sst] {
|
||||
return this->compact_sstables(sstables::compaction_descriptor({ sst }, sst->get_sstable_level()), true);
|
||||
// release reference to sstables cleaned up, otherwise space usage from their data and index
|
||||
// components cannot be reclaimed until all of them are cleaned.
|
||||
auto sstable_level = sst->get_sstable_level();
|
||||
return this->compact_sstables(sstables::compaction_descriptor({ std::move(sst) }, sstable_level), true);
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -1655,9 +1665,9 @@ future<> distributed_loader::open_sstable(distributed<database>& db, sstables::e
|
||||
// to distribute evenly the resource usage among all shards.
|
||||
|
||||
return db.invoke_on(column_family::calculate_shard_from_sstable_generation(comps.generation),
|
||||
[&db, comps = std::move(comps), func = std::move(func), pc] (database& local) {
|
||||
[&db, comps = std::move(comps), func = std::move(func), &pc] (database& local) {
|
||||
|
||||
return with_semaphore(local.sstable_load_concurrency_sem(), 1, [&db, &local, comps = std::move(comps), func = std::move(func), pc] {
|
||||
return with_semaphore(local.sstable_load_concurrency_sem(), 1, [&db, &local, comps = std::move(comps), func = std::move(func), &pc] {
|
||||
auto& cf = local.find_column_family(comps.ks, comps.cf);
|
||||
|
||||
auto f = sstables::sstable::load_shared_components(cf.schema(), cf._config.datadir, comps.generation, comps.version, comps.format, pc);
|
||||
@@ -3437,6 +3447,13 @@ future<> dirty_memory_manager::flush_when_needed() {
|
||||
// release the biggest amount of memory and is less likely to be generating tiny
|
||||
// SSTables.
|
||||
memtable& candidate_memtable = memtable::from_region(*(this->_virtual_region_group.get_largest_region()));
|
||||
|
||||
if (candidate_memtable.empty()) {
|
||||
// Soft pressure, but nothing to flush. It could be due to fsync or memtable_to_cache lagging.
|
||||
// Back off to avoid OOMing with flush continuations.
|
||||
return sleep(1ms);
|
||||
}
|
||||
|
||||
// Do not wait. The semaphore will protect us against a concurrent flush. But we
|
||||
// want to start a new one as soon as the permits are destroyed and the semaphore is
|
||||
// made ready again, not when we are done with the current one.
|
||||
@@ -3984,6 +4001,7 @@ seal_snapshot(sstring jsondir) {
|
||||
|
||||
future<> table::snapshot(sstring name) {
|
||||
return flush().then([this, name = std::move(name)]() {
|
||||
return with_semaphore(_sstable_deletion_sem, 1, [this, name = std::move(name)]() {
|
||||
auto tables = boost::copy_range<std::vector<sstables::shared_sstable>>(*_sstables->all());
|
||||
return do_with(std::move(tables), [this, name](std::vector<sstables::shared_sstable> & tables) {
|
||||
auto jsondir = _config.datadir + "/snapshots/" + name;
|
||||
@@ -4048,6 +4066,7 @@ future<> table::snapshot(sstring name) {
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -4179,6 +4198,7 @@ future<> table::fail_streaming_mutations(utils::UUID plan_id) {
|
||||
_streaming_memtables_big.erase(it);
|
||||
return entry->flush_in_progress.close().then([this, entry] {
|
||||
for (auto&& sst : entry->sstables) {
|
||||
sst.monitor->write_failed();
|
||||
sst.sstable->mark_for_deletion();
|
||||
}
|
||||
});
|
||||
|
||||
@@ -294,6 +294,8 @@ public:
|
||||
class table;
|
||||
using column_family = table;
|
||||
|
||||
class database_sstable_write_monitor;
|
||||
|
||||
class table : public enable_lw_shared_from_this<table> {
|
||||
public:
|
||||
struct config {
|
||||
@@ -389,7 +391,7 @@ private:
|
||||
// plan memtables and the resulting sstables are not made visible until
|
||||
// the streaming is complete.
|
||||
struct monitored_sstable {
|
||||
std::unique_ptr<sstables::write_monitor> monitor;
|
||||
std::unique_ptr<database_sstable_write_monitor> monitor;
|
||||
sstables::shared_sstable sstable;
|
||||
};
|
||||
|
||||
@@ -428,6 +430,10 @@ private:
|
||||
std::unordered_map<uint64_t, sstables::shared_sstable> _sstables_need_rewrite;
|
||||
// Control background fibers waiting for sstables to be deleted
|
||||
seastar::gate _sstable_deletion_gate;
|
||||
// This semaphore ensures that an operation like snapshot won't have its selected
|
||||
// sstables deleted by compaction in parallel, a race condition which could
|
||||
// easily result in failure.
|
||||
seastar::semaphore _sstable_deletion_sem = {1};
|
||||
// There are situations in which we need to stop writing sstables. Flushers will take
|
||||
// the read lock, and the ones that wish to stop that process will take the write lock.
|
||||
rwlock _sstables_lock;
|
||||
|
||||
@@ -163,7 +163,7 @@ future<> db::commitlog_replayer::impl::init() {
|
||||
// Get all truncation records for the CF and initialize max rps if
|
||||
// present. Cannot do this on demand, as there may be no sstables to
|
||||
// mark the CF as "needed".
|
||||
return db::system_keyspace::get_truncated_position(uuid).then([&map, &uuid](std::vector<db::replay_position> tpps) {
|
||||
return db::system_keyspace::get_truncated_position(uuid).then([&map, uuid](std::vector<db::replay_position> tpps) {
|
||||
for (auto& p : tpps) {
|
||||
rlogger.trace("CF {} truncated at {}", uuid, p);
|
||||
auto& pp = map[p.shard_id()][uuid];
|
||||
|
||||
@@ -686,33 +686,7 @@ read_keyspace_mutation(distributed<service::storage_proxy>& proxy, const sstring
|
||||
static semaphore the_merge_lock {1};
|
||||
|
||||
future<> merge_lock() {
|
||||
// ref: #1088
|
||||
// to avoid deadlocks, we don't want long-standing calls to the shard 0
|
||||
// as they can cause a deadlock:
|
||||
//
|
||||
// fiber1 fiber2
|
||||
// merge_lock() (succeeds)
|
||||
// merge_lock() (waits)
|
||||
// invoke_on_all() (waits on merge_lock to relinquish smp::submit_to slot)
|
||||
//
|
||||
// so we issue the lock calls with a timeout; the slot will be relinquished, and invoke_on_all()
|
||||
// can complete
|
||||
return repeat([] () mutable {
|
||||
return smp::submit_to(0, [] {
|
||||
return the_merge_lock.try_wait();
|
||||
}).then([] (bool result) {
|
||||
if (result) {
|
||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
||||
} else {
|
||||
static thread_local auto rand_engine = std::default_random_engine();
|
||||
auto dist = std::uniform_int_distribution<int>(0, 100);
|
||||
auto to = std::chrono::microseconds(dist(rand_engine));
|
||||
return sleep(to).then([] {
|
||||
return make_ready_future<stop_iteration>(stop_iteration::no);
|
||||
});
|
||||
}
|
||||
});
|
||||
});
|
||||
return smp::submit_to(0, [] { return the_merge_lock.wait(); });
|
||||
}
|
||||
|
||||
future<> merge_unlock() {
|
||||
|
||||
329
db/size_estimates_virtual_reader.cc
Normal file
329
db/size_estimates_virtual_reader.cc
Normal file
@@ -0,0 +1,329 @@
|
||||
/*
|
||||
* Copyright (C) 2019 ScyllaDB
|
||||
*
|
||||
* Modified by ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <boost/range/adaptor/indirected.hpp>
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
#include <boost/range/adaptor/transformed.hpp>
|
||||
#include <boost/range/algorithm/find_if.hpp>
|
||||
|
||||
#include "clustering_bounds_comparator.hh"
|
||||
#include "database.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "dht/i_partitioner.hh"
|
||||
#include "partition_range_compat.hh"
|
||||
#include "range.hh"
|
||||
#include "service/storage_service.hh"
|
||||
#include "stdx.hh"
|
||||
#include "mutation_fragment.hh"
|
||||
#include "sstables/sstables.hh"
|
||||
#include "db/timeout_clock.hh"
|
||||
#include "database.hh"
|
||||
|
||||
#include "db/size_estimates_virtual_reader.hh"
|
||||
|
||||
namespace db {
|
||||
|
||||
namespace size_estimates {
|
||||
|
||||
struct virtual_row {
|
||||
const bytes& cf_name;
|
||||
const token_range& tokens;
|
||||
clustering_key_prefix as_key() const {
|
||||
return clustering_key_prefix::from_exploded(std::vector<bytes_view>{cf_name, tokens.start, tokens.end});
|
||||
}
|
||||
};
|
||||
|
||||
struct virtual_row_comparator {
|
||||
schema_ptr _schema;
|
||||
virtual_row_comparator(schema_ptr schema) : _schema(schema) { }
|
||||
bool operator()(const clustering_key_prefix& key1, const clustering_key_prefix& key2) {
|
||||
return clustering_key_prefix::prefix_equality_less_compare(*_schema)(key1, key2);
|
||||
}
|
||||
bool operator()(const virtual_row& row, const clustering_key_prefix& key) {
|
||||
return operator()(row.as_key(), key);
|
||||
}
|
||||
bool operator()(const clustering_key_prefix& key, const virtual_row& row) {
|
||||
return operator()(key, row.as_key());
|
||||
}
|
||||
};
|
||||
|
||||
// Iterating over the cartesian product of cf_names and token_ranges.
|
||||
class virtual_row_iterator : public std::iterator<std::input_iterator_tag, const virtual_row> {
|
||||
std::reference_wrapper<const std::vector<bytes>> _cf_names;
|
||||
std::reference_wrapper<const std::vector<token_range>> _ranges;
|
||||
size_t _cf_names_idx = 0;
|
||||
size_t _ranges_idx = 0;
|
||||
public:
|
||||
struct end_iterator_tag {};
|
||||
virtual_row_iterator(const std::vector<bytes>& cf_names, const std::vector<token_range>& ranges)
|
||||
: _cf_names(std::ref(cf_names))
|
||||
, _ranges(std::ref(ranges))
|
||||
{ }
|
||||
virtual_row_iterator(const std::vector<bytes>& cf_names, const std::vector<token_range>& ranges, end_iterator_tag)
|
||||
: _cf_names(std::ref(cf_names))
|
||||
, _ranges(std::ref(ranges))
|
||||
, _cf_names_idx(cf_names.size())
|
||||
, _ranges_idx(ranges.size())
|
||||
{
|
||||
if (cf_names.empty() || ranges.empty()) {
|
||||
// The product of an empty range with any range is an empty range.
|
||||
// In this case we want the end iterator to be equal to the begin iterator,
|
||||
// which has_ranges_idx = _cf_names_idx = 0.
|
||||
_ranges_idx = _cf_names_idx = 0;
|
||||
}
|
||||
}
|
||||
virtual_row_iterator& operator++() {
|
||||
if (++_ranges_idx == _ranges.get().size() && ++_cf_names_idx < _cf_names.get().size()) {
|
||||
_ranges_idx = 0;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
virtual_row_iterator operator++(int) {
|
||||
virtual_row_iterator i(*this);
|
||||
++(*this);
|
||||
return i;
|
||||
}
|
||||
const value_type operator*() const {
|
||||
return { _cf_names.get()[_cf_names_idx], _ranges.get()[_ranges_idx] };
|
||||
}
|
||||
bool operator==(const virtual_row_iterator& i) const {
|
||||
return _cf_names_idx == i._cf_names_idx
|
||||
&& _ranges_idx == i._ranges_idx;
|
||||
}
|
||||
bool operator!=(const virtual_row_iterator& i) const {
|
||||
return !(*this == i);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns the keyspaces, ordered by name, as selected by the partition_range.
|
||||
*/
|
||||
static std::vector<sstring> get_keyspaces(const schema& s, const database& db, dht::partition_range range) {
|
||||
struct keyspace_less_comparator {
|
||||
const schema& _s;
|
||||
keyspace_less_comparator(const schema& s) : _s(s) { }
|
||||
dht::ring_position as_ring_position(const sstring& ks) {
|
||||
auto pkey = partition_key::from_single_value(_s, utf8_type->decompose(ks));
|
||||
return dht::global_partitioner().decorate_key(_s, std::move(pkey));
|
||||
}
|
||||
bool operator()(const sstring& ks1, const sstring& ks2) {
|
||||
return as_ring_position(ks1).less_compare(_s, as_ring_position(ks2));
|
||||
}
|
||||
bool operator()(const sstring& ks, const dht::ring_position& rp) {
|
||||
return as_ring_position(ks).less_compare(_s, rp);
|
||||
}
|
||||
bool operator()(const dht::ring_position& rp, const sstring& ks) {
|
||||
return rp.less_compare(_s, as_ring_position(ks));
|
||||
}
|
||||
};
|
||||
auto keyspaces = db.get_non_system_keyspaces();
|
||||
auto cmp = keyspace_less_comparator(s);
|
||||
boost::sort(keyspaces, cmp);
|
||||
return boost::copy_range<std::vector<sstring>>(
|
||||
range.slice(keyspaces, std::move(cmp)) | boost::adaptors::filtered([&s] (const auto& ks) {
|
||||
// If this is a range query, results are divided between shards by the partition key (keyspace_name).
|
||||
return shard_of(dht::global_partitioner().get_token(s,
|
||||
partition_key::from_single_value(s, utf8_type->decompose(ks))))
|
||||
== engine().cpu_id();
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Makes a wrapping range of ring_position from a nonwrapping range of token, used to select sstables.
|
||||
*/
|
||||
static dht::partition_range as_ring_position_range(dht::token_range& r) {
|
||||
stdx::optional<range<dht::ring_position>::bound> start_bound, end_bound;
|
||||
if (r.start()) {
|
||||
start_bound = {{ dht::ring_position(r.start()->value(), dht::ring_position::token_bound::start), r.start()->is_inclusive() }};
|
||||
}
|
||||
if (r.end()) {
|
||||
end_bound = {{ dht::ring_position(r.end()->value(), dht::ring_position::token_bound::end), r.end()->is_inclusive() }};
|
||||
}
|
||||
return dht::partition_range(std::move(start_bound), std::move(end_bound), r.is_singular());
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new range_estimates for the specified range, considering the sstables associated with `cf`.
|
||||
*/
|
||||
static system_keyspace::range_estimates estimate(const column_family& cf, const token_range& r) {
|
||||
int64_t count{0};
|
||||
utils::estimated_histogram hist{0};
|
||||
auto from_bytes = [] (auto& b) {
|
||||
return dht::global_partitioner().from_sstring(utf8_type->to_string(b));
|
||||
};
|
||||
dht::token_range_vector ranges;
|
||||
compat::unwrap_into(
|
||||
wrapping_range<dht::token>({{ from_bytes(r.start) }}, {{ from_bytes(r.end) }}),
|
||||
dht::token_comparator(),
|
||||
[&] (auto&& rng) { ranges.push_back(std::move(rng)); });
|
||||
for (auto&& r : ranges) {
|
||||
auto rp_range = as_ring_position_range(r);
|
||||
for (auto&& sstable : cf.select_sstables(rp_range)) {
|
||||
count += sstable->estimated_keys_for_range(r);
|
||||
hist.merge(sstable->get_stats_metadata().estimated_row_size);
|
||||
}
|
||||
}
|
||||
return {cf.schema(), r.start, r.end, count, count > 0 ? hist.mean() : 0};
|
||||
}
|
||||
|
||||
// Returns the primary ranges for the local node, with every bound rendered as
// the utf8 serialization of the token's string form, sorted by start bound.
//
// The range that is unbounded on the left and the range that is unbounded on
// the right (if both exist and are distinct) are merged into one entry that
// goes from the start of the right-unbounded range to the end of the
// left-unbounded one.
//
// Every bound that gets serialized must be present; this is enforced with an
// assert in `to_bytes`.
future<std::vector<token_range>> get_local_ranges() {
    auto& ss = service::get_local_storage_service();
    return ss.get_local_tokens().then([&ss] (auto&& tokens) {
        auto ranges = ss.get_token_metadata().get_primary_ranges_for(std::move(tokens));
        std::vector<token_range> local_ranges;
        auto to_bytes = [](const stdx::optional<dht::token_range::bound>& b) {
            assert(b);
            return utf8_type->decompose(dht::global_partitioner().to_sstring(b->value()));
        };
        // We merge the ranges to be compatible with how Cassandra shows its size estimates table.
        // All queries will be on that table, where all entries are text and there's no notion of
        // token ranges from the CQL point of view.
        auto left_inf = boost::find_if(ranges, [] (auto&& r) {
            return !r.start() || r.start()->value() == dht::minimum_token();
        });
        // The end bound is the one that has to be inspected here. Looking at the
        // start bound (as the code used to) dereferences an empty optional for a
        // range that has an end but no start.
        auto right_inf = boost::find_if(ranges, [] (auto&& r) {
            return !r.end() || r.end()->value() == dht::maximum_token();
        });
        const bool merge_infinite_ranges =
                left_inf != right_inf && left_inf != ranges.end() && right_inf != ranges.end();
        if (merge_infinite_ranges) {
            local_ranges.push_back(token_range{to_bytes(right_inf->start()), to_bytes(left_inf->end())});
        }
        // The two merged ranges are skipped rather than erased from `ranges`:
        // erasing the first one could invalidate the iterator to the second.
        for (auto it = ranges.begin(); it != ranges.end(); ++it) {
            if (merge_infinite_ranges && (it == left_inf || it == right_inf)) {
                continue;
            }
            local_ranges.push_back(token_range{to_bytes(it->start()), to_bytes(it->end())});
        }
        boost::sort(local_ranges, [] (auto&& tr1, auto&& tr2) {
            return utf8_type->less(tr1.start, tr2.start);
        });
        return local_ranges;
    });
}
|
||||
|
||||
// Creates a reader for the given schema, partition range and slice.
// Only the address of `prange` is stored, and `slice` is stored in the
// member `_slice`; the caller is expected to keep both alive while the
// reader is in use.
size_estimates_mutation_reader::size_estimates_mutation_reader(
        schema_ptr schema,
        const dht::partition_range& prange,
        const query::partition_slice& slice,
        streamed_mutation::forwarding fwd)
    : impl(schema)                  // copy here: `schema` is moved from just below
    , _schema(std::move(schema))
    , _prange(&prange)
    , _slice(slice)
    , _fwd(fwd)
{ }
|
||||
|
||||
// Prepares `_partition_reader` for the next keyspace.
//
// The keyspace list is computed lazily on the first call (and again after it
// has been reset). When no keyspace is left, the reader is marked as having
// reached the end of the stream. Otherwise the estimates of the current
// keyspace are turned into a single mutation, a reader over that mutation
// is installed, and the current-keyspace cursor is advanced.
future<> size_estimates_mutation_reader::get_next_partition() {
    auto& db = service::get_local_storage_proxy().get_db().local();

    if (!_keyspaces) {
        _keyspaces = get_keyspaces(*_schema, db, *_prange);
        _current_partition = _keyspaces->begin();
    }

    const bool exhausted = _current_partition == _keyspaces->end();
    if (exhausted) {
        _end_of_stream = true;
        return make_ready_future<>();
    }

    return get_local_ranges().then([&db, this] (auto&& local_ranges) {
        auto estimates = this->estimates_for_current_keyspace(db, std::move(local_ranges));
        auto partition = db::system_keyspace::make_size_estimates_mutation(*_current_partition, std::move(estimates));
        ++_current_partition;

        std::vector<mutation> single;
        single.emplace_back(std::move(partition));
        _partition_reader = flat_mutation_reader_from_mutations(std::move(single), _fwd);
    });
}
|
||||
|
||||
// Fills the reader's buffer until it is full or the stream has ended.
//
// Each step either sets up the reader for the next keyspace (when there is no
// current partition reader) or drains fragments from the current partition
// reader into this reader's buffer. A partition reader that is at end of
// stream with an empty buffer is dropped so that the next step moves on.
future<> size_estimates_mutation_reader::fill_buffer(db::timeout_clock::time_point timeout) {
    auto finished = [this] {
        return is_end_of_stream() || is_buffer_full();
    };

    auto step = [this, timeout] {
        if (!_partition_reader) {
            return get_next_partition();
        }
        auto forward_fragment = [this] (mutation_fragment mf) {
            push_mutation_fragment(std::move(mf));
            return stop_iteration(is_buffer_full());
        };
        return _partition_reader->consume_pausable(std::move(forward_fragment), timeout).then([this] {
            const bool drained = _partition_reader->is_end_of_stream() && _partition_reader->is_buffer_empty();
            if (drained) {
                _partition_reader = stdx::nullopt;
            }
        });
    };

    return do_until(std::move(finished), std::move(step));
}
|
||||
|
||||
void size_estimates_mutation_reader::next_partition() {
|
||||
clear_buffer_to_next_partition();
|
||||
if (is_buffer_empty()) {
|
||||
_partition_reader = stdx::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
// Repositions the reader on a new partition range.
//
// The buffer, the cached keyspace list and the current partition reader are
// all discarded, so the next fill_buffer() recomputes the keyspaces for `pr`.
// Only the address of `pr` is stored; the caller is expected to keep it alive
// while the reader uses it. `timeout` is not used: nothing here waits.
future<> size_estimates_mutation_reader::fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) {
    clear_buffer();
    _prange = &pr;
    _keyspaces = stdx::nullopt;
    _partition_reader = stdx::nullopt;
    _end_of_stream = false;
    return make_ready_future<>();
}
|
||||
|
||||
// Forwards the reader within the current partition to the given position range.
// The buffer is forwarded to the start of `pr`, the end-of-stream flag is
// cleared, and the request is passed on to the current partition reader when
// there is one; otherwise there is nothing more to do.
future<> size_estimates_mutation_reader::fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) {
    forward_buffer_to(pr.start());
    _end_of_stream = false;
    if (!_partition_reader) {
        return make_ready_future<>();
    }
    return _partition_reader->fast_forward_to(std::move(pr), timeout);
}
|
||||
|
||||
size_t size_estimates_mutation_reader::buffer_size() const {
|
||||
if (_partition_reader) {
|
||||
return flat_mutation_reader::impl::buffer_size() + _partition_reader->buffer_size();
|
||||
}
|
||||
return flat_mutation_reader::impl::buffer_size();
|
||||
}
|
||||
|
||||
std::vector<db::system_keyspace::range_estimates>
|
||||
size_estimates_mutation_reader::estimates_for_current_keyspace(const database& db, std::vector<token_range> local_ranges) const {
|
||||
// For each specified range, estimate (crudely) mean partition size and partitions count.
|
||||
auto pkey = partition_key::from_single_value(*_schema, utf8_type->decompose(*_current_partition));
|
||||
auto cfs = db.find_keyspace(*_current_partition).metadata()->cf_meta_data();
|
||||
auto cf_names = boost::copy_range<std::vector<bytes>>(cfs | boost::adaptors::transformed([] (auto&& cf) {
|
||||
return utf8_type->decompose(cf.first);
|
||||
}));
|
||||
boost::sort(cf_names, [] (auto&& n1, auto&& n2) {
|
||||
return utf8_type->less(n1, n2);
|
||||
});
|
||||
std::vector<db::system_keyspace::range_estimates> estimates;
|
||||
for (auto& range : _slice.row_ranges(*_schema, pkey)) {
|
||||
auto rows = boost::make_iterator_range(
|
||||
virtual_row_iterator(cf_names, local_ranges),
|
||||
virtual_row_iterator(cf_names, local_ranges, virtual_row_iterator::end_iterator_tag()));
|
||||
auto rows_to_estimate = range.slice(rows, virtual_row_comparator(_schema));
|
||||
for (auto&& r : rows_to_estimate) {
|
||||
auto& cf = db.find_column_family(*_current_partition, utf8_type->to_string(r.cf_name));
|
||||
estimates.push_back(estimate(cf, r.tokens));
|
||||
if (estimates.size() >= _slice.partition_row_limit()) {
|
||||
return estimates;
|
||||
}
|
||||
}
|
||||
}
|
||||
return estimates;
|
||||
}
|
||||
|
||||
} // namespace size_estimates
|
||||
|
||||
} // namespace db
|
||||
@@ -21,33 +21,19 @@
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <boost/range/adaptor/indirected.hpp>
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
#include <boost/range/adaptor/transformed.hpp>
|
||||
#include <boost/range/algorithm/find_if.hpp>
|
||||
|
||||
#include "clustering_bounds_comparator.hh"
|
||||
#include "database.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "dht/i_partitioner.hh"
|
||||
#include "mutation_reader.hh"
|
||||
#include "partition_range_compat.hh"
|
||||
#include "range.hh"
|
||||
#include "service/storage_service.hh"
|
||||
#include "stdx.hh"
|
||||
#include "mutation_fragment.hh"
|
||||
#include "sstables/sstables.hh"
|
||||
#include "db/timeout_clock.hh"
|
||||
|
||||
namespace db {
|
||||
|
||||
namespace size_estimates {
|
||||
|
||||
struct token_range {
|
||||
bytes start;
|
||||
bytes end;
|
||||
};
|
||||
|
||||
class size_estimates_mutation_reader final : public flat_mutation_reader::impl {
|
||||
struct token_range {
|
||||
bytes start;
|
||||
bytes end;
|
||||
};
|
||||
schema_ptr _schema;
|
||||
const dht::partition_range* _prange;
|
||||
const query::partition_slice& _slice;
|
||||
@@ -57,267 +43,18 @@ class size_estimates_mutation_reader final : public flat_mutation_reader::impl {
|
||||
streamed_mutation::forwarding _fwd;
|
||||
flat_mutation_reader_opt _partition_reader;
|
||||
public:
|
||||
size_estimates_mutation_reader(schema_ptr schema, const dht::partition_range& prange, const query::partition_slice& slice, streamed_mutation::forwarding fwd)
|
||||
: impl(schema)
|
||||
, _schema(std::move(schema))
|
||||
, _prange(&prange)
|
||||
, _slice(slice)
|
||||
, _fwd(fwd)
|
||||
{ }
|
||||
size_estimates_mutation_reader(schema_ptr, const dht::partition_range&, const query::partition_slice&, streamed_mutation::forwarding);
|
||||
|
||||
virtual future<> fill_buffer(db::timeout_clock::time_point) override;
|
||||
virtual void next_partition() override;
|
||||
virtual future<> fast_forward_to(const dht::partition_range&, db::timeout_clock::time_point) override;
|
||||
virtual future<> fast_forward_to(position_range, db::timeout_clock::time_point) override;
|
||||
virtual size_t buffer_size() const override;
|
||||
private:
|
||||
future<> get_next_partition() {
|
||||
// For each specified range, estimate (crudely) mean partition size and partitions count.
|
||||
auto& db = service::get_local_storage_proxy().get_db().local();
|
||||
if (!_keyspaces) {
|
||||
_keyspaces = get_keyspaces(*_schema, db, *_prange);
|
||||
_current_partition = _keyspaces->begin();
|
||||
}
|
||||
if (_current_partition == _keyspaces->end()) {
|
||||
_end_of_stream = true;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return get_local_ranges().then([&db, this] (auto&& ranges) {
|
||||
auto estimates = this->estimates_for_current_keyspace(db, std::move(ranges));
|
||||
auto mutations = db::system_keyspace::make_size_estimates_mutation(*_current_partition, std::move(estimates));
|
||||
++_current_partition;
|
||||
std::vector<mutation> ms;
|
||||
ms.emplace_back(std::move(mutations));
|
||||
_partition_reader = flat_mutation_reader_from_mutations(std::move(ms), _fwd);
|
||||
});
|
||||
}
|
||||
public:
|
||||
virtual future<> fill_buffer(db::timeout_clock::time_point timeout) override {
|
||||
return do_until([this, timeout] { return is_end_of_stream() || is_buffer_full(); }, [this, timeout] {
|
||||
if (!_partition_reader) {
|
||||
return get_next_partition();
|
||||
}
|
||||
return _partition_reader->consume_pausable([this] (mutation_fragment mf) {
|
||||
push_mutation_fragment(std::move(mf));
|
||||
return stop_iteration(is_buffer_full());
|
||||
}, timeout).then([this] {
|
||||
if (_partition_reader->is_end_of_stream() && _partition_reader->is_buffer_empty()) {
|
||||
_partition_reader = stdx::nullopt;
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
virtual void next_partition() override {
|
||||
clear_buffer_to_next_partition();
|
||||
if (is_buffer_empty()) {
|
||||
_partition_reader = stdx::nullopt;
|
||||
}
|
||||
}
|
||||
virtual future<> fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) override {
|
||||
clear_buffer();
|
||||
_prange = &pr;
|
||||
_keyspaces = stdx::nullopt;
|
||||
_partition_reader = stdx::nullopt;
|
||||
_end_of_stream = false;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override {
|
||||
forward_buffer_to(pr.start());
|
||||
_end_of_stream = false;
|
||||
if (_partition_reader) {
|
||||
return _partition_reader->fast_forward_to(std::move(pr), timeout);
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}
|
||||
virtual size_t buffer_size() const override {
|
||||
if (_partition_reader) {
|
||||
return flat_mutation_reader::impl::buffer_size() + _partition_reader->buffer_size();
|
||||
}
|
||||
return flat_mutation_reader::impl::buffer_size();
|
||||
}
|
||||
/**
|
||||
* Returns the primary ranges for the local node.
|
||||
* Used for testing as well.
|
||||
*/
|
||||
static future<std::vector<token_range>> get_local_ranges() {
|
||||
auto& ss = service::get_local_storage_service();
|
||||
return ss.get_local_tokens().then([&ss] (auto&& tokens) {
|
||||
auto ranges = ss.get_token_metadata().get_primary_ranges_for(std::move(tokens));
|
||||
std::vector<token_range> local_ranges;
|
||||
auto to_bytes = [](const stdx::optional<dht::token_range::bound>& b) {
|
||||
assert(b);
|
||||
return utf8_type->decompose(dht::global_partitioner().to_sstring(b->value()));
|
||||
};
|
||||
// We merge the ranges to be compatible with how Cassandra shows its size estimates table.
|
||||
// All queries will be on that table, where all entries are text and there's no notion of
|
||||
// token ranges from the CQL point of view.
|
||||
auto left_inf = boost::find_if(ranges, [] (auto&& r) {
|
||||
return !r.start() || r.start()->value() == dht::minimum_token();
|
||||
});
|
||||
auto right_inf = boost::find_if(ranges, [] (auto&& r) {
|
||||
return !r.end() || r.start()->value() == dht::maximum_token();
|
||||
});
|
||||
if (left_inf != right_inf && left_inf != ranges.end() && right_inf != ranges.end()) {
|
||||
local_ranges.push_back(token_range{to_bytes(right_inf->start()), to_bytes(left_inf->end())});
|
||||
ranges.erase(left_inf);
|
||||
ranges.erase(right_inf);
|
||||
}
|
||||
for (auto&& r : ranges) {
|
||||
local_ranges.push_back(token_range{to_bytes(r.start()), to_bytes(r.end())});
|
||||
}
|
||||
boost::sort(local_ranges, [] (auto&& tr1, auto&& tr2) {
|
||||
return utf8_type->less(tr1.start, tr2.start);
|
||||
});
|
||||
return local_ranges;
|
||||
});
|
||||
}
|
||||
private:
|
||||
struct virtual_row {
|
||||
const bytes& cf_name;
|
||||
const token_range& tokens;
|
||||
clustering_key_prefix as_key() const {
|
||||
return clustering_key_prefix::from_exploded(std::vector<bytes_view>{cf_name, tokens.start, tokens.end});
|
||||
}
|
||||
};
|
||||
struct virtual_row_comparator {
|
||||
schema_ptr _schema;
|
||||
virtual_row_comparator(schema_ptr schema) : _schema(schema) { }
|
||||
bool operator()(const clustering_key_prefix& key1, const clustering_key_prefix& key2) {
|
||||
return clustering_key_prefix::prefix_equality_less_compare(*_schema)(key1, key2);
|
||||
}
|
||||
bool operator()(const virtual_row& row, const clustering_key_prefix& key) {
|
||||
return operator()(row.as_key(), key);
|
||||
}
|
||||
bool operator()(const clustering_key_prefix& key, const virtual_row& row) {
|
||||
return operator()(key, row.as_key());
|
||||
}
|
||||
};
|
||||
class virtual_row_iterator : public std::iterator<std::input_iterator_tag, const virtual_row> {
|
||||
std::reference_wrapper<const std::vector<bytes>> _cf_names;
|
||||
std::reference_wrapper<const std::vector<token_range>> _ranges;
|
||||
size_t _cf_names_idx = 0;
|
||||
size_t _ranges_idx = 0;
|
||||
public:
|
||||
struct end_iterator_tag {};
|
||||
virtual_row_iterator(const std::vector<bytes>& cf_names, const std::vector<token_range>& ranges)
|
||||
: _cf_names(std::ref(cf_names))
|
||||
, _ranges(std::ref(ranges))
|
||||
{ }
|
||||
virtual_row_iterator(const std::vector<bytes>& cf_names, const std::vector<token_range>& ranges, end_iterator_tag)
|
||||
: _cf_names(std::ref(cf_names))
|
||||
, _ranges(std::ref(ranges))
|
||||
, _cf_names_idx(cf_names.size())
|
||||
, _ranges_idx(ranges.size())
|
||||
{ }
|
||||
virtual_row_iterator& operator++() {
|
||||
if (++_ranges_idx == _ranges.get().size() && ++_cf_names_idx < _cf_names.get().size()) {
|
||||
_ranges_idx = 0;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
virtual_row_iterator operator++(int) {
|
||||
virtual_row_iterator i(*this);
|
||||
++(*this);
|
||||
return i;
|
||||
}
|
||||
const value_type operator*() const {
|
||||
return { _cf_names.get()[_cf_names_idx], _ranges.get()[_ranges_idx] };
|
||||
}
|
||||
bool operator==(const virtual_row_iterator& i) const {
|
||||
return _cf_names_idx == i._cf_names_idx
|
||||
&& _ranges_idx == i._ranges_idx;
|
||||
}
|
||||
bool operator!=(const virtual_row_iterator& i) const {
|
||||
return !(*this == i);
|
||||
}
|
||||
};
|
||||
future<> get_next_partition();
|
||||
|
||||
std::vector<db::system_keyspace::range_estimates>
|
||||
estimates_for_current_keyspace(const database& db, std::vector<token_range> local_ranges) const {
|
||||
auto pkey = partition_key::from_single_value(*_schema, utf8_type->decompose(*_current_partition));
|
||||
auto cfs = db.find_keyspace(*_current_partition).metadata()->cf_meta_data();
|
||||
auto cf_names = boost::copy_range<std::vector<bytes>>(cfs | boost::adaptors::transformed([] (auto&& cf) {
|
||||
return utf8_type->decompose(cf.first);
|
||||
}));
|
||||
boost::sort(cf_names, [] (auto&& n1, auto&& n2) {
|
||||
return utf8_type->less(n1, n2);
|
||||
});
|
||||
std::vector<db::system_keyspace::range_estimates> estimates;
|
||||
for (auto& range : _slice.row_ranges(*_schema, pkey)) {
|
||||
auto rows = boost::make_iterator_range(
|
||||
virtual_row_iterator(cf_names, local_ranges),
|
||||
virtual_row_iterator(cf_names, local_ranges, virtual_row_iterator::end_iterator_tag()));
|
||||
auto rows_to_estimate = range.slice(rows, virtual_row_comparator(_schema));
|
||||
for (auto&& r : rows_to_estimate) {
|
||||
auto& cf = db.find_column_family(*_current_partition, utf8_type->to_string(r.cf_name));
|
||||
estimates.push_back(estimate(cf, r.tokens));
|
||||
if (estimates.size() >= _slice.partition_row_limit()) {
|
||||
return estimates;
|
||||
}
|
||||
}
|
||||
}
|
||||
return estimates;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the keyspaces, ordered by name, as selected by the partition_range.
|
||||
*/
|
||||
static ks_range get_keyspaces(const schema& s, const database& db, dht::partition_range range) {
|
||||
struct keyspace_less_comparator {
|
||||
const schema& _s;
|
||||
keyspace_less_comparator(const schema& s) : _s(s) { }
|
||||
dht::ring_position as_ring_position(const sstring& ks) {
|
||||
auto pkey = partition_key::from_single_value(_s, utf8_type->decompose(ks));
|
||||
return dht::global_partitioner().decorate_key(_s, std::move(pkey));
|
||||
}
|
||||
bool operator()(const sstring& ks1, const sstring& ks2) {
|
||||
return as_ring_position(ks1).less_compare(_s, as_ring_position(ks2));
|
||||
}
|
||||
bool operator()(const sstring& ks, const dht::ring_position& rp) {
|
||||
return as_ring_position(ks).less_compare(_s, rp);
|
||||
}
|
||||
bool operator()(const dht::ring_position& rp, const sstring& ks) {
|
||||
return rp.less_compare(_s, as_ring_position(ks));
|
||||
}
|
||||
};
|
||||
auto keyspaces = db.get_non_system_keyspaces();
|
||||
auto cmp = keyspace_less_comparator(s);
|
||||
boost::sort(keyspaces, cmp);
|
||||
return boost::copy_range<ks_range>(range.slice(keyspaces, std::move(cmp)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Makes a wrapping range of ring_position from a nonwrapping range of token, used to select sstables.
|
||||
*/
|
||||
static dht::partition_range as_ring_position_range(dht::token_range& r) {
|
||||
stdx::optional<range<dht::ring_position>::bound> start_bound, end_bound;
|
||||
if (r.start()) {
|
||||
start_bound = {{ dht::ring_position(r.start()->value(), dht::ring_position::token_bound::start), r.start()->is_inclusive() }};
|
||||
}
|
||||
if (r.end()) {
|
||||
end_bound = {{ dht::ring_position(r.end()->value(), dht::ring_position::token_bound::end), r.end()->is_inclusive() }};
|
||||
}
|
||||
return dht::partition_range(std::move(start_bound), std::move(end_bound), r.is_singular());
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new range_estimates for the specified range, considering the sstables associated with `cf`.
|
||||
*/
|
||||
static system_keyspace::range_estimates estimate(const column_family& cf, const token_range& r) {
|
||||
int64_t count{0};
|
||||
utils::estimated_histogram hist{0};
|
||||
auto from_bytes = [] (auto& b) {
|
||||
return dht::global_partitioner().from_sstring(utf8_type->to_string(b));
|
||||
};
|
||||
dht::token_range_vector ranges;
|
||||
compat::unwrap_into(
|
||||
wrapping_range<dht::token>({{ from_bytes(r.start) }}, {{ from_bytes(r.end) }}),
|
||||
dht::token_comparator(),
|
||||
[&] (auto&& rng) { ranges.push_back(std::move(rng)); });
|
||||
for (auto&& r : ranges) {
|
||||
auto rp_range = as_ring_position_range(r);
|
||||
for (auto&& sstable : cf.select_sstables(rp_range)) {
|
||||
count += sstable->estimated_keys_for_range(r);
|
||||
hist.merge(sstable->get_stats_metadata().estimated_row_size);
|
||||
}
|
||||
}
|
||||
return {cf.schema(), r.start, r.end, count, count > 0 ? hist.mean() : 0};
|
||||
}
|
||||
estimates_for_current_keyspace(const database&, std::vector<token_range> local_ranges) const;
|
||||
};
|
||||
|
||||
struct virtual_reader {
|
||||
@@ -332,6 +69,12 @@ struct virtual_reader {
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns the primary ranges for the local node.
|
||||
* Used for testing as well.
|
||||
*/
|
||||
future<std::vector<token_range>> get_local_ranges();
|
||||
|
||||
} // namespace size_estimates
|
||||
|
||||
} // namespace db
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "db/consistency_level_type.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "schema_builder.hh"
|
||||
#include "timeout_config.hh"
|
||||
#include "types.hh"
|
||||
|
||||
#include <seastar/core/reactor.hh>
|
||||
@@ -97,11 +98,17 @@ future<> system_distributed_keyspace::stop() {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
// Timeout configuration used by the internal queries of this file: every one
// of the seven timeout_config fields is set to the same 10 second limit.
static const timeout_config internal_distributed_timeout_config = [] {
    const std::chrono::seconds limit{10};
    return timeout_config{ limit, limit, limit, limit, limit, limit, limit };
}();
|
||||
|
||||
future<std::unordered_map<utils::UUID, sstring>> system_distributed_keyspace::view_status(sstring ks_name, sstring view_name) const {
|
||||
return _qp.process(
|
||||
sprint("SELECT host_id, status FROM %s.%s WHERE keyspace_name = ? AND view_name = ?", NAME, VIEW_BUILD_STATUS),
|
||||
db::consistency_level::ONE,
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config,
|
||||
{ std::move(ks_name), std::move(view_name) },
|
||||
false).then([this] (::shared_ptr<cql3::untyped_result_set> cql_result) {
|
||||
return boost::copy_range<std::unordered_map<utils::UUID, sstring>>(*cql_result
|
||||
@@ -118,7 +125,7 @@ future<> system_distributed_keyspace::start_view_build(sstring ks_name, sstring
|
||||
return _qp.process(
|
||||
sprint("INSERT INTO %s.%s (keyspace_name, view_name, host_id, status) VALUES (?, ?, ?, ?)", NAME, VIEW_BUILD_STATUS),
|
||||
db::consistency_level::ONE,
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config,
|
||||
{ std::move(ks_name), std::move(view_name), std::move(host_id), "STARTED" },
|
||||
false).discard_result();
|
||||
});
|
||||
@@ -129,7 +136,7 @@ future<> system_distributed_keyspace::finish_view_build(sstring ks_name, sstring
|
||||
return _qp.process(
|
||||
sprint("UPDATE %s.%s SET status = ? WHERE keyspace_name = ? AND view_name = ? AND host_id = ?", NAME, VIEW_BUILD_STATUS),
|
||||
db::consistency_level::ONE,
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config,
|
||||
{ "SUCCESS", std::move(ks_name), std::move(view_name), std::move(host_id) },
|
||||
false).discard_result();
|
||||
});
|
||||
@@ -139,7 +146,7 @@ future<> system_distributed_keyspace::remove_view(sstring ks_name, sstring view_
|
||||
return _qp.process(
|
||||
sprint("DELETE FROM %s.%s WHERE keyspace_name = ? AND view_name = ?", NAME, VIEW_BUILD_STATUS),
|
||||
db::consistency_level::ONE,
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config,
|
||||
{ std::move(ks_name), std::move(view_name) },
|
||||
false).discard_result();
|
||||
}
|
||||
|
||||
@@ -1635,6 +1635,9 @@ void make(database& db, bool durable, bool volatile_testing_only) {
|
||||
auto cfg = ks.make_column_family_config(*table, db.get_config(), db.get_large_partition_handler());
|
||||
if (maybe_write_in_user_memory(table, db)) {
|
||||
cfg.dirty_memory_manager = &db._dirty_memory_manager;
|
||||
} else {
|
||||
cfg.memtable_scheduling_group = default_scheduling_group();
|
||||
cfg.memtable_to_cache_scheduling_group = default_scheduling_group();
|
||||
}
|
||||
db.add_column_family(ks, table, std::move(cfg));
|
||||
maybe_add_virtual_reader(table, db);
|
||||
|
||||
2
dist/ami/files/.scylla_ami_login
vendored
2
dist/ami/files/.scylla_ami_login
vendored
@@ -94,7 +94,7 @@ if __name__ == '__main__':
|
||||
colorprint(MSG_HEADER)
|
||||
aws = aws_instance()
|
||||
if not aws.is_supported_instance_class():
|
||||
colorprint(MSG_UNSUPPORTED_INSTANCE_TYPE.format(type=aws.instance_class()))
|
||||
colorprint(MSG_UNSUPPORTED_INSTANCE_TYPE, type=aws.instance_class())
|
||||
else:
|
||||
setup = systemd_unit('scylla-ami-setup.service')
|
||||
res = setup.is_active()
|
||||
|
||||
2
dist/ami/files/scylla-ami
vendored
2
dist/ami/files/scylla-ami
vendored
Submodule dist/ami/files/scylla-ami updated: d53834f387...fe156a5725
2
dist/ami/scylla.json
vendored
2
dist/ami/scylla.json
vendored
@@ -68,7 +68,7 @@
|
||||
"type": "shell",
|
||||
"inline": [
|
||||
"sudo yum install -y epel-release",
|
||||
"sudo yum install -y python34",
|
||||
"sudo yum install -y python36",
|
||||
"sudo /home/{{user `ssh_username`}}/scylla-ami/scylla_install_ami {{ user `install_args` }}"
|
||||
]
|
||||
}
|
||||
|
||||
4
dist/common/scripts/scylla_ec2_check
vendored
4
dist/common/scripts/scylla_ec2_check
vendored
@@ -43,7 +43,7 @@ if __name__ == '__main__':
|
||||
driver = match.group(1)
|
||||
|
||||
if not en:
|
||||
colorprint('{red}{instance_class} doesn\'t support enahanced networking!{nocolor}'.format(instance_class))
|
||||
colorprint('{red}{instance_class} doesn\'t support enahanced networking!{nocolor}', instance_class=instance_class)
|
||||
print('''To enable enhanced networking, please use the instance type which supports it.
|
||||
More documentation available at:
|
||||
http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/enhanced-networking.html#enabling_enhanced_networking''')
|
||||
@@ -57,3 +57,5 @@ http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/enhanced-networking.html#enab
|
||||
print('''More documentation available at:
|
||||
http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/enhanced-networking.html''')
|
||||
sys.exit(1)
|
||||
|
||||
colorprint('{green}This EC2 instance is optimized for Scylla.{nocolor}')
|
||||
|
||||
9
dist/common/scripts/scylla_ntp_setup
vendored
9
dist/common/scripts/scylla_ntp_setup
vendored
@@ -49,7 +49,8 @@ if __name__ == '__main__':
|
||||
if is_systemd():
|
||||
ntp = systemd_unit('ntp.service')
|
||||
ntp.stop()
|
||||
run('ntpdate ntp.ubuntu.com')
|
||||
# ignore error, ntpd may be able to adjust the clock later
|
||||
run('ntpdate ntp.ubuntu.com', exception=False)
|
||||
ntp.start()
|
||||
else:
|
||||
run('service ntp stop')
|
||||
@@ -70,7 +71,8 @@ if __name__ == '__main__':
|
||||
sntpd.start()
|
||||
else:
|
||||
run('rc-service ntpd stop', exception=False)
|
||||
run('ntpdate {}'.format(server))
|
||||
# ignore error, ntpd may be able to adjust the clock later
|
||||
run('ntpdate {}'.format(server), exception=False)
|
||||
run('rc-update add ntpd default')
|
||||
run('rc-service ntpd start')
|
||||
|
||||
@@ -87,6 +89,7 @@ if __name__ == '__main__':
|
||||
server = match.group(1)
|
||||
ntpd = systemd_unit('ntpd.service')
|
||||
ntpd.stop()
|
||||
run('ntpdate {}'.format(server))
|
||||
# ignore error, ntpd may be able to adjust the clock later
|
||||
run('ntpdate {}'.format(server), exception=False)
|
||||
ntpd.enable()
|
||||
ntpd.start()
|
||||
|
||||
10
dist/common/scripts/scylla_raid_setup
vendored
10
dist/common/scripts/scylla_raid_setup
vendored
@@ -146,7 +146,15 @@ if __name__ == '__main__':
|
||||
match = re.search(r'^/dev/\S+: (UUID="\S+")', res.strip())
|
||||
uuid = match.group(1)
|
||||
with open('/etc/fstab', 'a') as f:
|
||||
f.write('{uuid} {mount_at} xfs noatime 0 0\n'.format(uuid=uuid, mount_at=mount_at))
|
||||
f.write('{uuid} {mount_at} xfs noatime,nofail 0 0\n'.format(uuid=uuid, mount_at=mount_at))
|
||||
mounts_conf = '/etc/systemd/system/scylla-server.service.d/mounts.conf'
|
||||
if not os.path.exists(mounts_conf):
|
||||
makedirs('/etc/systemd/system/scylla-server.service.d/')
|
||||
with open(mounts_conf, 'w') as f:
|
||||
f.write('[Unit]\nRequiresMountsFor={mount_at}\n'.format(mount_at=mount_at))
|
||||
else:
|
||||
with open(mounts_conf, 'a') as f:
|
||||
f.write('RequiresMountsFor={mount_at}\n'.format(mount_at=mount_at))
|
||||
|
||||
if is_debian_variant():
|
||||
run('update-initramfs -u')
|
||||
|
||||
13
dist/common/scripts/scylla_setup
vendored
13
dist/common/scripts/scylla_setup
vendored
@@ -61,7 +61,7 @@ def interactive_choose_nic():
|
||||
print(nics)
|
||||
n = input('> ')
|
||||
if is_valid_nic(n):
|
||||
return nic
|
||||
return n
|
||||
|
||||
def do_verify_package(pkg):
|
||||
if is_debian_variant():
|
||||
@@ -102,7 +102,7 @@ def run_setup_script(name, script):
|
||||
res = run(script, exception=False)
|
||||
if res != 0:
|
||||
if interactive:
|
||||
colorprint('{red}{name} setup failed. Press any key to continue...{nocolor}'.format(name=name))
|
||||
colorprint('{red}{name} setup failed. Press any key to continue...{nocolor}', name=name)
|
||||
input()
|
||||
else:
|
||||
print('{} setup failed.'.format(name))
|
||||
@@ -192,11 +192,12 @@ if __name__ == '__main__':
|
||||
fstrim_setup = not args.no_fstrim_setup
|
||||
selinux_reboot_required = False
|
||||
|
||||
colorprint('{green}Skip any of the following steps by answering \'no\'{nocolor}')
|
||||
if interactive:
|
||||
colorprint('{green}Skip any of the following steps by answering \'no\'{nocolor}')
|
||||
|
||||
if is_ec2():
|
||||
if interactive:
|
||||
ec2_check = interactive_ask_service('Do you want to run Amazon EC2 configuration check?', 'Yes - runs a script to verify that this instance is optimized for running Scylls. No - skips the configuration check.', 'yes')
|
||||
ec2_check = interactive_ask_service('Do you want to run Amazon EC2 configuration check?', 'Yes - runs a script to verify that this instance is optimized for running Scylla. No - skips the configuration check.', 'yes')
|
||||
if ec2_check:
|
||||
nic = interactive_choose_nic()
|
||||
if ec2_check:
|
||||
@@ -221,7 +222,7 @@ if __name__ == '__main__':
|
||||
elif is_gentoo_variant():
|
||||
run('rc-update add scylla-server default')
|
||||
|
||||
if interactive:
|
||||
if interactive and not os.path.exists('/etc/scylla.d/housekeeping.cfg'):
|
||||
version_check = interactive_ask_service('Do you want to enable Scylla to check if there is a newer version of Scylla available?', 'Yes - start the Scylla-housekeeping service to check for a newer version. This check runs periodically. No - skips this step.', 'yes')
|
||||
if version_check:
|
||||
with open('/etc/scylla.d/housekeeping.cfg', 'w') as f:
|
||||
@@ -293,7 +294,7 @@ if __name__ == '__main__':
|
||||
print('Please select unmounted disks from the following list: {}'.format(devices))
|
||||
selected = []
|
||||
dsklist = []
|
||||
while len(devices):
|
||||
while True:
|
||||
print('type \'cancel\' to cancel RAID/XFS setup.')
|
||||
print('type \'done\' to finish selection. Selected: {}'.format(selected))
|
||||
if len(dsklist) > 0:
|
||||
|
||||
6
dist/common/scripts/scylla_util.py
vendored
6
dist/common/scripts/scylla_util.py
vendored
@@ -350,8 +350,10 @@ def is_unused_disk(dev):
|
||||
return False
|
||||
|
||||
CONCOLORS = {'green':'\033[1;32m', 'red':'\033[1;31m', 'nocolor':'\033[0m'}
|
||||
def colorprint(msg):
|
||||
print(msg.format(**CONCOLORS))
|
||||
def colorprint(msg, **kwargs):
    """Print ``msg`` after filling in its ``{name}`` placeholders.

    The placeholders are resolved from the entries of CONCOLORS plus any
    extra keyword arguments; a keyword argument with the same name as a
    CONCOLORS entry takes precedence.
    """
    # Build a fresh mapping so the module-level CONCOLORS is never mutated.
    substitutions = dict(CONCOLORS, **kwargs)
    print(msg.format(**substitutions))
|
||||
|
||||
def get_mode_cpuset(nic, mode):
|
||||
try:
|
||||
|
||||
@@ -10,7 +10,7 @@ Group=scylla
|
||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg --repo-files '/etc/apt/sources.list.d/scylla*.list' version --mode d
|
||||
{{/debian}}
|
||||
{{#redhat}}
|
||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg --repo-files @@REPOFILES@@ version --mode d
|
||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg --repo-files '/etc/yum.repos.d/scylla*.repo' version --mode d
|
||||
{{/redhat}}
|
||||
|
||||
[Install]
|
||||
|
||||
@@ -6,7 +6,7 @@ After=network.target
|
||||
Type=simple
|
||||
User=scylla
|
||||
Group=scylla
|
||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg --repo-files @@REPOFILES@@ version --mode r
|
||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg --repo-files '/etc/yum.repos.d/scylla*.repo' version --mode r
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
||||
7
dist/debian/build_deb.sh
vendored
7
dist/debian/build_deb.sh
vendored
@@ -116,6 +116,9 @@ if [ ! -f /usr/bin/pystache ]; then
|
||||
sudo apt-get install -y python-pystache
|
||||
fi
|
||||
fi
|
||||
if is_debian_variant && [ ! -f /usr/share/doc/python-pkg-resources/copyright ]; then
|
||||
sudo apt-get install -y python-pkg-resources
|
||||
fi
|
||||
|
||||
if [ -z "$TARGET" ]; then
|
||||
if is_debian_variant; then
|
||||
@@ -157,8 +160,8 @@ chmod a+rx debian/rules
|
||||
|
||||
if [ "$TARGET" != "trusty" ]; then
|
||||
pystache dist/common/systemd/scylla-server.service.mustache "{ $MUSTACHE_DIST }" > debian/scylla-server.service
|
||||
pystache dist/common/systemd/scylla-housekeeping-daily.service.mustache "{ $MUSTACHE_DIST }" > debian/scylla-housekeeping-daily.service
|
||||
pystache dist/common/systemd/scylla-housekeeping-restart.service.mustache "{ $MUSTACHE_DIST }" > debian/scylla-housekeeping-restart.service
|
||||
pystache dist/common/systemd/scylla-housekeeping-daily.service.mustache "{ $MUSTACHE_DIST }" > debian/scylla-server.scylla-housekeeping-daily.service
|
||||
pystache dist/common/systemd/scylla-housekeeping-restart.service.mustache "{ $MUSTACHE_DIST }" > debian/scylla-server.scylla-housekeeping-restart.service
|
||||
cp dist/common/systemd/scylla-fstrim.service debian/scylla-server.scylla-fstrim.service
|
||||
cp dist/common/systemd/node-exporter.service debian/scylla-server.node-exporter.service
|
||||
fi
|
||||
|
||||
4
dist/docker/redhat/Dockerfile
vendored
4
dist/docker/redhat/Dockerfile
vendored
@@ -26,14 +26,14 @@ ADD commandlineparser.py /commandlineparser.py
|
||||
ADD docker-entrypoint.py /docker-entrypoint.py
|
||||
|
||||
# Install Scylla:
|
||||
RUN curl http://downloads.scylladb.com/rpm/unstable/centos/master/latest/scylla.repo -o /etc/yum.repos.d/scylla.repo && \
|
||||
RUN curl http://downloads.scylladb.com/rpm/centos/scylla-2.3.repo -o /etc/yum.repos.d/scylla.repo && \
|
||||
yum -y install epel-release && \
|
||||
yum -y clean expire-cache && \
|
||||
yum -y update && \
|
||||
yum -y remove boost-thread boost-system && \
|
||||
yum -y install scylla hostname supervisor && \
|
||||
yum clean all && \
|
||||
yum -y install python34 python34-PyYAML && \
|
||||
yum -y install python36 python36-PyYAML && \
|
||||
cat /scylla_bashrc >> /etc/bashrc && \
|
||||
mkdir -p /etc/supervisor.conf.d && \
|
||||
mkdir -p /var/log/scylla && \
|
||||
|
||||
8
dist/docker/redhat/scylla-service.sh
vendored
8
dist/docker/redhat/scylla-service.sh
vendored
@@ -1,7 +1,13 @@
|
||||
#!/bin/bash
|
||||
|
||||
. /usr/lib/scylla/scylla_prepare
|
||||
/usr/lib/scylla/scylla_prepare
|
||||
|
||||
. /etc/sysconfig/scylla-server
|
||||
|
||||
export SCYLLA_HOME SCYLLA_CONF
|
||||
|
||||
for f in /etc/scylla.d/*.conf; do
|
||||
. "$f"
|
||||
done
|
||||
|
||||
exec /usr/bin/scylla $SCYLLA_ARGS $SEASTAR_IO $DEV_MODE $CPUSET $SCYLLA_DOCKER_ARGS
|
||||
|
||||
7
dist/redhat/build_rpm.sh
vendored
7
dist/redhat/build_rpm.sh
vendored
@@ -98,12 +98,19 @@ rm -f version
|
||||
|
||||
pystache dist/redhat/scylla.spec.mustache "{ \"version\": \"$SCYLLA_VERSION\", \"release\": \"$SCYLLA_RELEASE\", \"housekeeping\": $DIST }" > build/scylla.spec
|
||||
|
||||
# mock generates files owned by root, fix this up
|
||||
fix_ownership() {
|
||||
sudo chown "$(id -u):$(id -g)" -R "$@"
|
||||
}
|
||||
|
||||
if [ $JOBS -gt 0 ]; then
|
||||
RPM_JOBS_OPTS=(--define="_smp_mflags -j$JOBS")
|
||||
fi
|
||||
sudo mock --buildsrpm --root=$TARGET --resultdir=`pwd`/build/srpms --spec=build/scylla.spec --sources=build/scylla-$VERSION.tar $SRPM_OPTS "${RPM_JOBS_OPTS[@]}"
|
||||
fix_ownership build/srpms
|
||||
if [[ "$TARGET" =~ ^epel-7- ]]; then
|
||||
TARGET=scylla-$TARGET
|
||||
RPM_OPTS="$RPM_OPTS --configdir=dist/redhat/mock"
|
||||
fi
|
||||
sudo mock --rebuild --root=$TARGET --resultdir=`pwd`/build/rpms $RPM_OPTS "${RPM_JOBS_OPTS[@]}" build/srpms/scylla-$VERSION*.src.rpm
|
||||
fix_ownership build/rpms
|
||||
|
||||
6
dist/redhat/scylla.spec.mustache
vendored
6
dist/redhat/scylla.spec.mustache
vendored
@@ -56,9 +56,9 @@ License: AGPLv3
|
||||
URL: http://www.scylladb.com/
|
||||
BuildRequires: libaio-devel libstdc++-devel cryptopp-devel hwloc-devel numactl-devel libpciaccess-devel libxml2-devel zlib-devel thrift-devel yaml-cpp-devel lz4-devel snappy-devel jsoncpp-devel systemd-devel xz-devel pcre-devel elfutils-libelf-devel bzip2-devel keyutils-libs-devel xfsprogs-devel make gnutls-devel systemd-devel lksctp-tools-devel protobuf-devel protobuf-compiler systemtap-sdt-devel ninja-build cmake python ragel grep kernel-headers
|
||||
%{?fedora:BuildRequires: boost-devel antlr3-tool antlr3-C++-devel python3 gcc-c++ libasan libubsan python3-pyparsing dnf-yum python2-pystache}
|
||||
%{?rhel:BuildRequires: scylla-libstdc++73-static scylla-boost163-devel scylla-boost163-static scylla-antlr35-tool scylla-antlr35-C++-devel python34 scylla-gcc73-c++, scylla-python34-pyparsing20 yaml-cpp-static pystache python-setuptools}
|
||||
%{?rhel:BuildRequires: scylla-libstdc++73-static scylla-boost163-devel scylla-boost163-static scylla-antlr35-tool scylla-antlr35-C++-devel python36 scylla-gcc73-c++, scylla-python36-pyparsing20 yaml-cpp-static pystache python-setuptools}
|
||||
Requires: scylla-conf systemd-libs hwloc PyYAML python-urwid pciutils pyparsing python-requests curl util-linux python-setuptools pciutils python3-pyudev mdadm xfsprogs
|
||||
%{?rhel:Requires: python34 python34-PyYAML kernel >= 3.10.0-514}
|
||||
%{?rhel:Requires: python36 python36-PyYAML kernel >= 3.10.0-514}
|
||||
%{?fedora:Requires: python3 python3-PyYAML}
|
||||
Conflicts: abrt
|
||||
%ifarch x86_64
|
||||
@@ -97,7 +97,7 @@ cflags="--cflags=${defines[*]}"
|
||||
%endif
|
||||
%if 0%{?rhel}
|
||||
. /etc/profile.d/scylla.sh
|
||||
python3.4 ./configure.py %{?configure_opt} --with=scylla --with=iotune --mode=release "$cflags" --static-boost --static-yaml-cpp --compiler=/opt/scylladb/bin/g++-7.3 --python python3.4 --ldflag=-Wl,-rpath=/opt/scylladb/lib64
|
||||
python3.6 ./configure.py %{?configure_opt} --with=scylla --with=iotune --mode=release "$cflags" --static-boost --static-yaml-cpp --compiler=/opt/scylladb/bin/g++-7.3 --python python3.6 --ldflag=-Wl,-rpath=/opt/scylladb/lib64
|
||||
%endif
|
||||
ninja-build %{?_smp_mflags} build/release/scylla build/release/iotune
|
||||
|
||||
|
||||
@@ -449,9 +449,13 @@ GCC6_CONCEPT(requires requires(StopCondition stop, ConsumeMutationFragment consu
|
||||
{ consume_mf(std::move(mf)) } -> void;
|
||||
{ consume_eos() } -> future<>;
|
||||
})
|
||||
future<> consume_mutation_fragments_until(flat_mutation_reader& r, StopCondition&& stop,
|
||||
ConsumeMutationFragment&& consume_mf, ConsumeEndOfStream&& consume_eos) {
|
||||
return do_until([stop] { return stop(); }, [&r, stop, consume_mf, consume_eos] {
|
||||
future<> consume_mutation_fragments_until(
|
||||
flat_mutation_reader& r,
|
||||
StopCondition&& stop,
|
||||
ConsumeMutationFragment&& consume_mf,
|
||||
ConsumeEndOfStream&& consume_eos,
|
||||
db::timeout_clock::time_point timeout) {
|
||||
return do_until([stop] { return stop(); }, [&r, stop, consume_mf, consume_eos, timeout] {
|
||||
while (!r.is_buffer_empty()) {
|
||||
consume_mf(r.pop_mutation_fragment());
|
||||
if (stop()) {
|
||||
@@ -461,7 +465,7 @@ future<> consume_mutation_fragments_until(flat_mutation_reader& r, StopCondition
|
||||
if (r.is_end_of_stream()) {
|
||||
return consume_eos();
|
||||
}
|
||||
return r.fill_buffer();
|
||||
return r.fill_buffer(timeout);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -129,26 +129,8 @@ public:
|
||||
update_is_normal();
|
||||
}
|
||||
|
||||
void apply_application_state(application_state key, versioned_value&& value) {
|
||||
auto&& e = _application_state[key];
|
||||
if (e.version < value.version) {
|
||||
e = std::move(value);
|
||||
}
|
||||
update_is_normal();
|
||||
}
|
||||
|
||||
void apply_application_state(application_state key, const versioned_value& value) {
|
||||
auto&& e = _application_state[key];
|
||||
if (e.version < value.version) {
|
||||
e = value;
|
||||
}
|
||||
update_is_normal();
|
||||
}
|
||||
|
||||
void apply_application_state(const endpoint_state& es) {
|
||||
for (auto&& e : es._application_state) {
|
||||
apply_application_state(e.first, e.second);
|
||||
}
|
||||
void add_application_state(const endpoint_state& es) {
|
||||
_application_state = es._application_state;
|
||||
update_is_normal();
|
||||
}
|
||||
|
||||
|
||||
@@ -923,7 +923,7 @@ void gossiper::make_random_gossip_digest(std::vector<gossip_digest>& g_digests)
|
||||
future<> gossiper::replicate(inet_address ep, const endpoint_state& es) {
|
||||
return container().invoke_on_all([ep, es, orig = engine().cpu_id(), self = shared_from_this()] (gossiper& g) {
|
||||
if (engine().cpu_id() != orig) {
|
||||
g.endpoint_state_map[ep].apply_application_state(es);
|
||||
g.endpoint_state_map[ep].add_application_state(es);
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -932,7 +932,7 @@ future<> gossiper::replicate(inet_address ep, const std::map<application_state,
|
||||
return container().invoke_on_all([ep, &src, &changed, orig = engine().cpu_id(), self = shared_from_this()] (gossiper& g) {
|
||||
if (engine().cpu_id() != orig) {
|
||||
for (auto&& key : changed) {
|
||||
g.endpoint_state_map[ep].apply_application_state(key, src.at(key));
|
||||
g.endpoint_state_map[ep].add_application_state(key, src.at(key));
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -941,7 +941,7 @@ future<> gossiper::replicate(inet_address ep, const std::map<application_state,
|
||||
future<> gossiper::replicate(inet_address ep, application_state key, const versioned_value& value) {
|
||||
return container().invoke_on_all([ep, key, &value, orig = engine().cpu_id(), self = shared_from_this()] (gossiper& g) {
|
||||
if (engine().cpu_id() != orig) {
|
||||
g.endpoint_state_map[ep].apply_application_state(key, value);
|
||||
g.endpoint_state_map[ep].add_application_state(key, value);
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -1168,11 +1168,13 @@ stdx::optional<endpoint_state> gossiper::get_endpoint_state_for_endpoint(inet_ad
|
||||
}
|
||||
}
|
||||
|
||||
void gossiper::reset_endpoint_state_map() {
|
||||
endpoint_state_map.clear();
|
||||
future<> gossiper::reset_endpoint_state_map() {
|
||||
_unreachable_endpoints.clear();
|
||||
_live_endpoints.clear();
|
||||
_live_endpoints_just_added.clear();
|
||||
return container().invoke_on_all([] (gossiper& g) {
|
||||
g.endpoint_state_map.clear();
|
||||
});
|
||||
}
|
||||
|
||||
std::unordered_map<inet_address, endpoint_state>& gms::gossiper::get_endpoint_states() {
|
||||
@@ -1655,6 +1657,7 @@ void gossiper::maybe_initialize_local_state(int generation_nbr) {
|
||||
}
|
||||
}
|
||||
|
||||
// Runs inside seastar::async context
|
||||
void gossiper::add_saved_endpoint(inet_address ep) {
|
||||
if (ep == get_broadcast_address()) {
|
||||
logger.debug("Attempt to add self as saved endpoint");
|
||||
@@ -1680,6 +1683,7 @@ void gossiper::add_saved_endpoint(inet_address ep) {
|
||||
}
|
||||
ep_state.mark_dead();
|
||||
endpoint_state_map[ep] = ep_state;
|
||||
replicate(ep, ep_state).get();
|
||||
_unreachable_endpoints[ep] = now();
|
||||
logger.trace("Adding saved endpoint {} {}", ep, ep_state.get_heart_beat_state().get_generation());
|
||||
}
|
||||
@@ -1915,6 +1919,7 @@ void gossiper::mark_as_shutdown(const inet_address& endpoint) {
|
||||
auto& ep_state = *es;
|
||||
ep_state.add_application_state(application_state::STATUS, storage_service_value_factory().shutdown(true));
|
||||
ep_state.get_heart_beat_state().force_highest_possible_version_unsafe();
|
||||
replicate(endpoint, ep_state).get();
|
||||
mark_dead(endpoint, ep_state);
|
||||
get_local_failure_detector().force_conviction(endpoint);
|
||||
}
|
||||
|
||||
@@ -418,7 +418,7 @@ public:
|
||||
stdx::optional<endpoint_state> get_endpoint_state_for_endpoint(inet_address ep) const;
|
||||
|
||||
// removes ALL endpoint states; should only be called after shadow gossip
|
||||
void reset_endpoint_state_map();
|
||||
future<> reset_endpoint_state_map();
|
||||
|
||||
std::unordered_map<inet_address, endpoint_state>& get_endpoint_states();
|
||||
|
||||
|
||||
36
imr/alloc.hh
36
imr/alloc.hh
@@ -84,6 +84,8 @@ template<typename Structure, typename CtxFactory>
|
||||
GCC6_CONCEPT(requires ContextFactory<CtxFactory>)
|
||||
class lsa_migrate_fn final : public migrate_fn_type, CtxFactory {
|
||||
public:
|
||||
using structure = Structure;
|
||||
|
||||
explicit lsa_migrate_fn(CtxFactory context_factory)
|
||||
: migrate_fn_type(1)
|
||||
, CtxFactory(std::move(context_factory))
|
||||
@@ -201,8 +203,21 @@ public:
|
||||
/// arguments are passed to `T::size_when_serialized`.
|
||||
///
|
||||
/// \return null pointer of type `uint8_t*`.
|
||||
template<typename T, typename MigrateFn, typename... Args>
|
||||
uint8_t* allocate(MigrateFn* migrate_fn, Args&&... args) noexcept {
|
||||
static_assert(std::is_same_v<typename MigrateFn::structure, T>);
|
||||
return do_allocate<T>(migrate_fn, std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
template<typename T, typename MigrateFn, typename... Args>
|
||||
auto allocate_nested(MigrateFn* migrate_fn, Args&&... args) noexcept {
|
||||
static_assert(std::is_same_v<typename MigrateFn::structure, T>);
|
||||
return do_allocate_nested<T>(migrate_fn, std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
private:
|
||||
template<typename T, typename... Args>
|
||||
uint8_t* allocate(migrate_fn_type* migrate_fn, Args&& ... args) noexcept {
|
||||
uint8_t* do_allocate(migrate_fn_type* migrate_fn, Args&&... args) noexcept {
|
||||
auto size = T::size_when_serialized(std::forward<Args>(args)...);
|
||||
_parent.request(size, migrate_fn);
|
||||
|
||||
@@ -216,7 +231,7 @@ public:
|
||||
}
|
||||
|
||||
template<typename T, typename... Args>
|
||||
auto allocate_nested(migrate_fn_type* migrate_fn, Args&& ... args) noexcept {
|
||||
auto do_allocate_nested(migrate_fn_type* migrate_fn, Args&& ... args) noexcept {
|
||||
auto n = _parent.request(0, migrate_fn);
|
||||
return T::get_sizer(continuation(_parent, n),
|
||||
std::forward<Args>(args)...);
|
||||
@@ -244,15 +259,28 @@ public:
|
||||
/// to the buffer requested in the sizing phase. Arguments are passed
|
||||
/// to `T::serialize`.
|
||||
/// \return pointer to the IMR object
|
||||
template<typename T, typename MigrateFn, typename... Args>
|
||||
uint8_t* allocate(MigrateFn* migrate_fn, Args&&... args) noexcept {
|
||||
static_assert(std::is_same_v<typename MigrateFn::structure, T>);
|
||||
return do_allocate<T>(migrate_fn, std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
template<typename T, typename MigrateFn, typename... Args>
|
||||
auto allocate_nested(MigrateFn* migrate_fn, Args&&... args) noexcept {
|
||||
static_assert(std::is_same_v<typename MigrateFn::structure, T>);
|
||||
return do_allocate_nested<T>(migrate_fn, std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
private:
|
||||
template<typename T, typename... Args>
|
||||
uint8_t* allocate(migrate_fn_type* migrate_fn, Args&& ... args) noexcept {
|
||||
uint8_t* do_allocate(migrate_fn_type* migrate_fn, Args&&... args) noexcept {
|
||||
auto ptr = _parent.next_object();
|
||||
T::serialize(ptr, std::forward<Args>(args)...);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
template<typename T, typename... Args>
|
||||
auto allocate_nested(migrate_fn_type*, Args&& ... args) noexcept {
|
||||
auto do_allocate_nested(migrate_fn_type*, Args&& ... args) noexcept {
|
||||
auto ptr = _parent.next_object();
|
||||
return T::get_serializer(ptr,
|
||||
continuation(ptr),
|
||||
|
||||
25
imr/utils.hh
25
imr/utils.hh
@@ -61,8 +61,12 @@ private:
|
||||
public:
|
||||
object_context(const uint8_t*, State... state) : _state { state... } { }
|
||||
template<typename Tag, typename... Args>
|
||||
Context context_for(const uint8_t* ptr, Args&&... args) const noexcept {
|
||||
return create(ptr, std::index_sequence_for<State...>());
|
||||
auto context_for(const uint8_t* ptr, Args&&... args) const noexcept {
|
||||
if constexpr (std::is_same_v<Tag, basic_object::tags::back_pointer>) {
|
||||
return no_context_t();
|
||||
} else {
|
||||
return create(ptr, std::index_sequence_for<State...>());
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -158,13 +162,22 @@ public:
|
||||
}
|
||||
|
||||
/// Create an IMR object
|
||||
template<typename Writer>
|
||||
template<typename Writer, typename MigrateFn>
|
||||
GCC6_CONCEPT(requires WriterAllocator<Writer, Structure>)
|
||||
static object make(Writer&& object_writer,
|
||||
allocation_strategy::migrate_fn migrate = &imr::alloc::default_lsa_migrate_fn<structure>::migrate_fn) {
|
||||
MigrateFn* migrate = &imr::alloc::default_lsa_migrate_fn<structure>::migrate_fn) {
|
||||
static_assert(std::is_same_v<typename MigrateFn::structure, structure>);
|
||||
return do_make(std::forward<Writer>(object_writer), migrate);
|
||||
}
|
||||
private:
|
||||
template<typename Writer>
|
||||
GCC6_CONCEPT(requires WriterAllocator<Writer, Structure>)
|
||||
static object do_make(Writer&& object_writer, allocation_strategy::migrate_fn migrate) {
|
||||
struct alloc_deleter {
|
||||
size_t _size;
|
||||
|
||||
void operator()(uint8_t* ptr) {
|
||||
current_allocator().free(ptr);
|
||||
current_allocator().free(ptr, _size);
|
||||
}
|
||||
};
|
||||
using alloc_unique_ptr = std::unique_ptr<uint8_t[], alloc_deleter>;
|
||||
@@ -176,7 +189,7 @@ public:
|
||||
auto& alloc = current_allocator();
|
||||
alloc::object_allocator allocator(alloc);
|
||||
auto obj_size = structure::size_when_serialized(writer, allocator.get_sizer());
|
||||
auto ptr = alloc_unique_ptr(static_cast<uint8_t*>(alloc.alloc(migrate, obj_size, 1)));
|
||||
auto ptr = alloc_unique_ptr(static_cast<uint8_t*>(alloc.alloc(migrate, obj_size, 1)), alloc_deleter { obj_size });
|
||||
allocator.allocate_all();
|
||||
structure::serialize(ptr.get(), writer, allocator.get_serializer());
|
||||
return object(ptr.release());
|
||||
|
||||
@@ -42,5 +42,5 @@ elif [ "$ID" = "fedora" ]; then
|
||||
yum install -y yaml-cpp-devel thrift-devel antlr3-tool antlr3-C++-devel jsoncpp-devel snappy-devel
|
||||
elif [ "$ID" = "centos" ]; then
|
||||
yum install -y yaml-cpp-devel thrift-devel scylla-antlr35-tool scylla-antlr35-C++-devel jsoncpp-devel snappy-devel scylla-boost163-static scylla-python34-pyparsing20 systemd-devel
|
||||
echo -e "Configure example:\n\tpython3.4 ./configure.py --enable-dpdk --mode=release --static-boost --compiler=/opt/scylladb/bin/g++-7.3 --python python3.4 --ldflag=-Wl,-rpath=/opt/scylladb/lib64 --cflags=-I/opt/scylladb/include --with-antlr3=/opt/scylladb/bin/antlr3"
|
||||
echo -e "Configure example:\n\tpython3.6 ./configure.py --enable-dpdk --mode=release --static-boost --compiler=/opt/scylladb/bin/g++-7.3 --python python3.6 --ldflag=-Wl,-rpath=/opt/scylladb/lib64 --cflags=-I/opt/scylladb/include --with-antlr3=/opt/scylladb/bin/antlr3"
|
||||
fi
|
||||
|
||||
80
json.cc
Normal file
80
json.cc
Normal file
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (C) 2018 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "json.hh"
|
||||
|
||||
namespace seastar {
|
||||
namespace json {
|
||||
|
||||
/// Tells whether `c` is one of the code units 0x00..0x1F, i.e. the range
/// that value_to_quoted_string() emits as an escape sequence.
static inline bool is_control_char(char c) {
    // Going through unsigned char maps any negative `char` value to >= 0x80,
    // so a single upper-bound comparison is enough regardless of whether
    // plain `char` is signed on this platform.
    const auto code_unit = static_cast<unsigned char>(c);
    return code_unit <= 0x1F;
}
|
||||
|
||||
static inline bool needs_escaping(const sstring& s) {
|
||||
return std::any_of(s.begin(), s.end(), [](char c) {return is_control_char(c) || c == '"' || c == '\\';});
|
||||
}
|
||||
|
||||
/// Wraps `value` in double quotes, escaping what must not appear raw inside
/// a quoted string.
///
/// - `"` and `\` are prefixed with a backslash.
/// - Backspace, form feed, newline, carriage return and tab use their short
///   two-character escapes (\b, \f, \n, \r, \t).
/// - Any other control character (see is_control_char) is written as \u
///   followed by four uppercase hexadecimal digits.
/// - Everything else is copied unchanged.
sstring value_to_quoted_string(const sstring& value) {
    // Fast path: nothing to escape, so only the surrounding quotes are added.
    const bool plain = !needs_escaping(value);
    if (plain) {
        return sprint("\"%s\"", value);
    }

    std::ostringstream out;
    // Sticky formatting flags used by the \uXXXX fallback below; setw() is
    // not sticky and is therefore applied per escape.
    out << std::hex << std::uppercase << std::setfill('0');

    // Emits a backslash followed by `letter`.
    const auto emit_escaped = [&out] (char letter) {
        out.put('\\').put(letter);
    };

    out.put('"');
    for (auto it = value.begin(); it != value.end(); ++it) {
        const char ch = *it;
        if (ch == '"' || ch == '\\') {
            emit_escaped(ch);
        } else if (ch == '\b') {
            emit_escaped('b');
        } else if (ch == '\f') {
            emit_escaped('f');
        } else if (ch == '\n') {
            emit_escaped('n');
        } else if (ch == '\r') {
            emit_escaped('r');
        } else if (ch == '\t') {
            emit_escaped('t');
        } else if (is_control_char(ch)) {
            out.put('\\').put('u') << std::setw(4) << static_cast<int>(ch);
        } else {
            out.put(ch);
        }
    }
    out.put('"');
    return out.str();
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
2
json.hh
2
json.hh
@@ -95,6 +95,8 @@ inline std::map<sstring, sstring> to_map(const sstring& raw) {
|
||||
return to_map(raw, std::map<sstring, sstring>());
|
||||
}
|
||||
|
||||
sstring value_to_quoted_string(const sstring& value);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
4
keys.hh
4
keys.hh
@@ -748,6 +748,10 @@ public:
|
||||
static const compound& get_compound_type(const schema& s) {
|
||||
return s.clustering_key_prefix_type();
|
||||
}
|
||||
|
||||
// Returns a prefix view constructed from a default-constructed bytes_view.
static clustering_key_prefix_view make_empty() {
|
||||
return { bytes_view() };
|
||||
}
|
||||
};
|
||||
|
||||
class clustering_key_prefix : public prefix_compound_wrapper<clustering_key_prefix, clustering_key_prefix_view, clustering_key> {
|
||||
|
||||
@@ -119,9 +119,17 @@ insert_token_range_to_sorted_container_while_unwrapping(
|
||||
const dht::token& tok,
|
||||
dht::token_range_vector& ret) {
|
||||
if (prev_tok < tok) {
|
||||
ret.emplace_back(
|
||||
dht::token_range::bound(prev_tok, false),
|
||||
dht::token_range::bound(tok, true));
|
||||
auto pos = ret.end();
|
||||
if (!ret.empty() && !std::prev(pos)->end()) {
|
||||
// We inserted a wrapped range (a, b] previously as
|
||||
// (-inf, b], (a, +inf). So now we insert in the next-to-last
|
||||
// position to keep the last range (a, +inf) at the end.
|
||||
pos = std::prev(pos);
|
||||
}
|
||||
ret.insert(pos,
|
||||
dht::token_range{
|
||||
dht::token_range::bound(prev_tok, false),
|
||||
dht::token_range::bound(tok, true)});
|
||||
} else {
|
||||
ret.emplace_back(
|
||||
dht::token_range::bound(prev_tok, false),
|
||||
|
||||
7
main.cc
7
main.cc
@@ -763,8 +763,11 @@ int main(int ac, char** av) {
|
||||
return service::get_local_storage_service().drain_on_shutdown();
|
||||
});
|
||||
|
||||
engine().at_exit([] {
|
||||
return view_builder.stop();
|
||||
engine().at_exit([cfg] {
|
||||
if (cfg->view_building()) {
|
||||
return view_builder.stop();
|
||||
}
|
||||
return make_ready_future<>();
|
||||
});
|
||||
|
||||
engine().at_exit([&db] {
|
||||
|
||||
@@ -262,11 +262,12 @@ void messaging_service::start_listen() {
|
||||
// FIXME: we don't set so.tcp_nodelay, because we can't tell at this point whether the connection will come from a
|
||||
// local or remote datacenter, and whether or not the connection will be used for gossip. We can fix
|
||||
// the first by wrapping its server_socket, but not the second.
|
||||
auto limits = rpc_resource_limits(_mcfg.rpc_memory_limit);
|
||||
if (!_server[0]) {
|
||||
auto listen = [&] (const gms::inet_address& a) {
|
||||
auto addr = ipv4_addr{a.raw_addr(), _port};
|
||||
return std::unique_ptr<rpc_protocol_server_wrapper>(new rpc_protocol_server_wrapper(*_rpc,
|
||||
so, addr, rpc_resource_limits(_mcfg.rpc_memory_limit)));
|
||||
so, addr, limits));
|
||||
};
|
||||
_server[0] = listen(_listen_address);
|
||||
if (listen_to_bc) {
|
||||
@@ -277,7 +278,7 @@ void messaging_service::start_listen() {
|
||||
if (!_server_tls[0]) {
|
||||
auto listen = [&] (const gms::inet_address& a) {
|
||||
return std::unique_ptr<rpc_protocol_server_wrapper>(
|
||||
[this, &so, &a] () -> std::unique_ptr<rpc_protocol_server_wrapper>{
|
||||
[this, &so, &a, limits] () -> std::unique_ptr<rpc_protocol_server_wrapper>{
|
||||
if (_encrypt_what == encrypt_what::none) {
|
||||
return nullptr;
|
||||
}
|
||||
@@ -285,7 +286,7 @@ void messaging_service::start_listen() {
|
||||
lo.reuse_address = true;
|
||||
auto addr = make_ipv4_address(ipv4_addr{a.raw_addr(), _ssl_port});
|
||||
return std::make_unique<rpc_protocol_server_wrapper>(*_rpc,
|
||||
so, seastar::tls::listen(_credentials, addr, lo));
|
||||
so, seastar::tls::listen(_credentials, addr, lo), limits);
|
||||
}());
|
||||
};
|
||||
_server_tls[0] = listen(_listen_address);
|
||||
|
||||
@@ -125,7 +125,7 @@ public:
|
||||
return _ck.equal(s, other._ck)
|
||||
&& _t == other._t
|
||||
&& _marker == other._marker
|
||||
&& _cells.equal(column_kind::static_column, s, other._cells, s);
|
||||
&& _cells.equal(column_kind::regular_column, s, other._cells, s);
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const clustering_row& row);
|
||||
|
||||
@@ -144,7 +144,14 @@ mutation_partition::mutation_partition(const schema& s, const mutation_partition
|
||||
, _static_row(s, column_kind::static_column, x._static_row)
|
||||
, _static_row_continuous(x._static_row_continuous)
|
||||
, _rows()
|
||||
, _row_tombstones(x._row_tombstones) {
|
||||
, _row_tombstones(x._row_tombstones)
|
||||
#ifdef SEASTAR_DEBUG
|
||||
, _schema_version(s.version())
|
||||
#endif
|
||||
{
|
||||
#ifdef SEASTAR_DEBUG
|
||||
assert(x._schema_version == _schema_version);
|
||||
#endif
|
||||
auto cloner = [&s] (const auto& x) {
|
||||
return current_allocator().construct<rows_entry>(s, x);
|
||||
};
|
||||
@@ -157,7 +164,14 @@ mutation_partition::mutation_partition(const mutation_partition& x, const schema
|
||||
, _static_row(schema, column_kind::static_column, x._static_row)
|
||||
, _static_row_continuous(x._static_row_continuous)
|
||||
, _rows()
|
||||
, _row_tombstones(x._row_tombstones, range_tombstone_list::copy_comparator_only()) {
|
||||
, _row_tombstones(x._row_tombstones, range_tombstone_list::copy_comparator_only())
|
||||
#ifdef SEASTAR_DEBUG
|
||||
, _schema_version(schema.version())
|
||||
#endif
|
||||
{
|
||||
#ifdef SEASTAR_DEBUG
|
||||
assert(x._schema_version == _schema_version);
|
||||
#endif
|
||||
try {
|
||||
for(auto&& r : ck_ranges) {
|
||||
for (const rows_entry& e : x.range(schema, r)) {
|
||||
@@ -180,7 +194,13 @@ mutation_partition::mutation_partition(mutation_partition&& x, const schema& sch
|
||||
, _static_row_continuous(x._static_row_continuous)
|
||||
, _rows(std::move(x._rows))
|
||||
, _row_tombstones(std::move(x._row_tombstones))
|
||||
#ifdef SEASTAR_DEBUG
|
||||
, _schema_version(schema.version())
|
||||
#endif
|
||||
{
|
||||
#ifdef SEASTAR_DEBUG
|
||||
assert(x._schema_version == _schema_version);
|
||||
#endif
|
||||
{
|
||||
auto deleter = current_deleter<rows_entry>();
|
||||
auto it = _rows.begin();
|
||||
@@ -220,6 +240,7 @@ mutation_partition::operator=(mutation_partition&& x) noexcept {
|
||||
}
|
||||
|
||||
void mutation_partition::ensure_last_dummy(const schema& s) {
|
||||
check_schema(s);
|
||||
if (_rows.empty() || !_rows.rbegin()->is_last_dummy()) {
|
||||
_rows.insert_before(_rows.end(),
|
||||
*current_allocator().construct<rows_entry>(s, rows_entry::last_dummy_tag(), is_continuous::yes));
|
||||
@@ -276,11 +297,16 @@ void deletable_row::apply(const schema& s, clustering_row cr) {
|
||||
|
||||
// Applies a single mutation fragment to this partition: after the schema
// check, the fragment is visited with a mutation_fragment_applier built
// from `s` and `*this`.
void
|
||||
mutation_partition::apply(const schema& s, const mutation_fragment& mf) {
|
||||
check_schema(s);
|
||||
mutation_fragment_applier applier{s, *this};
|
||||
mf.visit(applier);
|
||||
}
|
||||
|
||||
void mutation_partition::apply_monotonically(const schema& s, mutation_partition&& p, cache_tracker* tracker) {
|
||||
#ifdef SEASTAR_DEBUG
|
||||
assert(s.version() == _schema_version);
|
||||
assert(p._schema_version == _schema_version);
|
||||
#endif
|
||||
_tombstone.apply(p._tombstone);
|
||||
_row_tombstones.apply_monotonically(s, std::move(p._row_tombstones));
|
||||
_static_row.apply_monotonically(s, column_kind::static_column, std::move(p._static_row));
|
||||
@@ -356,6 +382,7 @@ void mutation_partition::apply_weak(const schema& s, mutation_partition&& p) {
|
||||
|
||||
tombstone
|
||||
mutation_partition::range_tombstone_for_row(const schema& schema, const clustering_key& key) const {
|
||||
check_schema(schema);
|
||||
tombstone t = _tombstone;
|
||||
if (!_row_tombstones.empty()) {
|
||||
auto found = _row_tombstones.search_tombstone_covering(schema, key);
|
||||
@@ -366,6 +393,7 @@ mutation_partition::range_tombstone_for_row(const schema& schema, const clusteri
|
||||
|
||||
row_tombstone
|
||||
mutation_partition::tombstone_for_row(const schema& schema, const clustering_key& key) const {
|
||||
check_schema(schema);
|
||||
row_tombstone t = row_tombstone(range_tombstone_for_row(schema, key));
|
||||
|
||||
auto j = _rows.find(key, rows_entry::compare(schema));
|
||||
@@ -378,6 +406,7 @@ mutation_partition::tombstone_for_row(const schema& schema, const clustering_key
|
||||
|
||||
row_tombstone
|
||||
mutation_partition::tombstone_for_row(const schema& schema, const rows_entry& e) const {
|
||||
check_schema(schema);
|
||||
row_tombstone t = e.row().deleted_at();
|
||||
t.apply(range_tombstone_for_row(schema, e.key()));
|
||||
return t;
|
||||
@@ -385,6 +414,7 @@ mutation_partition::tombstone_for_row(const schema& schema, const rows_entry& e)
|
||||
|
||||
void
|
||||
mutation_partition::apply_row_tombstone(const schema& schema, clustering_key_prefix prefix, tombstone t) {
|
||||
check_schema(schema);
|
||||
assert(!prefix.is_full(schema));
|
||||
auto start = prefix;
|
||||
_row_tombstones.apply(schema, {std::move(start), std::move(prefix), std::move(t)});
|
||||
@@ -392,11 +422,13 @@ mutation_partition::apply_row_tombstone(const schema& schema, clustering_key_pre
|
||||
|
||||
void
|
||||
mutation_partition::apply_row_tombstone(const schema& schema, range_tombstone rt) {
|
||||
check_schema(schema);
|
||||
_row_tombstones.apply(schema, std::move(rt));
|
||||
}
|
||||
|
||||
void
|
||||
mutation_partition::apply_delete(const schema& schema, const clustering_key_prefix& prefix, tombstone t) {
|
||||
check_schema(schema);
|
||||
if (prefix.is_empty(schema)) {
|
||||
apply(t);
|
||||
} else if (prefix.is_full(schema)) {
|
||||
@@ -408,6 +440,7 @@ mutation_partition::apply_delete(const schema& schema, const clustering_key_pref
|
||||
|
||||
void
|
||||
mutation_partition::apply_delete(const schema& schema, range_tombstone rt) {
|
||||
check_schema(schema);
|
||||
if (range_tombstone::is_single_clustering_row_tombstone(schema, rt.start, rt.start_kind, rt.end, rt.end_kind)) {
|
||||
apply_delete(schema, std::move(rt.start), std::move(rt.tomb));
|
||||
return;
|
||||
@@ -417,6 +450,7 @@ mutation_partition::apply_delete(const schema& schema, range_tombstone rt) {
|
||||
|
||||
void
|
||||
mutation_partition::apply_delete(const schema& schema, clustering_key&& prefix, tombstone t) {
|
||||
check_schema(schema);
|
||||
if (prefix.is_empty(schema)) {
|
||||
apply(t);
|
||||
} else if (prefix.is_full(schema)) {
|
||||
@@ -428,6 +462,7 @@ mutation_partition::apply_delete(const schema& schema, clustering_key&& prefix,
|
||||
|
||||
void
|
||||
mutation_partition::apply_delete(const schema& schema, clustering_key_prefix_view prefix, tombstone t) {
|
||||
check_schema(schema);
|
||||
if (prefix.is_empty(schema)) {
|
||||
apply(t);
|
||||
} else if (prefix.is_full(schema)) {
|
||||
@@ -451,12 +486,14 @@ void mutation_partition::insert_row(const schema& s, const clustering_key& key,
|
||||
}
|
||||
|
||||
void mutation_partition::insert_row(const schema& s, const clustering_key& key, const deletable_row& row) {
|
||||
check_schema(s);
|
||||
auto e = current_allocator().construct<rows_entry>(s, key, row);
|
||||
_rows.insert(_rows.end(), *e, rows_entry::compare(s));
|
||||
}
|
||||
|
||||
const row*
|
||||
mutation_partition::find_row(const schema& s, const clustering_key& key) const {
|
||||
check_schema(s);
|
||||
auto i = _rows.find(key, rows_entry::compare(s));
|
||||
if (i == _rows.end()) {
|
||||
return nullptr;
|
||||
@@ -466,6 +503,7 @@ mutation_partition::find_row(const schema& s, const clustering_key& key) const {
|
||||
|
||||
deletable_row&
|
||||
mutation_partition::clustered_row(const schema& s, clustering_key&& key) {
|
||||
check_schema(s);
|
||||
auto i = _rows.find(key, rows_entry::compare(s));
|
||||
if (i == _rows.end()) {
|
||||
auto e = current_allocator().construct<rows_entry>(std::move(key));
|
||||
@@ -477,6 +515,7 @@ mutation_partition::clustered_row(const schema& s, clustering_key&& key) {
|
||||
|
||||
deletable_row&
|
||||
mutation_partition::clustered_row(const schema& s, const clustering_key& key) {
|
||||
check_schema(s);
|
||||
auto i = _rows.find(key, rows_entry::compare(s));
|
||||
if (i == _rows.end()) {
|
||||
auto e = current_allocator().construct<rows_entry>(key);
|
||||
@@ -488,6 +527,7 @@ mutation_partition::clustered_row(const schema& s, const clustering_key& key) {
|
||||
|
||||
deletable_row&
|
||||
mutation_partition::clustered_row(const schema& s, clustering_key_view key) {
|
||||
check_schema(s);
|
||||
auto i = _rows.find(key, rows_entry::compare(s));
|
||||
if (i == _rows.end()) {
|
||||
auto e = current_allocator().construct<rows_entry>(key);
|
||||
@@ -499,6 +539,7 @@ mutation_partition::clustered_row(const schema& s, clustering_key_view key) {
|
||||
|
||||
deletable_row&
|
||||
mutation_partition::clustered_row(const schema& s, position_in_partition_view pos, is_dummy dummy, is_continuous continuous) {
|
||||
check_schema(s);
|
||||
auto i = _rows.find(pos, rows_entry::compare(s));
|
||||
if (i == _rows.end()) {
|
||||
auto e = current_allocator().construct<rows_entry>(s, pos, dummy, continuous);
|
||||
@@ -510,6 +551,7 @@ mutation_partition::clustered_row(const schema& s, position_in_partition_view po
|
||||
|
||||
mutation_partition::rows_type::const_iterator
|
||||
mutation_partition::lower_bound(const schema& schema, const query::clustering_range& r) const {
|
||||
check_schema(schema);
|
||||
if (!r.start()) {
|
||||
return std::cbegin(_rows);
|
||||
}
|
||||
@@ -518,6 +560,7 @@ mutation_partition::lower_bound(const schema& schema, const query::clustering_ra
|
||||
|
||||
mutation_partition::rows_type::const_iterator
|
||||
mutation_partition::upper_bound(const schema& schema, const query::clustering_range& r) const {
|
||||
check_schema(schema);
|
||||
if (!r.end()) {
|
||||
return std::cend(_rows);
|
||||
}
|
||||
@@ -526,6 +569,7 @@ mutation_partition::upper_bound(const schema& schema, const query::clustering_ra
|
||||
|
||||
// Returns the iterator range [lower_bound(schema, r), upper_bound(schema, r))
// over this partition's rows for the clustering range `r`.
boost::iterator_range<mutation_partition::rows_type::const_iterator>
|
||||
mutation_partition::range(const schema& schema, const query::clustering_range& r) const {
|
||||
check_schema(schema);
|
||||
return boost::make_iterator_range(lower_bound(schema, r), upper_bound(schema, r));
|
||||
}
|
||||
|
||||
@@ -562,6 +606,7 @@ mutation_partition::upper_bound(const schema& schema, const query::clustering_ra
|
||||
template<typename Func>
|
||||
void mutation_partition::for_each_row(const schema& schema, const query::clustering_range& row_range, bool reversed, Func&& func) const
|
||||
{
|
||||
check_schema(schema);
|
||||
auto r = range(schema, row_range);
|
||||
if (!reversed) {
|
||||
for (const auto& e : r) {
|
||||
@@ -778,6 +823,7 @@ bool has_any_live_data(const schema& s, column_kind kind, const row& cells, tomb
|
||||
|
||||
void
|
||||
mutation_partition::query_compacted(query::result::partition_writer& pw, const schema& s, uint32_t limit) const {
|
||||
check_schema(s);
|
||||
const query::partition_slice& slice = pw.slice();
|
||||
max_timestamp max_ts{pw.last_modified()};
|
||||
|
||||
@@ -996,6 +1042,10 @@ bool mutation_partition::equal(const schema& s, const mutation_partition& p) con
|
||||
}
|
||||
|
||||
bool mutation_partition::equal(const schema& this_schema, const mutation_partition& p, const schema& p_schema) const {
|
||||
#ifdef SEASTAR_DEBUG
|
||||
assert(_schema_version == this_schema.version());
|
||||
assert(p._schema_version == p_schema.version());
|
||||
#endif
|
||||
if (_tombstone != p._tombstone) {
|
||||
return false;
|
||||
}
|
||||
@@ -1095,7 +1145,7 @@ row::apply_monotonically(const column_definition& column, atomic_cell_or_collect
|
||||
if (_type == storage_type::vector && id < max_vector_size) {
|
||||
if (id >= _storage.vector.v.size()) {
|
||||
_storage.vector.v.resize(id);
|
||||
_storage.vector.v.emplace_back(cell_and_hash{std::move(value), std::move(hash)});
|
||||
_storage.vector.v.emplace_back(std::move(value), std::move(hash));
|
||||
_storage.vector.present.set(id);
|
||||
_size++;
|
||||
} else if (auto& cell_and_hash = _storage.vector.v[id]; !bool(cell_and_hash.cell)) {
|
||||
@@ -1124,6 +1174,7 @@ row::apply_monotonically(const column_definition& column, atomic_cell_or_collect
|
||||
void
|
||||
row::append_cell(column_id id, atomic_cell_or_collection value) {
|
||||
if (_type == storage_type::vector && id < max_vector_size) {
|
||||
assert(_storage.vector.v.size() <= id);
|
||||
_storage.vector.v.resize(id);
|
||||
_storage.vector.v.emplace_back(cell_and_hash{std::move(value), cell_hash_opt()});
|
||||
_storage.vector.present.set(id);
|
||||
@@ -1162,7 +1213,7 @@ row::find_cell(column_id id) const {
|
||||
size_t row::external_memory_usage(const schema& s, column_kind kind) const {
|
||||
size_t mem = 0;
|
||||
if (_type == storage_type::vector) {
|
||||
mem += _storage.vector.v.external_memory_usage();
|
||||
mem += _storage.vector.v.used_space_external_memory_usage();
|
||||
column_id id = 0;
|
||||
for (auto&& c_a_h : _storage.vector.v) {
|
||||
auto& cdef = s.column_at(kind, id++);
|
||||
@@ -1188,6 +1239,7 @@ size_t rows_entry::memory_usage(const schema& s) const {
|
||||
}
|
||||
|
||||
size_t mutation_partition::external_memory_usage(const schema& s) const {
|
||||
check_schema(s);
|
||||
size_t sum = 0;
|
||||
sum += static_row().external_memory_usage(s, column_kind::static_column);
|
||||
for (auto& clr : clustered_rows()) {
|
||||
@@ -1206,6 +1258,7 @@ void mutation_partition::trim_rows(const schema& s,
|
||||
const std::vector<query::clustering_range>& row_ranges,
|
||||
Func&& func)
|
||||
{
|
||||
check_schema(s);
|
||||
static_assert(std::is_same<stop_iteration, std::result_of_t<Func(rows_entry&)>>::value, "Bad func signature");
|
||||
|
||||
stop_iteration stop = stop_iteration::no;
|
||||
@@ -1250,6 +1303,7 @@ uint32_t mutation_partition::do_compact(const schema& s,
|
||||
uint32_t row_limit,
|
||||
can_gc_fn& can_gc)
|
||||
{
|
||||
check_schema(s);
|
||||
assert(row_limit > 0);
|
||||
|
||||
auto gc_before = saturating_subtract(query_time, s.gc_grace_seconds());
|
||||
@@ -1315,12 +1369,14 @@ mutation_partition::compact_for_query(
|
||||
bool reverse,
|
||||
uint32_t row_limit)
|
||||
{
|
||||
check_schema(s);
|
||||
return do_compact(s, query_time, row_ranges, reverse, row_limit, always_gc);
|
||||
}
|
||||
|
||||
void mutation_partition::compact_for_compaction(const schema& s,
|
||||
can_gc_fn& can_gc, gc_clock::time_point compaction_time)
|
||||
{
|
||||
check_schema(s);
|
||||
static const std::vector<query::clustering_range> all_rows = {
|
||||
query::clustering_range::make_open_ended_both_sides()
|
||||
};
|
||||
@@ -1354,11 +1410,13 @@ row::is_live(const schema& s, column_kind kind, tombstone base_tombstone, gc_clo
|
||||
|
||||
// Reports the result of has_any_live_data() for the static row, evaluated
// against the partition tombstone (_tombstone) at `query_time`.
bool
|
||||
mutation_partition::is_static_row_live(const schema& s, gc_clock::time_point query_time) const {
|
||||
check_schema(s);
|
||||
return has_any_live_data(s, column_kind::static_column, static_row(), _tombstone, query_time);
|
||||
}
|
||||
|
||||
size_t
|
||||
mutation_partition::live_row_count(const schema& s, gc_clock::time_point query_time) const {
|
||||
check_schema(s);
|
||||
size_t count = 0;
|
||||
|
||||
for (const rows_entry& e : non_dummy_rows()) {
|
||||
@@ -1704,6 +1762,7 @@ row row::difference(const schema& s, column_kind kind, const row& other) const
|
||||
|
||||
mutation_partition mutation_partition::difference(schema_ptr s, const mutation_partition& other) const
|
||||
{
|
||||
check_schema(*s);
|
||||
mutation_partition mp(s);
|
||||
if (_tombstone > other._tombstone) {
|
||||
mp.apply(_tombstone);
|
||||
@@ -1734,6 +1793,7 @@ mutation_partition mutation_partition::difference(schema_ptr s, const mutation_p
|
||||
}
|
||||
|
||||
void mutation_partition::accept(const schema& s, mutation_partition_visitor& v) const {
|
||||
check_schema(s);
|
||||
v.accept_partition_tombstone(_tombstone);
|
||||
_static_row.for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) {
|
||||
const column_definition& def = s.static_column_at(id);
|
||||
@@ -1817,9 +1877,10 @@ void mutation_querier::query_static_row(const row& r, tombstone current_tombston
|
||||
} else if (_short_reads_allowed) {
|
||||
seastar::measuring_output_stream stream;
|
||||
ser::qr_partition__static_row__cells<seastar::measuring_output_stream> out(stream, { });
|
||||
auto start = stream.size();
|
||||
get_compacted_row_slice(_schema, slice, column_kind::static_column,
|
||||
r, slice.static_columns, _static_cells_wr);
|
||||
_memory_accounter.update(stream.size());
|
||||
r, slice.static_columns, out);
|
||||
_memory_accounter.update(stream.size() - start);
|
||||
}
|
||||
if (_pw.requested_digest()) {
|
||||
max_timestamp max_ts{_pw.last_modified()};
|
||||
@@ -1880,8 +1941,9 @@ stop_iteration mutation_querier::consume(clustering_row&& cr, row_tombstone curr
|
||||
} else if (_short_reads_allowed) {
|
||||
seastar::measuring_output_stream stream;
|
||||
ser::qr_partition__rows<seastar::measuring_output_stream> out(stream, { });
|
||||
auto start = stream.size();
|
||||
write_row(out);
|
||||
stop = _memory_accounter.update_and_check(stream.size());
|
||||
stop = _memory_accounter.update_and_check(stream.size() - start);
|
||||
}
|
||||
|
||||
_live_clustering_rows++;
|
||||
@@ -2165,6 +2227,9 @@ mutation_partition::mutation_partition(mutation_partition::incomplete_tag, const
|
||||
, _static_row_continuous(!s.has_static_columns())
|
||||
, _rows()
|
||||
, _row_tombstones(s)
|
||||
#ifdef SEASTAR_DEBUG
|
||||
, _schema_version(s.version())
|
||||
#endif
|
||||
{
|
||||
_rows.insert_before(_rows.end(),
|
||||
*current_allocator().construct<rows_entry>(s, rows_entry::last_dummy_tag(), is_continuous::no));
|
||||
@@ -2196,6 +2261,7 @@ void mutation_partition::make_fully_continuous() {
|
||||
}
|
||||
|
||||
clustering_interval_set mutation_partition::get_continuity(const schema& s, is_continuous cont) const {
|
||||
check_schema(s);
|
||||
clustering_interval_set result;
|
||||
auto i = _rows.begin();
|
||||
auto prev_pos = position_in_partition::before_all_clustered_rows();
|
||||
@@ -2245,6 +2311,7 @@ stop_iteration mutation_partition::clear_gently(cache_tracker* tracker) noexcept
|
||||
|
||||
bool
|
||||
mutation_partition::check_continuity(const schema& s, const position_range& r, is_continuous cont) const {
|
||||
check_schema(s);
|
||||
auto less = rows_entry::compare(s);
|
||||
auto i = _rows.lower_bound(r.start(), less);
|
||||
auto end = _rows.lower_bound(r.end(), less);
|
||||
|
||||
@@ -74,6 +74,15 @@ using cell_hash_opt = seastar::optimized_optional<cell_hash>;
|
||||
// Pairs a cell with an optional hash value.
// Only move construction/assignment are declared (both noexcept), so the
// implicit copy operations are not available for this type.
struct cell_and_hash {
|
||||
atomic_cell_or_collection cell;
|
||||
// `mutable`: the hash can be assigned through a const cell_and_hash.
mutable cell_hash_opt hash;
|
||||
|
||||
cell_and_hash() = default;
|
||||
cell_and_hash(cell_and_hash&&) noexcept = default;
|
||||
cell_and_hash& operator=(cell_and_hash&&) noexcept = default;
|
||||
|
||||
// Takes ownership of `cell` (moved in) and stores a copy of `hash`.
cell_and_hash(atomic_cell_or_collection&& cell, cell_hash_opt hash)
|
||||
: cell(std::move(cell))
|
||||
, hash(hash)
|
||||
{ }
|
||||
};
|
||||
|
||||
//
|
||||
@@ -896,6 +905,9 @@ private:
|
||||
// Contains only strict prefixes so that we don't have to lookup full keys
|
||||
// in both _row_tombstones and _rows.
|
||||
range_tombstone_list _row_tombstones;
|
||||
#ifdef SEASTAR_DEBUG
|
||||
table_schema_version _schema_version;
|
||||
#endif
|
||||
|
||||
friend class mutation_partition_applier;
|
||||
friend class converting_mutation_partition_applier;
|
||||
@@ -910,10 +922,16 @@ public:
|
||||
mutation_partition(schema_ptr s)
|
||||
: _rows()
|
||||
, _row_tombstones(*s)
|
||||
#ifdef SEASTAR_DEBUG
|
||||
, _schema_version(s->version())
|
||||
#endif
|
||||
{ }
|
||||
mutation_partition(mutation_partition& other, copy_comparators_only)
|
||||
: _rows()
|
||||
, _row_tombstones(other._row_tombstones, range_tombstone_list::copy_comparator_only())
|
||||
#ifdef SEASTAR_DEBUG
|
||||
, _schema_version(other._schema_version)
|
||||
#endif
|
||||
{ }
|
||||
mutation_partition(mutation_partition&&) = default;
|
||||
mutation_partition(const schema& s, const mutation_partition&);
|
||||
@@ -1113,6 +1131,12 @@ private:
|
||||
template<typename Func>
|
||||
void for_each_row(const schema& schema, const query::clustering_range& row_range, bool reversed, Func&& func) const;
|
||||
friend class counter_write_query_result_builder;
|
||||
|
||||
// Sanity check used at the top of schema-taking member functions.
// When SEASTAR_DEBUG is defined, asserts that `s` has the version recorded
// in _schema_version; otherwise the body is empty.
void check_schema(const schema& s) const {
|
||||
#ifdef SEASTAR_DEBUG
|
||||
assert(s.version() == _schema_version);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
inline
|
||||
|
||||
@@ -312,7 +312,7 @@ partition_version& partition_entry::add_version(const schema& s, cache_tracker*
|
||||
|
||||
void partition_entry::apply(const schema& s, const mutation_partition& mp, const schema& mp_schema)
|
||||
{
|
||||
apply(s, mutation_partition(s, mp), mp_schema);
|
||||
apply(s, mutation_partition(mp_schema, mp), mp_schema);
|
||||
}
|
||||
|
||||
void partition_entry::apply(const schema& s, mutation_partition&& mp, const schema& mp_schema)
|
||||
|
||||
@@ -38,7 +38,7 @@ class autoupdating_underlying_reader final {
|
||||
row_cache& _cache;
|
||||
read_context& _read_context;
|
||||
stdx::optional<flat_mutation_reader> _reader;
|
||||
utils::phased_barrier::phase_type _reader_creation_phase;
|
||||
utils::phased_barrier::phase_type _reader_creation_phase = 0;
|
||||
dht::partition_range _range = { };
|
||||
stdx::optional<dht::decorated_key> _last_key;
|
||||
stdx::optional<dht::decorated_key> _new_last_key;
|
||||
@@ -105,7 +105,6 @@ public:
|
||||
return make_ready_future<>();
|
||||
}
|
||||
utils::phased_barrier::phase_type creation_phase() const {
|
||||
assert(_reader);
|
||||
return _reader_creation_phase;
|
||||
}
|
||||
const dht::partition_range& range() const {
|
||||
|
||||
2
seastar
2
seastar
Submodule seastar updated: 814a0552b6...efda4281c2
@@ -54,7 +54,7 @@ static logging::logger mlogger("migration_task");
|
||||
future<> migration_task::run_may_throw(distributed<service::storage_proxy>& proxy, const gms::inet_address& endpoint)
|
||||
{
|
||||
if (!gms::get_failure_detector().local().is_alive(endpoint)) {
|
||||
mlogger.error("Can't send migration request: node {} is down.", endpoint);
|
||||
mlogger.warn("Can't send migration request: node {} is down.", endpoint);
|
||||
return make_ready_future<>();
|
||||
}
|
||||
netw::messaging_service::msg_addr id{endpoint, 0};
|
||||
|
||||
@@ -144,7 +144,11 @@ future<lowres_clock::duration> cache_hitrate_calculator::recalculate_hitrates()
|
||||
return _db.invoke_on_all([this, rates = std::move(rates), cpuid = engine().cpu_id()] (database& db) {
|
||||
sstring gstate;
|
||||
for (auto& cf : db.get_column_families() | boost::adaptors::filtered(non_system_filter)) {
|
||||
stat s = rates.at(cf.first);
|
||||
auto it = rates.find(cf.first);
|
||||
if (it == rates.end()) { // a table may be added before map/reduce completes and this code runs
|
||||
continue;
|
||||
}
|
||||
stat s = it->second;
|
||||
float rate = 0;
|
||||
if (s.h) {
|
||||
rate = s.h / (s.h + s.m);
|
||||
|
||||
@@ -1980,18 +1980,21 @@ public:
|
||||
_timeout.arm(timeout);
|
||||
}
|
||||
virtual ~abstract_read_resolver() {};
|
||||
virtual void on_error(gms::inet_address ep) = 0;
|
||||
virtual void on_error(gms::inet_address ep, bool disconnect) = 0;
|
||||
future<> done() {
|
||||
return _done_promise.get_future();
|
||||
}
|
||||
void error(gms::inet_address ep, std::exception_ptr eptr) {
|
||||
sstring why;
|
||||
bool disconnect = false;
|
||||
try {
|
||||
std::rethrow_exception(eptr);
|
||||
} catch (rpc::closed_error&) {
|
||||
return; // do not report connection closed exception, gossiper does that
|
||||
// do not report connection closed exception, gossiper does that
|
||||
disconnect = true;
|
||||
} catch (rpc::timeout_error&) {
|
||||
return; // do not report timeouts, the whole operation will timeout and be reported
|
||||
// do not report timeouts, the whole operation will timeout and be reported
|
||||
return; // also do not report timeout as replica failure for the same reason
|
||||
} catch(std::exception& e) {
|
||||
why = e.what();
|
||||
} catch(...) {
|
||||
@@ -1999,10 +2002,12 @@ public:
|
||||
}
|
||||
|
||||
if (!_request_failed) { // request may fail only once.
|
||||
on_error(ep);
|
||||
on_error(ep, disconnect);
|
||||
}
|
||||
|
||||
slogger.error("Exception when communicating with {}: {}", ep, why);
|
||||
if (why.length()) {
|
||||
slogger.error("Exception when communicating with {}: {}", ep, why);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -2077,10 +2082,16 @@ public:
|
||||
_done_promise.set_value();
|
||||
}
|
||||
}
|
||||
void on_error(gms::inet_address ep) override {
|
||||
void on_error(gms::inet_address ep, bool disconnect) override {
|
||||
if (waiting_for(ep)) {
|
||||
_failed++;
|
||||
}
|
||||
if (disconnect && _block_for == _target_count_for_cl) {
|
||||
// if the error is because of a connection disconnect and there are no targets to speculate
|
||||
// wait for timeout in hope that the client will issue speculative read
|
||||
// FIXME: resolver should have access to all replicas and try another one in this case
|
||||
return;
|
||||
}
|
||||
if (_block_for + _failed > _target_count_for_cl) {
|
||||
fail_request(std::make_exception_ptr(read_failure_exception(_schema->ks_name(), _schema->cf_name(), _cl, _cl_responses, _failed, _block_for, _data_result)));
|
||||
}
|
||||
@@ -2406,7 +2417,7 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
void on_error(gms::inet_address ep) override {
|
||||
void on_error(gms::inet_address ep, bool disconnect) override {
|
||||
fail_request(std::make_exception_ptr(read_failure_exception(_schema->ks_name(), _schema->cf_name(), _cl, response_count(), 1, _targets_count, response_count() != 0)));
|
||||
}
|
||||
uint32_t max_live_count() const {
|
||||
@@ -3701,6 +3712,7 @@ future<> storage_proxy::truncate_blocking(sstring keyspace, sstring cfname) {
|
||||
std::rethrow_exception(ep);
|
||||
} catch (rpc::timeout_error& e) {
|
||||
slogger.trace("Truncation of {} timed out: {}", cfname, e.what());
|
||||
throw;
|
||||
} catch (...) {
|
||||
throw;
|
||||
}
|
||||
|
||||
@@ -303,7 +303,7 @@ void storage_service::prepare_to_join(std::vector<inet_address> loaded_endpoints
|
||||
gossiper.check_knows_remote_features(local_features, peer_features);
|
||||
}
|
||||
|
||||
gossiper.reset_endpoint_state_map();
|
||||
gossiper.reset_endpoint_state_map().get();
|
||||
for (auto ep : loaded_endpoints) {
|
||||
gossiper.add_saved_endpoint(ep);
|
||||
}
|
||||
@@ -317,7 +317,7 @@ void storage_service::prepare_to_join(std::vector<inet_address> loaded_endpoints
|
||||
slogger.info("Checking remote features with gossip");
|
||||
gossiper.do_shadow_round().get();
|
||||
gossiper.check_knows_remote_features(local_features);
|
||||
gossiper.reset_endpoint_state_map();
|
||||
gossiper.reset_endpoint_state_map().get();
|
||||
for (auto ep : loaded_endpoints) {
|
||||
gossiper.add_saved_endpoint(ep);
|
||||
}
|
||||
@@ -419,13 +419,9 @@ void storage_service::join_token_ring(int delay) {
|
||||
db::system_keyspace::set_bootstrap_state(db::system_keyspace::bootstrap_state::IN_PROGRESS).get();
|
||||
}
|
||||
set_mode(mode::JOINING, "waiting for ring information", true);
|
||||
// first sleep the delay to make sure we see all our peers
|
||||
for (int i = 0; i < delay; i += 1000) {
|
||||
// if we see schema, we can proceed to the next check directly
|
||||
if (_db.local().get_version() != database::empty_version) {
|
||||
slogger.debug("got schema: {}", _db.local().get_version());
|
||||
break;
|
||||
}
|
||||
auto& gossiper = gms::get_gossiper().local();
|
||||
// first sleep the delay to make sure we see *at least* one other node
|
||||
for (int i = 0; i < delay && gossiper.get_live_members().size() < 2; i += 1000) {
|
||||
sleep(std::chrono::seconds(1)).get();
|
||||
}
|
||||
// if our schema hasn't matched yet, keep sleeping until it does
|
||||
@@ -484,7 +480,6 @@ void storage_service::join_token_ring(int delay) {
|
||||
for (auto token : _bootstrap_tokens) {
|
||||
auto existing = _token_metadata.get_endpoint(token);
|
||||
if (existing) {
|
||||
auto& gossiper = gms::get_local_gossiper();
|
||||
auto* eps = gossiper.get_endpoint_state_for_endpoint_ptr(*existing);
|
||||
if (eps && eps->get_update_timestamp() > gms::gossiper::clk::now() - std::chrono::milliseconds(delay)) {
|
||||
throw std::runtime_error("Cannot replace a live node...");
|
||||
@@ -622,6 +617,8 @@ void storage_service::bootstrap(std::unordered_set<token> tokens) {
|
||||
db::system_keyspace::update_tokens(tokens).get();
|
||||
auto& gossiper = gms::get_local_gossiper();
|
||||
if (!db().local().is_replacing()) {
|
||||
// Wait until we know tokens of existing node before announcing join status.
|
||||
gossiper.wait_for_range_setup().get();
|
||||
// if not an existing token then bootstrap
|
||||
gossiper.add_local_application_state({
|
||||
{ gms::application_state::TOKENS, value_factory.tokens(tokens) },
|
||||
@@ -1541,7 +1538,7 @@ future<> storage_service::check_for_endpoint_collision() {
|
||||
throw std::runtime_error("Other bootstrapping/leaving/moving nodes detected, cannot bootstrap while consistent_rangemovement is true (check_for_endpoint_collision)");
|
||||
} else {
|
||||
gossiper.goto_shadow_round();
|
||||
gossiper.reset_endpoint_state_map();
|
||||
gossiper.reset_endpoint_state_map().get();
|
||||
found_bootstrapping_node = true;
|
||||
auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(gms::gossiper::clk::now() - t).count();
|
||||
slogger.info("Checking bootstrapping/leaving/moving nodes: node={}, status={}, sleep 1 second and check again ({} seconds elapsed) (check_for_endpoint_collision)", addr, state, elapsed);
|
||||
@@ -1553,7 +1550,7 @@ future<> storage_service::check_for_endpoint_collision() {
|
||||
}
|
||||
} while (found_bootstrapping_node);
|
||||
slogger.info("Checking bootstrapping/leaving/moving nodes: ok (check_for_endpoint_collision)");
|
||||
gossiper.reset_endpoint_state_map();
|
||||
gossiper.reset_endpoint_state_map().get();
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1603,8 +1600,9 @@ future<std::unordered_set<token>> storage_service::prepare_replacement_info() {
|
||||
auto tokens = get_tokens_for(replace_address);
|
||||
// use the replacee's host Id as our own so we receive hints, etc
|
||||
return db::system_keyspace::set_local_host_id(host_id).discard_result().then([replace_address, tokens = std::move(tokens)] {
|
||||
gms::get_local_gossiper().reset_endpoint_state_map(); // clean up since we have what we need
|
||||
return make_ready_future<std::unordered_set<token>>(std::move(tokens));
|
||||
return gms::get_local_gossiper().reset_endpoint_state_map().then([tokens = std::move(tokens)] { // clean up since we have what we need
|
||||
return make_ready_future<std::unordered_set<token>>(std::move(tokens));
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -179,6 +179,8 @@ struct compaction_read_monitor_generator final : public read_monitor_generator {
|
||||
void remove_sstable(bool is_tracking) {
|
||||
if (is_tracking) {
|
||||
_cf.get_compaction_strategy().get_backlog_tracker().remove_sstable(_sst);
|
||||
} else if (_sst) {
|
||||
_cf.get_compaction_strategy().get_backlog_tracker().revert_charges(_sst);
|
||||
}
|
||||
_sst = {};
|
||||
}
|
||||
@@ -303,6 +305,7 @@ public:
|
||||
class compaction {
|
||||
protected:
|
||||
column_family& _cf;
|
||||
schema_ptr _schema;
|
||||
std::vector<shared_sstable> _sstables;
|
||||
uint64_t _max_sstable_size;
|
||||
uint32_t _sstable_level;
|
||||
@@ -313,6 +316,7 @@ protected:
|
||||
protected:
|
||||
compaction(column_family& cf, std::vector<shared_sstable> sstables, uint64_t max_sstable_size, uint32_t sstable_level)
|
||||
: _cf(cf)
|
||||
, _schema(cf.schema())
|
||||
, _sstables(std::move(sstables))
|
||||
, _max_sstable_size(max_sstable_size)
|
||||
, _sstable_level(sstable_level)
|
||||
@@ -361,10 +365,9 @@ private:
|
||||
virtual flat_mutation_reader make_sstable_reader(lw_shared_ptr<sstables::sstable_set> ssts) const = 0;
|
||||
|
||||
flat_mutation_reader setup() {
|
||||
auto ssts = make_lw_shared<sstables::sstable_set>(_cf.get_compaction_strategy().make_sstable_set(_cf.schema()));
|
||||
auto schema = _cf.schema();
|
||||
auto ssts = make_lw_shared<sstables::sstable_set>(_cf.get_compaction_strategy().make_sstable_set(_schema));
|
||||
sstring formatted_msg = "[";
|
||||
auto fully_expired = get_fully_expired_sstables(_cf, _sstables, gc_clock::now() - schema->gc_grace_seconds());
|
||||
auto fully_expired = get_fully_expired_sstables(_cf, _sstables, gc_clock::now() - _schema->gc_grace_seconds());
|
||||
|
||||
for (auto& sst : _sstables) {
|
||||
// Compacted sstable keeps track of its ancestors.
|
||||
@@ -396,8 +399,8 @@ private:
|
||||
}
|
||||
formatted_msg += "]";
|
||||
_info->sstables = _sstables.size();
|
||||
_info->ks = schema->ks_name();
|
||||
_info->cf = schema->cf_name();
|
||||
_info->ks = _schema->ks_name();
|
||||
_info->cf = _schema->cf_name();
|
||||
report_start(formatted_msg);
|
||||
|
||||
return make_sstable_reader(std::move(ssts));
|
||||
@@ -462,7 +465,7 @@ private:
|
||||
}
|
||||
|
||||
const schema_ptr& schema() const {
|
||||
return _cf.schema();
|
||||
return _schema;
|
||||
}
|
||||
public:
|
||||
static future<compaction_info> run(std::unique_ptr<compaction> c);
|
||||
@@ -518,10 +521,10 @@ public:
|
||||
}
|
||||
|
||||
flat_mutation_reader make_sstable_reader(lw_shared_ptr<sstables::sstable_set> ssts) const override {
|
||||
return ::make_local_shard_sstable_reader(_cf.schema(),
|
||||
return ::make_local_shard_sstable_reader(_schema,
|
||||
std::move(ssts),
|
||||
query::full_partition_range,
|
||||
_cf.schema()->full_slice(),
|
||||
_schema->full_slice(),
|
||||
service::get_local_compaction_priority(),
|
||||
no_resource_tracking(),
|
||||
nullptr,
|
||||
@@ -570,7 +573,7 @@ public:
|
||||
cfg.monitor = &_active_write_monitors.back();
|
||||
cfg.large_partition_handler = _cf.get_large_partition_handler();
|
||||
// TODO: calculate encoding_stats based on statistics of compacted sstables
|
||||
_writer.emplace(_sst->get_writer(*_cf.schema(), partitions_per_sstable(), cfg, encoding_stats{}, priority));
|
||||
_writer.emplace(_sst->get_writer(*_schema, partitions_per_sstable(), cfg, encoding_stats{}, priority));
|
||||
}
|
||||
return &*_writer;
|
||||
}
|
||||
@@ -610,7 +613,7 @@ public:
|
||||
}
|
||||
|
||||
std::function<bool(const dht::decorated_key&)> filter_func() const override {
|
||||
dht::token_range_vector owned_ranges = service::get_local_storage_service().get_local_ranges(_cf.schema()->ks_name());
|
||||
dht::token_range_vector owned_ranges = service::get_local_storage_service().get_local_ranges(_schema->ks_name());
|
||||
|
||||
return [this, owned_ranges = std::move(owned_ranges)] (const dht::decorated_key& dk) {
|
||||
if (dht::shard_of(dk.token()) != engine().cpu_id()) {
|
||||
@@ -684,10 +687,10 @@ public:
|
||||
|
||||
// Use a reader that makes sure non-local mutations are not filtered out.
|
||||
flat_mutation_reader make_sstable_reader(lw_shared_ptr<sstables::sstable_set> ssts) const override {
|
||||
return ::make_range_sstable_reader(_cf.schema(),
|
||||
return ::make_range_sstable_reader(_schema,
|
||||
std::move(ssts),
|
||||
query::full_partition_range,
|
||||
_cf.schema()->full_slice(),
|
||||
_schema->full_slice(),
|
||||
service::get_local_compaction_priority(),
|
||||
no_resource_tracking(),
|
||||
nullptr,
|
||||
@@ -719,7 +722,7 @@ public:
|
||||
cfg.large_partition_handler = _cf.get_large_partition_handler();
|
||||
auto&& priority = service::get_local_compaction_priority();
|
||||
// TODO: calculate encoding_stats based on statistics of compacted sstables
|
||||
writer.emplace(sst->get_writer(*_cf.schema(), partitions_per_sstable(_shard), cfg, encoding_stats{}, priority, _shard));
|
||||
writer.emplace(sst->get_writer(*_schema, partitions_per_sstable(_shard), cfg, encoding_stats{}, priority, _shard));
|
||||
}
|
||||
return &*writer;
|
||||
}
|
||||
|
||||
@@ -66,6 +66,14 @@ public:
|
||||
_cm->deregister_compacting_sstables(_compacting);
|
||||
}
|
||||
}
|
||||
|
||||
// Explicitly release compacting sstables
|
||||
void release_compacting(const std::vector<sstables::shared_sstable>& sstables) {
|
||||
_cm->deregister_compacting_sstables(sstables);
|
||||
for (auto& sst : sstables) {
|
||||
_compacting.erase(boost::remove(_compacting, sst), _compacting.end());
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
compaction_weight_registration::compaction_weight_registration(compaction_manager* cm, int weight)
|
||||
@@ -564,17 +572,23 @@ future<> compaction_manager::perform_cleanup(column_family* cf) {
|
||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
||||
}
|
||||
column_family& cf = *task->compacting_cf;
|
||||
sstables::compaction_descriptor descriptor = sstables::compaction_descriptor(get_candidates(cf));
|
||||
auto compacting = compacting_sstable_registration(this, descriptor.sstables);
|
||||
auto sstables = get_candidates(cf);
|
||||
auto compacting = make_lw_shared<compacting_sstable_registration>(this, sstables);
|
||||
|
||||
_stats.pending_tasks--;
|
||||
_stats.active_tasks++;
|
||||
compaction_backlog_tracker user_initiated(std::make_unique<user_initiated_backlog_tracker>(_compaction_controller.backlog_of_shares(200), _available_memory));
|
||||
return do_with(std::move(user_initiated), [this, &cf, descriptor = std::move(descriptor)] (compaction_backlog_tracker& bt) mutable {
|
||||
return with_scheduling_group(_scheduling_group, [this, &cf, descriptor = std::move(descriptor)] () mutable {
|
||||
return cf.cleanup_sstables(std::move(descriptor));
|
||||
return do_with(std::move(user_initiated), std::move(sstables), [this, &cf, compacting] (compaction_backlog_tracker& bt,
|
||||
std::vector<sstables::shared_sstable>& sstables) mutable {
|
||||
return with_scheduling_group(_scheduling_group, [this, &cf, &sstables, compacting] () mutable {
|
||||
return do_for_each(sstables, [this, &cf, compacting] (auto& sst) {
|
||||
return cf.cleanup_sstables(sstables::compaction_descriptor({sst})).then([&sst, compacting] {
|
||||
// Releases reference to cleaned sstable such that respective used disk space can be freed.
|
||||
compacting->release_compacting({std::move(sst)});
|
||||
});
|
||||
});
|
||||
});
|
||||
}).then_wrapped([this, task, compacting = std::move(compacting)] (future<> f) mutable {
|
||||
}).then_wrapped([this, task, compacting] (future<> f) mutable {
|
||||
_stats.active_tasks--;
|
||||
if (!can_proceed(task)) {
|
||||
maybe_stop_on_error(std::move(f));
|
||||
|
||||
@@ -404,11 +404,6 @@ public:
|
||||
auto itw = writes_per_window.find(bound);
|
||||
if (itw != writes_per_window.end()) {
|
||||
ow_this_window = &itw->second;
|
||||
// We will erase here so we can keep track of which
|
||||
// writes belong to existing windows. Writes that don't belong to any window
|
||||
// are writes in progress to new windows and will be accounted in the final
|
||||
// loop before we return
|
||||
writes_per_window.erase(itw);
|
||||
}
|
||||
auto* oc_this_window = &no_oc;
|
||||
auto itc = compactions_per_window.find(bound);
|
||||
@@ -416,6 +411,13 @@ public:
|
||||
oc_this_window = &itc->second;
|
||||
}
|
||||
b += windows.second.backlog(*ow_this_window, *oc_this_window);
|
||||
if (itw != writes_per_window.end()) {
|
||||
// We will erase here so we can keep track of which
|
||||
// writes belong to existing windows. Writes that don't belong to any window
|
||||
// are writes in progress to new windows and will be accounted in the final
|
||||
// loop before we return
|
||||
writes_per_window.erase(itw);
|
||||
}
|
||||
}
|
||||
|
||||
// Partial writes that don't belong to any window are accounted here.
|
||||
|
||||
@@ -53,7 +53,8 @@ atomic_cell make_counter_cell(api::timestamp_type timestamp, bytes_view value) {
|
||||
throw marshal_exception("encountered a local shard in a counter cell");
|
||||
}
|
||||
}
|
||||
auto shard_count = value.size() / shard_size;
|
||||
auto header_length = (size_t(header_size) + 1) * sizeof(int16_t);
|
||||
auto shard_count = (value.size() - header_length) / shard_size;
|
||||
if (shard_count != size_t(header_size)) {
|
||||
throw marshal_exception("encountered remote shards in a counter cell");
|
||||
}
|
||||
|
||||
@@ -695,9 +695,12 @@ public:
|
||||
// Sets streamed_mutation::_end_of_range when there are no more fragments for the query range.
|
||||
// Returns information whether the parser should continue to parse more
|
||||
// input and produce more fragments or we have collected enough and should yield.
|
||||
// Returns proceed::yes only when all pending fragments have been pushed.
|
||||
proceed push_ready_fragments() {
|
||||
if (_ready) {
|
||||
return push_ready_fragments_with_ready_set();
|
||||
if (push_ready_fragments_with_ready_set() == proceed::no) {
|
||||
return proceed::no;
|
||||
}
|
||||
}
|
||||
|
||||
if (_out_of_range) {
|
||||
|
||||
@@ -787,6 +787,11 @@ future<> parse(sstable_version_types v, random_access_reader& in, utils::estimat
|
||||
if (length == 0) {
|
||||
throw malformed_sstable_exception("Estimated histogram with zero size found. Can't continue!");
|
||||
}
|
||||
|
||||
// Arrays are potentially pre-initialized by the estimated_histogram constructor.
|
||||
eh.bucket_offsets.clear();
|
||||
eh.buckets.clear();
|
||||
|
||||
eh.bucket_offsets.reserve(length - 1);
|
||||
eh.buckets.reserve(length);
|
||||
|
||||
|
||||
@@ -292,7 +292,7 @@ void stream_manager::on_restart(inet_address endpoint, endpoint_state ep_state)
|
||||
}
|
||||
|
||||
void stream_manager::on_dead(inet_address endpoint, endpoint_state ep_state) {
|
||||
if (has_peer(endpoint) && ep_state.is_shutdown()) {
|
||||
if (has_peer(endpoint)) {
|
||||
sslog.info("stream_manager: Close all stream_session with peer = {} in on_dead", endpoint);
|
||||
get_stream_manager().invoke_on_all([endpoint] (auto& sm) {
|
||||
sm.fail_sessions(endpoint);
|
||||
|
||||
@@ -509,8 +509,7 @@ void stream_session::close_session(stream_session_state final_state) {
|
||||
_stream_result->handle_session_complete(shared_from_this());
|
||||
}
|
||||
|
||||
sslog.debug("[Stream #{}] close_session session={}, state={}, cancel keep_alive timer", plan_id(), this, final_state);
|
||||
_keep_alive.cancel();
|
||||
sslog.debug("[Stream #{}] close_session session={}, state={}", plan_id(), this, final_state);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -537,41 +536,6 @@ bool stream_session::is_initialized() const {
|
||||
|
||||
void stream_session::init(shared_ptr<stream_result_future> stream_result_) {
|
||||
_stream_result = stream_result_;
|
||||
_keep_alive.set_callback([this] {
|
||||
auto plan_id = this->plan_id();
|
||||
auto peer = this->peer;
|
||||
get_local_stream_manager().get_progress_on_all_shards(plan_id, peer).then([this, peer, plan_id] (stream_bytes sbytes) {
|
||||
if (this->_is_aborted) {
|
||||
sslog.info("[Stream #{}] The session {} is closed, keep alive timer will do nothing", plan_id, this);
|
||||
return;
|
||||
}
|
||||
auto now = lowres_clock::now();
|
||||
sslog.debug("[Stream #{}] keep alive timer callback sbytes old: tx={}, rx={} new: tx={} rx={}",
|
||||
plan_id, this->_last_stream_bytes.bytes_sent, this->_last_stream_bytes.bytes_received,
|
||||
sbytes.bytes_sent, sbytes.bytes_received);
|
||||
if (sbytes.bytes_sent > this->_last_stream_bytes.bytes_sent ||
|
||||
sbytes.bytes_received > this->_last_stream_bytes.bytes_received) {
|
||||
sslog.debug("[Stream #{}] The session {} made progress with peer {}", plan_id, this, peer);
|
||||
// Progress has been made
|
||||
this->_last_stream_bytes = sbytes;
|
||||
this->_last_stream_progress = now;
|
||||
this->start_keep_alive_timer();
|
||||
} else if (now - this->_last_stream_progress >= this->_keep_alive_timeout) {
|
||||
// Timeout
|
||||
sslog.info("[Stream #{}] The session {} is idle for {} seconds, the peer {} is probably gone, close it",
|
||||
plan_id, this, this->_keep_alive_timeout.count(), peer);
|
||||
this->on_error();
|
||||
} else {
|
||||
// Start the timer to check again
|
||||
sslog.info("[Stream #{}] The session {} made no progress with peer {}", plan_id, this, peer);
|
||||
this->start_keep_alive_timer();
|
||||
}
|
||||
}).handle_exception([plan_id, peer, session = this->shared_from_this()] (auto ep) {
|
||||
sslog.info("[Stream #{}] keep alive timer callback fails with peer {}: {}", plan_id, peer, ep);
|
||||
});
|
||||
});
|
||||
_last_stream_progress = lowres_clock::now();
|
||||
start_keep_alive_timer();
|
||||
}
|
||||
|
||||
utils::UUID stream_session::plan_id() {
|
||||
|
||||
@@ -175,20 +175,8 @@ private:
|
||||
bool _complete_sent = false;
|
||||
bool _received_failed_complete_message = false;
|
||||
|
||||
// If the session is idle for 10 minutes, close the session
|
||||
std::chrono::seconds _keep_alive_timeout{60 * 10};
|
||||
// Check every minute
|
||||
std::chrono::seconds _keep_alive_interval{60};
|
||||
timer<lowres_clock> _keep_alive;
|
||||
stream_bytes _last_stream_bytes;
|
||||
lowres_clock::time_point _last_stream_progress;
|
||||
|
||||
session_info _session_info;
|
||||
public:
|
||||
void start_keep_alive_timer() {
|
||||
_keep_alive.rearm(lowres_clock::now() + _keep_alive_interval);
|
||||
}
|
||||
|
||||
void add_bytes_sent(int64_t bytes) {
|
||||
_bytes_sent += bytes;
|
||||
}
|
||||
|
||||
@@ -161,7 +161,6 @@ future<> stream_transfer_task::execute() {
|
||||
});
|
||||
}).then([this, id, plan_id, cf_id] {
|
||||
sslog.debug("[Stream #{}] GOT STREAM_MUTATION_DONE Reply from {}", plan_id, id.addr);
|
||||
session->start_keep_alive_timer();
|
||||
}).handle_exception([this, plan_id, id] (auto ep){
|
||||
sslog.warn("[Stream #{}] stream_transfer_task: Fail to send to {}: {}", plan_id, id, ep);
|
||||
std::rethrow_exception(ep);
|
||||
|
||||
2
test.py
2
test.py
@@ -235,7 +235,7 @@ if __name__ == "__main__":
|
||||
mode = 'debug'
|
||||
xmlout = (args.jenkins + "." + mode + "." +
|
||||
os.path.basename(path.split()[0]) + ".boost.xml")
|
||||
boost_args += ['--report_level=no', '--logger=XML,test_suite,' + xmlout]
|
||||
boost_args += ['--report_level=no', '--logger=HRF,test_suite:XML,test_suite,' + xmlout]
|
||||
if type == 'boost':
|
||||
boost_args += ['--']
|
||||
def report_error(out, report_subcause):
|
||||
|
||||
@@ -215,3 +215,22 @@ SEASTAR_TEST_CASE(test_aggregate_count) {
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_reverse_type_aggregation) {
|
||||
return do_with_cql_env_thread([&] (auto& e) {
|
||||
e.execute_cql("CREATE TABLE test(p int, c timestamp, v int, primary key (p, c)) with clustering order by (c desc)").get();
|
||||
e.execute_cql("INSERT INTO test(p, c, v) VALUES (1, 1, 1)").get();
|
||||
e.execute_cql("INSERT INTO test(p, c, v) VALUES (1, 2, 1)").get();
|
||||
|
||||
{
|
||||
auto tp = db_clock::from_time_t({ 0 }) + std::chrono::milliseconds(1);
|
||||
auto msg = e.execute_cql("SELECT min(c) FROM test").get0();
|
||||
assert_that(msg).is_rows().with_size(1).with_row({{timestamp_type->decompose(tp)}});
|
||||
}
|
||||
{
|
||||
auto tp = db_clock::from_time_t({ 0 }) + std::chrono::milliseconds(2);
|
||||
auto msg = e.execute_cql("SELECT max(c) FROM test").get0();
|
||||
assert_that(msg).is_rows().with_size(1).with_row({{timestamp_type->decompose(tp)}});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -2076,10 +2076,9 @@ SEASTAR_TEST_CASE(test_in_restriction) {
|
||||
assert_that(msg).is_rows().with_size(0);
|
||||
return e.execute_cql("select r1 from tir where p1 in (2, 0, 2, 1);");
|
||||
}).then([&e] (shared_ptr<cql_transport::messages::result_message> msg) {
|
||||
assert_that(msg).is_rows().with_rows({
|
||||
assert_that(msg).is_rows().with_rows_ignore_order({
|
||||
{int32_type->decompose(4)},
|
||||
{int32_type->decompose(0)},
|
||||
{int32_type->decompose(4)},
|
||||
{int32_type->decompose(1)},
|
||||
{int32_type->decompose(2)},
|
||||
{int32_type->decompose(3)},
|
||||
@@ -2101,6 +2100,58 @@ SEASTAR_TEST_CASE(test_in_restriction) {
|
||||
{int32_type->decompose(2)},
|
||||
{int32_type->decompose(1)},
|
||||
});
|
||||
return e.prepare("select r1 from tir where p1 in ?");
|
||||
}).then([&e] (cql3::prepared_cache_key_type prepared_id){
|
||||
auto my_list_type = list_type_impl::get_instance(int32_type, true);
|
||||
std::vector<cql3::raw_value> raw_values;
|
||||
auto in_values_list = my_list_type->decompose(make_list_value(my_list_type,
|
||||
list_type_impl::native_type{{int(2), int(0), int(2), int(1)}}));
|
||||
raw_values.emplace_back(cql3::raw_value::make_value(in_values_list));
|
||||
return e.execute_prepared(prepared_id,raw_values);
|
||||
}).then([&e] (shared_ptr<cql_transport::messages::result_message> msg) {
|
||||
assert_that(msg).is_rows().with_rows_ignore_order({
|
||||
{int32_type->decompose(4)},
|
||||
{int32_type->decompose(0)},
|
||||
{int32_type->decompose(1)},
|
||||
{int32_type->decompose(2)},
|
||||
{int32_type->decompose(3)},
|
||||
});
|
||||
}).then([&e]{
|
||||
return e.execute_cql("create table tir2 (p1 int, c1 int, r1 int, PRIMARY KEY (p1, c1,r1));").discard_result();
|
||||
}).then([&e] {
|
||||
e.require_table_exists("ks", "tir2");
|
||||
return e.execute_cql("insert into tir2 (p1, c1, r1) values (0, 0, 0);").discard_result();
|
||||
}).then([&e] {
|
||||
return e.execute_cql("insert into tir2 (p1, c1, r1) values (1, 0, 1);").discard_result();
|
||||
}).then([&e] {
|
||||
return e.execute_cql("insert into tir2 (p1, c1, r1) values (1, 1, 2);").discard_result();
|
||||
}).then([&e] {
|
||||
return e.execute_cql("insert into tir2 (p1, c1, r1) values (1, 2, 3);").discard_result();
|
||||
}).then([&e] {
|
||||
return e.execute_cql("insert into tir2 (p1, c1, r1) values (2, 3, 4);").discard_result();
|
||||
}).then([&e]{
|
||||
return e.execute_cql("select r1 from tir2 where (c1,r1) in ((0, 1),(1,2),(0,1),(1,2),(3,3)) ALLOW FILTERING;");
|
||||
}).then([&e] (shared_ptr<cql_transport::messages::result_message> msg) {
|
||||
assert_that(msg).is_rows().with_rows_ignore_order({
|
||||
{int32_type->decompose(1)},
|
||||
{int32_type->decompose(2)},
|
||||
});
|
||||
return e.prepare("select r1 from tir where p1 in ?");
|
||||
}).then([&e] (cql3::prepared_cache_key_type prepared_id){
|
||||
auto my_list_type = list_type_impl::get_instance(int32_type, true);
|
||||
std::vector<cql3::raw_value> raw_values;
|
||||
auto in_values_list = my_list_type->decompose(make_list_value(my_list_type,
|
||||
list_type_impl::native_type{{int(2), int(0), int(2), int(1)}}));
|
||||
raw_values.emplace_back(cql3::raw_value::make_value(in_values_list));
|
||||
return e.execute_prepared(prepared_id,raw_values);
|
||||
}).then([&e] (shared_ptr<cql_transport::messages::result_message> msg) {
|
||||
assert_that(msg).is_rows().with_rows_ignore_order({
|
||||
{int32_type->decompose(4)},
|
||||
{int32_type->decompose(0)},
|
||||
{int32_type->decompose(1)},
|
||||
{int32_type->decompose(2)},
|
||||
{int32_type->decompose(3)},
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -2613,6 +2664,7 @@ SEASTAR_TEST_CASE(test_select_json_types) {
|
||||
" r date,"
|
||||
" s time,"
|
||||
" u duration,"
|
||||
" w int,"
|
||||
");").get();
|
||||
|
||||
e.require_table_exists("ks", "all_types").get();
|
||||
@@ -2640,7 +2692,7 @@ SEASTAR_TEST_CASE(test_select_json_types) {
|
||||
" 1y2mo3w4d5h6m7s8ms9us10ns"
|
||||
");").get();
|
||||
|
||||
auto msg = e.execute_cql("SELECT JSON a, b, c, d, e, f, \"G\", \"H\", \"I\", j, k, l, m, n, o, p, q, r, s, u, unixtimestampof(k) FROM all_types WHERE a = 'ascii'").get0();
|
||||
auto msg = e.execute_cql("SELECT JSON a, b, c, d, e, f, \"G\", \"H\", \"I\", j, k, l, m, n, o, p, q, r, s, u, w, unixtimestampof(k) FROM all_types WHERE a = 'ascii'").get0();
|
||||
assert_that(msg).is_rows().with_rows({
|
||||
{
|
||||
utf8_type->decompose(
|
||||
@@ -2664,6 +2716,7 @@ SEASTAR_TEST_CASE(test_select_json_types) {
|
||||
"\"r\": \"1970-01-02\", "
|
||||
"\"s\": 00:00:00.000000001, "
|
||||
"\"u\": \"1y2mo25d5h6m7s8ms9us10ns\", "
|
||||
"\"w\": null, "
|
||||
"\"unixtimestampof(k)\": 1261009589805}"
|
||||
)
|
||||
}
|
||||
@@ -2671,7 +2724,7 @@ SEASTAR_TEST_CASE(test_select_json_types) {
|
||||
|
||||
msg = e.execute_cql("SELECT toJson(a), toJson(b), toJson(c), toJson(d), toJson(e), toJson(f),"
|
||||
"toJson(\"G\"), toJson(\"H\"), toJson(\"I\"), toJson(j), toJson(k), toJson(l), toJson(m), toJson(n),"
|
||||
"toJson(o), toJson(p), toJson(q), toJson(r), toJson(s), toJson(u),"
|
||||
"toJson(o), toJson(p), toJson(q), toJson(r), toJson(s), toJson(u), toJson(w),"
|
||||
"toJson(unixtimestampof(k)), toJson(toJson(toJson(p))) FROM all_types WHERE a = 'ascii'").get0();
|
||||
assert_that(msg).is_rows().with_rows({
|
||||
{
|
||||
@@ -2695,6 +2748,7 @@ SEASTAR_TEST_CASE(test_select_json_types) {
|
||||
utf8_type->decompose("\"1970-01-02\""),
|
||||
utf8_type->decompose("00:00:00.000000001"),
|
||||
utf8_type->decompose("\"1y2mo25d5h6m7s8ms9us10ns\""),
|
||||
utf8_type->decompose("null"),
|
||||
utf8_type->decompose("1261009589805"),
|
||||
utf8_type->decompose("\"\\\"3\\\"\"")
|
||||
}
|
||||
@@ -2783,7 +2837,7 @@ SEASTAR_TEST_CASE(test_insert_json_types) {
|
||||
|
||||
e.require_table_exists("ks", "all_types").get();
|
||||
e.execute_cql(
|
||||
"INSERT INTO all_types (a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, u) JSON '"
|
||||
"INSERT INTO all_types JSON '"
|
||||
"{\"a\": \"ascii\", "
|
||||
"\"b\": 123456789, "
|
||||
"\"c\": \"0xdeadbeef\", "
|
||||
@@ -2838,13 +2892,41 @@ SEASTAR_TEST_CASE(test_insert_json_types) {
|
||||
|
||||
e.execute_cql("UPDATE all_types SET b = fromJson('42') WHERE a = fromJson('\"ascii\"');").get();
|
||||
e.execute_cql("UPDATE all_types SET \"I\" = fromJson('\"zażółć gęślą jaźń\"') WHERE a = fromJson('\"ascii\"');").get();
|
||||
e.execute_cql("UPDATE all_types SET n = fromJson('\"2147483648\"') WHERE a = fromJson('\"ascii\"');").get();
|
||||
e.execute_cql("UPDATE all_types SET o = fromJson('\"3.45\"') WHERE a = fromJson('\"ascii\"');").get();
|
||||
|
||||
msg = e.execute_cql("SELECT a, b, \"I\" FROM all_types WHERE a = 'ascii'").get0();
|
||||
msg = e.execute_cql("SELECT a, b, \"I\", n, o FROM all_types WHERE a = 'ascii'").get0();
|
||||
assert_that(msg).is_rows().with_rows({
|
||||
{
|
||||
ascii_type->decompose(sstring("ascii")),
|
||||
long_type->decompose(42l),
|
||||
utf8_type->decompose(sstring("zażółć gęślą jaźń")),
|
||||
varint_type->decompose(boost::multiprecision::cpp_int(2147483648)),
|
||||
decimal_type->decompose(big_decimal { 2, boost::multiprecision::cpp_int(345) }),
|
||||
}
|
||||
});
|
||||
|
||||
e.execute_cql("CREATE TABLE multi_column_pk_table (p1 int, p2 int, p3 int, c1 int, c2 int, v int, PRIMARY KEY((p1, p2, p3), c1, c2));").get();
|
||||
e.require_table_exists("ks", "multi_column_pk_table").get();
|
||||
|
||||
e.execute_cql("INSERT INTO multi_column_pk_table JSON '"
|
||||
"{\"p1\": 1, "
|
||||
"\"p2\": 2, "
|
||||
"\"p3\": 3, "
|
||||
"\"c1\": 4, "
|
||||
"\"c2\": 5, "
|
||||
"\"v\": 6 "
|
||||
"}'").get();
|
||||
|
||||
msg = e.execute_cql("SELECT * FROM multi_column_pk_table").get0();
|
||||
assert_that(msg).is_rows().with_rows({
|
||||
{
|
||||
int32_type->decompose(1),
|
||||
int32_type->decompose(2),
|
||||
int32_type->decompose(3),
|
||||
int32_type->decompose(4),
|
||||
int32_type->decompose(5),
|
||||
int32_type->decompose(6)
|
||||
}
|
||||
});
|
||||
});
|
||||
@@ -2863,7 +2945,7 @@ SEASTAR_TEST_CASE(test_insert_json_collections) {
|
||||
e.require_table_exists("ks", "collections").get();
|
||||
|
||||
e.execute_cql(
|
||||
"INSERT INTO collections (a, b, c, d) JSON '"
|
||||
"INSERT INTO collections JSON '"
|
||||
"{\"a\": \"key\", "
|
||||
"\"b\": {\"1\": \"abc\", \"2\": \"!\", \"3\": \"de\"}, "
|
||||
"\"c\": [0, 1.125, 2.25, 4.5], "
|
||||
@@ -2898,10 +2980,10 @@ SEASTAR_TEST_CASE(test_prepared_json) {
|
||||
|
||||
cql3::prepared_cache_key_type prepared_id = e.prepare(
|
||||
"begin batch \n"
|
||||
" insert into json_data (k, v) json :named_bound0; \n"
|
||||
" insert into json_data (k, v) json ?; \n"
|
||||
" insert into json_data (k, v) json :named_bound1; \n"
|
||||
" insert into json_data (k, v) json ?; \n"
|
||||
" insert into json_data json :named_bound0; \n"
|
||||
" insert into json_data json ?; \n"
|
||||
" insert into json_data json :named_bound1; \n"
|
||||
" insert into json_data json ?; \n"
|
||||
"apply batch;").get0();
|
||||
|
||||
std::vector<cql3::raw_value> raw_values;
|
||||
@@ -3008,3 +3090,66 @@ SEASTAR_TEST_CASE(test_empty_partition_range_scan) {
|
||||
assert_that(res).is_rows().is_empty();
|
||||
});
|
||||
}
|
||||
|
||||
// Runs a sequence of writes against a static list column (element set, full
// overwrite, prepend, append, element delete, element removal) and re-reads
// the row through a clustering-key-restricted SELECT after each one.
SEASTAR_TEST_CASE(test_static_multi_cell_static_lists_with_ckey) {
    return do_with_cql_env_thread([] (cql_test_env& e) {
        e.execute_cql("CREATE TABLE t (p int, c int, slist list<int> static, v int, PRIMARY KEY (p, c));").get();
        e.execute_cql("INSERT INTO t (p, c, slist, v) VALUES (1, 1, [1], 1); ").get();

        const auto slist_type = list_type_impl::get_instance(int32_type, true);

        // Executes `statement`, then checks that the (slist, v) pair read back
        // for p = 1, c = 1 matches the expected list contents and value.
        auto run_and_expect = [&] (const char* statement, list_type_impl::native_type expected_list, int32_t expected_v) {
            e.execute_cql(statement).get();
            auto rows = e.execute_cql("SELECT slist, v FROM t WHERE p = 1 AND c = 1;").get0();
            assert_that(rows).is_rows().with_row({
                { slist_type->decompose(make_list_value(slist_type, expected_list)) },
                { int32_type->decompose(expected_v) }
            });
        };

        run_and_expect("UPDATE t SET slist[0] = 3, v = 3 WHERE p = 1 AND c = 1;",
                list_type_impl::native_type({{3}}), 3);
        run_and_expect("UPDATE t SET slist = [4], v = 4 WHERE p = 1 AND c = 1;",
                list_type_impl::native_type({{4}}), 4);
        run_and_expect("UPDATE t SET slist = [3] + slist , v = 5 WHERE p = 1 AND c = 1;",
                list_type_impl::native_type({3, 4}), 5);
        run_and_expect("UPDATE t SET slist = slist + [5] , v = 6 WHERE p = 1 AND c = 1;",
                list_type_impl::native_type({3, 4, 5}), 6);
        // The element delete addresses the static row only, so v keeps its previous value.
        run_and_expect("DELETE slist[2] from t WHERE p = 1;",
                list_type_impl::native_type({3, 4}), 6);
        run_and_expect("UPDATE t SET slist = slist - [4] , v = 7 WHERE p = 1 AND c = 1;",
                list_type_impl::native_type({3}), 7);
    });
}
|
||||
|
||||
|
||||
@@ -239,6 +239,17 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Reads the next fragment from the stream and fails the test unless it is
// equal to `mf` under schema `s`. Returns *this so that calls can be chained.
flat_reader_assertions& produces(const schema& s, const mutation_fragment& mf) {
    auto actual = read_next();
    if (!actual) {
        BOOST_FAIL(sprint("Expected %s, but got end of stream", mf));
    }
    const bool matches = actual->equal(s, mf);
    if (!matches) {
        BOOST_FAIL(sprint("Expected %s, but got %s", mf, *actual));
    }
    return *this;
}
|
||||
|
||||
flat_reader_assertions& produces_end_of_stream() {
|
||||
BOOST_TEST_MESSAGE("Expecting end of stream");
|
||||
auto mfopt = read_next();
|
||||
|
||||
@@ -108,14 +108,10 @@ SEASTAR_TEST_CASE(test_flat_mutation_reader_consume_single_partition) {
|
||||
BOOST_REQUIRE_EQUAL(1, result._consume_new_partition_call_count);
|
||||
BOOST_REQUIRE_EQUAL(1, result._consume_end_of_partition_call_count);
|
||||
BOOST_REQUIRE_EQUAL(m.partition().partition_tombstone() ? 1 : 0, result._consume_tombstone_call_count);
|
||||
auto r2 = flat_mutation_reader_from_mutations({m});
|
||||
auto start = r2().get0();
|
||||
BOOST_REQUIRE(start);
|
||||
BOOST_REQUIRE(start->is_partition_start());
|
||||
auto r2 = assert_that(flat_mutation_reader_from_mutations({m}));
|
||||
r2.produces_partition_start(m.decorated_key(), m.partition().partition_tombstone());
|
||||
for (auto& mf : result._fragments) {
|
||||
auto mfopt = r2().get0();
|
||||
BOOST_REQUIRE(mfopt);
|
||||
BOOST_REQUIRE(mf.equal(*m.schema(), *mfopt));
|
||||
r2.produces(*m.schema(), mf);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
@@ -23,6 +23,8 @@
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
|
||||
#include <seastar/util/defer.hh>
|
||||
|
||||
#include "tests/test-utils.hh"
|
||||
#include "message/messaging_service.hh"
|
||||
#include "gms/failure_detector.hh"
|
||||
@@ -39,18 +41,23 @@ SEASTAR_TEST_CASE(test_boot_shutdown){
|
||||
sharded<auth::service> auth_service;
|
||||
sharded<db::system_distributed_keyspace> sys_dist_ks;
|
||||
utils::fb_utilities::set_broadcast_address(gms::inet_address("127.0.0.1"));
|
||||
|
||||
locator::i_endpoint_snitch::create_snitch("SimpleSnitch").get();
|
||||
auto stop_snitch = defer([&] { gms::get_failure_detector().stop().get(); });
|
||||
|
||||
netw::get_messaging_service().start(gms::inet_address("127.0.0.1"), 7000, false /* don't bind */).get();
|
||||
auto stop_messaging_service = defer([&] { netw::get_messaging_service().stop().get(); });
|
||||
|
||||
service::get_storage_service().start(std::ref(db), std::ref(auth_service), std::ref(sys_dist_ks)).get();
|
||||
auto stop_ss = defer([&] { service::get_storage_service().stop().get(); });
|
||||
|
||||
db.start().get();
|
||||
netw::get_messaging_service().start(gms::inet_address("127.0.0.1")).get();
|
||||
auto stop_db = defer([&] { db.stop().get(); });
|
||||
|
||||
gms::get_failure_detector().start().get();
|
||||
auto stop_failure_detector = defer([&] { gms::get_failure_detector().stop().get(); });
|
||||
|
||||
gms::get_gossiper().start().get();
|
||||
gms::get_gossiper().stop().get();
|
||||
gms::get_failure_detector().stop().get();
|
||||
db.stop().get();
|
||||
service::get_storage_service().stop().get();
|
||||
netw::get_messaging_service().stop().get();
|
||||
locator::i_endpoint_snitch::stop_snitch().get();
|
||||
auto stop_gossiper = defer([&] { gms::get_gossiper().stop().get(); });
|
||||
});
|
||||
}
|
||||
|
||||
@@ -33,6 +33,10 @@
|
||||
#include "imr/fundamental.hh"
|
||||
#include "imr/compound.hh"
|
||||
#include "imr/methods.hh"
|
||||
#include "imr/utils.hh"
|
||||
|
||||
#include "failure_injecting_allocation_strategy.hh"
|
||||
#include "utils/logalloc.hh"
|
||||
|
||||
#include "random-utils.hh"
|
||||
|
||||
@@ -717,3 +721,127 @@ BOOST_AUTO_TEST_CASE(test_variant_destructor) {
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_SUITE_END();
|
||||
|
||||
namespace object_exception_safety {
|
||||
|
||||
using nested_structure = imr::structure<
|
||||
imr::member<A, imr::pod<size_t>>,
|
||||
imr::member<B, imr::buffer<B>>
|
||||
>;
|
||||
|
||||
using structure = imr::structure<
|
||||
imr::member<A, imr::pod<size_t>>,
|
||||
imr::member<C, imr::tagged_type<C, imr::pod<void*>>>,
|
||||
imr::member<D, imr::tagged_type<C, imr::pod<void*>>>,
|
||||
imr::member<B, imr::buffer<A>>
|
||||
>;
|
||||
|
||||
struct structue_context {
|
||||
size_t _size;
|
||||
|
||||
structue_context(const uint8_t* ptr)
|
||||
: _size(imr::pod<size_t>::make_view(ptr).load())
|
||||
{
|
||||
BOOST_CHECK_EQUAL(_size, 4);
|
||||
}
|
||||
|
||||
template<typename Tag>
|
||||
size_t size_of() const noexcept {
|
||||
return _size;
|
||||
}
|
||||
|
||||
template<typename Tag, typename... Args>
|
||||
decltype(auto) context_for(Args&&...) const noexcept { return *this; }
|
||||
};
|
||||
|
||||
struct nested_structue_context {
|
||||
size_t _size;
|
||||
|
||||
nested_structue_context(const uint8_t* ptr)
|
||||
: _size(imr::pod<size_t>::make_view(ptr).load())
|
||||
{
|
||||
BOOST_CHECK_NE(_size, 0);
|
||||
}
|
||||
|
||||
template<typename Tag>
|
||||
size_t size_of() const noexcept {
|
||||
return _size;
|
||||
}
|
||||
|
||||
template<typename Tag, typename... Args>
|
||||
decltype(auto) context_for(Args&&...) const noexcept { return *this; }
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
namespace imr::methods {
|
||||
|
||||
template<>
|
||||
struct destructor<imr::tagged_type<C, imr::pod<void*>>> {
|
||||
static void run(uint8_t* ptr, ...) {
|
||||
using namespace object_exception_safety;
|
||||
auto obj_ptr = imr::pod<uint8_t*>::make_view(ptr).load();
|
||||
imr::methods::destroy<nested_structure>(obj_ptr, nested_structue_context(obj_ptr));
|
||||
current_allocator().free(obj_ptr);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
// Builds an IMR object holding two separately allocated nested objects while
// injecting an allocation failure at every successive allocation, and checks
// after each attempt (failed or successful) that the region holds no memory.
BOOST_AUTO_TEST_CASE(test_object_exception_safety) {
    using namespace object_exception_safety;

    using outer_context_factory = imr::alloc::context_factory<imr::utils::object_context<structue_context>>;
    using outer_migrate_fn = imr::alloc::lsa_migrate_fn<imr::utils::object<structure>::structure, outer_context_factory>;
    auto migrator_for_structure = outer_migrate_fn(outer_context_factory());

    using nested_context_factory = imr::alloc::context_factory<nested_structue_context>;
    using nested_migrate_fn = imr::alloc::lsa_migrate_fn<nested_structure, nested_context_factory>;
    auto migrator_for_nested_structure = nested_migrate_fn(nested_context_factory());

    auto writer_fn = [&] (auto serializer, auto& allocator) {
        // First nested object: size field 128, 128-byte buffer left unwritten.
        auto first_nested = allocator.template allocate<nested_structure>(
            &migrator_for_nested_structure,
            [&] (auto nested_serializer) {
                return nested_serializer
                    .serialize(128)
                    .serialize(128, [] (auto&&...) { })
                    .done();
            }
        );
        return serializer
            .serialize(4)
            .serialize(first_nested)
            // Second nested object: size field 1024, 1024-byte buffer left unwritten.
            .serialize(allocator.template allocate<nested_structure>(
                &migrator_for_nested_structure,
                [&] (auto nested_serializer) {
                    return nested_serializer
                        .serialize(1024)
                        .serialize(1024, [] (auto&&...) { })
                        .done();
                }
            ))
            .serialize(bytes(4, 'a'))
            .done();
    };

    logalloc::region reg;

    size_t fail_offset = 0;
    auto allocator = failure_injecting_allocation_strategy(reg.allocator());
    with_allocator(allocator, [&] {
        bool built = false;
        while (!built) {
            allocator.fail_after(fail_offset++);
            try {
                // The temporary object is destroyed at the end of this statement.
                imr::utils::object<structure>::make(writer_fn, &migrator_for_structure);
                built = true;
            } catch (const std::bad_alloc&) {
                // Expected for every attempt but the last; retry with a later failure point.
            }
            BOOST_CHECK_EQUAL(reg.occupancy().used_space(), 0);
        }
    });

    BOOST_CHECK_EQUAL(fail_offset, 4);
}
|
||||
|
||||
|
||||
59
tests/json_test.cc
Normal file
59
tests/json_test.cc
Normal file
@@ -0,0 +1,59 @@
|
||||
|
||||
/*
|
||||
* Copyright (C) 2015 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#define BOOST_TEST_MODULE json
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
|
||||
#include "tests/test-utils.hh"
|
||||
#include "json.hh"
|
||||
#include "stdx.hh"
|
||||
|
||||
// Checks json::value_to_quoted_string() against a table of (raw input,
// expected quoted output) pairs covering escapes, control characters,
// plain ASCII, multi-byte UTF-8 text and the empty string.
BOOST_AUTO_TEST_CASE(test_value_to_quoted_string) {
    const std::vector<std::pair<sstring, sstring>> cases = {
        {
            "\"\\\b\f\n\r\t",
            "\"\\\"\\\\\\b\\f\\n\\r\\t\""
        },
        {
            sstring(1, '\0') + "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
            "\"\\u0000\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007\\b\\t\\n\\u000B\\f\\r\\u000E\\u000F\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015\\u0016\\u0017\\u0018\\u0019\\u001A\\u001B\\u001C\\u001D\\u001E\\u001F\""
        },
        {
            "regular string",
            "\"regular string\""
        },
        {
            "mixed\t\t\t\ba\f \007 string \002 fgh",
            "\"mixed\\t\\t\\t\\ba\\f \\u0007 string \\u0002 fgh\""
        },
        {
            "chào mọi người 123!",
            "\"chào mọi người 123!\""
        },
        {
            "ყველას მოგესალმებით 456?;",
            "\"ყველას მოგესალმებით 456?;\""
        },
        {
            "всем привет",
            "\"всем привет\""
        },
        {
            "大家好",
            "\"大家好\""
        },
        {
            "",
            "\"\""
        }
    };

    for (const auto& test_case : cases) {
        BOOST_CHECK_EQUAL(json::value_to_quoted_string(test_case.first), test_case.second);
    }
}
|
||||
@@ -269,7 +269,7 @@ SEASTAR_TEST_CASE(test_loading_cache_loading_different_keys) {
|
||||
using namespace std::chrono;
|
||||
std::vector<int> ivec(num_loaders);
|
||||
load_count = 0;
|
||||
utils::loading_cache<int, sstring> loading_cache(num_loaders, 1s, test_logger);
|
||||
utils::loading_cache<int, sstring> loading_cache(num_loaders, 1h, test_logger);
|
||||
auto stop_cache_reload = seastar::defer([&loading_cache] { loading_cache.stop().get(); });
|
||||
|
||||
prepare().get();
|
||||
|
||||
@@ -26,11 +26,13 @@
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <query-result-set.hh>
|
||||
#include <query-result-writer.hh>
|
||||
|
||||
#include "tests/test_services.hh"
|
||||
#include "tests/test-utils.hh"
|
||||
#include "tests/mutation_assertions.hh"
|
||||
#include "tests/result_set_assertions.hh"
|
||||
#include "tests/mutation_source_test.hh"
|
||||
|
||||
#include "mutation_query.hh"
|
||||
#include "core/do_with.hh"
|
||||
@@ -527,3 +529,22 @@ SEASTAR_TEST_CASE(test_partition_limit) {
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Runs the same full-range query twice, once building only a digest and once
// building result plus digest, and requires both builders to report the same
// amount of accounted memory.
SEASTAR_THREAD_TEST_CASE(test_result_size_calculation) {
    random_mutation_generator gen(random_mutation_generator::generate_counters::no);
    std::vector<mutation> mutations = gen(1);
    schema_ptr s = gen.schema();
    mutation_source source = make_source(std::move(mutations));
    query::result_memory_limiter l(std::numeric_limits<ssize_t>::max());
    query::partition_slice slice = make_full_slice(*s);
    slice.options.set<query::partition_slice::option::allow_short_read>();

    const auto unlimited = std::numeric_limits<uint32_t>::max();
    const auto max_size = query::result_memory_limiter::maximum_result_size;

    query::result::builder digest_only_builder(
            slice,
            query::result_options{query::result_request::only_digest, query::digest_algorithm::xxHash},
            l.new_digest_read(max_size).get0());
    data_query(s, source, query::full_partition_range, slice, unlimited, unlimited, gc_clock::now(), digest_only_builder).get0();

    query::result::builder result_and_digest_builder(
            slice,
            query::result_options{query::result_request::result_and_digest, query::digest_algorithm::xxHash},
            l.new_data_read(max_size).get0());
    data_query(s, source, query::full_partition_range, slice, unlimited, unlimited, gc_clock::now(), result_and_digest_builder).get0();

    const auto digest_only_memory = digest_only_builder.memory_accounter().used_memory();
    const auto result_and_digest_memory = result_and_digest_builder.memory_accounter().used_memory();
    BOOST_REQUIRE_EQUAL(digest_only_memory, result_and_digest_memory);
}
|
||||
|
||||
|
||||
@@ -1171,7 +1171,7 @@ static mutation_sets generate_mutation_sets() {
|
||||
auto tomb = new_tombstone();
|
||||
m1.partition().apply_delete(*s1, ck2, tomb);
|
||||
result.unequal.emplace_back(mutations{m1, m2});
|
||||
m2.partition().apply_delete(*s1, ck2, tomb);
|
||||
m2.partition().apply_delete(*s2, ck2, tomb);
|
||||
result.equal.emplace_back(mutations{m1, m2});
|
||||
}
|
||||
|
||||
@@ -1180,7 +1180,7 @@ static mutation_sets generate_mutation_sets() {
|
||||
auto key = clustering_key_prefix::from_deeply_exploded(*s1, {data_value(bytes("ck2_0"))});
|
||||
m1.partition().apply_row_tombstone(*s1, key, tomb);
|
||||
result.unequal.emplace_back(mutations{m1, m2});
|
||||
m2.partition().apply_row_tombstone(*s1, key, tomb);
|
||||
m2.partition().apply_row_tombstone(*s2, key, tomb);
|
||||
result.equal.emplace_back(mutations{m1, m2});
|
||||
}
|
||||
|
||||
@@ -1204,7 +1204,7 @@ static mutation_sets generate_mutation_sets() {
|
||||
auto ts = new_timestamp();
|
||||
m1.partition().apply_insert(*s1, ck2, ts);
|
||||
result.unequal.emplace_back(mutations{m1, m2});
|
||||
m2.partition().apply_insert(*s1, ck2, ts);
|
||||
m2.partition().apply_insert(*s2, ck2, ts);
|
||||
result.equal.emplace_back(mutations{m1, m2});
|
||||
}
|
||||
|
||||
|
||||
@@ -1716,3 +1716,45 @@ SEASTAR_THREAD_TEST_CASE(test_cell_external_memory_usage) {
|
||||
test_collection(bytes(64 * 1024 + 1, 'a'));
|
||||
test_collection(bytes(1024 * 1024, 'a'));
|
||||
}
|
||||
|
||||
// external_memory_usage() must be invariant to the merging order,
|
||||
// so that accounting of a clustering_row produced by partition_snapshot_flat_reader
|
||||
// doesn't give a greater result than what is used by the memtable region, possibly
|
||||
// after all MVCC versions are merged.
|
||||
// Overaccounting leads to assertion failure in ~flush_memory_accounter.
|
||||
SEASTAR_THREAD_TEST_CASE(test_row_size_is_immune_to_application_order) {
|
||||
auto s = schema_builder("ks", "cf")
|
||||
.with_column("pk", utf8_type, column_kind::partition_key)
|
||||
.with_column("v1", utf8_type)
|
||||
.with_column("v2", utf8_type)
|
||||
.with_column("v3", utf8_type)
|
||||
.with_column("v4", utf8_type)
|
||||
.with_column("v5", utf8_type)
|
||||
.with_column("v6", utf8_type)
|
||||
.with_column("v7", utf8_type)
|
||||
.with_column("v8", utf8_type)
|
||||
.with_column("v9", utf8_type)
|
||||
.build();
|
||||
|
||||
auto value = utf8_type->decompose(data_value("value"));
|
||||
|
||||
row r1;
|
||||
r1.append_cell(7, make_atomic_cell(value));
|
||||
|
||||
row r2;
|
||||
r2.append_cell(8, make_atomic_cell(value));
|
||||
|
||||
auto size1 = [&] {
|
||||
auto r3 = row(*s, column_kind::regular_column, r1);
|
||||
r3.apply(*s, column_kind::regular_column, r2);
|
||||
return r3.external_memory_usage(*s, column_kind::regular_column);
|
||||
}();
|
||||
|
||||
auto size2 = [&] {
|
||||
auto r3 = row(*s, column_kind::regular_column, r2);
|
||||
r3.apply(*s, column_kind::regular_column, r1);
|
||||
return r3.external_memory_usage(*s, column_kind::regular_column);
|
||||
}();
|
||||
|
||||
BOOST_REQUIRE_EQUAL(size1, size2);
|
||||
}
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
#include <map>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <boost/range/algorithm/adjacent_find.hpp>
|
||||
|
||||
static logging::logger nlogger("NetworkTopologyStrategyLogger");
|
||||
|
||||
@@ -52,6 +53,27 @@ void print_natural_endpoints(double point, const std::vector<inet_address> v) {
|
||||
nlogger.debug("{}", strm.str());
|
||||
}
|
||||
|
||||
#ifndef SEASTAR_DEBUG
|
||||
static void verify_sorted(const dht::token_range_vector& trv) {
|
||||
auto not_strictly_before = [] (const dht::token_range a, const dht::token_range b) {
|
||||
return !b.start()
|
||||
|| !a.end()
|
||||
|| a.end()->value() > b.start()->value()
|
||||
|| (a.end()->value() == b.start()->value() && a.end()->is_inclusive() && b.start()->is_inclusive());
|
||||
};
|
||||
BOOST_CHECK(boost::adjacent_find(trv, not_strictly_before) == trv.end());
|
||||
}
|
||||
#endif
|
||||
|
||||
// Verifies that each range list the strategy reports for endpoint `ep`
// (all ranges, primary ranges, primary ranges within the DC) is sorted.
// Compiled to a no-op in debug builds, where the check is too slow.
static void check_ranges_are_sorted(abstract_replication_strategy* ars, gms::inet_address ep) {
#ifndef SEASTAR_DEBUG
    const auto all_ranges = ars->get_ranges(ep);
    verify_sorted(all_ranges);

    const auto primary_ranges = ars->get_primary_ranges(ep);
    verify_sorted(primary_ranges);

    const auto primary_ranges_in_dc = ars->get_primary_ranges_within_dc(ep);
    verify_sorted(primary_ranges_in_dc);
#endif
}
|
||||
|
||||
void strategy_sanity_check(
|
||||
abstract_replication_strategy* ars_ptr,
|
||||
const std::map<sstring, sstring>& options) {
|
||||
@@ -150,6 +172,7 @@ void full_ring_check(const std::vector<ring_point>& ring_points,
|
||||
auto endpoints2 = ars_ptr->get_natural_endpoints(t2);
|
||||
|
||||
endpoints_check(ars_ptr, endpoints2);
|
||||
check_ranges_are_sorted(ars_ptr, rp.host);
|
||||
BOOST_CHECK(cache_hit_count + 1 == ars_ptr->get_cache_hits_count());
|
||||
BOOST_CHECK(endpoints1 == endpoints2);
|
||||
}
|
||||
|
||||
@@ -620,6 +620,66 @@ SEASTAR_TEST_CASE(test_single_key_queries_after_population_in_reverse_order) {
|
||||
});
|
||||
}
|
||||
|
||||
// Reproducer for https://github.com/scylladb/scylla/issues/4236
|
||||
SEASTAR_TEST_CASE(test_partition_range_population_with_concurrent_memtable_flushes) {
|
||||
return seastar::async([] {
|
||||
auto s = make_schema();
|
||||
|
||||
std::vector<mutation> mutations = make_ring(s, 3);
|
||||
|
||||
auto mt = make_lw_shared<memtable>(s);
|
||||
for (auto&& m : mutations) {
|
||||
mt->apply(m);
|
||||
}
|
||||
|
||||
cache_tracker tracker;
|
||||
row_cache cache(s, snapshot_source_from_snapshot(mt->as_data_source()), tracker);
|
||||
|
||||
bool cancel_updater = false;
|
||||
auto updater = repeat([&] {
|
||||
if (cancel_updater) {
|
||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
||||
}
|
||||
return later().then([&] {
|
||||
auto mt = make_lw_shared<memtable>(s);
|
||||
return cache.update([]{}, *mt).then([mt] {
|
||||
return stop_iteration::no;
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
{
|
||||
auto pr = dht::partition_range::make_singular(query::ring_position(mutations[1].decorated_key()));
|
||||
assert_that(cache.make_reader(s, pr))
|
||||
.produces(mutations[1])
|
||||
.produces_end_of_stream();
|
||||
}
|
||||
|
||||
{
|
||||
auto pr = dht::partition_range::make_ending_with(
|
||||
{query::ring_position(mutations[2].decorated_key()), true});
|
||||
assert_that(cache.make_reader(s, pr))
|
||||
.produces(mutations[0])
|
||||
.produces(mutations[1])
|
||||
.produces(mutations[2])
|
||||
.produces_end_of_stream();
|
||||
}
|
||||
|
||||
cache.invalidate([]{}).get();
|
||||
|
||||
{
|
||||
assert_that(cache.make_reader(s, query::full_partition_range))
|
||||
.produces(mutations[0])
|
||||
.produces(mutations[1])
|
||||
.produces(mutations[2])
|
||||
.produces_end_of_stream();
|
||||
}
|
||||
|
||||
cancel_updater = true;
|
||||
updater.get();
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_row_cache_conforms_to_mutation_source) {
|
||||
return seastar::async([] {
|
||||
cache_tracker tracker;
|
||||
|
||||
@@ -4649,3 +4649,74 @@ SEASTAR_TEST_CASE(sstable_timestamp_metadata_correcness_with_negative) {
|
||||
BOOST_REQUIRE(sst->get_stats_metadata().max_timestamp == 5);
|
||||
});
|
||||
}
|
||||
|
||||
// Starts a compaction under the leveled strategy, stops tracking it, switches
// the table to the time-window strategy, waits for the compaction to finish,
// and finally queries the backlog of both the manager and the strategy.
SEASTAR_TEST_CASE(backlog_tracker_correctness_after_stop_tracking_compaction) {
    return async([] {
        storage_service_for_tests ssft;
        cell_locker_stats cl_stats;

        auto s = schema_builder("tests", "backlog_correctness_after_stop_tracking_compaction")
                .with_column("id", utf8_type, column_kind::partition_key)
                .with_column("value", int32_type)
                .build();

        auto tmp = make_lw_shared<tmpdir>();
        // Produces a new unshared sstable with an increasing generation on each call.
        auto sst_gen = [s, tmp, gen = make_lw_shared<unsigned>(1)] () mutable {
            auto sst = make_sstable(s, tmp->path, (*gen)++, la, big);
            sst->set_unshared();
            return sst;
        };

        column_family_for_tests cf(s);
        cf->set_compaction_strategy(sstables::compaction_strategy_type::leveled);

        {
            auto tokens = token_generation_for_current_shard(4);
            // Builds a one-cell mutation for the (key, token) pair and checks the token matches.
            auto make_insert = [&] (auto p) {
                auto key = partition_key::from_exploded(*s, {to_bytes(p.first)});
                mutation m(s, key);
                m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), 1 /* ts */);
                BOOST_REQUIRE(m.decorated_key().token() == p.second);
                return m;
            };
            auto mut1 = make_insert(tokens[0]);
            auto mut2 = make_insert(tokens[1]);
            auto mut3 = make_insert(tokens[2]);
            auto mut4 = make_insert(tokens[3]);
            std::vector<shared_sstable> ssts = {
                    make_sstable_containing(sst_gen, {mut1, mut2}),
                    make_sstable_containing(sst_gen, {mut3, mut4})
            };

            // Registers every input sstable with the tracker of the currently active strategy.
            auto add_all_to_tracker = [&] {
                for (auto& sst : ssts) {
                    cf->get_compaction_strategy().get_backlog_tracker().add_sstable(sst);
                }
            };

            add_all_to_tracker();

            // Start compaction, stop tracking it, switch to TWCS, wait for the
            // compaction to finish and then check the backlog. This asserts that
            // the backlog still works for a compaction that finished after its
            // tracking was stopped.

            auto fut = sstables::compact_sstables(sstables::compaction_descriptor(ssts), *cf, sst_gen);

            bool stopped_tracking = false;
            for (auto& info : cf._data->cm.get_compactions()) {
                if (info->cf != cf->schema()->cf_name()) {
                    continue;
                }
                info->stop_tracking();
                stopped_tracking = true;
            }
            BOOST_REQUIRE(stopped_tracking);

            cf->set_compaction_strategy(sstables::compaction_strategy_type::time_window);
            add_all_to_tracker();

            auto ret = fut.get0();
            BOOST_REQUIRE(ret.new_sstables.size() == 1);
            BOOST_REQUIRE(ret.tracking == false);
        }
        // triggers code that iterates through registered compactions.
        cf._data->cm.backlog();
        cf->get_compaction_strategy().get_backlog_tracker().backlog();
    });
}
|
||||
|
||||
@@ -1248,3 +1248,49 @@ SEASTAR_TEST_CASE(test_writing_combined_stream_with_tombstones_at_the_same_posit
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Reproducer for #4206
|
||||
SEASTAR_THREAD_TEST_CASE(test_counter_header_size) {
|
||||
auto dir = tmpdir();
|
||||
storage_service_for_tests ssft;
|
||||
auto wait_bg = seastar::defer([] { sstables::await_background_jobs().get(); });
|
||||
|
||||
auto s = schema_builder("ks", "counter_test")
|
||||
.with_column("pk", int32_type, column_kind::partition_key)
|
||||
.with_column("ck", int32_type, column_kind::clustering_key)
|
||||
.with_column("c1", counter_type)
|
||||
.build();
|
||||
|
||||
auto pk = partition_key::from_single_value(*s, int32_type->decompose(0));
|
||||
auto ck = clustering_key::from_single_value(*s, int32_type->decompose(0));
|
||||
|
||||
auto& col = *s->get_column_definition(utf8_type->decompose(sstring("c1")));
|
||||
|
||||
auto ids = std::vector<counter_id>();
|
||||
for (auto i = 0; i < 128; i++) {
|
||||
ids.emplace_back(counter_id(utils::make_random_uuid()));
|
||||
}
|
||||
std::sort(ids.begin(), ids.end());
|
||||
|
||||
mutation m(s, pk);
|
||||
counter_cell_builder ccb;
|
||||
for (auto id : ids) {
|
||||
ccb.add_shard(counter_shard(id, 1, 1));
|
||||
}
|
||||
m.set_clustered_cell(ck, col, ccb.build(api::new_timestamp()));
|
||||
|
||||
auto mt = make_lw_shared<memtable>(s);
|
||||
mt->apply(m);
|
||||
|
||||
for (const auto version : all_sstable_versions) {
|
||||
auto sst = sstables::make_sstable(s, dir.path, 1, version, sstables::sstable::format_types::big);
|
||||
sstable_writer_config cfg;
|
||||
cfg.large_partition_handler = &nop_lp_handler;
|
||||
sst->write_components(mt->make_flat_reader(s), 1, s, cfg).get();
|
||||
sst->load().get();
|
||||
|
||||
assert_that(sst->as_mutation_source().make_reader(s))
|
||||
.produces(m)
|
||||
.produces_end_of_stream();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -45,180 +45,143 @@
|
||||
using namespace std::literals::chrono_literals;
|
||||
|
||||
SEASTAR_TEST_CASE(test_query_size_estimates_virtual_table) {
|
||||
return do_with_cql_env([] (auto& e) {
|
||||
auto ranges = db::size_estimates::size_estimates_mutation_reader::get_local_ranges().get0();
|
||||
return do_with_cql_env_thread([] (cql_test_env& e) {
|
||||
auto ranges = db::size_estimates::get_local_ranges().get0();
|
||||
auto start_token1 = utf8_type->to_string(ranges[3].start);
|
||||
auto start_token2 = utf8_type->to_string(ranges[5].start);
|
||||
auto end_token1 = utf8_type->to_string(ranges[3].end);
|
||||
auto end_token2 = utf8_type->to_string(ranges[55].end);
|
||||
auto &qp = e.local_qp();
|
||||
return e.execute_cql("create table cf1(pk text PRIMARY KEY, v int);").discard_result().then([&e] {
|
||||
return e.execute_cql("create table cf2(pk text PRIMARY KEY, v int);").discard_result();
|
||||
}).then([&qp] {
|
||||
return qp.execute_internal("select * from system.size_estimates where keyspace_name = 'ks';").then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 512);
|
||||
});
|
||||
}).then([&qp] {
|
||||
return qp.execute_internal("select * from system.size_estimates where keyspace_name = 'ks' limit 100;").then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 100);
|
||||
});
|
||||
}).then([&qp] {
|
||||
return qp.execute_internal("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1';").then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 256);
|
||||
});
|
||||
}).then([&qp] {
|
||||
return qp.execute_internal("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name > 'cf1';").then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 256);
|
||||
});
|
||||
}).then([&qp] {
|
||||
return qp.execute_internal("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name >= 'cf1';").then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 512);
|
||||
});
|
||||
}).then([&qp] {
|
||||
return qp.execute_internal("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name < 'cf2';").then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 256);
|
||||
});
|
||||
}).then([&qp] {
|
||||
return qp.execute_internal("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name <= 'cf2';").then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 512);
|
||||
});
|
||||
}).then([&qp] {
|
||||
return qp.execute_internal("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name in ('cf1', 'cf2');").then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 512);
|
||||
});
|
||||
}).then([&qp] {
|
||||
return qp.execute_internal("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name >= 'cf1' and table_name <= 'cf1';").then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 256);
|
||||
});
|
||||
}).then([&qp] {
|
||||
return qp.execute_internal("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name >= 'cf1' and table_name <= 'cf2';").then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 512);
|
||||
});
|
||||
}).then([&qp] {
|
||||
return qp.execute_internal("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name > 'cf1' and table_name < 'cf2';").then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 0);
|
||||
});
|
||||
}).then([&qp, start_token1] {
|
||||
return qp.execute_internal(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start = '%s';", start_token1)).then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 1);
|
||||
});
|
||||
}).then([&qp, start_token1] {
|
||||
return qp.execute_internal(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start >= '%s';", start_token1)).then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 253);
|
||||
});
|
||||
}).then([&qp, start_token1] {
|
||||
return qp.execute_internal(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start > '%s';", start_token1)).then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 252);
|
||||
});
|
||||
}).then([&qp, start_token1] {
|
||||
return qp.execute_internal(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start <= '%s';", start_token1)).then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 4);
|
||||
});
|
||||
}).then([&qp, start_token1] {
|
||||
return qp.execute_internal(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start < '%s';", start_token1)).then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 3);
|
||||
});
|
||||
}).then([&qp, start_token1] {
|
||||
return qp.execute_internal(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start >= '%s' and range_start <= '%s';", start_token1, start_token1)).then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 1);
|
||||
});
|
||||
}).then([&qp, start_token1, start_token2] {
|
||||
return qp.execute_internal(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start >= '%s' and range_start <= '%s';", start_token1, start_token2)).then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 3);
|
||||
});
|
||||
}).then([&qp, start_token1, start_token2] {
|
||||
return qp.execute_internal(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start > '%s' and range_start < '%s';", start_token1, start_token2)).then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 1);
|
||||
});
|
||||
}).then([&qp, start_token1, start_token2] {
|
||||
return qp.execute_internal(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start in ('%s', '%s');", start_token1, start_token2)).then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 2);
|
||||
});
|
||||
}).then([&qp, start_token1, start_token2] {
|
||||
return qp.execute_internal(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start > '%s' and range_start <= '%s';", start_token1, start_token2)).then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 2);
|
||||
});
|
||||
}).then([&qp, start_token1, start_token2] {
|
||||
return qp.execute_internal(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start >= '%s' and range_start < '%s';", start_token1, start_token2)).then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 2);
|
||||
});
|
||||
}).then([&qp, start_token1, end_token1] {
|
||||
return qp.execute_internal(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start = '%s' and range_end = '%s';", start_token1, end_token1)).then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 1);
|
||||
});
|
||||
}).then([&qp, start_token1, end_token1] {
|
||||
return qp.execute_internal(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start = '%s' and range_end >= '%s';", start_token1, end_token1)).then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 1);
|
||||
});
|
||||
}).then([&qp, start_token1, end_token1] {
|
||||
return qp.execute_internal(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start = '%s' and range_end > '%s';", start_token1, end_token1)).then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 0);
|
||||
});
|
||||
}).then([&qp, start_token1, end_token1] {
|
||||
return qp.execute_internal(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start = '%s' and range_end <= '%s';", start_token1, end_token1)).then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 1);
|
||||
});
|
||||
}).then([&qp, start_token1, end_token1] {
|
||||
return qp.execute_internal(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start = '%s' and range_end < '%s';", start_token1, end_token1)).then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 0);
|
||||
});
|
||||
}).then([&qp, start_token1, end_token1] {
|
||||
return qp.execute_internal(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start = '%s' and range_end >= '%s' and range_end <= '%s';", start_token1, end_token1, end_token1)).then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 1);
|
||||
});
|
||||
}).then([&qp, start_token1, end_token1] {
|
||||
return qp.execute_internal(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start = '%s' and range_end > '%s' and range_end < '%s';", start_token1, end_token1, end_token1)).then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 0);
|
||||
});
|
||||
}).then([&qp, start_token1, end_token1] {
|
||||
return qp.execute_internal(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and (table_name, range_start, range_end) = ('cf1', '%s', '%s');", start_token1, end_token1)).then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 1);
|
||||
});
|
||||
}).then([&qp, start_token1, end_token1] {
|
||||
return qp.execute_internal(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and (table_name, range_start, range_end) >= ('cf1', '%s', '%s') and (table_name) <= ('cf2');", start_token1, end_token1)).then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 509);
|
||||
});
|
||||
}).then([&qp, start_token1, start_token2, end_token1] {
|
||||
return qp.execute_internal(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and (table_name, range_start, range_end) >= ('cf1', '%s', '%s') "
|
||||
"and (table_name, range_start) <= ('cf2', '%s');", start_token1, end_token1, start_token2)).then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 259);
|
||||
});
|
||||
}).then([&qp, start_token1] {
|
||||
return qp.execute_internal(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and (table_name, range_start) < ('cf2', '%s');", start_token1)).then([](auto rs) {
|
||||
BOOST_REQUIRE_EQUAL(rs->size(), 259);
|
||||
});
|
||||
}).discard_result();
|
||||
|
||||
// Should not timeout.
|
||||
e.execute_cql("select * from system.size_estimates;").discard_result().get();
|
||||
|
||||
auto rs = e.execute_cql("select * from system.size_estimates where keyspace_name = 'ks';").get0();
|
||||
assert_that(rs).is_rows().with_size(0);
|
||||
|
||||
e.execute_cql("create table cf1(pk text PRIMARY KEY, v int);").discard_result().get();
|
||||
e.execute_cql("create table cf2(pk text PRIMARY KEY, v int);").discard_result().get();
|
||||
|
||||
rs = e.execute_cql("select * from system.size_estimates where keyspace_name = 'ks';").get0();
|
||||
assert_that(rs).is_rows().with_size(512);
|
||||
|
||||
rs = e.execute_cql("select * from system.size_estimates where keyspace_name = 'ks' limit 100;").get0();
|
||||
assert_that(rs).is_rows().with_size(100);
|
||||
|
||||
rs = e.execute_cql("select * from system.size_estimates where keyspace_name = 'ks' and table_name = 'cf1';").get0();
|
||||
assert_that(rs).is_rows().with_size(256);
|
||||
|
||||
rs = e.execute_cql("select * from system.size_estimates where keyspace_name = 'ks' and table_name > 'cf1';").get0();
|
||||
assert_that(rs).is_rows().with_size(256);
|
||||
|
||||
rs = e.execute_cql("select * from system.size_estimates where keyspace_name = 'ks' and table_name >= 'cf1';").get0();
|
||||
assert_that(rs).is_rows().with_size(512);
|
||||
|
||||
rs = e.execute_cql("select * from system.size_estimates where keyspace_name = 'ks' and table_name < 'cf2';").get0();
|
||||
assert_that(rs).is_rows().with_size(256);
|
||||
|
||||
rs = e.execute_cql("select * from system.size_estimates where keyspace_name = 'ks' and table_name <= 'cf2';").get0();
|
||||
assert_that(rs).is_rows().with_size(512);
|
||||
|
||||
rs = e.execute_cql("select * from system.size_estimates where keyspace_name = 'ks' and table_name in ('cf1', 'cf2');").get0();
|
||||
assert_that(rs).is_rows().with_size(512);
|
||||
|
||||
rs = e.execute_cql("select * from system.size_estimates where keyspace_name = 'ks' and table_name >= 'cf1' and table_name <= 'cf1';").get0();
|
||||
assert_that(rs).is_rows().with_size(256);
|
||||
|
||||
rs = e.execute_cql("select * from system.size_estimates where keyspace_name = 'ks' and table_name >= 'cf1' and table_name <= 'cf2';").get0();
|
||||
assert_that(rs).is_rows().with_size(512);
|
||||
|
||||
rs = e.execute_cql("select * from system.size_estimates where keyspace_name = 'ks' and table_name > 'cf1' and table_name < 'cf2';").get0();
|
||||
assert_that(rs).is_rows().with_size(0);
|
||||
|
||||
rs = e.execute_cql(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start = '%s';", start_token1)).get0();
|
||||
assert_that(rs).is_rows().with_size(1);
|
||||
|
||||
rs = e.execute_cql(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start >= '%s';", start_token1)).get0();
|
||||
assert_that(rs).is_rows().with_size(253);
|
||||
|
||||
rs = e.execute_cql(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start > '%s';", start_token1)).get0();
|
||||
assert_that(rs).is_rows().with_size(252);
|
||||
|
||||
rs = e.execute_cql(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start <= '%s';", start_token1)).get0();
|
||||
assert_that(rs).is_rows().with_size(4);
|
||||
|
||||
rs = e.execute_cql(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start < '%s';", start_token1)).get0();
|
||||
assert_that(rs).is_rows().with_size(3);
|
||||
|
||||
rs = e.execute_cql(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start >= '%s' and range_start <= '%s';", start_token1, start_token1)).get0();
|
||||
assert_that(rs).is_rows().with_size(1);
|
||||
|
||||
rs = e.execute_cql(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start >= '%s' and range_start <= '%s';", start_token1, start_token2)).get0();
|
||||
assert_that(rs).is_rows().with_size(3);
|
||||
|
||||
rs = e.execute_cql(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start > '%s' and range_start < '%s';", start_token1, start_token2)).get0();
|
||||
assert_that(rs).is_rows().with_size(1);
|
||||
|
||||
rs = e.execute_cql(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start in ('%s', '%s');", start_token1, start_token2)).get0();
|
||||
assert_that(rs).is_rows().with_size(2);
|
||||
|
||||
rs = e.execute_cql(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start > '%s' and range_start <= '%s';", start_token1, start_token2)).get0();
|
||||
assert_that(rs).is_rows().with_size(2);
|
||||
|
||||
rs = e.execute_cql(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start >= '%s' and range_start < '%s';", start_token1, start_token2)).get0();
|
||||
assert_that(rs).is_rows().with_size(2);
|
||||
|
||||
rs = e.execute_cql(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start = '%s' and range_end = '%s';", start_token1, end_token1)).get0();
|
||||
assert_that(rs).is_rows().with_size(1);
|
||||
|
||||
rs = e.execute_cql(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start = '%s' and range_end >= '%s';", start_token1, end_token1)).get0();
|
||||
assert_that(rs).is_rows().with_size(1);
|
||||
|
||||
rs = e.execute_cql(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start = '%s' and range_end > '%s';", start_token1, end_token1)).get0();
|
||||
assert_that(rs).is_rows().with_size(0);
|
||||
|
||||
rs = e.execute_cql(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start = '%s' and range_end <= '%s';", start_token1, end_token1)).get0();
|
||||
assert_that(rs).is_rows().with_size(1);
|
||||
|
||||
rs = e.execute_cql(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start = '%s' and range_end < '%s';", start_token1, end_token1)).get0();
|
||||
assert_that(rs).is_rows().with_size(0);
|
||||
|
||||
rs = e.execute_cql(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start = '%s' and range_end >= '%s' and range_end <= '%s';", start_token1, end_token1, end_token1)).get0();
|
||||
assert_that(rs).is_rows().with_size(1);
|
||||
|
||||
rs = e.execute_cql(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and table_name = 'cf1' and range_start = '%s' and range_end > '%s' and range_end < '%s';", start_token1, end_token1, end_token1)).get0();
|
||||
assert_that(rs).is_rows().with_size(0);
|
||||
|
||||
rs = e.execute_cql(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and (table_name, range_start, range_end) = ('cf1', '%s', '%s');", start_token1, end_token1)).get0();
|
||||
assert_that(rs).is_rows().with_size(1);
|
||||
|
||||
rs = e.execute_cql(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and (table_name, range_start, range_end) >= ('cf1', '%s', '%s') and (table_name) <= ('cf2');", start_token1, end_token1)).get0();
|
||||
assert_that(rs).is_rows().with_size(509);
|
||||
|
||||
rs = e.execute_cql(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and (table_name, range_start, range_end) >= ('cf1', '%s', '%s') "
|
||||
"and (table_name, range_start) <= ('cf2', '%s');", start_token1, end_token1, start_token2)).get0();
|
||||
assert_that(rs).is_rows().with_size(259);
|
||||
|
||||
rs = e.execute_cql(sprint("select * from system.size_estimates where keyspace_name = 'ks' "
|
||||
"and (table_name, range_start) < ('cf2', '%s');", start_token1)).get0();
|
||||
assert_that(rs).is_rows().with_size(259);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -847,7 +847,6 @@ future<response_type> cql_server::connection::process_execute(uint16_t stream, b
|
||||
}
|
||||
auto& options = *q_state->options;
|
||||
auto skip_metadata = options.skip_metadata();
|
||||
options.prepare(prepared->bound_names);
|
||||
|
||||
tracing::set_page_size(client_state.get_trace_state(), options.get_page_size());
|
||||
tracing::set_consistency_level(client_state.get_trace_state(), options.get_consistency());
|
||||
@@ -864,6 +863,9 @@ future<response_type> cql_server::connection::process_execute(uint16_t stream, b
|
||||
tracing::trace(query_state.get_trace_state(), "Invalid amount of bind variables: expected {:d} received {:d}", stmt->get_bound_terms(), options.get_values_count());
|
||||
throw exceptions::invalid_request_exception("Invalid amount of bind variables");
|
||||
}
|
||||
|
||||
options.prepare(prepared->bound_names);
|
||||
|
||||
tracing::trace(query_state.get_trace_state(), "Processing a statement");
|
||||
return _server._query_processor.local().process_statement_prepared(std::move(prepared), std::move(cache_key), query_state, options, needs_authorization).then([this, stream, buf = std::move(buf), &query_state, skip_metadata] (auto msg) {
|
||||
tracing::trace(query_state.get_trace_state(), "Done processing - preparing a result");
|
||||
@@ -1471,7 +1473,7 @@ void cql_server::connection::read_name_and_value_list(bytes_view& buf, std::vect
|
||||
names.reserve(size);
|
||||
values.reserve(size);
|
||||
for (uint16_t i = 0; i < size; i++) {
|
||||
names.emplace_back(read_string(buf));
|
||||
names.emplace_back(read_string_view(buf));
|
||||
values.emplace_back(read_value_view(buf));
|
||||
}
|
||||
}
|
||||
|
||||
27
types.cc
27
types.cc
@@ -236,9 +236,6 @@ struct integer_type_impl : simple_type_impl<T> {
|
||||
return to_sstring(compose_value(b));
|
||||
}
|
||||
virtual sstring to_json_string(const bytes& b) const override {
|
||||
if (b.empty()) {
|
||||
return "null";
|
||||
}
|
||||
return to_sstring(compose_value(b));
|
||||
}
|
||||
virtual bytes from_json_object(const Json::Value& value, cql_serialization_format sf) const override {
|
||||
@@ -506,9 +503,6 @@ struct boolean_type_impl : public simple_type_impl<bool> {
|
||||
return boolean_to_string(*b.begin());
|
||||
}
|
||||
virtual sstring to_json_string(const bytes& b) const override {
|
||||
if (b.empty()) {
|
||||
return "null";
|
||||
}
|
||||
return to_string(b);
|
||||
}
|
||||
virtual bytes from_json_object(const Json::Value& value, cql_serialization_format sf) const override {
|
||||
@@ -1412,7 +1406,7 @@ struct floating_type_impl : public simple_type_impl<T> {
|
||||
virtual sstring to_json_string(const bytes& b) const override {
|
||||
auto v = deserialize(b);
|
||||
if (v.is_null()) {
|
||||
return "null";
|
||||
throw exceptions::invalid_request_exception("Cannot create JSON string - deserialization error");
|
||||
}
|
||||
T d = this->from_value(v);
|
||||
if (std::isnan(d) || std::isinf(d)) {
|
||||
@@ -1477,7 +1471,7 @@ public:
|
||||
}
|
||||
b.push_back(v);
|
||||
}
|
||||
std::copy(b.crbegin(), b.crend(), out);
|
||||
out = std::copy(b.crbegin(), b.crend(), out);
|
||||
}
|
||||
virtual size_t serialized_size(const void* value) const override {
|
||||
if (!value) {
|
||||
@@ -1540,12 +1534,15 @@ public:
|
||||
virtual sstring to_json_string(const bytes& b) const override {
|
||||
auto v = deserialize(b);
|
||||
if (v.is_null()) {
|
||||
return "null";
|
||||
throw exceptions::invalid_request_exception("Cannot create JSON string - deserialization error");
|
||||
}
|
||||
return from_value(v).get().str();
|
||||
return to_string(b);
|
||||
}
|
||||
virtual bytes from_json_object(const Json::Value& value, cql_serialization_format sf) const override {
|
||||
if (value.isString()) {
|
||||
return from_string(value.asString());
|
||||
}
|
||||
return from_string(json::to_sstring(value));
|
||||
}
|
||||
virtual bytes from_string(sstring_view text) const override {
|
||||
@@ -1648,13 +1645,15 @@ public:
|
||||
virtual sstring to_json_string(const bytes& b) const override {
|
||||
auto v = deserialize(b);
|
||||
if (v.is_null()) {
|
||||
return "null";
|
||||
throw exceptions::invalid_request_exception("Cannot create JSON string - deserialization error");
|
||||
}
|
||||
return from_value(v).get().to_string();
|
||||
}
|
||||
virtual bytes from_json_object(const Json::Value& value, cql_serialization_format sf) const override {
|
||||
if (!value.isNumeric()) {
|
||||
throw marshal_exception(sprint("%s must be represented as numeric in JSON", value.toStyledString()));
|
||||
if (value.isString()) {
|
||||
return from_string(value.asString());
|
||||
} else if (!value.isNumeric()) {
|
||||
throw marshal_exception(sprint("%s must be represented as numeric or string in JSON", value.toStyledString()));
|
||||
}
|
||||
|
||||
return from_string(json::to_sstring(value));
|
||||
@@ -1871,7 +1870,7 @@ public:
|
||||
virtual sstring to_json_string(const bytes& b) const override {
|
||||
auto v = deserialize(b);
|
||||
if (v.is_null()) {
|
||||
return "null";
|
||||
throw exceptions::invalid_request_exception("Cannot create JSON string - deserialization error");
|
||||
}
|
||||
return quote_json_string(to_string(b));
|
||||
}
|
||||
@@ -2166,7 +2165,7 @@ void write_collection_value(bytes::iterator& out, cql_serialization_format sf, b
|
||||
}
|
||||
|
||||
sstring abstract_type::quote_json_string(const sstring& s) {
|
||||
return Json::valueToQuotedString(s.c_str());
|
||||
return json::value_to_quoted_string(s);
|
||||
}
|
||||
|
||||
void write_collection_value(bytes::iterator& out, cql_serialization_format sf, data_type type, const data_value& value) {
|
||||
|
||||
3
types.hh
3
types.hh
@@ -551,6 +551,9 @@ public:
|
||||
virtual sstring to_string(const bytes& b) const = 0;
|
||||
virtual bytes from_string(sstring_view text) const = 0;
|
||||
virtual sstring to_json_string(const bytes& b) const = 0;
|
||||
sstring to_json_string(const bytes_opt& b) const {
|
||||
return b ? to_json_string(*b) : "null";
|
||||
}
|
||||
virtual bytes from_json_object(const Json::Value& value, cql_serialization_format sf) const = 0;
|
||||
virtual bool is_counter() const { return false; }
|
||||
virtual bool is_collection() const { return false; }
|
||||
|
||||
@@ -775,3 +775,7 @@ FUNC_START(__crc32_vpmsum)
|
||||
|
||||
FUNC_END(__crc32_vpmsum)
|
||||
#endif
|
||||
|
||||
// Mark the stack as non-executable so the final executable won't
|
||||
// have an executable stack
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
||||
@@ -23,9 +23,12 @@
|
||||
|
||||
#include <chrono>
|
||||
#include <unordered_map>
|
||||
|
||||
#include <boost/intrusive/list.hpp>
|
||||
#include <boost/intrusive/unordered_set.hpp>
|
||||
#include <boost/intrusive/parent_from_member.hpp>
|
||||
#include <boost/range/adaptor/filtered.hpp>
|
||||
#include <boost/range/adaptor/transformed.hpp>
|
||||
|
||||
#include <seastar/core/reactor.hh>
|
||||
#include <seastar/core/timer.hh>
|
||||
@@ -33,6 +36,7 @@
|
||||
|
||||
#include "exceptions/exceptions.hh"
|
||||
#include "utils/loading_shared_values.hh"
|
||||
#include "utils/chunked_vector.hh"
|
||||
#include "log.hh"
|
||||
|
||||
namespace bi = boost::intrusive;
|
||||
@@ -40,7 +44,7 @@ namespace bi = boost::intrusive;
|
||||
namespace utils {
|
||||
|
||||
using loading_cache_clock_type = seastar::lowres_clock;
|
||||
using auto_unlink_list_hook = bi::list_base_hook<bi::link_mode<bi::auto_unlink>>;
|
||||
using safe_link_list_hook = bi::list_base_hook<bi::link_mode<bi::safe_link>>;
|
||||
|
||||
template<typename Tp, typename Key, typename EntrySize , typename Hash, typename EqualPred, typename LoadingSharedValuesStats>
|
||||
class timestamped_val {
|
||||
@@ -145,13 +149,13 @@ public:
|
||||
|
||||
/// \brief This is and LRU list entry which is also an anchor for a loading_cache value.
|
||||
template<typename Tp, typename Key, typename EntrySize , typename Hash, typename EqualPred, typename LoadingSharedValuesStats>
|
||||
class timestamped_val<Tp, Key, EntrySize, Hash, EqualPred, LoadingSharedValuesStats>::lru_entry : public auto_unlink_list_hook {
|
||||
class timestamped_val<Tp, Key, EntrySize, Hash, EqualPred, LoadingSharedValuesStats>::lru_entry : public safe_link_list_hook {
|
||||
private:
|
||||
using ts_value_type = timestamped_val<Tp, Key, EntrySize, Hash, EqualPred, LoadingSharedValuesStats>;
|
||||
using loading_values_type = typename ts_value_type::loading_values_type;
|
||||
|
||||
public:
|
||||
using lru_list_type = bi::list<lru_entry, bi::constant_time_size<false>>;
|
||||
using lru_list_type = bi::list<lru_entry>;
|
||||
using timestamped_val_ptr = typename loading_values_type::entry_ptr;
|
||||
|
||||
private:
|
||||
@@ -170,6 +174,9 @@ public:
|
||||
}
|
||||
|
||||
~lru_entry() {
|
||||
if (safe_link_list_hook::is_linked()) {
|
||||
_lru_list.erase(_lru_list.iterator_to(*this));
|
||||
}
|
||||
_cache_size -= _ts_val_ptr->size();
|
||||
_ts_val_ptr->set_anchor_back_reference(nullptr);
|
||||
}
|
||||
@@ -181,7 +188,9 @@ public:
|
||||
/// Set this item as the most recently used item.
|
||||
/// The MRU item is going to be at the front of the _lru_list, the LRU item - at the back.
|
||||
void touch() noexcept {
|
||||
auto_unlink_list_hook::unlink();
|
||||
if (safe_link_list_hook::is_linked()) {
|
||||
_lru_list.erase(_lru_list.iterator_to(*this));
|
||||
}
|
||||
_lru_list.push_front(*this);
|
||||
}
|
||||
|
||||
@@ -250,9 +259,10 @@ private:
|
||||
using ts_value_lru_entry = typename ts_value_type::lru_entry;
|
||||
using set_iterator = typename loading_values_type::iterator;
|
||||
using lru_list_type = typename ts_value_lru_entry::lru_list_type;
|
||||
using list_iterator = typename lru_list_type::iterator;
|
||||
struct value_extractor_fn {
|
||||
Tp& operator()(ts_value_type& tv) const {
|
||||
return tv.value();
|
||||
Tp& operator()(ts_value_lru_entry& le) const {
|
||||
return le.timestamped_value().value();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -262,7 +272,7 @@ public:
|
||||
using value_ptr = typename ts_value_type::value_ptr;
|
||||
|
||||
class entry_is_too_big : public std::exception {};
|
||||
using iterator = boost::transform_iterator<value_extractor_fn, set_iterator>;
|
||||
using iterator = boost::transform_iterator<value_extractor_fn, list_iterator>;
|
||||
|
||||
private:
|
||||
loading_cache(size_t max_size, std::chrono::milliseconds expiry, std::chrono::milliseconds refresh, logging::logger& logger)
|
||||
@@ -376,19 +386,19 @@ public:
|
||||
|
||||
template<typename KeyType, typename KeyHasher, typename KeyEqual>
|
||||
iterator find(const KeyType& key, KeyHasher key_hasher_func, KeyEqual key_equal_func) noexcept {
|
||||
return boost::make_transform_iterator(set_find(key, std::move(key_hasher_func), std::move(key_equal_func)), _value_extractor_fn);
|
||||
return boost::make_transform_iterator(to_list_iterator(set_find(key, std::move(key_hasher_func), std::move(key_equal_func))), _value_extractor_fn);
|
||||
};
|
||||
|
||||
iterator find(const Key& k) noexcept {
|
||||
return boost::make_transform_iterator(set_find(k), _value_extractor_fn);
|
||||
return boost::make_transform_iterator(to_list_iterator(set_find(k)), _value_extractor_fn);
|
||||
}
|
||||
|
||||
iterator end() {
|
||||
return boost::make_transform_iterator(_loading_values.end(), _value_extractor_fn);
|
||||
return boost::make_transform_iterator(list_end(), _value_extractor_fn);
|
||||
}
|
||||
|
||||
iterator begin() {
|
||||
return boost::make_transform_iterator(_loading_values.begin(), _value_extractor_fn);
|
||||
return boost::make_transform_iterator(list_begin(), _value_extractor_fn);
|
||||
}
|
||||
|
||||
template <typename Pred>
|
||||
@@ -421,7 +431,7 @@ public:
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return _loading_values.size();
|
||||
return _lru_list.size();
|
||||
}
|
||||
|
||||
/// \brief returns the memory size the currently cached entries occupy according to the EntrySize predicate.
|
||||
@@ -430,6 +440,15 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
/// Should only be called on values for which the following holds: set_it == set_end() || set_it->ready()
|
||||
/// For instance this always holds for iterators returned by set_find(...).
|
||||
list_iterator to_list_iterator(set_iterator set_it) {
|
||||
if (set_it != set_end()) {
|
||||
return _lru_list.iterator_to(*set_it->lru_entry_ptr());
|
||||
}
|
||||
return list_end();
|
||||
}
|
||||
|
||||
set_iterator ready_entry_iterator(set_iterator it) {
|
||||
set_iterator end_it = set_end();
|
||||
|
||||
@@ -458,6 +477,14 @@ private:
|
||||
return _loading_values.begin();
|
||||
}
|
||||
|
||||
list_iterator list_end() noexcept {
|
||||
return _lru_list.end();
|
||||
}
|
||||
|
||||
list_iterator list_begin() noexcept {
|
||||
return _lru_list.begin();
|
||||
}
|
||||
|
||||
bool caching_enabled() const {
|
||||
return _expiry != std::chrono::milliseconds(0);
|
||||
}
|
||||
@@ -467,11 +494,19 @@ private:
|
||||
Alloc().deallocate(val, 1);
|
||||
}
|
||||
|
||||
future<> reload(ts_value_lru_entry& lru_entry) {
|
||||
return _load(lru_entry.key()).then_wrapped([this, key = lru_entry.key()] (auto&& f) mutable {
|
||||
future<> reload(timestamped_val_ptr ts_value_ptr) {
|
||||
const Key& key = loading_values_type::to_key(ts_value_ptr);
|
||||
|
||||
// Do nothing if the entry has been dropped before we got here (e.g. by the _load() call on another key that is
|
||||
// also being reloaded).
|
||||
if (!ts_value_ptr->lru_entry_ptr()) {
|
||||
_logger.trace("{}: entry was dropped before the reload", key);
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
return _load(key).then_wrapped([this, ts_value_ptr = std::move(ts_value_ptr), &key] (auto&& f) mutable {
|
||||
// if the entry has been evicted by now - simply end here
|
||||
set_iterator it = this->set_find(key);
|
||||
if (it == this->set_end()) {
|
||||
if (!ts_value_ptr->lru_entry_ptr()) {
|
||||
_logger.trace("{}: entry was dropped during the reload", key);
|
||||
return make_ready_future<>();
|
||||
}
|
||||
@@ -483,7 +518,7 @@ private:
|
||||
// will be propagated up to the user and will fail the
|
||||
// corresponding query.
|
||||
try {
|
||||
*it = f.get0();
|
||||
*ts_value_ptr = f.get0();
|
||||
} catch (std::exception& e) {
|
||||
_logger.debug("{}: reload failed: {}", key, e.what());
|
||||
} catch (...) {
|
||||
@@ -540,21 +575,25 @@ private:
|
||||
// check if rehashing is needed and do it if it is.
|
||||
periodic_rehash();
|
||||
|
||||
if (ReloadEnabled == loading_cache_reload_enabled::no) {
|
||||
if constexpr (ReloadEnabled == loading_cache_reload_enabled::no) {
|
||||
_logger.trace("on_timer(): rearming");
|
||||
_timer.arm(loading_cache_clock_type::now() + _timer_period);
|
||||
return;
|
||||
}
|
||||
|
||||
// Reload all those which vlaue needs to be reloaded.
|
||||
// Reload all those which value needs to be reloaded.
|
||||
with_gate(_timer_reads_gate, [this] {
|
||||
return parallel_for_each(_lru_list.begin(), _lru_list.end(), [this] (ts_value_lru_entry& lru_entry) {
|
||||
_logger.trace("on_timer(): {}: checking the value age", lru_entry.key());
|
||||
if (lru_entry.timestamped_value().loaded() + _refresh < loading_cache_clock_type::now()) {
|
||||
_logger.trace("on_timer(): {}: reloading the value", lru_entry.key());
|
||||
return this->reload(lru_entry);
|
||||
}
|
||||
return now();
|
||||
auto to_reload = boost::copy_range<utils::chunked_vector<timestamped_val_ptr>>(_lru_list
|
||||
| boost::adaptors::filtered([this] (ts_value_lru_entry& lru_entry) {
|
||||
return lru_entry.timestamped_value().loaded() + _refresh < loading_cache_clock_type::now();
|
||||
})
|
||||
| boost::adaptors::transformed([] (ts_value_lru_entry& lru_entry) {
|
||||
return lru_entry.timestamped_value_ptr();
|
||||
}));
|
||||
|
||||
return parallel_for_each(std::move(to_reload), [this] (timestamped_val_ptr ts_value_ptr) {
|
||||
_logger.trace("on_timer(): {}: reloading the value", loading_values_type::to_key(ts_value_ptr));
|
||||
return this->reload(std::move(ts_value_ptr));
|
||||
}).finally([this] {
|
||||
_logger.trace("on_timer(): rearming");
|
||||
_timer.arm(loading_cache_clock_type::now() + _timer_period);
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user