Compare commits
173 Commits
mv
...
branch-2.3
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a0b9fcc041 | ||
|
|
35c9b675c1 | ||
|
|
d71836fef7 | ||
|
|
f8e150e97c | ||
|
|
10c300f894 | ||
|
|
de1d3e5c6b | ||
|
|
69810c13ca | ||
|
|
9b025a5742 | ||
|
|
74eebc4cab | ||
|
|
9b2ca4ee44 | ||
|
|
773bf45774 | ||
|
|
c6705b4335 | ||
|
|
3997871b4d | ||
|
|
4ff1d731bd | ||
|
|
0e0f9143c9 | ||
|
|
9d809d6ea4 | ||
|
|
630d599c34 | ||
|
|
0933c1a00a | ||
|
|
7a7099fcfb | ||
|
|
50235aacb4 | ||
|
|
e888009f12 | ||
|
|
a19615ee9b | ||
|
|
357ca67fda | ||
|
|
7818c63eb1 | ||
|
|
da10eae18c | ||
|
|
d5292cd3ec | ||
|
|
9cb35361d9 | ||
|
|
3e285248be | ||
|
|
6f10ccb441 | ||
|
|
df420499bc | ||
|
|
d29527b4e1 | ||
|
|
8a90e242e4 | ||
|
|
8a78c0aba9 | ||
|
|
8a2bbcf138 | ||
|
|
22c891e6df | ||
|
|
1841d0c2d9 | ||
|
|
e10107fe5a | ||
|
|
0b3a4679db | ||
|
|
ba60d666a9 | ||
|
|
6ea4d0b75c | ||
|
|
8c5911f312 | ||
|
|
de00d7f5a1 | ||
|
|
e5f9dae4bb | ||
|
|
e13e796290 | ||
|
|
336c771663 | ||
|
|
82968afc25 | ||
|
|
383dcffb53 | ||
|
|
0c2abc007c | ||
|
|
1498c4f150 | ||
|
|
f388992a94 | ||
|
|
310540c11f | ||
|
|
7d833023cc | ||
|
|
d94ac196e0 | ||
|
|
1d7430995e | ||
|
|
b662a7f8a4 | ||
|
|
447ad72882 | ||
|
|
b8485d3bce | ||
|
|
034b0f50db | ||
|
|
12ec0becf3 | ||
|
|
666b19552d | ||
|
|
178f870a03 | ||
|
|
1b18f16dc1 | ||
|
|
28934575e4 | ||
|
|
182cbeefb0 | ||
|
|
b70fc41a90 | ||
|
|
debfc795b2 | ||
|
|
0d094575ec | ||
|
|
20baef69a9 | ||
|
|
1bac88601d | ||
|
|
e581fd1463 | ||
|
|
b366bff998 | ||
|
|
38e6984ba5 | ||
|
|
332f76579e | ||
|
|
315a03cf6c | ||
|
|
1847dc7a6a | ||
|
|
dd11b5987e | ||
|
|
a134e8699a | ||
|
|
bd7dcbb8d2 | ||
|
|
74e61528a6 | ||
|
|
5eb4fde2d5 | ||
|
|
cc0703f8ca | ||
|
|
678283a5bb | ||
|
|
552c0d7641 | ||
|
|
860c06660b | ||
|
|
db733ba075 | ||
|
|
88677d39c8 | ||
|
|
d767dee5ec | ||
|
|
702f6ee1b7 | ||
|
|
473b9aec65 | ||
|
|
b548061257 | ||
|
|
01165a9ae7 | ||
|
|
5cdb963768 | ||
|
|
7c9b9a4e24 | ||
|
|
f475c65ae6 | ||
|
|
687372bc48 | ||
|
|
65c140121c | ||
|
|
ed68ad220f | ||
|
|
35f4b8fbbe | ||
|
|
48012fe418 | ||
|
|
c862ccda91 | ||
|
|
83b1057c4b | ||
|
|
c1cb779dd2 | ||
|
|
b47d18f9fd | ||
|
|
f8713b019e | ||
|
|
cd5e4eace5 | ||
|
|
4fb5403670 | ||
|
|
e9df6c42ce | ||
|
|
5fdf492ccc | ||
|
|
fd2b02a12c | ||
|
|
f8cec2f891 | ||
|
|
e4d6577ef2 | ||
|
|
346027248d | ||
|
|
2cf6191353 | ||
|
|
b52d647de2 | ||
|
|
f7c96a37f1 | ||
|
|
ae71ffdcfd | ||
|
|
a235900388 | ||
|
|
be9f150341 | ||
|
|
2478fa1f6e | ||
|
|
d95ac1826e | ||
|
|
6fc17345e9 | ||
|
|
4bfa0ae247 | ||
|
|
174b7870e6 | ||
|
|
e95b4ee825 | ||
|
|
464305de1c | ||
|
|
3a1a9e1a11 | ||
|
|
90dac5d944 | ||
|
|
e5a83d105c | ||
|
|
9b4a0a2879 | ||
|
|
adad12ddc3 | ||
|
|
a77bb1fe34 | ||
|
|
3c7e6dfdb9 | ||
|
|
fab136ae1d | ||
|
|
a4218f536b | ||
|
|
9f4431ef04 | ||
|
|
66250bf8cc | ||
|
|
88fe3c2694 | ||
|
|
db4c3d3e52 | ||
|
|
ca22a1cd1a | ||
|
|
f9b702764e | ||
|
|
54701bd95c | ||
|
|
30eca5f534 | ||
|
|
cd057d3882 | ||
|
|
c5a5a2265e | ||
|
|
3e482c6c9d | ||
|
|
5b6cadb890 | ||
|
|
9cf8cd6c02 | ||
|
|
b34567b69b | ||
|
|
02b763ed97 | ||
|
|
05500a52d7 | ||
|
|
4afa558e97 | ||
|
|
f3956421f7 | ||
|
|
a17a6ce8f5 | ||
|
|
58a362c1f2 | ||
|
|
361b2dd7a5 | ||
|
|
f6a2bafae2 | ||
|
|
2ec25a55cd | ||
|
|
d3fb7c5515 | ||
|
|
b1ac6a36f2 | ||
|
|
8cba125bce | ||
|
|
f46f9f7533 | ||
|
|
090d991f8e | ||
|
|
ae15a80d01 | ||
|
|
6cf902343a | ||
|
|
d5e59f671c | ||
|
|
38944655c5 | ||
|
|
06e274ff34 | ||
|
|
c24d4a8acb | ||
|
|
5f95b76c65 | ||
|
|
0bdb7e1e7c | ||
|
|
56ea4f3154 | ||
|
|
d9c178063c | ||
|
|
b21b7f73b9 |
2
.gitmodules
vendored
2
.gitmodules
vendored
@@ -1,6 +1,6 @@
|
||||
[submodule "seastar"]
|
||||
path = seastar
|
||||
url = ../seastar
|
||||
url = ../scylla-seastar
|
||||
ignore = dirty
|
||||
[submodule "swagger-ui"]
|
||||
path = swagger-ui
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/bin/sh
|
||||
|
||||
VERSION=666.development
|
||||
VERSION=2.3.6
|
||||
|
||||
if test -f version
|
||||
then
|
||||
|
||||
@@ -2228,11 +2228,11 @@
|
||||
"description":"The column family"
|
||||
},
|
||||
"total":{
|
||||
"type":"int",
|
||||
"type":"long",
|
||||
"description":"The total snapshot size"
|
||||
},
|
||||
"live":{
|
||||
"type":"int",
|
||||
"type":"long",
|
||||
"description":"The live snapshot size"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -47,14 +47,6 @@ atomic_cell atomic_cell::make_live(const abstract_type& type, api::timestamp_typ
|
||||
);
|
||||
}
|
||||
|
||||
atomic_cell atomic_cell::make_live(const abstract_type& type, api::timestamp_type timestamp, ser::buffer_view<bytes_ostream::fragment_iterator> value, atomic_cell::collection_member cm) {
|
||||
auto& imr_data = type.imr_state();
|
||||
return atomic_cell(
|
||||
imr_data.type_info(),
|
||||
imr_object_type::make(data::cell::make_live(imr_data.type_info(), timestamp, value, bool(cm)), &imr_data.lsa_migrator())
|
||||
);
|
||||
}
|
||||
|
||||
atomic_cell atomic_cell::make_live(const abstract_type& type, api::timestamp_type timestamp, bytes_view value,
|
||||
gc_clock::time_point expiry, gc_clock::duration ttl, atomic_cell::collection_member cm) {
|
||||
auto& imr_data = type.imr_state();
|
||||
@@ -64,15 +56,6 @@ atomic_cell atomic_cell::make_live(const abstract_type& type, api::timestamp_typ
|
||||
);
|
||||
}
|
||||
|
||||
atomic_cell atomic_cell::make_live(const abstract_type& type, api::timestamp_type timestamp, ser::buffer_view<bytes_ostream::fragment_iterator> value,
|
||||
gc_clock::time_point expiry, gc_clock::duration ttl, atomic_cell::collection_member cm) {
|
||||
auto& imr_data = type.imr_state();
|
||||
return atomic_cell(
|
||||
imr_data.type_info(),
|
||||
imr_object_type::make(data::cell::make_live(imr_data.type_info(), timestamp, value, expiry, ttl, bool(cm)), &imr_data.lsa_migrator())
|
||||
);
|
||||
}
|
||||
|
||||
atomic_cell atomic_cell::make_live_counter_update(api::timestamp_type timestamp, int64_t value) {
|
||||
auto& imr_data = no_type_imr_descriptor();
|
||||
return atomic_cell(
|
||||
|
||||
@@ -34,8 +34,6 @@
|
||||
#include "data/schema_info.hh"
|
||||
#include "imr/utils.hh"
|
||||
|
||||
#include "serializer.hh"
|
||||
|
||||
class abstract_type;
|
||||
class collection_type_impl;
|
||||
|
||||
@@ -188,8 +186,6 @@ public:
|
||||
static atomic_cell make_dead(api::timestamp_type timestamp, gc_clock::time_point deletion_time);
|
||||
static atomic_cell make_live(const abstract_type& type, api::timestamp_type timestamp, bytes_view value,
|
||||
collection_member = collection_member::no);
|
||||
static atomic_cell make_live(const abstract_type& type, api::timestamp_type timestamp, ser::buffer_view<bytes_ostream::fragment_iterator> value,
|
||||
collection_member = collection_member::no);
|
||||
static atomic_cell make_live(const abstract_type& type, api::timestamp_type timestamp, const bytes& value,
|
||||
collection_member cm = collection_member::no) {
|
||||
return make_live(type, timestamp, bytes_view(value), cm);
|
||||
@@ -197,8 +193,6 @@ public:
|
||||
static atomic_cell make_live_counter_update(api::timestamp_type timestamp, int64_t value);
|
||||
static atomic_cell make_live(const abstract_type&, api::timestamp_type timestamp, bytes_view value,
|
||||
gc_clock::time_point expiry, gc_clock::duration ttl, collection_member = collection_member::no);
|
||||
static atomic_cell make_live(const abstract_type&, api::timestamp_type timestamp, ser::buffer_view<bytes_ostream::fragment_iterator> value,
|
||||
gc_clock::time_point expiry, gc_clock::duration ttl, collection_member = collection_member::no);
|
||||
static atomic_cell make_live(const abstract_type& type, api::timestamp_type timestamp, const bytes& value,
|
||||
gc_clock::time_point expiry, gc_clock::duration ttl, collection_member cm = collection_member::no)
|
||||
{
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
#include "database.hh"
|
||||
#include "schema_builder.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
#include "timeout_config.hh"
|
||||
|
||||
namespace auth {
|
||||
|
||||
@@ -94,4 +95,10 @@ future<> wait_for_schema_agreement(::service::migration_manager& mm, const datab
|
||||
});
|
||||
}
|
||||
|
||||
const timeout_config& internal_distributed_timeout_config() noexcept {
|
||||
static const auto t = 5s;
|
||||
static const timeout_config tc{t, t, t, t, t, t, t};
|
||||
return tc;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
class database;
|
||||
class timeout_config;
|
||||
|
||||
namespace service {
|
||||
class migration_manager;
|
||||
@@ -82,4 +83,9 @@ future<> create_metadata_table_if_missing(
|
||||
|
||||
future<> wait_for_schema_agreement(::service::migration_manager&, const database&);
|
||||
|
||||
///
|
||||
/// Time-outs for internal, non-local CQL queries.
|
||||
///
|
||||
const timeout_config& internal_distributed_timeout_config() noexcept;
|
||||
|
||||
}
|
||||
|
||||
@@ -228,7 +228,7 @@ default_authorizer::modify(
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::ONE,
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{permissions::to_strings(set), sstring(role_name), resource.name()}).discard_result();
|
||||
});
|
||||
}
|
||||
@@ -254,7 +254,7 @@ future<std::vector<permission_details>> default_authorizer::list_all() const {
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::ONE,
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{},
|
||||
true).then([](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
std::vector<permission_details> all_details;
|
||||
@@ -282,7 +282,7 @@ future<> default_authorizer::revoke_all(stdx::string_view role_name) const {
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::ONE,
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name)}).discard_result().handle_exception([role_name](auto ep) {
|
||||
try {
|
||||
std::rethrow_exception(ep);
|
||||
|
||||
@@ -149,7 +149,9 @@ static sstring gensalt() {
|
||||
// blowfish 2011 fix, blowfish, sha512, sha256, md5
|
||||
for (sstring pfx : { "$2y$", "$2a$", "$6$", "$5$", "$1$" }) {
|
||||
salt = pfx + input;
|
||||
if (crypt_r("fisk", salt.c_str(), &tlcrypt)) {
|
||||
const char* e = crypt_r("fisk", salt.c_str(), &tlcrypt);
|
||||
|
||||
if (e && (e[0] != '*')) {
|
||||
prefix = pfx;
|
||||
return salt;
|
||||
}
|
||||
@@ -184,7 +186,7 @@ future<> password_authenticator::migrate_legacy_metadata() const {
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
infinite_timeout_config).then([this](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
internal_distributed_timeout_config()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
|
||||
auto username = row.get_as<sstring>("username");
|
||||
auto salted_hash = row.get_as<sstring>(SALTED_HASH);
|
||||
@@ -192,7 +194,7 @@ future<> password_authenticator::migrate_legacy_metadata() const {
|
||||
return _qp.process(
|
||||
update_row_query,
|
||||
consistency_for_user(username),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{std::move(salted_hash), username}).discard_result();
|
||||
}).finally([results] {});
|
||||
}).then([] {
|
||||
@@ -209,7 +211,7 @@ future<> password_authenticator::create_default_if_missing() const {
|
||||
return _qp.process(
|
||||
update_row_query,
|
||||
db::consistency_level::QUORUM,
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{hashpw(DEFAULT_USER_PASSWORD), DEFAULT_USER_NAME}).then([](auto&&) {
|
||||
plogger.info("Created default superuser authentication record.");
|
||||
});
|
||||
@@ -309,13 +311,17 @@ future<authenticated_user> password_authenticator::authenticate(
|
||||
return _qp.process(
|
||||
query,
|
||||
consistency_for_user(username),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{username},
|
||||
true);
|
||||
}).then_wrapped([=](future<::shared_ptr<cql3::untyped_result_set>> f) {
|
||||
try {
|
||||
auto res = f.get0();
|
||||
if (res->empty() || !checkpw(password, res->one().get_as<sstring>(SALTED_HASH))) {
|
||||
auto salted_hash = std::experimental::optional<sstring>();
|
||||
if (!res->empty()) {
|
||||
salted_hash = res->one().get_opt<sstring>(SALTED_HASH);
|
||||
}
|
||||
if (!salted_hash || !checkpw(password, *salted_hash)) {
|
||||
throw exceptions::authentication_exception("Username and/or password are incorrect");
|
||||
}
|
||||
return make_ready_future<authenticated_user>(username);
|
||||
@@ -337,7 +343,7 @@ future<> password_authenticator::create(stdx::string_view role_name, const authe
|
||||
return _qp.process(
|
||||
update_row_query,
|
||||
consistency_for_user(role_name),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{hashpw(*options.password), sstring(role_name)}).discard_result();
|
||||
}
|
||||
|
||||
@@ -355,7 +361,7 @@ future<> password_authenticator::alter(stdx::string_view role_name, const authen
|
||||
return _qp.process(
|
||||
query,
|
||||
consistency_for_user(role_name),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{hashpw(*options.password), sstring(role_name)}).discard_result();
|
||||
}
|
||||
|
||||
@@ -366,7 +372,10 @@ future<> password_authenticator::drop(stdx::string_view name) const {
|
||||
meta::roles_table::qualified_name(),
|
||||
meta::roles_table::role_col_name);
|
||||
|
||||
return _qp.process(query, consistency_for_user(name), infinite_timeout_config, {sstring(name)}).discard_result();
|
||||
return _qp.process(
|
||||
query, consistency_for_user(name),
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(name)}).discard_result();
|
||||
}
|
||||
|
||||
future<custom_options> password_authenticator::query_custom_options(stdx::string_view role_name) const {
|
||||
|
||||
@@ -79,7 +79,7 @@ future<bool> default_role_row_satisfies(
|
||||
return qp.process(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{meta::DEFAULT_SUPERUSER_NAME},
|
||||
true).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
if (results->empty()) {
|
||||
@@ -104,7 +104,7 @@ future<bool> any_nondefault_role_row_satisfies(
|
||||
return qp.process(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
infinite_timeout_config).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
internal_distributed_timeout_config()).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
if (results->empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -196,6 +196,10 @@ future<> service::start() {
|
||||
}
|
||||
|
||||
future<> service::stop() {
|
||||
// Only one of the shards has the listener registered, but let's try to
|
||||
// unregister on each one just to make sure.
|
||||
_migration_manager.unregister_listener(_migration_listener.get());
|
||||
|
||||
return _permissions_cache->stop().then([this] {
|
||||
return when_all_succeed(_role_manager->stop(), _authorizer->stop(), _authenticator->stop());
|
||||
});
|
||||
|
||||
@@ -89,7 +89,7 @@ static future<stdx::optional<record>> find_record(cql3::query_processor& qp, std
|
||||
return qp.process(
|
||||
query,
|
||||
consistency_for_role(role_name),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name)},
|
||||
true).then([](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
if (results->empty()) {
|
||||
@@ -174,7 +174,7 @@ future<> standard_role_manager::create_default_role_if_missing() const {
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{meta::DEFAULT_SUPERUSER_NAME}).then([](auto&&) {
|
||||
log.info("Created default superuser role '{}'.", meta::DEFAULT_SUPERUSER_NAME);
|
||||
return make_ready_future<>();
|
||||
@@ -201,7 +201,7 @@ future<> standard_role_manager::migrate_legacy_metadata() const {
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
infinite_timeout_config).then([this](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
internal_distributed_timeout_config()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
|
||||
role_config config;
|
||||
config.is_superuser = row.get_as<bool>("super");
|
||||
@@ -263,7 +263,7 @@ future<> standard_role_manager::create_or_replace(stdx::string_view role_name, c
|
||||
return _qp.process(
|
||||
query,
|
||||
consistency_for_role(role_name),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name), c.is_superuser, c.can_login},
|
||||
true).discard_result();
|
||||
}
|
||||
@@ -307,7 +307,7 @@ standard_role_manager::alter(stdx::string_view role_name, const role_config_upda
|
||||
build_column_assignments(u),
|
||||
meta::roles_table::role_col_name),
|
||||
consistency_for_role(role_name),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name)}).discard_result();
|
||||
});
|
||||
}
|
||||
@@ -327,7 +327,7 @@ future<> standard_role_manager::drop(stdx::string_view role_name) const {
|
||||
return _qp.process(
|
||||
query,
|
||||
consistency_for_role(role_name),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name)}).then([this, role_name](::shared_ptr<cql3::untyped_result_set> members) {
|
||||
return parallel_for_each(
|
||||
members->begin(),
|
||||
@@ -367,7 +367,7 @@ future<> standard_role_manager::drop(stdx::string_view role_name) const {
|
||||
return _qp.process(
|
||||
query,
|
||||
consistency_for_role(role_name),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name)}).discard_result();
|
||||
};
|
||||
|
||||
@@ -394,7 +394,7 @@ standard_role_manager::modify_membership(
|
||||
return _qp.process(
|
||||
query,
|
||||
consistency_for_role(grantee_name),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{role_set{sstring(role_name)}, sstring(grantee_name)}).discard_result();
|
||||
};
|
||||
|
||||
@@ -406,7 +406,7 @@ standard_role_manager::modify_membership(
|
||||
"INSERT INTO %s (role, member) VALUES (?, ?)",
|
||||
meta::role_members_table::qualified_name()),
|
||||
consistency_for_role(role_name),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name), sstring(grantee_name)}).discard_result();
|
||||
|
||||
case membership_change::remove:
|
||||
@@ -415,7 +415,7 @@ standard_role_manager::modify_membership(
|
||||
"DELETE FROM %s WHERE role = ? AND member = ?",
|
||||
meta::role_members_table::qualified_name()),
|
||||
consistency_for_role(role_name),
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config(),
|
||||
{sstring(role_name), sstring(grantee_name)}).discard_result();
|
||||
}
|
||||
|
||||
@@ -516,7 +516,10 @@ future<role_set> standard_role_manager::query_all() const {
|
||||
// To avoid many copies of a view.
|
||||
static const auto role_col_name_string = sstring(meta::roles_table::role_col_name);
|
||||
|
||||
return _qp.process(query, db::consistency_level::QUORUM, infinite_timeout_config).then([](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
return _qp.process(
|
||||
query,
|
||||
db::consistency_level::QUORUM,
|
||||
internal_distributed_timeout_config()).then([](::shared_ptr<cql3::untyped_result_set> results) {
|
||||
role_set roles;
|
||||
|
||||
std::transform(
|
||||
|
||||
@@ -60,11 +60,12 @@ class cache_flat_mutation_reader final : public flat_mutation_reader::impl {
|
||||
// - _next_row_in_range = _next.position() < _upper_bound
|
||||
// - _last_row points at a direct predecessor of the next row which is going to be read.
|
||||
// Used for populating continuity.
|
||||
// - _population_range_starts_before_all_rows is set accordingly
|
||||
reading_from_underlying,
|
||||
|
||||
end_of_stream
|
||||
};
|
||||
partition_snapshot_ptr _snp;
|
||||
lw_shared_ptr<partition_snapshot> _snp;
|
||||
position_in_partition::tri_compare _position_cmp;
|
||||
|
||||
query::clustering_key_filter_ranges _ck_ranges;
|
||||
@@ -86,6 +87,13 @@ class cache_flat_mutation_reader final : public flat_mutation_reader::impl {
|
||||
partition_snapshot_row_cursor _next_row;
|
||||
bool _next_row_in_range = false;
|
||||
|
||||
// True iff current population interval, since the previous clustering row, starts before all clustered rows.
|
||||
// We cannot just look at _lower_bound, because emission of range tombstones changes _lower_bound and
|
||||
// because we mark clustering intervals as continuous when consuming a clustering_row, it would prevent
|
||||
// us from marking the interval as continuous.
|
||||
// Valid when _state == reading_from_underlying.
|
||||
bool _population_range_starts_before_all_rows;
|
||||
|
||||
// Whether _lower_bound was changed within current fill_buffer().
|
||||
// If it did not then we cannot break out of it (e.g. on preemption) because
|
||||
// forward progress is not guaranteed in case iterators are getting constantly invalidated.
|
||||
@@ -129,7 +137,7 @@ public:
|
||||
dht::decorated_key dk,
|
||||
query::clustering_key_filter_ranges&& crr,
|
||||
lw_shared_ptr<read_context> ctx,
|
||||
partition_snapshot_ptr snp,
|
||||
lw_shared_ptr<partition_snapshot> snp,
|
||||
row_cache& cache)
|
||||
: flat_mutation_reader::impl(std::move(s))
|
||||
, _snp(std::move(snp))
|
||||
@@ -149,6 +157,9 @@ public:
|
||||
cache_flat_mutation_reader(const cache_flat_mutation_reader&) = delete;
|
||||
cache_flat_mutation_reader(cache_flat_mutation_reader&&) = delete;
|
||||
virtual future<> fill_buffer(db::timeout_clock::time_point timeout) override;
|
||||
virtual ~cache_flat_mutation_reader() {
|
||||
maybe_merge_versions(_snp, _lsa_manager.region(), _lsa_manager.read_section());
|
||||
}
|
||||
virtual void next_partition() override {
|
||||
clear_buffer_to_next_partition();
|
||||
if (is_buffer_empty()) {
|
||||
@@ -228,6 +239,7 @@ inline
|
||||
future<> cache_flat_mutation_reader::do_fill_buffer(db::timeout_clock::time_point timeout) {
|
||||
if (_state == state::move_to_underlying) {
|
||||
_state = state::reading_from_underlying;
|
||||
_population_range_starts_before_all_rows = _lower_bound.is_before_all_clustered_rows(*_schema);
|
||||
auto end = _next_row_in_range ? position_in_partition(_next_row.position())
|
||||
: position_in_partition(_upper_bound);
|
||||
return _read_context->fast_forward_to(position_range{_lower_bound, std::move(end)}, timeout).then([this, timeout] {
|
||||
@@ -352,12 +364,12 @@ future<> cache_flat_mutation_reader::read_from_underlying(db::timeout_clock::tim
|
||||
}
|
||||
});
|
||||
return make_ready_future<>();
|
||||
});
|
||||
}, timeout);
|
||||
}
|
||||
|
||||
inline
|
||||
bool cache_flat_mutation_reader::ensure_population_lower_bound() {
|
||||
if (!_ck_ranges_curr->start()) {
|
||||
if (_population_range_starts_before_all_rows) {
|
||||
return true;
|
||||
}
|
||||
if (!_last_row.refresh(*_snp)) {
|
||||
@@ -412,6 +424,7 @@ inline
|
||||
void cache_flat_mutation_reader::maybe_add_to_cache(const clustering_row& cr) {
|
||||
if (!can_populate()) {
|
||||
_last_row = nullptr;
|
||||
_population_range_starts_before_all_rows = false;
|
||||
_read_context->cache().on_mispopulate();
|
||||
return;
|
||||
}
|
||||
@@ -445,6 +458,7 @@ void cache_flat_mutation_reader::maybe_add_to_cache(const clustering_row& cr) {
|
||||
with_allocator(standard_allocator(), [&] {
|
||||
_last_row = partition_snapshot_row_weakref(*_snp, it, true);
|
||||
});
|
||||
_population_range_starts_before_all_rows = false;
|
||||
});
|
||||
}
|
||||
|
||||
@@ -664,7 +678,7 @@ inline flat_mutation_reader make_cache_flat_mutation_reader(schema_ptr s,
|
||||
query::clustering_key_filter_ranges crr,
|
||||
row_cache& cache,
|
||||
lw_shared_ptr<cache::read_context> ctx,
|
||||
partition_snapshot_ptr snp)
|
||||
lw_shared_ptr<partition_snapshot> snp)
|
||||
{
|
||||
return make_flat_mutation_reader<cache::cache_flat_mutation_reader>(
|
||||
std::move(s), std::move(dk), std::move(crr), std::move(ctx), std::move(snp), cache);
|
||||
|
||||
@@ -22,7 +22,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include "keys.hh"
|
||||
#include "schema.hh"
|
||||
#include "range.hh"
|
||||
@@ -44,20 +43,22 @@ bound_kind invert_kind(bound_kind k);
|
||||
int32_t weight(bound_kind k);
|
||||
|
||||
class bound_view {
|
||||
const static thread_local clustering_key _empty_prefix;
|
||||
std::reference_wrapper<const clustering_key_prefix> _prefix;
|
||||
bound_kind _kind;
|
||||
public:
|
||||
const static thread_local clustering_key empty_prefix;
|
||||
const clustering_key_prefix& prefix;
|
||||
bound_kind kind;
|
||||
bound_view(const clustering_key_prefix& prefix, bound_kind kind)
|
||||
: _prefix(prefix)
|
||||
, _kind(kind)
|
||||
: prefix(prefix)
|
||||
, kind(kind)
|
||||
{ }
|
||||
bound_view(const bound_view& other) noexcept = default;
|
||||
bound_view& operator=(const bound_view& other) noexcept = default;
|
||||
|
||||
bound_kind kind() const { return _kind; }
|
||||
const clustering_key_prefix& prefix() const { return _prefix; }
|
||||
|
||||
bound_view& operator=(const bound_view& other) noexcept {
|
||||
if (this != &other) {
|
||||
this->~bound_view();
|
||||
new (this) bound_view(other);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
struct tri_compare {
|
||||
// To make it assignable and to avoid taking a schema_ptr, we
|
||||
// wrap the schema reference.
|
||||
@@ -81,13 +82,13 @@ public:
|
||||
return d1 < d2 ? w1 - (w1 <= 0) : -(w2 - (w2 <= 0));
|
||||
}
|
||||
int operator()(const bound_view b, const clustering_key_prefix& p) const {
|
||||
return operator()(b._prefix, weight(b._kind), p, 0);
|
||||
return operator()(b.prefix, weight(b.kind), p, 0);
|
||||
}
|
||||
int operator()(const clustering_key_prefix& p, const bound_view b) const {
|
||||
return operator()(p, 0, b._prefix, weight(b._kind));
|
||||
return operator()(p, 0, b.prefix, weight(b.kind));
|
||||
}
|
||||
int operator()(const bound_view b1, const bound_view b2) const {
|
||||
return operator()(b1._prefix, weight(b1._kind), b2._prefix, weight(b2._kind));
|
||||
return operator()(b1.prefix, weight(b1.kind), b2.prefix, weight(b2.kind));
|
||||
}
|
||||
};
|
||||
struct compare {
|
||||
@@ -100,26 +101,26 @@ public:
|
||||
return _cmp(p1, w1, p2, w2) < 0;
|
||||
}
|
||||
bool operator()(const bound_view b, const clustering_key_prefix& p) const {
|
||||
return operator()(b._prefix, weight(b._kind), p, 0);
|
||||
return operator()(b.prefix, weight(b.kind), p, 0);
|
||||
}
|
||||
bool operator()(const clustering_key_prefix& p, const bound_view b) const {
|
||||
return operator()(p, 0, b._prefix, weight(b._kind));
|
||||
return operator()(p, 0, b.prefix, weight(b.kind));
|
||||
}
|
||||
bool operator()(const bound_view b1, const bound_view b2) const {
|
||||
return operator()(b1._prefix, weight(b1._kind), b2._prefix, weight(b2._kind));
|
||||
return operator()(b1.prefix, weight(b1.kind), b2.prefix, weight(b2.kind));
|
||||
}
|
||||
};
|
||||
bool equal(const schema& s, const bound_view other) const {
|
||||
return _kind == other._kind && _prefix.get().equal(s, other._prefix.get());
|
||||
return kind == other.kind && prefix.equal(s, other.prefix);
|
||||
}
|
||||
bool adjacent(const schema& s, const bound_view other) const {
|
||||
return invert_kind(other._kind) == _kind && _prefix.get().equal(s, other._prefix.get());
|
||||
return invert_kind(other.kind) == kind && prefix.equal(s, other.prefix);
|
||||
}
|
||||
static bound_view bottom() {
|
||||
return {_empty_prefix, bound_kind::incl_start};
|
||||
return {empty_prefix, bound_kind::incl_start};
|
||||
}
|
||||
static bound_view top() {
|
||||
return {_empty_prefix, bound_kind::incl_end};
|
||||
return {empty_prefix, bound_kind::incl_end};
|
||||
}
|
||||
template<template<typename> typename R>
|
||||
GCC6_CONCEPT( requires Range<R, clustering_key_prefix_view> )
|
||||
@@ -143,13 +144,13 @@ public:
|
||||
template<template<typename> typename R>
|
||||
GCC6_CONCEPT( requires Range<R, clustering_key_prefix_view> )
|
||||
static stdx::optional<typename R<clustering_key_prefix_view>::bound> to_range_bound(const bound_view& bv) {
|
||||
if (&bv._prefix.get() == &_empty_prefix) {
|
||||
if (&bv.prefix == &empty_prefix) {
|
||||
return {};
|
||||
}
|
||||
bool inclusive = bv._kind != bound_kind::excl_end && bv._kind != bound_kind::excl_start;
|
||||
return {typename R<clustering_key_prefix_view>::bound(bv._prefix.get().view(), inclusive)};
|
||||
bool inclusive = bv.kind != bound_kind::excl_end && bv.kind != bound_kind::excl_start;
|
||||
return {typename R<clustering_key_prefix_view>::bound(bv.prefix.view(), inclusive)};
|
||||
}
|
||||
friend std::ostream& operator<<(std::ostream& out, const bound_view& b) {
|
||||
return out << "{bound: prefix=" << b._prefix.get() << ", kind=" << b._kind << "}";
|
||||
return out << "{bound: prefix=" << b.prefix << ", kind=" << b.kind << "}";
|
||||
}
|
||||
};
|
||||
|
||||
67
compatible_ring_position.hh
Normal file
67
compatible_ring_position.hh
Normal file
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
* Copyright (C) 2016 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "query-request.hh"
|
||||
#include <experimental/optional>
|
||||
|
||||
// Wraps ring_position so it is compatible with old-style C++: default constructor,
|
||||
// stateless comparators, yada yada
|
||||
class compatible_ring_position {
|
||||
const schema* _schema = nullptr;
|
||||
// optional to supply a default constructor, no more
|
||||
std::experimental::optional<dht::ring_position> _rp;
|
||||
public:
|
||||
compatible_ring_position() noexcept = default;
|
||||
compatible_ring_position(const schema& s, const dht::ring_position& rp)
|
||||
: _schema(&s), _rp(rp) {
|
||||
}
|
||||
compatible_ring_position(const schema& s, dht::ring_position&& rp)
|
||||
: _schema(&s), _rp(std::move(rp)) {
|
||||
}
|
||||
const dht::token& token() const {
|
||||
return _rp->token();
|
||||
}
|
||||
friend int tri_compare(const compatible_ring_position& x, const compatible_ring_position& y) {
|
||||
return x._rp->tri_compare(*x._schema, *y._rp);
|
||||
}
|
||||
friend bool operator<(const compatible_ring_position& x, const compatible_ring_position& y) {
|
||||
return tri_compare(x, y) < 0;
|
||||
}
|
||||
friend bool operator<=(const compatible_ring_position& x, const compatible_ring_position& y) {
|
||||
return tri_compare(x, y) <= 0;
|
||||
}
|
||||
friend bool operator>(const compatible_ring_position& x, const compatible_ring_position& y) {
|
||||
return tri_compare(x, y) > 0;
|
||||
}
|
||||
friend bool operator>=(const compatible_ring_position& x, const compatible_ring_position& y) {
|
||||
return tri_compare(x, y) >= 0;
|
||||
}
|
||||
friend bool operator==(const compatible_ring_position& x, const compatible_ring_position& y) {
|
||||
return tri_compare(x, y) == 0;
|
||||
}
|
||||
friend bool operator!=(const compatible_ring_position& x, const compatible_ring_position& y) {
|
||||
return tri_compare(x, y) != 0;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1,64 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2016 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "query-request.hh"
|
||||
#include <optional>
|
||||
|
||||
// Wraps ring_position_view so it is compatible with old-style C++: default
|
||||
// constructor, stateless comparators, yada yada.
|
||||
class compatible_ring_position_view {
|
||||
const schema* _schema = nullptr;
|
||||
// Optional to supply a default constructor, no more.
|
||||
std::optional<dht::ring_position_view> _rpv;
|
||||
public:
|
||||
constexpr compatible_ring_position_view() = default;
|
||||
compatible_ring_position_view(const schema& s, dht::ring_position_view rpv)
|
||||
: _schema(&s), _rpv(rpv) {
|
||||
}
|
||||
const dht::ring_position_view& position() const {
|
||||
return *_rpv;
|
||||
}
|
||||
friend int tri_compare(const compatible_ring_position_view& x, const compatible_ring_position_view& y) {
|
||||
return dht::ring_position_tri_compare(*x._schema, *x._rpv, *y._rpv);
|
||||
}
|
||||
friend bool operator<(const compatible_ring_position_view& x, const compatible_ring_position_view& y) {
|
||||
return tri_compare(x, y) < 0;
|
||||
}
|
||||
friend bool operator<=(const compatible_ring_position_view& x, const compatible_ring_position_view& y) {
|
||||
return tri_compare(x, y) <= 0;
|
||||
}
|
||||
friend bool operator>(const compatible_ring_position_view& x, const compatible_ring_position_view& y) {
|
||||
return tri_compare(x, y) > 0;
|
||||
}
|
||||
friend bool operator>=(const compatible_ring_position_view& x, const compatible_ring_position_view& y) {
|
||||
return tri_compare(x, y) >= 0;
|
||||
}
|
||||
friend bool operator==(const compatible_ring_position_view& x, const compatible_ring_position_view& y) {
|
||||
return tri_compare(x, y) == 0;
|
||||
}
|
||||
friend bool operator!=(const compatible_ring_position_view& x, const compatible_ring_position_view& y) {
|
||||
return tri_compare(x, y) != 0;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -303,7 +303,7 @@ scylla_tests = [
|
||||
'tests/imr_test',
|
||||
'tests/partition_data_test',
|
||||
'tests/reusable_buffer_test',
|
||||
'tests/multishard_writer_test',
|
||||
'tests/json_test'
|
||||
]
|
||||
|
||||
perf_tests = [
|
||||
@@ -407,6 +407,7 @@ scylla_core = (['database.cc',
|
||||
'mutation_reader.cc',
|
||||
'flat_mutation_reader.cc',
|
||||
'mutation_query.cc',
|
||||
'json.cc',
|
||||
'keys.cc',
|
||||
'counters.cc',
|
||||
'compress.cc',
|
||||
@@ -515,6 +516,7 @@ scylla_core = (['database.cc',
|
||||
'db/consistency_level.cc',
|
||||
'db/system_keyspace.cc',
|
||||
'db/system_distributed_keyspace.cc',
|
||||
'db/size_estimates_virtual_reader.cc',
|
||||
'db/schema_tables.cc',
|
||||
'db/cql_type_parser.cc',
|
||||
'db/legacy_schema_migrator.cc',
|
||||
@@ -630,7 +632,6 @@ scylla_core = (['database.cc',
|
||||
'utils/arch/powerpc/crc32-vpmsum/crc32_wrapper.cc',
|
||||
'querier.cc',
|
||||
'data/cell.cc',
|
||||
'multishard_writer.cc',
|
||||
]
|
||||
+ [Antlr3Grammar('cql3/Cql.g')]
|
||||
+ [Thrift('interface/cassandra.thrift', 'Cassandra')]
|
||||
@@ -742,6 +743,7 @@ pure_boost_tests = set([
|
||||
'tests/imr_test',
|
||||
'tests/partition_data_test',
|
||||
'tests/reusable_buffer_test',
|
||||
'tests/json_test',
|
||||
])
|
||||
|
||||
tests_not_using_seastar_test_framework = set([
|
||||
@@ -793,7 +795,7 @@ deps['tests/log_heap_test'] = ['tests/log_heap_test.cc']
|
||||
deps['tests/anchorless_list_test'] = ['tests/anchorless_list_test.cc']
|
||||
deps['tests/perf/perf_fast_forward'] += ['release.cc']
|
||||
deps['tests/meta_test'] = ['tests/meta_test.cc']
|
||||
deps['tests/imr_test'] = ['tests/imr_test.cc']
|
||||
deps['tests/imr_test'] = ['tests/imr_test.cc', 'utils/logalloc.cc', 'utils/dynamic_bitset.cc']
|
||||
deps['tests/reusable_buffer_test'] = ['tests/reusable_buffer_test.cc']
|
||||
|
||||
warnings = [
|
||||
|
||||
@@ -92,10 +92,6 @@ public:
|
||||
_p.apply(t);
|
||||
}
|
||||
|
||||
void accept_static_cell(column_id id, atomic_cell cell) {
|
||||
return accept_static_cell(id, atomic_cell_view(cell));
|
||||
}
|
||||
|
||||
virtual void accept_static_cell(column_id id, atomic_cell_view cell) override {
|
||||
const column_mapping_entry& col = _visited_column_mapping.static_column_at(id);
|
||||
const column_definition* def = _p_schema.get_column_definition(col.name());
|
||||
@@ -123,10 +119,6 @@ public:
|
||||
_current_row = &r;
|
||||
}
|
||||
|
||||
void accept_row_cell(column_id id, atomic_cell cell) {
|
||||
return accept_row_cell(id, atomic_cell_view(cell));
|
||||
}
|
||||
|
||||
virtual void accept_row_cell(column_id id, atomic_cell_view cell) override {
|
||||
const column_mapping_entry& col = _visited_column_mapping.regular_column_at(id);
|
||||
const column_definition* def = _p_schema.get_column_definition(col.name());
|
||||
|
||||
@@ -473,9 +473,9 @@ insertStatement returns [::shared_ptr<raw::modification_statement> expr]
|
||||
::shared_ptr<cql3::term::raw> json_value;
|
||||
}
|
||||
: K_INSERT K_INTO cf=columnFamilyName
|
||||
'(' c1=cident { column_names.push_back(c1); } ( ',' cn=cident { column_names.push_back(cn); } )* ')'
|
||||
( K_VALUES
|
||||
'(' v1=term { values.push_back(v1); } ( ',' vn=term { values.push_back(vn); } )* ')'
|
||||
('(' c1=cident { column_names.push_back(c1); } ( ',' cn=cident { column_names.push_back(cn); } )* ')'
|
||||
K_VALUES
|
||||
'(' v1=term { values.push_back(v1); } ( ',' vn=term { values.push_back(vn); } )* ')'
|
||||
( K_IF K_NOT K_EXISTS { if_not_exists = true; } )?
|
||||
( usingClause[attrs] )?
|
||||
{
|
||||
|
||||
@@ -67,6 +67,12 @@ class error_collector : public error_listener<RecognizerType, ExceptionBaseType>
|
||||
*/
|
||||
const sstring_view _query;
|
||||
|
||||
/**
|
||||
* An empty bitset to be used as a workaround for AntLR null dereference
|
||||
* bug.
|
||||
*/
|
||||
static typename ExceptionBaseType::BitsetListType _empty_bit_list;
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
@@ -144,6 +150,14 @@ private:
|
||||
break;
|
||||
}
|
||||
default:
|
||||
// AntLR Exception class has a bug of dereferencing a null
|
||||
// pointer in the displayRecognitionError. The following
|
||||
// if statement makes sure it will not be null before the
|
||||
// call to that function (displayRecognitionError).
|
||||
// bug reference: https://github.com/antlr/antlr3/issues/191
|
||||
if (!ex->get_expectingSet()) {
|
||||
ex->set_expectingSet(&_empty_bit_list);
|
||||
}
|
||||
ex->displayRecognitionError(token_names, msg);
|
||||
}
|
||||
return msg.str();
|
||||
@@ -345,4 +359,8 @@ private:
|
||||
#endif
|
||||
};
|
||||
|
||||
template<typename RecognizerType, typename TokenType, typename ExceptionBaseType>
|
||||
typename ExceptionBaseType::BitsetListType
|
||||
error_collector<RecognizerType,TokenType,ExceptionBaseType>::_empty_bit_list = typename ExceptionBaseType::BitsetListType();
|
||||
|
||||
}
|
||||
|
||||
@@ -177,7 +177,7 @@ shared_ptr<function>
|
||||
make_to_json_function(data_type t) {
|
||||
return make_native_scalar_function<true>("tojson", utf8_type, {t},
|
||||
[t](cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
|
||||
return utf8_type->decompose(t->to_json_string(parameters[0].value()));
|
||||
return utf8_type->decompose(t->to_json_string(parameters[0]));
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -217,19 +217,18 @@ void query_options::prepare(const std::vector<::shared_ptr<column_specification>
|
||||
}
|
||||
|
||||
auto& names = *_names;
|
||||
std::vector<cql3::raw_value> ordered_values;
|
||||
std::vector<cql3::raw_value_view> ordered_values;
|
||||
ordered_values.reserve(specs.size());
|
||||
for (auto&& spec : specs) {
|
||||
auto& spec_name = spec->name->text();
|
||||
for (size_t j = 0; j < names.size(); j++) {
|
||||
if (names[j] == spec_name) {
|
||||
ordered_values.emplace_back(_values[j]);
|
||||
ordered_values.emplace_back(_value_views[j]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
_values = std::move(ordered_values);
|
||||
fill_value_views();
|
||||
_value_views = std::move(ordered_values);
|
||||
}
|
||||
|
||||
void query_options::fill_value_views()
|
||||
|
||||
@@ -206,30 +206,6 @@ query_processor::query_processor(service::storage_proxy& proxy, distributed<data
|
||||
_cql_stats.secondary_index_rows_read,
|
||||
sm::description("Counts a total number of rows read during CQL requests performed using secondary indexes.")),
|
||||
|
||||
// read requests that required ALLOW FILTERING
|
||||
sm::make_derive(
|
||||
"filtered_read_requests",
|
||||
_cql_stats.filtered_reads,
|
||||
sm::description("Counts a total number of CQL read requests that required ALLOW FILTERING. See filtered_rows_read_total to compare how many rows needed to be filtered.")),
|
||||
|
||||
// rows read with filtering enabled (because ALLOW FILTERING was required)
|
||||
sm::make_derive(
|
||||
"filtered_rows_read_total",
|
||||
_cql_stats.filtered_rows_read_total,
|
||||
sm::description("Counts a total number of rows read during CQL requests that required ALLOW FILTERING. See filtered_rows_matched_total and filtered_rows_dropped_total for information how accurate filtering queries are.")),
|
||||
|
||||
// rows read with filtering enabled and accepted by the filter
|
||||
sm::make_derive(
|
||||
"filtered_rows_matched_total",
|
||||
_cql_stats.filtered_rows_matched_total,
|
||||
sm::description("Counts a number of rows read during CQL requests that required ALLOW FILTERING and accepted by the filter. Number similar to filtered_rows_read_total indicates that filtering is accurate.")),
|
||||
|
||||
// rows read with filtering enabled and rejected by the filter
|
||||
sm::make_derive(
|
||||
"filtered_rows_dropped_total",
|
||||
[this]() {return _cql_stats.filtered_rows_read_total - _cql_stats.filtered_rows_matched_total;},
|
||||
sm::description("Counts a number of rows read during CQL requests that required ALLOW FILTERING and dropped by the filter. Number similar to filtered_rows_read_total indicates that filtering is not accurate and might cause performance degradation.")),
|
||||
|
||||
sm::make_derive(
|
||||
"authorized_prepared_statements_cache_evictions",
|
||||
[] { return authorized_prepared_statements_cache::shard_stats().authorized_prepared_statements_cache_evictions; },
|
||||
@@ -263,11 +239,11 @@ query_processor::process(const sstring_view& query_string, service::query_state&
|
||||
log.trace("process: \"{}\"", query_string);
|
||||
tracing::trace(query_state.get_trace_state(), "Parsing a statement");
|
||||
auto p = get_statement(query_string, query_state.get_client_state());
|
||||
options.prepare(p->bound_names);
|
||||
auto cql_statement = p->statement;
|
||||
if (cql_statement->get_bound_terms() != options.get_values_count()) {
|
||||
throw exceptions::invalid_request_exception("Invalid amount of bind variables");
|
||||
}
|
||||
options.prepare(p->bound_names);
|
||||
|
||||
warn(unimplemented::cause::METRICS);
|
||||
#if 0
|
||||
|
||||
@@ -95,32 +95,7 @@ public:
|
||||
uint32_t size() const override {
|
||||
return uint32_t(get_column_defs().size());
|
||||
}
|
||||
|
||||
bool has_unrestricted_components(const schema& schema) const;
|
||||
|
||||
virtual bool needs_filtering(const schema& schema) const;
|
||||
};
|
||||
|
||||
template<>
|
||||
inline bool primary_key_restrictions<partition_key>::has_unrestricted_components(const schema& schema) const {
|
||||
return size() < schema.partition_key_size();
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool primary_key_restrictions<clustering_key>::has_unrestricted_components(const schema& schema) const {
|
||||
return size() < schema.clustering_key_size();
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool primary_key_restrictions<partition_key>::needs_filtering(const schema& schema) const {
|
||||
return !empty() && !is_on_token() && (has_unrestricted_components(schema) || is_contains() || is_slice());
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool primary_key_restrictions<clustering_key>::needs_filtering(const schema& schema) const {
|
||||
// Currently only overloaded single_column_primary_key_restrictions will require ALLOW FILTERING
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -314,10 +314,6 @@ public:
|
||||
fail(unimplemented::cause::LEGACY_COMPOSITE_KEYS); // not 100% correct...
|
||||
}
|
||||
|
||||
const single_column_restrictions::restrictions_map& restrictions() const {
|
||||
return _restrictions->restrictions();
|
||||
}
|
||||
|
||||
virtual bool has_supporting_index(const secondary_index::secondary_index_manager& index_manager) const override {
|
||||
return _restrictions->has_supporting_index(index_manager);
|
||||
}
|
||||
@@ -353,8 +349,6 @@ public:
|
||||
_restrictions->restrictions() | boost::adaptors::map_values,
|
||||
[&] (auto&& r) { return r->is_satisfied_by(schema, key, ckey, cells, options, now); });
|
||||
}
|
||||
|
||||
virtual bool needs_filtering(const schema& schema) const override;
|
||||
};
|
||||
|
||||
template<>
|
||||
@@ -412,29 +406,6 @@ single_column_primary_key_restrictions<clustering_key_prefix>::bounds_ranges(con
|
||||
return bounds;
|
||||
}
|
||||
|
||||
template<>
|
||||
bool single_column_primary_key_restrictions<partition_key>::needs_filtering(const schema& schema) const {
|
||||
return primary_key_restrictions<partition_key>::needs_filtering(schema);
|
||||
}
|
||||
|
||||
template<>
|
||||
bool single_column_primary_key_restrictions<clustering_key>::needs_filtering(const schema& schema) const {
|
||||
// Restrictions currently need filtering in three cases:
|
||||
// 1. any of them is a CONTAINS restriction
|
||||
// 2. restrictions do not form a contiguous prefix (i.e. there are gaps in it)
|
||||
// 3. a SLICE restriction isn't in the last position
|
||||
column_id position = 0;
|
||||
for (const auto& restriction : _restrictions->restrictions() | boost::adaptors::map_values) {
|
||||
if (restriction->is_contains() || position != restriction->get_column_def().id) {
|
||||
return true;
|
||||
}
|
||||
if (!restriction->is_slice()) {
|
||||
position = restriction->get_column_def().id + 1;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -93,8 +93,6 @@ public:
|
||||
}
|
||||
|
||||
virtual bool is_supported_by(const secondary_index::index& index) const = 0;
|
||||
using abstract_restriction::is_satisfied_by;
|
||||
virtual bool is_satisfied_by(bytes_view data, const query_options& options) const = 0;
|
||||
#if 0
|
||||
/**
|
||||
* Check if this type of restriction is supported by the specified index.
|
||||
@@ -168,7 +166,6 @@ public:
|
||||
const row& cells,
|
||||
const query_options& options,
|
||||
gc_clock::time_point now) const override;
|
||||
virtual bool is_satisfied_by(bytes_view data, const query_options& options) const override;
|
||||
|
||||
#if 0
|
||||
@Override
|
||||
@@ -204,8 +201,15 @@ public:
|
||||
const row& cells,
|
||||
const query_options& options,
|
||||
gc_clock::time_point now) const override;
|
||||
virtual bool is_satisfied_by(bytes_view data, const query_options& options) const override;
|
||||
|
||||
virtual std::vector<bytes_opt> values_raw(const query_options& options) const = 0;
|
||||
|
||||
virtual std::vector<bytes_opt> values(const query_options& options) const override {
|
||||
std::vector<bytes_opt> ret = values_raw(options);
|
||||
std::sort(ret.begin(),ret.end());
|
||||
ret.erase(std::unique(ret.begin(),ret.end()),ret.end());
|
||||
return ret;
|
||||
}
|
||||
#if 0
|
||||
@Override
|
||||
protected final boolean isSupportedBy(SecondaryIndex index)
|
||||
@@ -228,7 +232,7 @@ public:
|
||||
return abstract_restriction::term_uses_function(_values, ks_name, function_name);
|
||||
}
|
||||
|
||||
virtual std::vector<bytes_opt> values(const query_options& options) const override {
|
||||
virtual std::vector<bytes_opt> values_raw(const query_options& options) const override {
|
||||
std::vector<bytes_opt> ret;
|
||||
for (auto&& v : _values) {
|
||||
ret.emplace_back(to_bytes_opt(v->bind_and_get(options)));
|
||||
@@ -253,7 +257,7 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual std::vector<bytes_opt> values(const query_options& options) const override {
|
||||
virtual std::vector<bytes_opt> values_raw(const query_options& options) const override {
|
||||
auto&& lval = dynamic_pointer_cast<multi_item_terminal>(_marker->bind(options));
|
||||
if (!lval) {
|
||||
throw exceptions::invalid_request_exception("Invalid null value for IN restriction");
|
||||
@@ -360,7 +364,6 @@ public:
|
||||
const row& cells,
|
||||
const query_options& options,
|
||||
gc_clock::time_point now) const override;
|
||||
virtual bool is_satisfied_by(bytes_view data, const query_options& options) const override;
|
||||
};
|
||||
|
||||
// This holds CONTAINS, CONTAINS_KEY, and map[key] = value restrictions because we might want to have any combination of them.
|
||||
@@ -482,7 +485,6 @@ public:
|
||||
const row& cells,
|
||||
const query_options& options,
|
||||
gc_clock::time_point now) const override;
|
||||
virtual bool is_satisfied_by(bytes_view data, const query_options& options) const override;
|
||||
|
||||
#if 0
|
||||
private List<ByteBuffer> keys(const query_options& options) {
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
#include <boost/range/algorithm/transform.hpp>
|
||||
#include <boost/range/algorithm.hpp>
|
||||
#include <boost/range/adaptors.hpp>
|
||||
#include <boost/algorithm/cxx11/any_of.hpp>
|
||||
|
||||
#include "statement_restrictions.hh"
|
||||
#include "single_column_primary_key_restrictions.hh"
|
||||
@@ -37,8 +36,6 @@
|
||||
namespace cql3 {
|
||||
namespace restrictions {
|
||||
|
||||
static logging::logger rlogger("restrictions");
|
||||
|
||||
using boost::adaptors::filtered;
|
||||
using boost::adaptors::transformed;
|
||||
|
||||
@@ -205,7 +202,7 @@ statement_restrictions::statement_restrictions(database& db,
|
||||
throw exceptions::invalid_request_exception(sprint("restriction '%s' is only supported in materialized view creation", relation->to_string()));
|
||||
}
|
||||
} else {
|
||||
add_restriction(relation->to_restriction(db, schema, bound_names), for_view, allow_filtering);
|
||||
add_restriction(relation->to_restriction(db, schema, bound_names));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -217,11 +214,11 @@ statement_restrictions::statement_restrictions(database& db,
|
||||
|| _nonprimary_key_restrictions->has_supporting_index(sim);
|
||||
|
||||
// At this point, the select statement is fully constructed, but we still have a few things to validate
|
||||
process_partition_key_restrictions(has_queriable_index, for_view, allow_filtering);
|
||||
process_partition_key_restrictions(has_queriable_index, for_view);
|
||||
|
||||
// Some but not all of the partition key columns have been specified;
|
||||
// hence we need to turn these restrictions into index expressions.
|
||||
if (_uses_secondary_indexing || _partition_key_restrictions->needs_filtering(*_schema)) {
|
||||
if (_uses_secondary_indexing) {
|
||||
_index_restrictions.push_back(_partition_key_restrictions);
|
||||
}
|
||||
|
||||
@@ -237,14 +234,13 @@ statement_restrictions::statement_restrictions(database& db,
|
||||
}
|
||||
}
|
||||
|
||||
process_clustering_columns_restrictions(has_queriable_index, select_a_collection, for_view, allow_filtering);
|
||||
process_clustering_columns_restrictions(has_queriable_index, select_a_collection, for_view);
|
||||
|
||||
// Covers indexes on the first clustering column (among others).
|
||||
if (_is_key_range && has_queriable_clustering_column_index) {
|
||||
_uses_secondary_indexing = true;
|
||||
}
|
||||
if (_is_key_range && has_queriable_clustering_column_index)
|
||||
_uses_secondary_indexing = true;
|
||||
|
||||
if (_uses_secondary_indexing || _clustering_columns_restrictions->needs_filtering(*_schema)) {
|
||||
if (_uses_secondary_indexing) {
|
||||
_index_restrictions.push_back(_clustering_columns_restrictions);
|
||||
} else if (_clustering_columns_restrictions->is_contains()) {
|
||||
fail(unimplemented::cause::INDEXES);
|
||||
@@ -273,48 +269,31 @@ statement_restrictions::statement_restrictions(database& db,
|
||||
uses_secondary_indexing = true;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Even if uses_secondary_indexing is false at this point, we'll still have to use one if
|
||||
// there are restrictions not covered by the PK.
|
||||
if (!_nonprimary_key_restrictions->empty()) {
|
||||
if (has_queriable_index) {
|
||||
_uses_secondary_indexing = true;
|
||||
} else if (!allow_filtering) {
|
||||
throw exceptions::invalid_request_exception("Cannot execute this query as it might involve data filtering and "
|
||||
"thus may have unpredictable performance. If you want to execute "
|
||||
"this query despite the performance unpredictability, use ALLOW FILTERING");
|
||||
}
|
||||
_uses_secondary_indexing = true;
|
||||
_index_restrictions.push_back(_nonprimary_key_restrictions);
|
||||
}
|
||||
|
||||
if (_uses_secondary_indexing && !(for_view || allow_filtering)) {
|
||||
if (_uses_secondary_indexing && !for_view) {
|
||||
validate_secondary_index_selections(selects_only_static_columns);
|
||||
}
|
||||
}
|
||||
|
||||
void statement_restrictions::add_restriction(::shared_ptr<restriction> restriction, bool for_view, bool allow_filtering) {
|
||||
void statement_restrictions::add_restriction(::shared_ptr<restriction> restriction) {
|
||||
if (restriction->is_multi_column()) {
|
||||
_clustering_columns_restrictions = _clustering_columns_restrictions->merge_to(_schema, restriction);
|
||||
} else if (restriction->is_on_token()) {
|
||||
_partition_key_restrictions = _partition_key_restrictions->merge_to(_schema, restriction);
|
||||
} else {
|
||||
add_single_column_restriction(::static_pointer_cast<single_column_restriction>(restriction), for_view, allow_filtering);
|
||||
add_single_column_restriction(::static_pointer_cast<single_column_restriction>(restriction));
|
||||
}
|
||||
}
|
||||
|
||||
void statement_restrictions::add_single_column_restriction(::shared_ptr<single_column_restriction> restriction, bool for_view, bool allow_filtering) {
|
||||
void statement_restrictions::add_single_column_restriction(::shared_ptr<single_column_restriction> restriction) {
|
||||
auto& def = restriction->get_column_def();
|
||||
if (def.is_partition_key()) {
|
||||
// A SELECT query may not request a slice (range) of partition keys
|
||||
// without using token(). This is because there is no way to do this
|
||||
// query efficiently: murmur3 turns a contiguous range of partition
|
||||
// keys into tokens all over the token space.
|
||||
// However, in a SELECT statement used to define a materialized view,
|
||||
// such a slice is fine - it is used to check whether individual
|
||||
// partitions match, and does not present a performance problem.
|
||||
assert(!restriction->is_on_token());
|
||||
if (restriction->is_slice() && !for_view && !allow_filtering) {
|
||||
throw exceptions::invalid_request_exception(
|
||||
"Only EQ and IN relation are supported on the partition key (unless you use the token() function or allow filtering)");
|
||||
}
|
||||
_partition_key_restrictions = _partition_key_restrictions->merge_to(_schema, restriction);
|
||||
} else if (def.is_clustering_key()) {
|
||||
_clustering_columns_restrictions = _clustering_columns_restrictions->merge_to(_schema, restriction);
|
||||
@@ -333,7 +312,7 @@ const std::vector<::shared_ptr<restrictions>>& statement_restrictions::index_res
|
||||
return _index_restrictions;
|
||||
}
|
||||
|
||||
void statement_restrictions::process_partition_key_restrictions(bool has_queriable_index, bool for_view, bool allow_filtering) {
|
||||
void statement_restrictions::process_partition_key_restrictions(bool has_queriable_index, bool for_view) {
|
||||
// If there is a queriable index, no special conditions are required on the other restrictions.
|
||||
// But we still need to know 2 things:
|
||||
// - If we don't have a queriable index, is the query ok
|
||||
@@ -342,32 +321,39 @@ void statement_restrictions::process_partition_key_restrictions(bool has_queriab
|
||||
// components must have a EQ. Only the last partition key component can be in IN relation.
|
||||
if (_partition_key_restrictions->is_on_token()) {
|
||||
_is_key_range = true;
|
||||
} else if (_partition_key_restrictions->has_unrestricted_components(*_schema)) {
|
||||
_is_key_range = true;
|
||||
_uses_secondary_indexing = has_queriable_index;
|
||||
}
|
||||
|
||||
if (_partition_key_restrictions->needs_filtering(*_schema)) {
|
||||
if (!allow_filtering && !for_view && !has_queriable_index) {
|
||||
throw exceptions::invalid_request_exception("Cannot execute this query as it might involve data filtering and "
|
||||
"thus may have unpredictable performance. If you want to execute "
|
||||
"this query despite the performance unpredictability, use ALLOW FILTERING");
|
||||
} else if (has_partition_key_unrestricted_components()) {
|
||||
if (!_partition_key_restrictions->empty() && !for_view) {
|
||||
if (!has_queriable_index) {
|
||||
throw exceptions::invalid_request_exception(sprint("Partition key parts: %s must be restricted as other parts are",
|
||||
join(", ", get_partition_key_unrestricted_components())));
|
||||
}
|
||||
}
|
||||
|
||||
_is_key_range = true;
|
||||
_uses_secondary_indexing = has_queriable_index;
|
||||
}
|
||||
|
||||
if (_partition_key_restrictions->is_slice() && !_partition_key_restrictions->is_on_token() && !for_view) {
|
||||
// A SELECT query may not request a slice (range) of partition keys
|
||||
// without using token(). This is because there is no way to do this
|
||||
// query efficiently: murmur3 turns a contiguous range of partition
|
||||
// keys into tokens all over the token space.
|
||||
// However, in a SELECT statement used to define a materialized view,
|
||||
// such a slice is fine - it is used to check whether individual
|
||||
// partitions match, and does not present a performance problem.
|
||||
throw exceptions::invalid_request_exception(
|
||||
"Only EQ and IN relation are supported on the partition key (unless you use the token() function)");
|
||||
}
|
||||
}
|
||||
|
||||
bool statement_restrictions::has_partition_key_unrestricted_components() const {
|
||||
return _partition_key_restrictions->has_unrestricted_components(*_schema);
|
||||
return _partition_key_restrictions->size() < _schema->partition_key_size();
|
||||
}
|
||||
|
||||
bool statement_restrictions::has_unrestricted_clustering_columns() const {
|
||||
return _clustering_columns_restrictions->has_unrestricted_components(*_schema);
|
||||
return _clustering_columns_restrictions->size() < _schema->clustering_key_size();
|
||||
}
|
||||
|
||||
void statement_restrictions::process_clustering_columns_restrictions(bool has_queriable_index, bool select_a_collection, bool for_view, bool allow_filtering) {
|
||||
void statement_restrictions::process_clustering_columns_restrictions(bool has_queriable_index, bool select_a_collection, bool for_view) {
|
||||
if (!has_clustering_columns_restriction()) {
|
||||
return;
|
||||
}
|
||||
@@ -376,36 +362,38 @@ void statement_restrictions::process_clustering_columns_restrictions(bool has_qu
|
||||
throw exceptions::invalid_request_exception(
|
||||
"Cannot restrict clustering columns by IN relations when a collection is selected by the query");
|
||||
}
|
||||
if (_clustering_columns_restrictions->is_contains() && !has_queriable_index && !allow_filtering) {
|
||||
if (_clustering_columns_restrictions->is_contains() && !has_queriable_index) {
|
||||
throw exceptions::invalid_request_exception(
|
||||
"Cannot restrict clustering columns by a CONTAINS relation without a secondary index or filtering");
|
||||
"Cannot restrict clustering columns by a CONTAINS relation without a secondary index");
|
||||
}
|
||||
|
||||
if (has_clustering_columns_restriction() && _clustering_columns_restrictions->needs_filtering(*_schema)) {
|
||||
if (has_queriable_index) {
|
||||
_uses_secondary_indexing = true;
|
||||
} else if (!allow_filtering && !for_view) {
|
||||
auto clustering_columns_iter = _schema->clustering_key_columns().begin();
|
||||
for (auto&& restricted_column : _clustering_columns_restrictions->get_column_defs()) {
|
||||
const column_definition* clustering_column = &(*clustering_columns_iter);
|
||||
++clustering_columns_iter;
|
||||
if (clustering_column != restricted_column) {
|
||||
throw exceptions::invalid_request_exception(sprint(
|
||||
"PRIMARY KEY column \"%s\" cannot be restricted as preceding column \"%s\" is not restricted",
|
||||
restricted_column->name_as_text(), clustering_column->name_as_text()));
|
||||
}
|
||||
auto clustering_columns_iter = _schema->clustering_key_columns().begin();
|
||||
|
||||
for (auto&& restricted_column : _clustering_columns_restrictions->get_column_defs()) {
|
||||
const column_definition* clustering_column = &(*clustering_columns_iter);
|
||||
++clustering_columns_iter;
|
||||
|
||||
if (clustering_column != restricted_column && !for_view) {
|
||||
if (!has_queriable_index) {
|
||||
throw exceptions::invalid_request_exception(sprint(
|
||||
"PRIMARY KEY column \"%s\" cannot be restricted as preceding column \"%s\" is not restricted",
|
||||
restricted_column->name_as_text(), clustering_column->name_as_text()));
|
||||
}
|
||||
|
||||
_uses_secondary_indexing = true; // handle gaps and non-keyrange cases.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (_clustering_columns_restrictions->is_contains()) {
|
||||
_uses_secondary_indexing = true;
|
||||
}
|
||||
}
|
||||
|
||||
dht::partition_range_vector statement_restrictions::get_partition_key_ranges(const query_options& options) const {
|
||||
if (_partition_key_restrictions->empty()) {
|
||||
return {dht::partition_range::make_open_ended_both_sides()};
|
||||
}
|
||||
if (_partition_key_restrictions->needs_filtering(*_schema)) {
|
||||
return {dht::partition_range::make_open_ended_both_sides()};
|
||||
}
|
||||
return _partition_key_restrictions->bounds_ranges(options);
|
||||
}
|
||||
|
||||
@@ -413,30 +401,18 @@ std::vector<query::clustering_range> statement_restrictions::get_clustering_boun
|
||||
if (_clustering_columns_restrictions->empty()) {
|
||||
return {query::clustering_range::make_open_ended_both_sides()};
|
||||
}
|
||||
// TODO(sarna): For filtering to work, clustering range is not bounded at all. For filtering to work faster,
|
||||
// the biggest clustering prefix restriction should be used here.
|
||||
if (_clustering_columns_restrictions->needs_filtering(*_schema)) {
|
||||
return {query::clustering_range::make_open_ended_both_sides()};
|
||||
}
|
||||
return _clustering_columns_restrictions->bounds_ranges(options);
|
||||
}
|
||||
|
||||
bool statement_restrictions::need_filtering() const {
|
||||
bool statement_restrictions::need_filtering() {
|
||||
uint32_t number_of_restricted_columns = 0;
|
||||
for (auto&& restrictions : _index_restrictions) {
|
||||
number_of_restricted_columns += restrictions->size();
|
||||
}
|
||||
|
||||
if (_partition_key_restrictions->is_multi_column() || _clustering_columns_restrictions->is_multi_column()) {
|
||||
// TODO(sarna): Implement ALLOW FILTERING support for multi-column restrictions - return false for now
|
||||
// in order to ensure backwards compatibility
|
||||
return false;
|
||||
}
|
||||
|
||||
return number_of_restricted_columns > 1
|
||||
|| (number_of_restricted_columns == 0 && _partition_key_restrictions->empty() && !_clustering_columns_restrictions->empty())
|
||||
|| (number_of_restricted_columns != 0 && _nonprimary_key_restrictions->has_multiple_contains())
|
||||
|| (number_of_restricted_columns != 0 && !_uses_secondary_indexing);
|
||||
|| (number_of_restricted_columns == 0 && has_clustering_columns_restriction())
|
||||
|| (number_of_restricted_columns != 0 && _nonprimary_key_restrictions->has_multiple_contains());
|
||||
}
|
||||
|
||||
void statement_restrictions::validate_secondary_index_selections(bool selects_only_static_columns) {
|
||||
@@ -454,33 +430,6 @@ void statement_restrictions::validate_secondary_index_selections(bool selects_on
|
||||
}
|
||||
}
|
||||
|
||||
const single_column_restrictions::restrictions_map& statement_restrictions::get_single_column_partition_key_restrictions() const {
|
||||
static single_column_restrictions::restrictions_map empty;
|
||||
auto single_restrictions = dynamic_pointer_cast<single_column_primary_key_restrictions<partition_key>>(_partition_key_restrictions);
|
||||
if (!single_restrictions) {
|
||||
if (dynamic_pointer_cast<initial_key_restrictions<partition_key>>(_partition_key_restrictions)) {
|
||||
return empty;
|
||||
}
|
||||
throw std::runtime_error("statement restrictions for multi-column partition key restrictions are not implemented yet");
|
||||
}
|
||||
return single_restrictions->restrictions();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return clustering key restrictions split into single column restrictions (e.g. for filtering support).
|
||||
*/
|
||||
const single_column_restrictions::restrictions_map& statement_restrictions::get_single_column_clustering_key_restrictions() const {
|
||||
static single_column_restrictions::restrictions_map empty;
|
||||
auto single_restrictions = dynamic_pointer_cast<single_column_primary_key_restrictions<clustering_key>>(_clustering_columns_restrictions);
|
||||
if (!single_restrictions) {
|
||||
if (dynamic_pointer_cast<initial_key_restrictions<clustering_key>>(_clustering_columns_restrictions)) {
|
||||
return empty;
|
||||
}
|
||||
throw std::runtime_error("statement restrictions for multi-column partition key restrictions are not implemented yet");
|
||||
}
|
||||
return single_restrictions->restrictions();
|
||||
}
|
||||
|
||||
static std::optional<atomic_cell_value_view> do_get_value(const schema& schema,
|
||||
const column_definition& cdef,
|
||||
const partition_key& key,
|
||||
@@ -533,14 +482,6 @@ bool single_column_restriction::EQ::is_satisfied_by(const schema& schema,
|
||||
return false;
|
||||
}
|
||||
|
||||
bool single_column_restriction::EQ::is_satisfied_by(bytes_view data, const query_options& options) const {
|
||||
if (_column_def.type->is_counter()) {
|
||||
fail(unimplemented::cause::COUNTERS);
|
||||
}
|
||||
auto operand = value(options);
|
||||
return operand && _column_def.type->compare(*operand, data) == 0;
|
||||
}
|
||||
|
||||
bool single_column_restriction::IN::is_satisfied_by(const schema& schema,
|
||||
const partition_key& key,
|
||||
const clustering_key_prefix& ckey,
|
||||
@@ -562,16 +503,6 @@ bool single_column_restriction::IN::is_satisfied_by(const schema& schema,
|
||||
});
|
||||
}
|
||||
|
||||
bool single_column_restriction::IN::is_satisfied_by(bytes_view data, const query_options& options) const {
|
||||
if (_column_def.type->is_counter()) {
|
||||
fail(unimplemented::cause::COUNTERS);
|
||||
}
|
||||
auto operands = values(options);
|
||||
return boost::algorithm::any_of(operands, [this, &data] (const bytes_opt& operand) {
|
||||
return operand && _column_def.type->compare(*operand, data) == 0;
|
||||
});
|
||||
}
|
||||
|
||||
static query::range<bytes_view> to_range(const term_slice& slice, const query_options& options) {
|
||||
using range_type = query::range<bytes_view>;
|
||||
auto extract_bound = [&] (statements::bound bound) -> stdx::optional<range_type::bound> {
|
||||
@@ -607,13 +538,6 @@ bool single_column_restriction::slice::is_satisfied_by(const schema& schema,
|
||||
});
|
||||
}
|
||||
|
||||
bool single_column_restriction::slice::is_satisfied_by(bytes_view data, const query_options& options) const {
|
||||
if (_column_def.type->is_counter()) {
|
||||
fail(unimplemented::cause::COUNTERS);
|
||||
}
|
||||
return to_range(_slice, options).contains(data, _column_def.type->as_tri_comparator());
|
||||
}
|
||||
|
||||
bool single_column_restriction::contains::is_satisfied_by(const schema& schema,
|
||||
const partition_key& key,
|
||||
const clustering_key_prefix& ckey,
|
||||
@@ -756,11 +680,6 @@ bool single_column_restriction::contains::is_satisfied_by(const schema& schema,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool single_column_restriction::contains::is_satisfied_by(bytes_view data, const query_options& options) const {
|
||||
//TODO(sarna): Deserialize & return. It would be nice to deduplicate, is_satisfied_by above is rather long
|
||||
fail(unimplemented::cause::INDEXES);
|
||||
}
|
||||
|
||||
bool token_restriction::EQ::is_satisfied_by(const schema& schema,
|
||||
const partition_key& key,
|
||||
const clustering_key_prefix& ckey,
|
||||
|
||||
@@ -120,8 +120,8 @@ public:
|
||||
bool for_view = false,
|
||||
bool allow_filtering = false);
|
||||
private:
|
||||
void add_restriction(::shared_ptr<restriction> restriction, bool for_view, bool allow_filtering);
|
||||
void add_single_column_restriction(::shared_ptr<single_column_restriction> restriction, bool for_view, bool allow_filtering);
|
||||
void add_restriction(::shared_ptr<restriction> restriction);
|
||||
void add_single_column_restriction(::shared_ptr<single_column_restriction> restriction);
|
||||
public:
|
||||
bool uses_function(const sstring& ks_name, const sstring& function_name) const;
|
||||
|
||||
@@ -175,7 +175,7 @@ public:
|
||||
*/
|
||||
bool has_unrestricted_clustering_columns() const;
|
||||
private:
|
||||
void process_partition_key_restrictions(bool has_queriable_index, bool for_view, bool allow_filtering);
|
||||
void process_partition_key_restrictions(bool has_queriable_index, bool for_view);
|
||||
|
||||
/**
|
||||
* Returns the partition key components that are not restricted.
|
||||
@@ -190,7 +190,7 @@ private:
|
||||
* @param select_a_collection <code>true</code> if the query should return a collection column
|
||||
* @throws InvalidRequestException if the request is invalid
|
||||
*/
|
||||
void process_clustering_columns_restrictions(bool has_queriable_index, bool select_a_collection, bool for_view, bool allow_filtering);
|
||||
void process_clustering_columns_restrictions(bool has_queriable_index, bool select_a_collection, bool for_view);
|
||||
|
||||
/**
|
||||
* Returns the <code>Restrictions</code> for the specified type of columns.
|
||||
@@ -358,7 +358,7 @@ public:
|
||||
* Checks if the query need to use filtering.
|
||||
* @return <code>true</code> if the query need to use filtering, <code>false</code> otherwise.
|
||||
*/
|
||||
bool need_filtering() const;
|
||||
bool need_filtering();
|
||||
|
||||
void validate_secondary_index_selections(bool selects_only_static_columns);
|
||||
|
||||
@@ -399,16 +399,6 @@ public:
|
||||
const single_column_restrictions::restrictions_map& get_non_pk_restriction() const {
|
||||
return _nonprimary_key_restrictions->restrictions();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return partition key restrictions split into single column restrictions (e.g. for filtering support).
|
||||
*/
|
||||
const single_column_restrictions::restrictions_map& get_single_column_partition_key_restrictions() const;
|
||||
|
||||
/**
|
||||
* @return clustering key restrictions split into single column restrictions (e.g. for filtering support).
|
||||
*/
|
||||
const single_column_restrictions::restrictions_map& get_single_column_clustering_key_restrictions() const;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -330,86 +330,93 @@ std::unique_ptr<result_set> result_set_builder::build() {
|
||||
return std::move(_result_set);
|
||||
}
|
||||
|
||||
bool result_set_builder::restrictions_filter::operator()(const selection& selection,
|
||||
const std::vector<bytes>& partition_key,
|
||||
const std::vector<bytes>& clustering_key,
|
||||
const query::result_row_view& static_row,
|
||||
const query::result_row_view& row) const {
|
||||
static logging::logger rlogger("restrictions_filter");
|
||||
result_set_builder::visitor::visitor(
|
||||
cql3::selection::result_set_builder& builder, const schema& s,
|
||||
const selection& selection)
|
||||
: _builder(builder), _schema(s), _selection(selection), _row_count(0) {
|
||||
}
|
||||
|
||||
if (_current_pratition_key_does_not_match || _current_static_row_does_not_match) {
|
||||
return false;
|
||||
void result_set_builder::visitor::add_value(const column_definition& def,
|
||||
query::result_row_view::iterator_type& i) {
|
||||
if (def.type->is_multi_cell()) {
|
||||
auto cell = i.next_collection_cell();
|
||||
if (!cell) {
|
||||
_builder.add_empty();
|
||||
return;
|
||||
}
|
||||
_builder.add_collection(def, cell->linearize());
|
||||
} else {
|
||||
auto cell = i.next_atomic_cell();
|
||||
if (!cell) {
|
||||
_builder.add_empty();
|
||||
return;
|
||||
}
|
||||
_builder.add(def, *cell);
|
||||
}
|
||||
}
|
||||
|
||||
void result_set_builder::visitor::accept_new_partition(const partition_key& key,
|
||||
uint32_t row_count) {
|
||||
_partition_key = key.explode(_schema);
|
||||
_row_count = row_count;
|
||||
}
|
||||
|
||||
void result_set_builder::visitor::accept_new_partition(uint32_t row_count) {
|
||||
_row_count = row_count;
|
||||
}
|
||||
|
||||
void result_set_builder::visitor::accept_new_row(const clustering_key& key,
|
||||
const query::result_row_view& static_row,
|
||||
const query::result_row_view& row) {
|
||||
_clustering_key = key.explode(_schema);
|
||||
accept_new_row(static_row, row);
|
||||
}
|
||||
|
||||
void result_set_builder::visitor::accept_new_row(
|
||||
const query::result_row_view& static_row,
|
||||
const query::result_row_view& row) {
|
||||
auto static_row_iterator = static_row.iterator();
|
||||
auto row_iterator = row.iterator();
|
||||
auto non_pk_restrictions_map = _restrictions->get_non_pk_restriction();
|
||||
auto partition_key_restrictions_map = _restrictions->get_single_column_partition_key_restrictions();
|
||||
auto clustering_key_restrictions_map = _restrictions->get_single_column_clustering_key_restrictions();
|
||||
for (auto&& cdef : selection.get_columns()) {
|
||||
switch (cdef->kind) {
|
||||
case column_kind::static_column:
|
||||
// fallthrough
|
||||
case column_kind::regular_column:
|
||||
if (cdef->type->is_multi_cell()) {
|
||||
rlogger.debug("Multi-cell filtering is not implemented yet", cdef->name_as_text());
|
||||
_builder.new_row();
|
||||
for (auto&& def : _selection.get_columns()) {
|
||||
switch (def->kind) {
|
||||
case column_kind::partition_key:
|
||||
_builder.add(_partition_key[def->component_index()]);
|
||||
break;
|
||||
case column_kind::clustering_key:
|
||||
if (_clustering_key.size() > def->component_index()) {
|
||||
_builder.add(_clustering_key[def->component_index()]);
|
||||
} else {
|
||||
auto cell_iterator = (cdef->kind == column_kind::static_column) ? static_row_iterator : row_iterator;
|
||||
auto cell = cell_iterator.next_atomic_cell();
|
||||
|
||||
auto restr_it = non_pk_restrictions_map.find(cdef);
|
||||
if (restr_it == non_pk_restrictions_map.end()) {
|
||||
continue;
|
||||
}
|
||||
restrictions::single_column_restriction& restriction = *restr_it->second;
|
||||
|
||||
bool regular_restriction_matches;
|
||||
if (cell) {
|
||||
regular_restriction_matches = cell->value().with_linearized([&restriction](bytes_view data) {
|
||||
return restriction.is_satisfied_by(data, cql3::query_options({ }));
|
||||
});
|
||||
} else {
|
||||
regular_restriction_matches = restriction.is_satisfied_by(bytes(), cql3::query_options({ }));
|
||||
}
|
||||
if (!regular_restriction_matches) {
|
||||
_current_static_row_does_not_match = (cdef->kind == column_kind::static_column);
|
||||
return false;
|
||||
}
|
||||
|
||||
_builder.add({});
|
||||
}
|
||||
break;
|
||||
case column_kind::partition_key: {
|
||||
auto restr_it = partition_key_restrictions_map.find(cdef);
|
||||
if (restr_it == partition_key_restrictions_map.end()) {
|
||||
continue;
|
||||
}
|
||||
restrictions::single_column_restriction& restriction = *restr_it->second;
|
||||
const bytes& value_to_check = partition_key[cdef->id];
|
||||
bool pk_restriction_matches = restriction.is_satisfied_by(value_to_check, cql3::query_options({ }));
|
||||
if (!pk_restriction_matches) {
|
||||
_current_pratition_key_does_not_match = true;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
case column_kind::regular_column:
|
||||
add_value(*def, row_iterator);
|
||||
break;
|
||||
case column_kind::clustering_key: {
|
||||
auto restr_it = clustering_key_restrictions_map.find(cdef);
|
||||
if (restr_it == clustering_key_restrictions_map.end()) {
|
||||
continue;
|
||||
}
|
||||
restrictions::single_column_restriction& restriction = *restr_it->second;
|
||||
const bytes& value_to_check = clustering_key[cdef->id];
|
||||
bool pk_restriction_matches = restriction.is_satisfied_by(value_to_check, cql3::query_options({ }));
|
||||
if (!pk_restriction_matches) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
case column_kind::static_column:
|
||||
add_value(*def, static_row_iterator);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void result_set_builder::visitor::accept_partition_end(
|
||||
const query::result_row_view& static_row) {
|
||||
if (_row_count == 0) {
|
||||
_builder.new_row();
|
||||
auto static_row_iterator = static_row.iterator();
|
||||
for (auto&& def : _selection.get_columns()) {
|
||||
if (def->is_partition_key()) {
|
||||
_builder.add(_partition_key[def->component_index()]);
|
||||
} else if (def->is_static()) {
|
||||
add_value(*def, static_row_iterator);
|
||||
} else {
|
||||
_builder.add_empty();
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
api::timestamp_type result_set_builder::timestamp_of(size_t idx) {
|
||||
|
||||
@@ -48,7 +48,6 @@
|
||||
#include "exceptions/exceptions.hh"
|
||||
#include "cql3/selection/raw_selector.hh"
|
||||
#include "cql3/selection/selector_factories.hh"
|
||||
#include "cql3/restrictions/statement_restrictions.hh"
|
||||
#include "unimplemented.hh"
|
||||
|
||||
namespace cql3 {
|
||||
@@ -248,28 +247,6 @@ private:
|
||||
const gc_clock::time_point _now;
|
||||
cql_serialization_format _cql_serialization_format;
|
||||
public:
|
||||
class nop_filter {
|
||||
public:
|
||||
inline bool operator()(const selection&, const std::vector<bytes>&, const std::vector<bytes>&, const query::result_row_view&, const query::result_row_view&) const {
|
||||
return true;
|
||||
}
|
||||
void reset() {
|
||||
}
|
||||
};
|
||||
class restrictions_filter {
|
||||
::shared_ptr<restrictions::statement_restrictions> _restrictions;
|
||||
mutable bool _current_pratition_key_does_not_match = false;
|
||||
mutable bool _current_static_row_does_not_match = false;
|
||||
public:
|
||||
restrictions_filter() = default;
|
||||
explicit restrictions_filter(::shared_ptr<restrictions::statement_restrictions> restrictions) : _restrictions(restrictions) {}
|
||||
bool operator()(const selection& selection, const std::vector<bytes>& pk, const std::vector<bytes>& ck, const query::result_row_view& static_row, const query::result_row_view& row) const;
|
||||
void reset() {
|
||||
_current_pratition_key_does_not_match = false;
|
||||
_current_static_row_does_not_match = false;
|
||||
}
|
||||
};
|
||||
|
||||
result_set_builder(const selection& s, gc_clock::time_point now, cql_serialization_format sf);
|
||||
void add_empty();
|
||||
void add(bytes_opt value);
|
||||
@@ -279,9 +256,8 @@ public:
|
||||
std::unique_ptr<result_set> build();
|
||||
api::timestamp_type timestamp_of(size_t idx);
|
||||
int32_t ttl_of(size_t idx);
|
||||
|
||||
|
||||
// Implements ResultVisitor concept from query.hh
|
||||
template<typename Filter = nop_filter>
|
||||
class visitor {
|
||||
protected:
|
||||
result_set_builder& _builder;
|
||||
@@ -290,100 +266,20 @@ public:
|
||||
uint32_t _row_count;
|
||||
std::vector<bytes> _partition_key;
|
||||
std::vector<bytes> _clustering_key;
|
||||
Filter _filter;
|
||||
public:
|
||||
visitor(cql3::selection::result_set_builder& builder, const schema& s,
|
||||
const selection& selection, Filter filter = Filter())
|
||||
: _builder(builder)
|
||||
, _schema(s)
|
||||
, _selection(selection)
|
||||
, _row_count(0)
|
||||
, _filter(filter)
|
||||
{}
|
||||
visitor(cql3::selection::result_set_builder& builder, const schema& s, const selection&);
|
||||
visitor(visitor&&) = default;
|
||||
|
||||
void add_value(const column_definition& def, query::result_row_view::iterator_type& i) {
|
||||
if (def.type->is_multi_cell()) {
|
||||
auto cell = i.next_collection_cell();
|
||||
if (!cell) {
|
||||
_builder.add_empty();
|
||||
return;
|
||||
}
|
||||
_builder.add_collection(def, cell->linearize());
|
||||
} else {
|
||||
auto cell = i.next_atomic_cell();
|
||||
if (!cell) {
|
||||
_builder.add_empty();
|
||||
return;
|
||||
}
|
||||
_builder.add(def, *cell);
|
||||
}
|
||||
}
|
||||
|
||||
void accept_new_partition(const partition_key& key, uint32_t row_count) {
|
||||
_partition_key = key.explode(_schema);
|
||||
_row_count = row_count;
|
||||
_filter.reset();
|
||||
}
|
||||
|
||||
void accept_new_partition(uint32_t row_count) {
|
||||
_row_count = row_count;
|
||||
_filter.reset();
|
||||
}
|
||||
|
||||
void accept_new_row(const clustering_key& key, const query::result_row_view& static_row, const query::result_row_view& row) {
|
||||
_clustering_key = key.explode(_schema);
|
||||
accept_new_row(static_row, row);
|
||||
}
|
||||
|
||||
void accept_new_row(const query::result_row_view& static_row, const query::result_row_view& row) {
|
||||
auto static_row_iterator = static_row.iterator();
|
||||
auto row_iterator = row.iterator();
|
||||
if (!_filter(_selection, _partition_key, _clustering_key, static_row, row)) {
|
||||
return;
|
||||
}
|
||||
_builder.new_row();
|
||||
for (auto&& def : _selection.get_columns()) {
|
||||
switch (def->kind) {
|
||||
case column_kind::partition_key:
|
||||
_builder.add(_partition_key[def->component_index()]);
|
||||
break;
|
||||
case column_kind::clustering_key:
|
||||
if (_clustering_key.size() > def->component_index()) {
|
||||
_builder.add(_clustering_key[def->component_index()]);
|
||||
} else {
|
||||
_builder.add({});
|
||||
}
|
||||
break;
|
||||
case column_kind::regular_column:
|
||||
add_value(*def, row_iterator);
|
||||
break;
|
||||
case column_kind::static_column:
|
||||
add_value(*def, static_row_iterator);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void accept_partition_end(const query::result_row_view& static_row) {
|
||||
if (_row_count == 0) {
|
||||
_builder.new_row();
|
||||
auto static_row_iterator = static_row.iterator();
|
||||
for (auto&& def : _selection.get_columns()) {
|
||||
if (def->is_partition_key()) {
|
||||
_builder.add(_partition_key[def->component_index()]);
|
||||
} else if (def->is_static()) {
|
||||
add_value(*def, static_row_iterator);
|
||||
} else {
|
||||
_builder.add_empty();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
void add_value(const column_definition& def, query::result_row_view::iterator_type& i);
|
||||
void accept_new_partition(const partition_key& key, uint32_t row_count);
|
||||
void accept_new_partition(uint32_t row_count);
|
||||
void accept_new_row(const clustering_key& key,
|
||||
const query::result_row_view& static_row,
|
||||
const query::result_row_view& row);
|
||||
void accept_new_row(const query::result_row_view& static_row,
|
||||
const query::result_row_view& row);
|
||||
void accept_partition_end(const query::result_row_view& static_row);
|
||||
};
|
||||
|
||||
private:
|
||||
bytes_opt get_value(data_type t, query::result_atomic_cell_view c);
|
||||
};
|
||||
|
||||
@@ -105,9 +105,11 @@ public:
|
||||
virtual void reset() = 0;
|
||||
|
||||
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) override {
|
||||
if (receiver->type == get_type()) {
|
||||
auto t1 = receiver->type->underlying_type();
|
||||
auto t2 = get_type()->underlying_type();
|
||||
if (t1 == t2) {
|
||||
return assignment_testable::test_result::EXACT_MATCH;
|
||||
} else if (receiver->type->is_value_compatible_with(*get_type())) {
|
||||
} else if (t1->is_value_compatible_with(*t2)) {
|
||||
return assignment_testable::test_result::WEAKLY_ASSIGNABLE;
|
||||
} else {
|
||||
return assignment_testable::test_result::NOT_ASSIGNABLE;
|
||||
|
||||
@@ -118,8 +118,7 @@ private:
|
||||
schema_ptr schema,
|
||||
::shared_ptr<variable_specifications> bound_names,
|
||||
::shared_ptr<selection::selection> selection,
|
||||
bool for_view = false,
|
||||
bool allow_filtering = false);
|
||||
bool for_view = false);
|
||||
|
||||
/** Returns a ::shared_ptr<term> for the limit or null if no limit is set */
|
||||
::shared_ptr<term> prepare_limit(database& db, ::shared_ptr<variable_specifications> bound_names);
|
||||
|
||||
@@ -96,12 +96,8 @@ public:
|
||||
encoded_row.write("\\\"", 2);
|
||||
}
|
||||
encoded_row.write("\": ", 3);
|
||||
if (parameters[i]) {
|
||||
sstring row_sstring = _selector_types[i]->to_json_string(parameters[i].value());
|
||||
encoded_row.write(row_sstring.c_str(), row_sstring.size());
|
||||
} else {
|
||||
encoded_row.write("null", 4);
|
||||
}
|
||||
sstring row_sstring = _selector_types[i]->to_json_string(parameters[i]);
|
||||
encoded_row.write(row_sstring.c_str(), row_sstring.size());
|
||||
}
|
||||
encoded_row.write("}", 1);
|
||||
return encoded_row.linearize().to_string();
|
||||
@@ -384,7 +380,6 @@ select_statement::do_execute(service::storage_proxy& proxy,
|
||||
auto now = gc_clock::now();
|
||||
|
||||
++_stats.reads;
|
||||
_stats.filtered_reads += _restrictions->need_filtering();
|
||||
|
||||
auto command = ::make_lw_shared<query::read_command>(_schema->id(), _schema->version(),
|
||||
make_partition_slice(options), limit, now, tracing::make_trace_info(state.get_trace_state()), query::max_partitions, utils::UUID(), options.get_timestamp(state));
|
||||
@@ -410,7 +405,7 @@ select_statement::do_execute(service::storage_proxy& proxy,
|
||||
command->slice.options.set<query::partition_slice::option::allow_short_read>();
|
||||
auto timeout = options.get_timeout_config().*get_timeout_config_selector();
|
||||
auto p = service::pager::query_pagers::pager(_schema, _selection,
|
||||
state, options, timeout, command, std::move(key_ranges), _stats, _restrictions->need_filtering() ? _restrictions : nullptr);
|
||||
state, options, timeout, command, std::move(key_ranges));
|
||||
|
||||
if (aggregate) {
|
||||
return do_with(
|
||||
@@ -424,7 +419,6 @@ select_statement::do_execute(service::storage_proxy& proxy,
|
||||
).then([this, &builder] {
|
||||
auto rs = builder.build();
|
||||
update_stats_rows_read(rs->size());
|
||||
_stats.filtered_rows_matched_total += _restrictions->need_filtering() ? rs->size() : 0;
|
||||
auto msg = ::make_shared<cql_transport::messages::result_message::rows>(result(std::move(rs)));
|
||||
return make_ready_future<shared_ptr<cql_transport::messages::result_message>>(std::move(msg));
|
||||
});
|
||||
@@ -437,7 +431,7 @@ select_statement::do_execute(service::storage_proxy& proxy,
|
||||
" you must either remove the ORDER BY or the IN and sort client side, or disable paging for this query");
|
||||
}
|
||||
|
||||
if (_selection->is_trivial() && !_restrictions->need_filtering()) {
|
||||
if (_selection->is_trivial()) {
|
||||
return p->fetch_page_generator(page_size, now, _stats).then([this, p, limit] (result_generator generator) {
|
||||
auto meta = make_shared<metadata>(*_selection->get_result_metadata());
|
||||
if (!p->is_exhausted()) {
|
||||
@@ -458,7 +452,6 @@ select_statement::do_execute(service::storage_proxy& proxy,
|
||||
}
|
||||
|
||||
update_stats_rows_read(rs->size());
|
||||
_stats.filtered_rows_matched_total += _restrictions->need_filtering() ? rs->size() : 0;
|
||||
auto msg = ::make_shared<cql_transport::messages::result_message::rows>(result(std::move(rs)));
|
||||
return make_ready_future<shared_ptr<cql_transport::messages::result_message>>(std::move(msg));
|
||||
});
|
||||
@@ -557,7 +550,7 @@ select_statement::process_results(foreign_ptr<lw_shared_ptr<query::result>> resu
|
||||
const query_options& options,
|
||||
gc_clock::time_point now)
|
||||
{
|
||||
bool fast_path = !needs_post_query_ordering() && _selection->is_trivial() && !_restrictions->need_filtering();
|
||||
bool fast_path = !needs_post_query_ordering() && _selection->is_trivial();
|
||||
if (fast_path) {
|
||||
return make_shared<cql_transport::messages::result_message::rows>(result(
|
||||
result_generator(_schema, std::move(results), std::move(cmd), _selection, _stats),
|
||||
@@ -567,17 +560,9 @@ select_statement::process_results(foreign_ptr<lw_shared_ptr<query::result>> resu
|
||||
|
||||
cql3::selection::result_set_builder builder(*_selection, now,
|
||||
options.get_cql_serialization_format());
|
||||
if (_restrictions->need_filtering()) {
|
||||
results->ensure_counts();
|
||||
_stats.filtered_rows_read_total += *results->row_count();
|
||||
query::result_view::consume(*results, cmd->slice,
|
||||
cql3::selection::result_set_builder::visitor(builder, *_schema,
|
||||
*_selection, cql3::selection::result_set_builder::restrictions_filter(_restrictions)));
|
||||
} else {
|
||||
query::result_view::consume(*results, cmd->slice,
|
||||
cql3::selection::result_set_builder::visitor(builder, *_schema,
|
||||
*_selection));
|
||||
}
|
||||
query::result_view::consume(*results, cmd->slice,
|
||||
cql3::selection::result_set_builder::visitor(builder, *_schema,
|
||||
*_selection));
|
||||
auto rs = builder.build();
|
||||
|
||||
if (needs_post_query_ordering()) {
|
||||
@@ -588,7 +573,6 @@ select_statement::process_results(foreign_ptr<lw_shared_ptr<query::result>> resu
|
||||
rs->trim(cmd->row_limit);
|
||||
}
|
||||
update_stats_rows_read(rs->size());
|
||||
_stats.filtered_rows_matched_total += _restrictions->need_filtering() ? rs->size() : 0;
|
||||
return ::make_shared<cql_transport::messages::result_message::rows>(result(std::move(rs)));
|
||||
}
|
||||
|
||||
@@ -969,7 +953,7 @@ std::unique_ptr<prepared_statement> select_statement::prepare(database& db, cql_
|
||||
? selection::selection::wildcard(schema)
|
||||
: selection::selection::from_selectors(db, schema, _select_clause);
|
||||
|
||||
auto restrictions = prepare_restrictions(db, schema, bound_names, selection, for_view, _parameters->allow_filtering());
|
||||
auto restrictions = prepare_restrictions(db, schema, bound_names, selection, for_view);
|
||||
|
||||
if (_parameters->is_distinct()) {
|
||||
validate_distinct_selection(schema, selection, restrictions);
|
||||
@@ -986,6 +970,10 @@ std::unique_ptr<prepared_statement> select_statement::prepare(database& db, cql_
|
||||
}
|
||||
|
||||
check_needs_filtering(restrictions);
|
||||
size_t restrictions_size = restrictions->get_partition_key_restrictions()->size() + restrictions->get_clustering_columns_restrictions()->size() + restrictions->get_non_pk_restriction().size();
|
||||
if (restrictions->uses_secondary_indexing() && restrictions_size > 1) {
|
||||
throw exceptions::invalid_request_exception("Indexed query may not contain multiple restrictions in 2.3");
|
||||
}
|
||||
|
||||
::shared_ptr<cql3::statements::select_statement> stmt;
|
||||
if (restrictions->uses_secondary_indexing()) {
|
||||
@@ -1023,14 +1011,13 @@ select_statement::prepare_restrictions(database& db,
|
||||
schema_ptr schema,
|
||||
::shared_ptr<variable_specifications> bound_names,
|
||||
::shared_ptr<selection::selection> selection,
|
||||
bool for_view,
|
||||
bool allow_filtering)
|
||||
bool for_view)
|
||||
{
|
||||
try {
|
||||
// FIXME: this method should take a separate allow_filtering parameter
|
||||
// and pass it on. Currently we pass "for_view" as allow_filtering.
|
||||
return ::make_shared<restrictions::statement_restrictions>(db, schema, statement_type::SELECT, std::move(_where_clause), bound_names,
|
||||
selection->contains_only_static_columns(), selection->contains_a_collection(), for_view, allow_filtering);
|
||||
selection->contains_only_static_columns(), selection->contains_a_collection(), for_view, for_view);
|
||||
} catch (const exceptions::unrecognized_entity_exception& e) {
|
||||
if (contains_alias(e.entity)) {
|
||||
throw exceptions::invalid_request_exception(sprint("Aliases aren't allowed in the where clause ('%s')", e.relation->to_string()));
|
||||
|
||||
@@ -179,7 +179,21 @@ modification_statement::json_cache_opt insert_prepared_json_statement::maybe_pre
|
||||
void
|
||||
insert_prepared_json_statement::execute_set_value(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params, const column_definition& column, const bytes_opt& value) {
|
||||
if (!value) {
|
||||
if (column.type->is_collection()) {
|
||||
auto& k = static_pointer_cast<const collection_type_impl>(column.type)->_kind;
|
||||
if (&k == &collection_type_impl::kind::list) {
|
||||
lists::setter::execute(m, prefix, params, column, make_shared<lists::value>(lists::value(std::vector<bytes_opt>())));
|
||||
} else if (&k == &collection_type_impl::kind::set) {
|
||||
sets::setter::execute(m, prefix, params, column, make_shared<sets::value>(sets::value(std::set<bytes, serialized_compare>(serialized_compare(empty_type)))));
|
||||
} else if (&k == &collection_type_impl::kind::map) {
|
||||
maps::setter::execute(m, prefix, params, column, make_shared<maps::value>(maps::value(std::map<bytes, bytes, serialized_compare>(serialized_compare(empty_type)))));
|
||||
} else {
|
||||
throw exceptions::invalid_request_exception("Incorrect value kind in JSON INSERT statement");
|
||||
}
|
||||
return;
|
||||
}
|
||||
m.set_cell(prefix, column, std::move(operation::make_dead_cell(params)));
|
||||
return;
|
||||
} else if (!column.type->is_collection()) {
|
||||
constants::setter::execute(m, prefix, params, column, raw_value_view::make_value(bytes_view(*value)));
|
||||
return;
|
||||
@@ -204,15 +218,17 @@ insert_prepared_json_statement::execute_set_value(mutation& m, const clustering_
|
||||
dht::partition_range_vector
|
||||
insert_prepared_json_statement::build_partition_keys(const query_options& options, const json_cache_opt& json_cache) {
|
||||
dht::partition_range_vector ranges;
|
||||
std::vector<bytes_opt> exploded;
|
||||
for (const auto& def : s->partition_key_columns()) {
|
||||
auto json_value = json_cache->at(def.name_as_text());
|
||||
auto k = query::range<partition_key>::make_singular(partition_key::from_single_value(*s, json_value.value()));
|
||||
ranges.emplace_back(std::move(k).transform(
|
||||
[this] (partition_key&& k) -> query::ring_position {
|
||||
auto token = dht::global_partitioner().get_token(*s, k);
|
||||
return { std::move(token), std::move(k) };
|
||||
}));
|
||||
if (!json_value) {
|
||||
throw exceptions::invalid_request_exception(sprint("Missing mandatory PRIMARY KEY part %s", def.name_as_text()));
|
||||
}
|
||||
exploded.emplace_back(*json_value);
|
||||
}
|
||||
auto pkey = partition_key::from_optional_exploded(*s, std::move(exploded));
|
||||
auto k = query::range<query::ring_position>::make_singular(dht::global_partitioner().decorate_key(*s, std::move(pkey)));
|
||||
ranges.emplace_back(std::move(k));
|
||||
return ranges;
|
||||
}
|
||||
|
||||
@@ -221,7 +237,10 @@ query::clustering_row_ranges insert_prepared_json_statement::create_clustering_r
|
||||
std::vector<bytes_opt> exploded;
|
||||
for (const auto& def : s->clustering_key_columns()) {
|
||||
auto json_value = json_cache->at(def.name_as_text());
|
||||
exploded.emplace_back(json_value.value());
|
||||
if (!json_value) {
|
||||
throw exceptions::invalid_request_exception(sprint("Missing mandatory PRIMARY KEY part %s", def.name_as_text()));
|
||||
}
|
||||
exploded.emplace_back(*json_value);
|
||||
}
|
||||
auto k = query::range<clustering_key_prefix>::make_singular(clustering_key_prefix::from_optional_exploded(*s, std::move(exploded)));
|
||||
ranges.emplace_back(query::clustering_range(std::move(k)));
|
||||
|
||||
@@ -41,10 +41,6 @@ struct cql_stats {
|
||||
int64_t secondary_index_drops = 0;
|
||||
int64_t secondary_index_reads = 0;
|
||||
int64_t secondary_index_rows_read = 0;
|
||||
|
||||
int64_t filtered_reads = 0;
|
||||
int64_t filtered_rows_matched_total = 0;
|
||||
int64_t filtered_rows_read_total = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -405,7 +405,7 @@ public:
|
||||
in_marker(int32_t bind_index, ::shared_ptr<column_specification> receiver)
|
||||
: abstract_marker(bind_index, std::move(receiver))
|
||||
{
|
||||
assert(dynamic_pointer_cast<const list_type_impl>(receiver->type));
|
||||
assert(dynamic_pointer_cast<const list_type_impl>(_receiver->type));
|
||||
}
|
||||
|
||||
virtual shared_ptr<terminal> bind(const query_options& options) override {
|
||||
|
||||
@@ -53,6 +53,9 @@ update_parameters::get_prefetched_list(
|
||||
return {};
|
||||
}
|
||||
|
||||
if (column.is_static()) {
|
||||
ckey = clustering_key_view::make_empty();
|
||||
}
|
||||
auto i = _prefetched->rows.find(std::make_pair(std::move(pkey), std::move(ckey)));
|
||||
if (i == _prefetched->rows.end()) {
|
||||
return {};
|
||||
|
||||
119
database.cc
119
database.cc
@@ -182,7 +182,7 @@ thread_local dirty_memory_manager default_dirty_memory_manager;
|
||||
lw_shared_ptr<memtable_list>
|
||||
table::make_memory_only_memtable_list() {
|
||||
auto get_schema = [this] { return schema(); };
|
||||
return make_lw_shared<memtable_list>(std::move(get_schema), _config.dirty_memory_manager, _config.memory_compaction_scheduling_group);
|
||||
return make_lw_shared<memtable_list>(std::move(get_schema), _config.dirty_memory_manager);
|
||||
}
|
||||
|
||||
lw_shared_ptr<memtable_list>
|
||||
@@ -191,7 +191,7 @@ table::make_memtable_list() {
|
||||
return seal_active_memtable(std::move(permit));
|
||||
};
|
||||
auto get_schema = [this] { return schema(); };
|
||||
return make_lw_shared<memtable_list>(std::move(seal), std::move(get_schema), _config.dirty_memory_manager, _config.memory_compaction_scheduling_group);
|
||||
return make_lw_shared<memtable_list>(std::move(seal), std::move(get_schema), _config.dirty_memory_manager);
|
||||
}
|
||||
|
||||
lw_shared_ptr<memtable_list>
|
||||
@@ -200,7 +200,7 @@ table::make_streaming_memtable_list() {
|
||||
return seal_active_streaming_memtable_immediate(std::move(permit));
|
||||
};
|
||||
auto get_schema = [this] { return schema(); };
|
||||
return make_lw_shared<memtable_list>(std::move(seal), std::move(get_schema), _config.streaming_dirty_memory_manager, _config.streaming_scheduling_group);
|
||||
return make_lw_shared<memtable_list>(std::move(seal), std::move(get_schema), _config.streaming_dirty_memory_manager);
|
||||
}
|
||||
|
||||
lw_shared_ptr<memtable_list>
|
||||
@@ -209,7 +209,7 @@ table::make_streaming_memtable_big_list(streaming_memtable_big& smb) {
|
||||
return seal_active_streaming_memtable_big(smb, std::move(permit));
|
||||
};
|
||||
auto get_schema = [this] { return schema(); };
|
||||
return make_lw_shared<memtable_list>(std::move(seal), std::move(get_schema), _config.streaming_dirty_memory_manager, _config.streaming_scheduling_group);
|
||||
return make_lw_shared<memtable_list>(std::move(seal), std::move(get_schema), _config.streaming_dirty_memory_manager);
|
||||
}
|
||||
|
||||
table::table(schema_ptr schema, config config, db::commitlog* cl, compaction_manager& compaction_manager, cell_locker_stats& cl_stats, cache_tracker& row_cache_tracker)
|
||||
@@ -237,7 +237,7 @@ partition_presence_checker
|
||||
table::make_partition_presence_checker(lw_shared_ptr<sstables::sstable_set> sstables) {
|
||||
auto sel = make_lw_shared(sstables->make_incremental_selector());
|
||||
return [this, sstables = std::move(sstables), sel = std::move(sel)] (const dht::decorated_key& key) {
|
||||
auto& sst = sel->select(key).sstables;
|
||||
auto& sst = sel->select(key.token()).sstables;
|
||||
if (sst.empty()) {
|
||||
return partition_presence_checker_result::definitely_doesnt_exist;
|
||||
}
|
||||
@@ -453,7 +453,7 @@ public:
|
||||
const dht::partition_range& pr,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
sstable_reader_factory_type fn)
|
||||
: reader_selector(s, pr.start() ? pr.start()->value() : dht::ring_position_view::min())
|
||||
: reader_selector(s, pr.start() ? pr.start()->value() : dht::ring_position::min())
|
||||
, _pr(&pr)
|
||||
, _sstables(std::move(sstables))
|
||||
, _trace_state(std::move(trace_state))
|
||||
@@ -472,34 +472,47 @@ public:
|
||||
incremental_reader_selector(incremental_reader_selector&&) = delete;
|
||||
incremental_reader_selector& operator=(incremental_reader_selector&&) = delete;
|
||||
|
||||
virtual std::vector<flat_mutation_reader> create_new_readers(const std::optional<dht::ring_position_view>& pos) override {
|
||||
dblog.trace("incremental_reader_selector {}: {}({})", this, __FUNCTION__, seastar::lazy_deref(pos));
|
||||
virtual std::vector<flat_mutation_reader> create_new_readers(const dht::token* const t) override {
|
||||
dblog.trace("incremental_reader_selector {}: {}({})", this, __FUNCTION__, seastar::lazy_deref(t));
|
||||
|
||||
auto readers = std::vector<flat_mutation_reader>();
|
||||
const auto& position = (t ? *t : _selector_position.token());
|
||||
// we only pass _selector_position's token to _selector::select() when T is nullptr
|
||||
// because it means gap between sstables, and the lower bound of the first interval
|
||||
// after the gap is guaranteed to be inclusive.
|
||||
auto selection = _selector.select(position);
|
||||
|
||||
do {
|
||||
auto selection = _selector.select(_selector_position);
|
||||
_selector_position = selection.next_position;
|
||||
if (selection.sstables.empty()) {
|
||||
// For the lower bound of the token range the _selector
|
||||
// might not return any sstables, in this case try again
|
||||
// with next_token unless it's maximum token.
|
||||
if (!selection.next_position.is_max()
|
||||
&& position == (_pr->start() ? _pr->start()->value().token() : dht::minimum_token())) {
|
||||
dblog.trace("incremental_reader_selector {}: no sstables intersect with the lower bound, retrying", this);
|
||||
_selector_position = std::move(selection.next_position);
|
||||
return create_new_readers(nullptr);
|
||||
}
|
||||
|
||||
dblog.trace("incremental_reader_selector {}: {} sstables to consider, advancing selector to {}", this, selection.sstables.size(),
|
||||
_selector_position);
|
||||
_selector_position = dht::ring_position::max();
|
||||
return {};
|
||||
}
|
||||
|
||||
readers = boost::copy_range<std::vector<flat_mutation_reader>>(selection.sstables
|
||||
| boost::adaptors::filtered([this] (auto& sst) { return _read_sstables.emplace(sst).second; })
|
||||
| boost::adaptors::transformed([this] (auto& sst) { return this->create_reader(sst); }));
|
||||
} while (!_selector_position.is_max() && readers.empty() && (!pos || dht::ring_position_tri_compare(*_s, *pos, _selector_position) >= 0));
|
||||
_selector_position = std::move(selection.next_position);
|
||||
|
||||
dblog.trace("incremental_reader_selector {}: created {} new readers", this, readers.size());
|
||||
dblog.trace("incremental_reader_selector {}: {} new sstables to consider, advancing selector to {}", this, selection.sstables.size(), _selector_position);
|
||||
|
||||
return readers;
|
||||
return boost::copy_range<std::vector<flat_mutation_reader>>(selection.sstables
|
||||
| boost::adaptors::filtered([this] (auto& sst) { return _read_sstables.emplace(sst).second; })
|
||||
| boost::adaptors::transformed([this] (auto& sst) {
|
||||
return this->create_reader(sst);
|
||||
}));
|
||||
}
|
||||
|
||||
virtual std::vector<flat_mutation_reader> fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) override {
|
||||
_pr = &pr;
|
||||
|
||||
auto pos = dht::ring_position_view::for_range_start(*_pr);
|
||||
if (dht::ring_position_tri_compare(*_s, pos, _selector_position) >= 0) {
|
||||
return create_new_readers(pos);
|
||||
dht::ring_position_comparator cmp(*_s);
|
||||
if (cmp(dht::ring_position_view::for_range_start(*_pr), _selector_position) >= 0) {
|
||||
return create_new_readers(&_pr->start()->value().token());
|
||||
}
|
||||
|
||||
return {};
|
||||
@@ -948,6 +961,11 @@ table::seal_active_memtable(flush_permit&& permit) {
|
||||
}
|
||||
_memtables->add_memtable();
|
||||
_stats.memtable_switch_count++;
|
||||
// This will set evictable occupancy of the old memtable region to zero, so that
|
||||
// this region is considered last for flushing by dirty_memory_manager::flush_when_needed().
|
||||
// If we don't do that, the flusher may keep picking up this memtable list for flushing after
|
||||
// the permit is released even though there is not much to flush in the active memtable of this list.
|
||||
old->region().ground_evictable_occupancy();
|
||||
auto previous_flush = _flush_barrier.advance_and_await();
|
||||
auto op = _flush_barrier.start();
|
||||
|
||||
@@ -1316,6 +1334,7 @@ table::on_compaction_completion(const std::vector<sstables::shared_sstable>& new
|
||||
|
||||
// This is done in the background, so we can consider this compaction completed.
|
||||
seastar::with_gate(_sstable_deletion_gate, [this, sstables_to_remove] {
|
||||
return with_semaphore(_sstable_deletion_sem, 1, [this, sstables_to_remove = std::move(sstables_to_remove)] {
|
||||
return sstables::delete_atomically(sstables_to_remove, *get_large_partition_handler()).then_wrapped([this, sstables_to_remove] (future<> f) {
|
||||
std::exception_ptr eptr;
|
||||
try {
|
||||
@@ -1339,6 +1358,7 @@ table::on_compaction_completion(const std::vector<sstables::shared_sstable>& new
|
||||
return make_exception_future<>(eptr);
|
||||
}
|
||||
return make_ready_future<>();
|
||||
});
|
||||
}).then([this] {
|
||||
// refresh underlying data source in row cache to prevent it from holding reference
|
||||
// to sstables files which were previously deleted.
|
||||
@@ -1460,7 +1480,10 @@ future<> table::cleanup_sstables(sstables::compaction_descriptor descriptor) {
|
||||
static thread_local semaphore sem(1);
|
||||
|
||||
return with_semaphore(sem, 1, [this, &sst] {
|
||||
return this->compact_sstables(sstables::compaction_descriptor({ sst }, sst->get_sstable_level()), true);
|
||||
// release reference to sstables cleaned up, otherwise space usage from their data and index
|
||||
// components cannot be reclaimed until all of them are cleaned.
|
||||
auto sstable_level = sst->get_sstable_level();
|
||||
return this->compact_sstables(sstables::compaction_descriptor({ std::move(sst) }, sstable_level), true);
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -1553,7 +1576,7 @@ future<std::unordered_set<sstring>> table::get_sstables_by_partition_key(const s
|
||||
[this] (std::unordered_set<sstring>& filenames, lw_shared_ptr<sstables::sstable_set::incremental_selector>& sel, partition_key& pk) {
|
||||
return do_with(dht::decorated_key(dht::global_partitioner().decorate_key(*_schema, pk)),
|
||||
[this, &filenames, &sel, &pk](dht::decorated_key& dk) mutable {
|
||||
auto sst = sel->select(dk).sstables;
|
||||
auto sst = sel->select(dk.token()).sstables;
|
||||
auto hk = sstables::sstable::make_hashed_key(*_schema, dk.key());
|
||||
|
||||
return do_for_each(sst, [this, &filenames, &dk, hk = std::move(hk)] (std::vector<sstables::shared_sstable>::const_iterator::reference s) mutable {
|
||||
@@ -1642,9 +1665,9 @@ future<> distributed_loader::open_sstable(distributed<database>& db, sstables::e
|
||||
// to distribute evenly the resource usage among all shards.
|
||||
|
||||
return db.invoke_on(column_family::calculate_shard_from_sstable_generation(comps.generation),
|
||||
[&db, comps = std::move(comps), func = std::move(func), pc] (database& local) {
|
||||
[&db, comps = std::move(comps), func = std::move(func), &pc] (database& local) {
|
||||
|
||||
return with_semaphore(local.sstable_load_concurrency_sem(), 1, [&db, &local, comps = std::move(comps), func = std::move(func), pc] {
|
||||
return with_semaphore(local.sstable_load_concurrency_sem(), 1, [&db, &local, comps = std::move(comps), func = std::move(func), &pc] {
|
||||
auto& cf = local.find_column_family(comps.ks, comps.cf);
|
||||
|
||||
auto f = sstables::sstable::load_shared_components(cf.schema(), cf._config.datadir, comps.generation, comps.version, comps.format, pc);
|
||||
@@ -2145,8 +2168,6 @@ database::database(const db::config& cfg, database_config dbcfg)
|
||||
_compaction_manager->start();
|
||||
setup_metrics();
|
||||
|
||||
_row_cache_tracker.set_compaction_scheduling_group(dbcfg.memory_compaction_scheduling_group);
|
||||
|
||||
dblog.info("Row: max_vector_size: {}, internal_count: {}", size_t(row::max_vector_size), size_t(row::internal_count));
|
||||
}
|
||||
|
||||
@@ -2834,7 +2855,6 @@ keyspace::make_column_family_config(const schema& s, const db::config& db_config
|
||||
cfg.cf_stats = _config.cf_stats;
|
||||
cfg.enable_incremental_backups = _config.enable_incremental_backups;
|
||||
cfg.compaction_scheduling_group = _config.compaction_scheduling_group;
|
||||
cfg.memory_compaction_scheduling_group = _config.memory_compaction_scheduling_group;
|
||||
cfg.memtable_scheduling_group = _config.memtable_scheduling_group;
|
||||
cfg.memtable_to_cache_scheduling_group = _config.memtable_to_cache_scheduling_group;
|
||||
cfg.streaming_scheduling_group = _config.streaming_scheduling_group;
|
||||
@@ -3386,7 +3406,7 @@ future<> memtable_list::request_flush() {
|
||||
}
|
||||
|
||||
lw_shared_ptr<memtable> memtable_list::new_memtable() {
|
||||
return make_lw_shared<memtable>(_current_schema(), *_dirty_memory_manager, this, _compaction_scheduling_group);
|
||||
return make_lw_shared<memtable>(_current_schema(), *_dirty_memory_manager, this);
|
||||
}
|
||||
|
||||
future<flush_permit> flush_permit::reacquire_sstable_write_permit() && {
|
||||
@@ -3427,6 +3447,13 @@ future<> dirty_memory_manager::flush_when_needed() {
|
||||
// release the biggest amount of memory and is less likely to be generating tiny
|
||||
// SSTables.
|
||||
memtable& candidate_memtable = memtable::from_region(*(this->_virtual_region_group.get_largest_region()));
|
||||
|
||||
if (candidate_memtable.empty()) {
|
||||
// Soft pressure, but nothing to flush. It could be due to fsync or memtable_to_cache lagging.
|
||||
// Back off to avoid OOMing with flush continuations.
|
||||
return sleep(1ms);
|
||||
}
|
||||
|
||||
// Do not wait. The semaphore will protect us against a concurrent flush. But we
|
||||
// want to start a new one as soon as the permits are destroyed and the semaphore is
|
||||
// made ready again, not when we are done with the current one.
|
||||
@@ -3615,7 +3642,6 @@ database::make_keyspace_config(const keyspace_metadata& ksm) {
|
||||
cfg.enable_incremental_backups = _enable_incremental_backups;
|
||||
|
||||
cfg.compaction_scheduling_group = _dbcfg.compaction_scheduling_group;
|
||||
cfg.memory_compaction_scheduling_group = _dbcfg.memory_compaction_scheduling_group;
|
||||
cfg.memtable_scheduling_group = _dbcfg.memtable_scheduling_group;
|
||||
cfg.memtable_to_cache_scheduling_group = _dbcfg.memtable_to_cache_scheduling_group;
|
||||
cfg.streaming_scheduling_group = _dbcfg.streaming_scheduling_group;
|
||||
@@ -3975,6 +4001,7 @@ seal_snapshot(sstring jsondir) {
|
||||
|
||||
future<> table::snapshot(sstring name) {
|
||||
return flush().then([this, name = std::move(name)]() {
|
||||
return with_semaphore(_sstable_deletion_sem, 1, [this, name = std::move(name)]() {
|
||||
auto tables = boost::copy_range<std::vector<sstables::shared_sstable>>(*_sstables->all());
|
||||
return do_with(std::move(tables), [this, name](std::vector<sstables::shared_sstable> & tables) {
|
||||
auto jsondir = _config.datadir + "/snapshots/" + name;
|
||||
@@ -4039,6 +4066,7 @@ future<> table::snapshot(sstring name) {
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -4170,6 +4198,7 @@ future<> table::fail_streaming_mutations(utils::UUID plan_id) {
|
||||
_streaming_memtables_big.erase(it);
|
||||
return entry->flush_in_progress.close().then([this, entry] {
|
||||
for (auto&& sst : entry->sstables) {
|
||||
sst.monitor->write_failed();
|
||||
sst.sstable->mark_for_deletion();
|
||||
}
|
||||
});
|
||||
@@ -4601,11 +4630,14 @@ flat_mutation_reader make_local_shard_sstable_reader(schema_ptr s,
|
||||
}
|
||||
return reader;
|
||||
};
|
||||
return make_combined_reader(s, std::make_unique<incremental_reader_selector>(s,
|
||||
std::move(sstables),
|
||||
pr,
|
||||
std::move(trace_state),
|
||||
std::move(reader_factory_fn)),
|
||||
auto all_readers = boost::copy_range<std::vector<flat_mutation_reader>>(
|
||||
*sstables->all()
|
||||
| boost::adaptors::transformed([&] (sstables::shared_sstable sst) -> flat_mutation_reader {
|
||||
return reader_factory_fn(sst, pr);
|
||||
})
|
||||
);
|
||||
return make_combined_reader(s,
|
||||
std::move(all_readers),
|
||||
fwd,
|
||||
fwd_mr);
|
||||
}
|
||||
@@ -4624,11 +4656,14 @@ flat_mutation_reader make_range_sstable_reader(schema_ptr s,
|
||||
auto reader_factory_fn = [s, &slice, &pc, resource_tracker, fwd, fwd_mr, &monitor_generator] (sstables::shared_sstable& sst, const dht::partition_range& pr) {
|
||||
return sst->read_range_rows_flat(s, pr, slice, pc, resource_tracker, fwd, fwd_mr, monitor_generator(sst));
|
||||
};
|
||||
return make_combined_reader(s, std::make_unique<incremental_reader_selector>(s,
|
||||
std::move(sstables),
|
||||
pr,
|
||||
std::move(trace_state),
|
||||
std::move(reader_factory_fn)),
|
||||
auto sstable_readers = boost::copy_range<std::vector<flat_mutation_reader>>(
|
||||
*sstables->all()
|
||||
| boost::adaptors::transformed([&] (sstables::shared_sstable sst) {
|
||||
return reader_factory_fn(sst, pr);
|
||||
})
|
||||
);
|
||||
return make_combined_reader(s,
|
||||
std::move(sstable_readers),
|
||||
fwd,
|
||||
fwd_mr);
|
||||
}
|
||||
|
||||
27
database.hh
27
database.hh
@@ -164,33 +164,29 @@ private:
|
||||
std::function<schema_ptr()> _current_schema;
|
||||
dirty_memory_manager* _dirty_memory_manager;
|
||||
std::experimental::optional<shared_promise<>> _flush_coalescing;
|
||||
seastar::scheduling_group _compaction_scheduling_group;
|
||||
public:
|
||||
memtable_list(
|
||||
seal_immediate_fn_type seal_immediate_fn,
|
||||
seal_delayed_fn_type seal_delayed_fn,
|
||||
std::function<schema_ptr()> cs,
|
||||
dirty_memory_manager* dirty_memory_manager,
|
||||
seastar::scheduling_group compaction_scheduling_group = seastar::current_scheduling_group())
|
||||
dirty_memory_manager* dirty_memory_manager)
|
||||
: _memtables({})
|
||||
, _seal_immediate_fn(seal_immediate_fn)
|
||||
, _seal_delayed_fn(seal_delayed_fn)
|
||||
, _current_schema(cs)
|
||||
, _dirty_memory_manager(dirty_memory_manager)
|
||||
, _compaction_scheduling_group(compaction_scheduling_group) {
|
||||
, _dirty_memory_manager(dirty_memory_manager) {
|
||||
add_memtable();
|
||||
}
|
||||
|
||||
memtable_list(
|
||||
seal_immediate_fn_type seal_immediate_fn,
|
||||
std::function<schema_ptr()> cs,
|
||||
dirty_memory_manager* dirty_memory_manager,
|
||||
seastar::scheduling_group compaction_scheduling_group = seastar::current_scheduling_group())
|
||||
: memtable_list(std::move(seal_immediate_fn), {}, std::move(cs), dirty_memory_manager, compaction_scheduling_group) {
|
||||
dirty_memory_manager* dirty_memory_manager)
|
||||
: memtable_list(std::move(seal_immediate_fn), {}, std::move(cs), dirty_memory_manager) {
|
||||
}
|
||||
|
||||
memtable_list(std::function<schema_ptr()> cs, dirty_memory_manager* dirty_memory_manager, seastar::scheduling_group compaction_scheduling_group = seastar::current_scheduling_group())
|
||||
: memtable_list({}, {}, std::move(cs), dirty_memory_manager, compaction_scheduling_group) {
|
||||
memtable_list(std::function<schema_ptr()> cs, dirty_memory_manager* dirty_memory_manager)
|
||||
: memtable_list({}, {}, std::move(cs), dirty_memory_manager) {
|
||||
}
|
||||
|
||||
bool may_flush() const {
|
||||
@@ -298,6 +294,8 @@ public:
|
||||
class table;
|
||||
using column_family = table;
|
||||
|
||||
class database_sstable_write_monitor;
|
||||
|
||||
class table : public enable_lw_shared_from_this<table> {
|
||||
public:
|
||||
struct config {
|
||||
@@ -316,7 +314,6 @@ public:
|
||||
seastar::scheduling_group memtable_scheduling_group;
|
||||
seastar::scheduling_group memtable_to_cache_scheduling_group;
|
||||
seastar::scheduling_group compaction_scheduling_group;
|
||||
seastar::scheduling_group memory_compaction_scheduling_group;
|
||||
seastar::scheduling_group statement_scheduling_group;
|
||||
seastar::scheduling_group streaming_scheduling_group;
|
||||
bool enable_metrics_reporting = false;
|
||||
@@ -394,7 +391,7 @@ private:
|
||||
// plan memtables and the resulting sstables are not made visible until
|
||||
// the streaming is complete.
|
||||
struct monitored_sstable {
|
||||
std::unique_ptr<sstables::write_monitor> monitor;
|
||||
std::unique_ptr<database_sstable_write_monitor> monitor;
|
||||
sstables::shared_sstable sstable;
|
||||
};
|
||||
|
||||
@@ -433,6 +430,10 @@ private:
|
||||
std::unordered_map<uint64_t, sstables::shared_sstable> _sstables_need_rewrite;
|
||||
// Control background fibers waiting for sstables to be deleted
|
||||
seastar::gate _sstable_deletion_gate;
|
||||
// This semaphore ensures that an operation like snapshot won't have its selected
|
||||
// sstables deleted by compaction in parallel, a race condition which could
|
||||
// easily result in failure.
|
||||
seastar::semaphore _sstable_deletion_sem = {1};
|
||||
// There are situations in which we need to stop writing sstables. Flushers will take
|
||||
// the read lock, and the ones that wish to stop that process will take the write lock.
|
||||
rwlock _sstables_lock;
|
||||
@@ -1044,7 +1045,6 @@ public:
|
||||
seastar::scheduling_group memtable_scheduling_group;
|
||||
seastar::scheduling_group memtable_to_cache_scheduling_group;
|
||||
seastar::scheduling_group compaction_scheduling_group;
|
||||
seastar::scheduling_group memory_compaction_scheduling_group;
|
||||
seastar::scheduling_group statement_scheduling_group;
|
||||
seastar::scheduling_group streaming_scheduling_group;
|
||||
bool enable_metrics_reporting = false;
|
||||
@@ -1125,7 +1125,6 @@ struct database_config {
|
||||
seastar::scheduling_group memtable_scheduling_group;
|
||||
seastar::scheduling_group memtable_to_cache_scheduling_group; // FIXME: merge with memtable_scheduling_group
|
||||
seastar::scheduling_group compaction_scheduling_group;
|
||||
seastar::scheduling_group memory_compaction_scheduling_group;
|
||||
seastar::scheduling_group statement_scheduling_group;
|
||||
seastar::scheduling_group streaming_scheduling_group;
|
||||
size_t available_memory;
|
||||
|
||||
@@ -163,7 +163,7 @@ future<> db::commitlog_replayer::impl::init() {
|
||||
// Get all truncation records for the CF and initialize max rps if
|
||||
// present. Cannot do this on demand, as there may be no sstables to
|
||||
// mark the CF as "needed".
|
||||
return db::system_keyspace::get_truncated_position(uuid).then([&map, &uuid](std::vector<db::replay_position> tpps) {
|
||||
return db::system_keyspace::get_truncated_position(uuid).then([&map, uuid](std::vector<db::replay_position> tpps) {
|
||||
for (auto& p : tpps) {
|
||||
rlogger.trace("CF {} truncated at {}", uuid, p);
|
||||
auto& pp = map[p.shard_id()][uuid];
|
||||
|
||||
@@ -686,33 +686,7 @@ read_keyspace_mutation(distributed<service::storage_proxy>& proxy, const sstring
|
||||
static semaphore the_merge_lock {1};
|
||||
|
||||
future<> merge_lock() {
|
||||
// ref: #1088
|
||||
// to avoid deadlocks, we don't want long-standing calls to the shard 0
|
||||
// as they can cause a deadlock:
|
||||
//
|
||||
// fiber1 fiber2
|
||||
// merge_lock() (succeeds)
|
||||
// merge_lock() (waits)
|
||||
// invoke_on_all() (waits on merge_lock to relinquish smp::submit_to slot)
|
||||
//
|
||||
// so we issue the lock calls with a timeout; the slot will be relinquished, and invoke_on_all()
|
||||
// can complete
|
||||
return repeat([] () mutable {
|
||||
return smp::submit_to(0, [] {
|
||||
return the_merge_lock.try_wait();
|
||||
}).then([] (bool result) {
|
||||
if (result) {
|
||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
||||
} else {
|
||||
static thread_local auto rand_engine = std::default_random_engine();
|
||||
auto dist = std::uniform_int_distribution<int>(0, 100);
|
||||
auto to = std::chrono::microseconds(dist(rand_engine));
|
||||
return sleep(to).then([] {
|
||||
return make_ready_future<stop_iteration>(stop_iteration::no);
|
||||
});
|
||||
}
|
||||
});
|
||||
});
|
||||
return smp::submit_to(0, [] { return the_merge_lock.wait(); });
|
||||
}
|
||||
|
||||
future<> merge_unlock() {
|
||||
|
||||
329
db/size_estimates_virtual_reader.cc
Normal file
329
db/size_estimates_virtual_reader.cc
Normal file
@@ -0,0 +1,329 @@
|
||||
/*
|
||||
* Copyright (C) 2019 ScyllaDB
|
||||
*
|
||||
* Modified by ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <boost/range/adaptor/indirected.hpp>
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
#include <boost/range/adaptor/transformed.hpp>
|
||||
#include <boost/range/algorithm/find_if.hpp>
|
||||
|
||||
#include "clustering_bounds_comparator.hh"
|
||||
#include "database.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "dht/i_partitioner.hh"
|
||||
#include "partition_range_compat.hh"
|
||||
#include "range.hh"
|
||||
#include "service/storage_service.hh"
|
||||
#include "stdx.hh"
|
||||
#include "mutation_fragment.hh"
|
||||
#include "sstables/sstables.hh"
|
||||
#include "db/timeout_clock.hh"
|
||||
#include "database.hh"
|
||||
|
||||
#include "db/size_estimates_virtual_reader.hh"
|
||||
|
||||
namespace db {
|
||||
|
||||
namespace size_estimates {
|
||||
|
||||
struct virtual_row {
|
||||
const bytes& cf_name;
|
||||
const token_range& tokens;
|
||||
clustering_key_prefix as_key() const {
|
||||
return clustering_key_prefix::from_exploded(std::vector<bytes_view>{cf_name, tokens.start, tokens.end});
|
||||
}
|
||||
};
|
||||
|
||||
struct virtual_row_comparator {
|
||||
schema_ptr _schema;
|
||||
virtual_row_comparator(schema_ptr schema) : _schema(schema) { }
|
||||
bool operator()(const clustering_key_prefix& key1, const clustering_key_prefix& key2) {
|
||||
return clustering_key_prefix::prefix_equality_less_compare(*_schema)(key1, key2);
|
||||
}
|
||||
bool operator()(const virtual_row& row, const clustering_key_prefix& key) {
|
||||
return operator()(row.as_key(), key);
|
||||
}
|
||||
bool operator()(const clustering_key_prefix& key, const virtual_row& row) {
|
||||
return operator()(key, row.as_key());
|
||||
}
|
||||
};
|
||||
|
||||
// Iterating over the cartesian product of cf_names and token_ranges.
|
||||
class virtual_row_iterator : public std::iterator<std::input_iterator_tag, const virtual_row> {
|
||||
std::reference_wrapper<const std::vector<bytes>> _cf_names;
|
||||
std::reference_wrapper<const std::vector<token_range>> _ranges;
|
||||
size_t _cf_names_idx = 0;
|
||||
size_t _ranges_idx = 0;
|
||||
public:
|
||||
struct end_iterator_tag {};
|
||||
virtual_row_iterator(const std::vector<bytes>& cf_names, const std::vector<token_range>& ranges)
|
||||
: _cf_names(std::ref(cf_names))
|
||||
, _ranges(std::ref(ranges))
|
||||
{ }
|
||||
virtual_row_iterator(const std::vector<bytes>& cf_names, const std::vector<token_range>& ranges, end_iterator_tag)
|
||||
: _cf_names(std::ref(cf_names))
|
||||
, _ranges(std::ref(ranges))
|
||||
, _cf_names_idx(cf_names.size())
|
||||
, _ranges_idx(ranges.size())
|
||||
{
|
||||
if (cf_names.empty() || ranges.empty()) {
|
||||
// The product of an empty range with any range is an empty range.
|
||||
// In this case we want the end iterator to be equal to the begin iterator,
|
||||
// which has_ranges_idx = _cf_names_idx = 0.
|
||||
_ranges_idx = _cf_names_idx = 0;
|
||||
}
|
||||
}
|
||||
virtual_row_iterator& operator++() {
|
||||
if (++_ranges_idx == _ranges.get().size() && ++_cf_names_idx < _cf_names.get().size()) {
|
||||
_ranges_idx = 0;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
virtual_row_iterator operator++(int) {
|
||||
virtual_row_iterator i(*this);
|
||||
++(*this);
|
||||
return i;
|
||||
}
|
||||
const value_type operator*() const {
|
||||
return { _cf_names.get()[_cf_names_idx], _ranges.get()[_ranges_idx] };
|
||||
}
|
||||
bool operator==(const virtual_row_iterator& i) const {
|
||||
return _cf_names_idx == i._cf_names_idx
|
||||
&& _ranges_idx == i._ranges_idx;
|
||||
}
|
||||
bool operator!=(const virtual_row_iterator& i) const {
|
||||
return !(*this == i);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns the keyspaces, ordered by name, as selected by the partition_range.
|
||||
*/
|
||||
static std::vector<sstring> get_keyspaces(const schema& s, const database& db, dht::partition_range range) {
|
||||
struct keyspace_less_comparator {
|
||||
const schema& _s;
|
||||
keyspace_less_comparator(const schema& s) : _s(s) { }
|
||||
dht::ring_position as_ring_position(const sstring& ks) {
|
||||
auto pkey = partition_key::from_single_value(_s, utf8_type->decompose(ks));
|
||||
return dht::global_partitioner().decorate_key(_s, std::move(pkey));
|
||||
}
|
||||
bool operator()(const sstring& ks1, const sstring& ks2) {
|
||||
return as_ring_position(ks1).less_compare(_s, as_ring_position(ks2));
|
||||
}
|
||||
bool operator()(const sstring& ks, const dht::ring_position& rp) {
|
||||
return as_ring_position(ks).less_compare(_s, rp);
|
||||
}
|
||||
bool operator()(const dht::ring_position& rp, const sstring& ks) {
|
||||
return rp.less_compare(_s, as_ring_position(ks));
|
||||
}
|
||||
};
|
||||
auto keyspaces = db.get_non_system_keyspaces();
|
||||
auto cmp = keyspace_less_comparator(s);
|
||||
boost::sort(keyspaces, cmp);
|
||||
return boost::copy_range<std::vector<sstring>>(
|
||||
range.slice(keyspaces, std::move(cmp)) | boost::adaptors::filtered([&s] (const auto& ks) {
|
||||
// If this is a range query, results are divided between shards by the partition key (keyspace_name).
|
||||
return shard_of(dht::global_partitioner().get_token(s,
|
||||
partition_key::from_single_value(s, utf8_type->decompose(ks))))
|
||||
== engine().cpu_id();
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Makes a wrapping range of ring_position from a nonwrapping range of token, used to select sstables.
|
||||
*/
|
||||
static dht::partition_range as_ring_position_range(dht::token_range& r) {
|
||||
stdx::optional<range<dht::ring_position>::bound> start_bound, end_bound;
|
||||
if (r.start()) {
|
||||
start_bound = {{ dht::ring_position(r.start()->value(), dht::ring_position::token_bound::start), r.start()->is_inclusive() }};
|
||||
}
|
||||
if (r.end()) {
|
||||
end_bound = {{ dht::ring_position(r.end()->value(), dht::ring_position::token_bound::end), r.end()->is_inclusive() }};
|
||||
}
|
||||
return dht::partition_range(std::move(start_bound), std::move(end_bound), r.is_singular());
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new range_estimates for the specified range, considering the sstables associated with `cf`.
|
||||
*/
|
||||
static system_keyspace::range_estimates estimate(const column_family& cf, const token_range& r) {
|
||||
int64_t count{0};
|
||||
utils::estimated_histogram hist{0};
|
||||
auto from_bytes = [] (auto& b) {
|
||||
return dht::global_partitioner().from_sstring(utf8_type->to_string(b));
|
||||
};
|
||||
dht::token_range_vector ranges;
|
||||
compat::unwrap_into(
|
||||
wrapping_range<dht::token>({{ from_bytes(r.start) }}, {{ from_bytes(r.end) }}),
|
||||
dht::token_comparator(),
|
||||
[&] (auto&& rng) { ranges.push_back(std::move(rng)); });
|
||||
for (auto&& r : ranges) {
|
||||
auto rp_range = as_ring_position_range(r);
|
||||
for (auto&& sstable : cf.select_sstables(rp_range)) {
|
||||
count += sstable->estimated_keys_for_range(r);
|
||||
hist.merge(sstable->get_stats_metadata().estimated_row_size);
|
||||
}
|
||||
}
|
||||
return {cf.schema(), r.start, r.end, count, count > 0 ? hist.mean() : 0};
|
||||
}
|
||||
|
||||
// Returns the primary token ranges of the local node as text-encoded (start, end) pairs,
// sorted by their start bound. Every range that is emitted must have both bounds present
// (enforced by the assert in to_bytes).
future<std::vector<token_range>> get_local_ranges() {
    auto& ss = service::get_local_storage_service();
    return ss.get_local_tokens().then([&ss] (auto&& tokens) {
        auto ranges = ss.get_token_metadata().get_primary_ranges_for(std::move(tokens));
        std::vector<token_range> local_ranges;
        auto to_bytes = [](const stdx::optional<dht::token_range::bound>& b) {
            assert(b);
            return utf8_type->decompose(dht::global_partitioner().to_sstring(b->value()));
        };
        // We merge the ranges to be compatible with how Cassandra shows its size estimates table.
        // All queries will be on that table, where all entries are text and there's no notion of
        // token ranges from the CQL point of view.
        auto left_inf = boost::find_if(ranges, [] (auto&& r) {
            return !r.start() || r.start()->value() == dht::minimum_token();
        });
        // The right-unbounded range is recognised by its *end* bound. Looking at the start
        // bound here would dereference an empty optional for a range without a start.
        auto right_inf = boost::find_if(ranges, [] (auto&& r) {
            return !r.end() || r.end()->value() == dht::maximum_token();
        });
        const bool merged = left_inf != right_inf && left_inf != ranges.end() && right_inf != ranges.end();
        if (merged) {
            local_ranges.push_back(token_range{to_bytes(right_inf->start()), to_bytes(left_inf->end())});
        }
        // The two merged halves are skipped rather than erased: erasing one element could
        // invalidate the iterator to the other.
        for (auto it = ranges.begin(); it != ranges.end(); ++it) {
            if (merged && (it == left_inf || it == right_inf)) {
                continue;
            }
            local_ranges.push_back(token_range{to_bytes(it->start()), to_bytes(it->end())});
        }
        boost::sort(local_ranges, [] (auto&& tr1, auto&& tr2) {
            return utf8_type->less(tr1.start, tr2.start);
        });
        return local_ranges;
    });
}
|
||||
|
||||
size_estimates_mutation_reader::size_estimates_mutation_reader(schema_ptr schema, const dht::partition_range& prange, const query::partition_slice& slice, streamed_mutation::forwarding fwd)
|
||||
: impl(schema)
|
||||
, _schema(std::move(schema))
|
||||
, _prange(&prange)
|
||||
, _slice(slice)
|
||||
, _fwd(fwd)
|
||||
{ }
|
||||
|
||||
// Prepares the reader for the next keyspace partition. The keyspace list is computed lazily
// on first use. When it is exhausted the stream is marked as ended; otherwise a single
// mutation holding the estimates of the current keyspace is built and wrapped in
// _partition_reader, and the cursor moves to the following keyspace.
future<> size_estimates_mutation_reader::get_next_partition() {
    auto& db = service::get_local_storage_proxy().get_db().local();
    if (!_keyspaces) {
        _keyspaces = get_keyspaces(*_schema, db, *_prange);
        _current_partition = _keyspaces->begin();
    }
    if (_current_partition != _keyspaces->end()) {
        return get_local_ranges().then([&db, this] (auto&& local_ranges) {
            auto rows = this->estimates_for_current_keyspace(db, std::move(local_ranges));
            auto partition = db::system_keyspace::make_size_estimates_mutation(*_current_partition, std::move(rows));
            ++_current_partition;
            std::vector<mutation> batch;
            batch.emplace_back(std::move(partition));
            _partition_reader = flat_mutation_reader_from_mutations(std::move(batch), _fwd);
        });
    }
    _end_of_stream = true;
    return make_ready_future<>();
}
|
||||
|
||||
// Fills the reader's buffer until it is full or the stream has ended. Each step either
// sets up the next partition (when there is no active partition reader) or drains
// fragments from the active partition reader into our own buffer.
future<> size_estimates_mutation_reader::fill_buffer(db::timeout_clock::time_point timeout) {
    auto finished = [this] {
        return is_end_of_stream() || is_buffer_full();
    };
    auto step = [this, timeout] {
        if (_partition_reader) {
            return _partition_reader->consume_pausable([this] (mutation_fragment mf) {
                push_mutation_fragment(std::move(mf));
                return stop_iteration(is_buffer_full());
            }, timeout).then([this] {
                // Drop the partition reader once it has nothing left to give.
                if (_partition_reader->is_end_of_stream() && _partition_reader->is_buffer_empty()) {
                    _partition_reader = stdx::nullopt;
                }
            });
        }
        return get_next_partition();
    };
    return do_until(std::move(finished), std::move(step));
}
|
||||
|
||||
void size_estimates_mutation_reader::next_partition() {
|
||||
clear_buffer_to_next_partition();
|
||||
if (is_buffer_empty()) {
|
||||
_partition_reader = stdx::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
// Repositions the reader on a new partition range. Buffered fragments, the cached keyspace
// list and the active partition reader are all dropped, so the next fill_buffer() starts
// from scratch. `pr` is stored by address, not copied.
future<> size_estimates_mutation_reader::fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) {
    clear_buffer();
    _prange = &pr;
    _keyspaces = stdx::nullopt;
    _partition_reader = stdx::nullopt;
    _end_of_stream = false;
    return make_ready_future<>();
}
|
||||
|
||||
// Forwards within the current partition: the buffer is advanced to the start of `pr`,
// end-of-stream is cleared, and the active partition reader (if any) is forwarded too.
future<> size_estimates_mutation_reader::fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) {
    forward_buffer_to(pr.start());
    _end_of_stream = false;
    if (!_partition_reader) {
        return make_ready_future<>();
    }
    return _partition_reader->fast_forward_to(std::move(pr), timeout);
}
|
||||
|
||||
size_t size_estimates_mutation_reader::buffer_size() const {
|
||||
if (_partition_reader) {
|
||||
return flat_mutation_reader::impl::buffer_size() + _partition_reader->buffer_size();
|
||||
}
|
||||
return flat_mutation_reader::impl::buffer_size();
|
||||
}
|
||||
|
||||
std::vector<db::system_keyspace::range_estimates>
|
||||
size_estimates_mutation_reader::estimates_for_current_keyspace(const database& db, std::vector<token_range> local_ranges) const {
|
||||
// For each specified range, estimate (crudely) mean partition size and partitions count.
|
||||
auto pkey = partition_key::from_single_value(*_schema, utf8_type->decompose(*_current_partition));
|
||||
auto cfs = db.find_keyspace(*_current_partition).metadata()->cf_meta_data();
|
||||
auto cf_names = boost::copy_range<std::vector<bytes>>(cfs | boost::adaptors::transformed([] (auto&& cf) {
|
||||
return utf8_type->decompose(cf.first);
|
||||
}));
|
||||
boost::sort(cf_names, [] (auto&& n1, auto&& n2) {
|
||||
return utf8_type->less(n1, n2);
|
||||
});
|
||||
std::vector<db::system_keyspace::range_estimates> estimates;
|
||||
for (auto& range : _slice.row_ranges(*_schema, pkey)) {
|
||||
auto rows = boost::make_iterator_range(
|
||||
virtual_row_iterator(cf_names, local_ranges),
|
||||
virtual_row_iterator(cf_names, local_ranges, virtual_row_iterator::end_iterator_tag()));
|
||||
auto rows_to_estimate = range.slice(rows, virtual_row_comparator(_schema));
|
||||
for (auto&& r : rows_to_estimate) {
|
||||
auto& cf = db.find_column_family(*_current_partition, utf8_type->to_string(r.cf_name));
|
||||
estimates.push_back(estimate(cf, r.tokens));
|
||||
if (estimates.size() >= _slice.partition_row_limit()) {
|
||||
return estimates;
|
||||
}
|
||||
}
|
||||
}
|
||||
return estimates;
|
||||
}
|
||||
|
||||
} // namespace size_estimates
|
||||
|
||||
} // namespace db
|
||||
@@ -21,33 +21,19 @@
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <boost/range/adaptor/indirected.hpp>
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
#include <boost/range/adaptor/transformed.hpp>
|
||||
#include <boost/range/algorithm/find_if.hpp>
|
||||
|
||||
#include "clustering_bounds_comparator.hh"
|
||||
#include "database.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "dht/i_partitioner.hh"
|
||||
#include "mutation_reader.hh"
|
||||
#include "partition_range_compat.hh"
|
||||
#include "range.hh"
|
||||
#include "service/storage_service.hh"
|
||||
#include "stdx.hh"
|
||||
#include "mutation_fragment.hh"
|
||||
#include "sstables/sstables.hh"
|
||||
#include "db/timeout_clock.hh"
|
||||
|
||||
namespace db {
|
||||
|
||||
namespace size_estimates {
|
||||
|
||||
struct token_range {
|
||||
bytes start;
|
||||
bytes end;
|
||||
};
|
||||
|
||||
class size_estimates_mutation_reader final : public flat_mutation_reader::impl {
|
||||
struct token_range {
|
||||
bytes start;
|
||||
bytes end;
|
||||
};
|
||||
schema_ptr _schema;
|
||||
const dht::partition_range* _prange;
|
||||
const query::partition_slice& _slice;
|
||||
@@ -57,267 +43,18 @@ class size_estimates_mutation_reader final : public flat_mutation_reader::impl {
|
||||
streamed_mutation::forwarding _fwd;
|
||||
flat_mutation_reader_opt _partition_reader;
|
||||
public:
|
||||
size_estimates_mutation_reader(schema_ptr schema, const dht::partition_range& prange, const query::partition_slice& slice, streamed_mutation::forwarding fwd)
|
||||
: impl(schema)
|
||||
, _schema(std::move(schema))
|
||||
, _prange(&prange)
|
||||
, _slice(slice)
|
||||
, _fwd(fwd)
|
||||
{ }
|
||||
size_estimates_mutation_reader(schema_ptr, const dht::partition_range&, const query::partition_slice&, streamed_mutation::forwarding);
|
||||
|
||||
virtual future<> fill_buffer(db::timeout_clock::time_point) override;
|
||||
virtual void next_partition() override;
|
||||
virtual future<> fast_forward_to(const dht::partition_range&, db::timeout_clock::time_point) override;
|
||||
virtual future<> fast_forward_to(position_range, db::timeout_clock::time_point) override;
|
||||
virtual size_t buffer_size() const override;
|
||||
private:
|
||||
// Prepares the reader for the next keyspace partition. The keyspace list is computed lazily
// on first use. When it is exhausted the stream is marked as ended; otherwise a single
// mutation holding the estimates of the current keyspace is built and wrapped in
// _partition_reader, and the cursor moves to the following keyspace.
future<> get_next_partition() {
    // For each specified range, estimate (crudely) mean partition size and partitions count.
    auto& db = service::get_local_storage_proxy().get_db().local();
    if (!_keyspaces) {
        _keyspaces = get_keyspaces(*_schema, db, *_prange);
        _current_partition = _keyspaces->begin();
    }
    if (_current_partition != _keyspaces->end()) {
        return get_local_ranges().then([&db, this] (auto&& local_ranges) {
            auto rows = this->estimates_for_current_keyspace(db, std::move(local_ranges));
            auto partition = db::system_keyspace::make_size_estimates_mutation(*_current_partition, std::move(rows));
            ++_current_partition;
            std::vector<mutation> batch;
            batch.emplace_back(std::move(partition));
            _partition_reader = flat_mutation_reader_from_mutations(std::move(batch), _fwd);
        });
    }
    _end_of_stream = true;
    return make_ready_future<>();
}
|
||||
public:
|
||||
// Fills the reader's buffer until it is full or the stream has ended. Each step either
// sets up the next partition (when there is no active partition reader) or drains
// fragments from the active partition reader into our own buffer.
virtual future<> fill_buffer(db::timeout_clock::time_point timeout) override {
    auto finished = [this] {
        return is_end_of_stream() || is_buffer_full();
    };
    auto step = [this, timeout] {
        if (_partition_reader) {
            return _partition_reader->consume_pausable([this] (mutation_fragment mf) {
                push_mutation_fragment(std::move(mf));
                return stop_iteration(is_buffer_full());
            }, timeout).then([this] {
                // Drop the partition reader once it has nothing left to give.
                if (_partition_reader->is_end_of_stream() && _partition_reader->is_buffer_empty()) {
                    _partition_reader = stdx::nullopt;
                }
            });
        }
        return get_next_partition();
    };
    return do_until(std::move(finished), std::move(step));
}
|
||||
virtual void next_partition() override {
|
||||
clear_buffer_to_next_partition();
|
||||
if (is_buffer_empty()) {
|
||||
_partition_reader = stdx::nullopt;
|
||||
}
|
||||
}
|
||||
// Repositions the reader on a new partition range. Buffered fragments, the cached keyspace
// list and the active partition reader are all dropped, so the next fill_buffer() starts
// from scratch. `pr` is stored by address, not copied.
virtual future<> fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) override {
    clear_buffer();
    _prange = &pr;
    _keyspaces = stdx::nullopt;
    _partition_reader = stdx::nullopt;
    _end_of_stream = false;
    return make_ready_future<>();
}
|
||||
// Forwards within the current partition: the buffer is advanced to the start of `pr`,
// end-of-stream is cleared, and the active partition reader (if any) is forwarded too.
virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override {
    forward_buffer_to(pr.start());
    _end_of_stream = false;
    if (!_partition_reader) {
        return make_ready_future<>();
    }
    return _partition_reader->fast_forward_to(std::move(pr), timeout);
}
|
||||
virtual size_t buffer_size() const override {
|
||||
if (_partition_reader) {
|
||||
return flat_mutation_reader::impl::buffer_size() + _partition_reader->buffer_size();
|
||||
}
|
||||
return flat_mutation_reader::impl::buffer_size();
|
||||
}
|
||||
/**
|
||||
* Returns the primary ranges for the local node.
|
||||
* Used for testing as well.
|
||||
*/
|
||||
static future<std::vector<token_range>> get_local_ranges() {
|
||||
auto& ss = service::get_local_storage_service();
|
||||
return ss.get_local_tokens().then([&ss] (auto&& tokens) {
|
||||
auto ranges = ss.get_token_metadata().get_primary_ranges_for(std::move(tokens));
|
||||
std::vector<token_range> local_ranges;
|
||||
auto to_bytes = [](const stdx::optional<dht::token_range::bound>& b) {
|
||||
assert(b);
|
||||
return utf8_type->decompose(dht::global_partitioner().to_sstring(b->value()));
|
||||
};
|
||||
// We merge the ranges to be compatible with how Cassandra shows it's size estimates table.
|
||||
// All queries will be on that table, where all entries are text and there's no notion of
|
||||
// token ranges form the CQL point of view.
|
||||
auto left_inf = boost::find_if(ranges, [] (auto&& r) {
|
||||
return !r.start() || r.start()->value() == dht::minimum_token();
|
||||
});
|
||||
auto right_inf = boost::find_if(ranges, [] (auto&& r) {
|
||||
return !r.end() || r.start()->value() == dht::maximum_token();
|
||||
});
|
||||
if (left_inf != right_inf && left_inf != ranges.end() && right_inf != ranges.end()) {
|
||||
local_ranges.push_back(token_range{to_bytes(right_inf->start()), to_bytes(left_inf->end())});
|
||||
ranges.erase(left_inf);
|
||||
ranges.erase(right_inf);
|
||||
}
|
||||
for (auto&& r : ranges) {
|
||||
local_ranges.push_back(token_range{to_bytes(r.start()), to_bytes(r.end())});
|
||||
}
|
||||
boost::sort(local_ranges, [] (auto&& tr1, auto&& tr2) {
|
||||
return utf8_type->less(tr1.start, tr2.start);
|
||||
});
|
||||
return local_ranges;
|
||||
});
|
||||
}
|
||||
private:
|
||||
struct virtual_row {
|
||||
const bytes& cf_name;
|
||||
const token_range& tokens;
|
||||
clustering_key_prefix as_key() const {
|
||||
return clustering_key_prefix::from_exploded(std::vector<bytes_view>{cf_name, tokens.start, tokens.end});
|
||||
}
|
||||
};
|
||||
struct virtual_row_comparator {
|
||||
schema_ptr _schema;
|
||||
virtual_row_comparator(schema_ptr schema) : _schema(schema) { }
|
||||
bool operator()(const clustering_key_prefix& key1, const clustering_key_prefix& key2) {
|
||||
return clustering_key_prefix::prefix_equality_less_compare(*_schema)(key1, key2);
|
||||
}
|
||||
bool operator()(const virtual_row& row, const clustering_key_prefix& key) {
|
||||
return operator()(row.as_key(), key);
|
||||
}
|
||||
bool operator()(const clustering_key_prefix& key, const virtual_row& row) {
|
||||
return operator()(key, row.as_key());
|
||||
}
|
||||
};
|
||||
class virtual_row_iterator : public std::iterator<std::input_iterator_tag, const virtual_row> {
|
||||
std::reference_wrapper<const std::vector<bytes>> _cf_names;
|
||||
std::reference_wrapper<const std::vector<token_range>> _ranges;
|
||||
size_t _cf_names_idx = 0;
|
||||
size_t _ranges_idx = 0;
|
||||
public:
|
||||
struct end_iterator_tag {};
|
||||
virtual_row_iterator(const std::vector<bytes>& cf_names, const std::vector<token_range>& ranges)
|
||||
: _cf_names(std::ref(cf_names))
|
||||
, _ranges(std::ref(ranges))
|
||||
{ }
|
||||
virtual_row_iterator(const std::vector<bytes>& cf_names, const std::vector<token_range>& ranges, end_iterator_tag)
|
||||
: _cf_names(std::ref(cf_names))
|
||||
, _ranges(std::ref(ranges))
|
||||
, _cf_names_idx(cf_names.size())
|
||||
, _ranges_idx(ranges.size())
|
||||
{ }
|
||||
virtual_row_iterator& operator++() {
|
||||
if (++_ranges_idx == _ranges.get().size() && ++_cf_names_idx < _cf_names.get().size()) {
|
||||
_ranges_idx = 0;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
virtual_row_iterator operator++(int) {
|
||||
virtual_row_iterator i(*this);
|
||||
++(*this);
|
||||
return i;
|
||||
}
|
||||
const value_type operator*() const {
|
||||
return { _cf_names.get()[_cf_names_idx], _ranges.get()[_ranges_idx] };
|
||||
}
|
||||
bool operator==(const virtual_row_iterator& i) const {
|
||||
return _cf_names_idx == i._cf_names_idx
|
||||
&& _ranges_idx == i._ranges_idx;
|
||||
}
|
||||
bool operator!=(const virtual_row_iterator& i) const {
|
||||
return !(*this == i);
|
||||
}
|
||||
};
|
||||
future<> get_next_partition();
|
||||
|
||||
std::vector<db::system_keyspace::range_estimates>
|
||||
estimates_for_current_keyspace(const database& db, std::vector<token_range> local_ranges) const {
|
||||
auto pkey = partition_key::from_single_value(*_schema, utf8_type->decompose(*_current_partition));
|
||||
auto cfs = db.find_keyspace(*_current_partition).metadata()->cf_meta_data();
|
||||
auto cf_names = boost::copy_range<std::vector<bytes>>(cfs | boost::adaptors::transformed([] (auto&& cf) {
|
||||
return utf8_type->decompose(cf.first);
|
||||
}));
|
||||
boost::sort(cf_names, [] (auto&& n1, auto&& n2) {
|
||||
return utf8_type->less(n1, n2);
|
||||
});
|
||||
std::vector<db::system_keyspace::range_estimates> estimates;
|
||||
for (auto& range : _slice.row_ranges(*_schema, pkey)) {
|
||||
auto rows = boost::make_iterator_range(
|
||||
virtual_row_iterator(cf_names, local_ranges),
|
||||
virtual_row_iterator(cf_names, local_ranges, virtual_row_iterator::end_iterator_tag()));
|
||||
auto rows_to_estimate = range.slice(rows, virtual_row_comparator(_schema));
|
||||
for (auto&& r : rows_to_estimate) {
|
||||
auto& cf = db.find_column_family(*_current_partition, utf8_type->to_string(r.cf_name));
|
||||
estimates.push_back(estimate(cf, r.tokens));
|
||||
if (estimates.size() >= _slice.partition_row_limit()) {
|
||||
return estimates;
|
||||
}
|
||||
}
|
||||
}
|
||||
return estimates;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the keyspaces, ordered by name, as selected by the partition_range.
|
||||
*/
|
||||
static ks_range get_keyspaces(const schema& s, const database& db, dht::partition_range range) {
|
||||
struct keyspace_less_comparator {
|
||||
const schema& _s;
|
||||
keyspace_less_comparator(const schema& s) : _s(s) { }
|
||||
dht::ring_position as_ring_position(const sstring& ks) {
|
||||
auto pkey = partition_key::from_single_value(_s, utf8_type->decompose(ks));
|
||||
return dht::global_partitioner().decorate_key(_s, std::move(pkey));
|
||||
}
|
||||
bool operator()(const sstring& ks1, const sstring& ks2) {
|
||||
return as_ring_position(ks1).less_compare(_s, as_ring_position(ks2));
|
||||
}
|
||||
bool operator()(const sstring& ks, const dht::ring_position& rp) {
|
||||
return as_ring_position(ks).less_compare(_s, rp);
|
||||
}
|
||||
bool operator()(const dht::ring_position& rp, const sstring& ks) {
|
||||
return rp.less_compare(_s, as_ring_position(ks));
|
||||
}
|
||||
};
|
||||
auto keyspaces = db.get_non_system_keyspaces();
|
||||
auto cmp = keyspace_less_comparator(s);
|
||||
boost::sort(keyspaces, cmp);
|
||||
return boost::copy_range<ks_range>(range.slice(keyspaces, std::move(cmp)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Makes a wrapping range of ring_position from a nonwrapping range of token, used to select sstables.
|
||||
*/
|
||||
static dht::partition_range as_ring_position_range(dht::token_range& r) {
|
||||
stdx::optional<range<dht::ring_position>::bound> start_bound, end_bound;
|
||||
if (r.start()) {
|
||||
start_bound = {{ dht::ring_position(r.start()->value(), dht::ring_position::token_bound::start), r.start()->is_inclusive() }};
|
||||
}
|
||||
if (r.end()) {
|
||||
end_bound = {{ dht::ring_position(r.end()->value(), dht::ring_position::token_bound::end), r.end()->is_inclusive() }};
|
||||
}
|
||||
return dht::partition_range(std::move(start_bound), std::move(end_bound), r.is_singular());
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new range_estimates for the specified range, considering the sstables associated with `cf`.
|
||||
*/
|
||||
static system_keyspace::range_estimates estimate(const column_family& cf, const token_range& r) {
|
||||
int64_t count{0};
|
||||
utils::estimated_histogram hist{0};
|
||||
auto from_bytes = [] (auto& b) {
|
||||
return dht::global_partitioner().from_sstring(utf8_type->to_string(b));
|
||||
};
|
||||
dht::token_range_vector ranges;
|
||||
compat::unwrap_into(
|
||||
wrapping_range<dht::token>({{ from_bytes(r.start) }}, {{ from_bytes(r.end) }}),
|
||||
dht::token_comparator(),
|
||||
[&] (auto&& rng) { ranges.push_back(std::move(rng)); });
|
||||
for (auto&& r : ranges) {
|
||||
auto rp_range = as_ring_position_range(r);
|
||||
for (auto&& sstable : cf.select_sstables(rp_range)) {
|
||||
count += sstable->estimated_keys_for_range(r);
|
||||
hist.merge(sstable->get_stats_metadata().estimated_row_size);
|
||||
}
|
||||
}
|
||||
return {cf.schema(), r.start, r.end, count, count > 0 ? hist.mean() : 0};
|
||||
}
|
||||
estimates_for_current_keyspace(const database&, std::vector<token_range> local_ranges) const;
|
||||
};
|
||||
|
||||
struct virtual_reader {
|
||||
@@ -332,6 +69,12 @@ struct virtual_reader {
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Returns the primary ranges for the local node.
|
||||
* Used for testing as well.
|
||||
*/
|
||||
future<std::vector<token_range>> get_local_ranges();
|
||||
|
||||
} // namespace size_estimates
|
||||
|
||||
} // namespace db
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "db/consistency_level_type.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "schema_builder.hh"
|
||||
#include "timeout_config.hh"
|
||||
#include "types.hh"
|
||||
|
||||
#include <seastar/core/reactor.hh>
|
||||
@@ -97,11 +98,17 @@ future<> system_distributed_keyspace::stop() {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
// Timeout configuration whose seven fields are all set to the same 10 second duration.
static const timeout_config internal_distributed_timeout_config = [] {
    using namespace std::chrono_literals;
    const auto limit = 10s;
    return timeout_config{ limit, limit, limit, limit, limit, limit, limit };
}();
|
||||
|
||||
future<std::unordered_map<utils::UUID, sstring>> system_distributed_keyspace::view_status(sstring ks_name, sstring view_name) const {
|
||||
return _qp.process(
|
||||
sprint("SELECT host_id, status FROM %s.%s WHERE keyspace_name = ? AND view_name = ?", NAME, VIEW_BUILD_STATUS),
|
||||
db::consistency_level::ONE,
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config,
|
||||
{ std::move(ks_name), std::move(view_name) },
|
||||
false).then([this] (::shared_ptr<cql3::untyped_result_set> cql_result) {
|
||||
return boost::copy_range<std::unordered_map<utils::UUID, sstring>>(*cql_result
|
||||
@@ -118,7 +125,7 @@ future<> system_distributed_keyspace::start_view_build(sstring ks_name, sstring
|
||||
return _qp.process(
|
||||
sprint("INSERT INTO %s.%s (keyspace_name, view_name, host_id, status) VALUES (?, ?, ?, ?)", NAME, VIEW_BUILD_STATUS),
|
||||
db::consistency_level::ONE,
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config,
|
||||
{ std::move(ks_name), std::move(view_name), std::move(host_id), "STARTED" },
|
||||
false).discard_result();
|
||||
});
|
||||
@@ -129,7 +136,7 @@ future<> system_distributed_keyspace::finish_view_build(sstring ks_name, sstring
|
||||
return _qp.process(
|
||||
sprint("UPDATE %s.%s SET status = ? WHERE keyspace_name = ? AND view_name = ? AND host_id = ?", NAME, VIEW_BUILD_STATUS),
|
||||
db::consistency_level::ONE,
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config,
|
||||
{ "SUCCESS", std::move(ks_name), std::move(view_name), std::move(host_id) },
|
||||
false).discard_result();
|
||||
});
|
||||
@@ -139,7 +146,7 @@ future<> system_distributed_keyspace::remove_view(sstring ks_name, sstring view_
|
||||
return _qp.process(
|
||||
sprint("DELETE FROM %s.%s WHERE keyspace_name = ? AND view_name = ?", NAME, VIEW_BUILD_STATUS),
|
||||
db::consistency_level::ONE,
|
||||
infinite_timeout_config,
|
||||
internal_distributed_timeout_config,
|
||||
{ std::move(ks_name), std::move(view_name) },
|
||||
false).discard_result();
|
||||
}
|
||||
|
||||
@@ -1635,6 +1635,9 @@ void make(database& db, bool durable, bool volatile_testing_only) {
|
||||
auto cfg = ks.make_column_family_config(*table, db.get_config(), db.get_large_partition_handler());
|
||||
if (maybe_write_in_user_memory(table, db)) {
|
||||
cfg.dirty_memory_manager = &db._dirty_memory_manager;
|
||||
} else {
|
||||
cfg.memtable_scheduling_group = default_scheduling_group();
|
||||
cfg.memtable_to_cache_scheduling_group = default_scheduling_group();
|
||||
}
|
||||
db.add_column_family(ks, table, std::move(cfg));
|
||||
maybe_add_virtual_reader(table, db);
|
||||
|
||||
@@ -461,7 +461,7 @@ bool ring_position::less_compare(const schema& s, const ring_position& other) co
|
||||
return tri_compare(s, other) < 0;
|
||||
}
|
||||
|
||||
int ring_position_tri_compare(const schema& s, ring_position_view lh, ring_position_view rh) {
|
||||
int ring_position_comparator::operator()(ring_position_view lh, ring_position_view rh) const {
|
||||
auto token_cmp = tri_compare(*lh._token, *rh._token);
|
||||
if (token_cmp) {
|
||||
return token_cmp;
|
||||
@@ -482,10 +482,6 @@ int ring_position_tri_compare(const schema& s, ring_position_view lh, ring_posit
|
||||
}
|
||||
}
|
||||
|
||||
int ring_position_comparator::operator()(ring_position_view lh, ring_position_view rh) const {
|
||||
return ring_position_tri_compare(s, lh, rh);
|
||||
}
|
||||
|
||||
int ring_position_comparator::operator()(ring_position_view lh, sstables::decorated_key_view rh) const {
|
||||
auto token_cmp = tri_compare(*lh._token, rh.token());
|
||||
if (token_cmp) {
|
||||
|
||||
@@ -529,7 +529,6 @@ public:
|
||||
// Such range includes all keys k such that v1 <= k < v2, with order defined by ring_position_comparator.
|
||||
//
|
||||
class ring_position_view {
|
||||
friend int ring_position_tri_compare(const schema& s, ring_position_view lh, ring_position_view rh);
|
||||
friend class ring_position_comparator;
|
||||
|
||||
// Order is lexicographical on (_token, _key) tuples, where _key part may be missing, and
|
||||
@@ -544,7 +543,6 @@ class ring_position_view {
|
||||
const partition_key* _key; // Can be nullptr
|
||||
int8_t _weight;
|
||||
public:
|
||||
using token_bound = ring_position::token_bound;
|
||||
struct after_key_tag {};
|
||||
using after_key = bool_class<after_key_tag>;
|
||||
|
||||
@@ -580,14 +578,6 @@ public:
|
||||
return ring_position_view(after_key_tag(), view);
|
||||
}
|
||||
|
||||
static ring_position_view starting_at(const dht::token& t) {
|
||||
return ring_position_view(t, token_bound::start);
|
||||
}
|
||||
|
||||
static ring_position_view ending_at(const dht::token& t) {
|
||||
return ring_position_view(t, token_bound::end);
|
||||
}
|
||||
|
||||
ring_position_view(const dht::ring_position& pos, after_key after = after_key::no)
|
||||
: _token(&pos.token())
|
||||
, _key(pos.has_key() ? &*pos.key() : nullptr)
|
||||
@@ -615,25 +605,17 @@ public:
|
||||
, _weight(weight)
|
||||
{ }
|
||||
|
||||
explicit ring_position_view(const dht::token& token, token_bound bound = token_bound::start)
|
||||
explicit ring_position_view(const dht::token& token, int8_t weight = -1)
|
||||
: _token(&token)
|
||||
, _key(nullptr)
|
||||
, _weight(static_cast<std::underlying_type_t<token_bound>>(bound))
|
||||
, _weight(weight)
|
||||
{ }
|
||||
|
||||
const dht::token& token() const { return *_token; }
|
||||
const partition_key* key() const { return _key; }
|
||||
|
||||
// Only when key() == nullptr
|
||||
token_bound get_token_bound() const { return token_bound(_weight); }
|
||||
// Only when key() != nullptr
|
||||
after_key is_after_key() const { return after_key(_weight == 1); }
|
||||
|
||||
friend std::ostream& operator<<(std::ostream&, ring_position_view);
|
||||
};
|
||||
|
||||
int ring_position_tri_compare(const schema& s, ring_position_view lh, ring_position_view rh);
|
||||
|
||||
// Trichotomic comparator for ring order
|
||||
struct ring_position_comparator {
|
||||
const schema& s;
|
||||
|
||||
@@ -324,11 +324,11 @@ future<> range_streamer::do_stream_async() {
|
||||
for (auto& range : ranges_to_stream) {
|
||||
range_vec.push_back(range);
|
||||
}
|
||||
auto t = std::chrono::duration_cast<std::chrono::duration<float>>(lowres_clock::now() - start_time).count();
|
||||
auto t = std::chrono::duration_cast<std::chrono::seconds>(lowres_clock::now() - start_time).count();
|
||||
logger.warn("{} with {} for keyspace={} failed, took {} seconds: {}", description, source, keyspace, t, std::current_exception());
|
||||
throw;
|
||||
}
|
||||
auto t = std::chrono::duration_cast<std::chrono::duration<float>>(lowres_clock::now() - start_time).count();
|
||||
auto t = std::chrono::duration_cast<std::chrono::seconds>(lowres_clock::now() - start_time).count();
|
||||
logger.info("{} with {} for keyspace={} succeeded, took {} seconds", description, source, keyspace, t);
|
||||
});
|
||||
|
||||
|
||||
93
dist/ami/files/.bash_profile
vendored
93
dist/ami/files/.bash_profile
vendored
@@ -7,99 +7,8 @@ fi
|
||||
|
||||
# User specific environment and startup programs
|
||||
|
||||
. /usr/lib/scylla/scylla_lib.sh
|
||||
|
||||
PATH=$PATH:$HOME/.local/bin:$HOME/bin
|
||||
|
||||
export PATH
|
||||
|
||||
echo
|
||||
echo ' _____ _ _ _____ ____ '
|
||||
echo ' / ____| | | | | __ \| _ \ '
|
||||
echo ' | (___ ___ _ _| | | __ _| | | | |_) |'
|
||||
echo ' \___ \ / __| | | | | |/ _` | | | | _ < '
|
||||
echo ' ____) | (__| |_| | | | (_| | |__| | |_) |'
|
||||
echo ' |_____/ \___|\__, |_|_|\__,_|_____/|____/ '
|
||||
echo ' __/ | '
|
||||
echo ' |___/ '
|
||||
echo ''
|
||||
echo ''
|
||||
echo 'Nodetool:'
|
||||
echo ' nodetool help'
|
||||
echo 'CQL Shell:'
|
||||
echo ' cqlsh'
|
||||
echo 'More documentation available at: '
|
||||
echo ' http://www.scylladb.com/doc/'
|
||||
echo 'By default, Scylla sends certain information about this node to a data collection server. For information, see http://www.scylladb.com/privacy/'
|
||||
echo
|
||||
|
||||
if [ `ec2_is_supported_instance_type` -eq 0 ]; then
|
||||
TYPE=`curl -s http://169.254.169.254/latest/meta-data/instance-type`
|
||||
tput setaf 1
|
||||
tput bold
|
||||
echo " $TYPE is not supported instance type!"
|
||||
tput sgr0
|
||||
echo -n "To continue startup ScyllaDB on this instance, run 'sudo scylla_io_setup' "
|
||||
echo "then 'systemctl start scylla-server'."
|
||||
echo "For a list of optimized instance types and more EC2 instructions see http://www.scylladb.com/doc/getting-started-amazon/"
|
||||
echo
|
||||
else
|
||||
SETUP=`systemctl is-active scylla-ami-setup`
|
||||
if [ "$SETUP" == "activating" ]; then
|
||||
tput setaf 4
|
||||
tput bold
|
||||
echo " Constructing RAID volume..."
|
||||
tput sgr0
|
||||
echo
|
||||
echo "Please wait for setup. To see status, run "
|
||||
echo " 'systemctl status scylla-ami-setup'"
|
||||
echo
|
||||
echo "After setup finished, scylla-server service will launch."
|
||||
echo "To see status of scylla-server, run "
|
||||
echo " 'systemctl status scylla-server'"
|
||||
echo
|
||||
elif [ "$SETUP" == "failed" ]; then
|
||||
tput setaf 1
|
||||
tput bold
|
||||
echo " AMI initial configuration failed!"
|
||||
tput sgr0
|
||||
echo
|
||||
echo "To see status, run "
|
||||
echo " 'systemctl status scylla-ami-setup'"
|
||||
echo
|
||||
else
|
||||
SCYLLA=`systemctl is-active scylla-server`
|
||||
if [ "$SCYLLA" == "activating" ]; then
|
||||
tput setaf 4
|
||||
tput bold
|
||||
echo " ScyllaDB is starting..."
|
||||
tput sgr0
|
||||
echo
|
||||
echo "Please wait for start. To see status, run "
|
||||
echo " 'systemctl status scylla-server'"
|
||||
echo
|
||||
elif [ "$SCYLLA" == "active" ]; then
|
||||
tput setaf 4
|
||||
tput bold
|
||||
echo " ScyllaDB is active."
|
||||
tput sgr0
|
||||
echo
|
||||
echo "$ nodetool status"
|
||||
echo
|
||||
nodetool status
|
||||
else
|
||||
tput setaf 1
|
||||
tput bold
|
||||
echo " ScyllaDB is not started!"
|
||||
tput sgr0
|
||||
echo "Please wait for startup. To see status of ScyllaDB, run "
|
||||
echo " 'systemctl status scylla-server'"
|
||||
echo
|
||||
fi
|
||||
fi
|
||||
echo -n " "
|
||||
/usr/lib/scylla/scylla_ec2_check
|
||||
if [ $? -eq 0 ]; then
|
||||
echo
|
||||
fi
|
||||
fi
|
||||
~/.scylla_ami_login
|
||||
|
||||
118
dist/ami/files/.scylla_ami_login
vendored
Executable file
118
dist/ami/files/.scylla_ami_login
vendored
Executable file
@@ -0,0 +1,118 @@
|
||||
#!/usr/bin/python3
|
||||
#
|
||||
# Copyright 2018 ScyllaDB
|
||||
#
|
||||
|
||||
#
|
||||
# This file is part of Scylla.
|
||||
#
|
||||
# Scylla is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Scylla is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
sys.path.append('/usr/lib/scylla')
|
||||
from scylla_util import *
|
||||
|
||||
MSG_HEADER = '''
|
||||
|
||||
_____ _ _ _____ ____
|
||||
/ ____| | | | | __ \| _ \
|
||||
| (___ ___ _ _| | | __ _| | | | |_) |
|
||||
\___ \ / __| | | | | |/ _` | | | | _ <
|
||||
____) | (__| |_| | | | (_| | |__| | |_) |
|
||||
|_____/ \___|\__, |_|_|\__,_|_____/|____/
|
||||
__/ |
|
||||
|___/
|
||||
|
||||
|
||||
Nodetool:
|
||||
nodetool help
|
||||
CQL Shell:
|
||||
cqlsh
|
||||
More documentation available at:
|
||||
http://www.scylladb.com/doc/
|
||||
By default, Scylla sends certain information about this node to a data collection server. For information, see http://www.scylladb.com/privacy/
|
||||
|
||||
'''[1:-1]
|
||||
# Login banner shown when the EC2 instance class is not a supported one.
# It is passed to colorprint() with type=<instance class>; {red}/{nocolor}
# are presumably filled in by colorprint itself (defined in scylla_util).
# The stray trailing double quote that the old shell `echo "..."` left at
# the end of the URL line has been removed so the printed URL is usable.
MSG_UNSUPPORTED_INSTANCE_TYPE = '''
{red}{type} is not supported instance type!{nocolor}
To continue startup ScyllaDB on this instance, run 'sudo scylla_io_setup' then 'systemctl start scylla-server'.
For a list of optimized instance types and more EC2 instructions see http://www.scylladb.com/doc/getting-started-amazon/

'''[1:-1]
|
||||
MSG_SETUP_ACTIVATING = '''
|
||||
{green}Constructing RAID volume...{nocolor}
|
||||
|
||||
Please wait for setup. To see status, run
|
||||
'systemctl status scylla-ami-setup'
|
||||
|
||||
After setup finished, scylla-server service will launch.
|
||||
To see status of scylla-server, run
|
||||
'systemctl status scylla-server'
|
||||
|
||||
'''[1:-1]
|
||||
MSG_SETUP_FAILED = '''
|
||||
{red}AMI initial configuration failed!{nocolor}
|
||||
|
||||
To see status, run
|
||||
'systemctl status scylla-ami-setup'
|
||||
|
||||
'''[1:-1]
|
||||
MSG_SCYLLA_ACTIVATING = '''
|
||||
{green}ScyllaDB is starting...{nocolor}
|
||||
|
||||
Please wait for start. To see status, run
|
||||
'systemctl status scylla-server'
|
||||
|
||||
'''[1:-1]
|
||||
MSG_SCYLLA_FAILED = '''
|
||||
{red}ScyllaDB is not started!{nocolor}
|
||||
Please wait for startup. To see status of ScyllaDB, run
|
||||
'systemctl status scylla-server'
|
||||
|
||||
'''[1:-1]
|
||||
MSG_SCYLLA_ACTIVE = '''
|
||||
{green}ScyllaDB is active.{nocolor}
|
||||
|
||||
$ nodetool status
|
||||
|
||||
'''[1:-1]
|
||||
|
||||
if __name__ == '__main__':
    # Login banner for the Scylla AMI: print the header, then a status
    # message that depends on the instance class and on the state of the
    # scylla-ami-setup and scylla-server systemd units.
    colorprint(MSG_HEADER)
    aws = aws_instance()
    if not aws.is_supported_instance_class():
        colorprint(MSG_UNSUPPORTED_INSTANCE_TYPE, type=aws.instance_class())
    else:
        setup = systemd_unit('scylla-ami-setup.service')
        res = setup.is_active()
        if res == 'activating':
            colorprint(MSG_SETUP_ACTIVATING)
        elif res == 'failed':
            colorprint(MSG_SETUP_FAILED)
        else:
            server = systemd_unit('scylla-server.service')
            res = server.is_active()
            if res == 'activating':
                colorprint(MSG_SCYLLA_ACTIVATING)
            elif res == 'active':
                # Only claim "active" (and query the cluster) when systemd
                # really reports the unit as active.
                colorprint(MSG_SCYLLA_ACTIVE)
                run('nodetool status', exception=False)
            else:
                # 'failed', 'inactive', 'deactivating', ...: the server is
                # not running, so do not report it as active.
                colorprint(MSG_SCYLLA_FAILED)
        print(' ', end='')
        # exception=False: a failing check must not abort the login banner.
        res = run('/usr/lib/scylla/scylla_ec2_check --nic eth0', exception=False)
        if res == 0:
            print('')
|
||||
2
dist/ami/files/scylla-ami
vendored
2
dist/ami/files/scylla-ami
vendored
Submodule dist/ami/files/scylla-ami updated: 67293baf37...fe156a5725
2
dist/ami/scylla.json
vendored
2
dist/ami/scylla.json
vendored
@@ -67,6 +67,8 @@
|
||||
{
|
||||
"type": "shell",
|
||||
"inline": [
|
||||
"sudo yum install -y epel-release",
|
||||
"sudo yum install -y python36",
|
||||
"sudo /home/{{user `ssh_username`}}/scylla-ami/scylla_install_ami {{ user `install_args` }}"
|
||||
]
|
||||
}
|
||||
|
||||
84
dist/common/scripts/node_exporter_install
vendored
84
dist/common/scripts/node_exporter_install
vendored
@@ -1,6 +1,8 @@
|
||||
#!/bin/sh
|
||||
#!/usr/bin/python3
|
||||
#
|
||||
# Copyright 2016 ScyllaDB
|
||||
# Copyright 2018 ScyllaDB
|
||||
#
|
||||
|
||||
#
|
||||
# This file is part of Scylla.
|
||||
#
|
||||
@@ -17,42 +19,46 @@
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
if [ "`id -u`" -ne 0 ]; then
|
||||
echo "Requires root permission."
|
||||
exit 1
|
||||
fi
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
import tarfile
|
||||
from scylla_util import *
|
||||
|
||||
if [ -f /usr/bin/node_exporter ] || [ -f /usr/bin/prometheus-node_exporter ]; then
|
||||
echo "node_exporter already installed"
|
||||
exit 1
|
||||
fi
|
||||
VERSION='0.14.0'
|
||||
INSTALL_DIR='/usr/lib/scylla/Prometheus/node_exporter'
|
||||
|
||||
. /usr/lib/scylla/scylla_lib.sh
|
||||
if __name__ == '__main__':
|
||||
if os.getuid() > 0:
|
||||
print('Requires root permission.')
|
||||
sys.exit(1)
|
||||
|
||||
if is_gentoo_variant; then
|
||||
emerge -uq app-metrics/node_exporter
|
||||
if is_systemd; then
|
||||
echo "app-metrics/node_exporter does not install systemd service files, please fill a bug if you need them."
|
||||
else
|
||||
rc-update add node_exporter default
|
||||
rc-service node_exporter start
|
||||
fi
|
||||
else
|
||||
version=0.14.0
|
||||
dir=/usr/lib/scylla/Prometheus/node_exporter
|
||||
mkdir -p $dir
|
||||
cd $dir
|
||||
curl -L https://github.com/prometheus/node_exporter/releases/download/v$version/node_exporter-$version.linux-amd64.tar.gz -o $dir/node_exporter-$version.linux-amd64.tar.gz
|
||||
tar -xvzf $dir/node_exporter-$version.linux-amd64.tar.gz
|
||||
rm $dir/node_exporter-$version.linux-amd64.tar.gz
|
||||
ln -s $dir/node_exporter-$version.linux-amd64/node_exporter /usr/bin
|
||||
. /etc/os-release
|
||||
if os.path.exists('/usr/bin/node_exporter') or os.path.exists('/usr/bin/prometheus-node_exporter'):
|
||||
print('node_exporter already installed')
|
||||
sys.exit(1)
|
||||
|
||||
if is_systemd; then
|
||||
systemctl enable node-exporter
|
||||
systemctl start node-exporter
|
||||
else
|
||||
cat <<EOT >> /etc/init/node_exporter.conf
|
||||
if is_gentoo_variant():
|
||||
run('emerge -uq app-metrics/node_exporter')
|
||||
if is_systemd():
|
||||
print('app-metrics/node_exporter does not install systemd service files, please fill a bug if you need them.')
|
||||
sys.exit(1)
|
||||
else:
|
||||
run('rc-update add node_exporter default')
|
||||
run('rc-service node_exporter start')
|
||||
else:
|
||||
data = curl('https://github.com/prometheus/node_exporter/releases/download/v{version}/node_exporter-{version}.linux-amd64.tar.gz'.format(version=VERSION), byte=True)
|
||||
with open('/var/tmp/node_exporter-{version}.linux-amd64.tar.gz'.format(version=VERSION), 'wb') as f:
|
||||
f.write(data)
|
||||
with tarfile.open('/var/tmp/node_exporter-{version}.linux-amd64.tar.gz'.format(version=VERSION)) as tf:
|
||||
tf.extractall(INSTALL_DIR)
|
||||
os.remove('/var/tmp/node_exporter-{version}.linux-amd64.tar.gz'.format(version=VERSION))
|
||||
os.symlink('{install_dir}/node_exporter-{version}.linux-amd64/node_exporter'.format(install_dir=INSTALL_DIR, version=VERSION), '/usr/bin/node_exporter')
|
||||
if is_systemd():
|
||||
node_exporter = systemd_unit('node-exporter.service')
|
||||
node_exporter.enable()
|
||||
node_exporter.start()
|
||||
else:
|
||||
conf = '''
|
||||
# Run node_exporter
|
||||
|
||||
start on startup
|
||||
@@ -60,9 +66,9 @@ start on startup
|
||||
script
|
||||
/usr/bin/node_exporter
|
||||
end script
|
||||
EOT
|
||||
service node_exporter start
|
||||
fi
|
||||
fi
|
||||
'''[1:-1]
|
||||
with open('/etc/init/node_exporter.conf', 'w') as f:
|
||||
f.write(conf)
|
||||
run('service node_exporter start')
|
||||
|
||||
printf "node_exporter successfully installed\n"
|
||||
print('node_exporter successfully installed')
|
||||
|
||||
44
dist/common/scripts/scylla_ec2_check
vendored
44
dist/common/scripts/scylla_ec2_check
vendored
@@ -24,46 +24,38 @@ import sys
|
||||
import argparse
|
||||
from scylla_util import *
|
||||
|
||||
def get_en_interface_type():
|
||||
type, subtype = curl('http://169.254.169.254/latest/meta-data/instance-type').split('.')
|
||||
if type in ['c3', 'c4', 'd2', 'i2', 'r3']:
|
||||
return 'ixgbevf'
|
||||
if type in ['c5', 'c5d', 'f1', 'g3', 'h1', 'i3', 'm5', 'm5d', 'p2', 'p3', 'r4', 'x1']:
|
||||
return 'ena'
|
||||
if type == 'm4':
|
||||
if subtype == '16xlarge':
|
||||
return 'ena'
|
||||
else:
|
||||
return 'ixgbevf'
|
||||
|
||||
def is_vpc_enabled():
|
||||
with open('/sys/class/net/eth0/address') as f:
|
||||
mac = f.read().strip()
|
||||
mac_stat = curl('http://169.254.169.254/latest/meta-data/network/interfaces/macs/{}/'.format(mac))
|
||||
return True if re.search(r'^vpc-id$', mac_stat, flags=re.MULTILINE) else False
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if not is_ec2():
|
||||
sys.exit(0)
|
||||
parser = argparse.ArgumentParser(description='Verify EC2 configuration is optimized.')
|
||||
parser.add_argument('--nic', default='eth0',
|
||||
help='specify NIC')
|
||||
args = parser.parse_args()
|
||||
|
||||
type = curl('http://169.254.169.254/latest/meta-data/instance-type')
|
||||
en = get_en_interface_type()
|
||||
match = re.search(r'^driver: (\S+)$', out('ethtool -i eth0'), flags=re.MULTILINE)
|
||||
if not is_valid_nic(args.nic):
|
||||
print('NIC {} doesn\'t exist.'.format(args.nic))
|
||||
sys.exit(1)
|
||||
|
||||
aws = aws_instance()
|
||||
instance_class = aws.instance_class()
|
||||
en = aws.get_en_interface_type()
|
||||
match = re.search(r'^driver: (\S+)$', out('ethtool -i {}'.format(args.nic)), flags=re.MULTILINE)
|
||||
driver = match.group(1)
|
||||
|
||||
if not en:
|
||||
print('{bold_red}{type} doesn\'t support enahanced networking!{no_color}'.format(bold_red=concolor.BOLD_RED, type=type, no_color=concolor.NO_COLOR))
|
||||
colorprint('{red}{instance_class} doesn\'t support enahanced networking!{nocolor}', instance_class=instance_class)
|
||||
print('''To enable enhanced networking, please use the instance type which supports it.
|
||||
More documentation available at:
|
||||
http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/enhanced-networking.html#enabling_enhanced_networking''')
|
||||
sys.exit(1)
|
||||
elif not is_vpc_enabled():
|
||||
print('{bold_red}VPC is not enabled!{no_color}'.format(bold_red=concolor.BOLD_RED, no_color=concolor.NO_COLOR))
|
||||
elif not aws.is_vpc_enabled(args.nic):
|
||||
colorprint('{red}VPC is not enabled!{nocolor}')
|
||||
print('To enable enhanced networking, please enable VPC.')
|
||||
sys.exit(1)
|
||||
elif driver != en:
|
||||
print('{bold_red}Enhanced networking is disabled!{no_color}'.format(bold_red=concolor.BOLD_RED, no_color=concolor.NO_COLOR))
|
||||
colorprint('{red}Enhanced networking is disabled!{nocolor}')
|
||||
print('''More documentation available at:
|
||||
http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/enhanced-networking.html''')
|
||||
sys.exit(1)
|
||||
|
||||
colorprint('{green}This EC2 instance is optimized for Scylla.{nocolor}')
|
||||
|
||||
122
dist/common/scripts/scylla_lib.sh
vendored
122
dist/common/scripts/scylla_lib.sh
vendored
@@ -1,122 +0,0 @@
|
||||
#
|
||||
# Copyright (C) 2016 ScyllaDB
|
||||
|
||||
is_debian_variant() {
|
||||
[ -f /etc/debian_version ]
|
||||
}
|
||||
|
||||
is_redhat_variant() {
|
||||
[ -f /etc/redhat-release ]
|
||||
}
|
||||
|
||||
is_gentoo_variant() {
|
||||
[ -f /etc/gentoo-release ]
|
||||
}
|
||||
|
||||
is_systemd() {
|
||||
grep -q '^systemd$' /proc/1/comm
|
||||
}
|
||||
|
||||
is_ec2() {
|
||||
[ -f /sys/hypervisor/uuid ] && [ "$(head -c 3 /sys/hypervisor/uuid)" = "ec2" ]
|
||||
}
|
||||
|
||||
is_selinux_enabled() {
|
||||
STATUS=`getenforce`
|
||||
if [ "$STATUS" = "Disabled" ]; then
|
||||
return 0
|
||||
else
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
ec2_is_supported_instance_type() {
|
||||
TYPE=`curl -s http://169.254.169.254/latest/meta-data/instance-type|cut -d . -f 1`
|
||||
case $TYPE in
|
||||
"i2"|"i3") echo 1;;
|
||||
*) echo 0;;
|
||||
esac
|
||||
}
|
||||
|
||||
verify_args() {
|
||||
if [ -z "$2" ] || [[ "$2" =~ ^--+ ]]; then
|
||||
echo "Requires more parameter for $1."
|
||||
print_usage
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
#
|
||||
# get_mode_cpu_set <mode name, e.g. 'mq', 'sq', 'sq_split'>
|
||||
#
|
||||
get_mode_cpu_set() {
    local mode=$1
    local mode_cpu_mask

    # NOTE: "$nic" is not a parameter here; it is read from the caller's
    # scope (get_tune_mode declares it as a local before calling us).
    #
    # Declaration and assignment are kept separate on purpose:
    # `local var=$(cmd)` yields the exit status of `local` (always 0), which
    # would hide a perftune.py failure. stderr is discarded inside the
    # substitution so it applies to perftune.py rather than to `local`.
    if ! mode_cpu_mask=$(/usr/lib/scylla/perftune.py --tune net --nic "$nic" --mode "$mode" --get-cpu-mask 2>/dev/null); then
        # If the given mode is not supported - return invalid CPU set
        echo "-1"
    else
        echo "$mode_cpu_mask" | /usr/lib/scylla/hex2list.py
    fi
}
|
||||
|
||||
#
|
||||
# check_cpuset_conf <NIC name>
|
||||
#
|
||||
get_tune_mode() {
|
||||
local nic=$1
|
||||
|
||||
# if cpuset.conf doesn't exist use the default mode
|
||||
[[ ! -e '/etc/scylla.d/cpuset.conf' ]] && return
|
||||
|
||||
local cur_cpuset=`cat /etc/scylla.d/cpuset.conf | cut -d "\"" -f2- | cut -d" " -f2`
|
||||
local mq_cpuset=`get_mode_cpu_set 'mq'`
|
||||
local sq_cpuset=`get_mode_cpu_set 'sq'`
|
||||
local sq_split_cpuset=`get_mode_cpu_set 'sq_split'`
|
||||
local tune_mode=""
|
||||
|
||||
case "$cur_cpuset" in
|
||||
"$mq_cpuset")
|
||||
tune_mode="--mode mq"
|
||||
;;
|
||||
"$sq_cpuset")
|
||||
tune_mode="--mode sq"
|
||||
;;
|
||||
"$sq_split_cpuset")
|
||||
tune_mode="--mode sq_split"
|
||||
;;
|
||||
esac
|
||||
|
||||
# if cpuset is something different from what we expect - use the default mode
|
||||
echo "$tune_mode"
|
||||
}
|
||||
|
||||
#
|
||||
# create_perftune_conf [<NIC name>]
|
||||
#
|
||||
create_perftune_conf() {
|
||||
local nic=$1
|
||||
[[ -z "$nic" ]] && nic='eth0'
|
||||
|
||||
# if exists - do nothing
|
||||
[[ -e '/etc/scylla.d/perftune.yaml' ]] && return
|
||||
|
||||
local mode=`get_tune_mode "$nic"`
|
||||
/usr/lib/scylla/perftune.py --tune net --nic "$nic" $mode --dump-options-file > /etc/scylla.d/perftune.yaml
|
||||
}
|
||||
|
||||
. /etc/os-release
|
||||
if is_debian_variant || is_gentoo_variant; then
|
||||
SYSCONFIG=/etc/default
|
||||
else
|
||||
SYSCONFIG=/etc/sysconfig
|
||||
fi
|
||||
. $SYSCONFIG/scylla-server
|
||||
|
||||
for i in /etc/scylla.d/*.conf; do
|
||||
if [ "$i" = "/etc/scylla.d/*.conf" ]; then
|
||||
break
|
||||
fi
|
||||
. "$i"
|
||||
done
|
||||
9
dist/common/scripts/scylla_ntp_setup
vendored
9
dist/common/scripts/scylla_ntp_setup
vendored
@@ -49,7 +49,8 @@ if __name__ == '__main__':
|
||||
if is_systemd():
|
||||
ntp = systemd_unit('ntp.service')
|
||||
ntp.stop()
|
||||
run('ntpdate ntp.ubuntu.com')
|
||||
# ignore error, ntpd may able to adjust clock later
|
||||
run('ntpdate ntp.ubuntu.com', exception=False)
|
||||
ntp.start()
|
||||
else:
|
||||
run('service ntp stop')
|
||||
@@ -70,7 +71,8 @@ if __name__ == '__main__':
|
||||
sntpd.start()
|
||||
else:
|
||||
run('rc-service ntpd stop', exception=False)
|
||||
run('ntpdate {}'.format(server))
|
||||
# ignore error, ntpd may able to adjust clock later
|
||||
run('ntpdate {}'.format(server), exception=False)
|
||||
run('rc-update add ntpd default')
|
||||
run('rc-service ntpd start')
|
||||
|
||||
@@ -87,6 +89,7 @@ if __name__ == '__main__':
|
||||
server = match.group(1)
|
||||
ntpd = systemd_unit('ntpd.service')
|
||||
ntpd.stop()
|
||||
run('ntpdate {}'.format(server))
|
||||
# ignore error, ntpd may able to adjust clock later
|
||||
run('ntpdate {}'.format(server), exception=False)
|
||||
ntpd.enable()
|
||||
ntpd.start()
|
||||
|
||||
96
dist/common/scripts/scylla_prepare
vendored
96
dist/common/scripts/scylla_prepare
vendored
@@ -1,33 +1,71 @@
|
||||
#!/bin/bash -e
|
||||
#!/usr/bin/python3
|
||||
#
|
||||
# Copyright 2018 ScyllaDB
|
||||
#
|
||||
|
||||
. /usr/lib/scylla/scylla_lib.sh
|
||||
#
|
||||
# This file is part of Scylla.
|
||||
#
|
||||
# Scylla is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Scylla is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
if [ "$AMI" = "yes" ] && [ -f /etc/scylla/ami_disabled ]; then
|
||||
rm /etc/scylla/ami_disabled
|
||||
exit 1
|
||||
fi
|
||||
import os
|
||||
import sys
|
||||
import glob
|
||||
from scylla_util import *
|
||||
|
||||
if [ "$NETWORK_MODE" = "virtio" ]; then
|
||||
ip tuntap del mode tap dev $TAP
|
||||
ip tuntap add mode tap dev $TAP user $USER one_queue vnet_hdr
|
||||
ip link set dev $TAP up
|
||||
ip link set dev $TAP master $BRIDGE
|
||||
chown $USER.$GROUP /dev/vhost-net
|
||||
elif [ "$NETWORK_MODE" = "dpdk" ]; then
|
||||
modprobe uio
|
||||
modprobe uio_pci_generic
|
||||
/usr/lib/scylla/dpdk-devbind.py --force --bind=uio_pci_generic $ETHPCIID
|
||||
for n in /sys/devices/system/node/node?; do
|
||||
echo $NR_HUGEPAGES > $n/hugepages/hugepages-2048kB/nr_hugepages
|
||||
done
|
||||
if [ "$ID" = "ubuntu" ]; then
|
||||
hugeadm --create-mounts
|
||||
fi
|
||||
else # NETWORK_MODE = posix
|
||||
if [ "$SET_NIC" = "yes" ]; then
|
||||
create_perftune_conf "$IFNAME"
|
||||
/usr/lib/scylla/posix_net_conf.sh $IFNAME --options-file /etc/scylla.d/perftune.yaml
|
||||
fi
|
||||
fi
|
||||
if __name__ == '__main__':
    # Pre-start preparation for scylla-server: read the distribution's
    # sysconfig file and set up networking according to NETWORK_MODE
    # (virtio tap device, DPDK binding + hugepages, or posix NIC tuning),
    # then run scylla-blocktune.
    if os.getuid() > 0:
        print('Requires root permission.')
        sys.exit(1)
    if is_redhat_variant():
        cfg = sysconfig_parser('/etc/sysconfig/scylla-server')
    else:
        cfg = sysconfig_parser('/etc/default/scylla-server')
    ami = cfg.get('AMI')
    mode = cfg.get('NETWORK_MODE')

    # On an AMI with the 'ami_disabled' marker present: consume the marker
    # and exit non-zero.
    if ami == 'yes' and os.path.exists('/etc/scylla/ami_disabled'):
        os.remove('/etc/scylla/ami_disabled')
        sys.exit(1)

    if mode == 'virtio':
        tap = cfg.get('TAP')
        user = cfg.get('USER')
        group = cfg.get('GROUP')
        bridge = cfg.get('BRIDGE')
        run('ip tuntap del mode tap dev {TAP}'.format(TAP=tap))
        run('ip tuntap add mode tap dev {TAP} user {USER} one_queue vnet_hdr'.format(TAP=tap, USER=user))
        run('ip link set dev {TAP} up'.format(TAP=tap))
        run('ip link set dev {TAP} master {BRIDGE}'.format(TAP=tap, BRIDGE=bridge))
        run('chown {USER}.{GROUP} /dev/vhost-net'.format(USER=user, GROUP=group))
    elif mode == 'dpdk':
        # The variable was previously assigned as 'ethpcciid' but read as
        # 'ethpciid', which raised NameError in DPDK mode.
        ethpciid = cfg.get('ETHPCIID')
        nr_hugepages = cfg.get('NR_HUGEPAGES')
        run('modprobe uio')
        run('modprobe uio_pci_generic')
        run('/usr/lib/scylla/dpdk-devbind.py --force --bind=uio_pci_generic {ETHPCIID}'.format(ETHPCIID=ethpciid))
        # Write the configured 2MB hugepage count for every NUMA node.
        for n in glob.glob('/sys/devices/system/node/node?'):
            with open('{n}/hugepages/hugepages-2048kB/nr_hugepages'.format(n=n), 'w') as f:
                f.write(nr_hugepages)
        if dist_name() == 'Ubuntu':
            run('hugeadm --create-mounts')
    else:
        # Any other NETWORK_MODE is treated as posix.
        set_nic = cfg.get('SET_NIC')
        ifname = cfg.get('IFNAME')
        if set_nic == 'yes':
            create_perftune_conf(ifname)
            run('/usr/lib/scylla/posix_net_conf.sh {IFNAME} --options-file /etc/scylla.d/perftune.yaml'.format(IFNAME=ifname))

    run('/usr/lib/scylla/scylla-blocktune')
|
||||
|
||||
10
dist/common/scripts/scylla_raid_setup
vendored
10
dist/common/scripts/scylla_raid_setup
vendored
@@ -146,7 +146,15 @@ if __name__ == '__main__':
|
||||
match = re.search(r'^/dev/\S+: (UUID="\S+")', res.strip())
|
||||
uuid = match.group(1)
|
||||
with open('/etc/fstab', 'a') as f:
|
||||
f.write('{uuid} {mount_at} xfs noatime 0 0\n'.format(uuid=uuid, mount_at=mount_at))
|
||||
f.write('{uuid} {mount_at} xfs noatime,nofail 0 0\n'.format(uuid=uuid, mount_at=mount_at))
|
||||
mounts_conf = '/etc/systemd/system/scylla-server.service.d/mounts.conf'
|
||||
if not os.path.exists(mounts_conf):
|
||||
makedirs('/etc/systemd/system/scylla-server.service.d/')
|
||||
with open(mounts_conf, 'w') as f:
|
||||
f.write('[Unit]\nRequiresMountsFor={mount_at}\n'.format(mount_at=mount_at))
|
||||
else:
|
||||
with open(mounts_conf, 'a') as f:
|
||||
f.write('RequiresMountsFor={mount_at}\n'.format(mount_at=mount_at))
|
||||
|
||||
if is_debian_variant():
|
||||
run('update-initramfs -u')
|
||||
|
||||
66
dist/common/scripts/scylla_setup
vendored
66
dist/common/scripts/scylla_setup
vendored
@@ -48,6 +48,21 @@ def interactive_ask_service(msg1, msg2, default = None):
|
||||
elif ans == 'no' or ans =='n':
|
||||
return False
|
||||
|
||||
def interactive_choose_nic():
    """Pick the network interface to configure.

    Lists the entries under /sys/class/net except 'lo'. Exits with status 1
    when none is found, returns the only entry when there is exactly one,
    and otherwise keeps prompting until is_valid_nic() accepts the typed
    name, which is then returned.
    """
    candidates = []
    for path in glob.glob('/sys/class/net/*'):
        if path != '/sys/class/net/lo':
            candidates.append(os.path.basename(path))

    if not candidates:
        print('A NIC was not found.')
        sys.exit(1)
    if len(candidates) == 1:
        return candidates[0]

    print('Please select a NIC from the following list:')
    choice = None
    # Re-print the list before every prompt; stop on the first valid answer.
    while choice is None or not is_valid_nic(choice):
        print(candidates)
        choice = input('> ')
    return choice
|
||||
|
||||
def do_verify_package(pkg):
|
||||
if is_debian_variant():
|
||||
res = run('dpkg -s {}'.format(pkg), silent=True, exception=False)
|
||||
@@ -87,7 +102,7 @@ def run_setup_script(name, script):
|
||||
res = run(script, exception=False)
|
||||
if res != 0:
|
||||
if interactive:
|
||||
print('{red}{name} setup failed. Press any key to continue...{no_color}'.format(red=concolor.BOLD_RED, name=name, no_color=concolor.NO_COLOR))
|
||||
colorprint('{red}{name} setup failed. Press any key to continue...{nocolor}', name=name)
|
||||
input()
|
||||
else:
|
||||
print('{} setup failed.'.format(name))
|
||||
@@ -101,7 +116,7 @@ if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(description='Configure environment for Scylla.')
|
||||
parser.add_argument('--disks',
|
||||
help='specify disks for RAID')
|
||||
parser.add_argument('--nic',
|
||||
parser.add_argument('--nic', default='eth0',
|
||||
help='specify NIC')
|
||||
parser.add_argument('--ntp-domain',
|
||||
help='specify NTP domain')
|
||||
@@ -112,7 +127,7 @@ if __name__ == '__main__':
|
||||
parser.add_argument('--developer-mode', action='store_true', default=False,
|
||||
help='enable developer mode')
|
||||
parser.add_argument('--no-ec2-check', action='store_true', default=False,
|
||||
help='skip EC2 configuration check(only on EC2)')
|
||||
help='skip EC2 configuration check')
|
||||
parser.add_argument('--no-kernel-check', action='store_true', default=False,
|
||||
help='skip kernel version check')
|
||||
parser.add_argument('--no-verify-package', action='store_true', default=False,
|
||||
@@ -147,12 +162,14 @@ if __name__ == '__main__':
|
||||
if len(sys.argv) == 1:
|
||||
interactive = True
|
||||
|
||||
if not interactive and not args.no_raid_setup and not args.disks:
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
if not interactive and not args.no_sysconfig_setup and not args.nic:
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
if not interactive:
|
||||
if not args.no_raid_setup and not args.disks:
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
if not args.no_sysconfig_setup or (is_ec2() and not args.no_ec2_check):
|
||||
if not is_valid_nic(args.nic):
|
||||
print('NIC {} doesn\'t exist.'.format(args.nic))
|
||||
sys.exit(1)
|
||||
|
||||
disks = args.disks
|
||||
nic = args.nic
|
||||
@@ -175,13 +192,16 @@ if __name__ == '__main__':
|
||||
fstrim_setup = not args.no_fstrim_setup
|
||||
selinux_reboot_required = False
|
||||
|
||||
print('{green}Skip any of the following steps by answering \'no\'{no_color}'.format(green=concolor.GREEN, no_color=concolor.NO_COLOR))
|
||||
if interactive:
|
||||
colorprint('{green}Skip any of the following steps by answering \'no\'{nocolor}')
|
||||
|
||||
if is_ec2():
|
||||
if interactive:
|
||||
ec2_check = interactive_ask_service('Do you want to run Amazon EC2 configuration check?', 'Yes - runs a script to verify that this instance is optimized for running Scylls. No - skips the configuration check.', 'yes')
|
||||
ec2_check = interactive_ask_service('Do you want to run Amazon EC2 configuration check?', 'Yes - runs a script to verify that this instance is optimized for running Scylla. No - skips the configuration check.', 'yes')
|
||||
if ec2_check:
|
||||
nic = interactive_choose_nic()
|
||||
if ec2_check:
|
||||
run('/usr/lib/scylla/scylla_ec2_check')
|
||||
run('/usr/lib/scylla/scylla_ec2_check --nic {}'.format(nic))
|
||||
|
||||
if interactive:
|
||||
kernel_check = interactive_ask_service('Do you want to run check your kernel version?', 'Yes - runs a script to verify that the kernel for this instance qualifies to run Scylla. No - skips the kernel check.', 'yes')
|
||||
@@ -202,7 +222,7 @@ if __name__ == '__main__':
|
||||
elif is_gentoo_variant():
|
||||
run('rc-update add scylla-server default')
|
||||
|
||||
if interactive:
|
||||
if interactive and not os.path.exists('/etc/scylla.d/housekeeping.cfg'):
|
||||
version_check = interactive_ask_service('Do you want to enable Scylla to check if there is a newer version of Scylla available?', 'Yes - start the Scylla-housekeeping service to check for a newer version. This check runs periodically. No - skips this step.', 'yes')
|
||||
if version_check:
|
||||
with open('/etc/scylla.d/housekeeping.cfg', 'w') as f:
|
||||
@@ -274,7 +294,7 @@ if __name__ == '__main__':
|
||||
print('Please select unmounted disks from the following list: {}'.format(devices))
|
||||
selected = []
|
||||
dsklist = []
|
||||
while len(devices):
|
||||
while True:
|
||||
print('type \'cancel\' to cancel RAID/XFS setup.')
|
||||
print('type \'done\' to finish selection. Selected: {}'.format(selected))
|
||||
if len(dsklist) > 0:
|
||||
@@ -316,21 +336,9 @@ if __name__ == '__main__':
|
||||
if interactive:
|
||||
sysconfig_setup = interactive_ask_service('Do you want to setup a system-wide customized configuration for Scylla?', 'Yes - setup the sysconfig file. No - skips this step.', 'yes')
|
||||
if sysconfig_setup:
|
||||
nics = [os.path.basename(n) for n in glob.glob('/sys/class/net/*') if n != '/sys/class/net/lo']
|
||||
if len(nics) == 0:
|
||||
print('A NIC was not found.')
|
||||
sys.exit(1)
|
||||
elif len(nics) == 1:
|
||||
nic=nics[0]
|
||||
else:
|
||||
print('Please select a NIC from the following list:')
|
||||
while True:
|
||||
print(nics)
|
||||
n = input('> ')
|
||||
if os.path.exists('/sys/class/net/{}'.format(n)):
|
||||
nic = n
|
||||
break
|
||||
set_nic = interactive_ask_service('Do you want to enable Network Interface Card (NIC) optimization?', 'Yes - optimize the NIC queue settings. Selecting Yes greatly improves performance. No - skip this step.', 'yes')
|
||||
nic = interactive_choose_nic()
|
||||
if interactive:
|
||||
set_nic = interactive_ask_service('Do you want to enable Network Interface Card (NIC) optimization?', 'Yes - optimize the NIC queue settings. Selecting Yes greatly improves performance. No - skip this step.', 'yes')
|
||||
if sysconfig_setup:
|
||||
setup_args = '--setup-nic' if set_nic else ''
|
||||
run_setup_script('NIC queue', '/usr/lib/scylla/scylla_sysconfig_setup --nic {nic} {setup_args}'.format(nic=nic, setup_args=setup_args))
|
||||
|
||||
46
dist/common/scripts/scylla_stop
vendored
46
dist/common/scripts/scylla_stop
vendored
@@ -1,10 +1,40 @@
|
||||
#!/bin/bash -e
|
||||
#!/usr/bin/python3
|
||||
#
|
||||
# Copyright 2018 ScyllaDB
|
||||
#
|
||||
|
||||
. /usr/lib/scylla/scylla_lib.sh
|
||||
#
|
||||
# This file is part of Scylla.
|
||||
#
|
||||
# Scylla is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Scylla is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
if [ "$NETWORK_MODE" = "virtio" ]; then
|
||||
ip tuntap del mode tap dev $TAP
|
||||
elif [ "$NETWORK_MODE" = "dpdk" ]; then
|
||||
/usr/lib/scylla/dpdk-devbind.py -u $ETHPCIID
|
||||
/usr/lib/scylla/dpdk-devbind.py -b $ETHDRV $ETHPCIID
|
||||
fi
|
||||
import os
|
||||
import sys
|
||||
from scylla_util import *
|
||||
|
||||
if __name__ == '__main__':
    # Undo the network setup done before scylla-server started: remove the
    # tap device (virtio) or unbind the NIC and rebind it to its original
    # driver (dpdk). Other network modes need no teardown here.
    if os.getuid() > 0:
        print('Requires root permission.')
        sys.exit(1)

    sysconfig_path = '/etc/sysconfig/scylla-server' if is_redhat_variant() else '/etc/default/scylla-server'
    cfg = sysconfig_parser(sysconfig_path)

    network_mode = cfg.get('NETWORK_MODE')
    if network_mode == 'virtio':
        tap_dev = cfg.get('TAP')
        run('ip tuntap del mode tap dev {TAP}'.format(TAP=tap_dev))
    elif network_mode == 'dpdk':
        pci_id = cfg.get('ETHPCIID')
        run('/usr/lib/scylla/dpdk-devbind.py -u {ETHPCIID}'.format(ETHPCIID=pci_id))
        driver = cfg.get('ETHDRV')
        run('/usr/lib/scylla/dpdk-devbind.py -b {ETHDRV} {ETHPCIID}'.format(ETHDRV=driver, ETHPCIID=pci_id))
|
||||
|
||||
2
dist/common/scripts/scylla_sysconfig_setup
vendored
2
dist/common/scripts/scylla_sysconfig_setup
vendored
@@ -64,7 +64,7 @@ if __name__ == '__main__':
|
||||
help='AMI instance mode')
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.nic and not os.path.exists('/sys/class/net/{}'.format(args.nic)):
|
||||
if args.nic and not is_valid_nic(args.nic):
|
||||
print('NIC {} not found.'.format(args.nic))
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
159
dist/common/scripts/scylla_util.py
vendored
159
dist/common/scripts/scylla_util.py
vendored
@@ -29,13 +29,17 @@ import io
|
||||
import shlex
|
||||
import shutil
|
||||
|
||||
def curl(url):
|
||||
def curl(url, byte=False):
|
||||
max_retries = 5
|
||||
retries = 0
|
||||
while True:
|
||||
try:
|
||||
req = urllib.request.Request(url)
|
||||
return urllib.request.urlopen(req).read().decode('utf-8')
|
||||
with urllib.request.urlopen(req) as res:
|
||||
if byte:
|
||||
return res.read()
|
||||
else:
|
||||
return res.read().decode('utf-8')
|
||||
except urllib.error.HTTPError:
|
||||
logging.warn("Failed to grab %s..." % url)
|
||||
time.sleep(5)
|
||||
@@ -80,6 +84,10 @@ class aws_instance:
|
||||
continue
|
||||
self._disks[t] += [ self.__xenify(dev) ]
|
||||
|
||||
def __mac_address(self, nic='eth0'):
    """Return the contents of /sys/class/net/<nic>/address, stripped of surrounding whitespace."""
    sysfs_path = '/sys/class/net/{}/address'.format(nic)
    with open(sysfs_path) as addr_file:
        raw = addr_file.read()
    return raw.strip()
|
||||
|
||||
def __init__(self):
|
||||
self._type = self.__instance_metadata("instance-type")
|
||||
self.__populate_disks()
|
||||
@@ -96,6 +104,25 @@ class aws_instance:
|
||||
"""Returns the class of the instance we are running in. i.e.: i3"""
|
||||
return self._type.split(".")[0]
|
||||
|
||||
def is_supported_instance_class(self):
    """Return True when instance_class() is 'i2' or 'i3', False otherwise."""
    # The membership test already yields a bool; no need to branch on it.
    return self.instance_class() in ('i2', 'i3')
|
||||
|
||||
def get_en_interface_type(self):
    """Return the interface type string for this instance's class.

    Returns 'ixgbevf' or 'ena' for the instance classes listed below and
    None for any class that is not listed. For 'm4' the answer depends on
    the instance size: only '16xlarge' maps to 'ena'.
    """
    family = self.instance_class()
    size = self.instance_size()
    ixgbevf_families = ('c3', 'c4', 'd2', 'i2', 'r3')
    ena_families = ('c5', 'c5d', 'f1', 'g3', 'h1', 'i3', 'm5', 'm5d', 'p2', 'p3', 'r4', 'x1')
    # 'm4' appears in neither table, so it can be handled first.
    if family == 'm4':
        return 'ena' if size == '16xlarge' else 'ixgbevf'
    if family in ixgbevf_families:
        return 'ixgbevf'
    if family in ena_families:
        return 'ena'
    return None
|
||||
|
||||
def disks(self):
|
||||
"""Returns all disks in the system, as visible from the AWS registry"""
|
||||
disks = set()
|
||||
@@ -134,6 +161,11 @@ class aws_instance:
|
||||
"""Returns the private IPv4 address of this instance"""
|
||||
return self.__instance_metadata("local-ipv4")
|
||||
|
||||
def is_vpc_enabled(self, nic='eth0'):
    """Return True if the metadata listing for `nic` contains a 'vpc-id' line.

    Looks up the address of `nic`, fetches the instance metadata entry
    'network/interfaces/macs/<address>' and searches it for a line that
    consists solely of 'vpc-id'.
    """
    mac_addr = self.__mac_address(nic)
    listing = self.__instance_metadata('network/interfaces/macs/{}'.format(mac_addr))
    # MULTILINE makes ^ and $ anchor at every line of the listing.
    found = re.search(r'^vpc-id$', listing, flags=re.MULTILINE)
    return found is not None
|
||||
|
||||
|
||||
## Regular expression helpers
|
||||
# non-advancing comment matcher
|
||||
@@ -223,37 +255,24 @@ class scylla_cpuinfo:
|
||||
return len(self._cpu_data["system"])
|
||||
|
||||
def run(cmd, shell=False, silent=False, exception=True):
|
||||
stdout=None
|
||||
stderr=None
|
||||
if silent:
|
||||
stdout=subprocess.DEVNULL
|
||||
stderr=subprocess.DEVNULL
|
||||
if shell:
|
||||
if exception:
|
||||
return subprocess.check_call(cmd, shell=True, stdout=stdout, stderr=stderr)
|
||||
else:
|
||||
p = subprocess.Popen(cmd, shell=True, stdout=stdout, stderr=stderr)
|
||||
return p.wait()
|
||||
stdout=subprocess.DEVNULL if silent else None
|
||||
stderr=subprocess.DEVNULL if silent else None
|
||||
if not shell:
|
||||
cmd = shlex.split(cmd)
|
||||
if exception:
|
||||
return subprocess.check_call(cmd, shell=shell, stdout=stdout, stderr=stderr)
|
||||
else:
|
||||
if exception:
|
||||
return subprocess.check_call(shlex.split(cmd), stdout=stdout, stderr=stderr)
|
||||
else:
|
||||
p = subprocess.Popen(shlex.split(cmd), stdout=stdout, stderr=stderr)
|
||||
return p.wait()
|
||||
p = subprocess.Popen(cmd, shell=shell, stdout=stdout, stderr=stderr)
|
||||
return p.wait()
|
||||
|
||||
def out(cmd, shell=False, exception=True):
|
||||
if shell:
|
||||
if exception:
|
||||
return subprocess.check_output(cmd, shell=True).strip().decode('utf-8')
|
||||
else:
|
||||
p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
|
||||
return p.communicate()[0].strip().decode('utf-8')
|
||||
if not shell:
|
||||
cmd = shlex.split(cmd)
|
||||
if exception:
|
||||
return subprocess.check_output(cmd, shell=shell).strip().decode('utf-8')
|
||||
else:
|
||||
if exception:
|
||||
return subprocess.check_output(shlex.split(cmd)).strip().decode('utf-8')
|
||||
else:
|
||||
p = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE)
|
||||
return p.communicate()[0].strip().decode('utf-8')
|
||||
p = subprocess.Popen(cmd, shell=shell, stdout=subprocess.PIPE)
|
||||
return p.communicate()[0].strip().decode('utf-8')
|
||||
|
||||
def is_debian_variant():
|
||||
return os.path.exists('/etc/debian_version')
|
||||
@@ -320,30 +339,59 @@ def dist_ver():
|
||||
return platform.dist()[1]
|
||||
|
||||
def is_unused_disk(dev):
|
||||
# dev is not in /sys/class/block/
|
||||
# dev is not in /sys/class/block/, like /dev/nvme[0-9]+
|
||||
if not os.path.isdir('/sys/class/block/{dev}'.format(dev=dev.replace('/dev/',''))):
|
||||
return False
|
||||
# dev is mounted
|
||||
with open('/proc/mounts') as f:
|
||||
s = f.read().strip()
|
||||
if len(re.findall('^{} '.format(dev), s, flags=re.MULTILINE)) > 0:
|
||||
try:
|
||||
fd = os.open(dev, os.O_EXCL)
|
||||
os.close(fd)
|
||||
return True
|
||||
except OSError:
|
||||
return False
|
||||
# dev is used in LVM
|
||||
if shutil.which('pvs'):
|
||||
s = out('pvs -o pv_name --nohead')
|
||||
if len(re.findall(dev, s, flags=re.MULTILINE)) > 0:
|
||||
return False
|
||||
# dev is used for swap
|
||||
s = out('swapon --show=NAME --noheadings')
|
||||
if len(re.findall(dev, s, flags=re.MULTILINE)) > 0:
|
||||
return False
|
||||
# dev is used in MDRAID
|
||||
if os.path.exists('/proc/mdstat'):
|
||||
with open('/proc/mdstat') as f:
|
||||
s = f.read().strip()
|
||||
if len(re.findall(dev, s, flags=re.MULTILINE)) > 0:
|
||||
return False
|
||||
return True
|
||||
|
||||
# Escape sequences that colorprint() makes available as the
# {green}, {red} and {nocolor} placeholders.
CONCOLORS = {'green':'\033[1;32m', 'red':'\033[1;31m', 'nocolor':'\033[0m'}

def colorprint(msg, **kwargs):
    """Print `msg` after str.format() substitution.

    The placeholders defined in CONCOLORS are always available; any keyword
    argument adds a placeholder or overrides one of the same name.
    """
    substitutions = dict(CONCOLORS, **kwargs)
    print(msg.format(**substitutions))
|
||||
|
||||
def get_mode_cpuset(nic, mode):
    """Ask perftune.py for the CPU mask of `nic` in `mode` and convert it.

    Returns hex2list() applied to the mask printed by perftune.py, or the
    string '-1' when a subprocess.CalledProcessError is raised.
    """
    perftune_cmd = '/usr/lib/scylla/perftune.py --tune net --nic "{nic}" --mode "{mode}" --get-cpu-mask'.format(nic=nic, mode=mode)
    try:
        return hex2list(out(perftune_cmd))
    except subprocess.CalledProcessError:
        return '-1'
|
||||
|
||||
def get_cur_cpuset():
    """Return the CPUSET value from /etc/scylla.d/cpuset.conf.

    A leading '--cpuset ' is removed and the result is stripped of
    surrounding whitespace.
    """
    cpuset_option = sysconfig_parser('/etc/scylla.d/cpuset.conf').get('CPUSET')
    bare_cpuset = re.sub(r'^--cpuset (.+)$', r'\1', cpuset_option)
    return bare_cpuset.strip()
|
||||
|
||||
def get_tune_mode(nic):
    """Return which perftune mode matches the currently configured cpuset.

    Compares get_cur_cpuset() with get_mode_cpuset() for 'mq', 'sq' and
    'sq_split' (in that order) and returns the first mode that is equal.
    Returns None when /etc/scylla.d/cpuset.conf does not exist or when no
    mode matches.
    """
    if not os.path.exists('/etc/scylla.d/cpuset.conf'):
        return None
    current = get_cur_cpuset()
    # Every mode is queried up front, before any comparison is made.
    per_mode = [(mode, get_mode_cpuset(nic, mode)) for mode in ('mq', 'sq', 'sq_split')]
    for mode, mode_cpuset in per_mode:
        if current == mode_cpuset:
            return mode
    return None
|
||||
|
||||
def create_perftune_conf(nic='eth0'):
    """Create /etc/scylla.d/perftune.yaml from perftune.py's option dump.

    Does nothing if the file already exists. Otherwise the tune mode is
    taken from get_tune_mode(nic) and the output of
    'perftune.py ... --dump-options-file' is written to the file.
    """
    conf_path = '/etc/scylla.d/perftune.yaml'
    if os.path.exists(conf_path):
        return
    tune_mode = get_tune_mode(nic)
    # NOTE(review): get_tune_mode() returns None when cpuset.conf is missing
    # or no mode matches; that is formatted below as "--mode None" -- confirm
    # this is what perftune.py should receive.
    dump_cmd = '/usr/lib/scylla/perftune.py --tune net --nic "{nic}" --mode {mode} --dump-options-file'.format(nic=nic, mode=tune_mode)
    options_text = out(dump_cmd)
    with open(conf_path, 'w') as conf_file:
        conf_file.write(options_text)
|
||||
|
||||
def is_valid_nic(nic):
    """Return True if /sys/class/net/<nic> exists."""
    sysfs_path = '/sys/class/net/{}'.format(nic)
    return os.path.exists(sysfs_path)
|
||||
|
||||
class SystemdException(Exception):
|
||||
pass
|
||||
@@ -373,8 +421,7 @@ class systemd_unit:
|
||||
return run('systemctl disable {}'.format(self._unit))
|
||||
|
||||
def is_active(self):
|
||||
res = out('systemctl is-active {}'.format(self._unit), exception=False)
|
||||
return True if re.match(r'^active', res, flags=re.MULTILINE) else False
|
||||
return out('systemctl is-active {}'.format(self._unit), exception=False)
|
||||
|
||||
def mask(self):
|
||||
return run('systemctl mask {}'.format(self._unit))
|
||||
@@ -405,7 +452,7 @@ class sysconfig_parser:
|
||||
self.__load()
|
||||
|
||||
def get(self, key):
|
||||
return self._cfg.get('global', key)
|
||||
return self._cfg.get('global', key).strip('"')
|
||||
|
||||
def set(self, key, val):
|
||||
if not self._cfg.has_option('global', key):
|
||||
@@ -416,9 +463,3 @@ class sysconfig_parser:
|
||||
def commit(self):
    """Write self._data to the file named by self._filename, replacing its contents."""
    with open(self._filename, 'w') as conf_file:
        conf_file.write(self._data)
|
||||
|
||||
class concolor:
    """Named escape sequences for colored console output."""
    # regular-intensity colors
    GREEN, RED = '\033[0;32m', '\033[0;31m'
    # bold red
    BOLD_RED = '\033[1;31m'
    # reset sequence
    NO_COLOR = '\033[0m'
|
||||
|
||||
@@ -10,7 +10,7 @@ Group=scylla
|
||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg --repo-files '/etc/apt/sources.list.d/scylla*.list' version --mode d
|
||||
{{/debian}}
|
||||
{{#redhat}}
|
||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg --repo-files @@REPOFILES@@ version --mode d
|
||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg --repo-files '/etc/yum.repos.d/scylla*.repo' version --mode d
|
||||
{{/redhat}}
|
||||
|
||||
[Install]
|
||||
|
||||
@@ -6,7 +6,7 @@ After=network.target
|
||||
Type=simple
|
||||
User=scylla
|
||||
Group=scylla
|
||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg --repo-files @@REPOFILES@@ version --mode r
|
||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg --repo-files '/etc/yum.repos.d/scylla*.repo' version --mode r
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
||||
7
dist/debian/build_deb.sh
vendored
7
dist/debian/build_deb.sh
vendored
@@ -116,6 +116,9 @@ if [ ! -f /usr/bin/pystache ]; then
|
||||
sudo apt-get install -y python-pystache
|
||||
fi
|
||||
fi
|
||||
if is_debian_variant && [ ! -f /usr/share/doc/python-pkg-resources/copyright ]; then
|
||||
sudo apt-get install -y python-pkg-resources
|
||||
fi
|
||||
|
||||
if [ -z "$TARGET" ]; then
|
||||
if is_debian_variant; then
|
||||
@@ -157,8 +160,8 @@ chmod a+rx debian/rules
|
||||
|
||||
if [ "$TARGET" != "trusty" ]; then
|
||||
pystache dist/common/systemd/scylla-server.service.mustache "{ $MUSTACHE_DIST }" > debian/scylla-server.service
|
||||
pystache dist/common/systemd/scylla-housekeeping-daily.service.mustache "{ $MUSTACHE_DIST }" > debian/scylla-housekeeping-daily.service
|
||||
pystache dist/common/systemd/scylla-housekeeping-restart.service.mustache "{ $MUSTACHE_DIST }" > debian/scylla-housekeeping-restart.service
|
||||
pystache dist/common/systemd/scylla-housekeeping-daily.service.mustache "{ $MUSTACHE_DIST }" > debian/scylla-server.scylla-housekeeping-daily.service
|
||||
pystache dist/common/systemd/scylla-housekeeping-restart.service.mustache "{ $MUSTACHE_DIST }" > debian/scylla-server.scylla-housekeeping-restart.service
|
||||
cp dist/common/systemd/scylla-fstrim.service debian/scylla-server.scylla-fstrim.service
|
||||
cp dist/common/systemd/node-exporter.service debian/scylla-server.node-exporter.service
|
||||
fi
|
||||
|
||||
4
dist/docker/redhat/Dockerfile
vendored
4
dist/docker/redhat/Dockerfile
vendored
@@ -26,14 +26,14 @@ ADD commandlineparser.py /commandlineparser.py
|
||||
ADD docker-entrypoint.py /docker-entrypoint.py
|
||||
|
||||
# Install Scylla:
|
||||
RUN curl http://downloads.scylladb.com/rpm/unstable/centos/master/latest/scylla.repo -o /etc/yum.repos.d/scylla.repo && \
|
||||
RUN curl http://downloads.scylladb.com/rpm/centos/scylla-2.3.repo -o /etc/yum.repos.d/scylla.repo && \
|
||||
yum -y install epel-release && \
|
||||
yum -y clean expire-cache && \
|
||||
yum -y update && \
|
||||
yum -y remove boost-thread boost-system && \
|
||||
yum -y install scylla hostname supervisor && \
|
||||
yum clean all && \
|
||||
yum -y install python34 python34-PyYAML && \
|
||||
yum -y install python36 python36-PyYAML && \
|
||||
cat /scylla_bashrc >> /etc/bashrc && \
|
||||
mkdir -p /etc/supervisor.conf.d && \
|
||||
mkdir -p /var/log/scylla && \
|
||||
|
||||
8
dist/docker/redhat/scylla-service.sh
vendored
8
dist/docker/redhat/scylla-service.sh
vendored
@@ -1,7 +1,13 @@
|
||||
#!/bin/bash
|
||||
|
||||
. /usr/lib/scylla/scylla_prepare
|
||||
/usr/lib/scylla/scylla_prepare
|
||||
|
||||
. /etc/sysconfig/scylla-server
|
||||
|
||||
export SCYLLA_HOME SCYLLA_CONF
|
||||
|
||||
for f in /etc/scylla.d/*.conf; do
|
||||
. "$f"
|
||||
done
|
||||
|
||||
exec /usr/bin/scylla $SCYLLA_ARGS $SEASTAR_IO $DEV_MODE $CPUSET $SCYLLA_DOCKER_ARGS
|
||||
|
||||
7
dist/redhat/build_rpm.sh
vendored
7
dist/redhat/build_rpm.sh
vendored
@@ -98,12 +98,19 @@ rm -f version
|
||||
|
||||
pystache dist/redhat/scylla.spec.mustache "{ \"version\": \"$SCYLLA_VERSION\", \"release\": \"$SCYLLA_RELEASE\", \"housekeeping\": $DIST }" > build/scylla.spec
|
||||
|
||||
# mock generates files owned by root; hand the given paths (recursively)
# back to the user and group running this script.
fix_ownership() {
    local owner
    owner="$(id -u):$(id -g)"
    sudo chown "$owner" -R "$@"
}
|
||||
|
||||
if [ $JOBS -gt 0 ]; then
|
||||
RPM_JOBS_OPTS=(--define="_smp_mflags -j$JOBS")
|
||||
fi
|
||||
sudo mock --buildsrpm --root=$TARGET --resultdir=`pwd`/build/srpms --spec=build/scylla.spec --sources=build/scylla-$VERSION.tar $SRPM_OPTS "${RPM_JOBS_OPTS[@]}"
|
||||
fix_ownership build/srpms
|
||||
if [[ "$TARGET" =~ ^epel-7- ]]; then
|
||||
TARGET=scylla-$TARGET
|
||||
RPM_OPTS="$RPM_OPTS --configdir=dist/redhat/mock"
|
||||
fi
|
||||
sudo mock --rebuild --root=$TARGET --resultdir=`pwd`/build/rpms $RPM_OPTS "${RPM_JOBS_OPTS[@]}" build/srpms/scylla-$VERSION*.src.rpm
|
||||
fix_ownership build/rpms
|
||||
|
||||
7
dist/redhat/scylla.spec.mustache
vendored
7
dist/redhat/scylla.spec.mustache
vendored
@@ -56,9 +56,9 @@ License: AGPLv3
|
||||
URL: http://www.scylladb.com/
|
||||
BuildRequires: libaio-devel libstdc++-devel cryptopp-devel hwloc-devel numactl-devel libpciaccess-devel libxml2-devel zlib-devel thrift-devel yaml-cpp-devel lz4-devel snappy-devel jsoncpp-devel systemd-devel xz-devel pcre-devel elfutils-libelf-devel bzip2-devel keyutils-libs-devel xfsprogs-devel make gnutls-devel systemd-devel lksctp-tools-devel protobuf-devel protobuf-compiler systemtap-sdt-devel ninja-build cmake python ragel grep kernel-headers
|
||||
%{?fedora:BuildRequires: boost-devel antlr3-tool antlr3-C++-devel python3 gcc-c++ libasan libubsan python3-pyparsing dnf-yum python2-pystache}
|
||||
%{?rhel:BuildRequires: scylla-libstdc++73-static scylla-boost163-devel scylla-boost163-static scylla-antlr35-tool scylla-antlr35-C++-devel python34 scylla-gcc73-c++, scylla-python34-pyparsing20 yaml-cpp-static pystache python-setuptools}
|
||||
%{?rhel:BuildRequires: scylla-libstdc++73-static scylla-boost163-devel scylla-boost163-static scylla-antlr35-tool scylla-antlr35-C++-devel python36 scylla-gcc73-c++, scylla-python36-pyparsing20 yaml-cpp-static pystache python-setuptools}
|
||||
Requires: scylla-conf systemd-libs hwloc PyYAML python-urwid pciutils pyparsing python-requests curl util-linux python-setuptools pciutils python3-pyudev mdadm xfsprogs
|
||||
%{?rhel:Requires: python34 python34-PyYAML kernel >= 3.10.0-514}
|
||||
%{?rhel:Requires: python36 python36-PyYAML kernel >= 3.10.0-514}
|
||||
%{?fedora:Requires: python3 python3-PyYAML}
|
||||
Conflicts: abrt
|
||||
%ifarch x86_64
|
||||
@@ -97,7 +97,7 @@ cflags="--cflags=${defines[*]}"
|
||||
%endif
|
||||
%if 0%{?rhel}
|
||||
. /etc/profile.d/scylla.sh
|
||||
python3.4 ./configure.py %{?configure_opt} --with=scylla --with=iotune --mode=release "$cflags" --static-boost --static-yaml-cpp --compiler=/opt/scylladb/bin/g++-7.3 --python python3.4 --ldflag=-Wl,-rpath=/opt/scylladb/lib64
|
||||
python3.6 ./configure.py %{?configure_opt} --with=scylla --with=iotune --mode=release "$cflags" --static-boost --static-yaml-cpp --compiler=/opt/scylladb/bin/g++-7.3 --python python3.6 --ldflag=-Wl,-rpath=/opt/scylladb/lib64
|
||||
%endif
|
||||
ninja-build %{?_smp_mflags} build/release/scylla build/release/iotune
|
||||
|
||||
@@ -201,7 +201,6 @@ rm -rf $RPM_BUILD_ROOT
|
||||
%{_prefix}/lib/scylla/api/api-doc/*
|
||||
%{_prefix}/lib/scylla/scyllatop/*
|
||||
%{_prefix}/lib/scylla/scylla_config_get.py
|
||||
%{_prefix}/lib/scylla/scylla_lib.sh
|
||||
%{_prefix}/lib/scylla/scylla_util.py
|
||||
%if 0%{?fedora} >= 27
|
||||
%{_prefix}/lib/scylla/scylla-gdb.py
|
||||
|
||||
@@ -449,9 +449,13 @@ GCC6_CONCEPT(requires requires(StopCondition stop, ConsumeMutationFragment consu
|
||||
{ consume_mf(std::move(mf)) } -> void;
|
||||
{ consume_eos() } -> future<>;
|
||||
})
|
||||
future<> consume_mutation_fragments_until(flat_mutation_reader& r, StopCondition&& stop,
|
||||
ConsumeMutationFragment&& consume_mf, ConsumeEndOfStream&& consume_eos) {
|
||||
return do_until([stop] { return stop(); }, [&r, stop, consume_mf, consume_eos] {
|
||||
future<> consume_mutation_fragments_until(
|
||||
flat_mutation_reader& r,
|
||||
StopCondition&& stop,
|
||||
ConsumeMutationFragment&& consume_mf,
|
||||
ConsumeEndOfStream&& consume_eos,
|
||||
db::timeout_clock::time_point timeout) {
|
||||
return do_until([stop] { return stop(); }, [&r, stop, consume_mf, consume_eos, timeout] {
|
||||
while (!r.is_buffer_empty()) {
|
||||
consume_mf(r.pop_mutation_fragment());
|
||||
if (stop()) {
|
||||
@@ -461,7 +465,7 @@ future<> consume_mutation_fragments_until(flat_mutation_reader& r, StopCondition
|
||||
if (r.is_end_of_stream()) {
|
||||
return consume_eos();
|
||||
}
|
||||
return r.fill_buffer();
|
||||
return r.fill_buffer(timeout);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -129,26 +129,8 @@ public:
|
||||
update_is_normal();
|
||||
}
|
||||
|
||||
void apply_application_state(application_state key, versioned_value&& value) {
|
||||
auto&& e = _application_state[key];
|
||||
if (e.version < value.version) {
|
||||
e = std::move(value);
|
||||
}
|
||||
update_is_normal();
|
||||
}
|
||||
|
||||
void apply_application_state(application_state key, const versioned_value& value) {
|
||||
auto&& e = _application_state[key];
|
||||
if (e.version < value.version) {
|
||||
e = value;
|
||||
}
|
||||
update_is_normal();
|
||||
}
|
||||
|
||||
void apply_application_state(const endpoint_state& es) {
|
||||
for (auto&& e : es._application_state) {
|
||||
apply_application_state(e.first, e.second);
|
||||
}
|
||||
void add_application_state(const endpoint_state& es) {
|
||||
_application_state = es._application_state;
|
||||
update_is_normal();
|
||||
}
|
||||
|
||||
|
||||
@@ -923,7 +923,7 @@ void gossiper::make_random_gossip_digest(std::vector<gossip_digest>& g_digests)
|
||||
future<> gossiper::replicate(inet_address ep, const endpoint_state& es) {
|
||||
return container().invoke_on_all([ep, es, orig = engine().cpu_id(), self = shared_from_this()] (gossiper& g) {
|
||||
if (engine().cpu_id() != orig) {
|
||||
g.endpoint_state_map[ep].apply_application_state(es);
|
||||
g.endpoint_state_map[ep].add_application_state(es);
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -932,7 +932,7 @@ future<> gossiper::replicate(inet_address ep, const std::map<application_state,
|
||||
return container().invoke_on_all([ep, &src, &changed, orig = engine().cpu_id(), self = shared_from_this()] (gossiper& g) {
|
||||
if (engine().cpu_id() != orig) {
|
||||
for (auto&& key : changed) {
|
||||
g.endpoint_state_map[ep].apply_application_state(key, src.at(key));
|
||||
g.endpoint_state_map[ep].add_application_state(key, src.at(key));
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -941,7 +941,7 @@ future<> gossiper::replicate(inet_address ep, const std::map<application_state,
|
||||
future<> gossiper::replicate(inet_address ep, application_state key, const versioned_value& value) {
|
||||
return container().invoke_on_all([ep, key, &value, orig = engine().cpu_id(), self = shared_from_this()] (gossiper& g) {
|
||||
if (engine().cpu_id() != orig) {
|
||||
g.endpoint_state_map[ep].apply_application_state(key, value);
|
||||
g.endpoint_state_map[ep].add_application_state(key, value);
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -1168,11 +1168,13 @@ stdx::optional<endpoint_state> gossiper::get_endpoint_state_for_endpoint(inet_ad
|
||||
}
|
||||
}
|
||||
|
||||
void gossiper::reset_endpoint_state_map() {
|
||||
endpoint_state_map.clear();
|
||||
future<> gossiper::reset_endpoint_state_map() {
|
||||
_unreachable_endpoints.clear();
|
||||
_live_endpoints.clear();
|
||||
_live_endpoints_just_added.clear();
|
||||
return container().invoke_on_all([] (gossiper& g) {
|
||||
g.endpoint_state_map.clear();
|
||||
});
|
||||
}
|
||||
|
||||
std::unordered_map<inet_address, endpoint_state>& gms::gossiper::get_endpoint_states() {
|
||||
@@ -1655,6 +1657,7 @@ void gossiper::maybe_initialize_local_state(int generation_nbr) {
|
||||
}
|
||||
}
|
||||
|
||||
// Runs inside seastar::async context
|
||||
void gossiper::add_saved_endpoint(inet_address ep) {
|
||||
if (ep == get_broadcast_address()) {
|
||||
logger.debug("Attempt to add self as saved endpoint");
|
||||
@@ -1680,6 +1683,7 @@ void gossiper::add_saved_endpoint(inet_address ep) {
|
||||
}
|
||||
ep_state.mark_dead();
|
||||
endpoint_state_map[ep] = ep_state;
|
||||
replicate(ep, ep_state).get();
|
||||
_unreachable_endpoints[ep] = now();
|
||||
logger.trace("Adding saved endpoint {} {}", ep, ep_state.get_heart_beat_state().get_generation());
|
||||
}
|
||||
@@ -1915,6 +1919,7 @@ void gossiper::mark_as_shutdown(const inet_address& endpoint) {
|
||||
auto& ep_state = *es;
|
||||
ep_state.add_application_state(application_state::STATUS, storage_service_value_factory().shutdown(true));
|
||||
ep_state.get_heart_beat_state().force_highest_possible_version_unsafe();
|
||||
replicate(endpoint, ep_state).get();
|
||||
mark_dead(endpoint, ep_state);
|
||||
get_local_failure_detector().force_conviction(endpoint);
|
||||
}
|
||||
|
||||
@@ -418,7 +418,7 @@ public:
|
||||
stdx::optional<endpoint_state> get_endpoint_state_for_endpoint(inet_address ep) const;
|
||||
|
||||
// removes ALL endpoint states; should only be called after shadow gossip
|
||||
void reset_endpoint_state_map();
|
||||
future<> reset_endpoint_state_map();
|
||||
|
||||
std::unordered_map<inet_address, endpoint_state>& get_endpoint_states();
|
||||
|
||||
|
||||
36
imr/alloc.hh
36
imr/alloc.hh
@@ -84,6 +84,8 @@ template<typename Structure, typename CtxFactory>
|
||||
GCC6_CONCEPT(requires ContextFactory<CtxFactory>)
|
||||
class lsa_migrate_fn final : public migrate_fn_type, CtxFactory {
|
||||
public:
|
||||
using structure = Structure;
|
||||
|
||||
explicit lsa_migrate_fn(CtxFactory context_factory)
|
||||
: migrate_fn_type(1)
|
||||
, CtxFactory(std::move(context_factory))
|
||||
@@ -201,8 +203,21 @@ public:
|
||||
/// arguments are passed to `T::size_when_serialized`.
|
||||
///
|
||||
/// \return null pointer of type `uint8_t*`.
|
||||
template<typename T, typename MigrateFn, typename... Args>
|
||||
uint8_t* allocate(MigrateFn* migrate_fn, Args&&... args) noexcept {
|
||||
static_assert(std::is_same_v<typename MigrateFn::structure, T>);
|
||||
return do_allocate<T>(migrate_fn, std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
template<typename T, typename MigrateFn, typename... Args>
|
||||
auto allocate_nested(MigrateFn* migrate_fn, Args&&... args) noexcept {
|
||||
static_assert(std::is_same_v<typename MigrateFn::structure, T>);
|
||||
return do_allocate_nested<T>(migrate_fn, std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
private:
|
||||
template<typename T, typename... Args>
|
||||
uint8_t* allocate(migrate_fn_type* migrate_fn, Args&& ... args) noexcept {
|
||||
uint8_t* do_allocate(migrate_fn_type* migrate_fn, Args&&... args) noexcept {
|
||||
auto size = T::size_when_serialized(std::forward<Args>(args)...);
|
||||
_parent.request(size, migrate_fn);
|
||||
|
||||
@@ -216,7 +231,7 @@ public:
|
||||
}
|
||||
|
||||
template<typename T, typename... Args>
|
||||
auto allocate_nested(migrate_fn_type* migrate_fn, Args&& ... args) noexcept {
|
||||
auto do_allocate_nested(migrate_fn_type* migrate_fn, Args&& ... args) noexcept {
|
||||
auto n = _parent.request(0, migrate_fn);
|
||||
return T::get_sizer(continuation(_parent, n),
|
||||
std::forward<Args>(args)...);
|
||||
@@ -244,15 +259,28 @@ public:
|
||||
/// to the buffer requested in the sizing phase. Arguments are passed
|
||||
/// to `T::serialize`.
|
||||
/// \return pointer to the IMR object
|
||||
template<typename T, typename MigrateFn, typename... Args>
|
||||
uint8_t* allocate(MigrateFn* migrate_fn, Args&&... args) noexcept {
|
||||
static_assert(std::is_same_v<typename MigrateFn::structure, T>);
|
||||
return do_allocate<T>(migrate_fn, std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
template<typename T, typename MigrateFn, typename... Args>
|
||||
auto allocate_nested(MigrateFn* migrate_fn, Args&&... args) noexcept {
|
||||
static_assert(std::is_same_v<typename MigrateFn::structure, T>);
|
||||
return do_allocate_nested<T>(migrate_fn, std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
private:
|
||||
template<typename T, typename... Args>
|
||||
uint8_t* allocate(migrate_fn_type* migrate_fn, Args&& ... args) noexcept {
|
||||
uint8_t* do_allocate(migrate_fn_type* migrate_fn, Args&&... args) noexcept {
|
||||
auto ptr = _parent.next_object();
|
||||
T::serialize(ptr, std::forward<Args>(args)...);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
template<typename T, typename... Args>
|
||||
auto allocate_nested(migrate_fn_type*, Args&& ... args) noexcept {
|
||||
auto do_allocate_nested(migrate_fn_type*, Args&& ... args) noexcept {
|
||||
auto ptr = _parent.next_object();
|
||||
return T::get_serializer(ptr,
|
||||
continuation(ptr),
|
||||
|
||||
25
imr/utils.hh
25
imr/utils.hh
@@ -61,8 +61,12 @@ private:
|
||||
public:
|
||||
object_context(const uint8_t*, State... state) : _state { state... } { }
|
||||
template<typename Tag, typename... Args>
|
||||
Context context_for(const uint8_t* ptr, Args&&... args) const noexcept {
|
||||
return create(ptr, std::index_sequence_for<State...>());
|
||||
auto context_for(const uint8_t* ptr, Args&&... args) const noexcept {
|
||||
if constexpr (std::is_same_v<Tag, basic_object::tags::back_pointer>) {
|
||||
return no_context_t();
|
||||
} else {
|
||||
return create(ptr, std::index_sequence_for<State...>());
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -158,13 +162,22 @@ public:
|
||||
}
|
||||
|
||||
/// Create an IMR object
|
||||
template<typename Writer>
|
||||
template<typename Writer, typename MigrateFn>
|
||||
GCC6_CONCEPT(requires WriterAllocator<Writer, Structure>)
|
||||
static object make(Writer&& object_writer,
|
||||
allocation_strategy::migrate_fn migrate = &imr::alloc::default_lsa_migrate_fn<structure>::migrate_fn) {
|
||||
MigrateFn* migrate = &imr::alloc::default_lsa_migrate_fn<structure>::migrate_fn) {
|
||||
static_assert(std::is_same_v<typename MigrateFn::structure, structure>);
|
||||
return do_make(std::forward<Writer>(object_writer), migrate);
|
||||
}
|
||||
private:
|
||||
template<typename Writer>
|
||||
GCC6_CONCEPT(requires WriterAllocator<Writer, Structure>)
|
||||
static object do_make(Writer&& object_writer, allocation_strategy::migrate_fn migrate) {
|
||||
struct alloc_deleter {
|
||||
size_t _size;
|
||||
|
||||
void operator()(uint8_t* ptr) {
|
||||
current_allocator().free(ptr);
|
||||
current_allocator().free(ptr, _size);
|
||||
}
|
||||
};
|
||||
using alloc_unique_ptr = std::unique_ptr<uint8_t[], alloc_deleter>;
|
||||
@@ -176,7 +189,7 @@ public:
|
||||
auto& alloc = current_allocator();
|
||||
alloc::object_allocator allocator(alloc);
|
||||
auto obj_size = structure::size_when_serialized(writer, allocator.get_sizer());
|
||||
auto ptr = alloc_unique_ptr(static_cast<uint8_t*>(alloc.alloc(migrate, obj_size, 1)));
|
||||
auto ptr = alloc_unique_ptr(static_cast<uint8_t*>(alloc.alloc(migrate, obj_size, 1)), alloc_deleter { obj_size });
|
||||
allocator.allocate_all();
|
||||
structure::serialize(ptr.get(), writer, allocator.get_serializer());
|
||||
return object(ptr.release());
|
||||
|
||||
@@ -42,5 +42,5 @@ elif [ "$ID" = "fedora" ]; then
|
||||
yum install -y yaml-cpp-devel thrift-devel antlr3-tool antlr3-C++-devel jsoncpp-devel snappy-devel
|
||||
elif [ "$ID" = "centos" ]; then
|
||||
yum install -y yaml-cpp-devel thrift-devel scylla-antlr35-tool scylla-antlr35-C++-devel jsoncpp-devel snappy-devel scylla-boost163-static scylla-python34-pyparsing20 systemd-devel
|
||||
echo -e "Configure example:\n\tpython3.4 ./configure.py --enable-dpdk --mode=release --static-boost --compiler=/opt/scylladb/bin/g++-7.3 --python python3.4 --ldflag=-Wl,-rpath=/opt/scylladb/lib64 --cflags=-I/opt/scylladb/include --with-antlr3=/opt/scylladb/bin/antlr3"
|
||||
echo -e "Configure example:\n\tpython3.6 ./configure.py --enable-dpdk --mode=release --static-boost --compiler=/opt/scylladb/bin/g++-7.3 --python python3.6 --ldflag=-Wl,-rpath=/opt/scylladb/lib64 --cflags=-I/opt/scylladb/include --with-antlr3=/opt/scylladb/bin/antlr3"
|
||||
fi
|
||||
|
||||
80
json.cc
Normal file
80
json.cc
Normal file
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (C) 2018 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "json.hh"
|
||||
|
||||
namespace seastar {
|
||||
namespace json {
|
||||
|
||||
static inline bool is_control_char(char c) {
|
||||
return c >= 0 && c <= 0x1F;
|
||||
}
|
||||
|
||||
static inline bool needs_escaping(const sstring& s) {
|
||||
return std::any_of(s.begin(), s.end(), [](char c) {return is_control_char(c) || c == '"' || c == '\\';});
|
||||
}
|
||||
|
||||
sstring value_to_quoted_string(const sstring& value) {
|
||||
if (!needs_escaping(value)) {
|
||||
return sprint("\"%s\"", value);
|
||||
}
|
||||
std::ostringstream oss;
|
||||
oss << std::hex << std::uppercase << std::setfill('0');
|
||||
oss.put('"');
|
||||
for (char c : value) {
|
||||
switch (c) {
|
||||
case '"':
|
||||
oss.put('\\').put('"');
|
||||
break;
|
||||
case '\\':
|
||||
oss.put('\\').put('\\');
|
||||
break;
|
||||
case '\b':
|
||||
oss.put('\\').put('b');
|
||||
break;
|
||||
case '\f':
|
||||
oss.put('\\').put('f');
|
||||
break;
|
||||
case '\n':
|
||||
oss.put('\\').put('n');
|
||||
break;
|
||||
case '\r':
|
||||
oss.put('\\').put('r');
|
||||
break;
|
||||
case '\t':
|
||||
oss.put('\\').put('t');
|
||||
break;
|
||||
default:
|
||||
if (is_control_char(c)) {
|
||||
oss.put('\\').put('u') << std::setw(4) << static_cast<int>(c);
|
||||
} else {
|
||||
oss.put(c);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
oss.put('"');
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
2
json.hh
2
json.hh
@@ -95,6 +95,8 @@ inline std::map<sstring, sstring> to_map(const sstring& raw) {
|
||||
return to_map(raw, std::map<sstring, sstring>());
|
||||
}
|
||||
|
||||
sstring value_to_quoted_string(const sstring& value);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
2
keys.cc
2
keys.cc
@@ -113,4 +113,4 @@ int32_t weight(bound_kind k) {
|
||||
abort();
|
||||
}
|
||||
|
||||
const thread_local clustering_key_prefix bound_view::_empty_prefix = clustering_key::make_empty();
|
||||
const thread_local clustering_key_prefix bound_view::empty_prefix = clustering_key::make_empty();
|
||||
|
||||
4
keys.hh
4
keys.hh
@@ -748,6 +748,10 @@ public:
|
||||
static const compound& get_compound_type(const schema& s) {
|
||||
return s.clustering_key_prefix_type();
|
||||
}
|
||||
|
||||
static clustering_key_prefix_view make_empty() {
    // Built from a value-initialized bytes_view.
    return { bytes_view{} };
}
|
||||
};
|
||||
|
||||
class clustering_key_prefix : public prefix_compound_wrapper<clustering_key_prefix, clustering_key_prefix_view, clustering_key> {
|
||||
|
||||
@@ -119,9 +119,17 @@ insert_token_range_to_sorted_container_while_unwrapping(
|
||||
const dht::token& tok,
|
||||
dht::token_range_vector& ret) {
|
||||
if (prev_tok < tok) {
|
||||
ret.emplace_back(
|
||||
dht::token_range::bound(prev_tok, false),
|
||||
dht::token_range::bound(tok, true));
|
||||
auto pos = ret.end();
|
||||
if (!ret.empty() && !std::prev(pos)->end()) {
|
||||
// We inserted a wrapped range (a, b] previously as
|
||||
// (-inf, b], (a, +inf). So now we insert in the next-to-last
|
||||
// position to keep the last range (a, +inf) at the end.
|
||||
pos = std::prev(pos);
|
||||
}
|
||||
ret.insert(pos,
|
||||
dht::token_range{
|
||||
dht::token_range::bound(prev_tok, false),
|
||||
dht::token_range::bound(tok, true)});
|
||||
} else {
|
||||
ret.emplace_back(
|
||||
dht::token_range::bound(prev_tok, false),
|
||||
|
||||
24
main.cc
24
main.cc
@@ -389,13 +389,7 @@ int main(int ac, char** av) {
|
||||
sstring broadcast_address = cfg->broadcast_address();
|
||||
sstring broadcast_rpc_address = cfg->broadcast_rpc_address();
|
||||
stdx::optional<std::vector<sstring>> hinted_handoff_enabled = cfg->experimental() ? parse_hinted_handoff_enabled(cfg->hinted_handoff_enabled()) : stdx::nullopt;
|
||||
auto prom_addr = [&] {
|
||||
try {
|
||||
return seastar::net::dns::get_host_by_name(cfg->prometheus_address()).get0();
|
||||
} catch (...) {
|
||||
std::throw_with_nested(std::runtime_error(fmt::format("Unable to resolve prometheus_address {}", cfg->prometheus_address())));
|
||||
}
|
||||
}();
|
||||
auto prom_addr = seastar::net::dns::get_host_by_name(cfg->prometheus_address()).get0();
|
||||
supervisor::notify("starting prometheus API server");
|
||||
uint16_t pport = cfg->prometheus_port();
|
||||
if (pport) {
|
||||
@@ -473,13 +467,7 @@ int main(int ac, char** av) {
|
||||
// #293 - do not stop anything
|
||||
// engine().at_exit([] { return i_endpoint_snitch::stop_snitch(); });
|
||||
supervisor::notify("determining DNS name");
|
||||
auto e = [&] {
|
||||
try {
|
||||
return seastar::net::dns::get_host_by_name(api_address).get0();
|
||||
} catch (...) {
|
||||
std::throw_with_nested(std::runtime_error(fmt::format("Unable to resolve api_address {}", api_address)));
|
||||
}
|
||||
}();
|
||||
auto e = seastar::net::dns::get_host_by_name(api_address).get0();
|
||||
supervisor::notify("starting API server");
|
||||
auto ip = e.addr_list.front();
|
||||
ctx.http_server.start("API").get();
|
||||
@@ -502,7 +490,6 @@ int main(int ac, char** av) {
|
||||
}
|
||||
};
|
||||
dbcfg.compaction_scheduling_group = make_sched_group("compaction", 1000);
|
||||
dbcfg.memory_compaction_scheduling_group = make_sched_group("mem_compaction", 1000);
|
||||
dbcfg.streaming_scheduling_group = make_sched_group("streaming", 200);
|
||||
dbcfg.statement_scheduling_group = make_sched_group("statement", 1000);
|
||||
dbcfg.memtable_scheduling_group = make_sched_group("memtable", 1000);
|
||||
@@ -776,8 +763,11 @@ int main(int ac, char** av) {
|
||||
return service::get_local_storage_service().drain_on_shutdown();
|
||||
});
|
||||
|
||||
engine().at_exit([] {
|
||||
return view_builder.stop();
|
||||
engine().at_exit([cfg] {
|
||||
if (cfg->view_building()) {
|
||||
return view_builder.stop();
|
||||
}
|
||||
return make_ready_future<>();
|
||||
});
|
||||
|
||||
engine().at_exit([&db] {
|
||||
|
||||
17
memtable.cc
17
memtable.cc
@@ -27,11 +27,11 @@
|
||||
#include "schema_upgrader.hh"
|
||||
#include "partition_builder.hh"
|
||||
|
||||
memtable::memtable(schema_ptr schema, dirty_memory_manager& dmm, memtable_list* memtable_list,
|
||||
seastar::scheduling_group compaction_scheduling_group)
|
||||
memtable::memtable(schema_ptr schema, dirty_memory_manager& dmm, memtable_list* memtable_list)
|
||||
: logalloc::region(dmm.region_group())
|
||||
, _dirty_mgr(dmm)
|
||||
, _cleaner(*this, no_cache_tracker, compaction_scheduling_group)
|
||||
, _memtable_cleaner(*this, no_cache_tracker)
|
||||
, _cleaner(&_memtable_cleaner)
|
||||
, _memtable_list(memtable_list)
|
||||
, _schema(std::move(schema))
|
||||
, partitions(memtable_entry::compare(_schema)) {
|
||||
@@ -56,9 +56,10 @@ void memtable::clear() noexcept {
|
||||
auto dirty_before = dirty_size();
|
||||
with_allocator(allocator(), [this] {
|
||||
partitions.clear_and_dispose([this] (memtable_entry* e) {
|
||||
e->partition().evict(_cleaner);
|
||||
e->partition().evict(_memtable_cleaner);
|
||||
current_deleter<memtable_entry>()(e);
|
||||
});
|
||||
_memtable_cleaner.clear();
|
||||
});
|
||||
remove_flushed_memory(dirty_before - dirty_size());
|
||||
}
|
||||
@@ -321,7 +322,7 @@ public:
|
||||
_delegate = delegate_reader(*_delegate_range, _slice, _pc, streamed_mutation::forwarding::no, _fwd_mr);
|
||||
} else {
|
||||
auto key_and_snp = read_section()(region(), [&] {
|
||||
return with_linearized_managed_bytes([&] () -> std::optional<std::pair<dht::decorated_key, partition_snapshot_ptr>> {
|
||||
return with_linearized_managed_bytes([&] () -> std::optional<std::pair<dht::decorated_key, lw_shared_ptr<partition_snapshot>>> {
|
||||
memtable_entry *e = fetch_entry();
|
||||
if (!e) {
|
||||
return { };
|
||||
@@ -483,7 +484,7 @@ private:
|
||||
void get_next_partition() {
|
||||
uint64_t component_size = 0;
|
||||
auto key_and_snp = read_section()(region(), [&] {
|
||||
return with_linearized_managed_bytes([&] () -> std::optional<std::pair<dht::decorated_key, partition_snapshot_ptr>> {
|
||||
return with_linearized_managed_bytes([&] () -> std::optional<std::pair<dht::decorated_key, lw_shared_ptr<partition_snapshot>>> {
|
||||
memtable_entry* e = fetch_entry();
|
||||
if (e) {
|
||||
auto dk = e->key();
|
||||
@@ -549,7 +550,7 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
partition_snapshot_ptr memtable_entry::snapshot(memtable& mtbl) {
|
||||
lw_shared_ptr<partition_snapshot> memtable_entry::snapshot(memtable& mtbl) {
|
||||
return _pe.read(mtbl.region(), mtbl.cleaner(), _schema, no_cache_tracker);
|
||||
}
|
||||
|
||||
@@ -563,7 +564,7 @@ memtable::make_flat_reader(schema_ptr s,
|
||||
mutation_reader::forwarding fwd_mr) {
|
||||
if (query::is_single_partition(range)) {
|
||||
const query::ring_position& pos = range.start()->value();
|
||||
auto snp = _read_section(*this, [&] () -> partition_snapshot_ptr {
|
||||
auto snp = _read_section(*this, [&] () -> lw_shared_ptr<partition_snapshot> {
|
||||
managed_bytes::linearization_context_guard lcg;
|
||||
auto i = partitions.find(pos, memtable_entry::compare(_schema));
|
||||
if (i != partitions.end()) {
|
||||
|
||||
10
memtable.hh
10
memtable.hh
@@ -66,7 +66,7 @@ public:
|
||||
partition_entry& partition() { return _pe; }
|
||||
const schema_ptr& schema() const { return _schema; }
|
||||
schema_ptr& schema() { return _schema; }
|
||||
partition_snapshot_ptr snapshot(memtable& mtbl);
|
||||
lw_shared_ptr<partition_snapshot> snapshot(memtable& mtbl);
|
||||
|
||||
size_t external_memory_usage_without_rows() const {
|
||||
return _key.key().external_memory_usage();
|
||||
@@ -125,7 +125,8 @@ public:
|
||||
bi::compare<memtable_entry::compare>>;
|
||||
private:
|
||||
dirty_memory_manager& _dirty_mgr;
|
||||
mutation_cleaner _cleaner;
|
||||
mutation_cleaner _memtable_cleaner;
|
||||
mutation_cleaner* _cleaner; // will switch to cache's cleaner after memtable is moved to cache.
|
||||
memtable_list *_memtable_list;
|
||||
schema_ptr _schema;
|
||||
logalloc::allocating_section _read_section;
|
||||
@@ -253,8 +254,7 @@ private:
|
||||
void clear() noexcept;
|
||||
uint64_t dirty_size() const;
|
||||
public:
|
||||
explicit memtable(schema_ptr schema, dirty_memory_manager&, memtable_list *memtable_list = nullptr,
|
||||
seastar::scheduling_group compaction_scheduling_group = seastar::current_scheduling_group());
|
||||
explicit memtable(schema_ptr schema, dirty_memory_manager&, memtable_list *memtable_list = nullptr);
|
||||
// Used by tests that want to control the flush process.
|
||||
explicit memtable(schema_ptr schema);
|
||||
~memtable();
|
||||
@@ -294,7 +294,7 @@ public:
|
||||
}
|
||||
|
||||
mutation_cleaner& cleaner() {
|
||||
return _cleaner;
|
||||
return *_cleaner;
|
||||
}
|
||||
public:
|
||||
memtable_list* get_memtable_list() {
|
||||
|
||||
@@ -262,11 +262,12 @@ void messaging_service::start_listen() {
|
||||
// FIXME: we don't set so.tcp_nodelay, because we can't tell at this point whether the connection will come from a
|
||||
// local or remote datacenter, and whether or not the connection will be used for gossip. We can fix
|
||||
// the first by wrapping its server_socket, but not the second.
|
||||
auto limits = rpc_resource_limits(_mcfg.rpc_memory_limit);
|
||||
if (!_server[0]) {
|
||||
auto listen = [&] (const gms::inet_address& a) {
|
||||
auto addr = ipv4_addr{a.raw_addr(), _port};
|
||||
return std::unique_ptr<rpc_protocol_server_wrapper>(new rpc_protocol_server_wrapper(*_rpc,
|
||||
so, addr, rpc_resource_limits(_mcfg.rpc_memory_limit)));
|
||||
so, addr, limits));
|
||||
};
|
||||
_server[0] = listen(_listen_address);
|
||||
if (listen_to_bc) {
|
||||
@@ -277,7 +278,7 @@ void messaging_service::start_listen() {
|
||||
if (!_server_tls[0]) {
|
||||
auto listen = [&] (const gms::inet_address& a) {
|
||||
return std::unique_ptr<rpc_protocol_server_wrapper>(
|
||||
[this, &so, &a] () -> std::unique_ptr<rpc_protocol_server_wrapper>{
|
||||
[this, &so, &a, limits] () -> std::unique_ptr<rpc_protocol_server_wrapper>{
|
||||
if (_encrypt_what == encrypt_what::none) {
|
||||
return nullptr;
|
||||
}
|
||||
@@ -285,7 +286,7 @@ void messaging_service::start_listen() {
|
||||
lo.reuse_address = true;
|
||||
auto addr = make_ipv4_address(ipv4_addr{a.raw_addr(), _ssl_port});
|
||||
return std::make_unique<rpc_protocol_server_wrapper>(*_rpc,
|
||||
so, seastar::tls::listen(_credentials, addr, lo));
|
||||
so, seastar::tls::listen(_credentials, addr, lo), limits);
|
||||
}());
|
||||
};
|
||||
_server_tls[0] = listen(_listen_address);
|
||||
|
||||
@@ -1,226 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "multishard_writer.hh"
|
||||
#include "mutation_reader.hh"
|
||||
#include "mutation_fragment.hh"
|
||||
#include "schema_registry.hh"
|
||||
#include <vector>
|
||||
#include <seastar/core/future-util.hh>
|
||||
#include <seastar/core/queue.hh>
|
||||
|
||||
class queue_reader final : public flat_mutation_reader::impl {
|
||||
seastar::queue<mutation_fragment_opt>& _mq;
|
||||
public:
|
||||
queue_reader(schema_ptr s, seastar::queue<mutation_fragment_opt>& mq)
|
||||
: impl(std::move(s))
|
||||
, _mq(mq) {
|
||||
}
|
||||
virtual future<> fill_buffer(db::timeout_clock::time_point) override {
|
||||
return do_until([this] { return is_end_of_stream() || is_buffer_full(); }, [this] {
|
||||
return _mq.pop_eventually().then([this] (mutation_fragment_opt mopt) {
|
||||
if (!mopt) {
|
||||
_end_of_stream = true;
|
||||
} else {
|
||||
push_mutation_fragment(std::move(*mopt));
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
virtual void next_partition() override {
|
||||
throw std::bad_function_call();
|
||||
}
|
||||
virtual future<> fast_forward_to(const dht::partition_range&, db::timeout_clock::time_point) override {
|
||||
throw std::bad_function_call();
|
||||
}
|
||||
virtual future<> fast_forward_to(position_range, db::timeout_clock::time_point) override {
|
||||
throw std::bad_function_call();
|
||||
}
|
||||
};
|
||||
|
||||
class shard_writer {
|
||||
private:
|
||||
schema_ptr _s;
|
||||
flat_mutation_reader _reader;
|
||||
std::function<future<> (flat_mutation_reader reader)> _consumer;
|
||||
public:
|
||||
shard_writer(schema_ptr s,
|
||||
flat_mutation_reader reader,
|
||||
std::function<future<> (flat_mutation_reader reader)> consumer);
|
||||
future<> consume();
|
||||
};
|
||||
|
||||
// The multishard_writer class gets mutation_fragments generated from
|
||||
// flat_mutation_reader and consumes the mutation_fragments with
|
||||
// multishard_writer::_consumer. If the mutation_fragment does not belong to
|
||||
// the shard multishard_writer is on, it will forward the mutation_fragment to
|
||||
// the correct shard. Future returned by multishard_writer() becomes
|
||||
// ready when all the mutation_fragments are consumed.
|
||||
class multishard_writer {
|
||||
private:
|
||||
schema_ptr _s;
|
||||
dht::i_partitioner& _partitioner;
|
||||
std::vector<foreign_ptr<std::unique_ptr<shard_writer>>> _shard_writers;
|
||||
std::vector<future<>> _pending_consumers;
|
||||
std::vector<seastar::queue<mutation_fragment_opt>> _queues;
|
||||
unsigned _current_shard = -1;
|
||||
uint64_t _consumed_partitions = 0;
|
||||
flat_mutation_reader _producer;
|
||||
std::function<future<> (flat_mutation_reader)> _consumer;
|
||||
private:
|
||||
unsigned shard_for_mf(const mutation_fragment& mf) {
|
||||
return _partitioner.shard_of(mf.as_partition_start().key().token());
|
||||
}
|
||||
future<> make_shard_writer(unsigned shard);
|
||||
future<stop_iteration> handle_mutation_fragment(mutation_fragment mf);
|
||||
future<stop_iteration> handle_end_of_stream();
|
||||
future<> consume(unsigned shard);
|
||||
future<> wait_pending_consumers();
|
||||
future<> distribute_mutation_fragments();
|
||||
public:
|
||||
multishard_writer(
|
||||
schema_ptr s,
|
||||
dht::i_partitioner& partitioner,
|
||||
flat_mutation_reader producer,
|
||||
std::function<future<> (flat_mutation_reader)> consumer);
|
||||
future<uint64_t> operator()();
|
||||
};
|
||||
|
||||
// Stores the schema, the reader and the consumer callback for a later
// consume() call. Every argument is taken by value and moved into its
// member, so the schema pointer is not copied a second time (this matches
// how multishard_writer's constructor initialises its own _s).
shard_writer::shard_writer(schema_ptr s,
        flat_mutation_reader reader,
        std::function<future<> (flat_mutation_reader reader)> consumer)
    : _s(std::move(s))
    , _reader(std::move(reader))
    , _consumer(std::move(consumer)) {
}
|
||||
|
||||
// Peeks at the reader first: when peek() yields no fragment the returned
// future resolves immediately and the consumer is never invoked; otherwise
// the reader is moved into the consumer and the consumer's future is returned.
future<> shard_writer::consume() {
    return _reader.peek().then([this] (mutation_fragment* first_fragment) {
        if (!first_fragment) {
            return make_ready_future<>();
        }
        return _consumer(std::move(_reader));
    });
}
|
||||
|
||||
// Sets up one (initially empty) shard_writer slot and one fragment queue of
// capacity 2 for every shard reported by the partitioner.
multishard_writer::multishard_writer(
    schema_ptr s,
    dht::i_partitioner& partitioner,
    flat_mutation_reader producer,
    std::function<future<> (flat_mutation_reader)> consumer)
    : _s(std::move(s))
    , _partitioner(partitioner)
    , _producer(std::move(producer))
    , _consumer(std::move(consumer))
{
    const auto nr_shards = _partitioner.shard_count();
    _shard_writers.resize(nr_shards);
    _queues.reserve(nr_shards);
    for (unsigned i = 0; i < nr_shards; ++i) {
        _queues.push_back(seastar::queue<mutation_fragment_opt>{2});
    }
}
|
||||
|
||||
// Creates the shard_writer for `shard`:
//  1. wraps this shard's queue in a queue_reader and makes it foreign,
//  2. on the target shard, turns it into a foreign reader and builds the
//     shard_writer around it (using a copy of _consumer),
//  3. back here, stores the writer in its slot and starts consuming,
//     remembering the resulting future in _pending_consumers.
future<> multishard_writer::make_shard_writer(unsigned shard) {
    auto queue_fed_reader = make_flat_mutation_reader<queue_reader>(_s, _queues[shard]);
    auto origin_reader = make_foreign(std::make_unique<flat_mutation_reader>(std::move(queue_fed_reader)));
    return smp::submit_to(shard, [gs = global_schema_ptr(_s),
                                  consumer = _consumer,
                                  reader = std::move(origin_reader)] () mutable {
        auto remote_reader = make_foreign_reader(gs.get(), std::move(reader));
        auto writer = std::make_unique<shard_writer>(gs.get(), std::move(remote_reader), consumer);
        return make_foreign(std::move(writer));
    }).then([this, shard] (foreign_ptr<std::unique_ptr<shard_writer>> writer) {
        _shard_writers[shard] = std::move(writer);
        _pending_consumers.push_back(consume(shard));
    });
}
|
||||
|
||||
// Routes one fragment to the queue of the current shard.
// A partition start bumps the partition counter and may switch the current
// shard; the first time a shard is selected its shard_writer is created
// before the fragment is queued. Always resolves to stop_iteration::no.
future<stop_iteration> multishard_writer::handle_mutation_fragment(mutation_fragment mf) {
    auto writer_ready = make_ready_future<>();
    if (mf.is_partition_start()) {
        ++_consumed_partitions;
        const unsigned target = shard_for_mf(mf);
        if (target != _current_shard) {
            _current_shard = target;
            if (!bool(_shard_writers[target])) {
                writer_ready = make_shard_writer(target);
            }
        }
    }
    return writer_ready.then([this, mf = std::move(mf)] () mutable {
        // A fragment can only be queued once some partition start selected a shard.
        assert(_current_shard != -1u);
        auto& target_queue = _queues[_current_shard];
        return target_queue.push_eventually(mutation_fragment_opt(std::move(mf)));
    }).then([] {
        return stop_iteration::no;
    });
}
|
||||
|
||||
// Pushes the end-of-stream marker (a disengaged mutation_fragment_opt) to the
// queue of every shard which has a shard_writer; shards without one are
// skipped. Resolves to stop_iteration::yes.
future<stop_iteration> multishard_writer::handle_end_of_stream() {
    auto all_shards = boost::irange(0u, _partitioner.shard_count());
    return parallel_for_each(all_shards, [this] (unsigned shard) {
        if (!bool(_shard_writers[shard])) {
            return make_ready_future<>();
        }
        return _queues[shard].push_eventually(mutation_fragment_opt());
    }).then([] {
        return stop_iteration::yes;
    });
}
|
||||
|
||||
// Runs shard_writer::consume() on the given shard. If it fails, every queue
// is aborted with the exception and the same exception is propagated
// through the returned future.
future<> multishard_writer::consume(unsigned shard) {
    auto* writer = _shard_writers[shard].get();
    auto run_on_shard = [writer] {
        return writer->consume();
    };
    return smp::submit_to(shard, std::move(run_on_shard)).handle_exception([this] (std::exception_ptr ep) {
        for (auto& queue : _queues) {
            queue.abort(ep);
        }
        return make_exception_future<>(std::move(ep));
    });
}
|
||||
|
||||
// Returns the when_all_succeed() future over every entry in
// _pending_consumers.
future<> multishard_writer::wait_pending_consumers() {
    auto first = _pending_consumers.begin();
    auto last = _pending_consumers.end();
    return seastar::when_all_succeed(first, last);
}
|
||||
|
||||
// Pulls fragments from the producer one at a time and dispatches each of
// them; a disengaged fragment means the producer is exhausted, which
// triggers end-of-stream handling and terminates the loop.
future<> multishard_writer::distribute_mutation_fragments() {
    return repeat([this] {
        return _producer().then([this] (mutation_fragment_opt mf_opt) {
            if (!mf_opt) {
                return handle_end_of_stream();
            }
            return handle_mutation_fragment(std::move(*mf_opt));
        });
    });
}
|
||||
|
||||
// Distributes all fragments, then (whether or not distribution succeeded)
// waits for the pending consumers, and finally resolves to the number of
// partitions that were consumed.
future<uint64_t> multishard_writer::operator()() {
    auto distributed = distribute_mutation_fragments();
    auto consumers_done = distributed.finally([this] {
        return wait_pending_consumers();
    });
    return consumers_done.then([this] {
        return _consumed_partitions;
    });
}
|
||||
|
||||
// Builds a multishard_writer from the arguments, keeps it alive with
// do_with() for the duration of the operation, and runs it.
// Resolves to the value produced by multishard_writer::operator()().
future<uint64_t> distribute_reader_and_consume_on_shards(schema_ptr s,
        dht::i_partitioner& partitioner,
        flat_mutation_reader producer,
        std::function<future<> (flat_mutation_reader)> consumer) {
    multishard_writer writer(std::move(s), partitioner, std::move(producer), std::move(consumer));
    return do_with(std::move(writer), [] (multishard_writer& w) {
        return w();
    });
}
|
||||
@@ -1,35 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "schema.hh"
|
||||
#include "flat_mutation_reader.hh"
|
||||
#include "dht/i_partitioner.hh"
|
||||
|
||||
// Helper to use multishard_writer to distribute mutation_fragments from the
|
||||
// producer to the correct shard and consume with the consumer.
|
||||
// It returns number of partitions consumed.
|
||||
future<uint64_t> distribute_reader_and_consume_on_shards(schema_ptr s,
|
||||
dht::i_partitioner& partitioner,
|
||||
flat_mutation_reader producer,
|
||||
std::function<future<> (flat_mutation_reader)> consumer);
|
||||
|
||||
@@ -26,76 +26,6 @@
|
||||
|
||||
#include "utils/logalloc.hh"
|
||||
|
||||
class mutation_cleaner_impl final {
|
||||
using snapshot_list = boost::intrusive::slist<partition_snapshot,
|
||||
boost::intrusive::member_hook<partition_snapshot, boost::intrusive::slist_member_hook<>, &partition_snapshot::_cleaner_hook>>;
|
||||
struct worker {
|
||||
condition_variable cv;
|
||||
snapshot_list snapshots;
|
||||
logalloc::allocating_section alloc_section;
|
||||
bool done = false; // true means the worker was abandoned and cannot access the mutation_cleaner_impl instance.
|
||||
};
|
||||
private:
|
||||
logalloc::region& _region;
|
||||
cache_tracker* _tracker;
|
||||
partition_version_list _versions;
|
||||
lw_shared_ptr<worker> _worker_state;
|
||||
seastar::scheduling_group _scheduling_group;
|
||||
private:
|
||||
stop_iteration merge_some(partition_snapshot& snp) noexcept;
|
||||
stop_iteration merge_some() noexcept;
|
||||
void start_worker();
|
||||
public:
|
||||
mutation_cleaner_impl(logalloc::region& r, cache_tracker* t, seastar::scheduling_group sg = seastar::current_scheduling_group())
|
||||
: _region(r)
|
||||
, _tracker(t)
|
||||
, _worker_state(make_lw_shared<worker>())
|
||||
, _scheduling_group(sg)
|
||||
{
|
||||
start_worker();
|
||||
}
|
||||
~mutation_cleaner_impl();
|
||||
stop_iteration clear_gently() noexcept;
|
||||
memory::reclaiming_result clear_some() noexcept;
|
||||
void clear() noexcept;
|
||||
void destroy_later(partition_version& v) noexcept;
|
||||
void destroy_gently(partition_version& v) noexcept;
|
||||
void merge(mutation_cleaner_impl& other) noexcept;
|
||||
bool empty() const noexcept { return _versions.empty(); }
|
||||
future<> drain();
|
||||
void merge_and_destroy(partition_snapshot&) noexcept;
|
||||
void set_scheduling_group(seastar::scheduling_group sg) {
|
||||
_scheduling_group = sg;
|
||||
_worker_state->cv.broadcast();
|
||||
}
|
||||
};
|
||||
|
||||
inline
|
||||
void mutation_cleaner_impl::destroy_later(partition_version& v) noexcept {
|
||||
_versions.push_back(v);
|
||||
}
|
||||
|
||||
inline
|
||||
void mutation_cleaner_impl::destroy_gently(partition_version& v) noexcept {
|
||||
if (v.clear_gently(_tracker) == stop_iteration::no) {
|
||||
destroy_later(v);
|
||||
} else {
|
||||
current_allocator().destroy(&v);
|
||||
}
|
||||
}
|
||||
|
||||
inline
|
||||
void mutation_cleaner_impl::merge_and_destroy(partition_snapshot& ps) noexcept {
|
||||
if (ps.slide_to_oldest() == stop_iteration::yes || merge_some(ps) == stop_iteration::yes) {
|
||||
lw_shared_ptr<partition_snapshot>::dispose(&ps);
|
||||
} else {
|
||||
// The snapshot must not be reachable by partitino_entry::read() after this,
|
||||
// which is ensured by slide_to_oldest() == stop_iteration::no.
|
||||
_worker_state->snapshots.push_front(ps);
|
||||
_worker_state->cv.signal();
|
||||
}
|
||||
}
|
||||
|
||||
// Container for garbage partition_version objects, used for freeing them incrementally.
|
||||
//
|
||||
// Mutation cleaner extends the lifetime of mutation_partition without doing
|
||||
@@ -106,71 +36,57 @@ void mutation_cleaner_impl::merge_and_destroy(partition_snapshot& ps) noexcept {
|
||||
// mutation_cleaner should not be thread local objects (or members of thread
|
||||
// local objects).
|
||||
class mutation_cleaner final {
|
||||
lw_shared_ptr<mutation_cleaner_impl> _impl;
|
||||
logalloc::region& _region;
|
||||
cache_tracker* _tracker;
|
||||
partition_version_list _versions;
|
||||
public:
|
||||
mutation_cleaner(logalloc::region& r, cache_tracker* t, seastar::scheduling_group sg = seastar::current_scheduling_group())
|
||||
: _impl(make_lw_shared<mutation_cleaner_impl>(r, t, sg)) {
|
||||
}
|
||||
|
||||
void set_scheduling_group(seastar::scheduling_group sg) {
|
||||
_impl->set_scheduling_group(sg);
|
||||
}
|
||||
mutation_cleaner(logalloc::region& r, cache_tracker* t) : _region(r), _tracker(t) {}
|
||||
~mutation_cleaner();
|
||||
|
||||
// Frees some of the data. Returns stop_iteration::yes iff all was freed.
|
||||
// Must be invoked under owning allocator.
|
||||
stop_iteration clear_gently() noexcept {
|
||||
return _impl->clear_gently();
|
||||
}
|
||||
stop_iteration clear_gently() noexcept;
|
||||
|
||||
// Must be invoked under owning allocator.
|
||||
memory::reclaiming_result clear_some() noexcept {
|
||||
return _impl->clear_some();
|
||||
}
|
||||
memory::reclaiming_result clear_some() noexcept;
|
||||
|
||||
// Must be invoked under owning allocator.
|
||||
void clear() noexcept {
|
||||
_impl->clear();
|
||||
}
|
||||
void clear() noexcept;
|
||||
|
||||
// Enqueues v for destruction.
|
||||
// The object must not be part of any list, and must not be accessed externally any more.
|
||||
// In particular, it must not be attached, even indirectly, to any snapshot or partition_entry,
|
||||
// and must not be evicted from.
|
||||
// Must be invoked under owning allocator.
|
||||
void destroy_later(partition_version& v) noexcept {
|
||||
return _impl->destroy_later(v);
|
||||
}
|
||||
void destroy_later(partition_version& v) noexcept;
|
||||
|
||||
// Destroys v now or later.
|
||||
// Same requirements as destroy_later().
|
||||
// Must be invoked under owning allocator.
|
||||
void destroy_gently(partition_version& v) noexcept {
|
||||
return _impl->destroy_gently(v);
|
||||
}
|
||||
void destroy_gently(partition_version& v) noexcept;
|
||||
|
||||
// Transfers objects from other to this.
|
||||
// This and other must belong to the same logalloc::region, and the same cache_tracker.
|
||||
// After the call other will refer to this cleaner.
|
||||
void merge(mutation_cleaner& other) noexcept {
|
||||
_impl->merge(*other._impl);
|
||||
other._impl = _impl;
|
||||
}
|
||||
// After the call bool(other) is false.
|
||||
void merge(mutation_cleaner& other) noexcept;
|
||||
|
||||
// Returns true iff contains no unfreed objects
|
||||
bool empty() const noexcept {
|
||||
return _impl->empty();
|
||||
}
|
||||
bool empty() const noexcept { return _versions.empty(); }
|
||||
|
||||
// Forces cleaning and returns a future which resolves when there is nothing to clean.
|
||||
future<> drain() {
|
||||
return _impl->drain();
|
||||
}
|
||||
|
||||
// Will merge given snapshot using partition_snapshot::merge_partition_versions() and then destroys it
|
||||
// using destroy_from_this(), possibly deferring in between.
|
||||
// This instance becomes the sole owner of the partition_snapshot object, the caller should not destroy it
|
||||
// nor access it after calling this.
|
||||
void merge_and_destroy(partition_snapshot& ps) {
|
||||
return _impl->merge_and_destroy(ps);
|
||||
}
|
||||
future<> drain();
|
||||
};
|
||||
|
||||
inline
|
||||
void mutation_cleaner::destroy_later(partition_version& v) noexcept {
|
||||
_versions.push_back(v);
|
||||
}
|
||||
|
||||
inline
|
||||
void mutation_cleaner::destroy_gently(partition_version& v) noexcept {
|
||||
if (v.clear_gently(_tracker) == stop_iteration::no) {
|
||||
destroy_later(v);
|
||||
} else {
|
||||
current_allocator().destroy(&v);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -125,7 +125,7 @@ public:
|
||||
return _ck.equal(s, other._ck)
|
||||
&& _t == other._t
|
||||
&& _marker == other._marker
|
||||
&& _cells.equal(column_kind::static_column, s, other._cells, s);
|
||||
&& _cells.equal(column_kind::regular_column, s, other._cells, s);
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const clustering_row& row);
|
||||
|
||||
@@ -280,15 +280,12 @@ mutation_partition::apply(const schema& s, const mutation_fragment& mf) {
|
||||
mf.visit(applier);
|
||||
}
|
||||
|
||||
stop_iteration mutation_partition::apply_monotonically(const schema& s, mutation_partition&& p, cache_tracker* tracker, is_preemptible preemptible) {
|
||||
void mutation_partition::apply_monotonically(const schema& s, mutation_partition&& p, cache_tracker* tracker) {
|
||||
_tombstone.apply(p._tombstone);
|
||||
_row_tombstones.apply_monotonically(s, std::move(p._row_tombstones));
|
||||
_static_row.apply_monotonically(s, column_kind::static_column, std::move(p._static_row));
|
||||
_static_row_continuous |= p._static_row_continuous;
|
||||
|
||||
if (_row_tombstones.apply_monotonically(s, std::move(p._row_tombstones), preemptible) == stop_iteration::no) {
|
||||
return stop_iteration::no;
|
||||
}
|
||||
|
||||
rows_entry::compare less(s);
|
||||
auto del = current_deleter<rows_entry>();
|
||||
auto p_i = p._rows.begin();
|
||||
@@ -320,34 +317,22 @@ stop_iteration mutation_partition::apply_monotonically(const schema& s, mutation
|
||||
// Newer evictable versions store complete rows
|
||||
i->_row = std::move(src_e._row);
|
||||
} else {
|
||||
memory::on_alloc_point();
|
||||
i->_row.apply_monotonically(s, std::move(src_e._row));
|
||||
}
|
||||
i->set_continuous(continuous);
|
||||
i->set_dummy(dummy);
|
||||
p_i = p._rows.erase_and_dispose(p_i, del);
|
||||
}
|
||||
if (preemptible && need_preempt() && p_i != p._rows.end()) {
|
||||
// We cannot leave p with the clustering range up to p_i->position()
|
||||
// marked as continuous because some of its sub-ranges may have originally been discontinuous.
|
||||
// This would result in the sum of this and p to have broader continuity after preemption,
|
||||
// also possibly violating the invariant of non-overlapping continuity between MVCC versions,
|
||||
// if that's what we're merging here.
|
||||
// It's always safe to mark the range as discontinuous.
|
||||
p_i->set_continuous(false);
|
||||
return stop_iteration::no;
|
||||
}
|
||||
}
|
||||
return stop_iteration::yes;
|
||||
}
|
||||
|
||||
stop_iteration mutation_partition::apply_monotonically(const schema& s, mutation_partition&& p, const schema& p_schema, is_preemptible preemptible) {
|
||||
void mutation_partition::apply_monotonically(const schema& s, mutation_partition&& p, const schema& p_schema) {
|
||||
if (s.version() == p_schema.version()) {
|
||||
return apply_monotonically(s, std::move(p), no_cache_tracker, preemptible);
|
||||
apply_monotonically(s, std::move(p), no_cache_tracker);
|
||||
} else {
|
||||
mutation_partition p2(s, p);
|
||||
p2.upgrade(p_schema, s);
|
||||
return apply_monotonically(s, std::move(p2), no_cache_tracker, is_preemptible::no); // FIXME: make preemptible
|
||||
apply_monotonically(s, std::move(p2), no_cache_tracker);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1110,7 +1095,7 @@ row::apply_monotonically(const column_definition& column, atomic_cell_or_collect
|
||||
if (_type == storage_type::vector && id < max_vector_size) {
|
||||
if (id >= _storage.vector.v.size()) {
|
||||
_storage.vector.v.resize(id);
|
||||
_storage.vector.v.emplace_back(cell_and_hash{std::move(value), std::move(hash)});
|
||||
_storage.vector.v.emplace_back(std::move(value), std::move(hash));
|
||||
_storage.vector.present.set(id);
|
||||
_size++;
|
||||
} else if (auto& cell_and_hash = _storage.vector.v[id]; !bool(cell_and_hash.cell)) {
|
||||
@@ -1139,6 +1124,7 @@ row::apply_monotonically(const column_definition& column, atomic_cell_or_collect
|
||||
void
|
||||
row::append_cell(column_id id, atomic_cell_or_collection value) {
|
||||
if (_type == storage_type::vector && id < max_vector_size) {
|
||||
assert(_storage.vector.v.size() <= id);
|
||||
_storage.vector.v.resize(id);
|
||||
_storage.vector.v.emplace_back(cell_and_hash{std::move(value), cell_hash_opt()});
|
||||
_storage.vector.present.set(id);
|
||||
@@ -1177,7 +1163,7 @@ row::find_cell(column_id id) const {
|
||||
size_t row::external_memory_usage(const schema& s, column_kind kind) const {
|
||||
size_t mem = 0;
|
||||
if (_type == storage_type::vector) {
|
||||
mem += _storage.vector.v.external_memory_usage();
|
||||
mem += _storage.vector.v.used_space_external_memory_usage();
|
||||
column_id id = 0;
|
||||
for (auto&& c_a_h : _storage.vector.v) {
|
||||
auto& cdef = s.column_at(kind, id++);
|
||||
@@ -1411,12 +1397,17 @@ row::row(const schema& s, column_kind kind, const row& o)
|
||||
if (_type == storage_type::vector) {
|
||||
auto& other_vec = o._storage.vector;
|
||||
auto& vec = *new (&_storage.vector) vector_storage;
|
||||
vec.present = other_vec.present;
|
||||
vec.v.reserve(other_vec.v.size());
|
||||
column_id id = 0;
|
||||
for (auto& cell : other_vec.v) {
|
||||
auto& cdef = s.column_at(kind, id++);
|
||||
vec.v.emplace_back(cell_and_hash { cell.cell.copy(*cdef.type), cell.hash });
|
||||
try {
|
||||
vec.present = other_vec.present;
|
||||
vec.v.reserve(other_vec.v.size());
|
||||
column_id id = 0;
|
||||
for (auto& cell : other_vec.v) {
|
||||
auto& cdef = s.column_at(kind, id++);
|
||||
vec.v.emplace_back(cell_and_hash{cell.cell.copy(*cdef.type), cell.hash});
|
||||
}
|
||||
} catch (...) {
|
||||
_storage.vector.~vector_storage();
|
||||
throw;
|
||||
}
|
||||
} else {
|
||||
auto cloner = [&] (const auto& x) {
|
||||
@@ -1827,9 +1818,10 @@ void mutation_querier::query_static_row(const row& r, tombstone current_tombston
|
||||
} else if (_short_reads_allowed) {
|
||||
seastar::measuring_output_stream stream;
|
||||
ser::qr_partition__static_row__cells<seastar::measuring_output_stream> out(stream, { });
|
||||
auto start = stream.size();
|
||||
get_compacted_row_slice(_schema, slice, column_kind::static_column,
|
||||
r, slice.static_columns, _static_cells_wr);
|
||||
_memory_accounter.update(stream.size());
|
||||
r, slice.static_columns, out);
|
||||
_memory_accounter.update(stream.size() - start);
|
||||
}
|
||||
if (_pw.requested_digest()) {
|
||||
max_timestamp max_ts{_pw.last_modified()};
|
||||
@@ -1890,8 +1882,9 @@ stop_iteration mutation_querier::consume(clustering_row&& cr, row_tombstone curr
|
||||
} else if (_short_reads_allowed) {
|
||||
seastar::measuring_output_stream stream;
|
||||
ser::qr_partition__rows<seastar::measuring_output_stream> out(stream, { });
|
||||
auto start = stream.size();
|
||||
write_row(out);
|
||||
stop = _memory_accounter.update_and_check(stream.size());
|
||||
stop = _memory_accounter.update_and_check(stream.size() - start);
|
||||
}
|
||||
|
||||
_live_clustering_rows++;
|
||||
@@ -2320,20 +2313,17 @@ future<mutation_opt> counter_write_query(schema_ptr s, const mutation_source& so
|
||||
return f.finally([r_a_r = std::move(r_a_r)] { });
|
||||
}
|
||||
|
||||
mutation_cleaner_impl::~mutation_cleaner_impl() {
|
||||
_worker_state->done = true;
|
||||
_worker_state->cv.signal();
|
||||
_worker_state->snapshots.clear_and_dispose(typename lw_shared_ptr<partition_snapshot>::disposer());
|
||||
mutation_cleaner::~mutation_cleaner() {
|
||||
with_allocator(_region.allocator(), [this] {
|
||||
clear();
|
||||
});
|
||||
}
|
||||
|
||||
void mutation_cleaner_impl::clear() noexcept {
|
||||
void mutation_cleaner::clear() noexcept {
|
||||
while (clear_gently() == stop_iteration::no) ;
|
||||
}
|
||||
|
||||
stop_iteration mutation_cleaner_impl::clear_gently() noexcept {
|
||||
stop_iteration mutation_cleaner::clear_gently() noexcept {
|
||||
while (clear_some() == memory::reclaiming_result::reclaimed_something) {
|
||||
if (need_preempt()) {
|
||||
return stop_iteration::no;
|
||||
@@ -2342,7 +2332,7 @@ stop_iteration mutation_cleaner_impl::clear_gently() noexcept {
|
||||
return stop_iteration::yes;
|
||||
}
|
||||
|
||||
memory::reclaiming_result mutation_cleaner_impl::clear_some() noexcept {
|
||||
memory::reclaiming_result mutation_cleaner::clear_some() noexcept {
|
||||
if (_versions.empty()) {
|
||||
return memory::reclaiming_result::reclaimed_nothing;
|
||||
}
|
||||
@@ -2355,81 +2345,14 @@ memory::reclaiming_result mutation_cleaner_impl::clear_some() noexcept {
|
||||
return memory::reclaiming_result::reclaimed_something;
|
||||
}
|
||||
|
||||
void mutation_cleaner_impl::merge(mutation_cleaner_impl& r) noexcept {
|
||||
void mutation_cleaner::merge(mutation_cleaner& r) noexcept {
|
||||
_versions.splice(r._versions);
|
||||
_worker_state->snapshots.splice(_worker_state->snapshots.end(), r._worker_state->snapshots);
|
||||
if (!_worker_state->snapshots.empty()) {
|
||||
_worker_state->cv.signal();
|
||||
}
|
||||
}
|
||||
|
||||
// Starts the background merge loop.
//
// Each iteration of repeat() first checks the worker state's `done` flag and
// stops if it is set. Otherwise the iteration, wrapped in
// with_scheduling_group(_scheduling_group, ...), waits on the worker state's
// condition variable until either `done` is set or the snapshot queue is
// non-empty, then calls merge_some() once and asks for another iteration.
void mutation_cleaner_impl::start_worker() {
|
||||
// `w` is a copy of _worker_state captured by the lambda; `this` is captured as well
// because _scheduling_group and merge_some() are reached through it.
auto f = repeat([w = _worker_state, this] () mutable noexcept {
|
||||
if (w->done) {
|
||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
||||
}
|
||||
return with_scheduling_group(_scheduling_group, [w, this] {
|
||||
// Wake-up predicate: stop requested, or there is at least one queued snapshot.
return w->cv.wait([w] {
|
||||
return w->done || !w->snapshots.empty();
|
||||
}).then([this, w] () noexcept {
|
||||
// `done` is re-checked after the wait because it is one of the two wake-up reasons.
if (w->done) {
|
||||
return stop_iteration::yes;
|
||||
}
|
||||
// The return value of merge_some() is ignored here; the loop always continues.
merge_some();
|
||||
return stop_iteration::no;
|
||||
});
|
||||
});
|
||||
});
|
||||
// NOTE(review): the loop future is not stored or awaited. If it has already
// failed at this point, get() is called on it — presumably to surface the
// stored exception instead of dropping it; confirm against the future API.
if (f.failed()) {
|
||||
f.get();
|
||||
}
|
||||
}
|
||||
|
||||
// Attempts to merge the partition versions of `snp`.
//
// The work runs with the snapshot's region allocator installed and inside
// with_linearized_managed_bytes().
//
// Returns:
//  - stop_iteration::no if reclaiming is currently disabled on the region
//    (no merge is attempted);
//  - otherwise the value produced by running snp.merge_partition_versions()
//    inside the worker's allocating section;
//  - stop_iteration::yes if that call throws.
stop_iteration mutation_cleaner_impl::merge_some(partition_snapshot& snp) noexcept {
|
||||
auto&& region = snp.region();
|
||||
return with_allocator(region.allocator(), [&] {
|
||||
return with_linearized_managed_bytes([&] {
|
||||
// Allocating sections require the region to be reclaimable
|
||||
// which means that they cannot be nested.
|
||||
// It is, however, possible, that if the snapshot is taken
|
||||
// inside an allocating section and then an exception is thrown
|
||||
// this function will be called to clean up even though we
|
||||
// still will be in the context of the allocating section.
|
||||
if (!region.reclaiming_enabled()) {
|
||||
return stop_iteration::no;
|
||||
}
|
||||
try {
|
||||
return _worker_state->alloc_section(region, [&] {
|
||||
return snp.merge_partition_versions();
|
||||
});
|
||||
} catch (...) {
|
||||
// Merging failed, give up as there is no guarantee of forward progress.
|
||||
return stop_iteration::yes;
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// Performs one merge step on the snapshot at the front of the worker queue.
//
// Returns stop_iteration::yes only when the queue is empty. Otherwise it
// returns stop_iteration::no, whether or not the front snapshot was finished,
// so a caller looping on the result keeps going until the queue drains.
stop_iteration mutation_cleaner_impl::merge_some() noexcept {
|
||||
if (_worker_state->snapshots.empty()) {
|
||||
return stop_iteration::yes;
|
||||
}
|
||||
partition_snapshot& snp = _worker_state->snapshots.front();
|
||||
// A `yes` from the per-snapshot overload means this snapshot needs no further
// merge steps: unlink it from the queue and dispose of it.
if (merge_some(snp) == stop_iteration::yes) {
|
||||
_worker_state->snapshots.pop_front();
|
||||
lw_shared_ptr<partition_snapshot>::dispose(&snp);
|
||||
}
|
||||
return stop_iteration::no;
|
||||
}
|
||||
|
||||
future<> mutation_cleaner_impl::drain() {
|
||||
future<> mutation_cleaner::drain() {
|
||||
return repeat([this] {
|
||||
return merge_some();
|
||||
}).then([this] {
|
||||
return repeat([this] {
|
||||
return with_allocator(_region.allocator(), [this] {
|
||||
return clear_gently();
|
||||
});
|
||||
return with_allocator(_region.allocator(), [this] {
|
||||
return clear_gently();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -46,7 +46,6 @@
|
||||
#include "clustering_key_filter.hh"
|
||||
#include "intrusive_set_external_comparator.hh"
|
||||
#include "utils/with_relational_operators.hh"
|
||||
#include "utils/preempt.hh"
|
||||
|
||||
class mutation_fragment;
|
||||
class clustering_row;
|
||||
@@ -75,6 +74,15 @@ using cell_hash_opt = seastar::optimized_optional<cell_hash>;
|
||||
// A cell together with its optional hash.
//
// `hash` is declared mutable, so it can be assigned through a const
// cell_and_hash. Because the move constructor and move assignment are
// user-declared, no implicit copy operations are available: instances can be
// moved but not copied.
struct cell_and_hash {
|
||||
atomic_cell_or_collection cell;
|
||||
mutable cell_hash_opt hash;
|
||||
|
||||
cell_and_hash() = default;
|
||||
cell_and_hash(cell_and_hash&&) noexcept = default;
|
||||
cell_and_hash& operator=(cell_and_hash&&) noexcept = default;
|
||||
|
||||
// Moves `cell` into the member of the same name; `hash` is copied.
cell_and_hash(atomic_cell_or_collection&& cell, cell_hash_opt hash)
|
||||
: cell(std::move(cell))
|
||||
, hash(hash)
|
||||
{ }
|
||||
};
|
||||
|
||||
//
|
||||
@@ -988,19 +996,8 @@ public:
|
||||
// This instance and p are governed by the same schema.
|
||||
//
|
||||
// Must be provided with a pointer to the cache_tracker, which owns both this and p.
|
||||
//
|
||||
// Returns stop_iteration::no if the operation was preempted before finished, and stop_iteration::yes otherwise.
|
||||
// On preemption the sum of this and p stays the same (represents the same set of writes), and the state of this
|
||||
// object contains at least all the writes it contained before the call (monotonicity). It may contain partial writes.
|
||||
// Also, some progress is always guaranteed (liveness).
|
||||
//
|
||||
// The operation can be driven to completion like this:
|
||||
//
|
||||
// while (apply_monotonically(..., is_preemptible::yes) == stop_iteration::no) { }
|
||||
//
|
||||
// If is_preemptible::no is passed as argument then stop_iteration::no is never returned.
|
||||
stop_iteration apply_monotonically(const schema& s, mutation_partition&& p, cache_tracker*, is_preemptible = is_preemptible::no);
|
||||
stop_iteration apply_monotonically(const schema& s, mutation_partition&& p, const schema& p_schema, is_preemptible = is_preemptible::no);
|
||||
void apply_monotonically(const schema& s, mutation_partition&& p, cache_tracker*);
|
||||
void apply_monotonically(const schema& s, mutation_partition&& p, const schema& p_schema);
|
||||
|
||||
// Weak exception guarantees.
|
||||
// Assumes this and p are not owned by a cache_tracker.
|
||||
|
||||
@@ -29,8 +29,6 @@
|
||||
#include "mutation_partition.hh"
|
||||
#include "counters.hh"
|
||||
#include "frozen_mutation.hh"
|
||||
#include "partition_builder.hh"
|
||||
#include "converting_mutation_partition_applier.hh"
|
||||
|
||||
#include "utils/UUID.hh"
|
||||
#include "serializer.hh"
|
||||
@@ -62,10 +60,10 @@ atomic_cell read_atomic_cell(const abstract_type& type, atomic_cell_variant cv,
|
||||
explicit atomic_cell_visitor(const abstract_type& t, atomic_cell::collection_member cm)
|
||||
: _type(t), _collection_member(cm) { }
|
||||
atomic_cell operator()(ser::live_cell_view& lcv) const {
|
||||
return atomic_cell::make_live(_type, lcv.created_at(), lcv.value().view(), _collection_member);
|
||||
return atomic_cell::make_live(_type, lcv.created_at(), lcv.value(), _collection_member);
|
||||
}
|
||||
atomic_cell operator()(ser::expiring_cell_view& ecv) const {
|
||||
return atomic_cell::make_live(_type, ecv.c().created_at(), ecv.c().value().view(), ecv.expiry(), ecv.ttl(), _collection_member);
|
||||
return atomic_cell::make_live(_type, ecv.c().created_at(), ecv.c().value(), ecv.expiry(), ecv.ttl(), _collection_member);
|
||||
}
|
||||
atomic_cell operator()(ser::dead_cell_view& dcv) const {
|
||||
return atomic_cell::make_dead(dcv.tomb().timestamp(), dcv.tomb().deletion_time());
|
||||
@@ -131,13 +129,20 @@ void read_and_visit_row(ser::row_view rv, const column_mapping& cm, column_kind
|
||||
: _visitor(v), _id(id), _col(col) { }
|
||||
|
||||
void operator()(atomic_cell_variant& acv) const {
|
||||
if (!_col.is_atomic()) {
|
||||
if (!_col.type()->is_atomic()) {
|
||||
throw std::runtime_error("A collection expected, got an atomic cell");
|
||||
}
|
||||
_visitor.accept_atomic_cell(_id, read_atomic_cell(*_col.type(), acv));
|
||||
// FIXME: Pass view to cell to avoid copy
|
||||
auto&& outer = current_allocator();
|
||||
with_allocator(standard_allocator(), [&] {
|
||||
auto cell = read_atomic_cell(*_col.type(), acv);
|
||||
with_allocator(outer, [&] {
|
||||
_visitor.accept_atomic_cell(_id, cell);
|
||||
});
|
||||
});
|
||||
}
|
||||
void operator()(ser::collection_cell_view& ccv) const {
|
||||
if (_col.is_atomic()) {
|
||||
if (_col.type()->is_atomic()) {
|
||||
throw std::runtime_error("An atomic cell expected, got a collection");
|
||||
}
|
||||
// FIXME: Pass view to cell to avoid copy
|
||||
@@ -182,19 +187,23 @@ row_marker read_row_marker(boost::variant<ser::live_marker_view, ser::expiring_m
|
||||
|
||||
}
|
||||
|
||||
template<typename Visitor>
|
||||
GCC6_CONCEPT(requires MutationViewVisitor<Visitor>)
|
||||
void mutation_partition_view::do_accept(const column_mapping& cm, Visitor& visitor) const {
|
||||
void
|
||||
mutation_partition_view::accept(const schema& s, mutation_partition_visitor& visitor) const {
|
||||
accept(s.get_column_mapping(), visitor);
|
||||
}
|
||||
|
||||
void
|
||||
mutation_partition_view::accept(const column_mapping& cm, mutation_partition_visitor& visitor) const {
|
||||
auto in = _in;
|
||||
auto mpv = ser::deserialize(in, boost::type<ser::mutation_partition_view>());
|
||||
|
||||
visitor.accept_partition_tombstone(mpv.tomb());
|
||||
|
||||
struct static_row_cell_visitor {
|
||||
Visitor& _visitor;
|
||||
mutation_partition_visitor& _visitor;
|
||||
|
||||
void accept_atomic_cell(column_id id, atomic_cell ac) const {
|
||||
_visitor.accept_static_cell(id, std::move(ac));
|
||||
void accept_atomic_cell(column_id id, const atomic_cell& ac) const {
|
||||
_visitor.accept_static_cell(id, ac);
|
||||
}
|
||||
void accept_collection(column_id id, const collection_mutation& cm) const {
|
||||
_visitor.accept_static_cell(id, cm);
|
||||
@@ -208,13 +217,13 @@ void mutation_partition_view::do_accept(const column_mapping& cm, Visitor& visit
|
||||
|
||||
for (auto&& cr : mpv.rows()) {
|
||||
auto t = row_tombstone(cr.deleted_at(), shadowable_tombstone(cr.shadowable_deleted_at()));
|
||||
visitor.accept_row(position_in_partition_view::for_key(cr.key()), t, read_row_marker(cr.marker()), is_dummy::no, is_continuous::yes);
|
||||
visitor.accept_row(position_in_partition_view::for_key(cr.key()), t, read_row_marker(cr.marker()));
|
||||
|
||||
struct cell_visitor {
|
||||
Visitor& _visitor;
|
||||
mutation_partition_visitor& _visitor;
|
||||
|
||||
void accept_atomic_cell(column_id id, atomic_cell ac) const {
|
||||
_visitor.accept_row_cell(id, std::move(ac));
|
||||
void accept_atomic_cell(column_id id, const atomic_cell& ac) const {
|
||||
_visitor.accept_row_cell(id, ac);
|
||||
}
|
||||
void accept_collection(column_id id, const collection_mutation& cm) const {
|
||||
_visitor.accept_row_cell(id, cm);
|
||||
@@ -224,38 +233,6 @@ void mutation_partition_view::do_accept(const column_mapping& cm, Visitor& visit
|
||||
}
|
||||
}
|
||||
|
||||
void mutation_partition_view::accept(const schema& s, partition_builder& visitor) const
|
||||
{
|
||||
do_accept(s.get_column_mapping(), visitor);
|
||||
}
|
||||
|
||||
void mutation_partition_view::accept(const column_mapping& cm, converting_mutation_partition_applier& visitor) const
|
||||
{
|
||||
do_accept(cm, visitor);
|
||||
}
|
||||
|
||||
// Returns the key of the first row in the serialized partition, or an empty
// optional when the partition has no rows.
std::optional<clustering_key> mutation_partition_view::first_row_key() const
|
||||
{
|
||||
// Deserialize from a copy of the stream so that _in itself is not modified.
auto in = _in;
|
||||
auto mpv = ser::deserialize(in, boost::type<ser::mutation_partition_view>());
|
||||
auto rows = mpv.rows();
|
||||
if (rows.empty()) {
|
||||
return { };
|
||||
}
|
||||
return rows.front().key();
|
||||
}
|
||||
|
||||
// Returns the key of the last row in the serialized partition, or an empty
// optional when the partition has no rows.
std::optional<clustering_key> mutation_partition_view::last_row_key() const
|
||||
{
|
||||
// Deserialize from a copy of the stream so that _in itself is not modified.
auto in = _in;
|
||||
auto mpv = ser::deserialize(in, boost::type<ser::mutation_partition_view>());
|
||||
auto rows = mpv.rows();
|
||||
if (rows.empty()) {
|
||||
return { };
|
||||
}
|
||||
return rows.back().key();
|
||||
}
|
||||
|
||||
mutation_partition_view mutation_partition_view::from_view(ser::mutation_partition_view v)
|
||||
{
|
||||
return { v.v };
|
||||
@@ -273,8 +250,9 @@ mutation_fragment frozen_mutation_fragment::unfreeze(const schema& s)
|
||||
public:
|
||||
clustering_row_builder(const schema& s, clustering_key key, row_tombstone t, row_marker m)
|
||||
: _s(s), _mf(mutation_fragment::clustering_row_tag_t(), std::move(key), std::move(t), std::move(m), row()) { }
|
||||
void accept_atomic_cell(column_id id, atomic_cell ac) {
|
||||
_mf.as_mutable_clustering_row().cells().append_cell(id, atomic_cell_or_collection(std::move(ac)));
|
||||
void accept_atomic_cell(column_id id, const atomic_cell& ac) {
|
||||
auto& type = *_s.regular_column_at(id).type;
|
||||
_mf.as_mutable_clustering_row().cells().append_cell(id, atomic_cell_or_collection(atomic_cell(type, ac)));
|
||||
}
|
||||
void accept_collection(column_id id, const collection_mutation& cm) {
|
||||
auto& ctype = *static_pointer_cast<const collection_type_impl>(_s.regular_column_at(id).type);
|
||||
@@ -295,8 +273,9 @@ mutation_fragment frozen_mutation_fragment::unfreeze(const schema& s)
|
||||
mutation_fragment _mf;
|
||||
public:
|
||||
explicit static_row_builder(const schema& s) : _s(s), _mf(static_row()) { }
|
||||
void accept_atomic_cell(column_id id, atomic_cell ac) {
|
||||
_mf.as_mutable_static_row().cells().append_cell(id, atomic_cell_or_collection(std::move(ac)));
|
||||
void accept_atomic_cell(column_id id, const atomic_cell& ac) {
|
||||
auto& type = *_s.static_column_at(id).type;
|
||||
_mf.as_mutable_static_row().cells().append_cell(id, atomic_cell_or_collection(atomic_cell(type, ac)));
|
||||
}
|
||||
void accept_collection(column_id id, const collection_mutation& cm) {
|
||||
auto& ctype = *static_pointer_cast<const collection_type_impl>(_s.static_column_at(id).type);
|
||||
|
||||
@@ -29,26 +29,6 @@ namespace ser {
|
||||
class mutation_partition_view;
|
||||
}
|
||||
|
||||
class partition_builder;
|
||||
class converting_mutation_partition_applier;
|
||||
|
||||
// Concept listing the calls a visitor type must accept: a partition tombstone,
// static cells (atomic, passed as an rvalue, and collection views), range
// tombstones, row headers (position, row tombstone, marker, dummy and
// continuity flags) and row cells (atomic and collection views).
// The whole definition is wrapped in GCC6_CONCEPT(...).
GCC6_CONCEPT(
|
||||
template<typename T>
|
||||
concept bool MutationViewVisitor = requires (T visitor, tombstone t, atomic_cell ac,
|
||||
collection_mutation_view cmv, range_tombstone rt,
|
||||
position_in_partition_view pipv, row_tombstone row_tomb,
|
||||
row_marker rm) {
|
||||
visitor.accept_partition_tombstone(t);
|
||||
visitor.accept_static_cell(column_id(), std::move(ac));
|
||||
visitor.accept_static_cell(column_id(), cmv);
|
||||
visitor.accept_row_tombstone(rt);
|
||||
visitor.accept_row(pipv, row_tomb, rm,
|
||||
is_dummy::no, is_continuous::yes);
|
||||
visitor.accept_row_cell(column_id(), std::move(ac));
|
||||
visitor.accept_row_cell(column_id(), cmv);
|
||||
};
|
||||
)
|
||||
|
||||
// View on serialized mutation partition. See mutation_partition_serializer.
|
||||
class mutation_partition_view {
|
||||
utils::input_stream _in;
|
||||
@@ -56,18 +36,11 @@ private:
|
||||
mutation_partition_view(utils::input_stream v)
|
||||
: _in(v)
|
||||
{ }
|
||||
|
||||
template<typename Visitor>
|
||||
GCC6_CONCEPT(requires MutationViewVisitor<Visitor>)
|
||||
void do_accept(const column_mapping&, Visitor& visitor) const;
|
||||
public:
|
||||
static mutation_partition_view from_stream(utils::input_stream v) {
|
||||
return { v };
|
||||
}
|
||||
static mutation_partition_view from_view(ser::mutation_partition_view v);
|
||||
void accept(const schema& schema, partition_builder& visitor) const;
|
||||
void accept(const column_mapping&, converting_mutation_partition_applier& visitor) const;
|
||||
|
||||
std::optional<clustering_key> first_row_key() const;
|
||||
std::optional<clustering_key> last_row_key() const;
|
||||
void accept(const schema& schema, mutation_partition_visitor& visitor) const;
|
||||
void accept(const column_mapping&, mutation_partition_visitor& visitor) const;
|
||||
};
|
||||
|
||||
@@ -184,11 +184,13 @@ private:
|
||||
// end, a call to next_partition() or a call to
|
||||
// fast_forward_to(dht::partition_range).
|
||||
reader_and_last_fragment_kind _single_reader;
|
||||
dht::decorated_key_opt _key;
|
||||
const schema_ptr _schema;
|
||||
streamed_mutation::forwarding _fwd_sm;
|
||||
mutation_reader::forwarding _fwd_mr;
|
||||
private:
|
||||
void maybe_add_readers(const std::optional<dht::ring_position_view>& pos);
|
||||
const dht::token* current_position() const;
|
||||
void maybe_add_readers(const dht::token* const t);
|
||||
void add_readers(std::vector<flat_mutation_reader> new_readers);
|
||||
future<> prepare_next();
|
||||
// Collect all forwardable readers into _next, and remove them from
|
||||
@@ -234,7 +236,7 @@ class list_reader_selector : public reader_selector {
|
||||
|
||||
public:
|
||||
explicit list_reader_selector(schema_ptr s, std::vector<flat_mutation_reader> readers)
|
||||
: reader_selector(s, dht::ring_position_view::min())
|
||||
: reader_selector(s, dht::ring_position::min())
|
||||
, _readers(std::move(readers)) {
|
||||
}
|
||||
|
||||
@@ -244,8 +246,8 @@ public:
|
||||
list_reader_selector(list_reader_selector&&) = default;
|
||||
list_reader_selector& operator=(list_reader_selector&&) = default;
|
||||
|
||||
virtual std::vector<flat_mutation_reader> create_new_readers(const std::optional<dht::ring_position_view>&) override {
|
||||
_selector_position = dht::ring_position_view::max();
|
||||
virtual std::vector<flat_mutation_reader> create_new_readers(const dht::token* const) override {
|
||||
_selector_position = dht::ring_position::max();
|
||||
return std::exchange(_readers, {});
|
||||
}
|
||||
|
||||
@@ -254,10 +256,12 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
void mutation_reader_merger::maybe_add_readers(const std::optional<dht::ring_position_view>& pos) {
|
||||
if (_selector->has_new_readers(pos)) {
|
||||
add_readers(_selector->create_new_readers(pos));
|
||||
void mutation_reader_merger::maybe_add_readers(const dht::token* const t) {
|
||||
if (!_selector->has_new_readers(t)) {
|
||||
return;
|
||||
}
|
||||
|
||||
add_readers(_selector->create_new_readers(t));
|
||||
}
|
||||
|
||||
void mutation_reader_merger::add_readers(std::vector<flat_mutation_reader> new_readers) {
|
||||
@@ -268,6 +272,14 @@ void mutation_reader_merger::add_readers(std::vector<flat_mutation_reader> new_r
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a pointer to the token of _key, or nullptr when _key is not engaged.
// The pointer refers into _key, so it is only usable while _key keeps its
// current value.
const dht::token* mutation_reader_merger::current_position() const {
|
||||
if (!_key) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return &_key->token();
|
||||
}
|
||||
|
||||
struct mutation_reader_merger::reader_heap_compare {
|
||||
const schema& s;
|
||||
|
||||
@@ -326,10 +338,12 @@ future<> mutation_reader_merger::prepare_next() {
|
||||
// waiting for a fast-forward so there is nothing to do.
|
||||
if (_fragment_heap.empty() && _halted_readers.empty()) {
|
||||
if (_reader_heap.empty()) {
|
||||
maybe_add_readers(std::nullopt);
|
||||
_key = {};
|
||||
} else {
|
||||
maybe_add_readers(_reader_heap.front().fragment.as_partition_start().key());
|
||||
_key = _reader_heap.front().fragment.as_partition_start().key();
|
||||
}
|
||||
|
||||
maybe_add_readers(current_position());
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -357,7 +371,7 @@ mutation_reader_merger::mutation_reader_merger(schema_ptr schema,
|
||||
, _schema(std::move(schema))
|
||||
, _fwd_sm(fwd_sm)
|
||||
, _fwd_mr(fwd_mr) {
|
||||
maybe_add_readers(std::nullopt);
|
||||
maybe_add_readers(nullptr);
|
||||
}
|
||||
|
||||
future<mutation_reader_merger::mutation_fragment_batch> mutation_reader_merger::operator()() {
|
||||
|
||||
@@ -50,19 +50,19 @@ namespace mutation_reader {
|
||||
class reader_selector {
|
||||
protected:
|
||||
schema_ptr _s;
|
||||
dht::ring_position_view _selector_position;
|
||||
dht::ring_position _selector_position;
|
||||
public:
|
||||
reader_selector(schema_ptr s, dht::ring_position_view rpv) noexcept : _s(std::move(s)), _selector_position(std::move(rpv)) {}
|
||||
reader_selector(schema_ptr s, dht::ring_position rp) noexcept : _s(std::move(s)), _selector_position(std::move(rp)) {}
|
||||
|
||||
virtual ~reader_selector() = default;
|
||||
// Call only if has_new_readers() returned true.
|
||||
virtual std::vector<flat_mutation_reader> create_new_readers(const std::optional<dht::ring_position_view>& pos) = 0;
|
||||
virtual std::vector<flat_mutation_reader> create_new_readers(const dht::token* const t) = 0;
|
||||
virtual std::vector<flat_mutation_reader> fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) = 0;
|
||||
|
||||
// Can be false-positive but never false-negative!
|
||||
bool has_new_readers(const std::optional<dht::ring_position_view>& pos) const noexcept {
|
||||
bool has_new_readers(const dht::token* const t) const noexcept {
|
||||
dht::ring_position_comparator cmp(*_s);
|
||||
return !_selector_position.is_max() && (!pos || cmp(*pos, _selector_position) >= 0);
|
||||
return !_selector_position.is_max() && (!t || cmp(dht::ring_position_view(*t), _selector_position) >= 0);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
#include "mutation_partition_view.hh"
|
||||
|
||||
// Partition visitor which builds the mutation_partition corresponding to the data it is fed with.
|
||||
class partition_builder final : public mutation_partition_visitor {
|
||||
class partition_builder : public mutation_partition_visitor {
|
||||
private:
|
||||
const schema& _schema;
|
||||
mutation_partition& _partition;
|
||||
@@ -43,13 +43,9 @@ public:
|
||||
}
|
||||
|
||||
virtual void accept_static_cell(column_id id, atomic_cell_view cell) override {
|
||||
auto& cdef = _schema.static_column_at(id);
|
||||
accept_static_cell(id, atomic_cell(*cdef.type, cell));
|
||||
}
|
||||
|
||||
void accept_static_cell(column_id id, atomic_cell&& cell) {
|
||||
row& r = _partition.static_row();
|
||||
r.append_cell(id, atomic_cell_or_collection(std::move(cell)));
|
||||
auto& cdef = _schema.static_column_at(id);
|
||||
r.append_cell(id, atomic_cell_or_collection(*cdef.type, cell));
|
||||
}
|
||||
|
||||
virtual void accept_static_cell(column_id id, collection_mutation_view collection) override {
|
||||
@@ -70,13 +66,9 @@ public:
|
||||
}
|
||||
|
||||
virtual void accept_row_cell(column_id id, atomic_cell_view cell) override {
|
||||
auto& cdef = _schema.regular_column_at(id);
|
||||
accept_row_cell(id, atomic_cell(*cdef.type, cell));
|
||||
}
|
||||
|
||||
void accept_row_cell(column_id id, atomic_cell&& cell) {
|
||||
row& r = _current_row->cells();
|
||||
r.append_cell(id, atomic_cell_or_collection(std::move(cell)));
|
||||
auto& cdef = _schema.regular_column_at(id);
|
||||
r.append_cell(id, atomic_cell_or_collection(*cdef.type, cell));
|
||||
}
|
||||
|
||||
virtual void accept_row_cell(column_id id, collection_mutation_view collection) override {
|
||||
|
||||
@@ -33,6 +33,34 @@ struct partition_snapshot_reader_dummy_accounter {
|
||||
};
|
||||
extern partition_snapshot_reader_dummy_accounter no_accounter;
|
||||
|
||||
// Best-effort merge of the partition versions referenced by `snp`.
//
// Does nothing unless snp.owned() is true. Otherwise, with the region's
// allocator installed and inside with_linearized_managed_bytes(), it runs
// snp->merge_partition_versions() within `read_section` — but only if
// reclaiming is enabled on `lsa_region`. Any exception thrown by the merge is
// swallowed. In all of these owned cases `snp` is reset to an empty pointer
// before returning.
inline void maybe_merge_versions(lw_shared_ptr<partition_snapshot>& snp,
|
||||
logalloc::region& lsa_region,
|
||||
logalloc::allocating_section& read_section) {
|
||||
if (!snp.owned()) {
|
||||
return;
|
||||
}
|
||||
// If no one else is using this particular snapshot try to merge partition
|
||||
// versions.
|
||||
with_allocator(lsa_region.allocator(), [&snp, &lsa_region, &read_section] {
|
||||
return with_linearized_managed_bytes([&snp, &lsa_region, &read_section] {
|
||||
try {
|
||||
// Allocating sections require the region to be reclaimable
|
||||
// which means that they cannot be nested.
|
||||
// It is, however, possible, that if the snapshot is taken
|
||||
// inside an allocating section and then an exception is thrown
|
||||
// this function will be called to clean up even though we
|
||||
// still will be in the context of the allocating section.
|
||||
if (lsa_region.reclaiming_enabled()) {
|
||||
read_section(lsa_region, [&snp] {
|
||||
snp->merge_partition_versions();
|
||||
});
|
||||
}
|
||||
// Deliberately empty handler: a failed merge is ignored.
} catch (...) { }
|
||||
// Release the snapshot here, while the region allocator is still installed.
snp = {};
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
template <typename MemoryAccounter = partition_snapshot_reader_dummy_accounter>
|
||||
class partition_snapshot_flat_reader : public flat_mutation_reader::impl, public MemoryAccounter {
|
||||
struct rows_position {
|
||||
@@ -59,7 +87,7 @@ class partition_snapshot_flat_reader : public flat_mutation_reader::impl, public
|
||||
position_in_partition::equal_compare _eq;
|
||||
heap_compare _heap_cmp;
|
||||
|
||||
partition_snapshot_ptr _snapshot;
|
||||
lw_shared_ptr<partition_snapshot> _snapshot;
|
||||
|
||||
logalloc::region& _region;
|
||||
logalloc::allocating_section& _read_section;
|
||||
@@ -71,7 +99,7 @@ class partition_snapshot_flat_reader : public flat_mutation_reader::impl, public
|
||||
private:
|
||||
template<typename Function>
|
||||
decltype(auto) in_alloc_section(Function&& fn) {
|
||||
return _read_section.with_reclaiming_disabled(_region, [&] {
|
||||
return _read_section.with_reclaiming_disabled(_region, [&] {
|
||||
return with_linearized_managed_bytes([&] {
|
||||
return fn();
|
||||
});
|
||||
@@ -127,7 +155,7 @@ class partition_snapshot_flat_reader : public flat_mutation_reader::impl, public
|
||||
return !_clustering_rows.empty();
|
||||
}
|
||||
public:
|
||||
explicit lsa_partition_reader(const schema& s, partition_snapshot_ptr snp,
|
||||
explicit lsa_partition_reader(const schema& s, lw_shared_ptr<partition_snapshot> snp,
|
||||
logalloc::region& region, logalloc::allocating_section& read_section,
|
||||
bool digest_requested)
|
||||
: _schema(s)
|
||||
@@ -140,6 +168,10 @@ class partition_snapshot_flat_reader : public flat_mutation_reader::impl, public
|
||||
, _digest_requested(digest_requested)
|
||||
{ }
|
||||
|
||||
~lsa_partition_reader() {
|
||||
maybe_merge_versions(_snapshot, _region, _read_section);
|
||||
}
|
||||
|
||||
template<typename Function>
|
||||
decltype(auto) with_reserve(Function&& fn) {
|
||||
return _read_section.with_reserve(std::forward<Function>(fn));
|
||||
@@ -155,7 +187,7 @@ class partition_snapshot_flat_reader : public flat_mutation_reader::impl, public
|
||||
return _snapshot->static_row(_digest_requested);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
// Returns next clustered row in the range.
|
||||
// If the ck_range is the same as the one used previously last_row needs
|
||||
// to be engaged and equal the position of the row returned last time.
|
||||
@@ -266,7 +298,7 @@ private:
|
||||
}
|
||||
public:
|
||||
template <typename... Args>
|
||||
partition_snapshot_flat_reader(schema_ptr s, dht::decorated_key dk, partition_snapshot_ptr snp,
|
||||
partition_snapshot_flat_reader(schema_ptr s, dht::decorated_key dk, lw_shared_ptr<partition_snapshot> snp,
|
||||
query::clustering_key_filter_ranges crr, bool digest_requested,
|
||||
logalloc::region& region, logalloc::allocating_section& read_section,
|
||||
boost::any pointer_to_container, Args&&... args)
|
||||
@@ -312,7 +344,7 @@ inline flat_mutation_reader
|
||||
make_partition_snapshot_flat_reader(schema_ptr s,
|
||||
dht::decorated_key dk,
|
||||
query::clustering_key_filter_ranges crr,
|
||||
partition_snapshot_ptr snp,
|
||||
lw_shared_ptr<partition_snapshot> snp,
|
||||
bool digest_requested,
|
||||
logalloc::region& region,
|
||||
logalloc::allocating_section& read_section,
|
||||
@@ -333,7 +365,7 @@ inline flat_mutation_reader
|
||||
make_partition_snapshot_flat_reader(schema_ptr s,
|
||||
dht::decorated_key dk,
|
||||
query::clustering_key_filter_ranges crr,
|
||||
partition_snapshot_ptr snp,
|
||||
lw_shared_ptr<partition_snapshot> snp,
|
||||
bool digest_requested,
|
||||
logalloc::region& region,
|
||||
logalloc::allocating_section& read_section,
|
||||
|
||||
@@ -187,49 +187,23 @@ void merge_versions(const schema& s, mutation_partition& newer, mutation_partiti
|
||||
newer = std::move(older);
|
||||
}
|
||||
|
||||
stop_iteration partition_snapshot::merge_partition_versions() {
|
||||
void partition_snapshot::merge_partition_versions() {
|
||||
partition_version_ref& v = version();
|
||||
if (!v.is_unique_owner()) {
|
||||
// Shift _version to the oldest unreferenced version and then keep merging left hand side into it.
|
||||
// This is good for performance because in case we were at the latest version
|
||||
// we leave it for incoming writes and they don't have to create a new one.
|
||||
partition_version* current = &*v;
|
||||
while (current->next() && !current->next()->is_referenced()) {
|
||||
current = current->next();
|
||||
_version = partition_version_ref(*current);
|
||||
auto first_used = &*v;
|
||||
_version = { };
|
||||
while (first_used->prev() && !first_used->is_referenced()) {
|
||||
first_used = first_used->prev();
|
||||
}
|
||||
while (auto prev = current->prev()) {
|
||||
_region.allocator().invalidate_references();
|
||||
if (current->partition().apply_monotonically(*schema(), std::move(prev->partition()), _tracker, is_preemptible::yes) == stop_iteration::no) {
|
||||
return stop_iteration::no;
|
||||
}
|
||||
if (prev->is_referenced()) {
|
||||
_version.release();
|
||||
prev->back_reference() = partition_version_ref(*current, prev->back_reference().is_unique_owner());
|
||||
current_allocator().destroy(prev);
|
||||
return stop_iteration::yes;
|
||||
}
|
||||
current_allocator().destroy(prev);
|
||||
}
|
||||
}
|
||||
return stop_iteration::yes;
|
||||
}
|
||||
|
||||
stop_iteration partition_snapshot::slide_to_oldest() noexcept {
|
||||
partition_version_ref& v = version();
|
||||
if (v.is_unique_owner()) {
|
||||
return stop_iteration::yes;
|
||||
auto current = first_used->next();
|
||||
while (current && !current->is_referenced()) {
|
||||
auto next = current->next();
|
||||
merge_versions(*_schema, first_used->partition(), std::move(current->partition()), _tracker);
|
||||
current_allocator().destroy(current);
|
||||
current = next;
|
||||
}
|
||||
}
|
||||
if (_entry) {
|
||||
_entry->_snapshot = nullptr;
|
||||
_entry = nullptr;
|
||||
}
|
||||
partition_version* current = &*v;
|
||||
while (current->next() && !current->next()->is_referenced()) {
|
||||
current = current->next();
|
||||
_version = partition_version_ref(*current);
|
||||
}
|
||||
return current->prev() ? stop_iteration::no : stop_iteration::yes;
|
||||
}
|
||||
|
||||
unsigned partition_snapshot::version_count()
|
||||
@@ -489,13 +463,16 @@ coroutine partition_entry::apply_to_incomplete(const schema& s,
|
||||
bool can_move = !preemptible && !pe._snapshot;
|
||||
|
||||
auto src_snp = pe.read(reg, pe_cleaner, s.shared_from_this(), no_cache_tracker);
|
||||
partition_snapshot_ptr prev_snp;
|
||||
lw_shared_ptr<partition_snapshot> prev_snp;
|
||||
if (preemptible) {
|
||||
// Reads must see prev_snp until whole update completes so that writes
|
||||
// are not partially visible.
|
||||
prev_snp = read(reg, tracker.cleaner(), s.shared_from_this(), &tracker, phase - 1);
|
||||
}
|
||||
auto dst_snp = read(reg, tracker.cleaner(), s.shared_from_this(), &tracker, phase);
|
||||
auto merge_dst_snp = defer([preemptible, dst_snp, &reg, &alloc] () mutable {
|
||||
maybe_merge_versions(dst_snp, reg, alloc);
|
||||
});
|
||||
|
||||
// Once we start updating the partition, we must keep all snapshots until the update completes,
|
||||
// otherwise partial writes would be published. So the scope of snapshots must enclose the scope
|
||||
@@ -503,6 +480,7 @@ coroutine partition_entry::apply_to_incomplete(const schema& s,
|
||||
// give the caller a chance to store the coroutine object. The code inside coroutine below
|
||||
// runs outside allocating section.
|
||||
return coroutine([&tracker, &s, &alloc, &reg, &acc, can_move, preemptible,
|
||||
merge_dst_snp = std::move(merge_dst_snp), // needs to go away last so that dst_snp is not owned by anyone else
|
||||
cur = partition_snapshot_row_cursor(s, *dst_snp),
|
||||
src_cur = partition_snapshot_row_cursor(s, *src_snp, can_move),
|
||||
dst_snp = std::move(dst_snp),
|
||||
@@ -609,7 +587,7 @@ void partition_entry::upgrade(schema_ptr from, schema_ptr to, mutation_cleaner&
|
||||
remove_or_mark_as_unique_owner(old_version, &cleaner);
|
||||
}
|
||||
|
||||
partition_snapshot_ptr partition_entry::read(logalloc::region& r,
|
||||
lw_shared_ptr<partition_snapshot> partition_entry::read(logalloc::region& r,
|
||||
mutation_cleaner& cleaner, schema_ptr entry_schema, cache_tracker* tracker, partition_snapshot::phase_type phase)
|
||||
{
|
||||
if (_snapshot) {
|
||||
@@ -632,7 +610,7 @@ partition_snapshot_ptr partition_entry::read(logalloc::region& r,
|
||||
|
||||
auto snp = make_lw_shared<partition_snapshot>(entry_schema, r, cleaner, this, tracker, phase);
|
||||
_snapshot = snp.get();
|
||||
return partition_snapshot_ptr(std::move(snp));
|
||||
return snp;
|
||||
}
|
||||
|
||||
std::vector<range_tombstone>
|
||||
@@ -696,13 +674,3 @@ void partition_entry::evict(mutation_cleaner& cleaner) noexcept {
|
||||
remove_or_mark_as_unique_owner(v, &cleaner);
|
||||
}
|
||||
}
|
||||
|
||||
partition_snapshot_ptr::~partition_snapshot_ptr() {
|
||||
if (_snp) {
|
||||
auto&& cleaner = _snp->cleaner();
|
||||
auto snp = _snp.release();
|
||||
if (snp) {
|
||||
cleaner.merge_and_destroy(*snp.release());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,7 +28,6 @@
|
||||
#include "utils/coroutine.hh"
|
||||
|
||||
#include <boost/intrusive/parent_from_member.hpp>
|
||||
#include <boost/intrusive/slist.hpp>
|
||||
|
||||
// This is MVCC implementation for mutation_partitions.
|
||||
//
|
||||
@@ -189,9 +188,8 @@ class partition_version_ref {
|
||||
friend class partition_version;
|
||||
public:
|
||||
partition_version_ref() = default;
|
||||
explicit partition_version_ref(partition_version& pv, bool unique_owner = false) noexcept
|
||||
explicit partition_version_ref(partition_version& pv) noexcept
|
||||
: _version(&pv)
|
||||
, _unique_owner(unique_owner)
|
||||
{
|
||||
assert(!_version->_backref);
|
||||
_version->_backref = this;
|
||||
@@ -302,9 +300,8 @@ private:
|
||||
logalloc::region& _region;
|
||||
mutation_cleaner& _cleaner;
|
||||
cache_tracker* _tracker;
|
||||
boost::intrusive::slist_member_hook<> _cleaner_hook;
|
||||
|
||||
friend class partition_entry;
|
||||
friend class mutation_cleaner_impl;
|
||||
public:
|
||||
explicit partition_snapshot(schema_ptr s,
|
||||
logalloc::region& region,
|
||||
@@ -332,17 +329,10 @@ public:
|
||||
return container_of(v._backref);
|
||||
}
|
||||
|
||||
// If possible, merges the version pointed to by this snapshot with
|
||||
// If possible merges the version pointed to by this snapshot with
|
||||
// adjacent partition versions. Leaves the snapshot in an unspecified state.
|
||||
// Can be retried if previous merge attempt has failed.
|
||||
stop_iteration merge_partition_versions();
|
||||
|
||||
// Prepares the snapshot for cleaning by moving to the right-most unreferenced version.
|
||||
// Returns stop_iteration::yes if there is nothing to merge with and the snapshot
|
||||
// should be collected right away, and stop_iteration::no otherwise.
|
||||
// When returns stop_iteration::no, the snapshots is guaranteed to not be attached
|
||||
// to the latest version.
|
||||
stop_iteration slide_to_oldest() noexcept;
|
||||
void merge_partition_versions();
|
||||
|
||||
~partition_snapshot();
|
||||
|
||||
@@ -367,7 +357,6 @@ public:
|
||||
const schema_ptr& schema() const { return _schema; }
|
||||
logalloc::region& region() const { return _region; }
|
||||
cache_tracker* tracker() const { return _tracker; }
|
||||
mutation_cleaner& cleaner() { return _cleaner; }
|
||||
|
||||
tombstone partition_tombstone() const;
|
||||
::static_row static_row(bool digest_requested) const;
|
||||
@@ -379,36 +368,6 @@ public:
|
||||
std::vector<range_tombstone> range_tombstones();
|
||||
};
|
||||
|
||||
class partition_snapshot_ptr {
|
||||
lw_shared_ptr<partition_snapshot> _snp;
|
||||
public:
|
||||
using value_type = partition_snapshot;
|
||||
partition_snapshot_ptr() = default;
|
||||
partition_snapshot_ptr(partition_snapshot_ptr&&) = default;
|
||||
partition_snapshot_ptr(const partition_snapshot_ptr&) = default;
|
||||
partition_snapshot_ptr(lw_shared_ptr<partition_snapshot> snp) : _snp(std::move(snp)) {}
|
||||
~partition_snapshot_ptr();
|
||||
partition_snapshot_ptr& operator=(partition_snapshot_ptr&& other) noexcept {
|
||||
if (this != &other) {
|
||||
this->~partition_snapshot_ptr();
|
||||
new (this) partition_snapshot_ptr(std::move(other));
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
partition_snapshot_ptr& operator=(const partition_snapshot_ptr& other) noexcept {
|
||||
if (this != &other) {
|
||||
this->~partition_snapshot_ptr();
|
||||
new (this) partition_snapshot_ptr(other);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
partition_snapshot& operator*() { return *_snp; }
|
||||
const partition_snapshot& operator*() const { return *_snp; }
|
||||
partition_snapshot* operator->() { return &*_snp; }
|
||||
const partition_snapshot* operator->() const { return &*_snp; }
|
||||
explicit operator bool() const { return bool(_snp); }
|
||||
};
|
||||
|
||||
class real_dirty_memory_accounter;
|
||||
|
||||
// Represents mutation_partition with snapshotting support a la MVCC.
|
||||
@@ -564,7 +523,7 @@ public:
|
||||
void upgrade(schema_ptr from, schema_ptr to, mutation_cleaner&, cache_tracker*);
|
||||
|
||||
// Snapshots with different values of phase will point to different partition_version objects.
|
||||
partition_snapshot_ptr read(logalloc::region& region,
|
||||
lw_shared_ptr<partition_snapshot> read(logalloc::region& region,
|
||||
mutation_cleaner&,
|
||||
schema_ptr entry_schema,
|
||||
cache_tracker*,
|
||||
|
||||
@@ -121,7 +121,7 @@ public:
|
||||
position_in_partition_view(const clustering_key_prefix& ck)
|
||||
: _type(partition_region::clustered), _ck(&ck) { }
|
||||
position_in_partition_view(range_tag_t, bound_view bv)
|
||||
: _type(partition_region::clustered), _bound_weight(position_weight(bv.kind())), _ck(&bv.prefix()) { }
|
||||
: _type(partition_region::clustered), _bound_weight(position_weight(bv.kind)), _ck(&bv.prefix) { }
|
||||
|
||||
static position_in_partition_view for_range_start(const query::clustering_range& r) {
|
||||
return {position_in_partition_view::range_tag_t(), bound_view::from_range_start(r)};
|
||||
@@ -214,7 +214,7 @@ public:
|
||||
position_in_partition(before_clustering_row_tag_t, clustering_key_prefix ck)
|
||||
: _type(partition_region::clustered), _bound_weight(-1), _ck(std::move(ck)) { }
|
||||
position_in_partition(range_tag_t, bound_view bv)
|
||||
: _type(partition_region::clustered), _bound_weight(position_weight(bv.kind())), _ck(bv.prefix()) { }
|
||||
: _type(partition_region::clustered), _bound_weight(position_weight(bv.kind)), _ck(bv.prefix) { }
|
||||
position_in_partition(after_static_row_tag_t) :
|
||||
position_in_partition(range_tag_t(), bound_view::bottom()) { }
|
||||
explicit position_in_partition(position_in_partition_view view)
|
||||
@@ -273,6 +273,11 @@ public:
|
||||
return is_partition_end() || (_ck && _ck->is_empty(s) && _bound_weight > 0);
|
||||
}
|
||||
|
||||
bool is_before_all_clustered_rows(const schema& s) const {
|
||||
return _type < partition_region::clustered
|
||||
|| (_type == partition_region::clustered && _ck->is_empty(s) && _bound_weight < 0);
|
||||
}
|
||||
|
||||
template<typename Hasher>
|
||||
void feed_hash(Hasher& hasher, const schema& s) const {
|
||||
::feed_hash(hasher, _bound_weight);
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user