mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-20 08:30:35 +00:00
Compare commits
174 Commits
copilot/fi
...
next-2.3
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b252bba4a2 | ||
|
|
a0b9fcc041 | ||
|
|
35c9b675c1 | ||
|
|
d71836fef7 | ||
|
|
f8e150e97c | ||
|
|
10c300f894 | ||
|
|
de1d3e5c6b | ||
|
|
69810c13ca | ||
|
|
9b025a5742 | ||
|
|
74eebc4cab | ||
|
|
9b2ca4ee44 | ||
|
|
773bf45774 | ||
|
|
c6705b4335 | ||
|
|
3997871b4d | ||
|
|
4ff1d731bd | ||
|
|
0e0f9143c9 | ||
|
|
9d809d6ea4 | ||
|
|
630d599c34 | ||
|
|
0933c1a00a | ||
|
|
7a7099fcfb | ||
|
|
50235aacb4 | ||
|
|
e888009f12 | ||
|
|
a19615ee9b | ||
|
|
357ca67fda | ||
|
|
7818c63eb1 | ||
|
|
da10eae18c | ||
|
|
d5292cd3ec | ||
|
|
9cb35361d9 | ||
|
|
3e285248be | ||
|
|
6f10ccb441 | ||
|
|
df420499bc | ||
|
|
d29527b4e1 | ||
|
|
8a90e242e4 | ||
|
|
8a78c0aba9 | ||
|
|
8a2bbcf138 | ||
|
|
22c891e6df | ||
|
|
1841d0c2d9 | ||
|
|
e10107fe5a | ||
|
|
0b3a4679db | ||
|
|
ba60d666a9 | ||
|
|
6ea4d0b75c | ||
|
|
8c5911f312 | ||
|
|
de00d7f5a1 | ||
|
|
e5f9dae4bb | ||
|
|
e13e796290 | ||
|
|
336c771663 | ||
|
|
82968afc25 | ||
|
|
383dcffb53 | ||
|
|
0c2abc007c | ||
|
|
1498c4f150 | ||
|
|
f388992a94 | ||
|
|
310540c11f | ||
|
|
7d833023cc | ||
|
|
d94ac196e0 | ||
|
|
1d7430995e | ||
|
|
b662a7f8a4 | ||
|
|
447ad72882 | ||
|
|
b8485d3bce | ||
|
|
034b0f50db | ||
|
|
12ec0becf3 | ||
|
|
666b19552d | ||
|
|
178f870a03 | ||
|
|
1b18f16dc1 | ||
|
|
28934575e4 | ||
|
|
182cbeefb0 | ||
|
|
b70fc41a90 | ||
|
|
debfc795b2 | ||
|
|
0d094575ec | ||
|
|
20baef69a9 | ||
|
|
1bac88601d | ||
|
|
e581fd1463 | ||
|
|
b366bff998 | ||
|
|
38e6984ba5 | ||
|
|
332f76579e | ||
|
|
315a03cf6c | ||
|
|
1847dc7a6a | ||
|
|
dd11b5987e | ||
|
|
a134e8699a | ||
|
|
bd7dcbb8d2 | ||
|
|
74e61528a6 | ||
|
|
5eb4fde2d5 | ||
|
|
cc0703f8ca | ||
|
|
678283a5bb | ||
|
|
552c0d7641 | ||
|
|
860c06660b | ||
|
|
db733ba075 | ||
|
|
88677d39c8 | ||
|
|
d767dee5ec | ||
|
|
702f6ee1b7 | ||
|
|
473b9aec65 | ||
|
|
b548061257 | ||
|
|
01165a9ae7 | ||
|
|
5cdb963768 | ||
|
|
7c9b9a4e24 | ||
|
|
f475c65ae6 | ||
|
|
687372bc48 | ||
|
|
65c140121c | ||
|
|
ed68ad220f | ||
|
|
35f4b8fbbe | ||
|
|
48012fe418 | ||
|
|
c862ccda91 | ||
|
|
83b1057c4b | ||
|
|
c1cb779dd2 | ||
|
|
b47d18f9fd | ||
|
|
f8713b019e | ||
|
|
cd5e4eace5 | ||
|
|
4fb5403670 | ||
|
|
e9df6c42ce | ||
|
|
5fdf492ccc | ||
|
|
fd2b02a12c | ||
|
|
f8cec2f891 | ||
|
|
e4d6577ef2 | ||
|
|
346027248d | ||
|
|
2cf6191353 | ||
|
|
b52d647de2 | ||
|
|
f7c96a37f1 | ||
|
|
ae71ffdcfd | ||
|
|
a235900388 | ||
|
|
be9f150341 | ||
|
|
2478fa1f6e | ||
|
|
d95ac1826e | ||
|
|
6fc17345e9 | ||
|
|
4bfa0ae247 | ||
|
|
174b7870e6 | ||
|
|
e95b4ee825 | ||
|
|
464305de1c | ||
|
|
3a1a9e1a11 | ||
|
|
90dac5d944 | ||
|
|
e5a83d105c | ||
|
|
9b4a0a2879 | ||
|
|
adad12ddc3 | ||
|
|
a77bb1fe34 | ||
|
|
3c7e6dfdb9 | ||
|
|
fab136ae1d | ||
|
|
a4218f536b | ||
|
|
9f4431ef04 | ||
|
|
66250bf8cc | ||
|
|
88fe3c2694 | ||
|
|
db4c3d3e52 | ||
|
|
ca22a1cd1a | ||
|
|
f9b702764e | ||
|
|
54701bd95c | ||
|
|
30eca5f534 | ||
|
|
cd057d3882 | ||
|
|
c5a5a2265e | ||
|
|
3e482c6c9d | ||
|
|
5b6cadb890 | ||
|
|
9cf8cd6c02 | ||
|
|
b34567b69b | ||
|
|
02b763ed97 | ||
|
|
05500a52d7 | ||
|
|
4afa558e97 | ||
|
|
f3956421f7 | ||
|
|
a17a6ce8f5 | ||
|
|
58a362c1f2 | ||
|
|
361b2dd7a5 | ||
|
|
f6a2bafae2 | ||
|
|
2ec25a55cd | ||
|
|
d3fb7c5515 | ||
|
|
b1ac6a36f2 | ||
|
|
8cba125bce | ||
|
|
f46f9f7533 | ||
|
|
090d991f8e | ||
|
|
ae15a80d01 | ||
|
|
6cf902343a | ||
|
|
d5e59f671c | ||
|
|
38944655c5 | ||
|
|
06e274ff34 | ||
|
|
c24d4a8acb | ||
|
|
5f95b76c65 | ||
|
|
0bdb7e1e7c | ||
|
|
56ea4f3154 | ||
|
|
d9c178063c | ||
|
|
b21b7f73b9 |
2
.gitmodules
vendored
2
.gitmodules
vendored
@@ -1,6 +1,6 @@
|
|||||||
[submodule "seastar"]
|
[submodule "seastar"]
|
||||||
path = seastar
|
path = seastar
|
||||||
url = ../seastar
|
url = ../scylla-seastar
|
||||||
ignore = dirty
|
ignore = dirty
|
||||||
[submodule "swagger-ui"]
|
[submodule "swagger-ui"]
|
||||||
path = swagger-ui
|
path = swagger-ui
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
VERSION=666.development
|
VERSION=2.3.6
|
||||||
|
|
||||||
if test -f version
|
if test -f version
|
||||||
then
|
then
|
||||||
|
|||||||
@@ -2228,11 +2228,11 @@
|
|||||||
"description":"The column family"
|
"description":"The column family"
|
||||||
},
|
},
|
||||||
"total":{
|
"total":{
|
||||||
"type":"int",
|
"type":"long",
|
||||||
"description":"The total snapshot size"
|
"description":"The total snapshot size"
|
||||||
},
|
},
|
||||||
"live":{
|
"live":{
|
||||||
"type":"int",
|
"type":"long",
|
||||||
"description":"The live snapshot size"
|
"description":"The live snapshot size"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -187,7 +187,24 @@ size_t atomic_cell_or_collection::external_memory_usage(const abstract_type& t)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
auto ctx = data::cell::context(_data.get(), t.imr_state().type_info());
|
auto ctx = data::cell::context(_data.get(), t.imr_state().type_info());
|
||||||
return data::cell::structure::serialized_object_size(_data.get(), ctx);
|
|
||||||
|
auto view = data::cell::structure::make_view(_data.get(), ctx);
|
||||||
|
auto flags = view.get<data::cell::tags::flags>();
|
||||||
|
|
||||||
|
size_t external_value_size = 0;
|
||||||
|
if (flags.get<data::cell::tags::external_data>()) {
|
||||||
|
if (flags.get<data::cell::tags::collection>()) {
|
||||||
|
external_value_size = get_collection_mutation_view(_data.get()).data.size_bytes();
|
||||||
|
} else {
|
||||||
|
auto cell_view = data::cell::atomic_cell_view(t.imr_state().type_info(), view);
|
||||||
|
external_value_size = cell_view.value_size();
|
||||||
|
}
|
||||||
|
// Add overhead of chunk headers. The last one is a special case.
|
||||||
|
external_value_size += (external_value_size - 1) / data::cell::maximum_external_chunk_length * data::cell::external_chunk_overhead;
|
||||||
|
external_value_size += data::cell::external_last_chunk_overhead;
|
||||||
|
}
|
||||||
|
return data::cell::structure::serialized_object_size(_data.get(), ctx)
|
||||||
|
+ imr_object_type::size_overhead + external_value_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::ostream& operator<<(std::ostream& os, const atomic_cell_or_collection& c) {
|
std::ostream& operator<<(std::ostream& os, const atomic_cell_or_collection& c) {
|
||||||
|
|||||||
@@ -28,6 +28,7 @@
|
|||||||
#include "database.hh"
|
#include "database.hh"
|
||||||
#include "schema_builder.hh"
|
#include "schema_builder.hh"
|
||||||
#include "service/migration_manager.hh"
|
#include "service/migration_manager.hh"
|
||||||
|
#include "timeout_config.hh"
|
||||||
|
|
||||||
namespace auth {
|
namespace auth {
|
||||||
|
|
||||||
@@ -94,4 +95,10 @@ future<> wait_for_schema_agreement(::service::migration_manager& mm, const datab
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const timeout_config& internal_distributed_timeout_config() noexcept {
|
||||||
|
static const auto t = 5s;
|
||||||
|
static const timeout_config tc{t, t, t, t, t, t, t};
|
||||||
|
return tc;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -38,6 +38,7 @@
|
|||||||
using namespace std::chrono_literals;
|
using namespace std::chrono_literals;
|
||||||
|
|
||||||
class database;
|
class database;
|
||||||
|
class timeout_config;
|
||||||
|
|
||||||
namespace service {
|
namespace service {
|
||||||
class migration_manager;
|
class migration_manager;
|
||||||
@@ -82,4 +83,9 @@ future<> create_metadata_table_if_missing(
|
|||||||
|
|
||||||
future<> wait_for_schema_agreement(::service::migration_manager&, const database&);
|
future<> wait_for_schema_agreement(::service::migration_manager&, const database&);
|
||||||
|
|
||||||
|
///
|
||||||
|
/// Time-outs for internal, non-local CQL queries.
|
||||||
|
///
|
||||||
|
const timeout_config& internal_distributed_timeout_config() noexcept;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -228,7 +228,7 @@ default_authorizer::modify(
|
|||||||
return _qp.process(
|
return _qp.process(
|
||||||
query,
|
query,
|
||||||
db::consistency_level::ONE,
|
db::consistency_level::ONE,
|
||||||
infinite_timeout_config,
|
internal_distributed_timeout_config(),
|
||||||
{permissions::to_strings(set), sstring(role_name), resource.name()}).discard_result();
|
{permissions::to_strings(set), sstring(role_name), resource.name()}).discard_result();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -254,7 +254,7 @@ future<std::vector<permission_details>> default_authorizer::list_all() const {
|
|||||||
return _qp.process(
|
return _qp.process(
|
||||||
query,
|
query,
|
||||||
db::consistency_level::ONE,
|
db::consistency_level::ONE,
|
||||||
infinite_timeout_config,
|
internal_distributed_timeout_config(),
|
||||||
{},
|
{},
|
||||||
true).then([](::shared_ptr<cql3::untyped_result_set> results) {
|
true).then([](::shared_ptr<cql3::untyped_result_set> results) {
|
||||||
std::vector<permission_details> all_details;
|
std::vector<permission_details> all_details;
|
||||||
@@ -282,7 +282,7 @@ future<> default_authorizer::revoke_all(stdx::string_view role_name) const {
|
|||||||
return _qp.process(
|
return _qp.process(
|
||||||
query,
|
query,
|
||||||
db::consistency_level::ONE,
|
db::consistency_level::ONE,
|
||||||
infinite_timeout_config,
|
internal_distributed_timeout_config(),
|
||||||
{sstring(role_name)}).discard_result().handle_exception([role_name](auto ep) {
|
{sstring(role_name)}).discard_result().handle_exception([role_name](auto ep) {
|
||||||
try {
|
try {
|
||||||
std::rethrow_exception(ep);
|
std::rethrow_exception(ep);
|
||||||
|
|||||||
@@ -149,7 +149,9 @@ static sstring gensalt() {
|
|||||||
// blowfish 2011 fix, blowfish, sha512, sha256, md5
|
// blowfish 2011 fix, blowfish, sha512, sha256, md5
|
||||||
for (sstring pfx : { "$2y$", "$2a$", "$6$", "$5$", "$1$" }) {
|
for (sstring pfx : { "$2y$", "$2a$", "$6$", "$5$", "$1$" }) {
|
||||||
salt = pfx + input;
|
salt = pfx + input;
|
||||||
if (crypt_r("fisk", salt.c_str(), &tlcrypt)) {
|
const char* e = crypt_r("fisk", salt.c_str(), &tlcrypt);
|
||||||
|
|
||||||
|
if (e && (e[0] != '*')) {
|
||||||
prefix = pfx;
|
prefix = pfx;
|
||||||
return salt;
|
return salt;
|
||||||
}
|
}
|
||||||
@@ -184,7 +186,7 @@ future<> password_authenticator::migrate_legacy_metadata() const {
|
|||||||
return _qp.process(
|
return _qp.process(
|
||||||
query,
|
query,
|
||||||
db::consistency_level::QUORUM,
|
db::consistency_level::QUORUM,
|
||||||
infinite_timeout_config).then([this](::shared_ptr<cql3::untyped_result_set> results) {
|
internal_distributed_timeout_config()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
|
||||||
return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
|
return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
|
||||||
auto username = row.get_as<sstring>("username");
|
auto username = row.get_as<sstring>("username");
|
||||||
auto salted_hash = row.get_as<sstring>(SALTED_HASH);
|
auto salted_hash = row.get_as<sstring>(SALTED_HASH);
|
||||||
@@ -192,7 +194,7 @@ future<> password_authenticator::migrate_legacy_metadata() const {
|
|||||||
return _qp.process(
|
return _qp.process(
|
||||||
update_row_query,
|
update_row_query,
|
||||||
consistency_for_user(username),
|
consistency_for_user(username),
|
||||||
infinite_timeout_config,
|
internal_distributed_timeout_config(),
|
||||||
{std::move(salted_hash), username}).discard_result();
|
{std::move(salted_hash), username}).discard_result();
|
||||||
}).finally([results] {});
|
}).finally([results] {});
|
||||||
}).then([] {
|
}).then([] {
|
||||||
@@ -209,7 +211,7 @@ future<> password_authenticator::create_default_if_missing() const {
|
|||||||
return _qp.process(
|
return _qp.process(
|
||||||
update_row_query,
|
update_row_query,
|
||||||
db::consistency_level::QUORUM,
|
db::consistency_level::QUORUM,
|
||||||
infinite_timeout_config,
|
internal_distributed_timeout_config(),
|
||||||
{hashpw(DEFAULT_USER_PASSWORD), DEFAULT_USER_NAME}).then([](auto&&) {
|
{hashpw(DEFAULT_USER_PASSWORD), DEFAULT_USER_NAME}).then([](auto&&) {
|
||||||
plogger.info("Created default superuser authentication record.");
|
plogger.info("Created default superuser authentication record.");
|
||||||
});
|
});
|
||||||
@@ -309,13 +311,17 @@ future<authenticated_user> password_authenticator::authenticate(
|
|||||||
return _qp.process(
|
return _qp.process(
|
||||||
query,
|
query,
|
||||||
consistency_for_user(username),
|
consistency_for_user(username),
|
||||||
infinite_timeout_config,
|
internal_distributed_timeout_config(),
|
||||||
{username},
|
{username},
|
||||||
true);
|
true);
|
||||||
}).then_wrapped([=](future<::shared_ptr<cql3::untyped_result_set>> f) {
|
}).then_wrapped([=](future<::shared_ptr<cql3::untyped_result_set>> f) {
|
||||||
try {
|
try {
|
||||||
auto res = f.get0();
|
auto res = f.get0();
|
||||||
if (res->empty() || !checkpw(password, res->one().get_as<sstring>(SALTED_HASH))) {
|
auto salted_hash = std::experimental::optional<sstring>();
|
||||||
|
if (!res->empty()) {
|
||||||
|
salted_hash = res->one().get_opt<sstring>(SALTED_HASH);
|
||||||
|
}
|
||||||
|
if (!salted_hash || !checkpw(password, *salted_hash)) {
|
||||||
throw exceptions::authentication_exception("Username and/or password are incorrect");
|
throw exceptions::authentication_exception("Username and/or password are incorrect");
|
||||||
}
|
}
|
||||||
return make_ready_future<authenticated_user>(username);
|
return make_ready_future<authenticated_user>(username);
|
||||||
@@ -337,7 +343,7 @@ future<> password_authenticator::create(stdx::string_view role_name, const authe
|
|||||||
return _qp.process(
|
return _qp.process(
|
||||||
update_row_query,
|
update_row_query,
|
||||||
consistency_for_user(role_name),
|
consistency_for_user(role_name),
|
||||||
infinite_timeout_config,
|
internal_distributed_timeout_config(),
|
||||||
{hashpw(*options.password), sstring(role_name)}).discard_result();
|
{hashpw(*options.password), sstring(role_name)}).discard_result();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -355,7 +361,7 @@ future<> password_authenticator::alter(stdx::string_view role_name, const authen
|
|||||||
return _qp.process(
|
return _qp.process(
|
||||||
query,
|
query,
|
||||||
consistency_for_user(role_name),
|
consistency_for_user(role_name),
|
||||||
infinite_timeout_config,
|
internal_distributed_timeout_config(),
|
||||||
{hashpw(*options.password), sstring(role_name)}).discard_result();
|
{hashpw(*options.password), sstring(role_name)}).discard_result();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -366,7 +372,10 @@ future<> password_authenticator::drop(stdx::string_view name) const {
|
|||||||
meta::roles_table::qualified_name(),
|
meta::roles_table::qualified_name(),
|
||||||
meta::roles_table::role_col_name);
|
meta::roles_table::role_col_name);
|
||||||
|
|
||||||
return _qp.process(query, consistency_for_user(name), infinite_timeout_config, {sstring(name)}).discard_result();
|
return _qp.process(
|
||||||
|
query, consistency_for_user(name),
|
||||||
|
internal_distributed_timeout_config(),
|
||||||
|
{sstring(name)}).discard_result();
|
||||||
}
|
}
|
||||||
|
|
||||||
future<custom_options> password_authenticator::query_custom_options(stdx::string_view role_name) const {
|
future<custom_options> password_authenticator::query_custom_options(stdx::string_view role_name) const {
|
||||||
|
|||||||
@@ -79,7 +79,7 @@ future<bool> default_role_row_satisfies(
|
|||||||
return qp.process(
|
return qp.process(
|
||||||
query,
|
query,
|
||||||
db::consistency_level::QUORUM,
|
db::consistency_level::QUORUM,
|
||||||
infinite_timeout_config,
|
internal_distributed_timeout_config(),
|
||||||
{meta::DEFAULT_SUPERUSER_NAME},
|
{meta::DEFAULT_SUPERUSER_NAME},
|
||||||
true).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
|
true).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
|
||||||
if (results->empty()) {
|
if (results->empty()) {
|
||||||
@@ -104,7 +104,7 @@ future<bool> any_nondefault_role_row_satisfies(
|
|||||||
return qp.process(
|
return qp.process(
|
||||||
query,
|
query,
|
||||||
db::consistency_level::QUORUM,
|
db::consistency_level::QUORUM,
|
||||||
infinite_timeout_config).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
|
internal_distributed_timeout_config()).then([&p](::shared_ptr<cql3::untyped_result_set> results) {
|
||||||
if (results->empty()) {
|
if (results->empty()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -196,6 +196,10 @@ future<> service::start() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
future<> service::stop() {
|
future<> service::stop() {
|
||||||
|
// Only one of the shards has the listener registered, but let's try to
|
||||||
|
// unregister on each one just to make sure.
|
||||||
|
_migration_manager.unregister_listener(_migration_listener.get());
|
||||||
|
|
||||||
return _permissions_cache->stop().then([this] {
|
return _permissions_cache->stop().then([this] {
|
||||||
return when_all_succeed(_role_manager->stop(), _authorizer->stop(), _authenticator->stop());
|
return when_all_succeed(_role_manager->stop(), _authorizer->stop(), _authenticator->stop());
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -89,7 +89,7 @@ static future<stdx::optional<record>> find_record(cql3::query_processor& qp, std
|
|||||||
return qp.process(
|
return qp.process(
|
||||||
query,
|
query,
|
||||||
consistency_for_role(role_name),
|
consistency_for_role(role_name),
|
||||||
infinite_timeout_config,
|
internal_distributed_timeout_config(),
|
||||||
{sstring(role_name)},
|
{sstring(role_name)},
|
||||||
true).then([](::shared_ptr<cql3::untyped_result_set> results) {
|
true).then([](::shared_ptr<cql3::untyped_result_set> results) {
|
||||||
if (results->empty()) {
|
if (results->empty()) {
|
||||||
@@ -174,7 +174,7 @@ future<> standard_role_manager::create_default_role_if_missing() const {
|
|||||||
return _qp.process(
|
return _qp.process(
|
||||||
query,
|
query,
|
||||||
db::consistency_level::QUORUM,
|
db::consistency_level::QUORUM,
|
||||||
infinite_timeout_config,
|
internal_distributed_timeout_config(),
|
||||||
{meta::DEFAULT_SUPERUSER_NAME}).then([](auto&&) {
|
{meta::DEFAULT_SUPERUSER_NAME}).then([](auto&&) {
|
||||||
log.info("Created default superuser role '{}'.", meta::DEFAULT_SUPERUSER_NAME);
|
log.info("Created default superuser role '{}'.", meta::DEFAULT_SUPERUSER_NAME);
|
||||||
return make_ready_future<>();
|
return make_ready_future<>();
|
||||||
@@ -201,7 +201,7 @@ future<> standard_role_manager::migrate_legacy_metadata() const {
|
|||||||
return _qp.process(
|
return _qp.process(
|
||||||
query,
|
query,
|
||||||
db::consistency_level::QUORUM,
|
db::consistency_level::QUORUM,
|
||||||
infinite_timeout_config).then([this](::shared_ptr<cql3::untyped_result_set> results) {
|
internal_distributed_timeout_config()).then([this](::shared_ptr<cql3::untyped_result_set> results) {
|
||||||
return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
|
return do_for_each(*results, [this](const cql3::untyped_result_set_row& row) {
|
||||||
role_config config;
|
role_config config;
|
||||||
config.is_superuser = row.get_as<bool>("super");
|
config.is_superuser = row.get_as<bool>("super");
|
||||||
@@ -263,7 +263,7 @@ future<> standard_role_manager::create_or_replace(stdx::string_view role_name, c
|
|||||||
return _qp.process(
|
return _qp.process(
|
||||||
query,
|
query,
|
||||||
consistency_for_role(role_name),
|
consistency_for_role(role_name),
|
||||||
infinite_timeout_config,
|
internal_distributed_timeout_config(),
|
||||||
{sstring(role_name), c.is_superuser, c.can_login},
|
{sstring(role_name), c.is_superuser, c.can_login},
|
||||||
true).discard_result();
|
true).discard_result();
|
||||||
}
|
}
|
||||||
@@ -307,7 +307,7 @@ standard_role_manager::alter(stdx::string_view role_name, const role_config_upda
|
|||||||
build_column_assignments(u),
|
build_column_assignments(u),
|
||||||
meta::roles_table::role_col_name),
|
meta::roles_table::role_col_name),
|
||||||
consistency_for_role(role_name),
|
consistency_for_role(role_name),
|
||||||
infinite_timeout_config,
|
internal_distributed_timeout_config(),
|
||||||
{sstring(role_name)}).discard_result();
|
{sstring(role_name)}).discard_result();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -327,7 +327,7 @@ future<> standard_role_manager::drop(stdx::string_view role_name) const {
|
|||||||
return _qp.process(
|
return _qp.process(
|
||||||
query,
|
query,
|
||||||
consistency_for_role(role_name),
|
consistency_for_role(role_name),
|
||||||
infinite_timeout_config,
|
internal_distributed_timeout_config(),
|
||||||
{sstring(role_name)}).then([this, role_name](::shared_ptr<cql3::untyped_result_set> members) {
|
{sstring(role_name)}).then([this, role_name](::shared_ptr<cql3::untyped_result_set> members) {
|
||||||
return parallel_for_each(
|
return parallel_for_each(
|
||||||
members->begin(),
|
members->begin(),
|
||||||
@@ -367,7 +367,7 @@ future<> standard_role_manager::drop(stdx::string_view role_name) const {
|
|||||||
return _qp.process(
|
return _qp.process(
|
||||||
query,
|
query,
|
||||||
consistency_for_role(role_name),
|
consistency_for_role(role_name),
|
||||||
infinite_timeout_config,
|
internal_distributed_timeout_config(),
|
||||||
{sstring(role_name)}).discard_result();
|
{sstring(role_name)}).discard_result();
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -394,7 +394,7 @@ standard_role_manager::modify_membership(
|
|||||||
return _qp.process(
|
return _qp.process(
|
||||||
query,
|
query,
|
||||||
consistency_for_role(grantee_name),
|
consistency_for_role(grantee_name),
|
||||||
infinite_timeout_config,
|
internal_distributed_timeout_config(),
|
||||||
{role_set{sstring(role_name)}, sstring(grantee_name)}).discard_result();
|
{role_set{sstring(role_name)}, sstring(grantee_name)}).discard_result();
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -406,7 +406,7 @@ standard_role_manager::modify_membership(
|
|||||||
"INSERT INTO %s (role, member) VALUES (?, ?)",
|
"INSERT INTO %s (role, member) VALUES (?, ?)",
|
||||||
meta::role_members_table::qualified_name()),
|
meta::role_members_table::qualified_name()),
|
||||||
consistency_for_role(role_name),
|
consistency_for_role(role_name),
|
||||||
infinite_timeout_config,
|
internal_distributed_timeout_config(),
|
||||||
{sstring(role_name), sstring(grantee_name)}).discard_result();
|
{sstring(role_name), sstring(grantee_name)}).discard_result();
|
||||||
|
|
||||||
case membership_change::remove:
|
case membership_change::remove:
|
||||||
@@ -415,7 +415,7 @@ standard_role_manager::modify_membership(
|
|||||||
"DELETE FROM %s WHERE role = ? AND member = ?",
|
"DELETE FROM %s WHERE role = ? AND member = ?",
|
||||||
meta::role_members_table::qualified_name()),
|
meta::role_members_table::qualified_name()),
|
||||||
consistency_for_role(role_name),
|
consistency_for_role(role_name),
|
||||||
infinite_timeout_config,
|
internal_distributed_timeout_config(),
|
||||||
{sstring(role_name), sstring(grantee_name)}).discard_result();
|
{sstring(role_name), sstring(grantee_name)}).discard_result();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -516,7 +516,10 @@ future<role_set> standard_role_manager::query_all() const {
|
|||||||
// To avoid many copies of a view.
|
// To avoid many copies of a view.
|
||||||
static const auto role_col_name_string = sstring(meta::roles_table::role_col_name);
|
static const auto role_col_name_string = sstring(meta::roles_table::role_col_name);
|
||||||
|
|
||||||
return _qp.process(query, db::consistency_level::QUORUM, infinite_timeout_config).then([](::shared_ptr<cql3::untyped_result_set> results) {
|
return _qp.process(
|
||||||
|
query,
|
||||||
|
db::consistency_level::QUORUM,
|
||||||
|
internal_distributed_timeout_config()).then([](::shared_ptr<cql3::untyped_result_set> results) {
|
||||||
role_set roles;
|
role_set roles;
|
||||||
|
|
||||||
std::transform(
|
std::transform(
|
||||||
|
|||||||
@@ -60,6 +60,7 @@ class cache_flat_mutation_reader final : public flat_mutation_reader::impl {
|
|||||||
// - _next_row_in_range = _next.position() < _upper_bound
|
// - _next_row_in_range = _next.position() < _upper_bound
|
||||||
// - _last_row points at a direct predecessor of the next row which is going to be read.
|
// - _last_row points at a direct predecessor of the next row which is going to be read.
|
||||||
// Used for populating continuity.
|
// Used for populating continuity.
|
||||||
|
// - _population_range_starts_before_all_rows is set accordingly
|
||||||
reading_from_underlying,
|
reading_from_underlying,
|
||||||
|
|
||||||
end_of_stream
|
end_of_stream
|
||||||
@@ -86,6 +87,13 @@ class cache_flat_mutation_reader final : public flat_mutation_reader::impl {
|
|||||||
partition_snapshot_row_cursor _next_row;
|
partition_snapshot_row_cursor _next_row;
|
||||||
bool _next_row_in_range = false;
|
bool _next_row_in_range = false;
|
||||||
|
|
||||||
|
// True iff current population interval, since the previous clustering row, starts before all clustered rows.
|
||||||
|
// We cannot just look at _lower_bound, because emission of range tombstones changes _lower_bound and
|
||||||
|
// because we mark clustering intervals as continuous when consuming a clustering_row, it would prevent
|
||||||
|
// us from marking the interval as continuous.
|
||||||
|
// Valid when _state == reading_from_underlying.
|
||||||
|
bool _population_range_starts_before_all_rows;
|
||||||
|
|
||||||
// Whether _lower_bound was changed within current fill_buffer().
|
// Whether _lower_bound was changed within current fill_buffer().
|
||||||
// If it did not then we cannot break out of it (e.g. on preemption) because
|
// If it did not then we cannot break out of it (e.g. on preemption) because
|
||||||
// forward progress is not guaranteed in case iterators are getting constantly invalidated.
|
// forward progress is not guaranteed in case iterators are getting constantly invalidated.
|
||||||
@@ -231,6 +239,7 @@ inline
|
|||||||
future<> cache_flat_mutation_reader::do_fill_buffer(db::timeout_clock::time_point timeout) {
|
future<> cache_flat_mutation_reader::do_fill_buffer(db::timeout_clock::time_point timeout) {
|
||||||
if (_state == state::move_to_underlying) {
|
if (_state == state::move_to_underlying) {
|
||||||
_state = state::reading_from_underlying;
|
_state = state::reading_from_underlying;
|
||||||
|
_population_range_starts_before_all_rows = _lower_bound.is_before_all_clustered_rows(*_schema);
|
||||||
auto end = _next_row_in_range ? position_in_partition(_next_row.position())
|
auto end = _next_row_in_range ? position_in_partition(_next_row.position())
|
||||||
: position_in_partition(_upper_bound);
|
: position_in_partition(_upper_bound);
|
||||||
return _read_context->fast_forward_to(position_range{_lower_bound, std::move(end)}, timeout).then([this, timeout] {
|
return _read_context->fast_forward_to(position_range{_lower_bound, std::move(end)}, timeout).then([this, timeout] {
|
||||||
@@ -355,12 +364,12 @@ future<> cache_flat_mutation_reader::read_from_underlying(db::timeout_clock::tim
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
return make_ready_future<>();
|
return make_ready_future<>();
|
||||||
});
|
}, timeout);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline
|
inline
|
||||||
bool cache_flat_mutation_reader::ensure_population_lower_bound() {
|
bool cache_flat_mutation_reader::ensure_population_lower_bound() {
|
||||||
if (!_ck_ranges_curr->start()) {
|
if (_population_range_starts_before_all_rows) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
if (!_last_row.refresh(*_snp)) {
|
if (!_last_row.refresh(*_snp)) {
|
||||||
@@ -415,6 +424,7 @@ inline
|
|||||||
void cache_flat_mutation_reader::maybe_add_to_cache(const clustering_row& cr) {
|
void cache_flat_mutation_reader::maybe_add_to_cache(const clustering_row& cr) {
|
||||||
if (!can_populate()) {
|
if (!can_populate()) {
|
||||||
_last_row = nullptr;
|
_last_row = nullptr;
|
||||||
|
_population_range_starts_before_all_rows = false;
|
||||||
_read_context->cache().on_mispopulate();
|
_read_context->cache().on_mispopulate();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -448,6 +458,7 @@ void cache_flat_mutation_reader::maybe_add_to_cache(const clustering_row& cr) {
|
|||||||
with_allocator(standard_allocator(), [&] {
|
with_allocator(standard_allocator(), [&] {
|
||||||
_last_row = partition_snapshot_row_weakref(*_snp, it, true);
|
_last_row = partition_snapshot_row_weakref(*_snp, it, true);
|
||||||
});
|
});
|
||||||
|
_population_range_starts_before_all_rows = false;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -303,6 +303,7 @@ scylla_tests = [
|
|||||||
'tests/imr_test',
|
'tests/imr_test',
|
||||||
'tests/partition_data_test',
|
'tests/partition_data_test',
|
||||||
'tests/reusable_buffer_test',
|
'tests/reusable_buffer_test',
|
||||||
|
'tests/json_test'
|
||||||
]
|
]
|
||||||
|
|
||||||
perf_tests = [
|
perf_tests = [
|
||||||
@@ -406,6 +407,7 @@ scylla_core = (['database.cc',
|
|||||||
'mutation_reader.cc',
|
'mutation_reader.cc',
|
||||||
'flat_mutation_reader.cc',
|
'flat_mutation_reader.cc',
|
||||||
'mutation_query.cc',
|
'mutation_query.cc',
|
||||||
|
'json.cc',
|
||||||
'keys.cc',
|
'keys.cc',
|
||||||
'counters.cc',
|
'counters.cc',
|
||||||
'compress.cc',
|
'compress.cc',
|
||||||
@@ -514,6 +516,7 @@ scylla_core = (['database.cc',
|
|||||||
'db/consistency_level.cc',
|
'db/consistency_level.cc',
|
||||||
'db/system_keyspace.cc',
|
'db/system_keyspace.cc',
|
||||||
'db/system_distributed_keyspace.cc',
|
'db/system_distributed_keyspace.cc',
|
||||||
|
'db/size_estimates_virtual_reader.cc',
|
||||||
'db/schema_tables.cc',
|
'db/schema_tables.cc',
|
||||||
'db/cql_type_parser.cc',
|
'db/cql_type_parser.cc',
|
||||||
'db/legacy_schema_migrator.cc',
|
'db/legacy_schema_migrator.cc',
|
||||||
@@ -740,6 +743,7 @@ pure_boost_tests = set([
|
|||||||
'tests/imr_test',
|
'tests/imr_test',
|
||||||
'tests/partition_data_test',
|
'tests/partition_data_test',
|
||||||
'tests/reusable_buffer_test',
|
'tests/reusable_buffer_test',
|
||||||
|
'tests/json_test',
|
||||||
])
|
])
|
||||||
|
|
||||||
tests_not_using_seastar_test_framework = set([
|
tests_not_using_seastar_test_framework = set([
|
||||||
@@ -791,7 +795,7 @@ deps['tests/log_heap_test'] = ['tests/log_heap_test.cc']
|
|||||||
deps['tests/anchorless_list_test'] = ['tests/anchorless_list_test.cc']
|
deps['tests/anchorless_list_test'] = ['tests/anchorless_list_test.cc']
|
||||||
deps['tests/perf/perf_fast_forward'] += ['release.cc']
|
deps['tests/perf/perf_fast_forward'] += ['release.cc']
|
||||||
deps['tests/meta_test'] = ['tests/meta_test.cc']
|
deps['tests/meta_test'] = ['tests/meta_test.cc']
|
||||||
deps['tests/imr_test'] = ['tests/imr_test.cc']
|
deps['tests/imr_test'] = ['tests/imr_test.cc', 'utils/logalloc.cc', 'utils/dynamic_bitset.cc']
|
||||||
deps['tests/reusable_buffer_test'] = ['tests/reusable_buffer_test.cc']
|
deps['tests/reusable_buffer_test'] = ['tests/reusable_buffer_test.cc']
|
||||||
|
|
||||||
warnings = [
|
warnings = [
|
||||||
|
|||||||
@@ -473,9 +473,9 @@ insertStatement returns [::shared_ptr<raw::modification_statement> expr]
|
|||||||
::shared_ptr<cql3::term::raw> json_value;
|
::shared_ptr<cql3::term::raw> json_value;
|
||||||
}
|
}
|
||||||
: K_INSERT K_INTO cf=columnFamilyName
|
: K_INSERT K_INTO cf=columnFamilyName
|
||||||
'(' c1=cident { column_names.push_back(c1); } ( ',' cn=cident { column_names.push_back(cn); } )* ')'
|
('(' c1=cident { column_names.push_back(c1); } ( ',' cn=cident { column_names.push_back(cn); } )* ')'
|
||||||
( K_VALUES
|
K_VALUES
|
||||||
'(' v1=term { values.push_back(v1); } ( ',' vn=term { values.push_back(vn); } )* ')'
|
'(' v1=term { values.push_back(v1); } ( ',' vn=term { values.push_back(vn); } )* ')'
|
||||||
( K_IF K_NOT K_EXISTS { if_not_exists = true; } )?
|
( K_IF K_NOT K_EXISTS { if_not_exists = true; } )?
|
||||||
( usingClause[attrs] )?
|
( usingClause[attrs] )?
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -67,6 +67,12 @@ class error_collector : public error_listener<RecognizerType, ExceptionBaseType>
|
|||||||
*/
|
*/
|
||||||
const sstring_view _query;
|
const sstring_view _query;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An empty bitset to be used as a workaround for AntLR null dereference
|
||||||
|
* bug.
|
||||||
|
*/
|
||||||
|
static typename ExceptionBaseType::BitsetListType _empty_bit_list;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -144,6 +150,14 @@ private:
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
|
// AntLR Exception class has a bug of dereferencing a null
|
||||||
|
// pointer in the displayRecognitionError. The following
|
||||||
|
// if statement makes sure it will not be null before the
|
||||||
|
// call to that function (displayRecognitionError).
|
||||||
|
// bug reference: https://github.com/antlr/antlr3/issues/191
|
||||||
|
if (!ex->get_expectingSet()) {
|
||||||
|
ex->set_expectingSet(&_empty_bit_list);
|
||||||
|
}
|
||||||
ex->displayRecognitionError(token_names, msg);
|
ex->displayRecognitionError(token_names, msg);
|
||||||
}
|
}
|
||||||
return msg.str();
|
return msg.str();
|
||||||
@@ -345,4 +359,8 @@ private:
|
|||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template<typename RecognizerType, typename TokenType, typename ExceptionBaseType>
|
||||||
|
typename ExceptionBaseType::BitsetListType
|
||||||
|
error_collector<RecognizerType,TokenType,ExceptionBaseType>::_empty_bit_list = typename ExceptionBaseType::BitsetListType();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -177,7 +177,7 @@ shared_ptr<function>
|
|||||||
make_to_json_function(data_type t) {
|
make_to_json_function(data_type t) {
|
||||||
return make_native_scalar_function<true>("tojson", utf8_type, {t},
|
return make_native_scalar_function<true>("tojson", utf8_type, {t},
|
||||||
[t](cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
|
[t](cql_serialization_format sf, const std::vector<bytes_opt>& parameters) -> bytes_opt {
|
||||||
return utf8_type->decompose(t->to_json_string(parameters[0].value()));
|
return utf8_type->decompose(t->to_json_string(parameters[0]));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -217,19 +217,18 @@ void query_options::prepare(const std::vector<::shared_ptr<column_specification>
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto& names = *_names;
|
auto& names = *_names;
|
||||||
std::vector<cql3::raw_value> ordered_values;
|
std::vector<cql3::raw_value_view> ordered_values;
|
||||||
ordered_values.reserve(specs.size());
|
ordered_values.reserve(specs.size());
|
||||||
for (auto&& spec : specs) {
|
for (auto&& spec : specs) {
|
||||||
auto& spec_name = spec->name->text();
|
auto& spec_name = spec->name->text();
|
||||||
for (size_t j = 0; j < names.size(); j++) {
|
for (size_t j = 0; j < names.size(); j++) {
|
||||||
if (names[j] == spec_name) {
|
if (names[j] == spec_name) {
|
||||||
ordered_values.emplace_back(_values[j]);
|
ordered_values.emplace_back(_value_views[j]);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_values = std::move(ordered_values);
|
_value_views = std::move(ordered_values);
|
||||||
fill_value_views();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void query_options::fill_value_views()
|
void query_options::fill_value_views()
|
||||||
|
|||||||
@@ -239,11 +239,11 @@ query_processor::process(const sstring_view& query_string, service::query_state&
|
|||||||
log.trace("process: \"{}\"", query_string);
|
log.trace("process: \"{}\"", query_string);
|
||||||
tracing::trace(query_state.get_trace_state(), "Parsing a statement");
|
tracing::trace(query_state.get_trace_state(), "Parsing a statement");
|
||||||
auto p = get_statement(query_string, query_state.get_client_state());
|
auto p = get_statement(query_string, query_state.get_client_state());
|
||||||
options.prepare(p->bound_names);
|
|
||||||
auto cql_statement = p->statement;
|
auto cql_statement = p->statement;
|
||||||
if (cql_statement->get_bound_terms() != options.get_values_count()) {
|
if (cql_statement->get_bound_terms() != options.get_values_count()) {
|
||||||
throw exceptions::invalid_request_exception("Invalid amount of bind variables");
|
throw exceptions::invalid_request_exception("Invalid amount of bind variables");
|
||||||
}
|
}
|
||||||
|
options.prepare(p->bound_names);
|
||||||
|
|
||||||
warn(unimplemented::cause::METRICS);
|
warn(unimplemented::cause::METRICS);
|
||||||
#if 0
|
#if 0
|
||||||
|
|||||||
@@ -202,6 +202,14 @@ public:
|
|||||||
const query_options& options,
|
const query_options& options,
|
||||||
gc_clock::time_point now) const override;
|
gc_clock::time_point now) const override;
|
||||||
|
|
||||||
|
virtual std::vector<bytes_opt> values_raw(const query_options& options) const = 0;
|
||||||
|
|
||||||
|
virtual std::vector<bytes_opt> values(const query_options& options) const override {
|
||||||
|
std::vector<bytes_opt> ret = values_raw(options);
|
||||||
|
std::sort(ret.begin(),ret.end());
|
||||||
|
ret.erase(std::unique(ret.begin(),ret.end()),ret.end());
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
#if 0
|
#if 0
|
||||||
@Override
|
@Override
|
||||||
protected final boolean isSupportedBy(SecondaryIndex index)
|
protected final boolean isSupportedBy(SecondaryIndex index)
|
||||||
@@ -224,7 +232,7 @@ public:
|
|||||||
return abstract_restriction::term_uses_function(_values, ks_name, function_name);
|
return abstract_restriction::term_uses_function(_values, ks_name, function_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual std::vector<bytes_opt> values(const query_options& options) const override {
|
virtual std::vector<bytes_opt> values_raw(const query_options& options) const override {
|
||||||
std::vector<bytes_opt> ret;
|
std::vector<bytes_opt> ret;
|
||||||
for (auto&& v : _values) {
|
for (auto&& v : _values) {
|
||||||
ret.emplace_back(to_bytes_opt(v->bind_and_get(options)));
|
ret.emplace_back(to_bytes_opt(v->bind_and_get(options)));
|
||||||
@@ -249,7 +257,7 @@ public:
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual std::vector<bytes_opt> values(const query_options& options) const override {
|
virtual std::vector<bytes_opt> values_raw(const query_options& options) const override {
|
||||||
auto&& lval = dynamic_pointer_cast<multi_item_terminal>(_marker->bind(options));
|
auto&& lval = dynamic_pointer_cast<multi_item_terminal>(_marker->bind(options));
|
||||||
if (!lval) {
|
if (!lval) {
|
||||||
throw exceptions::invalid_request_exception("Invalid null value for IN restriction");
|
throw exceptions::invalid_request_exception("Invalid null value for IN restriction");
|
||||||
|
|||||||
@@ -105,9 +105,11 @@ public:
|
|||||||
virtual void reset() = 0;
|
virtual void reset() = 0;
|
||||||
|
|
||||||
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) override {
|
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) override {
|
||||||
if (receiver->type == get_type()) {
|
auto t1 = receiver->type->underlying_type();
|
||||||
|
auto t2 = get_type()->underlying_type();
|
||||||
|
if (t1 == t2) {
|
||||||
return assignment_testable::test_result::EXACT_MATCH;
|
return assignment_testable::test_result::EXACT_MATCH;
|
||||||
} else if (receiver->type->is_value_compatible_with(*get_type())) {
|
} else if (t1->is_value_compatible_with(*t2)) {
|
||||||
return assignment_testable::test_result::WEAKLY_ASSIGNABLE;
|
return assignment_testable::test_result::WEAKLY_ASSIGNABLE;
|
||||||
} else {
|
} else {
|
||||||
return assignment_testable::test_result::NOT_ASSIGNABLE;
|
return assignment_testable::test_result::NOT_ASSIGNABLE;
|
||||||
|
|||||||
@@ -96,12 +96,8 @@ public:
|
|||||||
encoded_row.write("\\\"", 2);
|
encoded_row.write("\\\"", 2);
|
||||||
}
|
}
|
||||||
encoded_row.write("\": ", 3);
|
encoded_row.write("\": ", 3);
|
||||||
if (parameters[i]) {
|
sstring row_sstring = _selector_types[i]->to_json_string(parameters[i]);
|
||||||
sstring row_sstring = _selector_types[i]->to_json_string(parameters[i].value());
|
encoded_row.write(row_sstring.c_str(), row_sstring.size());
|
||||||
encoded_row.write(row_sstring.c_str(), row_sstring.size());
|
|
||||||
} else {
|
|
||||||
encoded_row.write("null", 4);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
encoded_row.write("}", 1);
|
encoded_row.write("}", 1);
|
||||||
return encoded_row.linearize().to_string();
|
return encoded_row.linearize().to_string();
|
||||||
@@ -974,6 +970,10 @@ std::unique_ptr<prepared_statement> select_statement::prepare(database& db, cql_
|
|||||||
}
|
}
|
||||||
|
|
||||||
check_needs_filtering(restrictions);
|
check_needs_filtering(restrictions);
|
||||||
|
size_t restrictions_size = restrictions->get_partition_key_restrictions()->size() + restrictions->get_clustering_columns_restrictions()->size() + restrictions->get_non_pk_restriction().size();
|
||||||
|
if (restrictions->uses_secondary_indexing() && restrictions_size > 1) {
|
||||||
|
throw exceptions::invalid_request_exception("Indexed query may not contain multiple restrictions in 2.3");
|
||||||
|
}
|
||||||
|
|
||||||
::shared_ptr<cql3::statements::select_statement> stmt;
|
::shared_ptr<cql3::statements::select_statement> stmt;
|
||||||
if (restrictions->uses_secondary_indexing()) {
|
if (restrictions->uses_secondary_indexing()) {
|
||||||
|
|||||||
@@ -179,7 +179,21 @@ modification_statement::json_cache_opt insert_prepared_json_statement::maybe_pre
|
|||||||
void
|
void
|
||||||
insert_prepared_json_statement::execute_set_value(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params, const column_definition& column, const bytes_opt& value) {
|
insert_prepared_json_statement::execute_set_value(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params, const column_definition& column, const bytes_opt& value) {
|
||||||
if (!value) {
|
if (!value) {
|
||||||
|
if (column.type->is_collection()) {
|
||||||
|
auto& k = static_pointer_cast<const collection_type_impl>(column.type)->_kind;
|
||||||
|
if (&k == &collection_type_impl::kind::list) {
|
||||||
|
lists::setter::execute(m, prefix, params, column, make_shared<lists::value>(lists::value(std::vector<bytes_opt>())));
|
||||||
|
} else if (&k == &collection_type_impl::kind::set) {
|
||||||
|
sets::setter::execute(m, prefix, params, column, make_shared<sets::value>(sets::value(std::set<bytes, serialized_compare>(serialized_compare(empty_type)))));
|
||||||
|
} else if (&k == &collection_type_impl::kind::map) {
|
||||||
|
maps::setter::execute(m, prefix, params, column, make_shared<maps::value>(maps::value(std::map<bytes, bytes, serialized_compare>(serialized_compare(empty_type)))));
|
||||||
|
} else {
|
||||||
|
throw exceptions::invalid_request_exception("Incorrect value kind in JSON INSERT statement");
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
m.set_cell(prefix, column, std::move(operation::make_dead_cell(params)));
|
m.set_cell(prefix, column, std::move(operation::make_dead_cell(params)));
|
||||||
|
return;
|
||||||
} else if (!column.type->is_collection()) {
|
} else if (!column.type->is_collection()) {
|
||||||
constants::setter::execute(m, prefix, params, column, raw_value_view::make_value(bytes_view(*value)));
|
constants::setter::execute(m, prefix, params, column, raw_value_view::make_value(bytes_view(*value)));
|
||||||
return;
|
return;
|
||||||
@@ -204,15 +218,17 @@ insert_prepared_json_statement::execute_set_value(mutation& m, const clustering_
|
|||||||
dht::partition_range_vector
|
dht::partition_range_vector
|
||||||
insert_prepared_json_statement::build_partition_keys(const query_options& options, const json_cache_opt& json_cache) {
|
insert_prepared_json_statement::build_partition_keys(const query_options& options, const json_cache_opt& json_cache) {
|
||||||
dht::partition_range_vector ranges;
|
dht::partition_range_vector ranges;
|
||||||
|
std::vector<bytes_opt> exploded;
|
||||||
for (const auto& def : s->partition_key_columns()) {
|
for (const auto& def : s->partition_key_columns()) {
|
||||||
auto json_value = json_cache->at(def.name_as_text());
|
auto json_value = json_cache->at(def.name_as_text());
|
||||||
auto k = query::range<partition_key>::make_singular(partition_key::from_single_value(*s, json_value.value()));
|
if (!json_value) {
|
||||||
ranges.emplace_back(std::move(k).transform(
|
throw exceptions::invalid_request_exception(sprint("Missing mandatory PRIMARY KEY part %s", def.name_as_text()));
|
||||||
[this] (partition_key&& k) -> query::ring_position {
|
}
|
||||||
auto token = dht::global_partitioner().get_token(*s, k);
|
exploded.emplace_back(*json_value);
|
||||||
return { std::move(token), std::move(k) };
|
|
||||||
}));
|
|
||||||
}
|
}
|
||||||
|
auto pkey = partition_key::from_optional_exploded(*s, std::move(exploded));
|
||||||
|
auto k = query::range<query::ring_position>::make_singular(dht::global_partitioner().decorate_key(*s, std::move(pkey)));
|
||||||
|
ranges.emplace_back(std::move(k));
|
||||||
return ranges;
|
return ranges;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -221,7 +237,10 @@ query::clustering_row_ranges insert_prepared_json_statement::create_clustering_r
|
|||||||
std::vector<bytes_opt> exploded;
|
std::vector<bytes_opt> exploded;
|
||||||
for (const auto& def : s->clustering_key_columns()) {
|
for (const auto& def : s->clustering_key_columns()) {
|
||||||
auto json_value = json_cache->at(def.name_as_text());
|
auto json_value = json_cache->at(def.name_as_text());
|
||||||
exploded.emplace_back(json_value.value());
|
if (!json_value) {
|
||||||
|
throw exceptions::invalid_request_exception(sprint("Missing mandatory PRIMARY KEY part %s", def.name_as_text()));
|
||||||
|
}
|
||||||
|
exploded.emplace_back(*json_value);
|
||||||
}
|
}
|
||||||
auto k = query::range<clustering_key_prefix>::make_singular(clustering_key_prefix::from_optional_exploded(*s, std::move(exploded)));
|
auto k = query::range<clustering_key_prefix>::make_singular(clustering_key_prefix::from_optional_exploded(*s, std::move(exploded)));
|
||||||
ranges.emplace_back(query::clustering_range(std::move(k)));
|
ranges.emplace_back(query::clustering_range(std::move(k)));
|
||||||
|
|||||||
@@ -405,7 +405,7 @@ public:
|
|||||||
in_marker(int32_t bind_index, ::shared_ptr<column_specification> receiver)
|
in_marker(int32_t bind_index, ::shared_ptr<column_specification> receiver)
|
||||||
: abstract_marker(bind_index, std::move(receiver))
|
: abstract_marker(bind_index, std::move(receiver))
|
||||||
{
|
{
|
||||||
assert(dynamic_pointer_cast<const list_type_impl>(receiver->type));
|
assert(dynamic_pointer_cast<const list_type_impl>(_receiver->type));
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual shared_ptr<terminal> bind(const query_options& options) override {
|
virtual shared_ptr<terminal> bind(const query_options& options) override {
|
||||||
|
|||||||
@@ -53,6 +53,9 @@ update_parameters::get_prefetched_list(
|
|||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (column.is_static()) {
|
||||||
|
ckey = clustering_key_view::make_empty();
|
||||||
|
}
|
||||||
auto i = _prefetched->rows.find(std::make_pair(std::move(pkey), std::move(ckey)));
|
auto i = _prefetched->rows.find(std::make_pair(std::move(pkey), std::move(ckey)));
|
||||||
if (i == _prefetched->rows.end()) {
|
if (i == _prefetched->rows.end()) {
|
||||||
return {};
|
return {};
|
||||||
|
|||||||
@@ -211,6 +211,7 @@ struct cell {
|
|||||||
imr::member<tags::chunk_next, imr::pod<uint8_t*>>,
|
imr::member<tags::chunk_next, imr::pod<uint8_t*>>,
|
||||||
imr::member<tags::chunk_data, imr::buffer<tags::chunk_data>>
|
imr::member<tags::chunk_data, imr::buffer<tags::chunk_data>>
|
||||||
>;
|
>;
|
||||||
|
static constexpr size_t external_chunk_overhead = sizeof(uint8_t*) * 2;
|
||||||
|
|
||||||
using external_last_chunk_size = imr::pod<uint16_t>;
|
using external_last_chunk_size = imr::pod<uint16_t>;
|
||||||
/// The last fragment of an externally stored value
|
/// The last fragment of an externally stored value
|
||||||
@@ -224,6 +225,7 @@ struct cell {
|
|||||||
imr::member<tags::last_chunk_size, external_last_chunk_size>,
|
imr::member<tags::last_chunk_size, external_last_chunk_size>,
|
||||||
imr::member<tags::chunk_data, imr::buffer<tags::chunk_data>>
|
imr::member<tags::chunk_data, imr::buffer<tags::chunk_data>>
|
||||||
>;
|
>;
|
||||||
|
static constexpr size_t external_last_chunk_overhead = sizeof(uint8_t*) + sizeof(uint16_t);
|
||||||
|
|
||||||
class context;
|
class context;
|
||||||
class minimal_context;
|
class minimal_context;
|
||||||
|
|||||||
38
database.cc
38
database.cc
@@ -383,9 +383,13 @@ filter_sstable_for_reader(std::vector<sstables::shared_sstable>&& sstables, colu
|
|||||||
};
|
};
|
||||||
sstables.erase(boost::remove_if(sstables, sstable_has_not_key), sstables.end());
|
sstables.erase(boost::remove_if(sstables, sstable_has_not_key), sstables.end());
|
||||||
|
|
||||||
|
// FIXME: Workaround for https://github.com/scylladb/scylla/issues/3552
|
||||||
|
// and https://github.com/scylladb/scylla/issues/3553
|
||||||
|
const bool filtering_broken = true;
|
||||||
|
|
||||||
// no clustering filtering is applied if schema defines no clustering key or
|
// no clustering filtering is applied if schema defines no clustering key or
|
||||||
// compaction strategy thinks it will not benefit from such an optimization.
|
// compaction strategy thinks it will not benefit from such an optimization.
|
||||||
if (!schema->clustering_key_size() || !cf.get_compaction_strategy().use_clustering_key_filter()) {
|
if (filtering_broken || !schema->clustering_key_size() || !cf.get_compaction_strategy().use_clustering_key_filter()) {
|
||||||
return sstables;
|
return sstables;
|
||||||
}
|
}
|
||||||
::cf_stats* stats = cf.cf_stats();
|
::cf_stats* stats = cf.cf_stats();
|
||||||
@@ -957,6 +961,11 @@ table::seal_active_memtable(flush_permit&& permit) {
|
|||||||
}
|
}
|
||||||
_memtables->add_memtable();
|
_memtables->add_memtable();
|
||||||
_stats.memtable_switch_count++;
|
_stats.memtable_switch_count++;
|
||||||
|
// This will set evictable occupancy of the old memtable region to zero, so that
|
||||||
|
// this region is considered last for flushing by dirty_memory_manager::flush_when_needed().
|
||||||
|
// If we don't do that, the flusher may keep picking up this memtable list for flushing after
|
||||||
|
// the permit is released even though there is not much to flush in the active memtable of this list.
|
||||||
|
old->region().ground_evictable_occupancy();
|
||||||
auto previous_flush = _flush_barrier.advance_and_await();
|
auto previous_flush = _flush_barrier.advance_and_await();
|
||||||
auto op = _flush_barrier.start();
|
auto op = _flush_barrier.start();
|
||||||
|
|
||||||
@@ -1325,6 +1334,7 @@ table::on_compaction_completion(const std::vector<sstables::shared_sstable>& new
|
|||||||
|
|
||||||
// This is done in the background, so we can consider this compaction completed.
|
// This is done in the background, so we can consider this compaction completed.
|
||||||
seastar::with_gate(_sstable_deletion_gate, [this, sstables_to_remove] {
|
seastar::with_gate(_sstable_deletion_gate, [this, sstables_to_remove] {
|
||||||
|
return with_semaphore(_sstable_deletion_sem, 1, [this, sstables_to_remove = std::move(sstables_to_remove)] {
|
||||||
return sstables::delete_atomically(sstables_to_remove, *get_large_partition_handler()).then_wrapped([this, sstables_to_remove] (future<> f) {
|
return sstables::delete_atomically(sstables_to_remove, *get_large_partition_handler()).then_wrapped([this, sstables_to_remove] (future<> f) {
|
||||||
std::exception_ptr eptr;
|
std::exception_ptr eptr;
|
||||||
try {
|
try {
|
||||||
@@ -1348,6 +1358,7 @@ table::on_compaction_completion(const std::vector<sstables::shared_sstable>& new
|
|||||||
return make_exception_future<>(eptr);
|
return make_exception_future<>(eptr);
|
||||||
}
|
}
|
||||||
return make_ready_future<>();
|
return make_ready_future<>();
|
||||||
|
});
|
||||||
}).then([this] {
|
}).then([this] {
|
||||||
// refresh underlying data source in row cache to prevent it from holding reference
|
// refresh underlying data source in row cache to prevent it from holding reference
|
||||||
// to sstables files which were previously deleted.
|
// to sstables files which were previously deleted.
|
||||||
@@ -1469,7 +1480,10 @@ future<> table::cleanup_sstables(sstables::compaction_descriptor descriptor) {
|
|||||||
static thread_local semaphore sem(1);
|
static thread_local semaphore sem(1);
|
||||||
|
|
||||||
return with_semaphore(sem, 1, [this, &sst] {
|
return with_semaphore(sem, 1, [this, &sst] {
|
||||||
return this->compact_sstables(sstables::compaction_descriptor({ sst }, sst->get_sstable_level()), true);
|
// release reference to sstables cleaned up, otherwise space usage from their data and index
|
||||||
|
// components cannot be reclaimed until all of them are cleaned.
|
||||||
|
auto sstable_level = sst->get_sstable_level();
|
||||||
|
return this->compact_sstables(sstables::compaction_descriptor({ std::move(sst) }, sstable_level), true);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
@@ -1651,9 +1665,9 @@ future<> distributed_loader::open_sstable(distributed<database>& db, sstables::e
|
|||||||
// to distribute evenly the resource usage among all shards.
|
// to distribute evenly the resource usage among all shards.
|
||||||
|
|
||||||
return db.invoke_on(column_family::calculate_shard_from_sstable_generation(comps.generation),
|
return db.invoke_on(column_family::calculate_shard_from_sstable_generation(comps.generation),
|
||||||
[&db, comps = std::move(comps), func = std::move(func), pc] (database& local) {
|
[&db, comps = std::move(comps), func = std::move(func), &pc] (database& local) {
|
||||||
|
|
||||||
return with_semaphore(local.sstable_load_concurrency_sem(), 1, [&db, &local, comps = std::move(comps), func = std::move(func), pc] {
|
return with_semaphore(local.sstable_load_concurrency_sem(), 1, [&db, &local, comps = std::move(comps), func = std::move(func), &pc] {
|
||||||
auto& cf = local.find_column_family(comps.ks, comps.cf);
|
auto& cf = local.find_column_family(comps.ks, comps.cf);
|
||||||
|
|
||||||
auto f = sstables::sstable::load_shared_components(cf.schema(), cf._config.datadir, comps.generation, comps.version, comps.format, pc);
|
auto f = sstables::sstable::load_shared_components(cf.schema(), cf._config.datadir, comps.generation, comps.version, comps.format, pc);
|
||||||
@@ -2699,7 +2713,7 @@ future<> database::drop_column_family(const sstring& ks_name, const sstring& cf_
|
|||||||
remove(*cf);
|
remove(*cf);
|
||||||
cf->clear_views();
|
cf->clear_views();
|
||||||
auto& ks = find_keyspace(ks_name);
|
auto& ks = find_keyspace(ks_name);
|
||||||
return cf->await_pending_writes().then([this, &ks, cf, tsf = std::move(tsf), snapshot] {
|
return when_all_succeed(cf->await_pending_writes(), cf->await_pending_reads()).then([this, &ks, cf, tsf = std::move(tsf), snapshot] {
|
||||||
return truncate(ks, *cf, std::move(tsf), snapshot).finally([this, cf] {
|
return truncate(ks, *cf, std::move(tsf), snapshot).finally([this, cf] {
|
||||||
return cf->stop();
|
return cf->stop();
|
||||||
});
|
});
|
||||||
@@ -3139,7 +3153,7 @@ database::query(schema_ptr s, const query::read_command& cmd, query::result_opti
|
|||||||
seastar::ref(get_result_memory_limiter()),
|
seastar::ref(get_result_memory_limiter()),
|
||||||
max_result_size,
|
max_result_size,
|
||||||
timeout,
|
timeout,
|
||||||
std::move(cache_ctx)).then_wrapped([this, s = _stats, hit_rate = cf.get_global_cache_hit_rate()] (auto f) {
|
std::move(cache_ctx)).then_wrapped([this, s = _stats, hit_rate = cf.get_global_cache_hit_rate(), op = cf.read_in_progress()] (auto f) {
|
||||||
if (f.failed()) {
|
if (f.failed()) {
|
||||||
++s->total_reads_failed;
|
++s->total_reads_failed;
|
||||||
return make_exception_future<lw_shared_ptr<query::result>, cache_temperature>(f.get_exception());
|
return make_exception_future<lw_shared_ptr<query::result>, cache_temperature>(f.get_exception());
|
||||||
@@ -3167,7 +3181,7 @@ database::query_mutations(schema_ptr s, const query::read_command& cmd, const dh
|
|||||||
std::move(accounter),
|
std::move(accounter),
|
||||||
std::move(trace_state),
|
std::move(trace_state),
|
||||||
timeout,
|
timeout,
|
||||||
std::move(cache_ctx)).then_wrapped([this, s = _stats, hit_rate = cf.get_global_cache_hit_rate()] (auto f) {
|
std::move(cache_ctx)).then_wrapped([this, s = _stats, hit_rate = cf.get_global_cache_hit_rate(), op = cf.read_in_progress()] (auto f) {
|
||||||
if (f.failed()) {
|
if (f.failed()) {
|
||||||
++s->total_reads_failed;
|
++s->total_reads_failed;
|
||||||
return make_exception_future<reconcilable_result, cache_temperature>(f.get_exception());
|
return make_exception_future<reconcilable_result, cache_temperature>(f.get_exception());
|
||||||
@@ -3433,6 +3447,13 @@ future<> dirty_memory_manager::flush_when_needed() {
|
|||||||
// release the biggest amount of memory and is less likely to be generating tiny
|
// release the biggest amount of memory and is less likely to be generating tiny
|
||||||
// SSTables.
|
// SSTables.
|
||||||
memtable& candidate_memtable = memtable::from_region(*(this->_virtual_region_group.get_largest_region()));
|
memtable& candidate_memtable = memtable::from_region(*(this->_virtual_region_group.get_largest_region()));
|
||||||
|
|
||||||
|
if (candidate_memtable.empty()) {
|
||||||
|
// Soft pressure, but nothing to flush. It could be due to fsync or memtable_to_cache lagging.
|
||||||
|
// Back off to avoid OOMing with flush continuations.
|
||||||
|
return sleep(1ms);
|
||||||
|
}
|
||||||
|
|
||||||
// Do not wait. The semaphore will protect us against a concurrent flush. But we
|
// Do not wait. The semaphore will protect us against a concurrent flush. But we
|
||||||
// want to start a new one as soon as the permits are destroyed and the semaphore is
|
// want to start a new one as soon as the permits are destroyed and the semaphore is
|
||||||
// made ready again, not when we are done with the current one.
|
// made ready again, not when we are done with the current one.
|
||||||
@@ -3980,6 +4001,7 @@ seal_snapshot(sstring jsondir) {
|
|||||||
|
|
||||||
future<> table::snapshot(sstring name) {
|
future<> table::snapshot(sstring name) {
|
||||||
return flush().then([this, name = std::move(name)]() {
|
return flush().then([this, name = std::move(name)]() {
|
||||||
|
return with_semaphore(_sstable_deletion_sem, 1, [this, name = std::move(name)]() {
|
||||||
auto tables = boost::copy_range<std::vector<sstables::shared_sstable>>(*_sstables->all());
|
auto tables = boost::copy_range<std::vector<sstables::shared_sstable>>(*_sstables->all());
|
||||||
return do_with(std::move(tables), [this, name](std::vector<sstables::shared_sstable> & tables) {
|
return do_with(std::move(tables), [this, name](std::vector<sstables::shared_sstable> & tables) {
|
||||||
auto jsondir = _config.datadir + "/snapshots/" + name;
|
auto jsondir = _config.datadir + "/snapshots/" + name;
|
||||||
@@ -4044,6 +4066,7 @@ future<> table::snapshot(sstring name) {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4175,6 +4198,7 @@ future<> table::fail_streaming_mutations(utils::UUID plan_id) {
|
|||||||
_streaming_memtables_big.erase(it);
|
_streaming_memtables_big.erase(it);
|
||||||
return entry->flush_in_progress.close().then([this, entry] {
|
return entry->flush_in_progress.close().then([this, entry] {
|
||||||
for (auto&& sst : entry->sstables) {
|
for (auto&& sst : entry->sstables) {
|
||||||
|
sst.monitor->write_failed();
|
||||||
sst.sstable->mark_for_deletion();
|
sst.sstable->mark_for_deletion();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|||||||
18
database.hh
18
database.hh
@@ -294,6 +294,8 @@ public:
|
|||||||
class table;
|
class table;
|
||||||
using column_family = table;
|
using column_family = table;
|
||||||
|
|
||||||
|
class database_sstable_write_monitor;
|
||||||
|
|
||||||
class table : public enable_lw_shared_from_this<table> {
|
class table : public enable_lw_shared_from_this<table> {
|
||||||
public:
|
public:
|
||||||
struct config {
|
struct config {
|
||||||
@@ -389,7 +391,7 @@ private:
|
|||||||
// plan memtables and the resulting sstables are not made visible until
|
// plan memtables and the resulting sstables are not made visible until
|
||||||
// the streaming is complete.
|
// the streaming is complete.
|
||||||
struct monitored_sstable {
|
struct monitored_sstable {
|
||||||
std::unique_ptr<sstables::write_monitor> monitor;
|
std::unique_ptr<database_sstable_write_monitor> monitor;
|
||||||
sstables::shared_sstable sstable;
|
sstables::shared_sstable sstable;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -428,6 +430,10 @@ private:
|
|||||||
std::unordered_map<uint64_t, sstables::shared_sstable> _sstables_need_rewrite;
|
std::unordered_map<uint64_t, sstables::shared_sstable> _sstables_need_rewrite;
|
||||||
// Control background fibers waiting for sstables to be deleted
|
// Control background fibers waiting for sstables to be deleted
|
||||||
seastar::gate _sstable_deletion_gate;
|
seastar::gate _sstable_deletion_gate;
|
||||||
|
// This semaphore ensures that an operation like snapshot won't have its selected
|
||||||
|
// sstables deleted by compaction in parallel, a race condition which could
|
||||||
|
// easily result in failure.
|
||||||
|
seastar::semaphore _sstable_deletion_sem = {1};
|
||||||
// There are situations in which we need to stop writing sstables. Flushers will take
|
// There are situations in which we need to stop writing sstables. Flushers will take
|
||||||
// the read lock, and the ones that wish to stop that process will take the write lock.
|
// the read lock, and the ones that wish to stop that process will take the write lock.
|
||||||
rwlock _sstables_lock;
|
rwlock _sstables_lock;
|
||||||
@@ -475,6 +481,8 @@ private:
|
|||||||
// after some modification, needs to ensure that news writes will see it before
|
// after some modification, needs to ensure that news writes will see it before
|
||||||
// it can proceed, such as the view building code.
|
// it can proceed, such as the view building code.
|
||||||
utils::phased_barrier _pending_writes_phaser;
|
utils::phased_barrier _pending_writes_phaser;
|
||||||
|
// Corresponding phaser for in-progress reads.
|
||||||
|
utils::phased_barrier _pending_reads_phaser;
|
||||||
private:
|
private:
|
||||||
void update_stats_for_new_sstable(uint64_t disk_space_used_by_sstable, const std::vector<unsigned>& shards_for_the_sstable) noexcept;
|
void update_stats_for_new_sstable(uint64_t disk_space_used_by_sstable, const std::vector<unsigned>& shards_for_the_sstable) noexcept;
|
||||||
// Adds new sstable to the set of sstables
|
// Adds new sstable to the set of sstables
|
||||||
@@ -817,6 +825,14 @@ public:
|
|||||||
return _pending_writes_phaser.advance_and_await();
|
return _pending_writes_phaser.advance_and_await();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
utils::phased_barrier::operation read_in_progress() {
|
||||||
|
return _pending_reads_phaser.start();
|
||||||
|
}
|
||||||
|
|
||||||
|
future<> await_pending_reads() {
|
||||||
|
return _pending_reads_phaser.advance_and_await();
|
||||||
|
}
|
||||||
|
|
||||||
void add_or_update_view(view_ptr v);
|
void add_or_update_view(view_ptr v);
|
||||||
void remove_view(view_ptr v);
|
void remove_view(view_ptr v);
|
||||||
void clear_views();
|
void clear_views();
|
||||||
|
|||||||
@@ -163,7 +163,7 @@ future<> db::commitlog_replayer::impl::init() {
|
|||||||
// Get all truncation records for the CF and initialize max rps if
|
// Get all truncation records for the CF and initialize max rps if
|
||||||
// present. Cannot do this on demand, as there may be no sstables to
|
// present. Cannot do this on demand, as there may be no sstables to
|
||||||
// mark the CF as "needed".
|
// mark the CF as "needed".
|
||||||
return db::system_keyspace::get_truncated_position(uuid).then([&map, &uuid](std::vector<db::replay_position> tpps) {
|
return db::system_keyspace::get_truncated_position(uuid).then([&map, uuid](std::vector<db::replay_position> tpps) {
|
||||||
for (auto& p : tpps) {
|
for (auto& p : tpps) {
|
||||||
rlogger.trace("CF {} truncated at {}", uuid, p);
|
rlogger.trace("CF {} truncated at {}", uuid, p);
|
||||||
auto& pp = map[p.shard_id()][uuid];
|
auto& pp = map[p.shard_id()][uuid];
|
||||||
|
|||||||
@@ -686,33 +686,7 @@ read_keyspace_mutation(distributed<service::storage_proxy>& proxy, const sstring
|
|||||||
static semaphore the_merge_lock {1};
|
static semaphore the_merge_lock {1};
|
||||||
|
|
||||||
future<> merge_lock() {
|
future<> merge_lock() {
|
||||||
// ref: #1088
|
return smp::submit_to(0, [] { return the_merge_lock.wait(); });
|
||||||
// to avoid deadlocks, we don't want long-standing calls to the shard 0
|
|
||||||
// as they can cause a deadlock:
|
|
||||||
//
|
|
||||||
// fiber1 fiber2
|
|
||||||
// merge_lock() (succeeds)
|
|
||||||
// merge_lock() (waits)
|
|
||||||
// invoke_on_all() (waits on merge_lock to relinquish smp::submit_to slot)
|
|
||||||
//
|
|
||||||
// so we issue the lock calls with a timeout; the slot will be relinquished, and invoke_on_all()
|
|
||||||
// can complete
|
|
||||||
return repeat([] () mutable {
|
|
||||||
return smp::submit_to(0, [] {
|
|
||||||
return the_merge_lock.try_wait();
|
|
||||||
}).then([] (bool result) {
|
|
||||||
if (result) {
|
|
||||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
|
||||||
} else {
|
|
||||||
static thread_local auto rand_engine = std::default_random_engine();
|
|
||||||
auto dist = std::uniform_int_distribution<int>(0, 100);
|
|
||||||
auto to = std::chrono::microseconds(dist(rand_engine));
|
|
||||||
return sleep(to).then([] {
|
|
||||||
return make_ready_future<stop_iteration>(stop_iteration::no);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> merge_unlock() {
|
future<> merge_unlock() {
|
||||||
|
|||||||
329
db/size_estimates_virtual_reader.cc
Normal file
329
db/size_estimates_virtual_reader.cc
Normal file
@@ -0,0 +1,329 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2019 ScyllaDB
|
||||||
|
*
|
||||||
|
* Modified by ScyllaDB
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This file is part of Scylla.
|
||||||
|
*
|
||||||
|
* Scylla is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Scylla is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <boost/range/adaptor/indirected.hpp>
|
||||||
|
#include <boost/range/adaptor/map.hpp>
|
||||||
|
#include <boost/range/adaptor/transformed.hpp>
|
||||||
|
#include <boost/range/algorithm/find_if.hpp>
|
||||||
|
|
||||||
|
#include "clustering_bounds_comparator.hh"
|
||||||
|
#include "database.hh"
|
||||||
|
#include "db/system_keyspace.hh"
|
||||||
|
#include "dht/i_partitioner.hh"
|
||||||
|
#include "partition_range_compat.hh"
|
||||||
|
#include "range.hh"
|
||||||
|
#include "service/storage_service.hh"
|
||||||
|
#include "stdx.hh"
|
||||||
|
#include "mutation_fragment.hh"
|
||||||
|
#include "sstables/sstables.hh"
|
||||||
|
#include "db/timeout_clock.hh"
|
||||||
|
#include "database.hh"
|
||||||
|
|
||||||
|
#include "db/size_estimates_virtual_reader.hh"
|
||||||
|
|
||||||
|
namespace db {
|
||||||
|
|
||||||
|
namespace size_estimates {
|
||||||
|
|
||||||
|
struct virtual_row {
|
||||||
|
const bytes& cf_name;
|
||||||
|
const token_range& tokens;
|
||||||
|
clustering_key_prefix as_key() const {
|
||||||
|
return clustering_key_prefix::from_exploded(std::vector<bytes_view>{cf_name, tokens.start, tokens.end});
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct virtual_row_comparator {
|
||||||
|
schema_ptr _schema;
|
||||||
|
virtual_row_comparator(schema_ptr schema) : _schema(schema) { }
|
||||||
|
bool operator()(const clustering_key_prefix& key1, const clustering_key_prefix& key2) {
|
||||||
|
return clustering_key_prefix::prefix_equality_less_compare(*_schema)(key1, key2);
|
||||||
|
}
|
||||||
|
bool operator()(const virtual_row& row, const clustering_key_prefix& key) {
|
||||||
|
return operator()(row.as_key(), key);
|
||||||
|
}
|
||||||
|
bool operator()(const clustering_key_prefix& key, const virtual_row& row) {
|
||||||
|
return operator()(key, row.as_key());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Iterating over the cartesian product of cf_names and token_ranges.
|
||||||
|
class virtual_row_iterator : public std::iterator<std::input_iterator_tag, const virtual_row> {
|
||||||
|
std::reference_wrapper<const std::vector<bytes>> _cf_names;
|
||||||
|
std::reference_wrapper<const std::vector<token_range>> _ranges;
|
||||||
|
size_t _cf_names_idx = 0;
|
||||||
|
size_t _ranges_idx = 0;
|
||||||
|
public:
|
||||||
|
struct end_iterator_tag {};
|
||||||
|
virtual_row_iterator(const std::vector<bytes>& cf_names, const std::vector<token_range>& ranges)
|
||||||
|
: _cf_names(std::ref(cf_names))
|
||||||
|
, _ranges(std::ref(ranges))
|
||||||
|
{ }
|
||||||
|
virtual_row_iterator(const std::vector<bytes>& cf_names, const std::vector<token_range>& ranges, end_iterator_tag)
|
||||||
|
: _cf_names(std::ref(cf_names))
|
||||||
|
, _ranges(std::ref(ranges))
|
||||||
|
, _cf_names_idx(cf_names.size())
|
||||||
|
, _ranges_idx(ranges.size())
|
||||||
|
{
|
||||||
|
if (cf_names.empty() || ranges.empty()) {
|
||||||
|
// The product of an empty range with any range is an empty range.
|
||||||
|
// In this case we want the end iterator to be equal to the begin iterator,
|
||||||
|
// which has_ranges_idx = _cf_names_idx = 0.
|
||||||
|
_ranges_idx = _cf_names_idx = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
virtual_row_iterator& operator++() {
|
||||||
|
if (++_ranges_idx == _ranges.get().size() && ++_cf_names_idx < _cf_names.get().size()) {
|
||||||
|
_ranges_idx = 0;
|
||||||
|
}
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
virtual_row_iterator operator++(int) {
|
||||||
|
virtual_row_iterator i(*this);
|
||||||
|
++(*this);
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
const value_type operator*() const {
|
||||||
|
return { _cf_names.get()[_cf_names_idx], _ranges.get()[_ranges_idx] };
|
||||||
|
}
|
||||||
|
bool operator==(const virtual_row_iterator& i) const {
|
||||||
|
return _cf_names_idx == i._cf_names_idx
|
||||||
|
&& _ranges_idx == i._ranges_idx;
|
||||||
|
}
|
||||||
|
bool operator!=(const virtual_row_iterator& i) const {
|
||||||
|
return !(*this == i);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the keyspaces, ordered by name, as selected by the partition_range.
|
||||||
|
*/
|
||||||
|
static std::vector<sstring> get_keyspaces(const schema& s, const database& db, dht::partition_range range) {
|
||||||
|
struct keyspace_less_comparator {
|
||||||
|
const schema& _s;
|
||||||
|
keyspace_less_comparator(const schema& s) : _s(s) { }
|
||||||
|
dht::ring_position as_ring_position(const sstring& ks) {
|
||||||
|
auto pkey = partition_key::from_single_value(_s, utf8_type->decompose(ks));
|
||||||
|
return dht::global_partitioner().decorate_key(_s, std::move(pkey));
|
||||||
|
}
|
||||||
|
bool operator()(const sstring& ks1, const sstring& ks2) {
|
||||||
|
return as_ring_position(ks1).less_compare(_s, as_ring_position(ks2));
|
||||||
|
}
|
||||||
|
bool operator()(const sstring& ks, const dht::ring_position& rp) {
|
||||||
|
return as_ring_position(ks).less_compare(_s, rp);
|
||||||
|
}
|
||||||
|
bool operator()(const dht::ring_position& rp, const sstring& ks) {
|
||||||
|
return rp.less_compare(_s, as_ring_position(ks));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
auto keyspaces = db.get_non_system_keyspaces();
|
||||||
|
auto cmp = keyspace_less_comparator(s);
|
||||||
|
boost::sort(keyspaces, cmp);
|
||||||
|
return boost::copy_range<std::vector<sstring>>(
|
||||||
|
range.slice(keyspaces, std::move(cmp)) | boost::adaptors::filtered([&s] (const auto& ks) {
|
||||||
|
// If this is a range query, results are divided between shards by the partition key (keyspace_name).
|
||||||
|
return shard_of(dht::global_partitioner().get_token(s,
|
||||||
|
partition_key::from_single_value(s, utf8_type->decompose(ks))))
|
||||||
|
== engine().cpu_id();
|
||||||
|
})
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Makes a wrapping range of ring_position from a nonwrapping range of token, used to select sstables.
|
||||||
|
*/
|
||||||
|
static dht::partition_range as_ring_position_range(dht::token_range& r) {
    using bound_opt = stdx::optional<range<dht::ring_position>::bound>;

    // Each bound stays disengaged when the token range is open on that side.
    bound_opt start_bound;
    const auto& token_start = r.start();
    if (token_start) {
        // The start token maps to the "start" token bound, keeping its inclusiveness.
        start_bound = {{ dht::ring_position(token_start->value(), dht::ring_position::token_bound::start), token_start->is_inclusive() }};
    }

    bound_opt end_bound;
    const auto& token_end = r.end();
    if (token_end) {
        // The end token maps to the "end" token bound, keeping its inclusiveness.
        end_bound = {{ dht::ring_position(token_end->value(), dht::ring_position::token_bound::end), token_end->is_inclusive() }};
    }

    return dht::partition_range(std::move(start_bound), std::move(end_bound), r.is_singular());
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add a new range_estimates for the specified range, considering the sstables associated with `cf`.
|
||||||
|
*/
|
||||||
|
static system_keyspace::range_estimates estimate(const column_family& cf, const token_range& r) {
    int64_t count = 0;
    utils::estimated_histogram hist{0};

    // The bounds of `r` are stored as UTF-8 bytes; turn them back into tokens.
    auto from_bytes = [] (auto& b) {
        return dht::global_partitioner().from_sstring(utf8_type->to_string(b));
    };
    auto start_token = from_bytes(r.start);
    auto end_token = from_bytes(r.end);

    // Unwrap the (possibly wrapping) token range into the pieces collected below.
    dht::token_range_vector unwrapped;
    compat::unwrap_into(
        wrapping_range<dht::token>({{ std::move(start_token) }}, {{ std::move(end_token) }}),
        dht::token_comparator(),
        [&unwrapped] (auto&& rng) { unwrapped.push_back(std::move(rng)); });

    // Sum the key estimates and merge the row-size histograms of every sstable
    // selected for each piece.
    for (auto&& piece : unwrapped) {
        auto rp_range = as_ring_position_range(piece);
        for (auto&& sstable : cf.select_sstables(rp_range)) {
            count += sstable->estimated_keys_for_range(piece);
            hist.merge(sstable->get_stats_metadata().estimated_row_size);
        }
    }

    // With no keys at all the mean is reported as 0 instead of asking the histogram.
    return {cf.schema(), r.start, r.end, count, count > 0 ? hist.mean() : 0};
}
|
||||||
|
|
||||||
|
// Resolves to the primary token ranges computed from the local node's tokens,
// converted to their textual (UTF-8 bytes) form and sorted by start bound.
// A range open towards the minimum token and a range open towards the maximum
// token are merged into one entry spanning from the latter's start to the
// former's end.
future<std::vector<token_range>> get_local_ranges() {
    auto& ss = service::get_local_storage_service();
    return ss.get_local_tokens().then([&ss] (auto&& tokens) {
        auto ranges = ss.get_token_metadata().get_primary_ranges_for(std::move(tokens));
        std::vector<token_range> local_ranges;
        // Every bound that reaches this helper must be engaged.
        auto to_bytes = [](const stdx::optional<dht::token_range::bound>& b) {
            assert(b);
            return utf8_type->decompose(dht::global_partitioner().to_sstring(b->value()));
        };
        // We merge the ranges to be compatible with how Cassandra shows its size estimates table.
        // All queries will be on that table, where all entries are text and there's no notion of
        // token ranges from the CQL point of view.
        auto left_inf = boost::find_if(ranges, [] (auto&& r) {
            return !r.start() || r.start()->value() == dht::minimum_token();
        });
        // Inspect the END bound here. Testing the start bound would dereference a
        // possibly disengaged optional and would not identify the right-open range.
        auto right_inf = boost::find_if(ranges, [] (auto&& r) {
            return !r.end() || r.end()->value() == dht::maximum_token();
        });
        if (left_inf != right_inf && left_inf != ranges.end() && right_inf != ranges.end()) {
            local_ranges.push_back(token_range{to_bytes(right_inf->start()), to_bytes(left_inf->end())});
            // Erase the later element first: erasing from a vector invalidates
            // iterators at and after the erased position, so the earlier iterator
            // stays usable only if it is erased last.
            // NOTE(review): assumes `ranges` has random-access iterators (e.g. a std::vector) - confirm.
            if (left_inf < right_inf) {
                ranges.erase(right_inf);
                ranges.erase(left_inf);
            } else {
                ranges.erase(left_inf);
                ranges.erase(right_inf);
            }
        }
        for (auto&& r : ranges) {
            local_ranges.push_back(token_range{to_bytes(r.start()), to_bytes(r.end())});
        }
        // Order by the textual form of the start bound.
        boost::sort(local_ranges, [] (auto&& tr1, auto&& tr2) {
            return utf8_type->less(tr1.start, tr2.start);
        });
        return local_ranges;
    });
}
|
||||||
|
|
||||||
|
// Stores the schema, a pointer to the partition range, a reference to the slice and
// the forwarding mode. The range and the slice are not copied, so the caller must
// keep them alive while the reader uses them.
size_estimates_mutation_reader::size_estimates_mutation_reader(
        schema_ptr schema,
        const dht::partition_range& prange,
        const query::partition_slice& slice,
        streamed_mutation::forwarding fwd)
    : impl(schema)                  // the base gets a copy; the member below takes the original
    , _schema(std::move(schema))
    , _prange(&prange)
    , _slice(slice)
    , _fwd(fwd)
{
}
|
||||||
|
|
||||||
|
// Prepares _partition_reader for the next keyspace, or marks end of stream when
// no keyspace is left. The keyspace list is computed lazily on first use.
future<> size_estimates_mutation_reader::get_next_partition() {
    auto& db = service::get_local_storage_proxy().get_db().local();

    if (!_keyspaces) {
        _keyspaces = get_keyspaces(*_schema, db, *_prange);
        _current_partition = _keyspaces->begin();
    }

    const bool exhausted = _current_partition == _keyspaces->end();
    if (exhausted) {
        _end_of_stream = true;
        return make_ready_future<>();
    }

    return get_local_ranges().then([&db, this] (auto&& ranges) {
        // Build one mutation holding the estimates of the current keyspace,
        // then advance so the next call handles the following keyspace.
        auto keyspace_estimates = this->estimates_for_current_keyspace(db, std::move(ranges));
        auto keyspace_mutation = db::system_keyspace::make_size_estimates_mutation(*_current_partition, std::move(keyspace_estimates));
        ++_current_partition;

        std::vector<mutation> single;
        single.emplace_back(std::move(keyspace_mutation));
        _partition_reader = flat_mutation_reader_from_mutations(std::move(single), _fwd);
    });
}
|
||||||
|
|
||||||
|
// Repeats until the stream has ended or the buffer is full: either sets up the
// reader for the next partition, or moves fragments from the current partition
// reader into this reader's buffer.
future<> size_estimates_mutation_reader::fill_buffer(db::timeout_clock::time_point timeout) {
    auto done = [this] {
        return is_end_of_stream() || is_buffer_full();
    };

    auto step = [this, timeout] {
        if (!_partition_reader) {
            return get_next_partition();
        }
        // Pull fragments until our buffer fills up.
        auto consumed = _partition_reader->consume_pausable([this] (mutation_fragment mf) {
            push_mutation_fragment(std::move(mf));
            return stop_iteration(is_buffer_full());
        }, timeout);
        return consumed.then([this] {
            // A fully drained partition reader is dropped so that the next
            // iteration moves on to the following partition.
            const bool drained = _partition_reader->is_end_of_stream() && _partition_reader->is_buffer_empty();
            if (drained) {
                _partition_reader = stdx::nullopt;
            }
        });
    };

    return do_until(std::move(done), std::move(step));
}
|
||||||
|
|
||||||
|
void size_estimates_mutation_reader::next_partition() {
|
||||||
|
clear_buffer_to_next_partition();
|
||||||
|
if (is_buffer_empty()) {
|
||||||
|
_partition_reader = stdx::nullopt;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Restarts the reader on a new partition range: clears the buffer, forgets the
// cached keyspace list and the current partition reader, and clears the
// end-of-stream flag. Only the address of `pr` is stored, so the caller must keep
// the range alive while the reader uses it. `timeout` is unused because nothing
// here waits.
future<> size_estimates_mutation_reader::fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) {
    clear_buffer();
    _prange = &pr;
    // Dropping the keyspace list makes get_next_partition() recompute it for the new range.
    _keyspaces = stdx::nullopt;
    _partition_reader = stdx::nullopt;
    _end_of_stream = false;
    return make_ready_future<>();
}
|
||||||
|
|
||||||
|
// Forwards within the current partition: forwards the buffer to the start of `pr`,
// clears the end-of-stream flag and, when a partition reader exists, forwards it too.
future<> size_estimates_mutation_reader::fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) {
    forward_buffer_to(pr.start());
    _end_of_stream = false;
    if (!_partition_reader) {
        // No partition reader to forward.
        return make_ready_future<>();
    }
    return _partition_reader->fast_forward_to(std::move(pr), timeout);
}
|
||||||
|
|
||||||
|
size_t size_estimates_mutation_reader::buffer_size() const {
|
||||||
|
if (_partition_reader) {
|
||||||
|
return flat_mutation_reader::impl::buffer_size() + _partition_reader->buffer_size();
|
||||||
|
}
|
||||||
|
return flat_mutation_reader::impl::buffer_size();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<db::system_keyspace::range_estimates>
|
||||||
|
size_estimates_mutation_reader::estimates_for_current_keyspace(const database& db, std::vector<token_range> local_ranges) const {
|
||||||
|
// For each specified range, estimate (crudely) mean partition size and partitions count.
|
||||||
|
auto pkey = partition_key::from_single_value(*_schema, utf8_type->decompose(*_current_partition));
|
||||||
|
auto cfs = db.find_keyspace(*_current_partition).metadata()->cf_meta_data();
|
||||||
|
auto cf_names = boost::copy_range<std::vector<bytes>>(cfs | boost::adaptors::transformed([] (auto&& cf) {
|
||||||
|
return utf8_type->decompose(cf.first);
|
||||||
|
}));
|
||||||
|
boost::sort(cf_names, [] (auto&& n1, auto&& n2) {
|
||||||
|
return utf8_type->less(n1, n2);
|
||||||
|
});
|
||||||
|
std::vector<db::system_keyspace::range_estimates> estimates;
|
||||||
|
for (auto& range : _slice.row_ranges(*_schema, pkey)) {
|
||||||
|
auto rows = boost::make_iterator_range(
|
||||||
|
virtual_row_iterator(cf_names, local_ranges),
|
||||||
|
virtual_row_iterator(cf_names, local_ranges, virtual_row_iterator::end_iterator_tag()));
|
||||||
|
auto rows_to_estimate = range.slice(rows, virtual_row_comparator(_schema));
|
||||||
|
for (auto&& r : rows_to_estimate) {
|
||||||
|
auto& cf = db.find_column_family(*_current_partition, utf8_type->to_string(r.cf_name));
|
||||||
|
estimates.push_back(estimate(cf, r.tokens));
|
||||||
|
if (estimates.size() >= _slice.partition_row_limit()) {
|
||||||
|
return estimates;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return estimates;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace size_estimates
|
||||||
|
|
||||||
|
} // namespace db
|
||||||
@@ -21,33 +21,19 @@
|
|||||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <boost/range/adaptor/indirected.hpp>
|
|
||||||
#include <boost/range/adaptor/map.hpp>
|
|
||||||
#include <boost/range/adaptor/transformed.hpp>
|
|
||||||
#include <boost/range/algorithm/find_if.hpp>
|
|
||||||
|
|
||||||
#include "clustering_bounds_comparator.hh"
|
|
||||||
#include "database.hh"
|
|
||||||
#include "db/system_keyspace.hh"
|
#include "db/system_keyspace.hh"
|
||||||
#include "dht/i_partitioner.hh"
|
|
||||||
#include "mutation_reader.hh"
|
#include "mutation_reader.hh"
|
||||||
#include "partition_range_compat.hh"
|
|
||||||
#include "range.hh"
|
|
||||||
#include "service/storage_service.hh"
|
|
||||||
#include "stdx.hh"
|
|
||||||
#include "mutation_fragment.hh"
|
|
||||||
#include "sstables/sstables.hh"
|
|
||||||
#include "db/timeout_clock.hh"
|
|
||||||
|
|
||||||
namespace db {
|
namespace db {
|
||||||
|
|
||||||
namespace size_estimates {
|
namespace size_estimates {
|
||||||
|
|
||||||
|
struct token_range {
|
||||||
|
bytes start;
|
||||||
|
bytes end;
|
||||||
|
};
|
||||||
|
|
||||||
class size_estimates_mutation_reader final : public flat_mutation_reader::impl {
|
class size_estimates_mutation_reader final : public flat_mutation_reader::impl {
|
||||||
struct token_range {
|
|
||||||
bytes start;
|
|
||||||
bytes end;
|
|
||||||
};
|
|
||||||
schema_ptr _schema;
|
schema_ptr _schema;
|
||||||
const dht::partition_range* _prange;
|
const dht::partition_range* _prange;
|
||||||
const query::partition_slice& _slice;
|
const query::partition_slice& _slice;
|
||||||
@@ -57,267 +43,18 @@ class size_estimates_mutation_reader final : public flat_mutation_reader::impl {
|
|||||||
streamed_mutation::forwarding _fwd;
|
streamed_mutation::forwarding _fwd;
|
||||||
flat_mutation_reader_opt _partition_reader;
|
flat_mutation_reader_opt _partition_reader;
|
||||||
public:
|
public:
|
||||||
size_estimates_mutation_reader(schema_ptr schema, const dht::partition_range& prange, const query::partition_slice& slice, streamed_mutation::forwarding fwd)
|
size_estimates_mutation_reader(schema_ptr, const dht::partition_range&, const query::partition_slice&, streamed_mutation::forwarding);
|
||||||
: impl(schema)
|
|
||||||
, _schema(std::move(schema))
|
|
||||||
, _prange(&prange)
|
|
||||||
, _slice(slice)
|
|
||||||
, _fwd(fwd)
|
|
||||||
{ }
|
|
||||||
|
|
||||||
|
virtual future<> fill_buffer(db::timeout_clock::time_point) override;
|
||||||
|
virtual void next_partition() override;
|
||||||
|
virtual future<> fast_forward_to(const dht::partition_range&, db::timeout_clock::time_point) override;
|
||||||
|
virtual future<> fast_forward_to(position_range, db::timeout_clock::time_point) override;
|
||||||
|
virtual size_t buffer_size() const override;
|
||||||
private:
|
private:
|
||||||
future<> get_next_partition() {
|
future<> get_next_partition();
|
||||||
// For each specified range, estimate (crudely) mean partition size and partitions count.
|
|
||||||
auto& db = service::get_local_storage_proxy().get_db().local();
|
|
||||||
if (!_keyspaces) {
|
|
||||||
_keyspaces = get_keyspaces(*_schema, db, *_prange);
|
|
||||||
_current_partition = _keyspaces->begin();
|
|
||||||
}
|
|
||||||
if (_current_partition == _keyspaces->end()) {
|
|
||||||
_end_of_stream = true;
|
|
||||||
return make_ready_future<>();
|
|
||||||
}
|
|
||||||
return get_local_ranges().then([&db, this] (auto&& ranges) {
|
|
||||||
auto estimates = this->estimates_for_current_keyspace(db, std::move(ranges));
|
|
||||||
auto mutations = db::system_keyspace::make_size_estimates_mutation(*_current_partition, std::move(estimates));
|
|
||||||
++_current_partition;
|
|
||||||
std::vector<mutation> ms;
|
|
||||||
ms.emplace_back(std::move(mutations));
|
|
||||||
_partition_reader = flat_mutation_reader_from_mutations(std::move(ms), _fwd);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
public:
|
|
||||||
virtual future<> fill_buffer(db::timeout_clock::time_point timeout) override {
|
|
||||||
return do_until([this, timeout] { return is_end_of_stream() || is_buffer_full(); }, [this, timeout] {
|
|
||||||
if (!_partition_reader) {
|
|
||||||
return get_next_partition();
|
|
||||||
}
|
|
||||||
return _partition_reader->consume_pausable([this] (mutation_fragment mf) {
|
|
||||||
push_mutation_fragment(std::move(mf));
|
|
||||||
return stop_iteration(is_buffer_full());
|
|
||||||
}, timeout).then([this] {
|
|
||||||
if (_partition_reader->is_end_of_stream() && _partition_reader->is_buffer_empty()) {
|
|
||||||
_partition_reader = stdx::nullopt;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
|
||||||
virtual void next_partition() override {
|
|
||||||
clear_buffer_to_next_partition();
|
|
||||||
if (is_buffer_empty()) {
|
|
||||||
_partition_reader = stdx::nullopt;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
virtual future<> fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) override {
|
|
||||||
clear_buffer();
|
|
||||||
_prange = ≺
|
|
||||||
_keyspaces = stdx::nullopt;
|
|
||||||
_partition_reader = stdx::nullopt;
|
|
||||||
_end_of_stream = false;
|
|
||||||
return make_ready_future<>();
|
|
||||||
}
|
|
||||||
virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override {
|
|
||||||
forward_buffer_to(pr.start());
|
|
||||||
_end_of_stream = false;
|
|
||||||
if (_partition_reader) {
|
|
||||||
return _partition_reader->fast_forward_to(std::move(pr), timeout);
|
|
||||||
}
|
|
||||||
return make_ready_future<>();
|
|
||||||
}
|
|
||||||
virtual size_t buffer_size() const override {
|
|
||||||
if (_partition_reader) {
|
|
||||||
return flat_mutation_reader::impl::buffer_size() + _partition_reader->buffer_size();
|
|
||||||
}
|
|
||||||
return flat_mutation_reader::impl::buffer_size();
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* Returns the primary ranges for the local node.
|
|
||||||
* Used for testing as well.
|
|
||||||
*/
|
|
||||||
static future<std::vector<token_range>> get_local_ranges() {
|
|
||||||
auto& ss = service::get_local_storage_service();
|
|
||||||
return ss.get_local_tokens().then([&ss] (auto&& tokens) {
|
|
||||||
auto ranges = ss.get_token_metadata().get_primary_ranges_for(std::move(tokens));
|
|
||||||
std::vector<token_range> local_ranges;
|
|
||||||
auto to_bytes = [](const stdx::optional<dht::token_range::bound>& b) {
|
|
||||||
assert(b);
|
|
||||||
return utf8_type->decompose(dht::global_partitioner().to_sstring(b->value()));
|
|
||||||
};
|
|
||||||
// We merge the ranges to be compatible with how Cassandra shows it's size estimates table.
|
|
||||||
// All queries will be on that table, where all entries are text and there's no notion of
|
|
||||||
// token ranges form the CQL point of view.
|
|
||||||
auto left_inf = boost::find_if(ranges, [] (auto&& r) {
|
|
||||||
return !r.start() || r.start()->value() == dht::minimum_token();
|
|
||||||
});
|
|
||||||
auto right_inf = boost::find_if(ranges, [] (auto&& r) {
|
|
||||||
return !r.end() || r.start()->value() == dht::maximum_token();
|
|
||||||
});
|
|
||||||
if (left_inf != right_inf && left_inf != ranges.end() && right_inf != ranges.end()) {
|
|
||||||
local_ranges.push_back(token_range{to_bytes(right_inf->start()), to_bytes(left_inf->end())});
|
|
||||||
ranges.erase(left_inf);
|
|
||||||
ranges.erase(right_inf);
|
|
||||||
}
|
|
||||||
for (auto&& r : ranges) {
|
|
||||||
local_ranges.push_back(token_range{to_bytes(r.start()), to_bytes(r.end())});
|
|
||||||
}
|
|
||||||
boost::sort(local_ranges, [] (auto&& tr1, auto&& tr2) {
|
|
||||||
return utf8_type->less(tr1.start, tr2.start);
|
|
||||||
});
|
|
||||||
return local_ranges;
|
|
||||||
});
|
|
||||||
}
|
|
||||||
private:
|
|
||||||
struct virtual_row {
|
|
||||||
const bytes& cf_name;
|
|
||||||
const token_range& tokens;
|
|
||||||
clustering_key_prefix as_key() const {
|
|
||||||
return clustering_key_prefix::from_exploded(std::vector<bytes_view>{cf_name, tokens.start, tokens.end});
|
|
||||||
}
|
|
||||||
};
|
|
||||||
struct virtual_row_comparator {
|
|
||||||
schema_ptr _schema;
|
|
||||||
virtual_row_comparator(schema_ptr schema) : _schema(schema) { }
|
|
||||||
bool operator()(const clustering_key_prefix& key1, const clustering_key_prefix& key2) {
|
|
||||||
return clustering_key_prefix::prefix_equality_less_compare(*_schema)(key1, key2);
|
|
||||||
}
|
|
||||||
bool operator()(const virtual_row& row, const clustering_key_prefix& key) {
|
|
||||||
return operator()(row.as_key(), key);
|
|
||||||
}
|
|
||||||
bool operator()(const clustering_key_prefix& key, const virtual_row& row) {
|
|
||||||
return operator()(key, row.as_key());
|
|
||||||
}
|
|
||||||
};
|
|
||||||
class virtual_row_iterator : public std::iterator<std::input_iterator_tag, const virtual_row> {
|
|
||||||
std::reference_wrapper<const std::vector<bytes>> _cf_names;
|
|
||||||
std::reference_wrapper<const std::vector<token_range>> _ranges;
|
|
||||||
size_t _cf_names_idx = 0;
|
|
||||||
size_t _ranges_idx = 0;
|
|
||||||
public:
|
|
||||||
struct end_iterator_tag {};
|
|
||||||
virtual_row_iterator(const std::vector<bytes>& cf_names, const std::vector<token_range>& ranges)
|
|
||||||
: _cf_names(std::ref(cf_names))
|
|
||||||
, _ranges(std::ref(ranges))
|
|
||||||
{ }
|
|
||||||
virtual_row_iterator(const std::vector<bytes>& cf_names, const std::vector<token_range>& ranges, end_iterator_tag)
|
|
||||||
: _cf_names(std::ref(cf_names))
|
|
||||||
, _ranges(std::ref(ranges))
|
|
||||||
, _cf_names_idx(cf_names.size())
|
|
||||||
, _ranges_idx(ranges.size())
|
|
||||||
{ }
|
|
||||||
virtual_row_iterator& operator++() {
|
|
||||||
if (++_ranges_idx == _ranges.get().size() && ++_cf_names_idx < _cf_names.get().size()) {
|
|
||||||
_ranges_idx = 0;
|
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
virtual_row_iterator operator++(int) {
|
|
||||||
virtual_row_iterator i(*this);
|
|
||||||
++(*this);
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
const value_type operator*() const {
|
|
||||||
return { _cf_names.get()[_cf_names_idx], _ranges.get()[_ranges_idx] };
|
|
||||||
}
|
|
||||||
bool operator==(const virtual_row_iterator& i) const {
|
|
||||||
return _cf_names_idx == i._cf_names_idx
|
|
||||||
&& _ranges_idx == i._ranges_idx;
|
|
||||||
}
|
|
||||||
bool operator!=(const virtual_row_iterator& i) const {
|
|
||||||
return !(*this == i);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
std::vector<db::system_keyspace::range_estimates>
|
std::vector<db::system_keyspace::range_estimates>
|
||||||
estimates_for_current_keyspace(const database& db, std::vector<token_range> local_ranges) const {
|
estimates_for_current_keyspace(const database&, std::vector<token_range> local_ranges) const;
|
||||||
auto pkey = partition_key::from_single_value(*_schema, utf8_type->decompose(*_current_partition));
|
|
||||||
auto cfs = db.find_keyspace(*_current_partition).metadata()->cf_meta_data();
|
|
||||||
auto cf_names = boost::copy_range<std::vector<bytes>>(cfs | boost::adaptors::transformed([] (auto&& cf) {
|
|
||||||
return utf8_type->decompose(cf.first);
|
|
||||||
}));
|
|
||||||
boost::sort(cf_names, [] (auto&& n1, auto&& n2) {
|
|
||||||
return utf8_type->less(n1, n2);
|
|
||||||
});
|
|
||||||
std::vector<db::system_keyspace::range_estimates> estimates;
|
|
||||||
for (auto& range : _slice.row_ranges(*_schema, pkey)) {
|
|
||||||
auto rows = boost::make_iterator_range(
|
|
||||||
virtual_row_iterator(cf_names, local_ranges),
|
|
||||||
virtual_row_iterator(cf_names, local_ranges, virtual_row_iterator::end_iterator_tag()));
|
|
||||||
auto rows_to_estimate = range.slice(rows, virtual_row_comparator(_schema));
|
|
||||||
for (auto&& r : rows_to_estimate) {
|
|
||||||
auto& cf = db.find_column_family(*_current_partition, utf8_type->to_string(r.cf_name));
|
|
||||||
estimates.push_back(estimate(cf, r.tokens));
|
|
||||||
if (estimates.size() >= _slice.partition_row_limit()) {
|
|
||||||
return estimates;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return estimates;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the keyspaces, ordered by name, as selected by the partition_range.
|
|
||||||
*/
|
|
||||||
static ks_range get_keyspaces(const schema& s, const database& db, dht::partition_range range) {
|
|
||||||
struct keyspace_less_comparator {
|
|
||||||
const schema& _s;
|
|
||||||
keyspace_less_comparator(const schema& s) : _s(s) { }
|
|
||||||
dht::ring_position as_ring_position(const sstring& ks) {
|
|
||||||
auto pkey = partition_key::from_single_value(_s, utf8_type->decompose(ks));
|
|
||||||
return dht::global_partitioner().decorate_key(_s, std::move(pkey));
|
|
||||||
}
|
|
||||||
bool operator()(const sstring& ks1, const sstring& ks2) {
|
|
||||||
return as_ring_position(ks1).less_compare(_s, as_ring_position(ks2));
|
|
||||||
}
|
|
||||||
bool operator()(const sstring& ks, const dht::ring_position& rp) {
|
|
||||||
return as_ring_position(ks).less_compare(_s, rp);
|
|
||||||
}
|
|
||||||
bool operator()(const dht::ring_position& rp, const sstring& ks) {
|
|
||||||
return rp.less_compare(_s, as_ring_position(ks));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
auto keyspaces = db.get_non_system_keyspaces();
|
|
||||||
auto cmp = keyspace_less_comparator(s);
|
|
||||||
boost::sort(keyspaces, cmp);
|
|
||||||
return boost::copy_range<ks_range>(range.slice(keyspaces, std::move(cmp)));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Makes a wrapping range of ring_position from a nonwrapping range of token, used to select sstables.
|
|
||||||
*/
|
|
||||||
static dht::partition_range as_ring_position_range(dht::token_range& r) {
|
|
||||||
stdx::optional<range<dht::ring_position>::bound> start_bound, end_bound;
|
|
||||||
if (r.start()) {
|
|
||||||
start_bound = {{ dht::ring_position(r.start()->value(), dht::ring_position::token_bound::start), r.start()->is_inclusive() }};
|
|
||||||
}
|
|
||||||
if (r.end()) {
|
|
||||||
end_bound = {{ dht::ring_position(r.end()->value(), dht::ring_position::token_bound::end), r.end()->is_inclusive() }};
|
|
||||||
}
|
|
||||||
return dht::partition_range(std::move(start_bound), std::move(end_bound), r.is_singular());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Add a new range_estimates for the specified range, considering the sstables associated with `cf`.
|
|
||||||
*/
|
|
||||||
static system_keyspace::range_estimates estimate(const column_family& cf, const token_range& r) {
|
|
||||||
int64_t count{0};
|
|
||||||
utils::estimated_histogram hist{0};
|
|
||||||
auto from_bytes = [] (auto& b) {
|
|
||||||
return dht::global_partitioner().from_sstring(utf8_type->to_string(b));
|
|
||||||
};
|
|
||||||
dht::token_range_vector ranges;
|
|
||||||
compat::unwrap_into(
|
|
||||||
wrapping_range<dht::token>({{ from_bytes(r.start) }}, {{ from_bytes(r.end) }}),
|
|
||||||
dht::token_comparator(),
|
|
||||||
[&] (auto&& rng) { ranges.push_back(std::move(rng)); });
|
|
||||||
for (auto&& r : ranges) {
|
|
||||||
auto rp_range = as_ring_position_range(r);
|
|
||||||
for (auto&& sstable : cf.select_sstables(rp_range)) {
|
|
||||||
count += sstable->estimated_keys_for_range(r);
|
|
||||||
hist.merge(sstable->get_stats_metadata().estimated_row_size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return {cf.schema(), r.start, r.end, count, count > 0 ? hist.mean() : 0};
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct virtual_reader {
|
struct virtual_reader {
|
||||||
@@ -332,6 +69,12 @@ struct virtual_reader {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the primary ranges for the local node.
|
||||||
|
* Used for testing as well.
|
||||||
|
*/
|
||||||
|
future<std::vector<token_range>> get_local_ranges();
|
||||||
|
|
||||||
} // namespace size_estimates
|
} // namespace size_estimates
|
||||||
|
|
||||||
} // namespace db
|
} // namespace db
|
||||||
|
|||||||
@@ -26,6 +26,7 @@
|
|||||||
#include "db/consistency_level_type.hh"
|
#include "db/consistency_level_type.hh"
|
||||||
#include "db/system_keyspace.hh"
|
#include "db/system_keyspace.hh"
|
||||||
#include "schema_builder.hh"
|
#include "schema_builder.hh"
|
||||||
|
#include "timeout_config.hh"
|
||||||
#include "types.hh"
|
#include "types.hh"
|
||||||
|
|
||||||
#include <seastar/core/reactor.hh>
|
#include <seastar/core/reactor.hh>
|
||||||
@@ -97,11 +98,17 @@ future<> system_distributed_keyspace::stop() {
|
|||||||
return make_ready_future<>();
|
return make_ready_future<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Timeout configuration passed to the _qp.process() calls in this file.
// All seven timeout_config fields are set to the same 10-second value.
static const timeout_config internal_distributed_timeout_config = [] {
    const std::chrono::seconds limit{10};
    return timeout_config{ limit, limit, limit, limit, limit, limit, limit };
}();
|
||||||
|
|
||||||
future<std::unordered_map<utils::UUID, sstring>> system_distributed_keyspace::view_status(sstring ks_name, sstring view_name) const {
|
future<std::unordered_map<utils::UUID, sstring>> system_distributed_keyspace::view_status(sstring ks_name, sstring view_name) const {
|
||||||
return _qp.process(
|
return _qp.process(
|
||||||
sprint("SELECT host_id, status FROM %s.%s WHERE keyspace_name = ? AND view_name = ?", NAME, VIEW_BUILD_STATUS),
|
sprint("SELECT host_id, status FROM %s.%s WHERE keyspace_name = ? AND view_name = ?", NAME, VIEW_BUILD_STATUS),
|
||||||
db::consistency_level::ONE,
|
db::consistency_level::ONE,
|
||||||
infinite_timeout_config,
|
internal_distributed_timeout_config,
|
||||||
{ std::move(ks_name), std::move(view_name) },
|
{ std::move(ks_name), std::move(view_name) },
|
||||||
false).then([this] (::shared_ptr<cql3::untyped_result_set> cql_result) {
|
false).then([this] (::shared_ptr<cql3::untyped_result_set> cql_result) {
|
||||||
return boost::copy_range<std::unordered_map<utils::UUID, sstring>>(*cql_result
|
return boost::copy_range<std::unordered_map<utils::UUID, sstring>>(*cql_result
|
||||||
@@ -118,7 +125,7 @@ future<> system_distributed_keyspace::start_view_build(sstring ks_name, sstring
|
|||||||
return _qp.process(
|
return _qp.process(
|
||||||
sprint("INSERT INTO %s.%s (keyspace_name, view_name, host_id, status) VALUES (?, ?, ?, ?)", NAME, VIEW_BUILD_STATUS),
|
sprint("INSERT INTO %s.%s (keyspace_name, view_name, host_id, status) VALUES (?, ?, ?, ?)", NAME, VIEW_BUILD_STATUS),
|
||||||
db::consistency_level::ONE,
|
db::consistency_level::ONE,
|
||||||
infinite_timeout_config,
|
internal_distributed_timeout_config,
|
||||||
{ std::move(ks_name), std::move(view_name), std::move(host_id), "STARTED" },
|
{ std::move(ks_name), std::move(view_name), std::move(host_id), "STARTED" },
|
||||||
false).discard_result();
|
false).discard_result();
|
||||||
});
|
});
|
||||||
@@ -129,7 +136,7 @@ future<> system_distributed_keyspace::finish_view_build(sstring ks_name, sstring
|
|||||||
return _qp.process(
|
return _qp.process(
|
||||||
sprint("UPDATE %s.%s SET status = ? WHERE keyspace_name = ? AND view_name = ? AND host_id = ?", NAME, VIEW_BUILD_STATUS),
|
sprint("UPDATE %s.%s SET status = ? WHERE keyspace_name = ? AND view_name = ? AND host_id = ?", NAME, VIEW_BUILD_STATUS),
|
||||||
db::consistency_level::ONE,
|
db::consistency_level::ONE,
|
||||||
infinite_timeout_config,
|
internal_distributed_timeout_config,
|
||||||
{ "SUCCESS", std::move(ks_name), std::move(view_name), std::move(host_id) },
|
{ "SUCCESS", std::move(ks_name), std::move(view_name), std::move(host_id) },
|
||||||
false).discard_result();
|
false).discard_result();
|
||||||
});
|
});
|
||||||
@@ -139,7 +146,7 @@ future<> system_distributed_keyspace::remove_view(sstring ks_name, sstring view_
|
|||||||
return _qp.process(
|
return _qp.process(
|
||||||
sprint("DELETE FROM %s.%s WHERE keyspace_name = ? AND view_name = ?", NAME, VIEW_BUILD_STATUS),
|
sprint("DELETE FROM %s.%s WHERE keyspace_name = ? AND view_name = ?", NAME, VIEW_BUILD_STATUS),
|
||||||
db::consistency_level::ONE,
|
db::consistency_level::ONE,
|
||||||
infinite_timeout_config,
|
internal_distributed_timeout_config,
|
||||||
{ std::move(ks_name), std::move(view_name) },
|
{ std::move(ks_name), std::move(view_name) },
|
||||||
false).discard_result();
|
false).discard_result();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1635,6 +1635,9 @@ void make(database& db, bool durable, bool volatile_testing_only) {
|
|||||||
auto cfg = ks.make_column_family_config(*table, db.get_config(), db.get_large_partition_handler());
|
auto cfg = ks.make_column_family_config(*table, db.get_config(), db.get_large_partition_handler());
|
||||||
if (maybe_write_in_user_memory(table, db)) {
|
if (maybe_write_in_user_memory(table, db)) {
|
||||||
cfg.dirty_memory_manager = &db._dirty_memory_manager;
|
cfg.dirty_memory_manager = &db._dirty_memory_manager;
|
||||||
|
} else {
|
||||||
|
cfg.memtable_scheduling_group = default_scheduling_group();
|
||||||
|
cfg.memtable_to_cache_scheduling_group = default_scheduling_group();
|
||||||
}
|
}
|
||||||
db.add_column_family(ks, table, std::move(cfg));
|
db.add_column_family(ks, table, std::move(cfg));
|
||||||
maybe_add_virtual_reader(table, db);
|
maybe_add_virtual_reader(table, db);
|
||||||
|
|||||||
@@ -384,6 +384,10 @@ public:
|
|||||||
return "biased-token-round-robin";
|
return "biased-token-round-robin";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Default implementation: reports 0. Subclasses may override to report their own value.
virtual unsigned sharding_ignore_msb() const { return 0; }
|
||||||
|
|
||||||
friend bool operator==(token_view t1, token_view t2);
|
friend bool operator==(token_view t1, token_view t2);
|
||||||
friend bool operator<(token_view t1, token_view t2);
|
friend bool operator<(token_view t1, token_view t2);
|
||||||
friend int tri_compare(token_view t1, token_view t2);
|
friend int tri_compare(token_view t1, token_view t2);
|
||||||
|
|||||||
@@ -290,6 +290,11 @@ murmur3_partitioner::token_for_next_shard(const token& t, shard_id shard, unsign
|
|||||||
return bias(n);
|
return bias(n);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned
|
||||||
|
murmur3_partitioner::sharding_ignore_msb() const {
|
||||||
|
return _sharding_ignore_msb_bits;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
using registry = class_registrator<i_partitioner, murmur3_partitioner, const unsigned&, const unsigned&>;
|
using registry = class_registrator<i_partitioner, murmur3_partitioner, const unsigned&, const unsigned&>;
|
||||||
static registry registrator("org.apache.cassandra.dht.Murmur3Partitioner");
|
static registry registrator("org.apache.cassandra.dht.Murmur3Partitioner");
|
||||||
|
|||||||
@@ -52,6 +52,7 @@ public:
|
|||||||
|
|
||||||
virtual unsigned shard_of(const token& t) const override;
|
virtual unsigned shard_of(const token& t) const override;
|
||||||
virtual token token_for_next_shard(const token& t, shard_id shard, unsigned spans) const override;
|
virtual token token_for_next_shard(const token& t, shard_id shard, unsigned spans) const override;
|
||||||
|
virtual unsigned sharding_ignore_msb() const override;
|
||||||
private:
|
private:
|
||||||
using uint128_t = unsigned __int128;
|
using uint128_t = unsigned __int128;
|
||||||
static int64_t normalize(int64_t in);
|
static int64_t normalize(int64_t in);
|
||||||
|
|||||||
111
dist/ami/build_ami.sh
vendored
111
dist/ami/build_ami.sh
vendored
@@ -11,11 +11,9 @@ print_usage() {
|
|||||||
echo " --repo repository for both install and update, specify .repo/.list file URL"
|
echo " --repo repository for both install and update, specify .repo/.list file URL"
|
||||||
echo " --repo-for-install repository for install, specify .repo/.list file URL"
|
echo " --repo-for-install repository for install, specify .repo/.list file URL"
|
||||||
echo " --repo-for-update repository for update, specify .repo/.list file URL"
|
echo " --repo-for-update repository for update, specify .repo/.list file URL"
|
||||||
echo " --target specify target distribution"
|
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
LOCALRPM=0
|
LOCALRPM=0
|
||||||
TARGET=centos
|
|
||||||
while [ $# -gt 0 ]; do
|
while [ $# -gt 0 ]; do
|
||||||
case "$1" in
|
case "$1" in
|
||||||
"--localrpm")
|
"--localrpm")
|
||||||
@@ -34,10 +32,6 @@ while [ $# -gt 0 ]; do
|
|||||||
INSTALL_ARGS="$INSTALL_ARGS --repo-for-update $2"
|
INSTALL_ARGS="$INSTALL_ARGS --repo-for-update $2"
|
||||||
shift 2
|
shift 2
|
||||||
;;
|
;;
|
||||||
"--target")
|
|
||||||
TARGET="$2"
|
|
||||||
shift 2
|
|
||||||
;;
|
|
||||||
*)
|
*)
|
||||||
print_usage
|
print_usage
|
||||||
;;
|
;;
|
||||||
@@ -62,91 +56,42 @@ pkg_install() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
case "$TARGET" in
|
AMI=ami-ae7bfdb8
|
||||||
"centos")
|
REGION=us-east-1
|
||||||
AMI=ami-ae7bfdb8
|
SSH_USERNAME=centos
|
||||||
REGION=us-east-1
|
|
||||||
SSH_USERNAME=centos
|
|
||||||
;;
|
|
||||||
"trusty")
|
|
||||||
AMI=ami-ff427095
|
|
||||||
REGION=us-east-1
|
|
||||||
SSH_USERNAME=ubuntu
|
|
||||||
;;
|
|
||||||
"xenial")
|
|
||||||
AMI=ami-da05a4a0
|
|
||||||
REGION=us-east-1
|
|
||||||
SSH_USERNAME=ubuntu
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
echo "build_ami.sh does not supported this distribution."
|
|
||||||
exit 1
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
if [ $LOCALRPM -eq 1 ]; then
|
if [ $LOCALRPM -eq 1 ]; then
|
||||||
sudo rm -rf build/*
|
sudo rm -rf build/*
|
||||||
REPO=`./scripts/scylla_current_repo --target $TARGET`
|
REPO=`./scripts/scylla_current_repo --target centos`
|
||||||
INSTALL_ARGS="$INSTALL_ARGS --localrpm --repo $REPO"
|
INSTALL_ARGS="$INSTALL_ARGS --localrpm --repo $REPO"
|
||||||
if [ ! -f /usr/bin/git ]; then
|
if [ ! -f /usr/bin/git ]; then
|
||||||
pkg_install git
|
pkg_install git
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "$TARGET" = "centos" ]; then
|
if [ ! -f dist/ami/files/scylla.x86_64.rpm ] || [ ! -f dist/ami/files/scylla-kernel-conf.x86_64.rpm ] || [ ! -f dist/ami/files/scylla-conf.x86_64.rpm ] || [ ! -f dist/ami/files/scylla-server.x86_64.rpm ] || [ ! -f dist/ami/files/scylla-debuginfo.x86_64.rpm ]; then
|
||||||
if [ ! -f dist/ami/files/scylla.x86_64.rpm ] || [ ! -f dist/ami/files/scylla-kernel-conf.x86_64.rpm ] || [ ! -f dist/ami/files/scylla-conf.x86_64.rpm ] || [ ! -f dist/ami/files/scylla-server.x86_64.rpm ] || [ ! -f dist/ami/files/scylla-debuginfo.x86_64.rpm ]; then
|
dist/redhat/build_rpm.sh --dist --target epel-7-x86_64
|
||||||
dist/redhat/build_rpm.sh --dist --target epel-7-x86_64
|
cp build/rpms/scylla-`cat build/SCYLLA-VERSION-FILE`-`cat build/SCYLLA-RELEASE-FILE`.*.x86_64.rpm dist/ami/files/scylla.x86_64.rpm
|
||||||
cp build/rpms/scylla-`cat build/SCYLLA-VERSION-FILE`-`cat build/SCYLLA-RELEASE-FILE`.*.x86_64.rpm dist/ami/files/scylla.x86_64.rpm
|
cp build/rpms/scylla-kernel-conf-`cat build/SCYLLA-VERSION-FILE`-`cat build/SCYLLA-RELEASE-FILE`.*.x86_64.rpm dist/ami/files/scylla-kernel-conf.x86_64.rpm
|
||||||
cp build/rpms/scylla-kernel-conf-`cat build/SCYLLA-VERSION-FILE`-`cat build/SCYLLA-RELEASE-FILE`.*.x86_64.rpm dist/ami/files/scylla-kernel-conf.x86_64.rpm
|
cp build/rpms/scylla-conf-`cat build/SCYLLA-VERSION-FILE`-`cat build/SCYLLA-RELEASE-FILE`.*.x86_64.rpm dist/ami/files/scylla-conf.x86_64.rpm
|
||||||
cp build/rpms/scylla-conf-`cat build/SCYLLA-VERSION-FILE`-`cat build/SCYLLA-RELEASE-FILE`.*.x86_64.rpm dist/ami/files/scylla-conf.x86_64.rpm
|
cp build/rpms/scylla-server-`cat build/SCYLLA-VERSION-FILE`-`cat build/SCYLLA-RELEASE-FILE`.*.x86_64.rpm dist/ami/files/scylla-server.x86_64.rpm
|
||||||
cp build/rpms/scylla-server-`cat build/SCYLLA-VERSION-FILE`-`cat build/SCYLLA-RELEASE-FILE`.*.x86_64.rpm dist/ami/files/scylla-server.x86_64.rpm
|
cp build/rpms/scylla-debuginfo-`cat build/SCYLLA-VERSION-FILE`-`cat build/SCYLLA-RELEASE-FILE`.*.x86_64.rpm dist/ami/files/scylla-debuginfo.x86_64.rpm
|
||||||
cp build/rpms/scylla-debuginfo-`cat build/SCYLLA-VERSION-FILE`-`cat build/SCYLLA-RELEASE-FILE`.*.x86_64.rpm dist/ami/files/scylla-debuginfo.x86_64.rpm
|
fi
|
||||||
fi
|
if [ ! -f dist/ami/files/scylla-jmx.noarch.rpm ]; then
|
||||||
if [ ! -f dist/ami/files/scylla-jmx.noarch.rpm ]; then
|
cd build
|
||||||
cd build
|
git clone --depth 1 https://github.com/scylladb/scylla-jmx.git
|
||||||
git clone --depth 1 https://github.com/scylladb/scylla-jmx.git
|
cd scylla-jmx
|
||||||
cd scylla-jmx
|
dist/redhat/build_rpm.sh --target epel-7-x86_64
|
||||||
dist/redhat/build_rpm.sh --target epel-7-x86_64
|
cd ../..
|
||||||
cd ../..
|
cp build/scylla-jmx/build/rpms/scylla-jmx-`cat build/scylla-jmx/build/SCYLLA-VERSION-FILE`-`cat build/scylla-jmx/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/files/scylla-jmx.noarch.rpm
|
||||||
cp build/scylla-jmx/build/rpms/scylla-jmx-`cat build/scylla-jmx/build/SCYLLA-VERSION-FILE`-`cat build/scylla-jmx/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/files/scylla-jmx.noarch.rpm
|
fi
|
||||||
fi
|
if [ ! -f dist/ami/files/scylla-tools.noarch.rpm ] || [ ! -f dist/ami/files/scylla-tools-core.noarch.rpm ]; then
|
||||||
if [ ! -f dist/ami/files/scylla-tools.noarch.rpm ] || [ ! -f dist/ami/files/scylla-tools-core.noarch.rpm ]; then
|
cd build
|
||||||
cd build
|
git clone --depth 1 https://github.com/scylladb/scylla-tools-java.git
|
||||||
git clone --depth 1 https://github.com/scylladb/scylla-tools-java.git
|
cd scylla-tools-java
|
||||||
cd scylla-tools-java
|
dist/redhat/build_rpm.sh --target epel-7-x86_64
|
||||||
dist/redhat/build_rpm.sh --target epel-7-x86_64
|
cd ../..
|
||||||
cd ../..
|
cp build/scylla-tools-java/build/rpms/scylla-tools-`cat build/scylla-tools-java/build/SCYLLA-VERSION-FILE`-`cat build/scylla-tools-java/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/files/scylla-tools.noarch.rpm
|
||||||
cp build/scylla-tools-java/build/rpms/scylla-tools-`cat build/scylla-tools-java/build/SCYLLA-VERSION-FILE`-`cat build/scylla-tools-java/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/files/scylla-tools.noarch.rpm
|
cp build/scylla-tools-java/build/rpms/scylla-tools-core-`cat build/scylla-tools-java/build/SCYLLA-VERSION-FILE`-`cat build/scylla-tools-java/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/files/scylla-tools-core.noarch.rpm
|
||||||
cp build/scylla-tools-java/build/rpms/scylla-tools-core-`cat build/scylla-tools-java/build/SCYLLA-VERSION-FILE`-`cat build/scylla-tools-java/build/SCYLLA-RELEASE-FILE`.*.noarch.rpm dist/ami/files/scylla-tools-core.noarch.rpm
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
if [ ! -f dist/ami/files/scylla-server_amd64.deb ]; then
|
|
||||||
./scripts/git-archive-all --force-submodules --prefix scylla build/scylla.tar
|
|
||||||
tar -C build/ -xvpf build/scylla.tar
|
|
||||||
cd build/scylla
|
|
||||||
dist/debian/build_deb.sh --dist --target $TARGET
|
|
||||||
cd ../..
|
|
||||||
cp build/scylla/build/debs/scylla_`cat build/SCYLLA-VERSION-FILE | sed 's/\.rc/~rc/'`-`cat build/SCYLLA-RELEASE-FILE`-0ubuntu1~${TARGET}_amd64.deb dist/ami/files/scylla_amd64.deb
|
|
||||||
cp build/scylla/build/debs/scylla-kernel-conf_`cat build/SCYLLA-VERSION-FILE | sed 's/\.rc/~rc/'`-`cat build/SCYLLA-RELEASE-FILE`-0ubuntu1~${TARGET}_amd64.deb dist/ami/files/scylla-kernel-conf_amd64.deb
|
|
||||||
cp build/scylla/build/debs/scylla-conf_`cat build/SCYLLA-VERSION-FILE | sed 's/\.rc/~rc/'`-`cat build/SCYLLA-RELEASE-FILE`-0ubuntu1~${TARGET}_amd64.deb dist/ami/files/scylla-conf_amd64.deb
|
|
||||||
cp build/scylla/build/debs/scylla-server_`cat build/SCYLLA-VERSION-FILE | sed 's/\.rc/~rc/'`-`cat build/SCYLLA-RELEASE-FILE`-0ubuntu1~${TARGET}_amd64.deb dist/ami/files/scylla-server_amd64.deb
|
|
||||||
cp build/scylla/build/debs/scylla-server-dbg_`cat build/SCYLLA-VERSION-FILE | sed 's/\.rc/~rc/'`-`cat build/SCYLLA-RELEASE-FILE`-0ubuntu1~${TARGET}_amd64.deb dist/ami/files/scylla-server-dbg_amd64.deb
|
|
||||||
fi
|
|
||||||
if [ ! -f dist/ami/files/scylla-jmx_all.deb ]; then
|
|
||||||
cd build
|
|
||||||
git clone --depth 1 https://github.com/scylladb/scylla-jmx.git
|
|
||||||
cd scylla-jmx
|
|
||||||
dist/debian/build_deb.sh --target $TARGET
|
|
||||||
cd ../..
|
|
||||||
cp build/scylla-jmx/build/debs/scylla-jmx_`cat build/scylla-jmx/build/SCYLLA-VERSION-FILE | sed 's/\.rc/~rc/'`-`cat build/scylla-jmx/build/SCYLLA-RELEASE-FILE`-0ubuntu1~${TARGET}_all.deb dist/ami/files/scylla-jmx_all.deb
|
|
||||||
fi
|
|
||||||
if [ ! -f dist/ami/files/scylla-tools_all.deb ]; then
|
|
||||||
cd build
|
|
||||||
git clone --depth 1 https://github.com/scylladb/scylla-tools-java.git
|
|
||||||
cd scylla-tools-java
|
|
||||||
dist/debian/build_deb.sh --target $TARGET
|
|
||||||
cd ../..
|
|
||||||
cp build/scylla-tools-java/build/debs/scylla-tools_`cat build/scylla-tools-java/build/SCYLLA-VERSION-FILE | sed 's/\.rc/~rc/'`-`cat build/scylla-tools-java/build/SCYLLA-RELEASE-FILE`-0ubuntu1~${TARGET}_all.deb dist/ami/files/scylla-tools_all.deb
|
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|||||||
115
dist/ami/files/.bash_profile
vendored
115
dist/ami/files/.bash_profile
vendored
@@ -7,121 +7,8 @@ fi
|
|||||||
|
|
||||||
# User specific environment and startup programs
|
# User specific environment and startup programs
|
||||||
|
|
||||||
. /usr/lib/scylla/scylla_lib.sh
|
|
||||||
|
|
||||||
PATH=$PATH:$HOME/.local/bin:$HOME/bin
|
PATH=$PATH:$HOME/.local/bin:$HOME/bin
|
||||||
|
|
||||||
export PATH
|
export PATH
|
||||||
|
|
||||||
echo
|
~/.scylla_ami_login
|
||||||
echo ' _____ _ _ _____ ____ '
|
|
||||||
echo ' / ____| | | | | __ \| _ \ '
|
|
||||||
echo ' | (___ ___ _ _| | | __ _| | | | |_) |'
|
|
||||||
echo ' \___ \ / __| | | | | |/ _` | | | | _ < '
|
|
||||||
echo ' ____) | (__| |_| | | | (_| | |__| | |_) |'
|
|
||||||
echo ' |_____/ \___|\__, |_|_|\__,_|_____/|____/ '
|
|
||||||
echo ' __/ | '
|
|
||||||
echo ' |___/ '
|
|
||||||
echo ''
|
|
||||||
echo ''
|
|
||||||
echo 'Nodetool:'
|
|
||||||
echo ' nodetool help'
|
|
||||||
echo 'CQL Shell:'
|
|
||||||
echo ' cqlsh'
|
|
||||||
echo 'More documentation available at: '
|
|
||||||
echo ' http://www.scylladb.com/doc/'
|
|
||||||
echo 'By default, Scylla sends certain information about this node to a data collection server. For information, see http://www.scylladb.com/privacy/'
|
|
||||||
echo
|
|
||||||
|
|
||||||
if [ `ec2_is_supported_instance_type` -eq 0 ]; then
|
|
||||||
TYPE=`curl -s http://169.254.169.254/latest/meta-data/instance-type`
|
|
||||||
tput setaf 1
|
|
||||||
tput bold
|
|
||||||
echo " $TYPE is not supported instance type!"
|
|
||||||
tput sgr0
|
|
||||||
echo -n "To continue startup ScyllaDB on this instance, run 'sudo scylla_io_setup' "
|
|
||||||
if ! is_systemd; then
|
|
||||||
echo "then 'initctl start scylla-server'."
|
|
||||||
else
|
|
||||||
echo "then 'systemctl start scylla-server'."
|
|
||||||
fi
|
|
||||||
echo "For a list of optimized instance types and more EC2 instructions see http://www.scylladb.com/doc/getting-started-amazon/"
|
|
||||||
echo
|
|
||||||
else
|
|
||||||
SETUP=
|
|
||||||
if is_systemd; then
|
|
||||||
SETUP=`systemctl is-active scylla-ami-setup`
|
|
||||||
fi
|
|
||||||
if [ "$SETUP" == "activating" ]; then
|
|
||||||
tput setaf 4
|
|
||||||
tput bold
|
|
||||||
echo " Constructing RAID volume..."
|
|
||||||
tput sgr0
|
|
||||||
echo
|
|
||||||
echo "Please wait for setup. To see status, run "
|
|
||||||
echo " 'systemctl status scylla-ami-setup'"
|
|
||||||
echo
|
|
||||||
echo "After setup finished, scylla-server service will launch."
|
|
||||||
echo "To see status of scylla-server, run "
|
|
||||||
echo " 'systemctl status scylla-server'"
|
|
||||||
echo
|
|
||||||
elif [ "$SETUP" == "failed" ]; then
|
|
||||||
tput setaf 1
|
|
||||||
tput bold
|
|
||||||
echo " AMI initial configuration failed!"
|
|
||||||
tput sgr0
|
|
||||||
echo
|
|
||||||
echo "To see status, run "
|
|
||||||
echo " 'systemctl status scylla-ami-setup'"
|
|
||||||
echo
|
|
||||||
else
|
|
||||||
if is_systemd; then
|
|
||||||
SCYLLA=`systemctl is-active scylla-server`
|
|
||||||
else
|
|
||||||
if [ "`initctl status scylla-server|grep "running, process"`" != "" ]; then
|
|
||||||
SCYLLA="active"
|
|
||||||
else
|
|
||||||
SCYLLA="failed"
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
if [ "$SCYLLA" == "activating" ]; then
|
|
||||||
tput setaf 4
|
|
||||||
tput bold
|
|
||||||
echo " ScyllaDB is starting..."
|
|
||||||
tput sgr0
|
|
||||||
echo
|
|
||||||
echo "Please wait for start. To see status, run "
|
|
||||||
echo " 'systemctl status scylla-server'"
|
|
||||||
echo
|
|
||||||
elif [ "$SCYLLA" == "active" ]; then
|
|
||||||
tput setaf 4
|
|
||||||
tput bold
|
|
||||||
echo " ScyllaDB is active."
|
|
||||||
tput sgr0
|
|
||||||
echo
|
|
||||||
echo "$ nodetool status"
|
|
||||||
echo
|
|
||||||
nodetool status
|
|
||||||
else
|
|
||||||
tput setaf 1
|
|
||||||
tput bold
|
|
||||||
echo " ScyllaDB is not started!"
|
|
||||||
tput sgr0
|
|
||||||
echo "Please wait for startup. To see status of ScyllaDB, run "
|
|
||||||
if ! is_systemd; then
|
|
||||||
echo " 'initctl status scylla-server'"
|
|
||||||
echo "and"
|
|
||||||
echo " 'sudo cat /var/log/upstart/scylla-server.log'"
|
|
||||||
echo
|
|
||||||
else
|
|
||||||
echo " 'systemctl status scylla-server'"
|
|
||||||
echo
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
echo -n " "
|
|
||||||
/usr/lib/scylla/scylla_ec2_check
|
|
||||||
if [ $? -eq 0 ]; then
|
|
||||||
echo
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|||||||
118
dist/ami/files/.scylla_ami_login
vendored
Executable file
118
dist/ami/files/.scylla_ami_login
vendored
Executable file
@@ -0,0 +1,118 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
#
|
||||||
|
# Copyright 2018 ScyllaDB
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# This file is part of Scylla.
|
||||||
|
#
|
||||||
|
# Scylla is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# Scylla is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import argparse
|
||||||
|
sys.path.append('/usr/lib/scylla')
|
||||||
|
from scylla_util import *
|
||||||
|
|
||||||
|
MSG_HEADER = '''
|
||||||
|
|
||||||
|
_____ _ _ _____ ____
|
||||||
|
/ ____| | | | | __ \| _ \
|
||||||
|
| (___ ___ _ _| | | __ _| | | | |_) |
|
||||||
|
\___ \ / __| | | | | |/ _` | | | | _ <
|
||||||
|
____) | (__| |_| | | | (_| | |__| | |_) |
|
||||||
|
|_____/ \___|\__, |_|_|\__,_|_____/|____/
|
||||||
|
__/ |
|
||||||
|
|___/
|
||||||
|
|
||||||
|
|
||||||
|
Nodetool:
|
||||||
|
nodetool help
|
||||||
|
CQL Shell:
|
||||||
|
cqlsh
|
||||||
|
More documentation available at:
|
||||||
|
http://www.scylladb.com/doc/
|
||||||
|
By default, Scylla sends certain information about this node to a data collection server. For information, see http://www.scylladb.com/privacy/
|
||||||
|
|
||||||
|
'''[1:-1]
|
||||||
|
# Message passed to colorprint() when the instance class is not supported.
# {type} is filled with the instance class by the caller; {red}/{nocolor} are
# presumably expanded by colorprint() itself (defined in scylla_util).
# The [1:-1] slice drops the newline after the opening quotes and the final one.
# The URL line must not end with a double quote: inside a '''-string it would
# be printed verbatim.
MSG_UNSUPPORTED_INSTANCE_TYPE = '''
{red}{type} is not supported instance type!{nocolor}
To continue startup ScyllaDB on this instance, run 'sudo scylla_io_setup' then 'systemctl start scylla-server'.
For a list of optimized instance types and more EC2 instructions see http://www.scylladb.com/doc/getting-started-amazon/

'''[1:-1]
|
||||||
|
MSG_SETUP_ACTIVATING = '''
|
||||||
|
{green}Constructing RAID volume...{nocolor}
|
||||||
|
|
||||||
|
Please wait for setup. To see status, run
|
||||||
|
'systemctl status scylla-ami-setup'
|
||||||
|
|
||||||
|
After setup finished, scylla-server service will launch.
|
||||||
|
To see status of scylla-server, run
|
||||||
|
'systemctl status scylla-server'
|
||||||
|
|
||||||
|
'''[1:-1]
|
||||||
|
MSG_SETUP_FAILED = '''
|
||||||
|
{red}AMI initial configuration failed!{nocolor}
|
||||||
|
|
||||||
|
To see status, run
|
||||||
|
'systemctl status scylla-ami-setup'
|
||||||
|
|
||||||
|
'''[1:-1]
|
||||||
|
MSG_SCYLLA_ACTIVATING = '''
|
||||||
|
{green}ScyllaDB is starting...{nocolor}
|
||||||
|
|
||||||
|
Please wait for start. To see status, run
|
||||||
|
'systemctl status scylla-server'
|
||||||
|
|
||||||
|
'''[1:-1]
|
||||||
|
MSG_SCYLLA_FAILED = '''
|
||||||
|
{red}ScyllaDB is not started!{nocolor}
|
||||||
|
Please wait for startup. To see status of ScyllaDB, run
|
||||||
|
'systemctl status scylla-server'
|
||||||
|
|
||||||
|
'''[1:-1]
|
||||||
|
MSG_SCYLLA_ACTIVE = '''
|
||||||
|
{green}ScyllaDB is active.{nocolor}
|
||||||
|
|
||||||
|
$ nodetool status
|
||||||
|
|
||||||
|
'''[1:-1]
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
colorprint(MSG_HEADER)
|
||||||
|
aws = aws_instance()
|
||||||
|
if not aws.is_supported_instance_class():
|
||||||
|
colorprint(MSG_UNSUPPORTED_INSTANCE_TYPE, type=aws.instance_class())
|
||||||
|
else:
|
||||||
|
setup = systemd_unit('scylla-ami-setup.service')
|
||||||
|
res = setup.is_active()
|
||||||
|
if res == 'activating':
|
||||||
|
colorprint(MSG_SETUP_ACTIVATING)
|
||||||
|
elif res == 'failed':
|
||||||
|
colorprint(MSG_SETUP_FAILED)
|
||||||
|
else:
|
||||||
|
server = systemd_unit('scylla-server.service')
|
||||||
|
res = server.is_active()
|
||||||
|
if res == 'activating':
|
||||||
|
colorprint(MSG_SCYLLA_ACTIVATING)
|
||||||
|
elif res == 'failed':
|
||||||
|
colorprint(MSG_SCYLLA_FAILED)
|
||||||
|
else:
|
||||||
|
colorprint(MSG_SCYLLA_ACTIVE)
|
||||||
|
run('nodetool status', exception=False)
|
||||||
|
print(' ', end='')
|
||||||
|
res = run('/usr/lib/scylla/scylla_ec2_check --nic eth0', exception=False)
|
||||||
|
if res == 0:
|
||||||
|
print('')
|
||||||
2
dist/ami/files/scylla-ami
vendored
2
dist/ami/files/scylla-ami
vendored
Submodule dist/ami/files/scylla-ami updated: 36e85110ec...fe156a5725
7
dist/ami/scylla.json
vendored
7
dist/ami/scylla.json
vendored
@@ -64,14 +64,11 @@
|
|||||||
"source": "files/",
|
"source": "files/",
|
||||||
"destination": "/home/{{user `ssh_username`}}/"
|
"destination": "/home/{{user `ssh_username`}}/"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"type": "file",
|
|
||||||
"source": "../../scripts/scylla_install_pkg",
|
|
||||||
"destination": "/home/{{user `ssh_username`}}/scylla_install_pkg"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"type": "shell",
|
"type": "shell",
|
||||||
"inline": [
|
"inline": [
|
||||||
|
"sudo yum install -y epel-release",
|
||||||
|
"sudo yum install -y python36",
|
||||||
"sudo /home/{{user `ssh_username`}}/scylla-ami/scylla_install_ami {{ user `install_args` }}"
|
"sudo /home/{{user `ssh_username`}}/scylla-ami/scylla_install_ami {{ user `install_args` }}"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
84
dist/common/scripts/node_exporter_install
vendored
84
dist/common/scripts/node_exporter_install
vendored
@@ -1,6 +1,8 @@
|
|||||||
#!/bin/sh
|
#!/usr/bin/python3
|
||||||
#
|
#
|
||||||
# Copyright 2016 ScyllaDB
|
# Copyright 2018 ScyllaDB
|
||||||
|
#
|
||||||
|
|
||||||
#
|
#
|
||||||
# This file is part of Scylla.
|
# This file is part of Scylla.
|
||||||
#
|
#
|
||||||
@@ -17,42 +19,46 @@
|
|||||||
# You should have received a copy of the GNU General Public License
|
# You should have received a copy of the GNU General Public License
|
||||||
# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
if [ "`id -u`" -ne 0 ]; then
|
import os
|
||||||
echo "Requires root permission."
|
import sys
|
||||||
exit 1
|
import tempfile
|
||||||
fi
|
import tarfile
|
||||||
|
from scylla_util import *
|
||||||
|
|
||||||
if [ -f /usr/bin/node_exporter ] || [ -f /usr/bin/prometheus-node_exporter ]; then
|
VERSION='0.14.0'
|
||||||
echo "node_exporter already installed"
|
INSTALL_DIR='/usr/lib/scylla/Prometheus/node_exporter'
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
. /usr/lib/scylla/scylla_lib.sh
|
if __name__ == '__main__':
|
||||||
|
if os.getuid() > 0:
|
||||||
|
print('Requires root permission.')
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
if is_gentoo_variant; then
|
if os.path.exists('/usr/bin/node_exporter') or os.path.exists('/usr/bin/prometheus-node_exporter'):
|
||||||
emerge -uq app-metrics/node_exporter
|
print('node_exporter already installed')
|
||||||
if is_systemd; then
|
sys.exit(1)
|
||||||
echo "app-metrics/node_exporter does not install systemd service files, please fill a bug if you need them."
|
|
||||||
else
|
|
||||||
rc-update add node_exporter default
|
|
||||||
rc-service node_exporter start
|
|
||||||
fi
|
|
||||||
else
|
|
||||||
version=0.14.0
|
|
||||||
dir=/usr/lib/scylla/Prometheus/node_exporter
|
|
||||||
mkdir -p $dir
|
|
||||||
cd $dir
|
|
||||||
curl -L https://github.com/prometheus/node_exporter/releases/download/v$version/node_exporter-$version.linux-amd64.tar.gz -o $dir/node_exporter-$version.linux-amd64.tar.gz
|
|
||||||
tar -xvzf $dir/node_exporter-$version.linux-amd64.tar.gz
|
|
||||||
rm $dir/node_exporter-$version.linux-amd64.tar.gz
|
|
||||||
ln -s $dir/node_exporter-$version.linux-amd64/node_exporter /usr/bin
|
|
||||||
. /etc/os-release
|
|
||||||
|
|
||||||
if is_systemd; then
|
if is_gentoo_variant():
|
||||||
systemctl enable node-exporter
|
run('emerge -uq app-metrics/node_exporter')
|
||||||
systemctl start node-exporter
|
if is_systemd():
|
||||||
else
|
print('app-metrics/node_exporter does not install systemd service files, please fill a bug if you need them.')
|
||||||
cat <<EOT >> /etc/init/node_exporter.conf
|
sys.exit(1)
|
||||||
|
else:
|
||||||
|
run('rc-update add node_exporter default')
|
||||||
|
run('rc-service node_exporter start')
|
||||||
|
else:
|
||||||
|
data = curl('https://github.com/prometheus/node_exporter/releases/download/v{version}/node_exporter-{version}.linux-amd64.tar.gz'.format(version=VERSION), byte=True)
|
||||||
|
with open('/var/tmp/node_exporter-{version}.linux-amd64.tar.gz'.format(version=VERSION), 'wb') as f:
|
||||||
|
f.write(data)
|
||||||
|
with tarfile.open('/var/tmp/node_exporter-{version}.linux-amd64.tar.gz'.format(version=VERSION)) as tf:
|
||||||
|
tf.extractall(INSTALL_DIR)
|
||||||
|
os.remove('/var/tmp/node_exporter-{version}.linux-amd64.tar.gz'.format(version=VERSION))
|
||||||
|
os.symlink('{install_dir}/node_exporter-{version}.linux-amd64/node_exporter'.format(install_dir=INSTALL_DIR, version=VERSION), '/usr/bin/node_exporter')
|
||||||
|
if is_systemd():
|
||||||
|
node_exporter = systemd_unit('node-exporter.service')
|
||||||
|
node_exporter.enable()
|
||||||
|
node_exporter.start()
|
||||||
|
else:
|
||||||
|
conf = '''
|
||||||
# Run node_exporter
|
# Run node_exporter
|
||||||
|
|
||||||
start on startup
|
start on startup
|
||||||
@@ -60,9 +66,9 @@ start on startup
|
|||||||
script
|
script
|
||||||
/usr/bin/node_exporter
|
/usr/bin/node_exporter
|
||||||
end script
|
end script
|
||||||
EOT
|
'''[1:-1]
|
||||||
service node_exporter start
|
with open('/etc/init/node_exporter.conf', 'w') as f:
|
||||||
fi
|
f.write(conf)
|
||||||
fi
|
run('service node_exporter start')
|
||||||
|
|
||||||
printf "node_exporter successfully installed\n"
|
print('node_exporter successfully installed')
|
||||||
|
|||||||
28
dist/common/scripts/node_health_check
vendored
28
dist/common/scripts/node_health_check
vendored
@@ -28,6 +28,7 @@ OUTPUT_PATH4="$OUTPUT_PATH/data_model"
|
|||||||
OUTPUT_PATH5="$OUTPUT_PATH/network_checks"
|
OUTPUT_PATH5="$OUTPUT_PATH/network_checks"
|
||||||
IS_FEDORA="0"
|
IS_FEDORA="0"
|
||||||
IS_DEBIAN="0"
|
IS_DEBIAN="0"
|
||||||
|
IS_GENTOO="0"
|
||||||
JMX_PORT="7199"
|
JMX_PORT="7199"
|
||||||
CQL_PORT="9042"
|
CQL_PORT="9042"
|
||||||
PRINT_DM=NO
|
PRINT_DM=NO
|
||||||
@@ -75,7 +76,7 @@ while getopts ":hdncap:q:" opt; do
|
|||||||
done
|
done
|
||||||
|
|
||||||
|
|
||||||
##Check server release (Fedora/Oracle/Debian)##
|
##Check server release (Fedora/Oracle/Debian/Gentoo)##
|
||||||
cat /etc/os-release | grep -i fedora &> /dev/null
|
cat /etc/os-release | grep -i fedora &> /dev/null
|
||||||
if [ $? -ne 0 ]; then
|
if [ $? -ne 0 ]; then
|
||||||
cat /etc/os-release | grep -i oracle &> /dev/null
|
cat /etc/os-release | grep -i oracle &> /dev/null
|
||||||
@@ -89,7 +90,12 @@ if [ $? -ne 0 ]; then
|
|||||||
IS_DEBIAN="1"
|
IS_DEBIAN="1"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "$IS_FEDORA" == "1" ] && [ "$IS_DEBIAN" == "1" ]; then
|
cat /etc/os-release | grep -i gentoo &> /dev/null
|
||||||
|
if [ $? -ne 0 ]; then
|
||||||
|
IS_GENTOO="1"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$IS_FEDORA" == "1" ] && [ "$IS_DEBIAN" == "1" ] && [ "$IS_GENTOO" == "1" ]; then
|
||||||
echo "This s a Non-Supported OS, Please Review the Support Matrix"
|
echo "This s a Non-Supported OS, Please Review the Support Matrix"
|
||||||
exit 222
|
exit 222
|
||||||
fi
|
fi
|
||||||
@@ -141,6 +147,9 @@ if [ "$IS_DEBIAN" == "0" ]; then
|
|||||||
sudo apt-get install net-tools -y | grep already
|
sudo apt-get install net-tools -y | grep already
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ "$IS_GENTOO" == "0" ]; then
|
||||||
|
sudo emerge -1uq sys-apps/ethtool sys-apps/net-tools
|
||||||
|
fi
|
||||||
|
|
||||||
#Create dir structure to save output_files#
|
#Create dir structure to save output_files#
|
||||||
echo "--------------------------------------------------"
|
echo "--------------------------------------------------"
|
||||||
@@ -182,6 +191,12 @@ if [ "$IS_DEBIAN" == "0" ]; then
|
|||||||
cp -p /etc/default/scylla-server $OUTPUT_PATH2
|
cp -p /etc/default/scylla-server $OUTPUT_PATH2
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ "$IS_GENTOO" == "0" ]; then
|
||||||
|
sudo emerge -1uq app-portage/portage-utils
|
||||||
|
sudo qlist -ICv scylla > $OUTPUT_PATH2/scylla-pkgs.txt
|
||||||
|
cp -p /etc/default/scylla-server $OUTPUT_PATH2
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
#Scylla Logs#
|
#Scylla Logs#
|
||||||
echo "--------------------------------------------------"
|
echo "--------------------------------------------------"
|
||||||
@@ -192,7 +207,11 @@ journalctl --help &> /dev/null
|
|||||||
if [ $? -eq 0 ]; then
|
if [ $? -eq 0 ]; then
|
||||||
journalctl -t scylla > $OUTPUT_PATH/scylla-logs.txt
|
journalctl -t scylla > $OUTPUT_PATH/scylla-logs.txt
|
||||||
else
|
else
|
||||||
cat /var/log/syslog | grep -i scylla > $OUTPUT_PATH/scylla-logs.txt
|
if [ "$IS_GENTOO" == "0" ]; then
|
||||||
|
cat /var/log/scylla/scylla.log > $OUTPUT_PATH/scylla-logs.txt
|
||||||
|
else
|
||||||
|
cat /var/log/syslog | grep -i scylla > $OUTPUT_PATH/scylla-logs.txt
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
gzip -f $OUTPUT_PATH/scylla-logs.txt
|
gzip -f $OUTPUT_PATH/scylla-logs.txt
|
||||||
@@ -224,6 +243,7 @@ if [ "$SCYLLA_SERVICE" == "1" ]; then
|
|||||||
echo "Skipping Data Model Info Collection"
|
echo "Skipping Data Model Info Collection"
|
||||||
echo "--------------------------------------------------"
|
echo "--------------------------------------------------"
|
||||||
else
|
else
|
||||||
|
# TODO: handle connecting with authentication
|
||||||
cqlsh `hostname -i` $CQL_PORT -e "HELP" &> /dev/null
|
cqlsh `hostname -i` $CQL_PORT -e "HELP" &> /dev/null
|
||||||
if [ $? -eq 0 ]; then
|
if [ $? -eq 0 ]; then
|
||||||
echo "Collecting Data Model Info (using port $CQL_PORT)"
|
echo "Collecting Data Model Info (using port $CQL_PORT)"
|
||||||
@@ -357,7 +377,7 @@ if [ "$IS_FEDORA" == "0" ]; then
|
|||||||
echo "## /etc/sysconfig/scylla-server ##" >> $REPORT
|
echo "## /etc/sysconfig/scylla-server ##" >> $REPORT
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "$IS_DEBIAN" == "0" ]; then
|
if [ "$IS_DEBIAN" == "0" ] || [ "$IS_GENTOO" == "0" ]; then
|
||||||
echo "## /etc/default/scylla-server ##" >> $REPORT
|
echo "## /etc/default/scylla-server ##" >> $REPORT
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|||||||
3
dist/common/scripts/scylla_coredump_setup
vendored
3
dist/common/scripts/scylla_coredump_setup
vendored
@@ -23,7 +23,6 @@ import os
|
|||||||
import sys
|
import sys
|
||||||
import argparse
|
import argparse
|
||||||
import subprocess
|
import subprocess
|
||||||
import shutil
|
|
||||||
from scylla_util import *
|
from scylla_util import *
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
@@ -62,7 +61,7 @@ ExternalSizeMax=1024G
|
|||||||
with open('/etc/systemd/coredump.conf', 'w') as f:
|
with open('/etc/systemd/coredump.conf', 'w') as f:
|
||||||
conf = f.write(conf_data)
|
conf = f.write(conf_data)
|
||||||
if args.dump_to_raiddir:
|
if args.dump_to_raiddir:
|
||||||
shutil.rmtree('/var/lib/systemd/coredump')
|
rmtree('/var/lib/systemd/coredump')
|
||||||
makedirs('/var/lib/scylla/coredump')
|
makedirs('/var/lib/scylla/coredump')
|
||||||
os.symlink('/var/lib/scylla/coredump', '/var/lib/systemd/coredump')
|
os.symlink('/var/lib/scylla/coredump', '/var/lib/systemd/coredump')
|
||||||
run('systemctl daemon-reload')
|
run('systemctl daemon-reload')
|
||||||
|
|||||||
44
dist/common/scripts/scylla_ec2_check
vendored
44
dist/common/scripts/scylla_ec2_check
vendored
@@ -24,46 +24,38 @@ import sys
|
|||||||
import argparse
|
import argparse
|
||||||
from scylla_util import *
|
from scylla_util import *
|
||||||
|
|
||||||
def get_en_interface_type():
|
|
||||||
type, subtype = curl('http://169.254.169.254/latest/meta-data/instance-type').split('.')
|
|
||||||
if type in ['c3', 'c4', 'd4', 'd2', 'i2', 'r3']:
|
|
||||||
return 'ixgbevf'
|
|
||||||
if type in ['i3', 'p2', 'r4', 'x1']:
|
|
||||||
return 'ena'
|
|
||||||
if type == 'm4':
|
|
||||||
if subtype == '16xlarge':
|
|
||||||
return 'ena'
|
|
||||||
else:
|
|
||||||
return 'ixgbevf'
|
|
||||||
|
|
||||||
def is_vpc_enabled():
|
|
||||||
with open('/sys/class/net/eth0/address') as f:
|
|
||||||
mac = f.read().strip()
|
|
||||||
mac_stat = curl('http://169.254.169.254/latest/meta-data/network/interfaces/macs/{}/'.format(mac))
|
|
||||||
return True if re.search(r'^vpc-id$', mac_stat, flags=re.MULTILINE) else False
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
if not is_ec2():
|
if not is_ec2():
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
parser = argparse.ArgumentParser(description='Verify EC2 configuration is optimized.')
|
||||||
|
parser.add_argument('--nic', default='eth0',
|
||||||
|
help='specify NIC')
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
type = curl('http://169.254.169.254/latest/meta-data/instance-type')
|
if not is_valid_nic(args.nic):
|
||||||
en = get_en_interface_type()
|
print('NIC {} doesn\'t exist.'.format(args.nic))
|
||||||
match = re.search(r'^driver: (\S+)$', out('ethtool -i eth0'), flags=re.MULTILINE)
|
sys.exit(1)
|
||||||
|
|
||||||
|
aws = aws_instance()
|
||||||
|
instance_class = aws.instance_class()
|
||||||
|
en = aws.get_en_interface_type()
|
||||||
|
match = re.search(r'^driver: (\S+)$', out('ethtool -i {}'.format(args.nic)), flags=re.MULTILINE)
|
||||||
driver = match.group(1)
|
driver = match.group(1)
|
||||||
|
|
||||||
if not en:
|
if not en:
|
||||||
print('{bold_red}{type} doesn\'t support enahanced networking!{no_color}'.format(bold_red=concolor.BOLD_RED, type=type, no_color=concolor.NO_COLOR))
|
colorprint('{red}{instance_class} doesn\'t support enahanced networking!{nocolor}', instance_class=instance_class)
|
||||||
print('''To enable enhanced networking, please use the instance type which supports it.
|
print('''To enable enhanced networking, please use the instance type which supports it.
|
||||||
More documentation available at:
|
More documentation available at:
|
||||||
http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/enhanced-networking.html#enabling_enhanced_networking''')
|
http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/enhanced-networking.html#enabling_enhanced_networking''')
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
elif not is_vpc_enabled():
|
elif not aws.is_vpc_enabled(args.nic):
|
||||||
print('{bold_red}VPC is not enabled!{no_color}'.format(bold_red=concolor.BOLD_RED, no_color=concolor.NO_COLOR))
|
colorprint('{red}VPC is not enabled!{nocolor}')
|
||||||
print('To enable enhanced networking, please enable VPC.')
|
print('To enable enhanced networking, please enable VPC.')
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
elif driver != en:
|
elif driver != en:
|
||||||
print('{bold_red}Enhanced networking is disabled!{no_color}'.format(bold_red=concolor.BOLD_RED, no_color=concolor.NO_COLOR))
|
colorprint('{red}Enhanced networking is disabled!{nocolor}')
|
||||||
print('''More documentation available at:
|
print('''More documentation available at:
|
||||||
http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/enhanced-networking.html''')
|
http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/enhanced-networking.html''')
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
colorprint('{green}This EC2 instance is optimized for Scylla.{nocolor}')
|
||||||
|
|||||||
2
dist/common/scripts/scylla_fstrim_setup
vendored
2
dist/common/scripts/scylla_fstrim_setup
vendored
@@ -28,6 +28,8 @@ if __name__ == '__main__':
|
|||||||
if os.getuid() > 0:
|
if os.getuid() > 0:
|
||||||
print('Requires root permission.')
|
print('Requires root permission.')
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
if is_systemd():
|
||||||
|
systemd_unit('scylla-fstrim.timer').unmask()
|
||||||
if is_redhat_variant():
|
if is_redhat_variant():
|
||||||
systemd_unit('fstrim.timer').disable()
|
systemd_unit('fstrim.timer').disable()
|
||||||
if dist_name() == 'Ubuntu' and os.path.exists('/etc/cron.weekly/fstrim'):
|
if dist_name() == 'Ubuntu' and os.path.exists('/etc/cron.weekly/fstrim'):
|
||||||
|
|||||||
122
dist/common/scripts/scylla_lib.sh
vendored
122
dist/common/scripts/scylla_lib.sh
vendored
@@ -1,122 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (C) 2016 ScyllaDB
|
|
||||||
|
|
||||||
is_debian_variant() {
|
|
||||||
[ -f /etc/debian_version ]
|
|
||||||
}
|
|
||||||
|
|
||||||
is_redhat_variant() {
|
|
||||||
[ -f /etc/redhat-release ]
|
|
||||||
}
|
|
||||||
|
|
||||||
is_gentoo_variant() {
|
|
||||||
[ -f /etc/gentoo-release ]
|
|
||||||
}
|
|
||||||
|
|
||||||
is_systemd() {
|
|
||||||
grep -q '^systemd$' /proc/1/comm
|
|
||||||
}
|
|
||||||
|
|
||||||
is_ec2() {
|
|
||||||
[ -f /sys/hypervisor/uuid ] && [ "$(head -c 3 /sys/hypervisor/uuid)" = "ec2" ]
|
|
||||||
}
|
|
||||||
|
|
||||||
is_selinux_enabled() {
|
|
||||||
STATUS=`getenforce`
|
|
||||||
if [ "$STATUS" = "Disabled" ]; then
|
|
||||||
return 0
|
|
||||||
else
|
|
||||||
return 1
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
ec2_is_supported_instance_type() {
|
|
||||||
TYPE=`curl -s http://169.254.169.254/latest/meta-data/instance-type|cut -d . -f 1`
|
|
||||||
case $TYPE in
|
|
||||||
"i2"|"i3") echo 1;;
|
|
||||||
*) echo 0;;
|
|
||||||
esac
|
|
||||||
}
|
|
||||||
|
|
||||||
verify_args() {
|
|
||||||
if [ -z "$2" ] || [[ "$2" =~ ^--+ ]]; then
|
|
||||||
echo "Requires more parameter for $1."
|
|
||||||
print_usage
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
#
|
|
||||||
# get_mode_cpu_set <mode name, e.g. 'mq', 'sq', 'sq_split'>
|
|
||||||
#
|
|
||||||
get_mode_cpu_set() {
|
|
||||||
local mode=$1
|
|
||||||
local mode_cpu_mask=`/usr/lib/scylla/perftune.py --tune net --nic "$nic" --mode "$mode" --get-cpu-mask` 2>&-
|
|
||||||
|
|
||||||
# If the given mode is not supported - return invalid CPU set
|
|
||||||
if [[ "$?" -ne "0" ]]; then
|
|
||||||
echo "-1"
|
|
||||||
else
|
|
||||||
echo "$mode_cpu_mask" | /usr/lib/scylla/hex2list.py
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
#
|
|
||||||
# check_cpuset_conf <NIC name>
|
|
||||||
#
|
|
||||||
get_tune_mode() {
|
|
||||||
local nic=$1
|
|
||||||
|
|
||||||
# if cpuset.conf doesn't exist use the default mode
|
|
||||||
[[ ! -e '/etc/scylla.d/cpuset.conf' ]] && return
|
|
||||||
|
|
||||||
local cur_cpuset=`cat /etc/scylla.d/cpuset.conf | cut -d "\"" -f2- | cut -d" " -f2`
|
|
||||||
local mq_cpuset=`get_mode_cpu_set 'mq'`
|
|
||||||
local sq_cpuset=`get_mode_cpu_set 'sq'`
|
|
||||||
local sq_split_cpuset=`get_mode_cpu_set 'sq_split'`
|
|
||||||
local tune_mode=""
|
|
||||||
|
|
||||||
case "$cur_cpuset" in
|
|
||||||
"$mq_cpuset")
|
|
||||||
tune_mode="--mode mq"
|
|
||||||
;;
|
|
||||||
"$sq_cpuset")
|
|
||||||
tune_mode="--mode sq"
|
|
||||||
;;
|
|
||||||
"$sq_split_cpuset")
|
|
||||||
tune_mode="--mode sq_split"
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
# if cpuset is something different from what we expect - use the default mode
|
|
||||||
echo "$tune_mode"
|
|
||||||
}
|
|
||||||
|
|
||||||
#
|
|
||||||
# create_perftune_conf [<NIC name>]
|
|
||||||
#
|
|
||||||
create_perftune_conf() {
|
|
||||||
local nic=$1
|
|
||||||
[[ -z "$nic" ]] && nic='eth0'
|
|
||||||
|
|
||||||
# if exists - do nothing
|
|
||||||
[[ -e '/etc/scylla.d/perftune.yaml' ]] && return
|
|
||||||
|
|
||||||
local mode=`get_tune_mode "$nic"`
|
|
||||||
/usr/lib/scylla/perftune.py --tune net --nic "$nic" $mode --dump-options-file > /etc/scylla.d/perftune.yaml
|
|
||||||
}
|
|
||||||
|
|
||||||
. /etc/os-release
|
|
||||||
if is_debian_variant || is_gentoo_variant; then
|
|
||||||
SYSCONFIG=/etc/default
|
|
||||||
else
|
|
||||||
SYSCONFIG=/etc/sysconfig
|
|
||||||
fi
|
|
||||||
. $SYSCONFIG/scylla-server
|
|
||||||
|
|
||||||
for i in /etc/scylla.d/*.conf; do
|
|
||||||
if [ "$i" = "/etc/scylla.d/*.conf" ]; then
|
|
||||||
break
|
|
||||||
fi
|
|
||||||
. "$i"
|
|
||||||
done
|
|
||||||
9
dist/common/scripts/scylla_ntp_setup
vendored
9
dist/common/scripts/scylla_ntp_setup
vendored
@@ -49,7 +49,8 @@ if __name__ == '__main__':
|
|||||||
if is_systemd():
|
if is_systemd():
|
||||||
ntp = systemd_unit('ntp.service')
|
ntp = systemd_unit('ntp.service')
|
||||||
ntp.stop()
|
ntp.stop()
|
||||||
run('ntpdate ntp.ubuntu.com')
|
# ignore error, ntpd may able to adjust clock later
|
||||||
|
run('ntpdate ntp.ubuntu.com', exception=False)
|
||||||
ntp.start()
|
ntp.start()
|
||||||
else:
|
else:
|
||||||
run('service ntp stop')
|
run('service ntp stop')
|
||||||
@@ -70,7 +71,8 @@ if __name__ == '__main__':
|
|||||||
sntpd.start()
|
sntpd.start()
|
||||||
else:
|
else:
|
||||||
run('rc-service ntpd stop', exception=False)
|
run('rc-service ntpd stop', exception=False)
|
||||||
run('ntpdate {}'.format(server))
|
# ignore error, ntpd may able to adjust clock later
|
||||||
|
run('ntpdate {}'.format(server), exception=False)
|
||||||
run('rc-update add ntpd default')
|
run('rc-update add ntpd default')
|
||||||
run('rc-service ntpd start')
|
run('rc-service ntpd start')
|
||||||
|
|
||||||
@@ -87,6 +89,7 @@ if __name__ == '__main__':
|
|||||||
server = match.group(1)
|
server = match.group(1)
|
||||||
ntpd = systemd_unit('ntpd.service')
|
ntpd = systemd_unit('ntpd.service')
|
||||||
ntpd.stop()
|
ntpd.stop()
|
||||||
run('ntpdate {}'.format(server))
|
# ignore error, ntpd may able to adjust clock later
|
||||||
|
run('ntpdate {}'.format(server), exception=False)
|
||||||
ntpd.enable()
|
ntpd.enable()
|
||||||
ntpd.start()
|
ntpd.start()
|
||||||
|
|||||||
96
dist/common/scripts/scylla_prepare
vendored
96
dist/common/scripts/scylla_prepare
vendored
@@ -1,33 +1,71 @@
|
|||||||
#!/bin/bash -e
|
#!/usr/bin/python3
|
||||||
|
#
|
||||||
|
# Copyright 2018 ScyllaDB
|
||||||
|
#
|
||||||
|
|
||||||
. /usr/lib/scylla/scylla_lib.sh
|
#
|
||||||
|
# This file is part of Scylla.
|
||||||
|
#
|
||||||
|
# Scylla is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# Scylla is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
if [ "$AMI" = "yes" ] && [ -f /etc/scylla/ami_disabled ]; then
|
import os
|
||||||
rm /etc/scylla/ami_disabled
|
import sys
|
||||||
exit 1
|
import glob
|
||||||
fi
|
from scylla_util import *
|
||||||
|
|
||||||
if [ "$NETWORK_MODE" = "virtio" ]; then
|
if __name__ == '__main__':
|
||||||
ip tuntap del mode tap dev $TAP
|
if os.getuid() > 0:
|
||||||
ip tuntap add mode tap dev $TAP user $USER one_queue vnet_hdr
|
print('Requires root permission.')
|
||||||
ip link set dev $TAP up
|
sys.exit(1)
|
||||||
ip link set dev $TAP master $BRIDGE
|
if is_redhat_variant():
|
||||||
chown $USER.$GROUP /dev/vhost-net
|
cfg = sysconfig_parser('/etc/sysconfig/scylla-server')
|
||||||
elif [ "$NETWORK_MODE" = "dpdk" ]; then
|
else:
|
||||||
modprobe uio
|
cfg = sysconfig_parser('/etc/default/scylla-server')
|
||||||
modprobe uio_pci_generic
|
ami = cfg.get('AMI')
|
||||||
/usr/lib/scylla/dpdk-devbind.py --force --bind=uio_pci_generic $ETHPCIID
|
mode = cfg.get('NETWORK_MODE')
|
||||||
for n in /sys/devices/system/node/node?; do
|
|
||||||
echo $NR_HUGEPAGES > $n/hugepages/hugepages-2048kB/nr_hugepages
|
|
||||||
done
|
|
||||||
if [ "$ID" = "ubuntu" ]; then
|
|
||||||
hugeadm --create-mounts
|
|
||||||
fi
|
|
||||||
else # NETWORK_MODE = posix
|
|
||||||
if [ "$SET_NIC" = "yes" ]; then
|
|
||||||
create_perftune_conf "$IFNAME"
|
|
||||||
/usr/lib/scylla/posix_net_conf.sh $IFNAME --options-file /etc/scylla.d/perftune.yaml
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
/usr/lib/scylla/scylla-blocktune
|
if ami == 'yes' and os.path.exists('/etc/scylla/ami_disabled'):
|
||||||
|
os.remove('/etc/scylla/ami_disabled')
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
if mode == 'virtio':
|
||||||
|
tap = cfg.get('TAP')
|
||||||
|
user = cfg.get('USER')
|
||||||
|
group = cfg.get('GROUP')
|
||||||
|
bridge = cfg.get('BRIDGE')
|
||||||
|
run('ip tuntap del mode tap dev {TAP}'.format(TAP=tap))
|
||||||
|
run('ip tuntap add mode tap dev {TAP} user {USER} one_queue vnet_hdr'.format(TAP=tap, USER=user))
|
||||||
|
run('ip link set dev {TAP} up'.format(TAP=tap))
|
||||||
|
run('ip link set dev {TAP} master {BRIDGE}'.format(TAP=tap, BRIDGE=bridge))
|
||||||
|
run('chown {USER}.{GROUP} /dev/vhost-net'.format(USER=user, GROUP=group))
|
||||||
|
elif mode == 'dpdk':
|
||||||
|
ethpcciid = cfg.get('ETHPCIID')
|
||||||
|
nr_hugepages = cfg.get('NR_HUGEPAGES')
|
||||||
|
run('modprobe uio')
|
||||||
|
run('modprobe uio_pci_generic')
|
||||||
|
run('/usr/lib/scylla/dpdk-devbind.py --force --bind=uio_pci_generic {ETHPCIID}'.format(ETHPCIID=ethpciid))
|
||||||
|
for n in glob.glob('/sys/devices/system/node/node?'):
|
||||||
|
with open('{n}/hugepages/hugepages-2048kB/nr_hugepages'.format(n=n), 'w') as f:
|
||||||
|
f.write(nr_hugepages)
|
||||||
|
if dist_name() == 'Ubuntu':
|
||||||
|
run('hugeadm --create-mounts')
|
||||||
|
fi
|
||||||
|
else:
|
||||||
|
set_nic = cfg.get('SET_NIC')
|
||||||
|
ifname = cfg.get('IFNAME')
|
||||||
|
if set_nic == 'yes':
|
||||||
|
create_perftune_conf(ifname)
|
||||||
|
run('/usr/lib/scylla/posix_net_conf.sh {IFNAME} --options-file /etc/scylla.d/perftune.yaml'.format(IFNAME=ifname))
|
||||||
|
|
||||||
|
run('/usr/lib/scylla/scylla-blocktune')
|
||||||
|
|||||||
58
dist/common/scripts/scylla_raid_setup
vendored
58
dist/common/scripts/scylla_raid_setup
vendored
@@ -23,6 +23,8 @@ import os
|
|||||||
import argparse
|
import argparse
|
||||||
import pwd
|
import pwd
|
||||||
import grp
|
import grp
|
||||||
|
import sys
|
||||||
|
import stat
|
||||||
from scylla_util import *
|
from scylla_util import *
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
@@ -40,6 +42,8 @@ if __name__ == '__main__':
|
|||||||
help='specify the root of the tree')
|
help='specify the root of the tree')
|
||||||
parser.add_argument('--volume-role', default='all',
|
parser.add_argument('--volume-role', default='all',
|
||||||
help='specify how will this device be used (data, commitlog, or all)')
|
help='specify how will this device be used (data, commitlog, or all)')
|
||||||
|
parser.add_argument('--force-raid', action='store_true', default=False,
|
||||||
|
help='force constructing RAID when only one disk is specified')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
@@ -60,6 +64,12 @@ if __name__ == '__main__':
|
|||||||
if not os.path.exists(disk):
|
if not os.path.exists(disk):
|
||||||
print('{} is not found'.format(disk))
|
print('{} is not found'.format(disk))
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
if not stat.S_ISBLK(os.stat(disk).st_mode):
|
||||||
|
print('{} is not block device'.format(disk))
|
||||||
|
sys.exit(1)
|
||||||
|
if not is_unused_disk(disk):
|
||||||
|
print('{} is busy'.format(disk))
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
if os.path.exists(args.raiddev):
|
if os.path.exists(args.raiddev):
|
||||||
print('{} is already using'.format(args.raiddev))
|
print('{} is already using'.format(args.raiddev))
|
||||||
@@ -74,12 +84,20 @@ if __name__ == '__main__':
|
|||||||
elif is_gentoo_variant():
|
elif is_gentoo_variant():
|
||||||
run('emerge -uq sys-fs/mdadm sys-fs/xfsprogs')
|
run('emerge -uq sys-fs/mdadm sys-fs/xfsprogs')
|
||||||
|
|
||||||
print('Creating RAID0 for scylla using {nr_disk} disk(s): {disks}'.format(nr_disk=len(disks), disks=args.disks))
|
if len(disks) == 1 and not args.force_raid:
|
||||||
|
raid = False
|
||||||
|
fsdev = disks[0]
|
||||||
|
else:
|
||||||
|
raid = True
|
||||||
|
fsdev = args.raiddev
|
||||||
|
|
||||||
|
print('Creating {type} for scylla using {nr_disk} disk(s): {disks}'.format(type='RAID0' if raid else 'XFS volume', nr_disk=len(disks), disks=args.disks))
|
||||||
if dist_name() == 'Ubuntu' and dist_ver() == '14.04':
|
if dist_name() == 'Ubuntu' and dist_ver() == '14.04':
|
||||||
run('udevadm settle')
|
if raid:
|
||||||
run('mdadm --create --verbose --force --run {raid} --level=0 -c1024 --raid-devices={nr_disk} {disks}'.format(raid=args.raiddev, nr_disk=len(disks), disks=args.disks.replace(',', ' ')))
|
run('udevadm settle')
|
||||||
run('udevadm settle')
|
run('mdadm --create --verbose --force --run {raid} --level=0 -c1024 --raid-devices={nr_disk} {disks}'.format(raid=fsdev, nr_disk=len(disks), disks=args.disks.replace(',', ' ')))
|
||||||
run('mkfs.xfs {} -f'.format(args.raiddev))
|
run('udevadm settle')
|
||||||
|
run('mkfs.xfs {} -f'.format(fsdev))
|
||||||
else:
|
else:
|
||||||
procs=[]
|
procs=[]
|
||||||
for disk in disks:
|
for disk in disks:
|
||||||
@@ -93,22 +111,24 @@ if __name__ == '__main__':
|
|||||||
procs.append(proc)
|
procs.append(proc)
|
||||||
for proc in procs:
|
for proc in procs:
|
||||||
proc.wait()
|
proc.wait()
|
||||||
run('udevadm settle')
|
if raid:
|
||||||
run('mdadm --create --verbose --force --run {raid} --level=0 -c1024 --raid-devices={nr_disk} {disks}'.format(raid=args.raiddev, nr_disk=len(disks), disks=args.disks.replace(',', ' ')))
|
run('udevadm settle')
|
||||||
run('udevadm settle')
|
run('mdadm --create --verbose --force --run {raid} --level=0 -c1024 --raid-devices={nr_disk} {disks}'.format(raid=fsdev, nr_disk=len(disks), disks=args.disks.replace(',', ' ')))
|
||||||
run('mkfs.xfs {} -f -K'.format(args.raiddev))
|
run('udevadm settle')
|
||||||
|
run('mkfs.xfs {} -f -K'.format(fsdev))
|
||||||
|
|
||||||
if is_debian_variant():
|
if is_debian_variant():
|
||||||
confpath = '/etc/mdadm/mdadm.conf'
|
confpath = '/etc/mdadm/mdadm.conf'
|
||||||
else:
|
else:
|
||||||
confpath = '/etc/mdadm.conf'
|
confpath = '/etc/mdadm.conf'
|
||||||
|
|
||||||
res = out('mdadm --detail --scan')
|
if raid:
|
||||||
with open(confpath, 'w') as f:
|
res = out('mdadm --detail --scan')
|
||||||
f.write(res)
|
with open(confpath, 'w') as f:
|
||||||
|
f.write(res)
|
||||||
|
|
||||||
makedirs(mount_at)
|
makedirs(mount_at)
|
||||||
run('mount -t xfs -o noatime {raid} "{mount_at}"'.format(raid=args.raiddev, mount_at=mount_at))
|
run('mount -t xfs -o noatime {raid} "{mount_at}"'.format(raid=fsdev, mount_at=mount_at))
|
||||||
|
|
||||||
makedirs('{}/data'.format(root))
|
makedirs('{}/data'.format(root))
|
||||||
makedirs('{}/commitlog'.format(root))
|
makedirs('{}/commitlog'.format(root))
|
||||||
@@ -122,11 +142,19 @@ if __name__ == '__main__':
|
|||||||
os.chown('{}/coredump'.format(root), uid, gid)
|
os.chown('{}/coredump'.format(root), uid, gid)
|
||||||
|
|
||||||
if args.update_fstab:
|
if args.update_fstab:
|
||||||
res = out('blkid {}'.format(args.raiddev))
|
res = out('blkid {}'.format(fsdev))
|
||||||
match = re.search(r'^/dev/\S+: (UUID="\S+")', res.strip())
|
match = re.search(r'^/dev/\S+: (UUID="\S+")', res.strip())
|
||||||
uuid = match.group(1)
|
uuid = match.group(1)
|
||||||
with open('/etc/fstab', 'a') as f:
|
with open('/etc/fstab', 'a') as f:
|
||||||
f.write('{uuid} {mount_at} xfs noatime 0 0\n'.format(uuid=uuid, mount_at=mount_at))
|
f.write('{uuid} {mount_at} xfs noatime,nofail 0 0\n'.format(uuid=uuid, mount_at=mount_at))
|
||||||
|
mounts_conf = '/etc/systemd/system/scylla-server.service.d/mounts.conf'
|
||||||
|
if not os.path.exists(mounts_conf):
|
||||||
|
makedirs('/etc/systemd/system/scylla-server.service.d/')
|
||||||
|
with open(mounts_conf, 'w') as f:
|
||||||
|
f.write('[Unit]\nRequiresMountsFor={mount_at}\n'.format(mount_at=mount_at))
|
||||||
|
else:
|
||||||
|
with open(mounts_conf, 'a') as f:
|
||||||
|
f.write('RequiresMountsFor={mount_at}\n'.format(mount_at=mount_at))
|
||||||
|
|
||||||
if is_debian_variant():
|
if is_debian_variant():
|
||||||
run('update-initramfs -u')
|
run('update-initramfs -u')
|
||||||
|
|||||||
104
dist/common/scripts/scylla_setup
vendored
104
dist/common/scripts/scylla_setup
vendored
@@ -22,7 +22,6 @@
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import argparse
|
import argparse
|
||||||
import logging
|
|
||||||
import glob
|
import glob
|
||||||
import shutil
|
import shutil
|
||||||
import io
|
import io
|
||||||
@@ -49,11 +48,28 @@ def interactive_ask_service(msg1, msg2, default = None):
|
|||||||
elif ans == 'no' or ans =='n':
|
elif ans == 'no' or ans =='n':
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def interactive_choose_nic():
|
||||||
|
nics = [os.path.basename(n) for n in glob.glob('/sys/class/net/*') if n != '/sys/class/net/lo']
|
||||||
|
if len(nics) == 0:
|
||||||
|
print('A NIC was not found.')
|
||||||
|
sys.exit(1)
|
||||||
|
elif len(nics) == 1:
|
||||||
|
return nics[0]
|
||||||
|
else:
|
||||||
|
print('Please select a NIC from the following list:')
|
||||||
|
while True:
|
||||||
|
print(nics)
|
||||||
|
n = input('> ')
|
||||||
|
if is_valid_nic(n):
|
||||||
|
return n
|
||||||
|
|
||||||
def do_verify_package(pkg):
|
def do_verify_package(pkg):
|
||||||
if is_debian_variant():
|
if is_debian_variant():
|
||||||
res = run('dpkg -s {}'.format(pkg), silent=True, exception=False)
|
res = run('dpkg -s {}'.format(pkg), silent=True, exception=False)
|
||||||
elif is_redhat_variant():
|
elif is_redhat_variant():
|
||||||
res = run('rpm -q {}'.format(pkg), silent=True, exception=False)
|
res = run('rpm -q {}'.format(pkg), silent=True, exception=False)
|
||||||
|
elif is_gentoo_variant():
|
||||||
|
res = 1 if len(glob.glob('/var/db/pkg/*/{}-*'.format(pkg))) else 0
|
||||||
if res != 0:
|
if res != 0:
|
||||||
print('{} package is not installed.'.format(pkg))
|
print('{} package is not installed.'.format(pkg))
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
@@ -67,22 +83,18 @@ def list_block_devices():
|
|||||||
devices = []
|
devices = []
|
||||||
for p in ['/dev/sd*', '/dev/hd*', '/dev/xvd*', '/dev/nvme*', '/dev/mapper/*']:
|
for p in ['/dev/sd*', '/dev/hd*', '/dev/xvd*', '/dev/nvme*', '/dev/mapper/*']:
|
||||||
devices.extend([d for d in glob.glob(p) if d != '/dev/mapper/control'])
|
devices.extend([d for d in glob.glob(p) if d != '/dev/mapper/control'])
|
||||||
return devices
|
return devices
|
||||||
|
|
||||||
def get_unused_disks():
|
def get_unused_disks():
|
||||||
unused = []
|
unused = []
|
||||||
for dev in list_block_devices():
|
for dev in list_block_devices():
|
||||||
with open('/proc/mounts') as f:
|
# dev contains partitions
|
||||||
s = f.read().strip()
|
if len(glob.glob('/sys/class/block/{dev}/{dev}*'.format(dev=dev.replace('/dev/','')))) > 0:
|
||||||
count_raw = len(re.findall('^{} '.format(dev), s, flags=re.MULTILINE))
|
continue
|
||||||
count_pvs = 0
|
# dev is used
|
||||||
if shutil.which('pvs'):
|
if not is_unused_disk(dev):
|
||||||
s = out('pvs -o pv_name --nohead')
|
continue
|
||||||
count_pvs = len(re.findall(dev, s, flags=re.MULTILINE))
|
unused.append(dev)
|
||||||
s = out('swapon --show=NAME --noheadings')
|
|
||||||
count_swap = len(re.findall(dev, s, flags=re.MULTILINE))
|
|
||||||
if count_raw + count_pvs + count_swap == 0:
|
|
||||||
unused.append(dev)
|
|
||||||
return unused
|
return unused
|
||||||
|
|
||||||
def run_setup_script(name, script):
|
def run_setup_script(name, script):
|
||||||
@@ -90,7 +102,7 @@ def run_setup_script(name, script):
|
|||||||
res = run(script, exception=False)
|
res = run(script, exception=False)
|
||||||
if res != 0:
|
if res != 0:
|
||||||
if interactive:
|
if interactive:
|
||||||
print('{red}{name} setup failed. Press any key to continue...{no_color}'.format(red=concolor.BOLD_RED, name=name, no_color=concolor.NO_COLOR))
|
colorprint('{red}{name} setup failed. Press any key to continue...{nocolor}', name=name)
|
||||||
input()
|
input()
|
||||||
else:
|
else:
|
||||||
print('{} setup failed.'.format(name))
|
print('{} setup failed.'.format(name))
|
||||||
@@ -99,12 +111,12 @@ def run_setup_script(name, script):
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
if os.getuid() > 0:
|
if os.getuid() > 0:
|
||||||
logging.error('Requires root permission.')
|
print('Requires root permission.')
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
parser = argparse.ArgumentParser(description='Configure environment for Scylla.')
|
parser = argparse.ArgumentParser(description='Configure environment for Scylla.')
|
||||||
parser.add_argument('--disks',
|
parser.add_argument('--disks',
|
||||||
help='specify disks for RAID')
|
help='specify disks for RAID')
|
||||||
parser.add_argument('--nic',
|
parser.add_argument('--nic', default='eth0',
|
||||||
help='specify NIC')
|
help='specify NIC')
|
||||||
parser.add_argument('--ntp-domain',
|
parser.add_argument('--ntp-domain',
|
||||||
help='specify NTP domain')
|
help='specify NTP domain')
|
||||||
@@ -115,7 +127,7 @@ if __name__ == '__main__':
|
|||||||
parser.add_argument('--developer-mode', action='store_true', default=False,
|
parser.add_argument('--developer-mode', action='store_true', default=False,
|
||||||
help='enable developer mode')
|
help='enable developer mode')
|
||||||
parser.add_argument('--no-ec2-check', action='store_true', default=False,
|
parser.add_argument('--no-ec2-check', action='store_true', default=False,
|
||||||
help='skip EC2 configuration check(only on EC2)')
|
help='skip EC2 configuration check')
|
||||||
parser.add_argument('--no-kernel-check', action='store_true', default=False,
|
parser.add_argument('--no-kernel-check', action='store_true', default=False,
|
||||||
help='skip kernel version check')
|
help='skip kernel version check')
|
||||||
parser.add_argument('--no-verify-package', action='store_true', default=False,
|
parser.add_argument('--no-verify-package', action='store_true', default=False,
|
||||||
@@ -150,12 +162,14 @@ if __name__ == '__main__':
|
|||||||
if len(sys.argv) == 1:
|
if len(sys.argv) == 1:
|
||||||
interactive = True
|
interactive = True
|
||||||
|
|
||||||
if not interactive and not args.no_raid_setup and not args.disks:
|
if not interactive:
|
||||||
parser.print_help()
|
if not args.no_raid_setup and not args.disks:
|
||||||
sys.exit(1)
|
parser.print_help()
|
||||||
if not interactive and not args.no_sysconfig_setup and not args.nic:
|
sys.exit(1)
|
||||||
parser.print_help()
|
if not args.no_sysconfig_setup or (is_ec2() and not args.no_ec2_check):
|
||||||
sys.exit(1)
|
if not is_valid_nic(args.nic):
|
||||||
|
print('NIC {} doesn\'t exist.'.format(args.nic))
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
disks = args.disks
|
disks = args.disks
|
||||||
nic = args.nic
|
nic = args.nic
|
||||||
@@ -178,13 +192,16 @@ if __name__ == '__main__':
|
|||||||
fstrim_setup = not args.no_fstrim_setup
|
fstrim_setup = not args.no_fstrim_setup
|
||||||
selinux_reboot_required = False
|
selinux_reboot_required = False
|
||||||
|
|
||||||
print('{green}Skip any of the following steps by answering \'no\'{no_color}'.format(green=concolor.GREEN, no_color=concolor.NO_COLOR))
|
if interactive:
|
||||||
|
colorprint('{green}Skip any of the following steps by answering \'no\'{nocolor}')
|
||||||
|
|
||||||
if is_ec2():
|
if is_ec2():
|
||||||
if interactive:
|
if interactive:
|
||||||
ec2_check = interactive_ask_service('Do you want to run Amazon EC2 configuration check?', 'Yes - runs a script to verify that this instance is optimized for running Scylls. No - skips the configuration check.', 'yes')
|
ec2_check = interactive_ask_service('Do you want to run Amazon EC2 configuration check?', 'Yes - runs a script to verify that this instance is optimized for running Scylla. No - skips the configuration check.', 'yes')
|
||||||
|
if ec2_check:
|
||||||
|
nic = interactive_choose_nic()
|
||||||
if ec2_check:
|
if ec2_check:
|
||||||
run('/usr/lib/scylla/scylla_ec2_check')
|
run('/usr/lib/scylla/scylla_ec2_check --nic {}'.format(nic))
|
||||||
|
|
||||||
if interactive:
|
if interactive:
|
||||||
kernel_check = interactive_ask_service('Do you want to run check your kernel version?', 'Yes - runs a script to verify that the kernel for this instance qualifies to run Scylla. No - skips the kernel check.', 'yes')
|
kernel_check = interactive_ask_service('Do you want to run check your kernel version?', 'Yes - runs a script to verify that the kernel for this instance qualifies to run Scylla. No - skips the kernel check.', 'yes')
|
||||||
@@ -202,11 +219,10 @@ if __name__ == '__main__':
|
|||||||
if enable_service:
|
if enable_service:
|
||||||
if is_systemd():
|
if is_systemd():
|
||||||
systemd_unit('scylla-server.service').enable()
|
systemd_unit('scylla-server.service').enable()
|
||||||
systemd_unit('scylla-fstrim.timer').unmask()
|
|
||||||
elif is_gentoo_variant():
|
elif is_gentoo_variant():
|
||||||
run('rc-update add scylla-server default')
|
run('rc-update add scylla-server default')
|
||||||
|
|
||||||
if interactive:
|
if interactive and not os.path.exists('/etc/scylla.d/housekeeping.cfg'):
|
||||||
version_check = interactive_ask_service('Do you want to enable Scylla to check if there is a newer version of Scylla available?', 'Yes - start the Scylla-housekeeping service to check for a newer version. This check runs periodically. No - skips this step.', 'yes')
|
version_check = interactive_ask_service('Do you want to enable Scylla to check if there is a newer version of Scylla available?', 'Yes - start the Scylla-housekeeping service to check for a newer version. This check runs periodically. No - skips this step.', 'yes')
|
||||||
if version_check:
|
if version_check:
|
||||||
with open('/etc/scylla.d/housekeeping.cfg', 'w') as f:
|
with open('/etc/scylla.d/housekeeping.cfg', 'w') as f:
|
||||||
@@ -277,10 +293,14 @@ if __name__ == '__main__':
|
|||||||
else:
|
else:
|
||||||
print('Please select unmounted disks from the following list: {}'.format(devices))
|
print('Please select unmounted disks from the following list: {}'.format(devices))
|
||||||
selected = []
|
selected = []
|
||||||
while len(devices):
|
dsklist = []
|
||||||
|
while True:
|
||||||
print('type \'cancel\' to cancel RAID/XFS setup.')
|
print('type \'cancel\' to cancel RAID/XFS setup.')
|
||||||
print('type \'done\' to finish selection. Selected: {}'.format(selected))
|
print('type \'done\' to finish selection. Selected: {}'.format(selected))
|
||||||
dsk = input('> ')
|
if len(dsklist) > 0:
|
||||||
|
dsk = dsklist.pop(0)
|
||||||
|
else:
|
||||||
|
dsk = input('> ')
|
||||||
if dsk == 'cancel':
|
if dsk == 'cancel':
|
||||||
raid_setup = 0
|
raid_setup = 0
|
||||||
break
|
break
|
||||||
@@ -290,12 +310,16 @@ if __name__ == '__main__':
|
|||||||
break
|
break
|
||||||
if dsk == '':
|
if dsk == '':
|
||||||
continue
|
continue
|
||||||
|
if dsk.find(',') > 0:
|
||||||
|
dsklist = dsk.split(',')
|
||||||
|
continue
|
||||||
if not os.path.exists(dsk):
|
if not os.path.exists(dsk):
|
||||||
print('{} not found'.format(dsk))
|
print('{} not found'.format(dsk))
|
||||||
continue
|
continue
|
||||||
if not stat.S_ISBLK(os.stat(dsk).st_mode):
|
if not stat.S_ISBLK(os.stat(dsk).st_mode):
|
||||||
print('{} is not block device'.format(dsk))
|
print('{} is not block device'.format(dsk))
|
||||||
selected += dsk
|
continue
|
||||||
|
selected.append(dsk)
|
||||||
devices.remove(dsk)
|
devices.remove(dsk)
|
||||||
disks = ','.join(selected)
|
disks = ','.join(selected)
|
||||||
if raid_setup:
|
if raid_setup:
|
||||||
@@ -312,21 +336,9 @@ if __name__ == '__main__':
|
|||||||
if interactive:
|
if interactive:
|
||||||
sysconfig_setup = interactive_ask_service('Do you want to setup a system-wide customized configuration for Scylla?', 'Yes - setup the sysconfig file. No - skips this step.', 'yes')
|
sysconfig_setup = interactive_ask_service('Do you want to setup a system-wide customized configuration for Scylla?', 'Yes - setup the sysconfig file. No - skips this step.', 'yes')
|
||||||
if sysconfig_setup:
|
if sysconfig_setup:
|
||||||
nics = [os.path.basename(n) for n in glob.glob('/sys/class/net/*') if n != '/sys/class/net/lo']
|
nic = interactive_choose_nic()
|
||||||
if len(nics) == 0:
|
if interactive:
|
||||||
print('A NIC was not found.')
|
set_nic = interactive_ask_service('Do you want to enable Network Interface Card (NIC) optimization?', 'Yes - optimize the NIC queue settings. Selecting Yes greatly improves performance. No - skip this step.', 'yes')
|
||||||
sys.exit(1)
|
|
||||||
elif len(nics) == 1:
|
|
||||||
nic=nics[0]
|
|
||||||
else:
|
|
||||||
print('Please select a NIC from the following list:')
|
|
||||||
while True:
|
|
||||||
print(nics)
|
|
||||||
n = input('> ')
|
|
||||||
if os.path.exists('/sys/class/net/{}'.format(n)):
|
|
||||||
nic = n
|
|
||||||
break
|
|
||||||
set_nic = interactive_ask_service('Do you want to enable Network Interface Card (NIC) optimization?', 'Yes - optimize the NIC queue settings. Selecting Yes greatly improves performance. No - skip this step.', 'yes')
|
|
||||||
if sysconfig_setup:
|
if sysconfig_setup:
|
||||||
setup_args = '--setup-nic' if set_nic else ''
|
setup_args = '--setup-nic' if set_nic else ''
|
||||||
run_setup_script('NIC queue', '/usr/lib/scylla/scylla_sysconfig_setup --nic {nic} {setup_args}'.format(nic=nic, setup_args=setup_args))
|
run_setup_script('NIC queue', '/usr/lib/scylla/scylla_sysconfig_setup --nic {nic} {setup_args}'.format(nic=nic, setup_args=setup_args))
|
||||||
|
|||||||
46
dist/common/scripts/scylla_stop
vendored
46
dist/common/scripts/scylla_stop
vendored
@@ -1,10 +1,40 @@
|
|||||||
#!/bin/bash -e
|
#!/usr/bin/python3
|
||||||
|
#
|
||||||
|
# Copyright 2018 ScyllaDB
|
||||||
|
#
|
||||||
|
|
||||||
. /usr/lib/scylla/scylla_lib.sh
|
#
|
||||||
|
# This file is part of Scylla.
|
||||||
|
#
|
||||||
|
# Scylla is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# Scylla is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
if [ "$NETWORK_MODE" = "virtio" ]; then
|
import os
|
||||||
ip tuntap del mode tap dev $TAP
|
import sys
|
||||||
elif [ "$NETWORK_MODE" = "dpdk" ]; then
|
from scylla_util import *
|
||||||
/usr/lib/scylla/dpdk-devbind.py -u $ETHPCIID
|
|
||||||
/usr/lib/scylla/dpdk-devbind.py -b $ETHDRV $ETHPCIID
|
if __name__ == '__main__':
|
||||||
fi
|
if os.getuid() > 0:
|
||||||
|
print('Requires root permission.')
|
||||||
|
sys.exit(1)
|
||||||
|
if is_redhat_variant():
|
||||||
|
cfg = sysconfig_parser('/etc/sysconfig/scylla-server')
|
||||||
|
else:
|
||||||
|
cfg = sysconfig_parser('/etc/default/scylla-server')
|
||||||
|
|
||||||
|
|
||||||
|
if cfg.get('NETWORK_MODE') == 'virtio':
|
||||||
|
run('ip tuntap del mode tap dev {TAP}'.format(TAP=cfg.get('TAP')))
|
||||||
|
elif cfg.get('NETWORK_MODE') == 'dpdk':
|
||||||
|
run('/usr/lib/scylla/dpdk-devbind.py -u {ETHPCIID}'.format(ETHPCIID=cfg.get('ETHPCIID')))
|
||||||
|
run('/usr/lib/scylla/dpdk-devbind.py -b {ETHDRV} {ETHPCIID}'.format(ETHDRV=cfg.get('ETHDRV'), ETHPCIID=cfg.get('ETHPCIID')))
|
||||||
|
|||||||
4
dist/common/scripts/scylla_sysconfig_setup
vendored
4
dist/common/scripts/scylla_sysconfig_setup
vendored
@@ -64,6 +64,10 @@ if __name__ == '__main__':
|
|||||||
help='AMI instance mode')
|
help='AMI instance mode')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
if args.nic and not is_valid_nic(args.nic):
|
||||||
|
print('NIC {} not found.'.format(args.nic))
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
ifname = args.nic if args.nic else cfg.get('IFNAME')
|
ifname = args.nic if args.nic else cfg.get('IFNAME')
|
||||||
network_mode = args.mode if args.mode else cfg.get('NETWORK_MODE')
|
network_mode = args.mode if args.mode else cfg.get('NETWORK_MODE')
|
||||||
|
|
||||||
|
|||||||
154
dist/common/scripts/scylla_util.py
vendored
154
dist/common/scripts/scylla_util.py
vendored
@@ -27,14 +27,19 @@ import platform
|
|||||||
import configparser
|
import configparser
|
||||||
import io
|
import io
|
||||||
import shlex
|
import shlex
|
||||||
|
import shutil
|
||||||
|
|
||||||
def curl(url):
|
def curl(url, byte=False):
|
||||||
max_retries = 5
|
max_retries = 5
|
||||||
retries = 0
|
retries = 0
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
req = urllib.request.Request(url)
|
req = urllib.request.Request(url)
|
||||||
return urllib.request.urlopen(req).read().decode('utf-8')
|
with urllib.request.urlopen(req) as res:
|
||||||
|
if byte:
|
||||||
|
return res.read()
|
||||||
|
else:
|
||||||
|
return res.read().decode('utf-8')
|
||||||
except urllib.error.HTTPError:
|
except urllib.error.HTTPError:
|
||||||
logging.warn("Failed to grab %s..." % url)
|
logging.warn("Failed to grab %s..." % url)
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
@@ -79,6 +84,10 @@ class aws_instance:
|
|||||||
continue
|
continue
|
||||||
self._disks[t] += [ self.__xenify(dev) ]
|
self._disks[t] += [ self.__xenify(dev) ]
|
||||||
|
|
||||||
|
def __mac_address(self, nic='eth0'):
|
||||||
|
with open('/sys/class/net/{}/address'.format(nic)) as f:
|
||||||
|
return f.read().strip()
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self._type = self.__instance_metadata("instance-type")
|
self._type = self.__instance_metadata("instance-type")
|
||||||
self.__populate_disks()
|
self.__populate_disks()
|
||||||
@@ -95,6 +104,25 @@ class aws_instance:
|
|||||||
"""Returns the class of the instance we are running in. i.e.: i3"""
|
"""Returns the class of the instance we are running in. i.e.: i3"""
|
||||||
return self._type.split(".")[0]
|
return self._type.split(".")[0]
|
||||||
|
|
||||||
|
def is_supported_instance_class(self):
|
||||||
|
if self.instance_class() in ['i2', 'i3']:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
def get_en_interface_type(self):
|
||||||
|
instance_class = self.instance_class()
|
||||||
|
instance_size = self.instance_size()
|
||||||
|
if instance_class in ['c3', 'c4', 'd2', 'i2', 'r3']:
|
||||||
|
return 'ixgbevf'
|
||||||
|
if instance_class in ['c5', 'c5d', 'f1', 'g3', 'h1', 'i3', 'm5', 'm5d', 'p2', 'p3', 'r4', 'x1']:
|
||||||
|
return 'ena'
|
||||||
|
if instance_class == 'm4':
|
||||||
|
if instance_size == '16xlarge':
|
||||||
|
return 'ena'
|
||||||
|
else:
|
||||||
|
return 'ixgbevf'
|
||||||
|
return None
|
||||||
|
|
||||||
def disks(self):
|
def disks(self):
|
||||||
"""Returns all disks in the system, as visible from the AWS registry"""
|
"""Returns all disks in the system, as visible from the AWS registry"""
|
||||||
disks = set()
|
disks = set()
|
||||||
@@ -133,6 +161,11 @@ class aws_instance:
|
|||||||
"""Returns the private IPv4 address of this instance"""
|
"""Returns the private IPv4 address of this instance"""
|
||||||
return self.__instance_metadata("local-ipv4")
|
return self.__instance_metadata("local-ipv4")
|
||||||
|
|
||||||
|
def is_vpc_enabled(self, nic='eth0'):
|
||||||
|
mac = self.__mac_address(nic)
|
||||||
|
mac_stat = self.__instance_metadata('network/interfaces/macs/{}'.format(mac))
|
||||||
|
return True if re.search(r'^vpc-id$', mac_stat, flags=re.MULTILINE) else False
|
||||||
|
|
||||||
|
|
||||||
## Regular expression helpers
|
## Regular expression helpers
|
||||||
# non-advancing comment matcher
|
# non-advancing comment matcher
|
||||||
@@ -222,37 +255,24 @@ class scylla_cpuinfo:
|
|||||||
return len(self._cpu_data["system"])
|
return len(self._cpu_data["system"])
|
||||||
|
|
||||||
def run(cmd, shell=False, silent=False, exception=True):
|
def run(cmd, shell=False, silent=False, exception=True):
|
||||||
stdout=None
|
stdout=subprocess.DEVNULL if silent else None
|
||||||
stderr=None
|
stderr=subprocess.DEVNULL if silent else None
|
||||||
if silent:
|
if not shell:
|
||||||
stdout=subprocess.DEVNULL
|
cmd = shlex.split(cmd)
|
||||||
stderr=subprocess.DEVNULL
|
if exception:
|
||||||
if shell:
|
return subprocess.check_call(cmd, shell=shell, stdout=stdout, stderr=stderr)
|
||||||
if exception:
|
|
||||||
return subprocess.check_call(cmd, shell=True, stdout=stdout, stderr=stderr)
|
|
||||||
else:
|
|
||||||
p = subprocess.Popen(cmd, shell=True, stdout=stdout, stderr=stderr)
|
|
||||||
return p.wait()
|
|
||||||
else:
|
else:
|
||||||
if exception:
|
p = subprocess.Popen(cmd, shell=shell, stdout=stdout, stderr=stderr)
|
||||||
return subprocess.check_call(shlex.split(cmd), stdout=stdout, stderr=stderr)
|
return p.wait()
|
||||||
else:
|
|
||||||
p = subprocess.Popen(shlex.split(cmd), stdout=stdout, stderr=stderr)
|
|
||||||
return p.wait()
|
|
||||||
|
|
||||||
def out(cmd, shell=False, exception=True):
|
def out(cmd, shell=False, exception=True):
|
||||||
if shell:
|
if not shell:
|
||||||
if exception:
|
cmd = shlex.split(cmd)
|
||||||
return subprocess.check_output(cmd, shell=True).strip().decode('utf-8')
|
if exception:
|
||||||
else:
|
return subprocess.check_output(cmd, shell=shell).strip().decode('utf-8')
|
||||||
p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
|
|
||||||
return p.communicate()[0].strip().decode('utf-8')
|
|
||||||
else:
|
else:
|
||||||
if exception:
|
p = subprocess.Popen(cmd, shell=shell, stdout=subprocess.PIPE)
|
||||||
return subprocess.check_output(shlex.split(cmd)).strip().decode('utf-8')
|
return p.communicate()[0].strip().decode('utf-8')
|
||||||
else:
|
|
||||||
p = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE)
|
|
||||||
return p.communicate()[0].strip().decode('utf-8')
|
|
||||||
|
|
||||||
def is_debian_variant():
|
def is_debian_variant():
|
||||||
return os.path.exists('/etc/debian_version')
|
return os.path.exists('/etc/debian_version')
|
||||||
@@ -306,17 +326,82 @@ def makedirs(name):
|
|||||||
if not os.path.isdir(name):
|
if not os.path.isdir(name):
|
||||||
os.makedirs(name)
|
os.makedirs(name)
|
||||||
|
|
||||||
|
def rmtree(path):
|
||||||
|
if not os.path.islink(path):
|
||||||
|
shutil.rmtree(path)
|
||||||
|
else:
|
||||||
|
os.remove(path)
|
||||||
|
|
||||||
def dist_name():
|
def dist_name():
|
||||||
return platform.dist()[0]
|
return platform.dist()[0]
|
||||||
|
|
||||||
def dist_ver():
|
def dist_ver():
|
||||||
return platform.dist()[1]
|
return platform.dist()[1]
|
||||||
|
|
||||||
|
def is_unused_disk(dev):
|
||||||
|
# dev is not in /sys/class/block/, like /dev/nvme[0-9]+
|
||||||
|
if not os.path.isdir('/sys/class/block/{dev}'.format(dev=dev.replace('/dev/',''))):
|
||||||
|
return False
|
||||||
|
try:
|
||||||
|
fd = os.open(dev, os.O_EXCL)
|
||||||
|
os.close(fd)
|
||||||
|
return True
|
||||||
|
except OSError:
|
||||||
|
return False
|
||||||
|
|
||||||
|
CONCOLORS = {'green':'\033[1;32m', 'red':'\033[1;31m', 'nocolor':'\033[0m'}
|
||||||
|
def colorprint(msg, **kwargs):
|
||||||
|
fmt = dict(CONCOLORS)
|
||||||
|
fmt.update(kwargs)
|
||||||
|
print(msg.format(**fmt))
|
||||||
|
|
||||||
|
def get_mode_cpuset(nic, mode):
|
||||||
|
try:
|
||||||
|
mode_cpu_mask=out('/usr/lib/scylla/perftune.py --tune net --nic "{nic}" --mode "{mode}" --get-cpu-mask'.format(nic=nic, mode=mode))
|
||||||
|
return hex2list(mode_cpu_mask)
|
||||||
|
except subprocess.CalledProcessError:
|
||||||
|
return '-1'
|
||||||
|
|
||||||
|
def get_cur_cpuset():
|
||||||
|
cfg = sysconfig_parser('/etc/scylla.d/cpuset.conf')
|
||||||
|
cpuset=cfg.get('CPUSET')
|
||||||
|
return re.sub(r'^--cpuset (.+)$', r'\1', cpuset).strip()
|
||||||
|
|
||||||
|
def get_tune_mode(nic):
|
||||||
|
if not os.path.exists('/etc/scylla.d/cpuset.conf'):
|
||||||
|
return
|
||||||
|
cur_cpuset=get_cur_cpuset()
|
||||||
|
mq_cpuset=get_mode_cpuset(nic, 'mq')
|
||||||
|
sq_cpuset=get_mode_cpuset(nic, 'sq')
|
||||||
|
sq_split_cpuset=get_mode_cpuset(nic, 'sq_split')
|
||||||
|
|
||||||
|
if cur_cpuset == mq_cpuset:
|
||||||
|
return 'mq'
|
||||||
|
elif cur_cpuset == sq_cpuset:
|
||||||
|
return 'sq'
|
||||||
|
elif cur_cpuset == sq_split_cpuset:
|
||||||
|
return 'sq_split'
|
||||||
|
|
||||||
|
def create_perftune_conf(nic='eth0'):
|
||||||
|
if os.path.exists('/etc/scylla.d/perftune.yaml'):
|
||||||
|
return
|
||||||
|
mode=get_tune_mode(nic)
|
||||||
|
yaml=out('/usr/lib/scylla/perftune.py --tune net --nic "{nic}" --mode {mode} --dump-options-file'.format(nic=nic, mode=mode))
|
||||||
|
with open('/etc/scylla.d/perftune.yaml', 'w') as f:
|
||||||
|
f.write(yaml)
|
||||||
|
|
||||||
|
def is_valid_nic(nic):
|
||||||
|
return os.path.exists('/sys/class/net/{}'.format(nic))
|
||||||
|
|
||||||
class SystemdException(Exception):
|
class SystemdException(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class systemd_unit:
|
class systemd_unit:
|
||||||
def __init__(self, unit):
|
def __init__(self, unit):
|
||||||
|
try:
|
||||||
|
run('systemctl cat {}'.format(unit), silent=True)
|
||||||
|
except subprocess.CalledProcessError:
|
||||||
|
raise SystemdException('unit {} not found'.format(unit))
|
||||||
self._unit = unit
|
self._unit = unit
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
@@ -336,8 +421,7 @@ class systemd_unit:
|
|||||||
return run('systemctl disable {}'.format(self._unit))
|
return run('systemctl disable {}'.format(self._unit))
|
||||||
|
|
||||||
def is_active(self):
|
def is_active(self):
|
||||||
res = out('systemctl is-active {}'.format(self._unit), exception=False)
|
return out('systemctl is-active {}'.format(self._unit), exception=False)
|
||||||
return True if re.match(r'^active', res, flags=re.MULTILINE) else False
|
|
||||||
|
|
||||||
def mask(self):
|
def mask(self):
|
||||||
return run('systemctl mask {}'.format(self._unit))
|
return run('systemctl mask {}'.format(self._unit))
|
||||||
@@ -368,7 +452,7 @@ class sysconfig_parser:
|
|||||||
self.__load()
|
self.__load()
|
||||||
|
|
||||||
def get(self, key):
|
def get(self, key):
|
||||||
return self._cfg.get('global', key)
|
return self._cfg.get('global', key).strip('"')
|
||||||
|
|
||||||
def set(self, key, val):
|
def set(self, key, val):
|
||||||
if not self._cfg.has_option('global', key):
|
if not self._cfg.has_option('global', key):
|
||||||
@@ -379,9 +463,3 @@ class sysconfig_parser:
|
|||||||
def commit(self):
|
def commit(self):
|
||||||
with open(self._filename, 'w') as f:
|
with open(self._filename, 'w') as f:
|
||||||
f.write(self._data)
|
f.write(self._data)
|
||||||
|
|
||||||
class concolor:
|
|
||||||
GREEN = '\033[0;32m'
|
|
||||||
RED = '\033[0;31m'
|
|
||||||
BOLD_RED = '\033[1;31m'
|
|
||||||
NO_COLOR = '\033[0m'
|
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ Group=scylla
|
|||||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg --repo-files '/etc/apt/sources.list.d/scylla*.list' version --mode d
|
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg --repo-files '/etc/apt/sources.list.d/scylla*.list' version --mode d
|
||||||
{{/debian}}
|
{{/debian}}
|
||||||
{{#redhat}}
|
{{#redhat}}
|
||||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg --repo-files @@REPOFILES@@ version --mode d
|
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg --repo-files '/etc/yum.repos.d/scylla*.repo' version --mode d
|
||||||
{{/redhat}}
|
{{/redhat}}
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ After=network.target
|
|||||||
Type=simple
|
Type=simple
|
||||||
User=scylla
|
User=scylla
|
||||||
Group=scylla
|
Group=scylla
|
||||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg --repo-files @@REPOFILES@@ version --mode r
|
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg --repo-files '/etc/yum.repos.d/scylla*.repo' version --mode r
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
|
|||||||
31
dist/debian/build_deb.sh
vendored
31
dist/debian/build_deb.sh
vendored
@@ -51,6 +51,18 @@ is_redhat_variant() {
|
|||||||
is_debian_variant() {
|
is_debian_variant() {
|
||||||
[ -f /etc/debian_version ]
|
[ -f /etc/debian_version ]
|
||||||
}
|
}
|
||||||
|
is_debian() {
|
||||||
|
case "$1" in
|
||||||
|
jessie|stretch) return 0;;
|
||||||
|
*) return 1;;
|
||||||
|
esac
|
||||||
|
}
|
||||||
|
is_ubuntu() {
|
||||||
|
case "$1" in
|
||||||
|
trusty|xenial|bionic) return 0;;
|
||||||
|
*) return 1;;
|
||||||
|
esac
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
pkg_install() {
|
pkg_install() {
|
||||||
@@ -99,11 +111,14 @@ if [ ! -f /usr/bin/dh_testdir ]; then
|
|||||||
fi
|
fi
|
||||||
if [ ! -f /usr/bin/pystache ]; then
|
if [ ! -f /usr/bin/pystache ]; then
|
||||||
if is_redhat_variant; then
|
if is_redhat_variant; then
|
||||||
sudo yum install -y python2-pystache || sudo yum install -y pystache
|
sudo yum install -y /usr/bin/pystache
|
||||||
elif is_debian_variant; then
|
elif is_debian_variant; then
|
||||||
sudo apt-get install -y python-pystache
|
sudo apt-get install -y python-pystache
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
if is_debian_variant && [ ! -f /usr/share/doc/python-pkg-resources/copyright ]; then
|
||||||
|
sudo apt-get install -y python-pkg-resources
|
||||||
|
fi
|
||||||
|
|
||||||
if [ -z "$TARGET" ]; then
|
if [ -z "$TARGET" ]; then
|
||||||
if is_debian_variant; then
|
if is_debian_variant; then
|
||||||
@@ -125,12 +140,12 @@ echo $VERSION > version
|
|||||||
|
|
||||||
cp -a dist/debian/debian debian
|
cp -a dist/debian/debian debian
|
||||||
cp dist/common/sysconfig/scylla-server debian/scylla-server.default
|
cp dist/common/sysconfig/scylla-server debian/scylla-server.default
|
||||||
if [ "$TARGET" = "jessie" ] || [ "$TARGET" = "stretch" ]; then
|
if [ "$TARGET" = "trusty" ]; then
|
||||||
REVISION="1~$TARGET"
|
|
||||||
elif [ "$TARGET" = "trusty" ]; then
|
|
||||||
cp dist/debian/scylla-server.cron.d debian/
|
cp dist/debian/scylla-server.cron.d debian/
|
||||||
REVISION="0ubuntu1~$TARGET"
|
fi
|
||||||
elif [ "$TARGET" = "xenial" ] || [ "$TARGET" = "bionic" ]; then
|
if is_debian $TARGET; then
|
||||||
|
REVISION="1~$TARGET"
|
||||||
|
elif is_ubuntu $TARGET; then
|
||||||
REVISION="0ubuntu1~$TARGET"
|
REVISION="0ubuntu1~$TARGET"
|
||||||
else
|
else
|
||||||
echo "Unknown distribution: $TARGET"
|
echo "Unknown distribution: $TARGET"
|
||||||
@@ -145,8 +160,8 @@ chmod a+rx debian/rules
|
|||||||
|
|
||||||
if [ "$TARGET" != "trusty" ]; then
|
if [ "$TARGET" != "trusty" ]; then
|
||||||
pystache dist/common/systemd/scylla-server.service.mustache "{ $MUSTACHE_DIST }" > debian/scylla-server.service
|
pystache dist/common/systemd/scylla-server.service.mustache "{ $MUSTACHE_DIST }" > debian/scylla-server.service
|
||||||
pystache dist/common/systemd/scylla-housekeeping-daily.service.mustache "{ $MUSTACHE_DIST }" > debian/scylla-housekeeping-daily.service
|
pystache dist/common/systemd/scylla-housekeeping-daily.service.mustache "{ $MUSTACHE_DIST }" > debian/scylla-server.scylla-housekeeping-daily.service
|
||||||
pystache dist/common/systemd/scylla-housekeeping-restart.service.mustache "{ $MUSTACHE_DIST }" > debian/scylla-housekeeping-restart.service
|
pystache dist/common/systemd/scylla-housekeeping-restart.service.mustache "{ $MUSTACHE_DIST }" > debian/scylla-server.scylla-housekeeping-restart.service
|
||||||
cp dist/common/systemd/scylla-fstrim.service debian/scylla-server.scylla-fstrim.service
|
cp dist/common/systemd/scylla-fstrim.service debian/scylla-server.scylla-fstrim.service
|
||||||
cp dist/common/systemd/node-exporter.service debian/scylla-server.node-exporter.service
|
cp dist/common/systemd/node-exporter.service debian/scylla-server.node-exporter.service
|
||||||
fi
|
fi
|
||||||
|
|||||||
4
dist/docker/redhat/Dockerfile
vendored
4
dist/docker/redhat/Dockerfile
vendored
@@ -26,14 +26,14 @@ ADD commandlineparser.py /commandlineparser.py
|
|||||||
ADD docker-entrypoint.py /docker-entrypoint.py
|
ADD docker-entrypoint.py /docker-entrypoint.py
|
||||||
|
|
||||||
# Install Scylla:
|
# Install Scylla:
|
||||||
RUN curl http://downloads.scylladb.com/rpm/unstable/centos/master/latest/scylla.repo -o /etc/yum.repos.d/scylla.repo && \
|
RUN curl http://downloads.scylladb.com/rpm/centos/scylla-2.3.repo -o /etc/yum.repos.d/scylla.repo && \
|
||||||
yum -y install epel-release && \
|
yum -y install epel-release && \
|
||||||
yum -y clean expire-cache && \
|
yum -y clean expire-cache && \
|
||||||
yum -y update && \
|
yum -y update && \
|
||||||
yum -y remove boost-thread boost-system && \
|
yum -y remove boost-thread boost-system && \
|
||||||
yum -y install scylla hostname supervisor && \
|
yum -y install scylla hostname supervisor && \
|
||||||
yum clean all && \
|
yum clean all && \
|
||||||
yum -y install python34 python34-PyYAML && \
|
yum -y install python36 python36-PyYAML && \
|
||||||
cat /scylla_bashrc >> /etc/bashrc && \
|
cat /scylla_bashrc >> /etc/bashrc && \
|
||||||
mkdir -p /etc/supervisor.conf.d && \
|
mkdir -p /etc/supervisor.conf.d && \
|
||||||
mkdir -p /var/log/scylla && \
|
mkdir -p /var/log/scylla && \
|
||||||
|
|||||||
8
dist/docker/redhat/scylla-service.sh
vendored
8
dist/docker/redhat/scylla-service.sh
vendored
@@ -1,7 +1,13 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
. /usr/lib/scylla/scylla_prepare
|
/usr/lib/scylla/scylla_prepare
|
||||||
|
|
||||||
|
. /etc/sysconfig/scylla-server
|
||||||
|
|
||||||
export SCYLLA_HOME SCYLLA_CONF
|
export SCYLLA_HOME SCYLLA_CONF
|
||||||
|
|
||||||
|
for f in /etc/scylla.d/*.conf; do
|
||||||
|
. "$f"
|
||||||
|
done
|
||||||
|
|
||||||
exec /usr/bin/scylla $SCYLLA_ARGS $SEASTAR_IO $DEV_MODE $CPUSET $SCYLLA_DOCKER_ARGS
|
exec /usr/bin/scylla $SCYLLA_ARGS $SEASTAR_IO $DEV_MODE $CPUSET $SCYLLA_DOCKER_ARGS
|
||||||
|
|||||||
7
dist/redhat/build_rpm.sh
vendored
7
dist/redhat/build_rpm.sh
vendored
@@ -98,12 +98,19 @@ rm -f version
|
|||||||
|
|
||||||
pystache dist/redhat/scylla.spec.mustache "{ \"version\": \"$SCYLLA_VERSION\", \"release\": \"$SCYLLA_RELEASE\", \"housekeeping\": $DIST }" > build/scylla.spec
|
pystache dist/redhat/scylla.spec.mustache "{ \"version\": \"$SCYLLA_VERSION\", \"release\": \"$SCYLLA_RELEASE\", \"housekeeping\": $DIST }" > build/scylla.spec
|
||||||
|
|
||||||
|
# mock generates files owned by root, fix this up
|
||||||
|
fix_ownership() {
|
||||||
|
sudo chown "$(id -u):$(id -g)" -R "$@"
|
||||||
|
}
|
||||||
|
|
||||||
if [ $JOBS -gt 0 ]; then
|
if [ $JOBS -gt 0 ]; then
|
||||||
RPM_JOBS_OPTS=(--define="_smp_mflags -j$JOBS")
|
RPM_JOBS_OPTS=(--define="_smp_mflags -j$JOBS")
|
||||||
fi
|
fi
|
||||||
sudo mock --buildsrpm --root=$TARGET --resultdir=`pwd`/build/srpms --spec=build/scylla.spec --sources=build/scylla-$VERSION.tar $SRPM_OPTS "${RPM_JOBS_OPTS[@]}"
|
sudo mock --buildsrpm --root=$TARGET --resultdir=`pwd`/build/srpms --spec=build/scylla.spec --sources=build/scylla-$VERSION.tar $SRPM_OPTS "${RPM_JOBS_OPTS[@]}"
|
||||||
|
fix_ownership build/srpms
|
||||||
if [[ "$TARGET" =~ ^epel-7- ]]; then
|
if [[ "$TARGET" =~ ^epel-7- ]]; then
|
||||||
TARGET=scylla-$TARGET
|
TARGET=scylla-$TARGET
|
||||||
RPM_OPTS="$RPM_OPTS --configdir=dist/redhat/mock"
|
RPM_OPTS="$RPM_OPTS --configdir=dist/redhat/mock"
|
||||||
fi
|
fi
|
||||||
sudo mock --rebuild --root=$TARGET --resultdir=`pwd`/build/rpms $RPM_OPTS "${RPM_JOBS_OPTS[@]}" build/srpms/scylla-$VERSION*.src.rpm
|
sudo mock --rebuild --root=$TARGET --resultdir=`pwd`/build/rpms $RPM_OPTS "${RPM_JOBS_OPTS[@]}" build/srpms/scylla-$VERSION*.src.rpm
|
||||||
|
fix_ownership build/rpms
|
||||||
|
|||||||
7
dist/redhat/scylla.spec.mustache
vendored
7
dist/redhat/scylla.spec.mustache
vendored
@@ -56,9 +56,9 @@ License: AGPLv3
|
|||||||
URL: http://www.scylladb.com/
|
URL: http://www.scylladb.com/
|
||||||
BuildRequires: libaio-devel libstdc++-devel cryptopp-devel hwloc-devel numactl-devel libpciaccess-devel libxml2-devel zlib-devel thrift-devel yaml-cpp-devel lz4-devel snappy-devel jsoncpp-devel systemd-devel xz-devel pcre-devel elfutils-libelf-devel bzip2-devel keyutils-libs-devel xfsprogs-devel make gnutls-devel systemd-devel lksctp-tools-devel protobuf-devel protobuf-compiler systemtap-sdt-devel ninja-build cmake python ragel grep kernel-headers
|
BuildRequires: libaio-devel libstdc++-devel cryptopp-devel hwloc-devel numactl-devel libpciaccess-devel libxml2-devel zlib-devel thrift-devel yaml-cpp-devel lz4-devel snappy-devel jsoncpp-devel systemd-devel xz-devel pcre-devel elfutils-libelf-devel bzip2-devel keyutils-libs-devel xfsprogs-devel make gnutls-devel systemd-devel lksctp-tools-devel protobuf-devel protobuf-compiler systemtap-sdt-devel ninja-build cmake python ragel grep kernel-headers
|
||||||
%{?fedora:BuildRequires: boost-devel antlr3-tool antlr3-C++-devel python3 gcc-c++ libasan libubsan python3-pyparsing dnf-yum python2-pystache}
|
%{?fedora:BuildRequires: boost-devel antlr3-tool antlr3-C++-devel python3 gcc-c++ libasan libubsan python3-pyparsing dnf-yum python2-pystache}
|
||||||
%{?rhel:BuildRequires: scylla-libstdc++73-static scylla-boost163-devel scylla-boost163-static scylla-antlr35-tool scylla-antlr35-C++-devel python34 scylla-gcc73-c++, scylla-python34-pyparsing20 yaml-cpp-static pystache python-setuptools}
|
%{?rhel:BuildRequires: scylla-libstdc++73-static scylla-boost163-devel scylla-boost163-static scylla-antlr35-tool scylla-antlr35-C++-devel python36 scylla-gcc73-c++, scylla-python36-pyparsing20 yaml-cpp-static pystache python-setuptools}
|
||||||
Requires: scylla-conf systemd-libs hwloc PyYAML python-urwid pciutils pyparsing python-requests curl util-linux python-setuptools pciutils python3-pyudev mdadm xfsprogs
|
Requires: scylla-conf systemd-libs hwloc PyYAML python-urwid pciutils pyparsing python-requests curl util-linux python-setuptools pciutils python3-pyudev mdadm xfsprogs
|
||||||
%{?rhel:Requires: python34 python34-PyYAML kernel >= 3.10.0-514}
|
%{?rhel:Requires: python36 python36-PyYAML kernel >= 3.10.0-514}
|
||||||
%{?fedora:Requires: python3 python3-PyYAML}
|
%{?fedora:Requires: python3 python3-PyYAML}
|
||||||
Conflicts: abrt
|
Conflicts: abrt
|
||||||
%ifarch x86_64
|
%ifarch x86_64
|
||||||
@@ -97,7 +97,7 @@ cflags="--cflags=${defines[*]}"
|
|||||||
%endif
|
%endif
|
||||||
%if 0%{?rhel}
|
%if 0%{?rhel}
|
||||||
. /etc/profile.d/scylla.sh
|
. /etc/profile.d/scylla.sh
|
||||||
python3.4 ./configure.py %{?configure_opt} --with=scylla --with=iotune --mode=release "$cflags" --static-boost --static-yaml-cpp --compiler=/opt/scylladb/bin/g++-7.3 --python python3.4 --ldflag=-Wl,-rpath=/opt/scylladb/lib64
|
python3.6 ./configure.py %{?configure_opt} --with=scylla --with=iotune --mode=release "$cflags" --static-boost --static-yaml-cpp --compiler=/opt/scylladb/bin/g++-7.3 --python python3.6 --ldflag=-Wl,-rpath=/opt/scylladb/lib64
|
||||||
%endif
|
%endif
|
||||||
ninja-build %{?_smp_mflags} build/release/scylla build/release/iotune
|
ninja-build %{?_smp_mflags} build/release/scylla build/release/iotune
|
||||||
|
|
||||||
@@ -201,7 +201,6 @@ rm -rf $RPM_BUILD_ROOT
|
|||||||
%{_prefix}/lib/scylla/api/api-doc/*
|
%{_prefix}/lib/scylla/api/api-doc/*
|
||||||
%{_prefix}/lib/scylla/scyllatop/*
|
%{_prefix}/lib/scylla/scyllatop/*
|
||||||
%{_prefix}/lib/scylla/scylla_config_get.py
|
%{_prefix}/lib/scylla/scylla_config_get.py
|
||||||
%{_prefix}/lib/scylla/scylla_lib.sh
|
|
||||||
%{_prefix}/lib/scylla/scylla_util.py
|
%{_prefix}/lib/scylla/scylla_util.py
|
||||||
%if 0%{?fedora} >= 27
|
%if 0%{?fedora} >= 27
|
||||||
%{_prefix}/lib/scylla/scylla-gdb.py
|
%{_prefix}/lib/scylla/scylla-gdb.py
|
||||||
|
|||||||
82
docs/protocol-extensions.md
Normal file
82
docs/protocol-extensions.md
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
Protocol extensions to the Cassandra Native Protocol
|
||||||
|
====================================================
|
||||||
|
|
||||||
|
This document specifies extensions to the protocol defined
|
||||||
|
by Cassandra's native_protocol_v4.spec and native_protocol_v5.spec.
|
||||||
|
The extensions are designed so that a driver supporting them can
|
||||||
|
continue to interoperate with Cassandra and other compatible servers
|
||||||
|
with no configuration needed; the driver can discover the extensions
|
||||||
|
and enable them conditionally.
|
||||||
|
|
||||||
|
An extension can be discovered by using the OPTIONS request; the
|
||||||
|
returned SUPPORTED response will have zero or more options beginning
|
||||||
|
with SCYLLA indicating extensions defined in this document, in
|
||||||
|
addition to options documented by Cassandra. How to use the extension
|
||||||
|
is further explained in this document.
|
||||||
|
|
||||||
|
# Intranode sharding
|
||||||
|
|
||||||
|
This extension allows the driver to discover how Scylla internally
|
||||||
|
partitions data among logical cores. It can then create at least
|
||||||
|
one connection per logical core, and send queries directly to the
|
||||||
|
logical core that will serve them, greatly improving load balancing
|
||||||
|
and efficiency.
|
||||||
|
|
||||||
|
To use the extension, send the OPTIONS message. The data is returned
|
||||||
|
in the SUPPORTED message, as a set of key/value options. Numeric values
|
||||||
|
are returned as their base-10 ASCII representation.
|
||||||
|
|
||||||
|
The keys and values are:
|
||||||
|
- `SCYLLA_SHARD` is an integer, the zero-based shard number this connection
|
||||||
|
is connected to (for example, `3`).
|
||||||
|
- `SCYLLA_NR_SHARDS` is an integer containing the number of shards on this
|
||||||
|
node (for example, `12`). All shard numbers are smaller than this number.
|
||||||
|
- `SCYLLA_PARTITIONER` is the fully-qualified name of the partitioner in use (e.g.
|
||||||
|
`org.apache.cassandra.partitioners.Murmur3Partitioner`).
|
||||||
|
- `SCYLLA_SHARDING_ALGORITHM` is the name of an algorithm used to select how
|
||||||
|
partitions are mapped into shards (described below)
|
||||||
|
- `SCYLLA_SHARDING_IGNORE_MSB` is an integer parameter to the algorithm (also
|
||||||
|
described below)
|
||||||
|
|
||||||
|
Currently, one `SCYLLA_SHARDING_ALGORITHM` is defined,
|
||||||
|
`biased-token-round-robin`. To apply the algorithm,
|
||||||
|
perform the following steps (assuming infinite-precision arithmetic):
|
||||||
|
|
||||||
|
- subtract the minimum token value from the partition's token
|
||||||
|
in order to bias it: `biased_token = token - (-2**63)`
|
||||||
|
- shift `biased_token` left by `SCYLLA_SHARDING_IGNORE_MSB` bits, discarding any
|
||||||
|
bits beyond the 63rd:
|
||||||
|
`biased_token = (biased_token << SCYLLA_SHARDING_IGNORE_MSB) % (2**64)`
|
||||||
|
- multiply by `SCYLLA_NR_SHARDS` and perform a truncating division by 2**64:
|
||||||
|
`shard = (biased_token * SCYLLA_NR_SHARDS) / 2**64`
|
||||||
|
|
||||||
|
(this apparently convoluted algorithm replaces a slow division instruction with
|
||||||
|
a fast multiply instruction).
|
||||||
|
|
||||||
|
In C with 128-bit arithmetic support, these operations can be efficiently
|
||||||
|
performed in three steps:
|
||||||
|
|
||||||
|
```c++
|
||||||
|
uint64_t biased_token = token + ((uint64_t)1 << 63);
|
||||||
|
biased_token <<= ignore_msb;
|
||||||
|
int shard = ((unsigned __int128)biased_token * nr_shards) >> 64;
|
||||||
|
```
|
||||||
|
|
||||||
|
In languages without 128-bit arithmetic support, use the following (this example
|
||||||
|
is for Java):
|
||||||
|
|
||||||
|
```Java
|
||||||
|
private int scyllaShardOf(long token) {
|
||||||
|
token += Long.MIN_VALUE;
|
||||||
|
token <<= ignoreMsb;
|
||||||
|
long tokLo = token & 0xffffffffL;
|
||||||
|
long tokHi = (token >>> 32) & 0xffffffffL;
|
||||||
|
long mul1 = tokLo * nrShards;
|
||||||
|
long mul2 = tokHi * nrShards;
|
||||||
|
long sum = (mul1 >>> 32) + mul2;
|
||||||
|
return (int)(sum >>> 32);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
It is recommended that drivers open connections until they have at
|
||||||
|
least one connection per shard, then close excess connections.
|
||||||
@@ -449,9 +449,13 @@ GCC6_CONCEPT(requires requires(StopCondition stop, ConsumeMutationFragment consu
|
|||||||
{ consume_mf(std::move(mf)) } -> void;
|
{ consume_mf(std::move(mf)) } -> void;
|
||||||
{ consume_eos() } -> future<>;
|
{ consume_eos() } -> future<>;
|
||||||
})
|
})
|
||||||
future<> consume_mutation_fragments_until(flat_mutation_reader& r, StopCondition&& stop,
|
future<> consume_mutation_fragments_until(
|
||||||
ConsumeMutationFragment&& consume_mf, ConsumeEndOfStream&& consume_eos) {
|
flat_mutation_reader& r,
|
||||||
return do_until([stop] { return stop(); }, [&r, stop, consume_mf, consume_eos] {
|
StopCondition&& stop,
|
||||||
|
ConsumeMutationFragment&& consume_mf,
|
||||||
|
ConsumeEndOfStream&& consume_eos,
|
||||||
|
db::timeout_clock::time_point timeout) {
|
||||||
|
return do_until([stop] { return stop(); }, [&r, stop, consume_mf, consume_eos, timeout] {
|
||||||
while (!r.is_buffer_empty()) {
|
while (!r.is_buffer_empty()) {
|
||||||
consume_mf(r.pop_mutation_fragment());
|
consume_mf(r.pop_mutation_fragment());
|
||||||
if (stop()) {
|
if (stop()) {
|
||||||
@@ -461,7 +465,7 @@ future<> consume_mutation_fragments_until(flat_mutation_reader& r, StopCondition
|
|||||||
if (r.is_end_of_stream()) {
|
if (r.is_end_of_stream()) {
|
||||||
return consume_eos();
|
return consume_eos();
|
||||||
}
|
}
|
||||||
return r.fill_buffer();
|
return r.fill_buffer(timeout);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -129,26 +129,8 @@ public:
|
|||||||
update_is_normal();
|
update_is_normal();
|
||||||
}
|
}
|
||||||
|
|
||||||
void apply_application_state(application_state key, versioned_value&& value) {
|
void add_application_state(const endpoint_state& es) {
|
||||||
auto&& e = _application_state[key];
|
_application_state = es._application_state;
|
||||||
if (e.version < value.version) {
|
|
||||||
e = std::move(value);
|
|
||||||
}
|
|
||||||
update_is_normal();
|
|
||||||
}
|
|
||||||
|
|
||||||
void apply_application_state(application_state key, const versioned_value& value) {
|
|
||||||
auto&& e = _application_state[key];
|
|
||||||
if (e.version < value.version) {
|
|
||||||
e = value;
|
|
||||||
}
|
|
||||||
update_is_normal();
|
|
||||||
}
|
|
||||||
|
|
||||||
void apply_application_state(const endpoint_state& es) {
|
|
||||||
for (auto&& e : es._application_state) {
|
|
||||||
apply_application_state(e.first, e.second);
|
|
||||||
}
|
|
||||||
update_is_normal();
|
update_is_normal();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -923,7 +923,7 @@ void gossiper::make_random_gossip_digest(std::vector<gossip_digest>& g_digests)
|
|||||||
future<> gossiper::replicate(inet_address ep, const endpoint_state& es) {
|
future<> gossiper::replicate(inet_address ep, const endpoint_state& es) {
|
||||||
return container().invoke_on_all([ep, es, orig = engine().cpu_id(), self = shared_from_this()] (gossiper& g) {
|
return container().invoke_on_all([ep, es, orig = engine().cpu_id(), self = shared_from_this()] (gossiper& g) {
|
||||||
if (engine().cpu_id() != orig) {
|
if (engine().cpu_id() != orig) {
|
||||||
g.endpoint_state_map[ep].apply_application_state(es);
|
g.endpoint_state_map[ep].add_application_state(es);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -932,7 +932,7 @@ future<> gossiper::replicate(inet_address ep, const std::map<application_state,
|
|||||||
return container().invoke_on_all([ep, &src, &changed, orig = engine().cpu_id(), self = shared_from_this()] (gossiper& g) {
|
return container().invoke_on_all([ep, &src, &changed, orig = engine().cpu_id(), self = shared_from_this()] (gossiper& g) {
|
||||||
if (engine().cpu_id() != orig) {
|
if (engine().cpu_id() != orig) {
|
||||||
for (auto&& key : changed) {
|
for (auto&& key : changed) {
|
||||||
g.endpoint_state_map[ep].apply_application_state(key, src.at(key));
|
g.endpoint_state_map[ep].add_application_state(key, src.at(key));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@@ -941,7 +941,7 @@ future<> gossiper::replicate(inet_address ep, const std::map<application_state,
|
|||||||
future<> gossiper::replicate(inet_address ep, application_state key, const versioned_value& value) {
|
future<> gossiper::replicate(inet_address ep, application_state key, const versioned_value& value) {
|
||||||
return container().invoke_on_all([ep, key, &value, orig = engine().cpu_id(), self = shared_from_this()] (gossiper& g) {
|
return container().invoke_on_all([ep, key, &value, orig = engine().cpu_id(), self = shared_from_this()] (gossiper& g) {
|
||||||
if (engine().cpu_id() != orig) {
|
if (engine().cpu_id() != orig) {
|
||||||
g.endpoint_state_map[ep].apply_application_state(key, value);
|
g.endpoint_state_map[ep].add_application_state(key, value);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -1168,11 +1168,13 @@ stdx::optional<endpoint_state> gossiper::get_endpoint_state_for_endpoint(inet_ad
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void gossiper::reset_endpoint_state_map() {
|
future<> gossiper::reset_endpoint_state_map() {
|
||||||
endpoint_state_map.clear();
|
|
||||||
_unreachable_endpoints.clear();
|
_unreachable_endpoints.clear();
|
||||||
_live_endpoints.clear();
|
_live_endpoints.clear();
|
||||||
_live_endpoints_just_added.clear();
|
_live_endpoints_just_added.clear();
|
||||||
|
return container().invoke_on_all([] (gossiper& g) {
|
||||||
|
g.endpoint_state_map.clear();
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unordered_map<inet_address, endpoint_state>& gms::gossiper::get_endpoint_states() {
|
std::unordered_map<inet_address, endpoint_state>& gms::gossiper::get_endpoint_states() {
|
||||||
@@ -1655,6 +1657,7 @@ void gossiper::maybe_initialize_local_state(int generation_nbr) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Runs inside seastar::async context
|
||||||
void gossiper::add_saved_endpoint(inet_address ep) {
|
void gossiper::add_saved_endpoint(inet_address ep) {
|
||||||
if (ep == get_broadcast_address()) {
|
if (ep == get_broadcast_address()) {
|
||||||
logger.debug("Attempt to add self as saved endpoint");
|
logger.debug("Attempt to add self as saved endpoint");
|
||||||
@@ -1680,6 +1683,7 @@ void gossiper::add_saved_endpoint(inet_address ep) {
|
|||||||
}
|
}
|
||||||
ep_state.mark_dead();
|
ep_state.mark_dead();
|
||||||
endpoint_state_map[ep] = ep_state;
|
endpoint_state_map[ep] = ep_state;
|
||||||
|
replicate(ep, ep_state).get();
|
||||||
_unreachable_endpoints[ep] = now();
|
_unreachable_endpoints[ep] = now();
|
||||||
logger.trace("Adding saved endpoint {} {}", ep, ep_state.get_heart_beat_state().get_generation());
|
logger.trace("Adding saved endpoint {} {}", ep, ep_state.get_heart_beat_state().get_generation());
|
||||||
}
|
}
|
||||||
@@ -1915,6 +1919,7 @@ void gossiper::mark_as_shutdown(const inet_address& endpoint) {
|
|||||||
auto& ep_state = *es;
|
auto& ep_state = *es;
|
||||||
ep_state.add_application_state(application_state::STATUS, storage_service_value_factory().shutdown(true));
|
ep_state.add_application_state(application_state::STATUS, storage_service_value_factory().shutdown(true));
|
||||||
ep_state.get_heart_beat_state().force_highest_possible_version_unsafe();
|
ep_state.get_heart_beat_state().force_highest_possible_version_unsafe();
|
||||||
|
replicate(endpoint, ep_state).get();
|
||||||
mark_dead(endpoint, ep_state);
|
mark_dead(endpoint, ep_state);
|
||||||
get_local_failure_detector().force_conviction(endpoint);
|
get_local_failure_detector().force_conviction(endpoint);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -418,7 +418,7 @@ public:
|
|||||||
stdx::optional<endpoint_state> get_endpoint_state_for_endpoint(inet_address ep) const;
|
stdx::optional<endpoint_state> get_endpoint_state_for_endpoint(inet_address ep) const;
|
||||||
|
|
||||||
// removes ALL endpoint states; should only be called after shadow gossip
|
// removes ALL endpoint states; should only be called after shadow gossip
|
||||||
void reset_endpoint_state_map();
|
future<> reset_endpoint_state_map();
|
||||||
|
|
||||||
std::unordered_map<inet_address, endpoint_state>& get_endpoint_states();
|
std::unordered_map<inet_address, endpoint_state>& get_endpoint_states();
|
||||||
|
|
||||||
|
|||||||
36
imr/alloc.hh
36
imr/alloc.hh
@@ -84,6 +84,8 @@ template<typename Structure, typename CtxFactory>
|
|||||||
GCC6_CONCEPT(requires ContextFactory<CtxFactory>)
|
GCC6_CONCEPT(requires ContextFactory<CtxFactory>)
|
||||||
class lsa_migrate_fn final : public migrate_fn_type, CtxFactory {
|
class lsa_migrate_fn final : public migrate_fn_type, CtxFactory {
|
||||||
public:
|
public:
|
||||||
|
using structure = Structure;
|
||||||
|
|
||||||
explicit lsa_migrate_fn(CtxFactory context_factory)
|
explicit lsa_migrate_fn(CtxFactory context_factory)
|
||||||
: migrate_fn_type(1)
|
: migrate_fn_type(1)
|
||||||
, CtxFactory(std::move(context_factory))
|
, CtxFactory(std::move(context_factory))
|
||||||
@@ -201,8 +203,21 @@ public:
|
|||||||
/// arguments are passed to `T::size_when_serialized`.
|
/// arguments are passed to `T::size_when_serialized`.
|
||||||
///
|
///
|
||||||
/// \return null pointer of type `uint8_t*`.
|
/// \return null pointer of type `uint8_t*`.
|
||||||
|
/// Typed entry point: checks at compile time that `MigrateFn::structure` is
/// exactly `T`, then forwards `migrate_fn` and `args` unchanged to
/// `do_allocate<T>`.
template<typename T, typename MigrateFn, typename... Args>
|
||||||
|
uint8_t* allocate(MigrateFn* migrate_fn, Args&&... args) noexcept {
|
||||||
|
static_assert(std::is_same_v<typename MigrateFn::structure, T>);
|
||||||
|
return do_allocate<T>(migrate_fn, std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Typed entry point for nested allocation: checks at compile time that
/// `MigrateFn::structure` is exactly `T`, then forwards `migrate_fn` and
/// `args` unchanged to `do_allocate_nested<T>` and returns its result.
template<typename T, typename MigrateFn, typename... Args>
|
||||||
|
auto allocate_nested(MigrateFn* migrate_fn, Args&&... args) noexcept {
|
||||||
|
static_assert(std::is_same_v<typename MigrateFn::structure, T>);
|
||||||
|
return do_allocate_nested<T>(migrate_fn, std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
template<typename T, typename... Args>
|
template<typename T, typename... Args>
|
||||||
uint8_t* allocate(migrate_fn_type* migrate_fn, Args&& ... args) noexcept {
|
uint8_t* do_allocate(migrate_fn_type* migrate_fn, Args&&... args) noexcept {
|
||||||
auto size = T::size_when_serialized(std::forward<Args>(args)...);
|
auto size = T::size_when_serialized(std::forward<Args>(args)...);
|
||||||
_parent.request(size, migrate_fn);
|
_parent.request(size, migrate_fn);
|
||||||
|
|
||||||
@@ -216,7 +231,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<typename T, typename... Args>
|
template<typename T, typename... Args>
|
||||||
auto allocate_nested(migrate_fn_type* migrate_fn, Args&& ... args) noexcept {
|
auto do_allocate_nested(migrate_fn_type* migrate_fn, Args&& ... args) noexcept {
|
||||||
auto n = _parent.request(0, migrate_fn);
|
auto n = _parent.request(0, migrate_fn);
|
||||||
return T::get_sizer(continuation(_parent, n),
|
return T::get_sizer(continuation(_parent, n),
|
||||||
std::forward<Args>(args)...);
|
std::forward<Args>(args)...);
|
||||||
@@ -244,15 +259,28 @@ public:
|
|||||||
/// to the buffer requested in the sizing phase. Arguments are passed
|
/// to the buffer requested in the sizing phase. Arguments are passed
|
||||||
/// to `T::serialize`.
|
/// to `T::serialize`.
|
||||||
/// \return pointer to the IMR object
|
/// \return pointer to the IMR object
|
||||||
|
/// Typed entry point: checks at compile time that `MigrateFn::structure` is
/// exactly `T`, then forwards `migrate_fn` and `args` unchanged to
/// `do_allocate<T>`.
template<typename T, typename MigrateFn, typename... Args>
|
||||||
|
uint8_t* allocate(MigrateFn* migrate_fn, Args&&... args) noexcept {
|
||||||
|
static_assert(std::is_same_v<typename MigrateFn::structure, T>);
|
||||||
|
return do_allocate<T>(migrate_fn, std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Typed entry point for nested allocation: checks at compile time that
/// `MigrateFn::structure` is exactly `T`, then forwards `migrate_fn` and
/// `args` unchanged to `do_allocate_nested<T>` and returns its result.
template<typename T, typename MigrateFn, typename... Args>
|
||||||
|
auto allocate_nested(MigrateFn* migrate_fn, Args&&... args) noexcept {
|
||||||
|
static_assert(std::is_same_v<typename MigrateFn::structure, T>);
|
||||||
|
return do_allocate_nested<T>(migrate_fn, std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
template<typename T, typename... Args>
|
template<typename T, typename... Args>
|
||||||
uint8_t* allocate(migrate_fn_type* migrate_fn, Args&& ... args) noexcept {
|
uint8_t* do_allocate(migrate_fn_type* migrate_fn, Args&&... args) noexcept {
|
||||||
auto ptr = _parent.next_object();
|
auto ptr = _parent.next_object();
|
||||||
T::serialize(ptr, std::forward<Args>(args)...);
|
T::serialize(ptr, std::forward<Args>(args)...);
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T, typename... Args>
|
template<typename T, typename... Args>
|
||||||
auto allocate_nested(migrate_fn_type*, Args&& ... args) noexcept {
|
auto do_allocate_nested(migrate_fn_type*, Args&& ... args) noexcept {
|
||||||
auto ptr = _parent.next_object();
|
auto ptr = _parent.next_object();
|
||||||
return T::get_serializer(ptr,
|
return T::get_serializer(ptr,
|
||||||
continuation(ptr),
|
continuation(ptr),
|
||||||
|
|||||||
27
imr/utils.hh
27
imr/utils.hh
@@ -61,8 +61,12 @@ private:
|
|||||||
public:
|
public:
|
||||||
object_context(const uint8_t*, State... state) : _state { state... } { }
|
object_context(const uint8_t*, State... state) : _state { state... } { }
|
||||||
template<typename Tag, typename... Args>
|
template<typename Tag, typename... Args>
|
||||||
Context context_for(const uint8_t* ptr, Args&&... args) const noexcept {
|
auto context_for(const uint8_t* ptr, Args&&... args) const noexcept {
|
||||||
return create(ptr, std::index_sequence_for<State...>());
|
if constexpr (std::is_same_v<Tag, basic_object::tags::back_pointer>) {
|
||||||
|
return no_context_t();
|
||||||
|
} else {
|
||||||
|
return create(ptr, std::index_sequence_for<State...>());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -92,7 +96,7 @@ public:
|
|||||||
imr::member<tags::back_pointer, imr::tagged_type<tags::back_pointer, imr::pod<basic_object*>>>,
|
imr::member<tags::back_pointer, imr::tagged_type<tags::back_pointer, imr::pod<basic_object*>>>,
|
||||||
imr::member<tags::object, Structure>
|
imr::member<tags::object, Structure>
|
||||||
>;
|
>;
|
||||||
|
static constexpr size_t size_overhead = sizeof(basic_object*);
|
||||||
private:
|
private:
|
||||||
explicit object(uint8_t* ptr) noexcept
|
explicit object(uint8_t* ptr) noexcept
|
||||||
: basic_object(ptr)
|
: basic_object(ptr)
|
||||||
@@ -158,13 +162,22 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Create an IMR object
|
/// Create an IMR object
|
||||||
template<typename Writer>
|
template<typename Writer, typename MigrateFn>
|
||||||
GCC6_CONCEPT(requires WriterAllocator<Writer, Structure>)
|
GCC6_CONCEPT(requires WriterAllocator<Writer, Structure>)
|
||||||
static object make(Writer&& object_writer,
|
static object make(Writer&& object_writer,
|
||||||
allocation_strategy::migrate_fn migrate = &imr::alloc::default_lsa_migrate_fn<structure>::migrate_fn) {
|
MigrateFn* migrate = &imr::alloc::default_lsa_migrate_fn<structure>::migrate_fn) {
|
||||||
|
static_assert(std::is_same_v<typename MigrateFn::structure, structure>);
|
||||||
|
return do_make(std::forward<Writer>(object_writer), migrate);
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
template<typename Writer>
|
||||||
|
GCC6_CONCEPT(requires WriterAllocator<Writer, Structure>)
|
||||||
|
static object do_make(Writer&& object_writer, allocation_strategy::migrate_fn migrate) {
|
||||||
struct alloc_deleter {
|
struct alloc_deleter {
|
||||||
|
size_t _size;
|
||||||
|
|
||||||
void operator()(uint8_t* ptr) {
|
void operator()(uint8_t* ptr) {
|
||||||
current_allocator().free(ptr);
|
current_allocator().free(ptr, _size);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
using alloc_unique_ptr = std::unique_ptr<uint8_t[], alloc_deleter>;
|
using alloc_unique_ptr = std::unique_ptr<uint8_t[], alloc_deleter>;
|
||||||
@@ -176,7 +189,7 @@ public:
|
|||||||
auto& alloc = current_allocator();
|
auto& alloc = current_allocator();
|
||||||
alloc::object_allocator allocator(alloc);
|
alloc::object_allocator allocator(alloc);
|
||||||
auto obj_size = structure::size_when_serialized(writer, allocator.get_sizer());
|
auto obj_size = structure::size_when_serialized(writer, allocator.get_sizer());
|
||||||
auto ptr = alloc_unique_ptr(static_cast<uint8_t*>(alloc.alloc(migrate, obj_size, 1)));
|
auto ptr = alloc_unique_ptr(static_cast<uint8_t*>(alloc.alloc(migrate, obj_size, 1)), alloc_deleter { obj_size });
|
||||||
allocator.allocate_all();
|
allocator.allocate_all();
|
||||||
structure::serialize(ptr.get(), writer, allocator.get_serializer());
|
structure::serialize(ptr.get(), writer, allocator.get_serializer());
|
||||||
return object(ptr.release());
|
return object(ptr.release());
|
||||||
|
|||||||
@@ -42,5 +42,5 @@ elif [ "$ID" = "fedora" ]; then
|
|||||||
yum install -y yaml-cpp-devel thrift-devel antlr3-tool antlr3-C++-devel jsoncpp-devel snappy-devel
|
yum install -y yaml-cpp-devel thrift-devel antlr3-tool antlr3-C++-devel jsoncpp-devel snappy-devel
|
||||||
elif [ "$ID" = "centos" ]; then
|
elif [ "$ID" = "centos" ]; then
|
||||||
yum install -y yaml-cpp-devel thrift-devel scylla-antlr35-tool scylla-antlr35-C++-devel jsoncpp-devel snappy-devel scylla-boost163-static scylla-python34-pyparsing20 systemd-devel
|
yum install -y yaml-cpp-devel thrift-devel scylla-antlr35-tool scylla-antlr35-C++-devel jsoncpp-devel snappy-devel scylla-boost163-static scylla-python34-pyparsing20 systemd-devel
|
||||||
echo -e "Configure example:\n\tpython3.4 ./configure.py --enable-dpdk --mode=release --static-boost --compiler=/opt/scylladb/bin/g++-7.3 --python python3.4 --ldflag=-Wl,-rpath=/opt/scylladb/lib64 --cflags=-I/opt/scylladb/include --with-antlr3=/opt/scylladb/bin/antlr3"
|
echo -e "Configure example:\n\tpython3.6 ./configure.py --enable-dpdk --mode=release --static-boost --compiler=/opt/scylladb/bin/g++-7.3 --python python3.6 --ldflag=-Wl,-rpath=/opt/scylladb/lib64 --cflags=-I/opt/scylladb/include --with-antlr3=/opt/scylladb/bin/antlr3"
|
||||||
fi
|
fi
|
||||||
|
|||||||
80
json.cc
Normal file
80
json.cc
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2018 ScyllaDB
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This file is part of Scylla.
|
||||||
|
*
|
||||||
|
* Scylla is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Scylla is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "json.hh"
|
||||||
|
|
||||||
|
namespace seastar {
|
||||||
|
namespace json {
|
||||||
|
|
||||||
|
// Returns true when `c` has a value in the range 0x00..0x1F inclusive.
static inline bool is_control_char(char c) {
|
||||||
|
return c >= 0 && c <= 0x1F;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns true if at least one character of `s` is a control character
// (as defined by is_control_char), a double quote, or a backslash.
static inline bool needs_escaping(const sstring& s) {
|
||||||
|
return std::any_of(s.begin(), s.end(), [](char c) {return is_control_char(c) || c == '"' || c == '\\';});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns `value` wrapped in double quotes, with characters escaped as follows:
//   '"'  -> \"      '\\' -> \\
//   '\b' -> \b      '\f' -> \f     '\n' -> \n     '\r' -> \r     '\t' -> \t
//   any other control character (see is_control_char) -> \u followed by its
//   value as 4 zero-padded uppercase hexadecimal digits
// All other characters are copied through unchanged.
sstring value_to_quoted_string(const sstring& value) {
|
||||||
|
// Fast path: nothing to escape, so just add the surrounding quotes.
if (!needs_escaping(value)) {
|
||||||
|
return sprint("\"%s\"", value);
|
||||||
|
}
|
||||||
|
std::ostringstream oss;
|
||||||
|
// Stream formatting used by the \uXXXX output in the default case below.
oss << std::hex << std::uppercase << std::setfill('0');
|
||||||
|
oss.put('"');
|
||||||
|
for (char c : value) {
|
||||||
|
switch (c) {
|
||||||
|
case '"':
|
||||||
|
oss.put('\\').put('"');
|
||||||
|
break;
|
||||||
|
case '\\':
|
||||||
|
oss.put('\\').put('\\');
|
||||||
|
break;
|
||||||
|
case '\b':
|
||||||
|
oss.put('\\').put('b');
|
||||||
|
break;
|
||||||
|
case '\f':
|
||||||
|
oss.put('\\').put('f');
|
||||||
|
break;
|
||||||
|
case '\n':
|
||||||
|
oss.put('\\').put('n');
|
||||||
|
break;
|
||||||
|
case '\r':
|
||||||
|
oss.put('\\').put('r');
|
||||||
|
break;
|
||||||
|
case '\t':
|
||||||
|
oss.put('\\').put('t');
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
if (is_control_char(c)) {
|
||||||
|
// Remaining control characters have no short escape: write \u plus 4 hex digits.
oss.put('\\').put('u') << std::setw(4) << static_cast<int>(c);
|
||||||
|
} else {
|
||||||
|
oss.put(c);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
oss.put('"');
|
||||||
|
return oss.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
2
json.hh
2
json.hh
@@ -95,6 +95,8 @@ inline std::map<sstring, sstring> to_map(const sstring& raw) {
|
|||||||
return to_map(raw, std::map<sstring, sstring>());
|
return to_map(raw, std::map<sstring, sstring>());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sstring value_to_quoted_string(const sstring& value);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
4
keys.hh
4
keys.hh
@@ -748,6 +748,10 @@ public:
|
|||||||
static const compound& get_compound_type(const schema& s) {
|
static const compound& get_compound_type(const schema& s) {
|
||||||
return s.clustering_key_prefix_type();
|
return s.clustering_key_prefix_type();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns a clustering_key_prefix_view built from a default-constructed
// bytes_view.
static clustering_key_prefix_view make_empty() {
|
||||||
|
return { bytes_view() };
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class clustering_key_prefix : public prefix_compound_wrapper<clustering_key_prefix, clustering_key_prefix_view, clustering_key> {
|
class clustering_key_prefix : public prefix_compound_wrapper<clustering_key_prefix, clustering_key_prefix_view, clustering_key> {
|
||||||
|
|||||||
@@ -119,9 +119,17 @@ insert_token_range_to_sorted_container_while_unwrapping(
|
|||||||
const dht::token& tok,
|
const dht::token& tok,
|
||||||
dht::token_range_vector& ret) {
|
dht::token_range_vector& ret) {
|
||||||
if (prev_tok < tok) {
|
if (prev_tok < tok) {
|
||||||
ret.emplace_back(
|
auto pos = ret.end();
|
||||||
dht::token_range::bound(prev_tok, false),
|
if (!ret.empty() && !std::prev(pos)->end()) {
|
||||||
dht::token_range::bound(tok, true));
|
// We inserted a wrapped range (a, b] previously as
|
||||||
|
// (-inf, b], (a, +inf). So now we insert in the next-to-last
|
||||||
|
// position to keep the last range (a, +inf) at the end.
|
||||||
|
pos = std::prev(pos);
|
||||||
|
}
|
||||||
|
ret.insert(pos,
|
||||||
|
dht::token_range{
|
||||||
|
dht::token_range::bound(prev_tok, false),
|
||||||
|
dht::token_range::bound(tok, true)});
|
||||||
} else {
|
} else {
|
||||||
ret.emplace_back(
|
ret.emplace_back(
|
||||||
dht::token_range::bound(prev_tok, false),
|
dht::token_range::bound(prev_tok, false),
|
||||||
@@ -164,6 +172,30 @@ abstract_replication_strategy::get_primary_ranges(inet_address ep) {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Collects the token ranges for which `ep` is the first endpoint, among the
// endpoints of `ep`'s own datacenter, in the natural endpoint list computed
// for the range's end token. Ranges are added through
// insert_token_range_to_sorted_container_while_unwrapping().
dht::token_range_vector
|
||||||
|
abstract_replication_strategy::get_primary_ranges_within_dc(inet_address ep) {
|
||||||
|
dht::token_range_vector ret;
|
||||||
|
sstring local_dc = _snitch->get_datacenter(ep);
|
||||||
|
// Held by value; .at() presumably throws if the datacenter has no entry — TODO confirm container type.
std::unordered_set<inet_address> local_dc_nodes = _token_metadata.get_topology().get_datacenter_endpoints().at(local_dc);
|
||||||
|
// Start from the last sorted token so the first iteration pairs (last, first).
auto prev_tok = _token_metadata.sorted_tokens().back();
|
||||||
|
for (auto tok : _token_metadata.sorted_tokens()) {
|
||||||
|
auto&& eps = calculate_natural_endpoints(tok, _token_metadata);
|
||||||
|
// Unlike get_primary_ranges() which checks if ep is the first
|
||||||
|
// owner of this range, here we check if ep is the first just
|
||||||
|
// among nodes which belong to the local dc of ep.
|
||||||
|
for (auto& e : eps) {
|
||||||
|
if (local_dc_nodes.count(e)) {
|
||||||
|
if (e == ep) {
|
||||||
|
insert_token_range_to_sorted_container_while_unwrapping(prev_tok, tok, ret);
|
||||||
|
}
|
||||||
|
// Only the first endpoint from the local dc is considered, whether or not it is ep.
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
prev_tok = tok;
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
std::unordered_multimap<inet_address, dht::token_range>
|
std::unordered_multimap<inet_address, dht::token_range>
|
||||||
abstract_replication_strategy::get_address_ranges(token_metadata& tm) const {
|
abstract_replication_strategy::get_address_ranges(token_metadata& tm) const {
|
||||||
std::unordered_multimap<inet_address, dht::token_range> ret;
|
std::unordered_multimap<inet_address, dht::token_range> ret;
|
||||||
|
|||||||
@@ -113,6 +113,10 @@ public:
|
|||||||
// This function is the analogue of Origin's
|
// This function is the analogue of Origin's
|
||||||
// StorageService.getPrimaryRangesForEndpoint().
|
// StorageService.getPrimaryRangesForEndpoint().
|
||||||
dht::token_range_vector get_primary_ranges(inet_address ep);
|
dht::token_range_vector get_primary_ranges(inet_address ep);
|
||||||
|
// get_primary_ranges_within_dc() is similar to get_primary_ranges()
|
||||||
|
// except it assigns a primary node for each range within each dc,
|
||||||
|
// instead of one node globally.
|
||||||
|
dht::token_range_vector get_primary_ranges_within_dc(inet_address ep);
|
||||||
|
|
||||||
std::unordered_multimap<inet_address, dht::token_range> get_address_ranges(token_metadata& tm) const;
|
std::unordered_multimap<inet_address, dht::token_range> get_address_ranges(token_metadata& tm) const;
|
||||||
|
|
||||||
|
|||||||
7
main.cc
7
main.cc
@@ -763,8 +763,11 @@ int main(int ac, char** av) {
|
|||||||
return service::get_local_storage_service().drain_on_shutdown();
|
return service::get_local_storage_service().drain_on_shutdown();
|
||||||
});
|
});
|
||||||
|
|
||||||
engine().at_exit([] {
|
engine().at_exit([cfg] {
|
||||||
return view_builder.stop();
|
if (cfg->view_building()) {
|
||||||
|
return view_builder.stop();
|
||||||
|
}
|
||||||
|
return make_ready_future<>();
|
||||||
});
|
});
|
||||||
|
|
||||||
engine().at_exit([&db] {
|
engine().at_exit([&db] {
|
||||||
|
|||||||
@@ -262,11 +262,12 @@ void messaging_service::start_listen() {
|
|||||||
// FIXME: we don't set so.tcp_nodelay, because we can't tell at this point whether the connection will come from a
|
// FIXME: we don't set so.tcp_nodelay, because we can't tell at this point whether the connection will come from a
|
||||||
// local or remote datacenter, and whether or not the connection will be used for gossip. We can fix
|
// local or remote datacenter, and whether or not the connection will be used for gossip. We can fix
|
||||||
// the first by wrapping its server_socket, but not the second.
|
// the first by wrapping its server_socket, but not the second.
|
||||||
|
auto limits = rpc_resource_limits(_mcfg.rpc_memory_limit);
|
||||||
if (!_server[0]) {
|
if (!_server[0]) {
|
||||||
auto listen = [&] (const gms::inet_address& a) {
|
auto listen = [&] (const gms::inet_address& a) {
|
||||||
auto addr = ipv4_addr{a.raw_addr(), _port};
|
auto addr = ipv4_addr{a.raw_addr(), _port};
|
||||||
return std::unique_ptr<rpc_protocol_server_wrapper>(new rpc_protocol_server_wrapper(*_rpc,
|
return std::unique_ptr<rpc_protocol_server_wrapper>(new rpc_protocol_server_wrapper(*_rpc,
|
||||||
so, addr, rpc_resource_limits(_mcfg.rpc_memory_limit)));
|
so, addr, limits));
|
||||||
};
|
};
|
||||||
_server[0] = listen(_listen_address);
|
_server[0] = listen(_listen_address);
|
||||||
if (listen_to_bc) {
|
if (listen_to_bc) {
|
||||||
@@ -277,7 +278,7 @@ void messaging_service::start_listen() {
|
|||||||
if (!_server_tls[0]) {
|
if (!_server_tls[0]) {
|
||||||
auto listen = [&] (const gms::inet_address& a) {
|
auto listen = [&] (const gms::inet_address& a) {
|
||||||
return std::unique_ptr<rpc_protocol_server_wrapper>(
|
return std::unique_ptr<rpc_protocol_server_wrapper>(
|
||||||
[this, &so, &a] () -> std::unique_ptr<rpc_protocol_server_wrapper>{
|
[this, &so, &a, limits] () -> std::unique_ptr<rpc_protocol_server_wrapper>{
|
||||||
if (_encrypt_what == encrypt_what::none) {
|
if (_encrypt_what == encrypt_what::none) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
@@ -285,7 +286,7 @@ void messaging_service::start_listen() {
|
|||||||
lo.reuse_address = true;
|
lo.reuse_address = true;
|
||||||
auto addr = make_ipv4_address(ipv4_addr{a.raw_addr(), _ssl_port});
|
auto addr = make_ipv4_address(ipv4_addr{a.raw_addr(), _ssl_port});
|
||||||
return std::make_unique<rpc_protocol_server_wrapper>(*_rpc,
|
return std::make_unique<rpc_protocol_server_wrapper>(*_rpc,
|
||||||
so, seastar::tls::listen(_credentials, addr, lo));
|
so, seastar::tls::listen(_credentials, addr, lo), limits);
|
||||||
}());
|
}());
|
||||||
};
|
};
|
||||||
_server_tls[0] = listen(_listen_address);
|
_server_tls[0] = listen(_listen_address);
|
||||||
|
|||||||
@@ -125,7 +125,7 @@ public:
|
|||||||
return _ck.equal(s, other._ck)
|
return _ck.equal(s, other._ck)
|
||||||
&& _t == other._t
|
&& _t == other._t
|
||||||
&& _marker == other._marker
|
&& _marker == other._marker
|
||||||
&& _cells.equal(column_kind::static_column, s, other._cells, s);
|
&& _cells.equal(column_kind::regular_column, s, other._cells, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
friend std::ostream& operator<<(std::ostream& os, const clustering_row& row);
|
friend std::ostream& operator<<(std::ostream& os, const clustering_row& row);
|
||||||
|
|||||||
@@ -144,7 +144,14 @@ mutation_partition::mutation_partition(const schema& s, const mutation_partition
|
|||||||
, _static_row(s, column_kind::static_column, x._static_row)
|
, _static_row(s, column_kind::static_column, x._static_row)
|
||||||
, _static_row_continuous(x._static_row_continuous)
|
, _static_row_continuous(x._static_row_continuous)
|
||||||
, _rows()
|
, _rows()
|
||||||
, _row_tombstones(x._row_tombstones) {
|
, _row_tombstones(x._row_tombstones)
|
||||||
|
#ifdef SEASTAR_DEBUG
|
||||||
|
, _schema_version(s.version())
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
#ifdef SEASTAR_DEBUG
|
||||||
|
assert(x._schema_version == _schema_version);
|
||||||
|
#endif
|
||||||
auto cloner = [&s] (const auto& x) {
|
auto cloner = [&s] (const auto& x) {
|
||||||
return current_allocator().construct<rows_entry>(s, x);
|
return current_allocator().construct<rows_entry>(s, x);
|
||||||
};
|
};
|
||||||
@@ -157,7 +164,14 @@ mutation_partition::mutation_partition(const mutation_partition& x, const schema
|
|||||||
, _static_row(schema, column_kind::static_column, x._static_row)
|
, _static_row(schema, column_kind::static_column, x._static_row)
|
||||||
, _static_row_continuous(x._static_row_continuous)
|
, _static_row_continuous(x._static_row_continuous)
|
||||||
, _rows()
|
, _rows()
|
||||||
, _row_tombstones(x._row_tombstones, range_tombstone_list::copy_comparator_only()) {
|
, _row_tombstones(x._row_tombstones, range_tombstone_list::copy_comparator_only())
|
||||||
|
#ifdef SEASTAR_DEBUG
|
||||||
|
, _schema_version(schema.version())
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
#ifdef SEASTAR_DEBUG
|
||||||
|
assert(x._schema_version == _schema_version);
|
||||||
|
#endif
|
||||||
try {
|
try {
|
||||||
for(auto&& r : ck_ranges) {
|
for(auto&& r : ck_ranges) {
|
||||||
for (const rows_entry& e : x.range(schema, r)) {
|
for (const rows_entry& e : x.range(schema, r)) {
|
||||||
@@ -180,7 +194,13 @@ mutation_partition::mutation_partition(mutation_partition&& x, const schema& sch
|
|||||||
, _static_row_continuous(x._static_row_continuous)
|
, _static_row_continuous(x._static_row_continuous)
|
||||||
, _rows(std::move(x._rows))
|
, _rows(std::move(x._rows))
|
||||||
, _row_tombstones(std::move(x._row_tombstones))
|
, _row_tombstones(std::move(x._row_tombstones))
|
||||||
|
#ifdef SEASTAR_DEBUG
|
||||||
|
, _schema_version(schema.version())
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
|
#ifdef SEASTAR_DEBUG
|
||||||
|
assert(x._schema_version == _schema_version);
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
auto deleter = current_deleter<rows_entry>();
|
auto deleter = current_deleter<rows_entry>();
|
||||||
auto it = _rows.begin();
|
auto it = _rows.begin();
|
||||||
@@ -220,6 +240,7 @@ mutation_partition::operator=(mutation_partition&& x) noexcept {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void mutation_partition::ensure_last_dummy(const schema& s) {
|
void mutation_partition::ensure_last_dummy(const schema& s) {
|
||||||
|
check_schema(s);
|
||||||
if (_rows.empty() || !_rows.rbegin()->is_last_dummy()) {
|
if (_rows.empty() || !_rows.rbegin()->is_last_dummy()) {
|
||||||
_rows.insert_before(_rows.end(),
|
_rows.insert_before(_rows.end(),
|
||||||
*current_allocator().construct<rows_entry>(s, rows_entry::last_dummy_tag(), is_continuous::yes));
|
*current_allocator().construct<rows_entry>(s, rows_entry::last_dummy_tag(), is_continuous::yes));
|
||||||
@@ -276,11 +297,16 @@ void deletable_row::apply(const schema& s, clustering_row cr) {
|
|||||||
|
|
||||||
void
|
void
|
||||||
mutation_partition::apply(const schema& s, const mutation_fragment& mf) {
|
mutation_partition::apply(const schema& s, const mutation_fragment& mf) {
|
||||||
|
check_schema(s);
|
||||||
mutation_fragment_applier applier{s, *this};
|
mutation_fragment_applier applier{s, *this};
|
||||||
mf.visit(applier);
|
mf.visit(applier);
|
||||||
}
|
}
|
||||||
|
|
||||||
void mutation_partition::apply_monotonically(const schema& s, mutation_partition&& p, cache_tracker* tracker) {
|
void mutation_partition::apply_monotonically(const schema& s, mutation_partition&& p, cache_tracker* tracker) {
|
||||||
|
#ifdef SEASTAR_DEBUG
|
||||||
|
assert(s.version() == _schema_version);
|
||||||
|
assert(p._schema_version == _schema_version);
|
||||||
|
#endif
|
||||||
_tombstone.apply(p._tombstone);
|
_tombstone.apply(p._tombstone);
|
||||||
_row_tombstones.apply_monotonically(s, std::move(p._row_tombstones));
|
_row_tombstones.apply_monotonically(s, std::move(p._row_tombstones));
|
||||||
_static_row.apply_monotonically(s, column_kind::static_column, std::move(p._static_row));
|
_static_row.apply_monotonically(s, column_kind::static_column, std::move(p._static_row));
|
||||||
@@ -356,6 +382,7 @@ void mutation_partition::apply_weak(const schema& s, mutation_partition&& p) {
|
|||||||
|
|
||||||
tombstone
|
tombstone
|
||||||
mutation_partition::range_tombstone_for_row(const schema& schema, const clustering_key& key) const {
|
mutation_partition::range_tombstone_for_row(const schema& schema, const clustering_key& key) const {
|
||||||
|
check_schema(schema);
|
||||||
tombstone t = _tombstone;
|
tombstone t = _tombstone;
|
||||||
if (!_row_tombstones.empty()) {
|
if (!_row_tombstones.empty()) {
|
||||||
auto found = _row_tombstones.search_tombstone_covering(schema, key);
|
auto found = _row_tombstones.search_tombstone_covering(schema, key);
|
||||||
@@ -366,6 +393,7 @@ mutation_partition::range_tombstone_for_row(const schema& schema, const clusteri
|
|||||||
|
|
||||||
row_tombstone
|
row_tombstone
|
||||||
mutation_partition::tombstone_for_row(const schema& schema, const clustering_key& key) const {
|
mutation_partition::tombstone_for_row(const schema& schema, const clustering_key& key) const {
|
||||||
|
check_schema(schema);
|
||||||
row_tombstone t = row_tombstone(range_tombstone_for_row(schema, key));
|
row_tombstone t = row_tombstone(range_tombstone_for_row(schema, key));
|
||||||
|
|
||||||
auto j = _rows.find(key, rows_entry::compare(schema));
|
auto j = _rows.find(key, rows_entry::compare(schema));
|
||||||
@@ -378,6 +406,7 @@ mutation_partition::tombstone_for_row(const schema& schema, const clustering_key
|
|||||||
|
|
||||||
row_tombstone
|
row_tombstone
|
||||||
mutation_partition::tombstone_for_row(const schema& schema, const rows_entry& e) const {
|
mutation_partition::tombstone_for_row(const schema& schema, const rows_entry& e) const {
|
||||||
|
check_schema(schema);
|
||||||
row_tombstone t = e.row().deleted_at();
|
row_tombstone t = e.row().deleted_at();
|
||||||
t.apply(range_tombstone_for_row(schema, e.key()));
|
t.apply(range_tombstone_for_row(schema, e.key()));
|
||||||
return t;
|
return t;
|
||||||
@@ -385,6 +414,7 @@ mutation_partition::tombstone_for_row(const schema& schema, const rows_entry& e)
|
|||||||
|
|
||||||
void
|
void
|
||||||
mutation_partition::apply_row_tombstone(const schema& schema, clustering_key_prefix prefix, tombstone t) {
|
mutation_partition::apply_row_tombstone(const schema& schema, clustering_key_prefix prefix, tombstone t) {
|
||||||
|
check_schema(schema);
|
||||||
assert(!prefix.is_full(schema));
|
assert(!prefix.is_full(schema));
|
||||||
auto start = prefix;
|
auto start = prefix;
|
||||||
_row_tombstones.apply(schema, {std::move(start), std::move(prefix), std::move(t)});
|
_row_tombstones.apply(schema, {std::move(start), std::move(prefix), std::move(t)});
|
||||||
@@ -392,11 +422,13 @@ mutation_partition::apply_row_tombstone(const schema& schema, clustering_key_pre
|
|||||||
|
|
||||||
void
|
void
|
||||||
mutation_partition::apply_row_tombstone(const schema& schema, range_tombstone rt) {
|
mutation_partition::apply_row_tombstone(const schema& schema, range_tombstone rt) {
|
||||||
|
check_schema(schema);
|
||||||
_row_tombstones.apply(schema, std::move(rt));
|
_row_tombstones.apply(schema, std::move(rt));
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
mutation_partition::apply_delete(const schema& schema, const clustering_key_prefix& prefix, tombstone t) {
|
mutation_partition::apply_delete(const schema& schema, const clustering_key_prefix& prefix, tombstone t) {
|
||||||
|
check_schema(schema);
|
||||||
if (prefix.is_empty(schema)) {
|
if (prefix.is_empty(schema)) {
|
||||||
apply(t);
|
apply(t);
|
||||||
} else if (prefix.is_full(schema)) {
|
} else if (prefix.is_full(schema)) {
|
||||||
@@ -408,6 +440,7 @@ mutation_partition::apply_delete(const schema& schema, const clustering_key_pref
|
|||||||
|
|
||||||
void
|
void
|
||||||
mutation_partition::apply_delete(const schema& schema, range_tombstone rt) {
|
mutation_partition::apply_delete(const schema& schema, range_tombstone rt) {
|
||||||
|
check_schema(schema);
|
||||||
if (range_tombstone::is_single_clustering_row_tombstone(schema, rt.start, rt.start_kind, rt.end, rt.end_kind)) {
|
if (range_tombstone::is_single_clustering_row_tombstone(schema, rt.start, rt.start_kind, rt.end, rt.end_kind)) {
|
||||||
apply_delete(schema, std::move(rt.start), std::move(rt.tomb));
|
apply_delete(schema, std::move(rt.start), std::move(rt.tomb));
|
||||||
return;
|
return;
|
||||||
@@ -417,6 +450,7 @@ mutation_partition::apply_delete(const schema& schema, range_tombstone rt) {
|
|||||||
|
|
||||||
void
|
void
|
||||||
mutation_partition::apply_delete(const schema& schema, clustering_key&& prefix, tombstone t) {
|
mutation_partition::apply_delete(const schema& schema, clustering_key&& prefix, tombstone t) {
|
||||||
|
check_schema(schema);
|
||||||
if (prefix.is_empty(schema)) {
|
if (prefix.is_empty(schema)) {
|
||||||
apply(t);
|
apply(t);
|
||||||
} else if (prefix.is_full(schema)) {
|
} else if (prefix.is_full(schema)) {
|
||||||
@@ -428,6 +462,7 @@ mutation_partition::apply_delete(const schema& schema, clustering_key&& prefix,
|
|||||||
|
|
||||||
void
|
void
|
||||||
mutation_partition::apply_delete(const schema& schema, clustering_key_prefix_view prefix, tombstone t) {
|
mutation_partition::apply_delete(const schema& schema, clustering_key_prefix_view prefix, tombstone t) {
|
||||||
|
check_schema(schema);
|
||||||
if (prefix.is_empty(schema)) {
|
if (prefix.is_empty(schema)) {
|
||||||
apply(t);
|
apply(t);
|
||||||
} else if (prefix.is_full(schema)) {
|
} else if (prefix.is_full(schema)) {
|
||||||
@@ -451,12 +486,14 @@ void mutation_partition::insert_row(const schema& s, const clustering_key& key,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void mutation_partition::insert_row(const schema& s, const clustering_key& key, const deletable_row& row) {
|
void mutation_partition::insert_row(const schema& s, const clustering_key& key, const deletable_row& row) {
|
||||||
|
check_schema(s);
|
||||||
auto e = current_allocator().construct<rows_entry>(s, key, row);
|
auto e = current_allocator().construct<rows_entry>(s, key, row);
|
||||||
_rows.insert(_rows.end(), *e, rows_entry::compare(s));
|
_rows.insert(_rows.end(), *e, rows_entry::compare(s));
|
||||||
}
|
}
|
||||||
|
|
||||||
const row*
|
const row*
|
||||||
mutation_partition::find_row(const schema& s, const clustering_key& key) const {
|
mutation_partition::find_row(const schema& s, const clustering_key& key) const {
|
||||||
|
check_schema(s);
|
||||||
auto i = _rows.find(key, rows_entry::compare(s));
|
auto i = _rows.find(key, rows_entry::compare(s));
|
||||||
if (i == _rows.end()) {
|
if (i == _rows.end()) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
@@ -466,6 +503,7 @@ mutation_partition::find_row(const schema& s, const clustering_key& key) const {
|
|||||||
|
|
||||||
deletable_row&
|
deletable_row&
|
||||||
mutation_partition::clustered_row(const schema& s, clustering_key&& key) {
|
mutation_partition::clustered_row(const schema& s, clustering_key&& key) {
|
||||||
|
check_schema(s);
|
||||||
auto i = _rows.find(key, rows_entry::compare(s));
|
auto i = _rows.find(key, rows_entry::compare(s));
|
||||||
if (i == _rows.end()) {
|
if (i == _rows.end()) {
|
||||||
auto e = current_allocator().construct<rows_entry>(std::move(key));
|
auto e = current_allocator().construct<rows_entry>(std::move(key));
|
||||||
@@ -477,6 +515,7 @@ mutation_partition::clustered_row(const schema& s, clustering_key&& key) {
|
|||||||
|
|
||||||
deletable_row&
|
deletable_row&
|
||||||
mutation_partition::clustered_row(const schema& s, const clustering_key& key) {
|
mutation_partition::clustered_row(const schema& s, const clustering_key& key) {
|
||||||
|
check_schema(s);
|
||||||
auto i = _rows.find(key, rows_entry::compare(s));
|
auto i = _rows.find(key, rows_entry::compare(s));
|
||||||
if (i == _rows.end()) {
|
if (i == _rows.end()) {
|
||||||
auto e = current_allocator().construct<rows_entry>(key);
|
auto e = current_allocator().construct<rows_entry>(key);
|
||||||
@@ -488,6 +527,7 @@ mutation_partition::clustered_row(const schema& s, const clustering_key& key) {
|
|||||||
|
|
||||||
deletable_row&
|
deletable_row&
|
||||||
mutation_partition::clustered_row(const schema& s, clustering_key_view key) {
|
mutation_partition::clustered_row(const schema& s, clustering_key_view key) {
|
||||||
|
check_schema(s);
|
||||||
auto i = _rows.find(key, rows_entry::compare(s));
|
auto i = _rows.find(key, rows_entry::compare(s));
|
||||||
if (i == _rows.end()) {
|
if (i == _rows.end()) {
|
||||||
auto e = current_allocator().construct<rows_entry>(key);
|
auto e = current_allocator().construct<rows_entry>(key);
|
||||||
@@ -499,6 +539,7 @@ mutation_partition::clustered_row(const schema& s, clustering_key_view key) {
|
|||||||
|
|
||||||
deletable_row&
|
deletable_row&
|
||||||
mutation_partition::clustered_row(const schema& s, position_in_partition_view pos, is_dummy dummy, is_continuous continuous) {
|
mutation_partition::clustered_row(const schema& s, position_in_partition_view pos, is_dummy dummy, is_continuous continuous) {
|
||||||
|
check_schema(s);
|
||||||
auto i = _rows.find(pos, rows_entry::compare(s));
|
auto i = _rows.find(pos, rows_entry::compare(s));
|
||||||
if (i == _rows.end()) {
|
if (i == _rows.end()) {
|
||||||
auto e = current_allocator().construct<rows_entry>(s, pos, dummy, continuous);
|
auto e = current_allocator().construct<rows_entry>(s, pos, dummy, continuous);
|
||||||
@@ -510,6 +551,7 @@ mutation_partition::clustered_row(const schema& s, position_in_partition_view po
|
|||||||
|
|
||||||
mutation_partition::rows_type::const_iterator
|
mutation_partition::rows_type::const_iterator
|
||||||
mutation_partition::lower_bound(const schema& schema, const query::clustering_range& r) const {
|
mutation_partition::lower_bound(const schema& schema, const query::clustering_range& r) const {
|
||||||
|
check_schema(schema);
|
||||||
if (!r.start()) {
|
if (!r.start()) {
|
||||||
return std::cbegin(_rows);
|
return std::cbegin(_rows);
|
||||||
}
|
}
|
||||||
@@ -518,6 +560,7 @@ mutation_partition::lower_bound(const schema& schema, const query::clustering_ra
|
|||||||
|
|
||||||
mutation_partition::rows_type::const_iterator
|
mutation_partition::rows_type::const_iterator
|
||||||
mutation_partition::upper_bound(const schema& schema, const query::clustering_range& r) const {
|
mutation_partition::upper_bound(const schema& schema, const query::clustering_range& r) const {
|
||||||
|
check_schema(schema);
|
||||||
if (!r.end()) {
|
if (!r.end()) {
|
||||||
return std::cend(_rows);
|
return std::cend(_rows);
|
||||||
}
|
}
|
||||||
@@ -526,6 +569,7 @@ mutation_partition::upper_bound(const schema& schema, const query::clustering_ra
|
|||||||
|
|
||||||
boost::iterator_range<mutation_partition::rows_type::const_iterator>
|
boost::iterator_range<mutation_partition::rows_type::const_iterator>
|
||||||
mutation_partition::range(const schema& schema, const query::clustering_range& r) const {
|
mutation_partition::range(const schema& schema, const query::clustering_range& r) const {
|
||||||
|
check_schema(schema);
|
||||||
return boost::make_iterator_range(lower_bound(schema, r), upper_bound(schema, r));
|
return boost::make_iterator_range(lower_bound(schema, r), upper_bound(schema, r));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -562,6 +606,7 @@ mutation_partition::upper_bound(const schema& schema, const query::clustering_ra
|
|||||||
template<typename Func>
|
template<typename Func>
|
||||||
void mutation_partition::for_each_row(const schema& schema, const query::clustering_range& row_range, bool reversed, Func&& func) const
|
void mutation_partition::for_each_row(const schema& schema, const query::clustering_range& row_range, bool reversed, Func&& func) const
|
||||||
{
|
{
|
||||||
|
check_schema(schema);
|
||||||
auto r = range(schema, row_range);
|
auto r = range(schema, row_range);
|
||||||
if (!reversed) {
|
if (!reversed) {
|
||||||
for (const auto& e : r) {
|
for (const auto& e : r) {
|
||||||
@@ -778,6 +823,7 @@ bool has_any_live_data(const schema& s, column_kind kind, const row& cells, tomb
|
|||||||
|
|
||||||
void
|
void
|
||||||
mutation_partition::query_compacted(query::result::partition_writer& pw, const schema& s, uint32_t limit) const {
|
mutation_partition::query_compacted(query::result::partition_writer& pw, const schema& s, uint32_t limit) const {
|
||||||
|
check_schema(s);
|
||||||
const query::partition_slice& slice = pw.slice();
|
const query::partition_slice& slice = pw.slice();
|
||||||
max_timestamp max_ts{pw.last_modified()};
|
max_timestamp max_ts{pw.last_modified()};
|
||||||
|
|
||||||
@@ -996,6 +1042,10 @@ bool mutation_partition::equal(const schema& s, const mutation_partition& p) con
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool mutation_partition::equal(const schema& this_schema, const mutation_partition& p, const schema& p_schema) const {
|
bool mutation_partition::equal(const schema& this_schema, const mutation_partition& p, const schema& p_schema) const {
|
||||||
|
#ifdef SEASTAR_DEBUG
|
||||||
|
assert(_schema_version == this_schema.version());
|
||||||
|
assert(p._schema_version == p_schema.version());
|
||||||
|
#endif
|
||||||
if (_tombstone != p._tombstone) {
|
if (_tombstone != p._tombstone) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -1095,7 +1145,7 @@ row::apply_monotonically(const column_definition& column, atomic_cell_or_collect
|
|||||||
if (_type == storage_type::vector && id < max_vector_size) {
|
if (_type == storage_type::vector && id < max_vector_size) {
|
||||||
if (id >= _storage.vector.v.size()) {
|
if (id >= _storage.vector.v.size()) {
|
||||||
_storage.vector.v.resize(id);
|
_storage.vector.v.resize(id);
|
||||||
_storage.vector.v.emplace_back(cell_and_hash{std::move(value), std::move(hash)});
|
_storage.vector.v.emplace_back(std::move(value), std::move(hash));
|
||||||
_storage.vector.present.set(id);
|
_storage.vector.present.set(id);
|
||||||
_size++;
|
_size++;
|
||||||
} else if (auto& cell_and_hash = _storage.vector.v[id]; !bool(cell_and_hash.cell)) {
|
} else if (auto& cell_and_hash = _storage.vector.v[id]; !bool(cell_and_hash.cell)) {
|
||||||
@@ -1124,6 +1174,7 @@ row::apply_monotonically(const column_definition& column, atomic_cell_or_collect
|
|||||||
void
|
void
|
||||||
row::append_cell(column_id id, atomic_cell_or_collection value) {
|
row::append_cell(column_id id, atomic_cell_or_collection value) {
|
||||||
if (_type == storage_type::vector && id < max_vector_size) {
|
if (_type == storage_type::vector && id < max_vector_size) {
|
||||||
|
assert(_storage.vector.v.size() <= id);
|
||||||
_storage.vector.v.resize(id);
|
_storage.vector.v.resize(id);
|
||||||
_storage.vector.v.emplace_back(cell_and_hash{std::move(value), cell_hash_opt()});
|
_storage.vector.v.emplace_back(cell_and_hash{std::move(value), cell_hash_opt()});
|
||||||
_storage.vector.present.set(id);
|
_storage.vector.present.set(id);
|
||||||
@@ -1162,7 +1213,7 @@ row::find_cell(column_id id) const {
|
|||||||
size_t row::external_memory_usage(const schema& s, column_kind kind) const {
|
size_t row::external_memory_usage(const schema& s, column_kind kind) const {
|
||||||
size_t mem = 0;
|
size_t mem = 0;
|
||||||
if (_type == storage_type::vector) {
|
if (_type == storage_type::vector) {
|
||||||
mem += _storage.vector.v.external_memory_usage();
|
mem += _storage.vector.v.used_space_external_memory_usage();
|
||||||
column_id id = 0;
|
column_id id = 0;
|
||||||
for (auto&& c_a_h : _storage.vector.v) {
|
for (auto&& c_a_h : _storage.vector.v) {
|
||||||
auto& cdef = s.column_at(kind, id++);
|
auto& cdef = s.column_at(kind, id++);
|
||||||
@@ -1188,6 +1239,7 @@ size_t rows_entry::memory_usage(const schema& s) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
size_t mutation_partition::external_memory_usage(const schema& s) const {
|
size_t mutation_partition::external_memory_usage(const schema& s) const {
|
||||||
|
check_schema(s);
|
||||||
size_t sum = 0;
|
size_t sum = 0;
|
||||||
sum += static_row().external_memory_usage(s, column_kind::static_column);
|
sum += static_row().external_memory_usage(s, column_kind::static_column);
|
||||||
for (auto& clr : clustered_rows()) {
|
for (auto& clr : clustered_rows()) {
|
||||||
@@ -1206,6 +1258,7 @@ void mutation_partition::trim_rows(const schema& s,
|
|||||||
const std::vector<query::clustering_range>& row_ranges,
|
const std::vector<query::clustering_range>& row_ranges,
|
||||||
Func&& func)
|
Func&& func)
|
||||||
{
|
{
|
||||||
|
check_schema(s);
|
||||||
static_assert(std::is_same<stop_iteration, std::result_of_t<Func(rows_entry&)>>::value, "Bad func signature");
|
static_assert(std::is_same<stop_iteration, std::result_of_t<Func(rows_entry&)>>::value, "Bad func signature");
|
||||||
|
|
||||||
stop_iteration stop = stop_iteration::no;
|
stop_iteration stop = stop_iteration::no;
|
||||||
@@ -1250,6 +1303,7 @@ uint32_t mutation_partition::do_compact(const schema& s,
|
|||||||
uint32_t row_limit,
|
uint32_t row_limit,
|
||||||
can_gc_fn& can_gc)
|
can_gc_fn& can_gc)
|
||||||
{
|
{
|
||||||
|
check_schema(s);
|
||||||
assert(row_limit > 0);
|
assert(row_limit > 0);
|
||||||
|
|
||||||
auto gc_before = saturating_subtract(query_time, s.gc_grace_seconds());
|
auto gc_before = saturating_subtract(query_time, s.gc_grace_seconds());
|
||||||
@@ -1315,12 +1369,14 @@ mutation_partition::compact_for_query(
|
|||||||
bool reverse,
|
bool reverse,
|
||||||
uint32_t row_limit)
|
uint32_t row_limit)
|
||||||
{
|
{
|
||||||
|
check_schema(s);
|
||||||
return do_compact(s, query_time, row_ranges, reverse, row_limit, always_gc);
|
return do_compact(s, query_time, row_ranges, reverse, row_limit, always_gc);
|
||||||
}
|
}
|
||||||
|
|
||||||
void mutation_partition::compact_for_compaction(const schema& s,
|
void mutation_partition::compact_for_compaction(const schema& s,
|
||||||
can_gc_fn& can_gc, gc_clock::time_point compaction_time)
|
can_gc_fn& can_gc, gc_clock::time_point compaction_time)
|
||||||
{
|
{
|
||||||
|
check_schema(s);
|
||||||
static const std::vector<query::clustering_range> all_rows = {
|
static const std::vector<query::clustering_range> all_rows = {
|
||||||
query::clustering_range::make_open_ended_both_sides()
|
query::clustering_range::make_open_ended_both_sides()
|
||||||
};
|
};
|
||||||
@@ -1354,11 +1410,13 @@ row::is_live(const schema& s, column_kind kind, tombstone base_tombstone, gc_clo
|
|||||||
|
|
||||||
bool
|
bool
|
||||||
mutation_partition::is_static_row_live(const schema& s, gc_clock::time_point query_time) const {
|
mutation_partition::is_static_row_live(const schema& s, gc_clock::time_point query_time) const {
|
||||||
|
check_schema(s);
|
||||||
return has_any_live_data(s, column_kind::static_column, static_row(), _tombstone, query_time);
|
return has_any_live_data(s, column_kind::static_column, static_row(), _tombstone, query_time);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t
|
size_t
|
||||||
mutation_partition::live_row_count(const schema& s, gc_clock::time_point query_time) const {
|
mutation_partition::live_row_count(const schema& s, gc_clock::time_point query_time) const {
|
||||||
|
check_schema(s);
|
||||||
size_t count = 0;
|
size_t count = 0;
|
||||||
|
|
||||||
for (const rows_entry& e : non_dummy_rows()) {
|
for (const rows_entry& e : non_dummy_rows()) {
|
||||||
@@ -1396,12 +1454,17 @@ row::row(const schema& s, column_kind kind, const row& o)
|
|||||||
if (_type == storage_type::vector) {
|
if (_type == storage_type::vector) {
|
||||||
auto& other_vec = o._storage.vector;
|
auto& other_vec = o._storage.vector;
|
||||||
auto& vec = *new (&_storage.vector) vector_storage;
|
auto& vec = *new (&_storage.vector) vector_storage;
|
||||||
vec.present = other_vec.present;
|
try {
|
||||||
vec.v.reserve(other_vec.v.size());
|
vec.present = other_vec.present;
|
||||||
column_id id = 0;
|
vec.v.reserve(other_vec.v.size());
|
||||||
for (auto& cell : other_vec.v) {
|
column_id id = 0;
|
||||||
auto& cdef = s.column_at(kind, id++);
|
for (auto& cell : other_vec.v) {
|
||||||
vec.v.emplace_back(cell_and_hash { cell.cell.copy(*cdef.type), cell.hash });
|
auto& cdef = s.column_at(kind, id++);
|
||||||
|
vec.v.emplace_back(cell_and_hash{cell.cell.copy(*cdef.type), cell.hash});
|
||||||
|
}
|
||||||
|
} catch (...) {
|
||||||
|
_storage.vector.~vector_storage();
|
||||||
|
throw;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
auto cloner = [&] (const auto& x) {
|
auto cloner = [&] (const auto& x) {
|
||||||
@@ -1699,6 +1762,7 @@ row row::difference(const schema& s, column_kind kind, const row& other) const
|
|||||||
|
|
||||||
mutation_partition mutation_partition::difference(schema_ptr s, const mutation_partition& other) const
|
mutation_partition mutation_partition::difference(schema_ptr s, const mutation_partition& other) const
|
||||||
{
|
{
|
||||||
|
check_schema(*s);
|
||||||
mutation_partition mp(s);
|
mutation_partition mp(s);
|
||||||
if (_tombstone > other._tombstone) {
|
if (_tombstone > other._tombstone) {
|
||||||
mp.apply(_tombstone);
|
mp.apply(_tombstone);
|
||||||
@@ -1729,6 +1793,7 @@ mutation_partition mutation_partition::difference(schema_ptr s, const mutation_p
|
|||||||
}
|
}
|
||||||
|
|
||||||
void mutation_partition::accept(const schema& s, mutation_partition_visitor& v) const {
|
void mutation_partition::accept(const schema& s, mutation_partition_visitor& v) const {
|
||||||
|
check_schema(s);
|
||||||
v.accept_partition_tombstone(_tombstone);
|
v.accept_partition_tombstone(_tombstone);
|
||||||
_static_row.for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) {
|
_static_row.for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) {
|
||||||
const column_definition& def = s.static_column_at(id);
|
const column_definition& def = s.static_column_at(id);
|
||||||
@@ -1812,9 +1877,10 @@ void mutation_querier::query_static_row(const row& r, tombstone current_tombston
|
|||||||
} else if (_short_reads_allowed) {
|
} else if (_short_reads_allowed) {
|
||||||
seastar::measuring_output_stream stream;
|
seastar::measuring_output_stream stream;
|
||||||
ser::qr_partition__static_row__cells<seastar::measuring_output_stream> out(stream, { });
|
ser::qr_partition__static_row__cells<seastar::measuring_output_stream> out(stream, { });
|
||||||
|
auto start = stream.size();
|
||||||
get_compacted_row_slice(_schema, slice, column_kind::static_column,
|
get_compacted_row_slice(_schema, slice, column_kind::static_column,
|
||||||
r, slice.static_columns, _static_cells_wr);
|
r, slice.static_columns, out);
|
||||||
_memory_accounter.update(stream.size());
|
_memory_accounter.update(stream.size() - start);
|
||||||
}
|
}
|
||||||
if (_pw.requested_digest()) {
|
if (_pw.requested_digest()) {
|
||||||
max_timestamp max_ts{_pw.last_modified()};
|
max_timestamp max_ts{_pw.last_modified()};
|
||||||
@@ -1875,8 +1941,9 @@ stop_iteration mutation_querier::consume(clustering_row&& cr, row_tombstone curr
|
|||||||
} else if (_short_reads_allowed) {
|
} else if (_short_reads_allowed) {
|
||||||
seastar::measuring_output_stream stream;
|
seastar::measuring_output_stream stream;
|
||||||
ser::qr_partition__rows<seastar::measuring_output_stream> out(stream, { });
|
ser::qr_partition__rows<seastar::measuring_output_stream> out(stream, { });
|
||||||
|
auto start = stream.size();
|
||||||
write_row(out);
|
write_row(out);
|
||||||
stop = _memory_accounter.update_and_check(stream.size());
|
stop = _memory_accounter.update_and_check(stream.size() - start);
|
||||||
}
|
}
|
||||||
|
|
||||||
_live_clustering_rows++;
|
_live_clustering_rows++;
|
||||||
@@ -2160,6 +2227,9 @@ mutation_partition::mutation_partition(mutation_partition::incomplete_tag, const
|
|||||||
, _static_row_continuous(!s.has_static_columns())
|
, _static_row_continuous(!s.has_static_columns())
|
||||||
, _rows()
|
, _rows()
|
||||||
, _row_tombstones(s)
|
, _row_tombstones(s)
|
||||||
|
#ifdef SEASTAR_DEBUG
|
||||||
|
, _schema_version(s.version())
|
||||||
|
#endif
|
||||||
{
|
{
|
||||||
_rows.insert_before(_rows.end(),
|
_rows.insert_before(_rows.end(),
|
||||||
*current_allocator().construct<rows_entry>(s, rows_entry::last_dummy_tag(), is_continuous::no));
|
*current_allocator().construct<rows_entry>(s, rows_entry::last_dummy_tag(), is_continuous::no));
|
||||||
@@ -2191,6 +2261,7 @@ void mutation_partition::make_fully_continuous() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
clustering_interval_set mutation_partition::get_continuity(const schema& s, is_continuous cont) const {
|
clustering_interval_set mutation_partition::get_continuity(const schema& s, is_continuous cont) const {
|
||||||
|
check_schema(s);
|
||||||
clustering_interval_set result;
|
clustering_interval_set result;
|
||||||
auto i = _rows.begin();
|
auto i = _rows.begin();
|
||||||
auto prev_pos = position_in_partition::before_all_clustered_rows();
|
auto prev_pos = position_in_partition::before_all_clustered_rows();
|
||||||
@@ -2240,6 +2311,7 @@ stop_iteration mutation_partition::clear_gently(cache_tracker* tracker) noexcept
|
|||||||
|
|
||||||
bool
|
bool
|
||||||
mutation_partition::check_continuity(const schema& s, const position_range& r, is_continuous cont) const {
|
mutation_partition::check_continuity(const schema& s, const position_range& r, is_continuous cont) const {
|
||||||
|
check_schema(s);
|
||||||
auto less = rows_entry::compare(s);
|
auto less = rows_entry::compare(s);
|
||||||
auto i = _rows.lower_bound(r.start(), less);
|
auto i = _rows.lower_bound(r.start(), less);
|
||||||
auto end = _rows.lower_bound(r.end(), less);
|
auto end = _rows.lower_bound(r.end(), less);
|
||||||
|
|||||||
@@ -74,6 +74,15 @@ using cell_hash_opt = seastar::optimized_optional<cell_hash>;
|
|||||||
struct cell_and_hash {
|
struct cell_and_hash {
|
||||||
atomic_cell_or_collection cell;
|
atomic_cell_or_collection cell;
|
||||||
mutable cell_hash_opt hash;
|
mutable cell_hash_opt hash;
|
||||||
|
|
||||||
|
cell_and_hash() = default;
|
||||||
|
cell_and_hash(cell_and_hash&&) noexcept = default;
|
||||||
|
cell_and_hash& operator=(cell_and_hash&&) noexcept = default;
|
||||||
|
|
||||||
|
cell_and_hash(atomic_cell_or_collection&& cell, cell_hash_opt hash)
|
||||||
|
: cell(std::move(cell))
|
||||||
|
, hash(hash)
|
||||||
|
{ }
|
||||||
};
|
};
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -896,6 +905,9 @@ private:
|
|||||||
// Contains only strict prefixes so that we don't have to lookup full keys
|
// Contains only strict prefixes so that we don't have to lookup full keys
|
||||||
// in both _row_tombstones and _rows.
|
// in both _row_tombstones and _rows.
|
||||||
range_tombstone_list _row_tombstones;
|
range_tombstone_list _row_tombstones;
|
||||||
|
#ifdef SEASTAR_DEBUG
|
||||||
|
table_schema_version _schema_version;
|
||||||
|
#endif
|
||||||
|
|
||||||
friend class mutation_partition_applier;
|
friend class mutation_partition_applier;
|
||||||
friend class converting_mutation_partition_applier;
|
friend class converting_mutation_partition_applier;
|
||||||
@@ -910,10 +922,16 @@ public:
|
|||||||
mutation_partition(schema_ptr s)
|
mutation_partition(schema_ptr s)
|
||||||
: _rows()
|
: _rows()
|
||||||
, _row_tombstones(*s)
|
, _row_tombstones(*s)
|
||||||
|
#ifdef SEASTAR_DEBUG
|
||||||
|
, _schema_version(s->version())
|
||||||
|
#endif
|
||||||
{ }
|
{ }
|
||||||
mutation_partition(mutation_partition& other, copy_comparators_only)
|
mutation_partition(mutation_partition& other, copy_comparators_only)
|
||||||
: _rows()
|
: _rows()
|
||||||
, _row_tombstones(other._row_tombstones, range_tombstone_list::copy_comparator_only())
|
, _row_tombstones(other._row_tombstones, range_tombstone_list::copy_comparator_only())
|
||||||
|
#ifdef SEASTAR_DEBUG
|
||||||
|
, _schema_version(other._schema_version)
|
||||||
|
#endif
|
||||||
{ }
|
{ }
|
||||||
mutation_partition(mutation_partition&&) = default;
|
mutation_partition(mutation_partition&&) = default;
|
||||||
mutation_partition(const schema& s, const mutation_partition&);
|
mutation_partition(const schema& s, const mutation_partition&);
|
||||||
@@ -1113,6 +1131,12 @@ private:
|
|||||||
template<typename Func>
|
template<typename Func>
|
||||||
void for_each_row(const schema& schema, const query::clustering_range& row_range, bool reversed, Func&& func) const;
|
void for_each_row(const schema& schema, const query::clustering_range& row_range, bool reversed, Func&& func) const;
|
||||||
friend class counter_write_query_result_builder;
|
friend class counter_write_query_result_builder;
|
||||||
|
|
||||||
|
void check_schema(const schema& s) const {
|
||||||
|
#ifdef SEASTAR_DEBUG
|
||||||
|
assert(s.version() == _schema_version);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
inline
|
inline
|
||||||
|
|||||||
@@ -312,7 +312,7 @@ partition_version& partition_entry::add_version(const schema& s, cache_tracker*
|
|||||||
|
|
||||||
void partition_entry::apply(const schema& s, const mutation_partition& mp, const schema& mp_schema)
|
void partition_entry::apply(const schema& s, const mutation_partition& mp, const schema& mp_schema)
|
||||||
{
|
{
|
||||||
apply(s, mutation_partition(s, mp), mp_schema);
|
apply(s, mutation_partition(mp_schema, mp), mp_schema);
|
||||||
}
|
}
|
||||||
|
|
||||||
void partition_entry::apply(const schema& s, mutation_partition&& mp, const schema& mp_schema)
|
void partition_entry::apply(const schema& s, mutation_partition&& mp, const schema& mp_schema)
|
||||||
@@ -457,7 +457,10 @@ coroutine partition_entry::apply_to_incomplete(const schema& s,
|
|||||||
pe.upgrade(pe_schema.shared_from_this(), s.shared_from_this(), pe_cleaner, no_cache_tracker);
|
pe.upgrade(pe_schema.shared_from_this(), s.shared_from_this(), pe_cleaner, no_cache_tracker);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool can_move = !pe._snapshot;
|
// When preemptible, later memtable reads could start using the snapshot before
|
||||||
|
// snapshot's writes are made visible in cache, which would cause them to miss those writes.
|
||||||
|
// So we cannot allow erasing when preemptible.
|
||||||
|
bool can_move = !preemptible && !pe._snapshot;
|
||||||
|
|
||||||
auto src_snp = pe.read(reg, pe_cleaner, s.shared_from_this(), no_cache_tracker);
|
auto src_snp = pe.read(reg, pe_cleaner, s.shared_from_this(), no_cache_tracker);
|
||||||
lw_shared_ptr<partition_snapshot> prev_snp;
|
lw_shared_ptr<partition_snapshot> prev_snp;
|
||||||
|
|||||||
@@ -273,6 +273,11 @@ public:
|
|||||||
return is_partition_end() || (_ck && _ck->is_empty(s) && _bound_weight > 0);
|
return is_partition_end() || (_ck && _ck->is_empty(s) && _bound_weight > 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool is_before_all_clustered_rows(const schema& s) const {
|
||||||
|
return _type < partition_region::clustered
|
||||||
|
|| (_type == partition_region::clustered && _ck->is_empty(s) && _bound_weight < 0);
|
||||||
|
}
|
||||||
|
|
||||||
template<typename Hasher>
|
template<typename Hasher>
|
||||||
void feed_hash(Hasher& hasher, const schema& s) const {
|
void feed_hash(Hasher& hasher, const schema& s) const {
|
||||||
::feed_hash(hasher, _bound_weight);
|
::feed_hash(hasher, _bound_weight);
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ class autoupdating_underlying_reader final {
|
|||||||
row_cache& _cache;
|
row_cache& _cache;
|
||||||
read_context& _read_context;
|
read_context& _read_context;
|
||||||
stdx::optional<flat_mutation_reader> _reader;
|
stdx::optional<flat_mutation_reader> _reader;
|
||||||
utils::phased_barrier::phase_type _reader_creation_phase;
|
utils::phased_barrier::phase_type _reader_creation_phase = 0;
|
||||||
dht::partition_range _range = { };
|
dht::partition_range _range = { };
|
||||||
stdx::optional<dht::decorated_key> _last_key;
|
stdx::optional<dht::decorated_key> _last_key;
|
||||||
stdx::optional<dht::decorated_key> _new_last_key;
|
stdx::optional<dht::decorated_key> _new_last_key;
|
||||||
@@ -105,7 +105,6 @@ public:
|
|||||||
return make_ready_future<>();
|
return make_ready_future<>();
|
||||||
}
|
}
|
||||||
utils::phased_barrier::phase_type creation_phase() const {
|
utils::phased_barrier::phase_type creation_phase() const {
|
||||||
assert(_reader);
|
|
||||||
return _reader_creation_phase;
|
return _reader_creation_phase;
|
||||||
}
|
}
|
||||||
const dht::partition_range& range() const {
|
const dht::partition_range& range() const {
|
||||||
|
|||||||
@@ -1004,6 +1004,22 @@ static dht::token_range_vector get_primary_ranges(
|
|||||||
utils::fb_utilities::get_broadcast_address());
|
utils::fb_utilities::get_broadcast_address());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// get_primary_ranges_within_dc() is similar to get_primary_ranges(),
|
||||||
|
// but instead of each range being assigned just one primary owner
|
||||||
|
// across the entire cluster, here each range is assigned a primary
|
||||||
|
// owner in each of the datacenters.
|
||||||
|
static dht::token_range_vector get_primary_ranges_within_dc(
|
||||||
|
database& db, sstring keyspace) {
|
||||||
|
auto& rs = db.find_keyspace(keyspace).get_replication_strategy();
|
||||||
|
return rs.get_primary_ranges_within_dc(
|
||||||
|
utils::fb_utilities::get_broadcast_address());
|
||||||
|
}
|
||||||
|
|
||||||
|
static sstring get_local_dc() {
|
||||||
|
return locator::i_endpoint_snitch::get_local_snitch_ptr()->get_datacenter(
|
||||||
|
utils::fb_utilities::get_broadcast_address());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
struct repair_options {
|
struct repair_options {
|
||||||
// If primary_range is true, we should perform repair only on this node's
|
// If primary_range is true, we should perform repair only on this node's
|
||||||
@@ -1256,21 +1272,14 @@ static int do_repair_start(seastar::sharded<database>& db, sstring keyspace,
|
|||||||
rlogger.info("primary-range repair");
|
rlogger.info("primary-range repair");
|
||||||
// when "primary_range" option is on, neither data_centers nor hosts
|
// when "primary_range" option is on, neither data_centers nor hosts
|
||||||
// may be set, except data_centers may contain only local DC (-local)
|
// may be set, except data_centers may contain only local DC (-local)
|
||||||
#if 0
|
|
||||||
if (options.data_centers.size() == 1 &&
|
if (options.data_centers.size() == 1 &&
|
||||||
options.data_centers[0] == DatabaseDescriptor.getLocalDataCenter()) {
|
options.data_centers[0] == get_local_dc()) {
|
||||||
ranges = get_primary_ranges_within_dc(db.local(), keyspace);
|
ranges = get_primary_ranges_within_dc(db.local(), keyspace);
|
||||||
} else
|
} else if (options.data_centers.size() > 0 || options.hosts.size() > 0) {
|
||||||
#endif
|
|
||||||
#if 0
|
|
||||||
if (options.data_centers.size() > 0 || options.hosts.size() > 0) {
|
|
||||||
throw std::runtime_error("You need to run primary range repair on all nodes in the cluster.");
|
throw std::runtime_error("You need to run primary range repair on all nodes in the cluster.");
|
||||||
} else {
|
} else {
|
||||||
#endif
|
|
||||||
ranges = get_primary_ranges(db.local(), keyspace);
|
ranges = get_primary_ranges(db.local(), keyspace);
|
||||||
#if 0
|
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
} else {
|
} else {
|
||||||
ranges = get_local_ranges(db.local(), keyspace);
|
ranges = get_local_ranges(db.local(), keyspace);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,109 +0,0 @@
|
|||||||
#!/bin/bash -e
|
|
||||||
#
|
|
||||||
# Copyright (C) 2015 ScyllaDB
|
|
||||||
|
|
||||||
if [ "`id -u`" -ne 0 ]; then
|
|
||||||
echo "Requires root permission."
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
print_usage() {
|
|
||||||
echo "scylla_install_pkg --local-pkg /home/scylla/rpms --repo [URL]"
|
|
||||||
echo " --local-pkg install locally built .rpm/.deb on specified directory"
|
|
||||||
echo " --repo repository for both install and update, specify .repo/.list file URL"
|
|
||||||
echo " --repo-for-install repository for install, specify .repo/.list file URL"
|
|
||||||
echo " --repo-for-update repository for update, specify .repo/.list file URL"
|
|
||||||
exit 1
|
|
||||||
}
|
|
||||||
|
|
||||||
LOCAL_PKG=
|
|
||||||
UNSTABLE=0
|
|
||||||
REPO_FOR_INSTALL=
|
|
||||||
REPO_FOR_UPDATE=
|
|
||||||
while [ $# -gt 0 ]; do
|
|
||||||
case "$1" in
|
|
||||||
"--local-pkg")
|
|
||||||
LOCAL_PKG=$2
|
|
||||||
shift 2
|
|
||||||
;;
|
|
||||||
"--repo")
|
|
||||||
REPO_FOR_INSTALL=$2
|
|
||||||
REPO_FOR_UPDATE=$2
|
|
||||||
shift 2
|
|
||||||
;;
|
|
||||||
"--repo-for-install")
|
|
||||||
REPO_FOR_INSTALL=$2
|
|
||||||
shift 2
|
|
||||||
;;
|
|
||||||
"--repo-for-update")
|
|
||||||
REPO_FOR_UPDATE=$2
|
|
||||||
shift 2
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
print_usage
|
|
||||||
shift 1
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
done
|
|
||||||
|
|
||||||
. /etc/os-release
|
|
||||||
|
|
||||||
if [ -f /etc/debian_version ]; then
|
|
||||||
echo "#!/bin/sh" >> /usr/sbin/policy-rc.d
|
|
||||||
echo "exit 101" >> /usr/sbin/policy-rc.d
|
|
||||||
chmod +x /usr/sbin/policy-rc.d
|
|
||||||
cp /etc/hosts /etc/hosts.orig
|
|
||||||
echo 127.0.0.1 `hostname` >> /etc/hosts
|
|
||||||
if [ "$REPO_FOR_INSTALL" != "" ]; then
|
|
||||||
curl -L -o /etc/apt/sources.list.d/scylla_install.list $REPO_FOR_INSTALL
|
|
||||||
fi
|
|
||||||
apt-get -o Acquire::AllowInsecureRepositories=true \
|
|
||||||
-o Acquire::AllowDowngradeToInsecureRepositories=true update
|
|
||||||
if [ "$LOCAL_PKG" = "" ]; then
|
|
||||||
apt-get install -o APT::Get::AllowUnauthenticated=true \
|
|
||||||
-y --force-yes scylla
|
|
||||||
else
|
|
||||||
if [ ! -f /usr/bin/gdebi ]; then
|
|
||||||
apt-get install -y --force-yes gdebi-core
|
|
||||||
fi
|
|
||||||
echo Y | gdebi $LOCAL_PKG/scylla-kernel-conf*.deb
|
|
||||||
echo Y | gdebi $LOCAL_PKG/scylla-conf*.deb
|
|
||||||
echo Y | gdebi $LOCAL_PKG/scylla-server_*.deb
|
|
||||||
echo Y | gdebi $LOCAL_PKG/scylla-server-dbg*.deb
|
|
||||||
echo Y | gdebi $LOCAL_PKG/scylla-jmx*.deb
|
|
||||||
echo Y | gdebi $LOCAL_PKG/scylla-tools*.deb
|
|
||||||
echo Y | gdebi $LOCAL_PKG/scylla_*.deb
|
|
||||||
fi
|
|
||||||
mv /etc/hosts.orig /etc/hosts
|
|
||||||
rm /usr/sbin/policy-rc.d
|
|
||||||
rm /etc/apt/sources.list.d/scylla_install.list
|
|
||||||
if [ "$REPO_FOR_UPDATE" != "" ]; then
|
|
||||||
curl -L -o /etc/apt/sources.list.d/scylla.list $REPO_FOR_UPDATE
|
|
||||||
fi
|
|
||||||
apt-get -o Acquire::AllowInsecureRepositories=true \
|
|
||||||
-o Acquire::AllowDowngradeToInsecureRepositories=true update
|
|
||||||
else
|
|
||||||
if [ "$REPO_FOR_INSTALL" != "" ]; then
|
|
||||||
curl -L -o /etc/yum.repos.d/scylla_install.repo $REPO_FOR_INSTALL
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ "$ID" = "centos" ]; then
|
|
||||||
yum install -y epel-release
|
|
||||||
elif [ "$ID" = "rhel" ]; then
|
|
||||||
rpm -ivh http://download.fedoraproject.org/pub/epel/7/x86_64/e/epel-release-7-7.noarch.rpm
|
|
||||||
else
|
|
||||||
echo "Unsupported distribution"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ "$LOCAL_PKG" = "" ]; then
|
|
||||||
yum install -y scylla
|
|
||||||
else
|
|
||||||
yum install -y $LOCAL_PKG/scylla*.*.rpm
|
|
||||||
fi
|
|
||||||
|
|
||||||
rm /etc/yum.repos.d/scylla_install.repo
|
|
||||||
if [ "$REPO_FOR_UPDATE" != "" ]; then
|
|
||||||
curl -L -o /etc/yum.repos.d/scylla.repo $REPO_FOR_UPDATE
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
2
seastar
2
seastar
Submodule seastar updated: d7f35d7663...efda4281c2
@@ -54,7 +54,7 @@ static logging::logger mlogger("migration_task");
|
|||||||
future<> migration_task::run_may_throw(distributed<service::storage_proxy>& proxy, const gms::inet_address& endpoint)
|
future<> migration_task::run_may_throw(distributed<service::storage_proxy>& proxy, const gms::inet_address& endpoint)
|
||||||
{
|
{
|
||||||
if (!gms::get_failure_detector().local().is_alive(endpoint)) {
|
if (!gms::get_failure_detector().local().is_alive(endpoint)) {
|
||||||
mlogger.error("Can't send migration request: node {} is down.", endpoint);
|
mlogger.warn("Can't send migration request: node {} is down.", endpoint);
|
||||||
return make_ready_future<>();
|
return make_ready_future<>();
|
||||||
}
|
}
|
||||||
netw::messaging_service::msg_addr id{endpoint, 0};
|
netw::messaging_service::msg_addr id{endpoint, 0};
|
||||||
|
|||||||
@@ -144,7 +144,11 @@ future<lowres_clock::duration> cache_hitrate_calculator::recalculate_hitrates()
|
|||||||
return _db.invoke_on_all([this, rates = std::move(rates), cpuid = engine().cpu_id()] (database& db) {
|
return _db.invoke_on_all([this, rates = std::move(rates), cpuid = engine().cpu_id()] (database& db) {
|
||||||
sstring gstate;
|
sstring gstate;
|
||||||
for (auto& cf : db.get_column_families() | boost::adaptors::filtered(non_system_filter)) {
|
for (auto& cf : db.get_column_families() | boost::adaptors::filtered(non_system_filter)) {
|
||||||
stat s = rates.at(cf.first);
|
auto it = rates.find(cf.first);
|
||||||
|
if (it == rates.end()) { // a table may be added before map/reduce completes and this code runs
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
stat s = it->second;
|
||||||
float rate = 0;
|
float rate = 0;
|
||||||
if (s.h) {
|
if (s.h) {
|
||||||
rate = s.h / (s.h + s.m);
|
rate = s.h / (s.h + s.m);
|
||||||
|
|||||||
@@ -85,7 +85,7 @@ static bool has_clustering_keys(const schema& s, const query::read_command& cmd)
|
|||||||
_query_read_repair_decision = state->get_query_read_repair_decision();
|
_query_read_repair_decision = state->get_query_read_repair_decision();
|
||||||
} else {
|
} else {
|
||||||
// Reusing readers is currently only supported for singular queries.
|
// Reusing readers is currently only supported for singular queries.
|
||||||
if (_ranges.front().is_singular()) {
|
if (!_ranges.empty() && query::is_single_partition(_ranges.front())) {
|
||||||
_cmd->query_uuid = utils::make_random_uuid();
|
_cmd->query_uuid = utils::make_random_uuid();
|
||||||
}
|
}
|
||||||
_cmd->is_first_page = true;
|
_cmd->is_first_page = true;
|
||||||
|
|||||||
@@ -211,7 +211,7 @@ protected:
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual bool waited_for(gms::inet_address from) = 0;
|
virtual bool waited_for(gms::inet_address from) = 0;
|
||||||
virtual void signal(gms::inet_address from) {
|
void signal(gms::inet_address from) {
|
||||||
if (waited_for(from)) {
|
if (waited_for(from)) {
|
||||||
signal();
|
signal();
|
||||||
}
|
}
|
||||||
@@ -221,7 +221,7 @@ public:
|
|||||||
abstract_write_response_handler(shared_ptr<storage_proxy> p, keyspace& ks, db::consistency_level cl, db::write_type type,
|
abstract_write_response_handler(shared_ptr<storage_proxy> p, keyspace& ks, db::consistency_level cl, db::write_type type,
|
||||||
std::unique_ptr<mutation_holder> mh, std::unordered_set<gms::inet_address> targets, tracing::trace_state_ptr trace_state,
|
std::unique_ptr<mutation_holder> mh, std::unordered_set<gms::inet_address> targets, tracing::trace_state_ptr trace_state,
|
||||||
storage_proxy::write_stats& stats, size_t pending_endpoints = 0, std::vector<gms::inet_address> dead_endpoints = {})
|
storage_proxy::write_stats& stats, size_t pending_endpoints = 0, std::vector<gms::inet_address> dead_endpoints = {})
|
||||||
: _id(p->_next_response_id++), _proxy(std::move(p)), _trace_state(trace_state), _cl(cl), _type(type), _mutation_holder(std::move(mh)), _targets(std::move(targets)),
|
: _id(p->get_next_response_id()), _proxy(std::move(p)), _trace_state(trace_state), _cl(cl), _type(type), _mutation_holder(std::move(mh)), _targets(std::move(targets)),
|
||||||
_dead_endpoints(std::move(dead_endpoints)), _stats(stats) {
|
_dead_endpoints(std::move(dead_endpoints)), _stats(stats) {
|
||||||
// original comment from cassandra:
|
// original comment from cassandra:
|
||||||
// during bootstrap, include pending endpoints in the count
|
// during bootstrap, include pending endpoints in the count
|
||||||
@@ -285,10 +285,13 @@ public:
|
|||||||
}
|
}
|
||||||
// return true on last ack
|
// return true on last ack
|
||||||
bool response(gms::inet_address from) {
|
bool response(gms::inet_address from) {
|
||||||
signal(from);
|
|
||||||
auto it = _targets.find(from);
|
auto it = _targets.find(from);
|
||||||
assert(it != _targets.end());
|
if (it != _targets.end()) {
|
||||||
_targets.erase(it);
|
signal(from);
|
||||||
|
_targets.erase(it);
|
||||||
|
} else {
|
||||||
|
slogger.warn("Receive outdated write ack from {}", from);
|
||||||
|
}
|
||||||
return _targets.size() == 0;
|
return _targets.size() == 0;
|
||||||
}
|
}
|
||||||
future<> wait() {
|
future<> wait() {
|
||||||
@@ -632,9 +635,12 @@ void storage_proxy_stats::split_stats::register_metrics_for(gms::inet_address ep
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
using namespace std::literals::chrono_literals;
|
||||||
|
|
||||||
storage_proxy::~storage_proxy() {}
|
storage_proxy::~storage_proxy() {}
|
||||||
storage_proxy::storage_proxy(distributed<database>& db, storage_proxy::config cfg)
|
storage_proxy::storage_proxy(distributed<database>& db, storage_proxy::config cfg)
|
||||||
: _db(db)
|
: _db(db)
|
||||||
|
, _next_response_id(std::chrono::system_clock::now().time_since_epoch()/1ms)
|
||||||
, _hints_resource_manager(cfg.available_memory / 10)
|
, _hints_resource_manager(cfg.available_memory / 10)
|
||||||
, _hints_for_views_manager(_db.local().get_config().data_file_directories()[0] + "/view_pending_updates", {}, _db.local().get_config().max_hint_window_in_ms(), _hints_resource_manager, _db)
|
, _hints_for_views_manager(_db.local().get_config().data_file_directories()[0] + "/view_pending_updates", {}, _db.local().get_config().max_hint_window_in_ms(), _hints_resource_manager, _db)
|
||||||
, _background_write_throttle_threahsold(cfg.available_memory / 10) {
|
, _background_write_throttle_threahsold(cfg.available_memory / 10) {
|
||||||
@@ -1974,18 +1980,21 @@ public:
|
|||||||
_timeout.arm(timeout);
|
_timeout.arm(timeout);
|
||||||
}
|
}
|
||||||
virtual ~abstract_read_resolver() {};
|
virtual ~abstract_read_resolver() {};
|
||||||
virtual void on_error(gms::inet_address ep) = 0;
|
virtual void on_error(gms::inet_address ep, bool disconnect) = 0;
|
||||||
future<> done() {
|
future<> done() {
|
||||||
return _done_promise.get_future();
|
return _done_promise.get_future();
|
||||||
}
|
}
|
||||||
void error(gms::inet_address ep, std::exception_ptr eptr) {
|
void error(gms::inet_address ep, std::exception_ptr eptr) {
|
||||||
sstring why;
|
sstring why;
|
||||||
|
bool disconnect = false;
|
||||||
try {
|
try {
|
||||||
std::rethrow_exception(eptr);
|
std::rethrow_exception(eptr);
|
||||||
} catch (rpc::closed_error&) {
|
} catch (rpc::closed_error&) {
|
||||||
return; // do not report connection closed exception, gossiper does that
|
// do not report connection closed exception, gossiper does that
|
||||||
|
disconnect = true;
|
||||||
} catch (rpc::timeout_error&) {
|
} catch (rpc::timeout_error&) {
|
||||||
return; // do not report timeouts, the whole operation will timeout and be reported
|
// do not report timeouts, the whole operation will timeout and be reported
|
||||||
|
return; // also do not report timeout as replica failure for the same reason
|
||||||
} catch(std::exception& e) {
|
} catch(std::exception& e) {
|
||||||
why = e.what();
|
why = e.what();
|
||||||
} catch(...) {
|
} catch(...) {
|
||||||
@@ -1993,10 +2002,12 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!_request_failed) { // request may fail only once.
|
if (!_request_failed) { // request may fail only once.
|
||||||
on_error(ep);
|
on_error(ep, disconnect);
|
||||||
}
|
}
|
||||||
|
|
||||||
slogger.error("Exception when communicating with {}: {}", ep, why);
|
if (why.length()) {
|
||||||
|
slogger.error("Exception when communicating with {}: {}", ep, why);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -2071,10 +2082,16 @@ public:
|
|||||||
_done_promise.set_value();
|
_done_promise.set_value();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void on_error(gms::inet_address ep) override {
|
void on_error(gms::inet_address ep, bool disconnect) override {
|
||||||
if (waiting_for(ep)) {
|
if (waiting_for(ep)) {
|
||||||
_failed++;
|
_failed++;
|
||||||
}
|
}
|
||||||
|
if (disconnect && _block_for == _target_count_for_cl) {
|
||||||
|
// if the error is because of a connection disconnect and there are no targets to speculate on,
|
||||||
|
// wait for the timeout in the hope that the client will issue a speculative read
|
||||||
|
// FIXME: resolver should have access to all replicas and try another one in this case
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (_block_for + _failed > _target_count_for_cl) {
|
if (_block_for + _failed > _target_count_for_cl) {
|
||||||
fail_request(std::make_exception_ptr(read_failure_exception(_schema->ks_name(), _schema->cf_name(), _cl, _cl_responses, _failed, _block_for, _data_result)));
|
fail_request(std::make_exception_ptr(read_failure_exception(_schema->ks_name(), _schema->cf_name(), _cl, _cl_responses, _failed, _block_for, _data_result)));
|
||||||
}
|
}
|
||||||
@@ -2400,7 +2417,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void on_error(gms::inet_address ep) override {
|
void on_error(gms::inet_address ep, bool disconnect) override {
|
||||||
fail_request(std::make_exception_ptr(read_failure_exception(_schema->ks_name(), _schema->cf_name(), _cl, response_count(), 1, _targets_count, response_count() != 0)));
|
fail_request(std::make_exception_ptr(read_failure_exception(_schema->ks_name(), _schema->cf_name(), _cl, response_count(), 1, _targets_count, response_count() != 0)));
|
||||||
}
|
}
|
||||||
uint32_t max_live_count() const {
|
uint32_t max_live_count() const {
|
||||||
@@ -3323,9 +3340,22 @@ storage_proxy::query_partition_key_range(lw_shared_ptr<query::read_command> cmd,
|
|||||||
slogger.debug("Estimated result rows per range: {}; requested rows: {}, ranges.size(): {}; concurrent range requests: {}",
|
slogger.debug("Estimated result rows per range: {}; requested rows: {}, ranges.size(): {}; concurrent range requests: {}",
|
||||||
result_rows_per_range, cmd->row_limit, ranges.size(), concurrency_factor);
|
result_rows_per_range, cmd->row_limit, ranges.size(), concurrency_factor);
|
||||||
|
|
||||||
|
// The call to `query_partition_key_range_concurrent()` below
|
||||||
|
// updates `cmd` directly when processing the results. Under
|
||||||
|
// some circumstances, when the query executes without deferring,
|
||||||
|
// this updating will happen before the lambda object is constructed
|
||||||
|
// and hence the updates will be visible to the lambda. This will
|
||||||
|
// result in the merger below trimming the results according to the
|
||||||
|
// updated (decremented) limits and causing the paging logic to
|
||||||
|
// declare the query exhausted due to the non-full page. To avoid
|
||||||
|
// this save the original values of the limits here and pass these
|
||||||
|
// to the lambda below.
|
||||||
|
const auto row_limit = cmd->row_limit;
|
||||||
|
const auto partition_limit = cmd->partition_limit;
|
||||||
|
|
||||||
return query_partition_key_range_concurrent(query_options.timeout(*this), std::move(results), cmd, cl, ranges.begin(), std::move(ranges),
|
return query_partition_key_range_concurrent(query_options.timeout(*this), std::move(results), cmd, cl, ranges.begin(), std::move(ranges),
|
||||||
concurrency_factor, std::move(query_options.trace_state), cmd->row_limit, cmd->partition_limit)
|
concurrency_factor, std::move(query_options.trace_state), cmd->row_limit, cmd->partition_limit)
|
||||||
.then([row_limit = cmd->row_limit, partition_limit = cmd->partition_limit](std::vector<foreign_ptr<lw_shared_ptr<query::result>>> results) {
|
.then([row_limit, partition_limit](std::vector<foreign_ptr<lw_shared_ptr<query::result>>> results) {
|
||||||
query::result_merger merger(row_limit, partition_limit);
|
query::result_merger merger(row_limit, partition_limit);
|
||||||
merger.reserve(results.size());
|
merger.reserve(results.size());
|
||||||
|
|
||||||
@@ -3682,6 +3712,7 @@ future<> storage_proxy::truncate_blocking(sstring keyspace, sstring cfname) {
|
|||||||
std::rethrow_exception(ep);
|
std::rethrow_exception(ep);
|
||||||
} catch (rpc::timeout_error& e) {
|
} catch (rpc::timeout_error& e) {
|
||||||
slogger.trace("Truncation of {} timed out: {}", cfname, e.what());
|
slogger.trace("Truncation of {} timed out: {}", cfname, e.what());
|
||||||
|
throw;
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -143,7 +143,7 @@ public:
|
|||||||
};
|
};
|
||||||
private:
|
private:
|
||||||
distributed<database>& _db;
|
distributed<database>& _db;
|
||||||
response_id_type _next_response_id = 1; // 0 is reserved for unique_response_handler
|
response_id_type _next_response_id;
|
||||||
std::unordered_map<response_id_type, rh_entry> _response_handlers;
|
std::unordered_map<response_id_type, rh_entry> _response_handlers;
|
||||||
// This buffer holds ids of throttled writes in case resource consumption goes
|
// This buffer holds ids of throttled writes in case resource consumption goes
|
||||||
// below the threshold and we want to unthrottle some of them. Without this throttled
|
// below the threshold and we want to unthrottle some of them. Without this throttled
|
||||||
@@ -263,6 +263,13 @@ public:
|
|||||||
return _db;
|
return _db;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
response_id_type get_next_response_id() {
|
||||||
|
auto next = _next_response_id++;
|
||||||
|
if (next == 0) { // 0 is reserved for unique_response_handler
|
||||||
|
next = _next_response_id++;
|
||||||
|
}
|
||||||
|
return next;
|
||||||
|
}
|
||||||
void init_messaging_service();
|
void init_messaging_service();
|
||||||
|
|
||||||
// Applies mutation on this node.
|
// Applies mutation on this node.
|
||||||
|
|||||||
@@ -303,7 +303,7 @@ void storage_service::prepare_to_join(std::vector<inet_address> loaded_endpoints
|
|||||||
gossiper.check_knows_remote_features(local_features, peer_features);
|
gossiper.check_knows_remote_features(local_features, peer_features);
|
||||||
}
|
}
|
||||||
|
|
||||||
gossiper.reset_endpoint_state_map();
|
gossiper.reset_endpoint_state_map().get();
|
||||||
for (auto ep : loaded_endpoints) {
|
for (auto ep : loaded_endpoints) {
|
||||||
gossiper.add_saved_endpoint(ep);
|
gossiper.add_saved_endpoint(ep);
|
||||||
}
|
}
|
||||||
@@ -317,7 +317,7 @@ void storage_service::prepare_to_join(std::vector<inet_address> loaded_endpoints
|
|||||||
slogger.info("Checking remote features with gossip");
|
slogger.info("Checking remote features with gossip");
|
||||||
gossiper.do_shadow_round().get();
|
gossiper.do_shadow_round().get();
|
||||||
gossiper.check_knows_remote_features(local_features);
|
gossiper.check_knows_remote_features(local_features);
|
||||||
gossiper.reset_endpoint_state_map();
|
gossiper.reset_endpoint_state_map().get();
|
||||||
for (auto ep : loaded_endpoints) {
|
for (auto ep : loaded_endpoints) {
|
||||||
gossiper.add_saved_endpoint(ep);
|
gossiper.add_saved_endpoint(ep);
|
||||||
}
|
}
|
||||||
@@ -419,13 +419,9 @@ void storage_service::join_token_ring(int delay) {
|
|||||||
db::system_keyspace::set_bootstrap_state(db::system_keyspace::bootstrap_state::IN_PROGRESS).get();
|
db::system_keyspace::set_bootstrap_state(db::system_keyspace::bootstrap_state::IN_PROGRESS).get();
|
||||||
}
|
}
|
||||||
set_mode(mode::JOINING, "waiting for ring information", true);
|
set_mode(mode::JOINING, "waiting for ring information", true);
|
||||||
// first sleep the delay to make sure we see all our peers
|
auto& gossiper = gms::get_gossiper().local();
|
||||||
for (int i = 0; i < delay; i += 1000) {
|
// first sleep the delay to make sure we see *at least* one other node
|
||||||
// if we see schema, we can proceed to the next check directly
|
for (int i = 0; i < delay && gossiper.get_live_members().size() < 2; i += 1000) {
|
||||||
if (_db.local().get_version() != database::empty_version) {
|
|
||||||
slogger.debug("got schema: {}", _db.local().get_version());
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
sleep(std::chrono::seconds(1)).get();
|
sleep(std::chrono::seconds(1)).get();
|
||||||
}
|
}
|
||||||
// if our schema hasn't matched yet, keep sleeping until it does
|
// if our schema hasn't matched yet, keep sleeping until it does
|
||||||
@@ -484,7 +480,6 @@ void storage_service::join_token_ring(int delay) {
|
|||||||
for (auto token : _bootstrap_tokens) {
|
for (auto token : _bootstrap_tokens) {
|
||||||
auto existing = _token_metadata.get_endpoint(token);
|
auto existing = _token_metadata.get_endpoint(token);
|
||||||
if (existing) {
|
if (existing) {
|
||||||
auto& gossiper = gms::get_local_gossiper();
|
|
||||||
auto* eps = gossiper.get_endpoint_state_for_endpoint_ptr(*existing);
|
auto* eps = gossiper.get_endpoint_state_for_endpoint_ptr(*existing);
|
||||||
if (eps && eps->get_update_timestamp() > gms::gossiper::clk::now() - std::chrono::milliseconds(delay)) {
|
if (eps && eps->get_update_timestamp() > gms::gossiper::clk::now() - std::chrono::milliseconds(delay)) {
|
||||||
throw std::runtime_error("Cannot replace a live node...");
|
throw std::runtime_error("Cannot replace a live node...");
|
||||||
@@ -622,6 +617,8 @@ void storage_service::bootstrap(std::unordered_set<token> tokens) {
|
|||||||
db::system_keyspace::update_tokens(tokens).get();
|
db::system_keyspace::update_tokens(tokens).get();
|
||||||
auto& gossiper = gms::get_local_gossiper();
|
auto& gossiper = gms::get_local_gossiper();
|
||||||
if (!db().local().is_replacing()) {
|
if (!db().local().is_replacing()) {
|
||||||
|
// Wait until we know tokens of existing node before announcing join status.
|
||||||
|
gossiper.wait_for_range_setup().get();
|
||||||
// if not an existing token then bootstrap
|
// if not an existing token then bootstrap
|
||||||
gossiper.add_local_application_state({
|
gossiper.add_local_application_state({
|
||||||
{ gms::application_state::TOKENS, value_factory.tokens(tokens) },
|
{ gms::application_state::TOKENS, value_factory.tokens(tokens) },
|
||||||
@@ -1541,7 +1538,7 @@ future<> storage_service::check_for_endpoint_collision() {
|
|||||||
throw std::runtime_error("Other bootstrapping/leaving/moving nodes detected, cannot bootstrap while consistent_rangemovement is true (check_for_endpoint_collision)");
|
throw std::runtime_error("Other bootstrapping/leaving/moving nodes detected, cannot bootstrap while consistent_rangemovement is true (check_for_endpoint_collision)");
|
||||||
} else {
|
} else {
|
||||||
gossiper.goto_shadow_round();
|
gossiper.goto_shadow_round();
|
||||||
gossiper.reset_endpoint_state_map();
|
gossiper.reset_endpoint_state_map().get();
|
||||||
found_bootstrapping_node = true;
|
found_bootstrapping_node = true;
|
||||||
auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(gms::gossiper::clk::now() - t).count();
|
auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(gms::gossiper::clk::now() - t).count();
|
||||||
slogger.info("Checking bootstrapping/leaving/moving nodes: node={}, status={}, sleep 1 second and check again ({} seconds elapsed) (check_for_endpoint_collision)", addr, state, elapsed);
|
slogger.info("Checking bootstrapping/leaving/moving nodes: node={}, status={}, sleep 1 second and check again ({} seconds elapsed) (check_for_endpoint_collision)", addr, state, elapsed);
|
||||||
@@ -1553,7 +1550,7 @@ future<> storage_service::check_for_endpoint_collision() {
|
|||||||
}
|
}
|
||||||
} while (found_bootstrapping_node);
|
} while (found_bootstrapping_node);
|
||||||
slogger.info("Checking bootstrapping/leaving/moving nodes: ok (check_for_endpoint_collision)");
|
slogger.info("Checking bootstrapping/leaving/moving nodes: ok (check_for_endpoint_collision)");
|
||||||
gossiper.reset_endpoint_state_map();
|
gossiper.reset_endpoint_state_map().get();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1603,8 +1600,9 @@ future<std::unordered_set<token>> storage_service::prepare_replacement_info() {
|
|||||||
auto tokens = get_tokens_for(replace_address);
|
auto tokens = get_tokens_for(replace_address);
|
||||||
// use the replacee's host Id as our own so we receive hints, etc
|
// use the replacee's host Id as our own so we receive hints, etc
|
||||||
return db::system_keyspace::set_local_host_id(host_id).discard_result().then([replace_address, tokens = std::move(tokens)] {
|
return db::system_keyspace::set_local_host_id(host_id).discard_result().then([replace_address, tokens = std::move(tokens)] {
|
||||||
gms::get_local_gossiper().reset_endpoint_state_map(); // clean up since we have what we need
|
return gms::get_local_gossiper().reset_endpoint_state_map().then([tokens = std::move(tokens)] { // clean up since we have what we need
|
||||||
return make_ready_future<std::unordered_set<token>>(std::move(tokens));
|
return make_ready_future<std::unordered_set<token>>(std::move(tokens));
|
||||||
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -2643,14 +2641,20 @@ future<> storage_service::send_replication_notification(inet_address remote) {
|
|||||||
// notify the remote token
|
// notify the remote token
|
||||||
auto done = make_shared<bool>(false);
|
auto done = make_shared<bool>(false);
|
||||||
auto local = get_broadcast_address();
|
auto local = get_broadcast_address();
|
||||||
|
auto sent = make_lw_shared<int>(0);
|
||||||
slogger.debug("Notifying {} of replication completion", remote);
|
slogger.debug("Notifying {} of replication completion", remote);
|
||||||
return do_until(
|
return do_until(
|
||||||
[done, remote] {
|
[done, sent, remote] {
|
||||||
return *done || !gms::get_local_failure_detector().is_alive(remote);
|
// The node can send REPLICATION_FINISHED to itself, in which case
|
||||||
|
// is_alive will be true. If the messaging_service is stopped,
|
||||||
|
// REPLICATION_FINISHED can be sent infinitely here. To fix, limit
|
||||||
|
// the number of retries.
|
||||||
|
return *done || !gms::get_local_failure_detector().is_alive(remote) || *sent >= 3;
|
||||||
},
|
},
|
||||||
[done, remote, local] {
|
[done, sent, remote, local] {
|
||||||
auto& ms = netw::get_local_messaging_service();
|
auto& ms = netw::get_local_messaging_service();
|
||||||
netw::msg_addr id{remote, 0};
|
netw::msg_addr id{remote, 0};
|
||||||
|
(*sent)++;
|
||||||
return ms.send_replication_finished(id, local).then_wrapped([id, done] (auto&& f) {
|
return ms.send_replication_finished(id, local).then_wrapped([id, done] (auto&& f) {
|
||||||
try {
|
try {
|
||||||
f.get();
|
f.get();
|
||||||
|
|||||||
@@ -179,6 +179,8 @@ struct compaction_read_monitor_generator final : public read_monitor_generator {
|
|||||||
void remove_sstable(bool is_tracking) {
|
void remove_sstable(bool is_tracking) {
|
||||||
if (is_tracking) {
|
if (is_tracking) {
|
||||||
_cf.get_compaction_strategy().get_backlog_tracker().remove_sstable(_sst);
|
_cf.get_compaction_strategy().get_backlog_tracker().remove_sstable(_sst);
|
||||||
|
} else if (_sst) {
|
||||||
|
_cf.get_compaction_strategy().get_backlog_tracker().revert_charges(_sst);
|
||||||
}
|
}
|
||||||
_sst = {};
|
_sst = {};
|
||||||
}
|
}
|
||||||
@@ -303,6 +305,7 @@ public:
|
|||||||
class compaction {
|
class compaction {
|
||||||
protected:
|
protected:
|
||||||
column_family& _cf;
|
column_family& _cf;
|
||||||
|
schema_ptr _schema;
|
||||||
std::vector<shared_sstable> _sstables;
|
std::vector<shared_sstable> _sstables;
|
||||||
uint64_t _max_sstable_size;
|
uint64_t _max_sstable_size;
|
||||||
uint32_t _sstable_level;
|
uint32_t _sstable_level;
|
||||||
@@ -313,6 +316,7 @@ protected:
|
|||||||
protected:
|
protected:
|
||||||
compaction(column_family& cf, std::vector<shared_sstable> sstables, uint64_t max_sstable_size, uint32_t sstable_level)
|
compaction(column_family& cf, std::vector<shared_sstable> sstables, uint64_t max_sstable_size, uint32_t sstable_level)
|
||||||
: _cf(cf)
|
: _cf(cf)
|
||||||
|
, _schema(cf.schema())
|
||||||
, _sstables(std::move(sstables))
|
, _sstables(std::move(sstables))
|
||||||
, _max_sstable_size(max_sstable_size)
|
, _max_sstable_size(max_sstable_size)
|
||||||
, _sstable_level(sstable_level)
|
, _sstable_level(sstable_level)
|
||||||
@@ -361,10 +365,9 @@ private:
|
|||||||
virtual flat_mutation_reader make_sstable_reader(lw_shared_ptr<sstables::sstable_set> ssts) const = 0;
|
virtual flat_mutation_reader make_sstable_reader(lw_shared_ptr<sstables::sstable_set> ssts) const = 0;
|
||||||
|
|
||||||
flat_mutation_reader setup() {
|
flat_mutation_reader setup() {
|
||||||
auto ssts = make_lw_shared<sstables::sstable_set>(_cf.get_compaction_strategy().make_sstable_set(_cf.schema()));
|
auto ssts = make_lw_shared<sstables::sstable_set>(_cf.get_compaction_strategy().make_sstable_set(_schema));
|
||||||
auto schema = _cf.schema();
|
|
||||||
sstring formatted_msg = "[";
|
sstring formatted_msg = "[";
|
||||||
auto fully_expired = get_fully_expired_sstables(_cf, _sstables, gc_clock::now() - schema->gc_grace_seconds());
|
auto fully_expired = get_fully_expired_sstables(_cf, _sstables, gc_clock::now() - _schema->gc_grace_seconds());
|
||||||
|
|
||||||
for (auto& sst : _sstables) {
|
for (auto& sst : _sstables) {
|
||||||
// Compacted sstable keeps track of its ancestors.
|
// Compacted sstable keeps track of its ancestors.
|
||||||
@@ -396,8 +399,8 @@ private:
|
|||||||
}
|
}
|
||||||
formatted_msg += "]";
|
formatted_msg += "]";
|
||||||
_info->sstables = _sstables.size();
|
_info->sstables = _sstables.size();
|
||||||
_info->ks = schema->ks_name();
|
_info->ks = _schema->ks_name();
|
||||||
_info->cf = schema->cf_name();
|
_info->cf = _schema->cf_name();
|
||||||
report_start(formatted_msg);
|
report_start(formatted_msg);
|
||||||
|
|
||||||
return make_sstable_reader(std::move(ssts));
|
return make_sstable_reader(std::move(ssts));
|
||||||
@@ -462,7 +465,7 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
const schema_ptr& schema() const {
|
const schema_ptr& schema() const {
|
||||||
return _cf.schema();
|
return _schema;
|
||||||
}
|
}
|
||||||
public:
|
public:
|
||||||
static future<compaction_info> run(std::unique_ptr<compaction> c);
|
static future<compaction_info> run(std::unique_ptr<compaction> c);
|
||||||
@@ -518,10 +521,10 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
flat_mutation_reader make_sstable_reader(lw_shared_ptr<sstables::sstable_set> ssts) const override {
|
flat_mutation_reader make_sstable_reader(lw_shared_ptr<sstables::sstable_set> ssts) const override {
|
||||||
return ::make_local_shard_sstable_reader(_cf.schema(),
|
return ::make_local_shard_sstable_reader(_schema,
|
||||||
std::move(ssts),
|
std::move(ssts),
|
||||||
query::full_partition_range,
|
query::full_partition_range,
|
||||||
_cf.schema()->full_slice(),
|
_schema->full_slice(),
|
||||||
service::get_local_compaction_priority(),
|
service::get_local_compaction_priority(),
|
||||||
no_resource_tracking(),
|
no_resource_tracking(),
|
||||||
nullptr,
|
nullptr,
|
||||||
@@ -570,7 +573,7 @@ public:
|
|||||||
cfg.monitor = &_active_write_monitors.back();
|
cfg.monitor = &_active_write_monitors.back();
|
||||||
cfg.large_partition_handler = _cf.get_large_partition_handler();
|
cfg.large_partition_handler = _cf.get_large_partition_handler();
|
||||||
// TODO: calculate encoding_stats based on statistics of compacted sstables
|
// TODO: calculate encoding_stats based on statistics of compacted sstables
|
||||||
_writer.emplace(_sst->get_writer(*_cf.schema(), partitions_per_sstable(), cfg, encoding_stats{}, priority));
|
_writer.emplace(_sst->get_writer(*_schema, partitions_per_sstable(), cfg, encoding_stats{}, priority));
|
||||||
}
|
}
|
||||||
return &*_writer;
|
return &*_writer;
|
||||||
}
|
}
|
||||||
@@ -610,7 +613,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::function<bool(const dht::decorated_key&)> filter_func() const override {
|
std::function<bool(const dht::decorated_key&)> filter_func() const override {
|
||||||
dht::token_range_vector owned_ranges = service::get_local_storage_service().get_local_ranges(_cf.schema()->ks_name());
|
dht::token_range_vector owned_ranges = service::get_local_storage_service().get_local_ranges(_schema->ks_name());
|
||||||
|
|
||||||
return [this, owned_ranges = std::move(owned_ranges)] (const dht::decorated_key& dk) {
|
return [this, owned_ranges = std::move(owned_ranges)] (const dht::decorated_key& dk) {
|
||||||
if (dht::shard_of(dk.token()) != engine().cpu_id()) {
|
if (dht::shard_of(dk.token()) != engine().cpu_id()) {
|
||||||
@@ -684,10 +687,10 @@ public:
|
|||||||
|
|
||||||
// Use reader that makes sure no non-local mutation will not be filtered out.
|
// Use reader that makes sure no non-local mutation will not be filtered out.
|
||||||
flat_mutation_reader make_sstable_reader(lw_shared_ptr<sstables::sstable_set> ssts) const override {
|
flat_mutation_reader make_sstable_reader(lw_shared_ptr<sstables::sstable_set> ssts) const override {
|
||||||
return ::make_range_sstable_reader(_cf.schema(),
|
return ::make_range_sstable_reader(_schema,
|
||||||
std::move(ssts),
|
std::move(ssts),
|
||||||
query::full_partition_range,
|
query::full_partition_range,
|
||||||
_cf.schema()->full_slice(),
|
_schema->full_slice(),
|
||||||
service::get_local_compaction_priority(),
|
service::get_local_compaction_priority(),
|
||||||
no_resource_tracking(),
|
no_resource_tracking(),
|
||||||
nullptr,
|
nullptr,
|
||||||
@@ -719,7 +722,7 @@ public:
|
|||||||
cfg.large_partition_handler = _cf.get_large_partition_handler();
|
cfg.large_partition_handler = _cf.get_large_partition_handler();
|
||||||
auto&& priority = service::get_local_compaction_priority();
|
auto&& priority = service::get_local_compaction_priority();
|
||||||
// TODO: calculate encoding_stats based on statistics of compacted sstables
|
// TODO: calculate encoding_stats based on statistics of compacted sstables
|
||||||
writer.emplace(sst->get_writer(*_cf.schema(), partitions_per_sstable(_shard), cfg, encoding_stats{}, priority, _shard));
|
writer.emplace(sst->get_writer(*_schema, partitions_per_sstable(_shard), cfg, encoding_stats{}, priority, _shard));
|
||||||
}
|
}
|
||||||
return &*writer;
|
return &*writer;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -66,6 +66,14 @@ public:
|
|||||||
_cm->deregister_compacting_sstables(_compacting);
|
_cm->deregister_compacting_sstables(_compacting);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Explicitly release compacting sstables
|
||||||
|
void release_compacting(const std::vector<sstables::shared_sstable>& sstables) {
|
||||||
|
_cm->deregister_compacting_sstables(sstables);
|
||||||
|
for (auto& sst : sstables) {
|
||||||
|
_compacting.erase(boost::remove(_compacting, sst), _compacting.end());
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
compaction_weight_registration::compaction_weight_registration(compaction_manager* cm, int weight)
|
compaction_weight_registration::compaction_weight_registration(compaction_manager* cm, int weight)
|
||||||
@@ -564,17 +572,23 @@ future<> compaction_manager::perform_cleanup(column_family* cf) {
|
|||||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
||||||
}
|
}
|
||||||
column_family& cf = *task->compacting_cf;
|
column_family& cf = *task->compacting_cf;
|
||||||
sstables::compaction_descriptor descriptor = sstables::compaction_descriptor(get_candidates(cf));
|
auto sstables = get_candidates(cf);
|
||||||
auto compacting = compacting_sstable_registration(this, descriptor.sstables);
|
auto compacting = make_lw_shared<compacting_sstable_registration>(this, sstables);
|
||||||
|
|
||||||
_stats.pending_tasks--;
|
_stats.pending_tasks--;
|
||||||
_stats.active_tasks++;
|
_stats.active_tasks++;
|
||||||
compaction_backlog_tracker user_initiated(std::make_unique<user_initiated_backlog_tracker>(_compaction_controller.backlog_of_shares(200), _available_memory));
|
compaction_backlog_tracker user_initiated(std::make_unique<user_initiated_backlog_tracker>(_compaction_controller.backlog_of_shares(200), _available_memory));
|
||||||
return do_with(std::move(user_initiated), [this, &cf, descriptor = std::move(descriptor)] (compaction_backlog_tracker& bt) mutable {
|
return do_with(std::move(user_initiated), std::move(sstables), [this, &cf, compacting] (compaction_backlog_tracker& bt,
|
||||||
return with_scheduling_group(_scheduling_group, [this, &cf, descriptor = std::move(descriptor)] () mutable {
|
std::vector<sstables::shared_sstable>& sstables) mutable {
|
||||||
return cf.cleanup_sstables(std::move(descriptor));
|
return with_scheduling_group(_scheduling_group, [this, &cf, &sstables, compacting] () mutable {
|
||||||
|
return do_for_each(sstables, [this, &cf, compacting] (auto& sst) {
|
||||||
|
return cf.cleanup_sstables(sstables::compaction_descriptor({sst})).then([&sst, compacting] {
|
||||||
|
// Releases reference to cleaned sstable such that respective used disk space can be freed.
|
||||||
|
compacting->release_compacting({std::move(sst)});
|
||||||
|
});
|
||||||
|
});
|
||||||
});
|
});
|
||||||
}).then_wrapped([this, task, compacting = std::move(compacting)] (future<> f) mutable {
|
}).then_wrapped([this, task, compacting] (future<> f) mutable {
|
||||||
_stats.active_tasks--;
|
_stats.active_tasks--;
|
||||||
if (!can_proceed(task)) {
|
if (!can_proceed(task)) {
|
||||||
maybe_stop_on_error(std::move(f));
|
maybe_stop_on_error(std::move(f));
|
||||||
|
|||||||
@@ -404,11 +404,6 @@ public:
|
|||||||
auto itw = writes_per_window.find(bound);
|
auto itw = writes_per_window.find(bound);
|
||||||
if (itw != writes_per_window.end()) {
|
if (itw != writes_per_window.end()) {
|
||||||
ow_this_window = &itw->second;
|
ow_this_window = &itw->second;
|
||||||
// We will erase here so we can keep track of which
|
|
||||||
// writes belong to existing windows. Writes that don't belong to any window
|
|
||||||
// are writes in progress to new windows and will be accounted in the final
|
|
||||||
// loop before we return
|
|
||||||
writes_per_window.erase(itw);
|
|
||||||
}
|
}
|
||||||
auto* oc_this_window = &no_oc;
|
auto* oc_this_window = &no_oc;
|
||||||
auto itc = compactions_per_window.find(bound);
|
auto itc = compactions_per_window.find(bound);
|
||||||
@@ -416,6 +411,13 @@ public:
|
|||||||
oc_this_window = &itc->second;
|
oc_this_window = &itc->second;
|
||||||
}
|
}
|
||||||
b += windows.second.backlog(*ow_this_window, *oc_this_window);
|
b += windows.second.backlog(*ow_this_window, *oc_this_window);
|
||||||
|
if (itw != writes_per_window.end()) {
|
||||||
|
// We will erase here so we can keep track of which
|
||||||
|
// writes belong to existing windows. Writes that don't belong to any window
|
||||||
|
// are writes in progress to new windows and will be accounted in the final
|
||||||
|
// loop before we return
|
||||||
|
writes_per_window.erase(itw);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Partial writes that don't belong to any window are accounted here.
|
// Partial writes that don't belong to any window are accounted here.
|
||||||
|
|||||||
@@ -33,6 +33,7 @@
|
|||||||
#include "unimplemented.hh"
|
#include "unimplemented.hh"
|
||||||
#include "stdx.hh"
|
#include "stdx.hh"
|
||||||
#include "segmented_compress_params.hh"
|
#include "segmented_compress_params.hh"
|
||||||
|
#include "utils/class_registrator.hh"
|
||||||
|
|
||||||
namespace sstables {
|
namespace sstables {
|
||||||
|
|
||||||
@@ -299,7 +300,8 @@ size_t local_compression::compress_max_size(size_t input_len) const {
|
|||||||
|
|
||||||
void compression::set_compressor(compressor_ptr c) {
|
void compression::set_compressor(compressor_ptr c) {
|
||||||
if (c) {
|
if (c) {
|
||||||
auto& cn = c->name();
|
unqualified_name uqn(compressor::namespace_prefix, c->name());
|
||||||
|
const sstring& cn = uqn;
|
||||||
name.value = bytes(cn.begin(), cn.end());
|
name.value = bytes(cn.begin(), cn.end());
|
||||||
for (auto& p : c->options()) {
|
for (auto& p : c->options()) {
|
||||||
if (p.first != compression_parameters::SSTABLE_COMPRESSION) {
|
if (p.first != compression_parameters::SSTABLE_COMPRESSION) {
|
||||||
|
|||||||
@@ -53,7 +53,8 @@ atomic_cell make_counter_cell(api::timestamp_type timestamp, bytes_view value) {
|
|||||||
throw marshal_exception("encountered a local shard in a counter cell");
|
throw marshal_exception("encountered a local shard in a counter cell");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
auto shard_count = value.size() / shard_size;
|
auto header_length = (size_t(header_size) + 1) * sizeof(int16_t);
|
||||||
|
auto shard_count = (value.size() - header_length) / shard_size;
|
||||||
if (shard_count != size_t(header_size)) {
|
if (shard_count != size_t(header_size)) {
|
||||||
throw marshal_exception("encountered remote shards in a counter cell");
|
throw marshal_exception("encountered remote shards in a counter cell");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -695,9 +695,12 @@ public:
|
|||||||
// Sets streamed_mutation::_end_of_range when there are no more fragments for the query range.
|
// Sets streamed_mutation::_end_of_range when there are no more fragments for the query range.
|
||||||
// Returns information whether the parser should continue to parse more
|
// Returns information whether the parser should continue to parse more
|
||||||
// input and produce more fragments or we have collected enough and should yield.
|
// input and produce more fragments or we have collected enough and should yield.
|
||||||
|
// Returns proceed:yes only when all pending fragments have been pushed.
|
||||||
proceed push_ready_fragments() {
|
proceed push_ready_fragments() {
|
||||||
if (_ready) {
|
if (_ready) {
|
||||||
return push_ready_fragments_with_ready_set();
|
if (push_ready_fragments_with_ready_set() == proceed::no) {
|
||||||
|
return proceed::no;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (_out_of_range) {
|
if (_out_of_range) {
|
||||||
|
|||||||
@@ -787,6 +787,11 @@ future<> parse(sstable_version_types v, random_access_reader& in, utils::estimat
|
|||||||
if (length == 0) {
|
if (length == 0) {
|
||||||
throw malformed_sstable_exception("Estimated histogram with zero size found. Can't continue!");
|
throw malformed_sstable_exception("Estimated histogram with zero size found. Can't continue!");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Arrays are potentially pre-initialized by the estimated_histogram constructor.
|
||||||
|
eh.bucket_offsets.clear();
|
||||||
|
eh.buckets.clear();
|
||||||
|
|
||||||
eh.bucket_offsets.reserve(length - 1);
|
eh.bucket_offsets.reserve(length - 1);
|
||||||
eh.buckets.reserve(length);
|
eh.buckets.reserve(length);
|
||||||
|
|
||||||
|
|||||||
@@ -292,7 +292,7 @@ void stream_manager::on_restart(inet_address endpoint, endpoint_state ep_state)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void stream_manager::on_dead(inet_address endpoint, endpoint_state ep_state) {
|
void stream_manager::on_dead(inet_address endpoint, endpoint_state ep_state) {
|
||||||
if (has_peer(endpoint) && ep_state.is_shutdown()) {
|
if (has_peer(endpoint)) {
|
||||||
sslog.info("stream_manager: Close all stream_session with peer = {} in on_dead", endpoint);
|
sslog.info("stream_manager: Close all stream_session with peer = {} in on_dead", endpoint);
|
||||||
get_stream_manager().invoke_on_all([endpoint] (auto& sm) {
|
get_stream_manager().invoke_on_all([endpoint] (auto& sm) {
|
||||||
sm.fail_sessions(endpoint);
|
sm.fail_sessions(endpoint);
|
||||||
|
|||||||
@@ -509,8 +509,7 @@ void stream_session::close_session(stream_session_state final_state) {
|
|||||||
_stream_result->handle_session_complete(shared_from_this());
|
_stream_result->handle_session_complete(shared_from_this());
|
||||||
}
|
}
|
||||||
|
|
||||||
sslog.debug("[Stream #{}] close_session session={}, state={}, cancel keep_alive timer", plan_id(), this, final_state);
|
sslog.debug("[Stream #{}] close_session session={}, state={}", plan_id(), this, final_state);
|
||||||
_keep_alive.cancel();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -537,41 +536,6 @@ bool stream_session::is_initialized() const {
|
|||||||
|
|
||||||
void stream_session::init(shared_ptr<stream_result_future> stream_result_) {
|
void stream_session::init(shared_ptr<stream_result_future> stream_result_) {
|
||||||
_stream_result = stream_result_;
|
_stream_result = stream_result_;
|
||||||
_keep_alive.set_callback([this] {
|
|
||||||
auto plan_id = this->plan_id();
|
|
||||||
auto peer = this->peer;
|
|
||||||
get_local_stream_manager().get_progress_on_all_shards(plan_id, peer).then([this, peer, plan_id] (stream_bytes sbytes) {
|
|
||||||
if (this->_is_aborted) {
|
|
||||||
sslog.info("[Stream #{}] The session {} is closed, keep alive timer will do nothing", plan_id, this);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
auto now = lowres_clock::now();
|
|
||||||
sslog.debug("[Stream #{}] keep alive timer callback sbytes old: tx={}, rx={} new: tx={} rx={}",
|
|
||||||
plan_id, this->_last_stream_bytes.bytes_sent, this->_last_stream_bytes.bytes_received,
|
|
||||||
sbytes.bytes_sent, sbytes.bytes_received);
|
|
||||||
if (sbytes.bytes_sent > this->_last_stream_bytes.bytes_sent ||
|
|
||||||
sbytes.bytes_received > this->_last_stream_bytes.bytes_received) {
|
|
||||||
sslog.debug("[Stream #{}] The session {} made progress with peer {}", plan_id, this, peer);
|
|
||||||
// Progress has been made
|
|
||||||
this->_last_stream_bytes = sbytes;
|
|
||||||
this->_last_stream_progress = now;
|
|
||||||
this->start_keep_alive_timer();
|
|
||||||
} else if (now - this->_last_stream_progress >= this->_keep_alive_timeout) {
|
|
||||||
// Timeout
|
|
||||||
sslog.info("[Stream #{}] The session {} is idle for {} seconds, the peer {} is probably gone, close it",
|
|
||||||
plan_id, this, this->_keep_alive_timeout.count(), peer);
|
|
||||||
this->on_error();
|
|
||||||
} else {
|
|
||||||
// Start the timer to check again
|
|
||||||
sslog.info("[Stream #{}] The session {} made no progress with peer {}", plan_id, this, peer);
|
|
||||||
this->start_keep_alive_timer();
|
|
||||||
}
|
|
||||||
}).handle_exception([plan_id, peer, session = this->shared_from_this()] (auto ep) {
|
|
||||||
sslog.info("[Stream #{}] keep alive timer callback fails with peer {}: {}", plan_id, peer, ep);
|
|
||||||
});
|
|
||||||
});
|
|
||||||
_last_stream_progress = lowres_clock::now();
|
|
||||||
start_keep_alive_timer();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
utils::UUID stream_session::plan_id() {
|
utils::UUID stream_session::plan_id() {
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user