Compare commits
77 Commits
scylla-2.0
...
next-2.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f19fbc3058 | ||
|
|
8eddb28954 | ||
|
|
5aaa8031a2 | ||
|
|
3d50e7077a | ||
|
|
4063e92f57 | ||
|
|
b6de30bb87 | ||
|
|
c23e3a1eda | ||
|
|
2732b6cf1d | ||
|
|
49722e74da | ||
|
|
ba7623ac55 | ||
|
|
9db2ff36f2 | ||
|
|
378029b8da | ||
|
|
2b7644dc36 | ||
|
|
4bd931ba59 | ||
|
|
78eebe74c7 | ||
|
|
30e21afb13 | ||
|
|
e8616b10e5 | ||
|
|
0cb842dde1 | ||
|
|
7945f5edda | ||
|
|
9c2a328000 | ||
|
|
98498c679b | ||
|
|
b147b5854b | ||
|
|
226095f4db | ||
|
|
3dd1f68590 | ||
|
|
e08e4c75d7 | ||
|
|
8bcb4e7439 | ||
|
|
97369adb1c | ||
|
|
c89ead5e55 | ||
|
|
46fd96d877 | ||
|
|
19806fc056 | ||
|
|
0b314a745f | ||
|
|
73870751d9 | ||
|
|
c8983034c0 | ||
|
|
77d14a6256 | ||
|
|
21259bcfb3 | ||
|
|
9f02b44537 | ||
|
|
9dc7a63014 | ||
|
|
5dcef25f6f | ||
|
|
f763bf7f0d | ||
|
|
9af9ca0d60 | ||
|
|
fbc30221b5 | ||
|
|
d17aa3cd1c | ||
|
|
f7e79322f1 | ||
|
|
e31331bdb2 | ||
|
|
6873e26060 | ||
|
|
24bee2c887 | ||
|
|
8bba15a709 | ||
|
|
ad68d3ecfd | ||
|
|
707ac9242e | ||
|
|
dae0563ff8 | ||
|
|
7ba50b87f1 | ||
|
|
1da277c78e | ||
|
|
36dfd4b990 | ||
|
|
c5ce2765dc | ||
|
|
25ffdf527b | ||
|
|
dbc6d9fe01 | ||
|
|
915683bddd | ||
|
|
7ca8988d0e | ||
|
|
383d7e6c91 | ||
|
|
7bef696ee5 | ||
|
|
a603111a85 | ||
|
|
d5884d3c7c | ||
|
|
e6cb685178 | ||
|
|
cd19e5885a | ||
|
|
f367031016 | ||
|
|
7ae67331ad | ||
|
|
0e6561169b | ||
|
|
5b3aa8e90d | ||
|
|
db9d502f82 | ||
|
|
cde39bffd0 | ||
|
|
0fbcc852a5 | ||
|
|
16d5f68886 | ||
|
|
91540c8181 | ||
|
|
eaa8ed929f | ||
|
|
5ba1621716 | ||
|
|
b4f515035a | ||
|
|
d55e3f6a7f |
@@ -1,6 +1,6 @@
|
||||
#!/bin/sh
|
||||
|
||||
VERSION=2.0.2
|
||||
VERSION=2.0.4
|
||||
|
||||
if test -f version
|
||||
then
|
||||
|
||||
@@ -114,7 +114,7 @@ struct hash<auth::authenticated_user> {
|
||||
|
||||
class auth::auth::permissions_cache {
|
||||
public:
|
||||
typedef utils::loading_cache<std::pair<authenticated_user, data_resource>, permission_set, utils::tuple_hash> cache_type;
|
||||
typedef utils::loading_cache<std::pair<authenticated_user, data_resource>, permission_set, utils::loading_cache_reload_enabled::yes, utils::simple_entry_size<permission_set>, utils::tuple_hash> cache_type;
|
||||
typedef typename cache_type::key_type key_type;
|
||||
|
||||
permissions_cache()
|
||||
|
||||
@@ -70,7 +70,7 @@ public:
|
||||
{
|
||||
if (!with_static_row) {
|
||||
if (_current == _end) {
|
||||
_current_start = _current_end = position_in_partition_view::after_all_clustered_rows();
|
||||
_current_start = position_in_partition_view::before_all_clustered_rows();
|
||||
} else {
|
||||
_current_start = position_in_partition_view::for_range_start(*_current);
|
||||
_current_end = position_in_partition_view::for_range_end(*_current);
|
||||
|
||||
20
configure.py
20
configure.py
@@ -238,6 +238,7 @@ scylla_tests = [
|
||||
'tests/view_schema_test',
|
||||
'tests/counter_test',
|
||||
'tests/cell_locker_test',
|
||||
'tests/loading_cache_test',
|
||||
]
|
||||
|
||||
apps = [
|
||||
@@ -730,6 +731,9 @@ if not try_compile(compiler=args.cxx, source='''\
|
||||
print('Installed boost version too old. Please update {}.'.format(pkgname("boost-devel")))
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
has_sanitize_address_use_after_scope = try_compile(compiler=args.cxx, flags=['-fsanitize-address-use-after-scope'], source='int f() {}')
|
||||
|
||||
defines = ' '.join(['-D' + d for d in defines])
|
||||
|
||||
globals().update(vars(args))
|
||||
@@ -863,7 +867,7 @@ with open(buildfile, 'w') as f:
|
||||
f.write(textwrap.dedent('''\
|
||||
cxxflags_{mode} = -I. -I $builddir/{mode}/gen -I seastar -I seastar/build/{mode}/gen
|
||||
rule cxx.{mode}
|
||||
command = $cxx -MD -MT $out -MF $out.d {seastar_cflags} $cxxflags $cxxflags_{mode} -c -o $out $in
|
||||
command = $cxx -MD -MT $out -MF $out.d {seastar_cflags} $cxxflags $cxxflags_{mode} $obj_cxxflags -c -o $out $in
|
||||
description = CXX $out
|
||||
depfile = $out.d
|
||||
rule link.{mode}
|
||||
@@ -881,7 +885,16 @@ with open(buildfile, 'w') as f:
|
||||
command = thrift -gen cpp:cob_style -out $builddir/{mode}/gen $in
|
||||
description = THRIFT $in
|
||||
rule antlr3.{mode}
|
||||
command = sed -e '/^#if 0/,/^#endif/d' $in > $builddir/{mode}/gen/$in && antlr3 $builddir/{mode}/gen/$in && sed -i 's/^\\( *\)\\(ImplTraits::CommonTokenType\\* [a-zA-Z0-9_]* = NULL;\\)$$/\\1const \\2/' build/{mode}/gen/${{stem}}Parser.cpp
|
||||
# We replace many local `ExceptionBaseType* ex` variables with a single function-scope one.
|
||||
# Because we add such a variable to every function, and because `ExceptionBaseType` is not a global
|
||||
# name, we also add a global typedef to avoid compilation errors.
|
||||
command = sed -e '/^#if 0/,/^#endif/d' $in > $builddir/{mode}/gen/$in $
|
||||
&& antlr3 $builddir/{mode}/gen/$in $
|
||||
&& sed -i -e 's/^\\( *\)\\(ImplTraits::CommonTokenType\\* [a-zA-Z0-9_]* = NULL;\\)$$/\\1const \\2/' $
|
||||
-e '1i using ExceptionBaseType = int;' $
|
||||
-e 's/^{{/{{ ExceptionBaseType\* ex = nullptr;/; $
|
||||
s/ExceptionBaseType\* ex = new/ex = new/' $
|
||||
build/{mode}/gen/${{stem}}Parser.cpp
|
||||
description = ANTLR3 $in
|
||||
''').format(mode = mode, **modeval))
|
||||
f.write('build {mode}: phony {artifacts}\n'.format(mode = mode,
|
||||
@@ -998,6 +1011,9 @@ with open(buildfile, 'w') as f:
|
||||
for cc in grammar.sources('$builddir/{}/gen'.format(mode)):
|
||||
obj = cc.replace('.cpp', '.o')
|
||||
f.write('build {}: cxx.{} {} || {}\n'.format(obj, mode, cc, ' '.join(serializers)))
|
||||
if cc.endswith('Parser.cpp') and has_sanitize_address_use_after_scope:
|
||||
# Parsers end up using huge amounts of stack space and overflowing their stack
|
||||
f.write(' obj_cxxflags = -fno-sanitize-address-use-after-scope\n')
|
||||
f.write('build seastar/build/{mode}/libseastar.a seastar/build/{mode}/apps/iotune/iotune seastar/build/{mode}/gen/http/request_parser.hh seastar/build/{mode}/gen/http/http_response_parser.hh: ninja {seastar_deps}\n'
|
||||
.format(**locals()))
|
||||
f.write(' pool = seastar_pool\n')
|
||||
|
||||
171
cql3/prepared_statements_cache.hh
Normal file
171
cql3/prepared_statements_cache.hh
Normal file
@@ -0,0 +1,171 @@
|
||||
/*
|
||||
* Copyright (C) 2017 ScyllaDB
|
||||
*
|
||||
* Modified by ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "utils/loading_cache.hh"
|
||||
#include "cql3/statements/prepared_statement.hh"
|
||||
|
||||
namespace cql3 {
|
||||
|
||||
using prepared_cache_entry = std::unique_ptr<statements::prepared_statement>;
|
||||
|
||||
struct prepared_cache_entry_size {
|
||||
size_t operator()(const prepared_cache_entry& val) {
|
||||
// TODO: improve the size approximation
|
||||
return 10000;
|
||||
}
|
||||
};
|
||||
|
||||
typedef bytes cql_prepared_id_type;
|
||||
typedef int32_t thrift_prepared_id_type;
|
||||
|
||||
/// \brief The key of the prepared statements cache
|
||||
///
|
||||
/// We are going to store the CQL and Thrift prepared statements in the same cache, therefore we need to generate a key
|
||||
/// that is going to be unique in both cases. Thrift uses int32_t as a prepared statement ID, CQL uses an MD5 digest.
|
||||
///
|
||||
/// We are going to use an std::pair<CQL_PREP_ID_TYPE, int64_t> as a key. For CQL statements we will use {CQL_PREP_ID, std::numeric_limits<int64_t>::max()} as a key
|
||||
/// and for Thrift - {CQL_PREP_ID_TYPE(0), THRIFT_PREP_ID}. This way CQL and Thrift keys' values will never collide.
|
||||
class prepared_cache_key_type {
|
||||
public:
|
||||
using cache_key_type = std::pair<cql_prepared_id_type, int64_t>;
|
||||
|
||||
private:
|
||||
cache_key_type _key;
|
||||
|
||||
public:
|
||||
prepared_cache_key_type() = default;
|
||||
explicit prepared_cache_key_type(cql_prepared_id_type cql_id) : _key(std::move(cql_id), std::numeric_limits<int64_t>::max()) {}
|
||||
explicit prepared_cache_key_type(thrift_prepared_id_type thrift_id) : _key(cql_prepared_id_type(), thrift_id) {}
|
||||
|
||||
cache_key_type& key() { return _key; }
|
||||
const cache_key_type& key() const { return _key; }
|
||||
|
||||
static const cql_prepared_id_type& cql_id(const prepared_cache_key_type& key) {
|
||||
return key.key().first;
|
||||
}
|
||||
static thrift_prepared_id_type thrift_id(const prepared_cache_key_type& key) {
|
||||
return key.key().second;
|
||||
}
|
||||
};
|
||||
|
||||
class prepared_statements_cache {
|
||||
public:
|
||||
struct stats {
|
||||
uint64_t prepared_cache_evictions = 0;
|
||||
};
|
||||
|
||||
static stats& shard_stats() {
|
||||
static thread_local stats _stats;
|
||||
return _stats;
|
||||
}
|
||||
|
||||
struct prepared_cache_stats_updater {
|
||||
static void inc_hits() noexcept {}
|
||||
static void inc_misses() noexcept {}
|
||||
static void inc_blocks() noexcept {}
|
||||
static void inc_evictions() noexcept {
|
||||
++shard_stats().prepared_cache_evictions;
|
||||
}
|
||||
};
|
||||
|
||||
private:
|
||||
using cache_key_type = typename prepared_cache_key_type::cache_key_type;
|
||||
using cache_type = utils::loading_cache<cache_key_type, prepared_cache_entry, utils::loading_cache_reload_enabled::no, prepared_cache_entry_size, utils::tuple_hash, std::equal_to<cache_key_type>, prepared_cache_stats_updater>;
|
||||
using cache_value_ptr = typename cache_type::value_ptr;
|
||||
using cache_iterator = typename cache_type::iterator;
|
||||
using checked_weak_ptr = typename statements::prepared_statement::checked_weak_ptr;
|
||||
struct value_extractor_fn {
|
||||
checked_weak_ptr operator()(prepared_cache_entry& e) const {
|
||||
return e->checked_weak_from_this();
|
||||
}
|
||||
};
|
||||
|
||||
static const std::chrono::minutes entry_expiry;
|
||||
|
||||
public:
|
||||
using key_type = prepared_cache_key_type;
|
||||
using value_type = checked_weak_ptr;
|
||||
using statement_is_too_big = typename cache_type::entry_is_too_big;
|
||||
/// \note both iterator::reference and iterator::value_type are checked_weak_ptr
|
||||
using iterator = boost::transform_iterator<value_extractor_fn, cache_iterator>;
|
||||
|
||||
private:
|
||||
cache_type _cache;
|
||||
value_extractor_fn _value_extractor_fn;
|
||||
|
||||
public:
|
||||
prepared_statements_cache(logging::logger& logger)
|
||||
: _cache(memory::stats().total_memory() / 256, entry_expiry, logger)
|
||||
{}
|
||||
|
||||
template <typename LoadFunc>
|
||||
future<value_type> get(const key_type& key, LoadFunc&& load) {
|
||||
return _cache.get_ptr(key.key(), [load = std::forward<LoadFunc>(load)] (const cache_key_type&) { return load(); }).then([] (cache_value_ptr v_ptr) {
|
||||
return make_ready_future<value_type>((*v_ptr)->checked_weak_from_this());
|
||||
});
|
||||
}
|
||||
|
||||
iterator find(const key_type& key) {
|
||||
return boost::make_transform_iterator(_cache.find(key.key()), _value_extractor_fn);
|
||||
}
|
||||
|
||||
iterator end() {
|
||||
return boost::make_transform_iterator(_cache.end(), _value_extractor_fn);
|
||||
}
|
||||
|
||||
iterator begin() {
|
||||
return boost::make_transform_iterator(_cache.begin(), _value_extractor_fn);
|
||||
}
|
||||
|
||||
template <typename Pred>
|
||||
void remove_if(Pred&& pred) {
|
||||
static_assert(std::is_same<bool, std::result_of_t<Pred(::shared_ptr<cql_statement>)>>::value, "Bad Pred signature");
|
||||
|
||||
_cache.remove_if([&pred] (const prepared_cache_entry& e) {
|
||||
return pred(e->statement);
|
||||
});
|
||||
}
|
||||
|
||||
size_t size() const {
|
||||
return _cache.size();
|
||||
}
|
||||
|
||||
size_t memory_footprint() const {
|
||||
return _cache.memory_footprint();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace std { // for prepared_statements_cache log printouts
|
||||
inline std::ostream& operator<<(std::ostream& os, const typename cql3::prepared_cache_key_type::cache_key_type& p) {
|
||||
os << "{cql_id: " << p.first << ", thrift_id: " << p.second << "}";
|
||||
return os;
|
||||
}
|
||||
|
||||
inline std::ostream& operator<<(std::ostream& os, const cql3::prepared_cache_key_type& p) {
|
||||
os << p.key();
|
||||
return os;
|
||||
}
|
||||
}
|
||||
@@ -57,11 +57,14 @@ using namespace statements;
|
||||
using namespace cql_transport::messages;
|
||||
|
||||
logging::logger log("query_processor");
|
||||
logging::logger prep_cache_log("prepared_statements_cache");
|
||||
|
||||
distributed<query_processor> _the_query_processor;
|
||||
|
||||
const sstring query_processor::CQL_VERSION = "3.3.1";
|
||||
|
||||
const std::chrono::minutes prepared_statements_cache::entry_expiry = std::chrono::minutes(60);
|
||||
|
||||
class query_processor::internal_state {
|
||||
service::query_state _qs;
|
||||
public:
|
||||
@@ -95,6 +98,7 @@ query_processor::query_processor(distributed<service::storage_proxy>& proxy,
|
||||
, _proxy(proxy)
|
||||
, _db(db)
|
||||
, _internal_state(new internal_state())
|
||||
, _prepared_cache(prep_cache_log)
|
||||
{
|
||||
namespace sm = seastar::metrics;
|
||||
|
||||
@@ -130,6 +134,15 @@ query_processor::query_processor(distributed<service::storage_proxy>& proxy,
|
||||
|
||||
sm::make_derive("batches_unlogged_from_logged", _cql_stats.batches_unlogged_from_logged,
|
||||
sm::description("Counts a total number of LOGGED batches that were executed as UNLOGGED batches.")),
|
||||
|
||||
sm::make_derive("prepared_cache_evictions", [] { return prepared_statements_cache::shard_stats().prepared_cache_evictions; },
|
||||
sm::description("Counts a number of prepared statements cache entries evictions.")),
|
||||
|
||||
sm::make_gauge("prepared_cache_size", [this] { return _prepared_cache.size(); },
|
||||
sm::description("A number of entries in the prepared statements cache.")),
|
||||
|
||||
sm::make_gauge("prepared_cache_memory_footprint", [this] { return _prepared_cache.memory_footprint(); },
|
||||
sm::description("Size (in bytes) of the prepared statements cache.")),
|
||||
});
|
||||
|
||||
service::get_local_migration_manager().register_listener(_migration_subscriber.get());
|
||||
@@ -197,31 +210,21 @@ query_processor::process_statement(::shared_ptr<cql_statement> statement,
|
||||
}
|
||||
|
||||
future<::shared_ptr<cql_transport::messages::result_message::prepared>>
|
||||
query_processor::prepare(const std::experimental::string_view& query_string, service::query_state& query_state)
|
||||
query_processor::prepare(sstring query_string, service::query_state& query_state)
|
||||
{
|
||||
auto& client_state = query_state.get_client_state();
|
||||
return prepare(query_string, client_state, client_state.is_thrift());
|
||||
return prepare(std::move(query_string), client_state, client_state.is_thrift());
|
||||
}
|
||||
|
||||
future<::shared_ptr<cql_transport::messages::result_message::prepared>>
|
||||
query_processor::prepare(const std::experimental::string_view& query_string,
|
||||
const service::client_state& client_state,
|
||||
bool for_thrift)
|
||||
query_processor::prepare(sstring query_string, const service::client_state& client_state, bool for_thrift)
|
||||
{
|
||||
auto existing = get_stored_prepared_statement(query_string, client_state.get_raw_keyspace(), for_thrift);
|
||||
if (existing) {
|
||||
return make_ready_future<::shared_ptr<cql_transport::messages::result_message::prepared>>(existing);
|
||||
using namespace cql_transport::messages;
|
||||
if (for_thrift) {
|
||||
return prepare_one<result_message::prepared::thrift>(std::move(query_string), client_state, compute_thrift_id, prepared_cache_key_type::thrift_id);
|
||||
} else {
|
||||
return prepare_one<result_message::prepared::cql>(std::move(query_string), client_state, compute_id, prepared_cache_key_type::cql_id);
|
||||
}
|
||||
|
||||
return futurize<::shared_ptr<cql_transport::messages::result_message::prepared>>::apply([this, &query_string, &client_state, for_thrift] {
|
||||
auto prepared = get_statement(query_string, client_state);
|
||||
auto bound_terms = prepared->statement->get_bound_terms();
|
||||
if (bound_terms > std::numeric_limits<uint16_t>::max()) {
|
||||
throw exceptions::invalid_request_exception(sprint("Too many markers(?). %d markers exceed the allowed maximum of %d", bound_terms, std::numeric_limits<uint16_t>::max()));
|
||||
}
|
||||
assert(bound_terms == prepared->bound_names.size());
|
||||
return store_prepared_statement(query_string, client_state.get_raw_keyspace(), std::move(prepared), for_thrift);
|
||||
});
|
||||
}
|
||||
|
||||
::shared_ptr<cql_transport::messages::result_message::prepared>
|
||||
@@ -229,50 +232,11 @@ query_processor::get_stored_prepared_statement(const std::experimental::string_v
|
||||
const sstring& keyspace,
|
||||
bool for_thrift)
|
||||
{
|
||||
using namespace cql_transport::messages;
|
||||
if (for_thrift) {
|
||||
auto statement_id = compute_thrift_id(query_string, keyspace);
|
||||
auto it = _thrift_prepared_statements.find(statement_id);
|
||||
if (it == _thrift_prepared_statements.end()) {
|
||||
return ::shared_ptr<result_message::prepared>();
|
||||
}
|
||||
return ::make_shared<result_message::prepared::thrift>(statement_id, it->second->checked_weak_from_this());
|
||||
return get_stored_prepared_statement_one<result_message::prepared::thrift>(query_string, keyspace, compute_thrift_id, prepared_cache_key_type::thrift_id);
|
||||
} else {
|
||||
auto statement_id = compute_id(query_string, keyspace);
|
||||
auto it = _prepared_statements.find(statement_id);
|
||||
if (it == _prepared_statements.end()) {
|
||||
return ::shared_ptr<result_message::prepared>();
|
||||
}
|
||||
return ::make_shared<result_message::prepared::cql>(statement_id, it->second->checked_weak_from_this());
|
||||
}
|
||||
}
|
||||
|
||||
future<::shared_ptr<cql_transport::messages::result_message::prepared>>
|
||||
query_processor::store_prepared_statement(const std::experimental::string_view& query_string,
|
||||
const sstring& keyspace,
|
||||
std::unique_ptr<statements::prepared_statement> prepared,
|
||||
bool for_thrift)
|
||||
{
|
||||
#if 0
|
||||
// Concatenate the current keyspace so we don't mix prepared statements between keyspace (#5352).
|
||||
// (if the keyspace is null, queryString has to have a fully-qualified keyspace so it's fine.
|
||||
long statementSize = measure(prepared.statement);
|
||||
// don't execute the statement if it's bigger than the allowed threshold
|
||||
if (statementSize > MAX_CACHE_PREPARED_MEMORY)
|
||||
throw new InvalidRequestException(String.format("Prepared statement of size %d bytes is larger than allowed maximum of %d bytes.",
|
||||
statementSize,
|
||||
MAX_CACHE_PREPARED_MEMORY));
|
||||
#endif
|
||||
prepared->raw_cql_statement = query_string.data();
|
||||
if (for_thrift) {
|
||||
auto statement_id = compute_thrift_id(query_string, keyspace);
|
||||
auto msg = ::make_shared<result_message::prepared::thrift>(statement_id, prepared->checked_weak_from_this());
|
||||
_thrift_prepared_statements.emplace(statement_id, std::move(prepared));
|
||||
return make_ready_future<::shared_ptr<result_message::prepared>>(std::move(msg));
|
||||
} else {
|
||||
auto statement_id = compute_id(query_string, keyspace);
|
||||
auto msg = ::make_shared<result_message::prepared::cql>(statement_id, prepared->checked_weak_from_this());
|
||||
_prepared_statements.emplace(statement_id, std::move(prepared));
|
||||
return make_ready_future<::shared_ptr<result_message::prepared>>(std::move(msg));
|
||||
return get_stored_prepared_statement_one<result_message::prepared::cql>(query_string, keyspace, compute_id, prepared_cache_key_type::cql_id);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -289,19 +253,19 @@ static sstring hash_target(const std::experimental::string_view& query_string, c
|
||||
return keyspace + query_string.to_string();
|
||||
}
|
||||
|
||||
bytes query_processor::compute_id(const std::experimental::string_view& query_string, const sstring& keyspace)
|
||||
prepared_cache_key_type query_processor::compute_id(const std::experimental::string_view& query_string, const sstring& keyspace)
|
||||
{
|
||||
return md5_calculate(hash_target(query_string, keyspace));
|
||||
return prepared_cache_key_type(md5_calculate(hash_target(query_string, keyspace)));
|
||||
}
|
||||
|
||||
int32_t query_processor::compute_thrift_id(const std::experimental::string_view& query_string, const sstring& keyspace)
|
||||
prepared_cache_key_type query_processor::compute_thrift_id(const std::experimental::string_view& query_string, const sstring& keyspace)
|
||||
{
|
||||
auto target = hash_target(query_string, keyspace);
|
||||
uint32_t h = 0;
|
||||
for (auto&& c : hash_target(query_string, keyspace)) {
|
||||
h = 31*h + c;
|
||||
}
|
||||
return static_cast<int32_t>(h);
|
||||
return prepared_cache_key_type(static_cast<int32_t>(h));
|
||||
}
|
||||
|
||||
std::unique_ptr<prepared_statement>
|
||||
@@ -527,7 +491,7 @@ void query_processor::migration_subscriber::on_drop_view(const sstring& ks_name,
|
||||
|
||||
void query_processor::migration_subscriber::remove_invalid_prepared_statements(sstring ks_name, std::experimental::optional<sstring> cf_name)
|
||||
{
|
||||
_qp->invalidate_prepared_statements([&] (::shared_ptr<cql_statement> stmt) {
|
||||
_qp->_prepared_cache.remove_if([&] (::shared_ptr<cql_statement> stmt) {
|
||||
return this->should_invalidate(ks_name, cf_name, stmt);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -57,6 +57,7 @@
|
||||
#include "statements/prepared_statement.hh"
|
||||
#include "transport/messages/result_message.hh"
|
||||
#include "untyped_result_set.hh"
|
||||
#include "prepared_statements_cache.hh"
|
||||
|
||||
namespace cql3 {
|
||||
|
||||
@@ -64,9 +65,32 @@ namespace statements {
|
||||
class batch_statement;
|
||||
}
|
||||
|
||||
class prepared_statement_is_too_big : public std::exception {
|
||||
public:
|
||||
static constexpr int max_query_prefix = 100;
|
||||
|
||||
private:
|
||||
sstring _msg;
|
||||
|
||||
public:
|
||||
prepared_statement_is_too_big(const sstring& query_string)
|
||||
: _msg(seastar::format("Prepared statement is too big: {}", query_string.substr(0, max_query_prefix)))
|
||||
{
|
||||
// mark that we clipped the query string
|
||||
if (query_string.size() > max_query_prefix) {
|
||||
_msg += "...";
|
||||
}
|
||||
}
|
||||
|
||||
virtual const char* what() const noexcept override {
|
||||
return _msg.c_str();
|
||||
}
|
||||
};
|
||||
|
||||
class query_processor {
|
||||
public:
|
||||
class migration_subscriber;
|
||||
|
||||
private:
|
||||
std::unique_ptr<migration_subscriber> _migration_subscriber;
|
||||
distributed<service::storage_proxy>& _proxy;
|
||||
@@ -127,9 +151,7 @@ private:
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
std::unordered_map<bytes, std::unique_ptr<statements::prepared_statement>> _prepared_statements;
|
||||
std::unordered_map<int32_t, std::unique_ptr<statements::prepared_statement>> _thrift_prepared_statements;
|
||||
prepared_statements_cache _prepared_cache;
|
||||
std::unordered_map<sstring, std::unique_ptr<statements::prepared_statement>> _internal_statements;
|
||||
#if 0
|
||||
|
||||
@@ -221,21 +243,14 @@ private:
|
||||
}
|
||||
#endif
|
||||
public:
|
||||
statements::prepared_statement::checked_weak_ptr get_prepared(const bytes& id) {
|
||||
auto it = _prepared_statements.find(id);
|
||||
if (it == _prepared_statements.end()) {
|
||||
statements::prepared_statement::checked_weak_ptr get_prepared(const prepared_cache_key_type& key) {
|
||||
auto it = _prepared_cache.find(key);
|
||||
if (it == _prepared_cache.end()) {
|
||||
return statements::prepared_statement::checked_weak_ptr();
|
||||
}
|
||||
return it->second->checked_weak_from_this();
|
||||
return *it;
|
||||
}
|
||||
|
||||
/// Looks up a Thrift prepared statement by its id.
/// Returns a default-constructed weak pointer when the id is not present.
statements::prepared_statement::checked_weak_ptr get_prepared_for_thrift(int32_t id) {
    auto found = _thrift_prepared_statements.find(id);
    if (found != _thrift_prepared_statements.end()) {
        return found->second->checked_weak_from_this();
    }
    return statements::prepared_statement::checked_weak_ptr();
}
|
||||
#if 0
|
||||
public static void validateKey(ByteBuffer key) throws InvalidRequestException
|
||||
{
|
||||
@@ -435,42 +450,61 @@ public:
|
||||
#endif
|
||||
|
||||
future<::shared_ptr<cql_transport::messages::result_message::prepared>>
|
||||
prepare(const std::experimental::string_view& query_string, service::query_state& query_state);
|
||||
prepare(sstring query_string, service::query_state& query_state);
|
||||
|
||||
future<::shared_ptr<cql_transport::messages::result_message::prepared>>
|
||||
prepare(const std::experimental::string_view& query_string, const service::client_state& client_state, bool for_thrift);
|
||||
prepare(sstring query_string, const service::client_state& client_state, bool for_thrift);
|
||||
|
||||
static bytes compute_id(const std::experimental::string_view& query_string, const sstring& keyspace);
|
||||
static int32_t compute_thrift_id(const std::experimental::string_view& query_string, const sstring& keyspace);
|
||||
static prepared_cache_key_type compute_id(const std::experimental::string_view& query_string, const sstring& keyspace);
|
||||
static prepared_cache_key_type compute_thrift_id(const std::experimental::string_view& query_string, const sstring& keyspace);
|
||||
|
||||
private:
|
||||
///
|
||||
/// \tparam ResultMsgType type of the returned result message (CQL or Thrift)
|
||||
/// \tparam PreparedKeyGenerator a function that generates the prepared statement cache key for given query and keyspace
|
||||
/// \tparam IdGetter a function that returns the corresponding prepared statement ID (CQL or Thrift) for a given prepared statement cache key
|
||||
/// \param query_string
|
||||
/// \param client_state
|
||||
/// \param id_gen prepared ID generator, called before the first deferring
|
||||
/// \param id_getter prepared ID getter, passed to deferred context by reference. The caller must ensure its liveness.
|
||||
/// \return
|
||||
template <typename ResultMsgType, typename PreparedKeyGenerator, typename IdGetter>
|
||||
future<::shared_ptr<cql_transport::messages::result_message::prepared>>
|
||||
prepare_one(sstring query_string, const service::client_state& client_state, PreparedKeyGenerator&& id_gen, IdGetter&& id_getter) {
|
||||
return do_with(id_gen(query_string, client_state.get_raw_keyspace()), std::move(query_string), [this, &client_state, &id_getter] (const prepared_cache_key_type& key, const sstring& query_string) {
|
||||
return _prepared_cache.get(key, [this, &query_string, &client_state] {
|
||||
auto prepared = get_statement(query_string, client_state);
|
||||
auto bound_terms = prepared->statement->get_bound_terms();
|
||||
if (bound_terms > std::numeric_limits<uint16_t>::max()) {
|
||||
throw exceptions::invalid_request_exception(sprint("Too many markers(?). %d markers exceed the allowed maximum of %d", bound_terms, std::numeric_limits<uint16_t>::max()));
|
||||
}
|
||||
assert(bound_terms == prepared->bound_names.size());
|
||||
prepared->raw_cql_statement = query_string;
|
||||
return make_ready_future<std::unique_ptr<statements::prepared_statement>>(std::move(prepared));
|
||||
}).then([&key, &id_getter] (auto prep_ptr) {
|
||||
return make_ready_future<::shared_ptr<cql_transport::messages::result_message::prepared>>(::make_shared<ResultMsgType>(id_getter(key), std::move(prep_ptr)));
|
||||
}).handle_exception_type([&query_string] (typename prepared_statements_cache::statement_is_too_big&) {
|
||||
return make_exception_future<::shared_ptr<cql_transport::messages::result_message::prepared>>(prepared_statement_is_too_big(query_string));
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
template <typename ResultMsgType, typename KeyGenerator, typename IdGetter>
|
||||
::shared_ptr<cql_transport::messages::result_message::prepared>
|
||||
get_stored_prepared_statement_one(const std::experimental::string_view& query_string, const sstring& keyspace, KeyGenerator&& key_gen, IdGetter&& id_getter)
|
||||
{
|
||||
auto cache_key = key_gen(query_string, keyspace);
|
||||
auto it = _prepared_cache.find(cache_key);
|
||||
if (it == _prepared_cache.end()) {
|
||||
return ::shared_ptr<cql_transport::messages::result_message::prepared>();
|
||||
}
|
||||
|
||||
return ::make_shared<ResultMsgType>(id_getter(cache_key), *it);
|
||||
}
|
||||
|
||||
::shared_ptr<cql_transport::messages::result_message::prepared>
|
||||
get_stored_prepared_statement(const std::experimental::string_view& query_string, const sstring& keyspace, bool for_thrift);
|
||||
|
||||
future<::shared_ptr<cql_transport::messages::result_message::prepared>>
|
||||
store_prepared_statement(const std::experimental::string_view& query_string, const sstring& keyspace, std::unique_ptr<statements::prepared_statement> prepared, bool for_thrift);
|
||||
|
||||
// Erases the statements for which filter returns true.
|
||||
template <typename Pred>
|
||||
void invalidate_prepared_statements(Pred filter) {
|
||||
static_assert(std::is_same<bool, std::result_of_t<Pred(::shared_ptr<cql_statement>)>>::value,
|
||||
"bad Pred signature");
|
||||
for (auto it = _prepared_statements.begin(); it != _prepared_statements.end(); ) {
|
||||
if (filter(it->second->statement)) {
|
||||
it = _prepared_statements.erase(it);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
for (auto it = _thrift_prepared_statements.begin(); it != _thrift_prepared_statements.end(); ) {
|
||||
if (filter(it->second->statement)) {
|
||||
it = _thrift_prepared_statements.erase(it);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
public ResultMessage processPrepared(CQLStatement statement, QueryState queryState, QueryOptions options)
|
||||
throws RequestExecutionException, RequestValidationException
|
||||
|
||||
@@ -75,7 +75,7 @@ cql3::statements::create_user_statement::execute(distributed<service::storage_pr
|
||||
throw exceptions::invalid_request_exception(sprint("User %s already exists", _username));
|
||||
}
|
||||
if (exists && _if_not_exists) {
|
||||
make_ready_future<::shared_ptr<cql_transport::messages::result_message>>();
|
||||
return make_ready_future<::shared_ptr<cql_transport::messages::result_message>>();
|
||||
}
|
||||
return auth::authenticator::get().create(_username, _opts->options()).then([this] {
|
||||
return auth::auth::insert_user(_username, _superuser).then([] {
|
||||
|
||||
@@ -886,7 +886,8 @@ column_family::seal_active_streaming_memtable_immediate() {
|
||||
return old->clear_gently();
|
||||
}
|
||||
});
|
||||
}).handle_exception([old] (auto ep) {
|
||||
}).handle_exception([old, newtab] (auto ep) {
|
||||
newtab->mark_for_deletion();
|
||||
dblog.error("failed to write streamed sstable: {}", ep);
|
||||
return make_exception_future<>(ep);
|
||||
});
|
||||
@@ -924,7 +925,8 @@ future<> column_family::seal_active_streaming_memtable_big(streaming_memtable_bi
|
||||
auto&& priority = service::get_local_streaming_write_priority();
|
||||
return write_memtable_to_sstable(*old, newtab, incremental_backups_enabled(), priority, true, _config.background_writer_scheduling_group).then([this, newtab, old, &smb] {
|
||||
smb.sstables.emplace_back(newtab);
|
||||
}).handle_exception([] (auto ep) {
|
||||
}).handle_exception([newtab] (auto ep) {
|
||||
newtab->mark_for_deletion();
|
||||
dblog.error("failed to write streamed sstable: {}", ep);
|
||||
return make_exception_future<>(ep);
|
||||
});
|
||||
|
||||
@@ -64,8 +64,11 @@
|
||||
#include "db/config.hh"
|
||||
#include "md5_hasher.hh"
|
||||
|
||||
#include <seastar/util/noncopyable_function.hh>
|
||||
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
#include <boost/range/algorithm/copy.hpp>
|
||||
#include <boost/range/algorithm/transform.hpp>
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
#include <boost/range/join.hpp>
|
||||
|
||||
@@ -126,7 +129,11 @@ static void merge_tables_and_views(distributed<service::storage_proxy>& proxy,
|
||||
std::map<qualified_name, schema_mutations>&& views_before,
|
||||
std::map<qualified_name, schema_mutations>&& views_after);
|
||||
|
||||
static void merge_types(distributed<service::storage_proxy>& proxy,
|
||||
struct user_types_to_drop final {
|
||||
seastar::noncopyable_function<void()> drop;
|
||||
};
|
||||
|
||||
static user_types_to_drop merge_types(distributed<service::storage_proxy>& proxy,
|
||||
schema_result&& before,
|
||||
schema_result&& after);
|
||||
|
||||
@@ -832,7 +839,7 @@ static future<> do_merge_schema(distributed<service::storage_proxy>& proxy, std:
|
||||
#endif
|
||||
|
||||
std::set<sstring> keyspaces_to_drop = merge_keyspaces(proxy, std::move(old_keyspaces), std::move(new_keyspaces)).get0();
|
||||
merge_types(proxy, std::move(old_types), std::move(new_types));
|
||||
auto types_to_drop = merge_types(proxy, std::move(old_types), std::move(new_types));
|
||||
merge_tables_and_views(proxy,
|
||||
std::move(old_column_families), std::move(new_column_families),
|
||||
std::move(old_views), std::move(new_views));
|
||||
@@ -840,6 +847,8 @@ static future<> do_merge_schema(distributed<service::storage_proxy>& proxy, std:
|
||||
mergeFunctions(oldFunctions, newFunctions);
|
||||
mergeAggregates(oldAggregates, newAggregates);
|
||||
#endif
|
||||
types_to_drop.drop();
|
||||
|
||||
proxy.local().get_db().invoke_on_all([keyspaces_to_drop = std::move(keyspaces_to_drop)] (database& db) {
|
||||
// it is safe to drop a keyspace only when all nested ColumnFamilies were deleted
|
||||
return do_for_each(keyspaces_to_drop, [&db] (auto keyspace_to_drop) {
|
||||
@@ -996,30 +1005,37 @@ static void merge_tables_and_views(distributed<service::storage_proxy>& proxy,
|
||||
}).get();
|
||||
}
|
||||
|
||||
static inline void collect_types(std::set<sstring>& keys, schema_result& result, std::vector<user_type>& to)
|
||||
struct naked_user_type {
|
||||
const sstring keyspace;
|
||||
const sstring qualified_name;
|
||||
};
|
||||
|
||||
static inline void collect_types(std::set<sstring>& keys, schema_result& result, std::vector<naked_user_type>& to)
|
||||
{
|
||||
for (auto&& key : keys) {
|
||||
auto&& value = result[key];
|
||||
auto types = create_types_from_schema_partition(schema_result_value_type{key, std::move(value)});
|
||||
std::move(types.begin(), types.end(), std::back_inserter(to));
|
||||
boost::transform(types, std::back_inserter(to), [] (user_type type) {
|
||||
return naked_user_type{std::move(type->_keyspace), std::move(type->name())};
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// see the comments for merge_keyspaces()
|
||||
static void merge_types(distributed<service::storage_proxy>& proxy, schema_result&& before, schema_result&& after)
|
||||
// see the comments for merge_keyspaces()
|
||||
static user_types_to_drop merge_types(distributed<service::storage_proxy>& proxy, schema_result&& before, schema_result&& after)
|
||||
{
|
||||
std::vector<user_type> created, altered, dropped;
|
||||
std::vector<naked_user_type> created, altered, dropped;
|
||||
|
||||
auto diff = difference(before, after, indirect_equal_to<lw_shared_ptr<query::result_set>>());
|
||||
|
||||
collect_types(diff.entries_only_on_left, before, dropped); // Keyspaces with no more types
|
||||
collect_types(diff.entries_only_on_right, after, created); // New keyspaces with types
|
||||
|
||||
for (auto&& key : diff.entries_differing) {
|
||||
for (auto&& keyspace : diff.entries_differing) {
|
||||
// The user types of this keyspace differ, so diff the current types with the updated ones
|
||||
auto current_types = proxy.local().get_db().local().find_keyspace(key).metadata()->user_types()->get_all_types();
|
||||
auto current_types = proxy.local().get_db().local().find_keyspace(keyspace).metadata()->user_types()->get_all_types();
|
||||
decltype(current_types) updated_types;
|
||||
auto ts = create_types_from_schema_partition(schema_result_value_type{key, std::move(after[key])});
|
||||
auto ts = create_types_from_schema_partition(schema_result_value_type{keyspace, std::move(after[keyspace])});
|
||||
updated_types.reserve(ts.size());
|
||||
for (auto&& type : ts) {
|
||||
updated_types[type->_name] = std::move(type);
|
||||
@@ -1027,36 +1043,46 @@ static void merge_types(distributed<service::storage_proxy>& proxy, schema_resul
|
||||
|
||||
auto delta = difference(current_types, updated_types, indirect_equal_to<user_type>());
|
||||
|
||||
for (auto&& key : delta.entries_only_on_left) {
|
||||
dropped.emplace_back(current_types[key]);
|
||||
for (auto&& type_name : delta.entries_only_on_left) {
|
||||
dropped.emplace_back(naked_user_type{keyspace, current_types[type_name]->name()});
|
||||
}
|
||||
for (auto&& key : delta.entries_only_on_right) {
|
||||
created.emplace_back(std::move(updated_types[key]));
|
||||
for (auto&& type_name : delta.entries_only_on_right) {
|
||||
created.emplace_back(naked_user_type{keyspace, updated_types[type_name]->name()});
|
||||
}
|
||||
for (auto&& key : delta.entries_differing) {
|
||||
altered.emplace_back(std::move(updated_types[key]));
|
||||
for (auto&& type_name : delta.entries_differing) {
|
||||
altered.emplace_back(naked_user_type{keyspace, updated_types[type_name]->name()});
|
||||
}
|
||||
}
|
||||
|
||||
proxy.local().get_db().invoke_on_all([&created, &dropped, &altered] (database& db) {
|
||||
// Create and update user types before any tables/views are created that potentially
|
||||
// use those types. Similarly, defer dropping until after tables/views that may use
|
||||
// some of these user types are dropped.
|
||||
|
||||
proxy.local().get_db().invoke_on_all([&created, &altered] (database& db) {
|
||||
return seastar::async([&] {
|
||||
for (auto&& type : created) {
|
||||
auto user_type = dynamic_pointer_cast<const user_type_impl>(parse_type(type->name()));
|
||||
auto user_type = dynamic_pointer_cast<const user_type_impl>(parse_type(type.qualified_name));
|
||||
db.find_keyspace(user_type->_keyspace).add_user_type(user_type);
|
||||
service::get_local_migration_manager().notify_create_user_type(user_type).get();
|
||||
}
|
||||
for (auto&& type : dropped) {
|
||||
auto user_type = dynamic_pointer_cast<const user_type_impl>(parse_type(type->name()));
|
||||
db.find_keyspace(user_type->_keyspace).remove_user_type(user_type);
|
||||
service::get_local_migration_manager().notify_drop_user_type(user_type).get();
|
||||
}
|
||||
for (auto&& type : altered) {
|
||||
auto user_type = dynamic_pointer_cast<const user_type_impl>(parse_type(type->name()));
|
||||
auto user_type = dynamic_pointer_cast<const user_type_impl>(parse_type(type.qualified_name));
|
||||
db.find_keyspace(user_type->_keyspace).add_user_type(user_type);
|
||||
service::get_local_migration_manager().notify_update_user_type(user_type).get();
|
||||
}
|
||||
});
|
||||
}).get();
|
||||
|
||||
return user_types_to_drop{[&proxy, dropped = std::move(dropped)] {
|
||||
proxy.local().get_db().invoke_on_all([dropped = std::move(dropped)](database& db) {
|
||||
return do_for_each(dropped, [&db](auto& user_type_to_drop) {
|
||||
auto user_type = dynamic_pointer_cast<const user_type_impl>(
|
||||
parse_type(std::move(user_type_to_drop.qualified_name)));
|
||||
db.find_keyspace(user_type->_keyspace).remove_user_type(user_type);
|
||||
return service::get_local_migration_manager().notify_drop_user_type(user_type);
|
||||
});
|
||||
}).get();
|
||||
}};
|
||||
}
|
||||
|
||||
#if 0
|
||||
|
||||
@@ -59,14 +59,11 @@ future<> boot_strapper::bootstrap() {
|
||||
streamer->add_ranges(keyspace_name, ranges);
|
||||
}
|
||||
|
||||
return streamer->fetch_async().then_wrapped([streamer] (auto&& f) {
|
||||
try {
|
||||
auto state = f.get0();
|
||||
} catch (...) {
|
||||
throw std::runtime_error(sprint("Error during boostrap: %s", std::current_exception()));
|
||||
}
|
||||
return streamer->stream_async().then([streamer] () {
|
||||
service::get_local_storage_service().finish_bootstrapping();
|
||||
return make_ready_future<>();
|
||||
}).handle_exception([streamer] (std::exception_ptr eptr) {
|
||||
blogger.warn("Eror during bootstrap: {}", eptr);
|
||||
return make_exception_future<>(std::move(eptr));
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -210,7 +210,36 @@ bool range_streamer::use_strict_sources_for_ranges(const sstring& keyspace_name)
|
||||
&& _metadata.get_all_endpoints().size() != strat.get_replication_factor();
|
||||
}
|
||||
|
||||
void range_streamer::add_tx_ranges(const sstring& keyspace_name, std::unordered_map<inet_address, dht::token_range_vector> ranges_per_endpoint, std::vector<sstring> column_families) {
|
||||
if (_nr_rx_added) {
|
||||
throw std::runtime_error("Mixed sending and receiving is not supported");
|
||||
}
|
||||
_nr_tx_added++;
|
||||
_to_stream.emplace(keyspace_name, std::move(ranges_per_endpoint));
|
||||
auto inserted = _column_families.emplace(keyspace_name, std::move(column_families)).second;
|
||||
if (!inserted) {
|
||||
throw std::runtime_error("Can not add column_families for the same keyspace more than once");
|
||||
}
|
||||
}
|
||||
|
||||
void range_streamer::add_rx_ranges(const sstring& keyspace_name, std::unordered_map<inet_address, dht::token_range_vector> ranges_per_endpoint, std::vector<sstring> column_families) {
|
||||
if (_nr_tx_added) {
|
||||
throw std::runtime_error("Mixed sending and receiving is not supported");
|
||||
}
|
||||
_nr_rx_added++;
|
||||
_to_stream.emplace(keyspace_name, std::move(ranges_per_endpoint));
|
||||
auto inserted = _column_families.emplace(keyspace_name, std::move(column_families)).second;
|
||||
if (!inserted) {
|
||||
throw std::runtime_error("Can not add column_families for the same keyspace more than once");
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: This is the legacy range_streamer interface; add_rx_ranges is the one that adds rx ranges.
|
||||
void range_streamer::add_ranges(const sstring& keyspace_name, dht::token_range_vector ranges) {
|
||||
if (_nr_tx_added) {
|
||||
throw std::runtime_error("Mixed sending and receiving is not supported");
|
||||
}
|
||||
_nr_rx_added++;
|
||||
auto ranges_for_keyspace = use_strict_sources_for_ranges(keyspace_name)
|
||||
? get_all_ranges_with_strict_sources_for(keyspace_name, ranges)
|
||||
: get_all_ranges_with_sources_for(keyspace_name, ranges);
|
||||
@@ -231,26 +260,114 @@ void range_streamer::add_ranges(const sstring& keyspace_name, dht::token_range_v
|
||||
logger.debug("{} : range {} from source {} for keyspace {}", _description, x.second, x.first, keyspace_name);
|
||||
}
|
||||
}
|
||||
_to_fetch.emplace(keyspace_name, std::move(range_fetch_map));
|
||||
_to_stream.emplace(keyspace_name, std::move(range_fetch_map));
|
||||
}
|
||||
|
||||
future<streaming::stream_state> range_streamer::fetch_async() {
|
||||
for (auto& fetch : _to_fetch) {
|
||||
const auto& keyspace = fetch.first;
|
||||
for (auto& x : fetch.second) {
|
||||
auto& source = x.first;
|
||||
auto& ranges = x.second;
|
||||
/* Send messages to respective folks to stream data over to me */
|
||||
if (logger.is_enabled(logging::log_level::debug)) {
|
||||
logger.debug("{}ing from {} ranges {}", _description, source, ranges);
|
||||
future<> range_streamer::stream_async() {
|
||||
return seastar::async([this] {
|
||||
int sleep_time = 60;
|
||||
for (;;) {
|
||||
try {
|
||||
do_stream_async().get();
|
||||
break;
|
||||
} catch (...) {
|
||||
logger.warn("{} failed to stream. Will retry in {} seconds ...", _description, sleep_time);
|
||||
sleep_abortable(std::chrono::seconds(sleep_time)).get();
|
||||
sleep_time *= 1.5;
|
||||
if (++_nr_retried >= _nr_max_retry) {
|
||||
throw;
|
||||
}
|
||||
}
|
||||
_stream_plan.request_ranges(source, keyspace, ranges);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
future<> range_streamer::do_stream_async() {
|
||||
auto nr_ranges_remaining = nr_ranges_to_stream();
|
||||
logger.info("{} starts, nr_ranges_remaining={}", _description, nr_ranges_remaining);
|
||||
auto start = lowres_clock::now();
|
||||
return do_for_each(_to_stream, [this, start, description = _description] (auto& stream) {
|
||||
const auto& keyspace = stream.first;
|
||||
auto& ip_range_vec = stream.second;
|
||||
// Fetch from or send to peer node in parallel
|
||||
return parallel_for_each(ip_range_vec, [this, description, keyspace] (auto& ip_range) {
|
||||
auto& source = ip_range.first;
|
||||
auto& range_vec = ip_range.second;
|
||||
return seastar::async([this, description, keyspace, source, &range_vec] () mutable {
|
||||
// TODO: It is better to use fiber instead of thread here because
|
||||
// creating a thread per peer can consume a lot of memory in a large cluster.
|
||||
auto start_time = lowres_clock::now();
|
||||
unsigned sp_index = 0;
|
||||
unsigned nr_ranges_streamed = 0;
|
||||
size_t nr_ranges_total = range_vec.size();
|
||||
size_t nr_ranges_per_stream_plan = nr_ranges_total / 10;
|
||||
dht::token_range_vector ranges_to_stream;
|
||||
auto do_streaming = [&] {
|
||||
auto sp = stream_plan(sprint("%s-%s-index-%d", description, keyspace, sp_index++));
|
||||
logger.info("{} with {} for keyspace={}, {} out of {} ranges: ranges = {}",
|
||||
description, source, keyspace, nr_ranges_streamed, nr_ranges_total, ranges_to_stream.size());
|
||||
if (_nr_rx_added) {
|
||||
sp.request_ranges(source, keyspace, ranges_to_stream, _column_families[keyspace]);
|
||||
} else if (_nr_tx_added) {
|
||||
sp.transfer_ranges(source, keyspace, ranges_to_stream, _column_families[keyspace]);
|
||||
}
|
||||
sp.execute().discard_result().get();
|
||||
ranges_to_stream.clear();
|
||||
};
|
||||
try {
|
||||
for (auto it = range_vec.begin(); it < range_vec.end();) {
|
||||
ranges_to_stream.push_back(*it);
|
||||
it = range_vec.erase(it);
|
||||
nr_ranges_streamed++;
|
||||
if (ranges_to_stream.size() < nr_ranges_per_stream_plan) {
|
||||
continue;
|
||||
} else {
|
||||
do_streaming();
|
||||
}
|
||||
}
|
||||
if (ranges_to_stream.size() > 0) {
|
||||
do_streaming();
|
||||
}
|
||||
} catch (...) {
|
||||
for (auto& range : ranges_to_stream) {
|
||||
range_vec.push_back(range);
|
||||
}
|
||||
auto t = std::chrono::duration_cast<std::chrono::seconds>(lowres_clock::now() - start_time).count();
|
||||
logger.warn("{} with {} for keyspace={} failed, took {} seconds: {}", description, source, keyspace, t, std::current_exception());
|
||||
throw;
|
||||
}
|
||||
auto t = std::chrono::duration_cast<std::chrono::seconds>(lowres_clock::now() - start_time).count();
|
||||
logger.info("{} with {} for keyspace={} succeeded, took {} seconds", description, source, keyspace, t);
|
||||
});
|
||||
|
||||
});
|
||||
}).finally([this, start] {
|
||||
auto t = std::chrono::duration_cast<std::chrono::seconds>(lowres_clock::now() - start).count();
|
||||
auto nr_ranges_remaining = nr_ranges_to_stream();
|
||||
if (nr_ranges_remaining) {
|
||||
logger.warn("{} failed, took {} seconds, nr_ranges_remaining={}", _description, t, nr_ranges_remaining);
|
||||
} else {
|
||||
logger.info("{} succeeded, took {} seconds, nr_ranges_remaining={}", _description, t, nr_ranges_remaining);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
size_t range_streamer::nr_ranges_to_stream() {
|
||||
size_t nr_ranges_remaining = 0;
|
||||
for (auto& fetch : _to_stream) {
|
||||
const auto& keyspace = fetch.first;
|
||||
auto& ip_range_vec = fetch.second;
|
||||
for (auto& ip_range : ip_range_vec) {
|
||||
auto& source = ip_range.first;
|
||||
auto& range_vec = ip_range.second;
|
||||
nr_ranges_remaining += range_vec.size();
|
||||
logger.debug("Remaining: keyspace={}, source={}, ranges={}", keyspace, source, range_vec);
|
||||
}
|
||||
}
|
||||
|
||||
return _stream_plan.execute();
|
||||
return nr_ranges_remaining;
|
||||
}
|
||||
|
||||
|
||||
std::unordered_multimap<inet_address, dht::token_range>
|
||||
range_streamer::get_work_map(const std::unordered_multimap<dht::token_range, inet_address>& ranges_with_source_target,
|
||||
const sstring& keyspace) {
|
||||
|
||||
@@ -119,6 +119,8 @@ public:
|
||||
}
|
||||
|
||||
void add_ranges(const sstring& keyspace_name, dht::token_range_vector ranges);
|
||||
void add_tx_ranges(const sstring& keyspace_name, std::unordered_map<inet_address, dht::token_range_vector> ranges_per_endpoint, std::vector<sstring> column_families = {});
|
||||
void add_rx_ranges(const sstring& keyspace_name, std::unordered_map<inet_address, dht::token_range_vector> ranges_per_endpoint, std::vector<sstring> column_families = {});
|
||||
private:
|
||||
bool use_strict_sources_for_ranges(const sstring& keyspace_name);
|
||||
/**
|
||||
@@ -159,16 +161,25 @@ public:
|
||||
}
|
||||
#endif
|
||||
public:
|
||||
future<streaming::stream_state> fetch_async();
|
||||
future<> stream_async();
|
||||
future<> do_stream_async();
|
||||
size_t nr_ranges_to_stream();
|
||||
private:
|
||||
distributed<database>& _db;
|
||||
token_metadata& _metadata;
|
||||
std::unordered_set<token> _tokens;
|
||||
inet_address _address;
|
||||
sstring _description;
|
||||
std::unordered_multimap<sstring, std::unordered_map<inet_address, dht::token_range_vector>> _to_fetch;
|
||||
std::unordered_multimap<sstring, std::unordered_map<inet_address, dht::token_range_vector>> _to_stream;
|
||||
std::unordered_set<std::unique_ptr<i_source_filter>> _source_filters;
|
||||
stream_plan _stream_plan;
|
||||
std::unordered_map<sstring, std::vector<sstring>> _column_families;
|
||||
// Retry the stream plan _nr_max_retry times
|
||||
unsigned _nr_retried = 0;
|
||||
unsigned _nr_max_retry = 5;
|
||||
// Number of tx and rx ranges added
|
||||
unsigned _nr_tx_added = 0;
|
||||
unsigned _nr_rx_added = 0;
|
||||
};
|
||||
|
||||
} // dht
|
||||
|
||||
2
dist/ami/files/scylla-ami
vendored
2
dist/ami/files/scylla-ami
vendored
Submodule dist/ami/files/scylla-ami updated: be90a3fb9f...fa2461de01
1
dist/common/modprobe.d/scylla-raid0.conf
vendored
1
dist/common/modprobe.d/scylla-raid0.conf
vendored
@@ -1 +0,0 @@
|
||||
options raid0 devices_discard_performance=Y
|
||||
@@ -6,7 +6,7 @@ After=network.target
|
||||
Type=simple
|
||||
User=scylla
|
||||
Group=scylla
|
||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid --repo-files '/etc/yum.repos.d/scylla*.repo' -q -c /etc/scylla.d/housekeeping.cfg version --mode d
|
||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg --repo-files @@REPOFILES@@ version --mode d
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -6,7 +6,7 @@ After=network.target
|
||||
Type=simple
|
||||
User=scylla
|
||||
Group=scylla
|
||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q --repo-files '/etc/yum.repos.d/scylla*.repo' -c /etc/scylla.d/housekeeping.cfg version --mode r
|
||||
ExecStart=/usr/lib/scylla/scylla-housekeeping --uuid-file /var/lib/scylla-housekeeping/housekeeping.uuid -q -c /etc/scylla.d/housekeeping.cfg --repo-files @@REPOFILES@@ version --mode r
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
6
dist/debian/build_deb.sh
vendored
6
dist/debian/build_deb.sh
vendored
@@ -196,8 +196,10 @@ else
|
||||
fi
|
||||
cp dist/common/systemd/scylla-server.service.in debian/scylla-server.service
|
||||
sed -i -e "s#@@SYSCONFDIR@@#/etc/default#g" debian/scylla-server.service
|
||||
cp dist/common/systemd/scylla-housekeeping-daily.service debian/scylla-server.scylla-housekeeping-daily.service
|
||||
cp dist/common/systemd/scylla-housekeeping-restart.service debian/scylla-server.scylla-housekeeping-restart.service
|
||||
cp dist/common/systemd/scylla-housekeeping-daily.service.in debian/scylla-server.scylla-housekeeping-daily.service
|
||||
sed -i -e "s#@@REPOFILES@@#'/etc/apt/sources.list.d/scylla*.list'#g" debian/scylla-server.scylla-housekeeping-daily.service
|
||||
cp dist/common/systemd/scylla-housekeeping-restart.service.in debian/scylla-server.scylla-housekeeping-restart.service
|
||||
sed -i -e "s#@@REPOFILES@@#'/etc/apt/sources.list.d/scylla*.list'#g" debian/scylla-server.scylla-housekeeping-restart.service
|
||||
cp dist/common/systemd/node-exporter.service debian/scylla-server.node-exporter.service
|
||||
|
||||
if [ $REBUILD -eq 1 ]; then
|
||||
|
||||
2
dist/debian/control.in
vendored
2
dist/debian/control.in
vendored
@@ -40,7 +40,7 @@ Description: Scylla kernel tuning configuration
|
||||
Package: scylla
|
||||
Section: metapackages
|
||||
Architecture: any
|
||||
Depends: scylla-server, scylla-jmx, scylla-tools, scylla-kernel-conf
|
||||
Depends: scylla-server, scylla-jmx, scylla-tools, scylla-tools-core, scylla-kernel-conf
|
||||
Description: Scylla database metapackage
|
||||
Scylla is a highly scalable, eventually consistent, distributed,
|
||||
partitioned row DB.
|
||||
|
||||
6
dist/redhat/build_rpm.sh
vendored
6
dist/redhat/build_rpm.sh
vendored
@@ -104,9 +104,9 @@ fi
|
||||
|
||||
|
||||
if [ $JOBS -gt 0 ]; then
|
||||
SRPM_OPTS="$SRPM_OPTS --define='_smp_mflags -j$JOBS'"
|
||||
RPM_JOBS_OPTS=(--define="_smp_mflags -j$JOBS")
|
||||
fi
|
||||
sudo mock --buildsrpm --root=$TARGET --resultdir=`pwd`/build/srpms --spec=build/scylla.spec --sources=build/scylla-$VERSION.tar $SRPM_OPTS
|
||||
sudo mock --buildsrpm --root=$TARGET --resultdir=`pwd`/build/srpms --spec=build/scylla.spec --sources=build/scylla-$VERSION.tar $SRPM_OPTS "${RPM_JOBS_OPTS[@]}"
|
||||
if [ "$TARGET" = "epel-7-x86_64" ] && [ $REBUILD = 1 ]; then
|
||||
./dist/redhat/centos_dep/build_dependency.sh
|
||||
sudo mock --init --root=$TARGET
|
||||
@@ -116,4 +116,4 @@ elif [ "$TARGET" = "epel-7-x86_64" ] && [ $REBUILD = 0 ]; then
|
||||
TARGET=scylla-$TARGET
|
||||
RPM_OPTS="$RPM_OPTS --configdir=dist/redhat/mock"
|
||||
fi
|
||||
sudo mock --rebuild --root=$TARGET --resultdir=`pwd`/build/rpms $RPM_OPTS build/srpms/scylla-$VERSION*.src.rpm
|
||||
sudo mock --rebuild --root=$TARGET --resultdir=`pwd`/build/rpms $RPM_OPTS "${RPM_JOBS_OPTS[@]}" build/srpms/scylla-$VERSION*.src.rpm
|
||||
|
||||
14
dist/redhat/centos_dep/binutils.diff
vendored
14
dist/redhat/centos_dep/binutils.diff
vendored
@@ -33,8 +33,8 @@
|
||||
Requires(post): coreutils
|
||||
-Requires(post): %{_sbindir}/alternatives
|
||||
-Requires(preun): %{_sbindir}/alternatives
|
||||
+Requires(post): /sbin/alternatives
|
||||
+Requires(preun): /sbin/alternatives
|
||||
+Requires(post): /usr/sbin/alternatives
|
||||
+Requires(preun): /usr/sbin/alternatives
|
||||
%endif
|
||||
|
||||
# On ARM EABI systems, we do want -gnueabi to be part of the
|
||||
@@ -58,13 +58,13 @@
|
||||
%if "%{build_gold}" == "both"
|
||||
%__rm -f %{_bindir}/%{?cross}ld
|
||||
-%{_sbindir}/alternatives --install %{_bindir}/%{?cross}ld %{?cross}ld \
|
||||
+/sbin/alternatives --install %{_bindir}/%{?cross}ld %{?cross}ld \
|
||||
+/usr/sbin/alternatives --install %{_bindir}/%{?cross}ld %{?cross}ld \
|
||||
%{_bindir}/%{?cross}ld.bfd %{ld_bfd_priority}
|
||||
-%{_sbindir}/alternatives --install %{_bindir}/%{?cross}ld %{?cross}ld \
|
||||
+/sbin/alternatives --install %{_bindir}/%{?cross}ld %{?cross}ld \
|
||||
+/usr/sbin/alternatives --install %{_bindir}/%{?cross}ld %{?cross}ld \
|
||||
%{_bindir}/%{?cross}ld.gold %{ld_gold_priority}
|
||||
-%{_sbindir}/alternatives --auto %{?cross}ld
|
||||
+/sbin/alternatives --auto %{?cross}ld
|
||||
+/usr/sbin/alternatives --auto %{?cross}ld
|
||||
%endif
|
||||
%if %{isnative}
|
||||
/sbin/ldconfig
|
||||
@@ -74,8 +74,8 @@
|
||||
if [ $1 = 0 ]; then
|
||||
- %{_sbindir}/alternatives --remove %{?cross}ld %{_bindir}/%{?cross}ld.bfd
|
||||
- %{_sbindir}/alternatives --remove %{?cross}ld %{_bindir}/%{?cross}ld.gold
|
||||
+ /sbin/alternatives --remove %{?cross}ld %{_bindir}/%{?cross}ld.bfd
|
||||
+ /sbin/alternatives --remove %{?cross}ld %{_bindir}/%{?cross}ld.gold
|
||||
+ /usr/sbin/alternatives --remove %{?cross}ld %{_bindir}/%{?cross}ld.bfd
|
||||
+ /usr/sbin/alternatives --remove %{?cross}ld %{_bindir}/%{?cross}ld.gold
|
||||
fi
|
||||
%endif
|
||||
%if %{isnative}
|
||||
|
||||
23
dist/redhat/scylla.spec.in
vendored
23
dist/redhat/scylla.spec.in
vendored
@@ -7,14 +7,14 @@ Group: Applications/Databases
|
||||
License: AGPLv3
|
||||
URL: http://www.scylladb.com/
|
||||
Source0: %{name}-@@VERSION@@-@@RELEASE@@.tar
|
||||
Requires: scylla-server = @@VERSION@@ scylla-jmx = @@VERSION@@ scylla-tools = @@VERSION@@ scylla-kernel-conf = @@VERSION@@
|
||||
Requires: scylla-server = @@VERSION@@ scylla-jmx = @@VERSION@@ scylla-tools = @@VERSION@@ scylla-tools-core = @@VERSION@@ scylla-kernel-conf = @@VERSION@@
|
||||
Obsoletes: scylla-server < 1.1
|
||||
|
||||
%description
|
||||
Scylla is a highly scalable, eventually consistent, distributed,
|
||||
partitioned row DB.
|
||||
This package installs all required packages for ScyllaDB, including
|
||||
scylla-server, scylla-jmx, scylla-tools.
|
||||
scylla-server, scylla-jmx, scylla-tools, scylla-tools-core.
|
||||
|
||||
# this is needed to prevent python compilation error on CentOS (#2235)
|
||||
%if 0%{?rhel}
|
||||
@@ -78,6 +78,10 @@ python3.4 ./configure.py --enable-dpdk --mode=release --static-stdc++ --static-b
|
||||
ninja-build %{?_smp_mflags} build/release/scylla build/release/iotune
|
||||
cp dist/common/systemd/scylla-server.service.in build/scylla-server.service
|
||||
sed -i -e "s#@@SYSCONFDIR@@#/etc/sysconfig#g" build/scylla-server.service
|
||||
cp dist/common/systemd/scylla-housekeeping-restart.service.in build/scylla-housekeeping-restart.service
|
||||
sed -i -e "s#@@REPOFILES@@#'/etc/yum.repos.d/scylla*.repo'#g" build/scylla-housekeeping-restart.service
|
||||
cp dist/common/systemd/scylla-housekeeping-daily.service.in build/scylla-housekeeping-daily.service
|
||||
sed -i -e "s#@@REPOFILES@@#'/etc/yum.repos.d/scylla*.repo'#g" build/scylla-housekeeping-daily.service
|
||||
|
||||
%install
|
||||
rm -rf $RPM_BUILD_ROOT
|
||||
@@ -88,9 +92,6 @@ mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/security/limits.d/
|
||||
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/collectd.d/
|
||||
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/scylla/
|
||||
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/scylla.d/
|
||||
%if 0%{?rhel}
|
||||
mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/
|
||||
%endif
|
||||
mkdir -p $RPM_BUILD_ROOT%{_sysctldir}/
|
||||
mkdir -p $RPM_BUILD_ROOT%{_docdir}/scylla/
|
||||
mkdir -p $RPM_BUILD_ROOT%{_unitdir}
|
||||
@@ -101,9 +102,6 @@ install -m644 dist/common/limits.d/scylla.conf $RPM_BUILD_ROOT%{_sysconfdir}/sec
|
||||
install -m644 dist/common/collectd.d/scylla.conf $RPM_BUILD_ROOT%{_sysconfdir}/collectd.d/
|
||||
install -m644 dist/common/scylla.d/*.conf $RPM_BUILD_ROOT%{_sysconfdir}/scylla.d/
|
||||
install -m644 dist/common/sysctl.d/*.conf $RPM_BUILD_ROOT%{_sysctldir}/
|
||||
%if 0%{?rhel}
|
||||
install -m644 dist/common/modprobe.d/*.conf $RPM_BUILD_ROOT%{_sysconfdir}/modprobe.d/
|
||||
%endif
|
||||
install -d -m755 $RPM_BUILD_ROOT%{_sysconfdir}/scylla
|
||||
install -m644 conf/scylla.yaml $RPM_BUILD_ROOT%{_sysconfdir}/scylla/
|
||||
install -m644 conf/cassandra-rackdc.properties $RPM_BUILD_ROOT%{_sysconfdir}/scylla/
|
||||
@@ -267,18 +265,9 @@ if Scylla is the main application on your server and you wish to optimize its la
|
||||
# We cannot use the sysctl_apply rpm macro because it is not present in 7.0
|
||||
# following is a "manual" expansion
|
||||
/usr/lib/systemd/systemd-sysctl 99-scylla-sched.conf >/dev/null 2>&1 || :
|
||||
# Write modprobe.d params when module already loaded
|
||||
%if 0%{?rhel}
|
||||
if [ -e /sys/module/raid0/parameters/devices_discard_performance ]; then
|
||||
echo Y > /sys/module/raid0/parameters/devices_discard_performance
|
||||
fi
|
||||
%endif
|
||||
|
||||
%files kernel-conf
|
||||
%defattr(-,root,root)
|
||||
%if 0%{?rhel}
|
||||
%config(noreplace) %{_sysconfdir}/modprobe.d/*.conf
|
||||
%endif
|
||||
%{_sysctldir}/*.conf
|
||||
|
||||
%changelog
|
||||
|
||||
@@ -461,7 +461,8 @@ future<> gossiper::apply_state_locally(std::map<inet_address, endpoint_state> ma
|
||||
int local_generation = local_ep_state_ptr.get_heart_beat_state().get_generation();
|
||||
int remote_generation = remote_state.get_heart_beat_state().get_generation();
|
||||
logger.trace("{} local generation {}, remote generation {}", ep, local_generation, remote_generation);
|
||||
if (local_generation != 0 && remote_generation > local_generation + MAX_GENERATION_DIFFERENCE) {
|
||||
// A node that was removed with nodetool removenode can have a generation of 2
|
||||
if (local_generation > 2 && remote_generation > local_generation + MAX_GENERATION_DIFFERENCE) {
|
||||
// assume some peer has corrupted memory and is broadcasting an unbelievable generation about another peer (or itself)
|
||||
logger.warn("received an invalid gossip generation for peer {}; local generation = {}, received generation = {}",
|
||||
ep, local_generation, remote_generation);
|
||||
@@ -832,6 +833,7 @@ int gossiper::get_max_endpoint_state_version(endpoint_state state) {
|
||||
|
||||
// Runs inside seastar::async context
|
||||
void gossiper::evict_from_membership(inet_address endpoint) {
|
||||
auto permit = lock_endpoint(endpoint).get0();
|
||||
_unreachable_endpoints.erase(endpoint);
|
||||
container().invoke_on_all([endpoint] (auto& g) {
|
||||
g.endpoint_state_map.erase(endpoint);
|
||||
@@ -982,7 +984,7 @@ future<> gossiper::assassinate_endpoint(sstring address) {
|
||||
logger.warn("Assassinating {} via gossip", endpoint);
|
||||
if (es) {
|
||||
auto& ss = service::get_local_storage_service();
|
||||
auto tokens = ss.get_token_metadata().get_tokens(endpoint);
|
||||
tokens = ss.get_token_metadata().get_tokens(endpoint);
|
||||
if (tokens.empty()) {
|
||||
logger.warn("Unable to calculate tokens for {}. Will use a random one", address);
|
||||
throw std::runtime_error(sprint("Unable to calculate tokens for %s", endpoint));
|
||||
|
||||
@@ -100,7 +100,6 @@ future<> ec2_multi_region_snitch::gossiper_starting() {
|
||||
// Note: currently gossiper "main" instance always runs on CPU0 therefore
|
||||
// this function will be executed on CPU0 only.
|
||||
//
|
||||
ec2_snitch::gossiper_starting();
|
||||
|
||||
using namespace gms;
|
||||
auto& g = get_local_gossiper();
|
||||
|
||||
@@ -110,7 +110,11 @@ void token_metadata::update_normal_tokens(std::unordered_map<inet_address, std::
|
||||
inet_address endpoint = i.first;
|
||||
std::unordered_set<token>& tokens = i.second;
|
||||
|
||||
assert(!tokens.empty());
|
||||
if (tokens.empty()) {
|
||||
auto msg = sprint("tokens is empty in update_normal_tokens");
|
||||
tlogger.error("{}", msg);
|
||||
throw std::runtime_error(msg);
|
||||
}
|
||||
|
||||
for(auto it = _token_to_endpoint_map.begin(), ite = _token_to_endpoint_map.end(); it != ite;) {
|
||||
if(it->second == endpoint) {
|
||||
@@ -141,7 +145,11 @@ void token_metadata::update_normal_tokens(std::unordered_map<inet_address, std::
|
||||
}
|
||||
|
||||
size_t token_metadata::first_token_index(const token& start) const {
|
||||
assert(_sorted_tokens.size() > 0);
|
||||
if (_sorted_tokens.empty()) {
|
||||
auto msg = sprint("sorted_tokens is empty in first_token_index!");
|
||||
tlogger.error("{}", msg);
|
||||
throw std::runtime_error(msg);
|
||||
}
|
||||
auto it = std::lower_bound(_sorted_tokens.begin(), _sorted_tokens.end(), start);
|
||||
if (it == _sorted_tokens.end()) {
|
||||
return 0;
|
||||
@@ -292,7 +300,11 @@ void token_metadata::add_bootstrap_tokens(std::unordered_set<token> tokens, inet
|
||||
}
|
||||
|
||||
void token_metadata::remove_bootstrap_tokens(std::unordered_set<token> tokens) {
|
||||
assert(!tokens.empty());
|
||||
if (tokens.empty()) {
|
||||
auto msg = sprint("tokens is empty in remove_bootstrap_tokens!");
|
||||
tlogger.error("{}", msg);
|
||||
throw std::runtime_error(msg);
|
||||
}
|
||||
for (auto t : tokens) {
|
||||
_bootstrap_tokens.erase(t);
|
||||
}
|
||||
@@ -320,7 +332,11 @@ void token_metadata::remove_from_moving(inet_address endpoint) {
|
||||
token token_metadata::get_predecessor(token t) {
|
||||
auto& tokens = sorted_tokens();
|
||||
auto it = std::lower_bound(tokens.begin(), tokens.end(), t);
|
||||
assert(it != tokens.end() && *it == t);
|
||||
if (it == tokens.end() || *it != t) {
|
||||
auto msg = sprint("token error in get_predecessor!");
|
||||
tlogger.error("{}", msg);
|
||||
throw std::runtime_error(msg);
|
||||
}
|
||||
if (it == tokens.begin()) {
|
||||
// If the token is the first element, its preprocessor is the last element
|
||||
return tokens.back();
|
||||
|
||||
@@ -514,7 +514,6 @@ shared_ptr<messaging_service::rpc_protocol_client_wrapper> messaging_service::ge
|
||||
}();
|
||||
|
||||
auto remote_addr = ipv4_addr(get_preferred_ip(id.addr).raw_addr(), must_encrypt ? _ssl_port : _port);
|
||||
auto local_addr = ipv4_addr{_listen_address.raw_addr(), 0};
|
||||
|
||||
rpc::client_options opts;
|
||||
// send keepalive messages each minute if connection is idle, drop connection after 10 failures
|
||||
@@ -526,9 +525,9 @@ shared_ptr<messaging_service::rpc_protocol_client_wrapper> messaging_service::ge
|
||||
|
||||
auto client = must_encrypt ?
|
||||
::make_shared<rpc_protocol_client_wrapper>(*_rpc, std::move(opts),
|
||||
remote_addr, local_addr, _credentials) :
|
||||
remote_addr, ipv4_addr(), _credentials) :
|
||||
::make_shared<rpc_protocol_client_wrapper>(*_rpc, std::move(opts),
|
||||
remote_addr, local_addr);
|
||||
remote_addr);
|
||||
|
||||
it = _clients[idx].emplace(id, shard_info(std::move(client))).first;
|
||||
uint32_t src_cpu_id = engine().cpu_id();
|
||||
@@ -640,59 +639,6 @@ auto send_message_timeout(messaging_service* ms, messaging_verb verb, msg_addr i
|
||||
});
|
||||
}
|
||||
|
||||
template <typename MsgIn, typename... MsgOut>
|
||||
auto send_message_timeout_and_retry(messaging_service* ms, messaging_verb verb, msg_addr id,
|
||||
std::chrono::seconds timeout, int nr_retry, std::chrono::seconds wait, MsgOut... msg) {
|
||||
using MsgInTuple = typename futurize_t<MsgIn>::value_type;
|
||||
return do_with(int(nr_retry), std::move(msg)..., [ms, verb, id, timeout, wait, nr_retry] (auto& retry, const auto&... messages) {
|
||||
return repeat_until_value([ms, verb, id, timeout, wait, nr_retry, &retry, &messages...] {
|
||||
return send_message_timeout<MsgIn>(ms, verb, id, timeout, messages...).then_wrapped(
|
||||
[ms, verb, id, timeout, wait, nr_retry, &retry] (auto&& f) mutable {
|
||||
auto vb = int(verb);
|
||||
try {
|
||||
MsgInTuple ret = f.get();
|
||||
if (retry != nr_retry) {
|
||||
mlogger.info("Retry verb={} to {}, retry={}: OK", vb, id, retry);
|
||||
}
|
||||
return make_ready_future<stdx::optional<MsgInTuple>>(std::move(ret));
|
||||
} catch (rpc::timeout_error) {
|
||||
mlogger.info("Retry verb={} to {}, retry={}: timeout in {} seconds", vb, id, retry, timeout.count());
|
||||
throw;
|
||||
} catch (rpc::closed_error) {
|
||||
mlogger.info("Retry verb={} to {}, retry={}: {}", vb, id, retry, std::current_exception());
|
||||
// Stop retrying if retry reaches 0 or message service is shutdown
|
||||
// or the remote node is removed from gossip (on_remove())
|
||||
retry--;
|
||||
if (retry == 0) {
|
||||
mlogger.debug("Retry verb={} to {}, retry={}: stop retrying: retry == 0", vb, id, retry);
|
||||
throw;
|
||||
}
|
||||
if (ms->is_stopping()) {
|
||||
mlogger.debug("Retry verb={} to {}, retry={}: stop retrying: messaging_service is stopped",
|
||||
vb, id, retry);
|
||||
throw;
|
||||
}
|
||||
if (!gms::get_local_gossiper().is_known_endpoint(id.addr)) {
|
||||
mlogger.debug("Retry verb={} to {}, retry={}: stop retrying: node is removed from the cluster",
|
||||
vb, id, retry);
|
||||
throw;
|
||||
}
|
||||
return sleep_abortable(wait).then([] {
|
||||
return make_ready_future<stdx::optional<MsgInTuple>>(stdx::nullopt);
|
||||
}).handle_exception([vb, id, retry] (std::exception_ptr ep) {
|
||||
mlogger.debug("Retry verb={} to {}, retry={}: stop retrying: {}", vb, id, retry, ep);
|
||||
return make_exception_future<stdx::optional<MsgInTuple>>(ep);
|
||||
});
|
||||
} catch (...) {
|
||||
throw;
|
||||
}
|
||||
});
|
||||
}).then([ms = ms->shared_from_this()] (MsgInTuple result) {
|
||||
return futurize<MsgIn>::from_tuple(std::move(result));
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// Send one way message for verb
|
||||
template <typename... MsgOut>
|
||||
auto send_message_oneway(messaging_service* ms, messaging_verb verb, msg_addr id, MsgOut&&... msg) {
|
||||
@@ -707,13 +653,6 @@ auto send_message_oneway_timeout(messaging_service* ms, Timeout timeout, messagi
|
||||
|
||||
// Wrappers for verbs
|
||||
|
||||
// Retransmission parameters for streaming verbs.
|
||||
// A stream plan gives up retrying in 10*30 + 10*60 seconds (15 minutes) at
|
||||
// most, 10*30 seconds (5 minutes) at least.
|
||||
static constexpr int streaming_nr_retry = 10;
|
||||
static constexpr std::chrono::seconds streaming_timeout{10*60};
|
||||
static constexpr std::chrono::seconds streaming_wait_before_retry{30};
|
||||
|
||||
// PREPARE_MESSAGE
|
||||
void messaging_service::register_prepare_message(std::function<future<streaming::prepare_message> (const rpc::client_info& cinfo,
|
||||
streaming::prepare_message msg, UUID plan_id, sstring description)>&& func) {
|
||||
@@ -721,8 +660,7 @@ void messaging_service::register_prepare_message(std::function<future<streaming:
|
||||
}
|
||||
future<streaming::prepare_message> messaging_service::send_prepare_message(msg_addr id, streaming::prepare_message msg, UUID plan_id,
|
||||
sstring description) {
|
||||
return send_message_timeout_and_retry<streaming::prepare_message>(this, messaging_verb::PREPARE_MESSAGE, id,
|
||||
streaming_timeout, streaming_nr_retry, streaming_wait_before_retry,
|
||||
return send_message<streaming::prepare_message>(this, messaging_verb::PREPARE_MESSAGE, id,
|
||||
std::move(msg), plan_id, std::move(description));
|
||||
}
|
||||
|
||||
@@ -731,8 +669,7 @@ void messaging_service::register_prepare_done_message(std::function<future<> (co
|
||||
register_handler(this, messaging_verb::PREPARE_DONE_MESSAGE, std::move(func));
|
||||
}
|
||||
future<> messaging_service::send_prepare_done_message(msg_addr id, UUID plan_id, unsigned dst_cpu_id) {
|
||||
return send_message_timeout_and_retry<void>(this, messaging_verb::PREPARE_DONE_MESSAGE, id,
|
||||
streaming_timeout, streaming_nr_retry, streaming_wait_before_retry,
|
||||
return send_message<void>(this, messaging_verb::PREPARE_DONE_MESSAGE, id,
|
||||
plan_id, dst_cpu_id);
|
||||
}
|
||||
|
||||
@@ -741,8 +678,7 @@ void messaging_service::register_stream_mutation(std::function<future<> (const r
|
||||
register_handler(this, messaging_verb::STREAM_MUTATION, std::move(func));
|
||||
}
|
||||
future<> messaging_service::send_stream_mutation(msg_addr id, UUID plan_id, frozen_mutation fm, unsigned dst_cpu_id, bool fragmented) {
|
||||
return send_message_timeout_and_retry<void>(this, messaging_verb::STREAM_MUTATION, id,
|
||||
streaming_timeout, streaming_nr_retry, streaming_wait_before_retry,
|
||||
return send_message<void>(this, messaging_verb::STREAM_MUTATION, id,
|
||||
plan_id, std::move(fm), dst_cpu_id, fragmented);
|
||||
}
|
||||
|
||||
@@ -757,19 +693,17 @@ void messaging_service::register_stream_mutation_done(std::function<future<> (co
|
||||
});
|
||||
}
|
||||
future<> messaging_service::send_stream_mutation_done(msg_addr id, UUID plan_id, dht::token_range_vector ranges, UUID cf_id, unsigned dst_cpu_id) {
|
||||
return send_message_timeout_and_retry<void>(this, messaging_verb::STREAM_MUTATION_DONE, id,
|
||||
streaming_timeout, streaming_nr_retry, streaming_wait_before_retry,
|
||||
return send_message<void>(this, messaging_verb::STREAM_MUTATION_DONE, id,
|
||||
plan_id, std::move(ranges), cf_id, dst_cpu_id);
|
||||
}
|
||||
|
||||
// COMPLETE_MESSAGE
|
||||
void messaging_service::register_complete_message(std::function<future<> (const rpc::client_info& cinfo, UUID plan_id, unsigned dst_cpu_id)>&& func) {
|
||||
void messaging_service::register_complete_message(std::function<future<> (const rpc::client_info& cinfo, UUID plan_id, unsigned dst_cpu_id, rpc::optional<bool> failed)>&& func) {
|
||||
register_handler(this, messaging_verb::COMPLETE_MESSAGE, std::move(func));
|
||||
}
|
||||
future<> messaging_service::send_complete_message(msg_addr id, UUID plan_id, unsigned dst_cpu_id) {
|
||||
return send_message_timeout_and_retry<void>(this, messaging_verb::COMPLETE_MESSAGE, id,
|
||||
streaming_timeout, streaming_nr_retry, streaming_wait_before_retry,
|
||||
plan_id, dst_cpu_id);
|
||||
future<> messaging_service::send_complete_message(msg_addr id, UUID plan_id, unsigned dst_cpu_id, bool failed) {
|
||||
return send_message<void>(this, messaging_verb::COMPLETE_MESSAGE, id,
|
||||
plan_id, dst_cpu_id, failed);
|
||||
}
|
||||
|
||||
void messaging_service::register_gossip_echo(std::function<future<> ()>&& func) {
|
||||
|
||||
@@ -249,8 +249,8 @@ public:
|
||||
void register_stream_mutation_done(std::function<future<> (const rpc::client_info& cinfo, UUID plan_id, dht::token_range_vector ranges, UUID cf_id, unsigned dst_cpu_id)>&& func);
|
||||
future<> send_stream_mutation_done(msg_addr id, UUID plan_id, dht::token_range_vector ranges, UUID cf_id, unsigned dst_cpu_id);
|
||||
|
||||
void register_complete_message(std::function<future<> (const rpc::client_info& cinfo, UUID plan_id, unsigned dst_cpu_id)>&& func);
|
||||
future<> send_complete_message(msg_addr id, UUID plan_id, unsigned dst_cpu_id);
|
||||
void register_complete_message(std::function<future<> (const rpc::client_info& cinfo, UUID plan_id, unsigned dst_cpu_id, rpc::optional<bool> failed)>&& func);
|
||||
future<> send_complete_message(msg_addr id, UUID plan_id, unsigned dst_cpu_id, bool failed = false);
|
||||
|
||||
// Wrapper for REPAIR_CHECKSUM_RANGE verb
|
||||
void register_repair_checksum_range(std::function<future<partition_checksum> (sstring keyspace, sstring cf, dht::token_range range, rpc::optional<repair_checksum> hash_version)>&& func);
|
||||
|
||||
@@ -545,13 +545,19 @@ lw_shared_ptr<partition_snapshot> partition_entry::read(schema_ptr entry_schema,
|
||||
std::vector<range_tombstone>
|
||||
partition_snapshot::range_tombstones(const schema& s, position_in_partition_view start, position_in_partition_view end)
|
||||
{
|
||||
partition_version* v = &*version();
|
||||
if (!v->next()) {
|
||||
return boost::copy_range<std::vector<range_tombstone>>(
|
||||
v->partition().row_tombstones().slice(s, start, end));
|
||||
}
|
||||
range_tombstone_list list(s);
|
||||
for (auto&& v : versions()) {
|
||||
for (auto&& rt : v.partition().row_tombstones().slice(s, start, end)) {
|
||||
while (v) {
|
||||
for (auto&& rt : v->partition().row_tombstones().slice(s, start, end)) {
|
||||
list.apply(s, rt);
|
||||
}
|
||||
v = v->next();
|
||||
}
|
||||
return boost::copy_range<std::vector<range_tombstone>>(list);
|
||||
return boost::copy_range<std::vector<range_tombstone>>(list.slice(s, start, end));
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, partition_entry& e) {
|
||||
|
||||
@@ -124,6 +124,7 @@ void range_tombstone_list::insert_from(const schema& s,
|
||||
if (less(end_bound, it->end_bound())) {
|
||||
end = it->end;
|
||||
end_kind = it->end_kind;
|
||||
end_bound = bound_view(end, end_kind);
|
||||
}
|
||||
it = rev.erase(it);
|
||||
} else if (c > 0) {
|
||||
|
||||
2
seastar
2
seastar
Submodule seastar updated: 0dbedf03dc...da2e1afaa8
@@ -929,7 +929,17 @@ void storage_service::handle_state_removing(inet_address endpoint, std::vector<s
|
||||
slogger.warn("{}", err);
|
||||
throw std::runtime_error(err);
|
||||
}
|
||||
restore_replica_count(endpoint, ep.value()).get();
|
||||
// Kick off streaming commands. No need to wait for
|
||||
// restore_replica_count to complete which can take a long time,
|
||||
// since when it completes, this node will send notification to
|
||||
// tell the removal_coordinator with IP address notify_endpoint
|
||||
// that the restore process is finished on this node. This node
|
||||
// will be removed from _replicating_nodes on the
|
||||
// removal_coordinator.
|
||||
auto notify_endpoint = ep.value();
|
||||
restore_replica_count(endpoint, notify_endpoint).handle_exception([endpoint, notify_endpoint] (auto ep) {
|
||||
slogger.info("Failed to restore_replica_count for node {}, notify_endpoint={} : {}", endpoint, notify_endpoint, ep);
|
||||
});
|
||||
}
|
||||
} else { // now that the gossiper has told us about this nonexistent member, notify the gossiper to remove it
|
||||
if (sstring(gms::versioned_value::REMOVED_TOKEN) == pieces[0]) {
|
||||
@@ -981,6 +991,7 @@ void storage_service::on_change(inet_address endpoint, application_state state,
|
||||
boost::split(pieces, value.value, boost::is_any_of(sstring(versioned_value::DELIMITER_STR)));
|
||||
if (pieces.empty()) {
|
||||
slogger.warn("Fail to split status in on_change: endpoint={}, app_state={}, value={}", endpoint, state, value);
|
||||
return;
|
||||
}
|
||||
sstring move_name = pieces[0];
|
||||
if (move_name == sstring(versioned_value::STATUS_BOOTSTRAPPING)) {
|
||||
@@ -1029,8 +1040,8 @@ void storage_service::on_remove(gms::inet_address endpoint) {
|
||||
|
||||
void storage_service::on_dead(gms::inet_address endpoint, gms::endpoint_state state) {
|
||||
slogger.debug("endpoint={} on_dead", endpoint);
|
||||
netw::get_local_messaging_service().remove_rpc_client(netw::msg_addr{endpoint, 0});
|
||||
get_storage_service().invoke_on_all([endpoint] (auto&& ss) {
|
||||
netw::get_local_messaging_service().remove_rpc_client(netw::msg_addr{endpoint, 0});
|
||||
for (auto&& subscriber : ss._lifecycle_subscribers) {
|
||||
try {
|
||||
subscriber->on_down(endpoint);
|
||||
@@ -2348,15 +2359,12 @@ future<> storage_service::rebuild(sstring source_dc) {
|
||||
for (const auto& keyspace_name : ss._db.local().get_non_system_keyspaces()) {
|
||||
streamer->add_ranges(keyspace_name, ss.get_local_ranges(keyspace_name));
|
||||
}
|
||||
return streamer->fetch_async().then_wrapped([streamer] (auto&& f) {
|
||||
try {
|
||||
auto state = f.get0();
|
||||
} catch (...) {
|
||||
// This is used exclusively through JMX, so log the full trace but only throw a simple RTE
|
||||
slogger.error("Error while rebuilding node: {}", std::current_exception());
|
||||
throw std::runtime_error(sprint("Error while rebuilding node: %s", std::current_exception()));
|
||||
}
|
||||
return make_ready_future<>();
|
||||
return streamer->stream_async().then([streamer] {
|
||||
slogger.info("Streaming for rebuild successful");
|
||||
}).handle_exception([] (auto ep) {
|
||||
// This is used exclusively through JMX, so log the full trace but only throw a simple RTE
|
||||
slogger.warn("Error while rebuilding node: {}", std::current_exception());
|
||||
return make_exception_future<>(std::move(ep));
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -2483,10 +2491,8 @@ void storage_service::unbootstrap() {
|
||||
}
|
||||
|
||||
future<> storage_service::restore_replica_count(inet_address endpoint, inet_address notify_endpoint) {
|
||||
std::unordered_multimap<sstring, std::unordered_map<inet_address, dht::token_range_vector>> ranges_to_fetch;
|
||||
|
||||
auto streamer = make_lw_shared<dht::range_streamer>(_db, get_token_metadata(), get_broadcast_address(), "Restore_replica_count");
|
||||
auto my_address = get_broadcast_address();
|
||||
|
||||
auto non_system_keyspaces = _db.local().get_non_system_keyspaces();
|
||||
for (const auto& keyspace_name : non_system_keyspaces) {
|
||||
std::unordered_multimap<dht::token_range, inet_address> changed_ranges = get_changed_ranges_for_leaving(keyspace_name, endpoint);
|
||||
@@ -2497,26 +2503,15 @@ future<> storage_service::restore_replica_count(inet_address endpoint, inet_addr
|
||||
}
|
||||
}
|
||||
std::unordered_multimap<inet_address, dht::token_range> source_ranges = get_new_source_ranges(keyspace_name, my_new_ranges);
|
||||
std::unordered_map<inet_address, dht::token_range_vector> tmp;
|
||||
std::unordered_map<inet_address, dht::token_range_vector> ranges_per_endpoint;
|
||||
for (auto& x : source_ranges) {
|
||||
tmp[x.first].emplace_back(x.second);
|
||||
ranges_per_endpoint[x.first].emplace_back(x.second);
|
||||
}
|
||||
ranges_to_fetch.emplace(keyspace_name, std::move(tmp));
|
||||
streamer->add_rx_ranges(keyspace_name, std::move(ranges_per_endpoint));
|
||||
}
|
||||
auto sp = make_lw_shared<streaming::stream_plan>("Restore replica count");
|
||||
for (auto& x: ranges_to_fetch) {
|
||||
const sstring& keyspace_name = x.first;
|
||||
std::unordered_map<inet_address, dht::token_range_vector>& maps = x.second;
|
||||
for (auto& m : maps) {
|
||||
auto source = m.first;
|
||||
auto ranges = m.second;
|
||||
slogger.debug("Requesting from {} ranges {}", source, ranges);
|
||||
sp->request_ranges(source, keyspace_name, ranges);
|
||||
}
|
||||
}
|
||||
return sp->execute().then_wrapped([this, sp, notify_endpoint] (auto&& f) {
|
||||
return streamer->stream_async().then_wrapped([this, streamer, notify_endpoint] (auto&& f) {
|
||||
try {
|
||||
auto state = f.get0();
|
||||
f.get();
|
||||
return this->send_replication_notification(notify_endpoint);
|
||||
} catch (...) {
|
||||
slogger.warn("Streaming to restore replica count failed: {}", std::current_exception());
|
||||
@@ -2608,8 +2603,7 @@ void storage_service::leave_ring() {
|
||||
|
||||
future<>
|
||||
storage_service::stream_ranges(std::unordered_map<sstring, std::unordered_multimap<dht::token_range, inet_address>> ranges_to_stream_by_keyspace) {
|
||||
// First, we build a list of ranges to stream to each host, per table
|
||||
std::unordered_map<sstring, std::unordered_map<inet_address, dht::token_range_vector>> sessions_to_stream_by_keyspace;
|
||||
auto streamer = make_lw_shared<dht::range_streamer>(_db, get_token_metadata(), get_broadcast_address(), "Unbootstrap");
|
||||
for (auto& entry : ranges_to_stream_by_keyspace) {
|
||||
const auto& keyspace = entry.first;
|
||||
auto& ranges_with_endpoints = entry.second;
|
||||
@@ -2624,26 +2618,13 @@ storage_service::stream_ranges(std::unordered_map<sstring, std::unordered_multim
|
||||
inet_address endpoint = end_point_entry.second;
|
||||
ranges_per_endpoint[endpoint].emplace_back(r);
|
||||
}
|
||||
sessions_to_stream_by_keyspace.emplace(keyspace, std::move(ranges_per_endpoint));
|
||||
streamer->add_tx_ranges(keyspace, std::move(ranges_per_endpoint));
|
||||
}
|
||||
auto sp = make_lw_shared<streaming::stream_plan>("Unbootstrap");
|
||||
for (auto& entry : sessions_to_stream_by_keyspace) {
|
||||
const auto& keyspace_name = entry.first;
|
||||
// TODO: we can move to avoid copy of std::vector
|
||||
auto& ranges_per_endpoint = entry.second;
|
||||
|
||||
for (auto& ranges_entry : ranges_per_endpoint) {
|
||||
auto& ranges = ranges_entry.second;
|
||||
auto new_endpoint = ranges_entry.first;
|
||||
// TODO each call to transferRanges re-flushes, this is potentially a lot of waste
|
||||
sp->transfer_ranges(new_endpoint, keyspace_name, ranges);
|
||||
}
|
||||
}
|
||||
return sp->execute().discard_result().then([sp] {
|
||||
return streamer->stream_async().then([streamer] {
|
||||
slogger.info("stream_ranges successful");
|
||||
}).handle_exception([] (auto ep) {
|
||||
slogger.info("stream_ranges failed: {}", ep);
|
||||
return make_exception_future(std::runtime_error("stream_ranges failed"));
|
||||
slogger.warn("stream_ranges failed: {}", ep);
|
||||
return make_exception_future<>(std::move(ep));
|
||||
});
|
||||
}
|
||||
|
||||
@@ -2677,16 +2658,18 @@ future<> storage_service::stream_hints() {
|
||||
// stream all hints -- range list will be a singleton of "the entire ring"
|
||||
dht::token_range_vector ranges = {dht::token_range::make_open_ended_both_sides()};
|
||||
slogger.debug("stream_hints: ranges={}", ranges);
|
||||
std::unordered_map<inet_address, dht::token_range_vector> ranges_per_endpoint;
|
||||
ranges_per_endpoint[hints_destination_host] = std::move(ranges);
|
||||
|
||||
auto sp = make_lw_shared<streaming::stream_plan>("Hints");
|
||||
std::vector<sstring> column_families = { db::system_keyspace::HINTS };
|
||||
auto streamer = make_lw_shared<dht::range_streamer>(_db, get_token_metadata(), get_broadcast_address(), "Hints");
|
||||
auto keyspace = db::system_keyspace::NAME;
|
||||
sp->transfer_ranges(hints_destination_host, keyspace, ranges, column_families);
|
||||
return sp->execute().discard_result().then([sp] {
|
||||
std::vector<sstring> column_families = { db::system_keyspace::HINTS };
|
||||
streamer->add_tx_ranges(keyspace, std::move(ranges_per_endpoint), column_families);
|
||||
return streamer->stream_async().then([streamer] {
|
||||
slogger.info("stream_hints successful");
|
||||
}).handle_exception([] (auto ep) {
|
||||
slogger.info("stream_hints failed: {}", ep);
|
||||
return make_exception_future(std::runtime_error("stream_hints failed"));
|
||||
slogger.warn("stream_hints failed: {}", ep);
|
||||
return make_exception_future<>(std::move(ep));
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,10 +21,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <seastar/core/shared_future.hh>
|
||||
#include <seastar/core/future.hh>
|
||||
#include "utils/loading_shared_values.hh"
|
||||
|
||||
namespace sstables {
|
||||
|
||||
@@ -36,50 +35,26 @@ using index_list = std::vector<index_entry>;
|
||||
class shared_index_lists {
|
||||
public:
|
||||
using key_type = uint64_t;
|
||||
struct stats {
|
||||
static thread_local struct stats {
|
||||
uint64_t hits = 0; // Number of times entry was found ready
|
||||
uint64_t misses = 0; // Number of times entry was not found
|
||||
uint64_t blocks = 0; // Number of times entry was not ready (>= misses)
|
||||
};
|
||||
private:
|
||||
class entry : public enable_lw_shared_from_this<entry> {
|
||||
public:
|
||||
key_type key;
|
||||
index_list list;
|
||||
shared_promise<> loaded;
|
||||
shared_index_lists& parent;
|
||||
} _shard_stats;
|
||||
|
||||
entry(shared_index_lists& parent, key_type key)
|
||||
: key(key), parent(parent)
|
||||
{ }
|
||||
~entry() {
|
||||
parent._lists.erase(key);
|
||||
}
|
||||
bool operator==(const entry& e) const { return key == e.key; }
|
||||
bool operator!=(const entry& e) const { return key != e.key; }
|
||||
struct stats_updater {
|
||||
static void inc_hits() noexcept { ++_shard_stats.hits; }
|
||||
static void inc_misses() noexcept { ++_shard_stats.misses; }
|
||||
static void inc_blocks() noexcept { ++_shard_stats.blocks; }
|
||||
static void inc_evictions() noexcept {}
|
||||
};
|
||||
std::unordered_map<key_type, entry*> _lists;
|
||||
static thread_local stats _shard_stats;
|
||||
public:
|
||||
|
||||
using loading_shared_lists_type = utils::loading_shared_values<key_type, index_list, std::hash<key_type>, std::equal_to<key_type>, stats_updater>;
|
||||
// Pointer to index_list
|
||||
class list_ptr {
|
||||
lw_shared_ptr<entry> _e;
|
||||
public:
|
||||
using element_type = index_list;
|
||||
list_ptr() = default;
|
||||
explicit list_ptr(lw_shared_ptr<entry> e) : _e(std::move(e)) {}
|
||||
explicit operator bool() const { return static_cast<bool>(_e); }
|
||||
index_list& operator*() { return _e->list; }
|
||||
const index_list& operator*() const { return _e->list; }
|
||||
index_list* operator->() { return &_e->list; }
|
||||
const index_list* operator->() const { return &_e->list; }
|
||||
using list_ptr = loading_shared_lists_type::entry_ptr;
|
||||
private:
|
||||
|
||||
index_list release() {
|
||||
auto res = _e.owned() ? index_list(std::move(_e->list)) : index_list(_e->list);
|
||||
_e = {};
|
||||
return std::move(res);
|
||||
}
|
||||
};
|
||||
loading_shared_lists_type _lists;
|
||||
public:
|
||||
|
||||
shared_index_lists() = default;
|
||||
shared_index_lists(shared_index_lists&&) = delete;
|
||||
@@ -93,41 +68,8 @@ public:
|
||||
//
|
||||
// The loader object does not survive deferring, so the caller must deal with its liveness.
|
||||
template<typename Loader>
|
||||
future<list_ptr> get_or_load(key_type key, Loader&& loader) {
|
||||
auto i = _lists.find(key);
|
||||
lw_shared_ptr<entry> e;
|
||||
auto f = [&] {
|
||||
if (i != _lists.end()) {
|
||||
e = i->second->shared_from_this();
|
||||
return e->loaded.get_shared_future();
|
||||
} else {
|
||||
++_shard_stats.misses;
|
||||
e = make_lw_shared<entry>(*this, key);
|
||||
auto f = e->loaded.get_shared_future();
|
||||
auto res = _lists.emplace(key, e.get());
|
||||
assert(res.second);
|
||||
futurize_apply(loader, key).then_wrapped([e](future<index_list>&& f) mutable {
|
||||
if (f.failed()) {
|
||||
e->loaded.set_exception(f.get_exception());
|
||||
} else {
|
||||
e->list = f.get0();
|
||||
e->loaded.set_value();
|
||||
}
|
||||
});
|
||||
return f;
|
||||
}
|
||||
}();
|
||||
if (!f.available()) {
|
||||
++_shard_stats.blocks;
|
||||
return f.then([e]() mutable {
|
||||
return list_ptr(std::move(e));
|
||||
});
|
||||
} else if (f.failed()) {
|
||||
return make_exception_future<list_ptr>(std::move(f).get_exception());
|
||||
} else {
|
||||
++_shard_stats.hits;
|
||||
return make_ready_future<list_ptr>(list_ptr(std::move(e)));
|
||||
}
|
||||
future<list_ptr> get_or_load(const key_type& key, Loader&& loader) {
|
||||
return _lists.get_or_load(key, std::forward<Loader>(loader));
|
||||
}
|
||||
|
||||
static const stats& shard_stats() { return _shard_stats; }
|
||||
|
||||
@@ -100,7 +100,7 @@ void stream_coordinator::connect_all_stream_sessions() {
|
||||
for (auto& x : _peer_sessions) {
|
||||
auto& session = x.second;
|
||||
session->start();
|
||||
sslog.info("[Stream #{}] Beginning stream session with {}", session->plan_id(), session->peer);
|
||||
sslog.debug("[Stream #{}] Beginning stream session with {}", session->plan_id(), session->peer);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -291,4 +291,15 @@ void stream_manager::on_restart(inet_address endpoint, endpoint_state ep_state)
|
||||
}
|
||||
}
|
||||
|
||||
void stream_manager::on_dead(inet_address endpoint, endpoint_state ep_state) {
|
||||
if (has_peer(endpoint) && ep_state.is_shutdown()) {
|
||||
sslog.info("stream_manager: Close all stream_session with peer = {} in on_dead", endpoint);
|
||||
get_stream_manager().invoke_on_all([endpoint] (auto& sm) {
|
||||
sm.fail_sessions(endpoint);
|
||||
}).handle_exception([endpoint] (auto ep) {
|
||||
sslog.warn("stream_manager: Fail to close sessions peer = {} in on_dead", endpoint);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace streaming
|
||||
|
||||
@@ -156,7 +156,7 @@ public:
|
||||
virtual void before_change(inet_address endpoint, endpoint_state current_state, application_state new_state_key, const versioned_value& new_value) override {}
|
||||
virtual void on_change(inet_address endpoint, application_state state, const versioned_value& value) override {}
|
||||
virtual void on_alive(inet_address endpoint, endpoint_state state) override {}
|
||||
virtual void on_dead(inet_address endpoint, endpoint_state state) override {}
|
||||
virtual void on_dead(inet_address endpoint, endpoint_state state) override;
|
||||
virtual void on_remove(inet_address endpoint) override;
|
||||
virtual void on_restart(inet_address endpoint, endpoint_state ep_state) override;
|
||||
|
||||
|
||||
@@ -54,7 +54,7 @@ future<stream_state> stream_result_future::init_sending_side(UUID plan_id_, sstr
|
||||
sr->add_event_listener(listener);
|
||||
}
|
||||
|
||||
sslog.info("[Stream #{}] Executing streaming plan for {}", plan_id_, description_);
|
||||
sslog.info("[Stream #{}] Executing streaming plan for {} with peers={}, master", plan_id_, description_, coordinator_->get_peers());
|
||||
|
||||
// Initialize and start all sessions
|
||||
for (auto& session : coordinator_->get_all_stream_sessions()) {
|
||||
@@ -74,7 +74,7 @@ shared_ptr<stream_result_future> stream_result_future::init_receiving_side(UUID
|
||||
sslog.warn(err.c_str());
|
||||
throw std::runtime_error(err);
|
||||
}
|
||||
sslog.info("[Stream #{}] Creating new streaming plan for {}, with {}", plan_id, description, from);
|
||||
sslog.info("[Stream #{}] Executing streaming plan for {} with peers={}, slave", plan_id, description, from);
|
||||
bool is_receiving = true;
|
||||
sr = make_shared<stream_result_future>(plan_id, description, is_receiving);
|
||||
sm.register_receiving(sr);
|
||||
@@ -83,7 +83,7 @@ shared_ptr<stream_result_future> stream_result_future::init_receiving_side(UUID
|
||||
|
||||
void stream_result_future::handle_session_prepared(shared_ptr<stream_session> session) {
|
||||
auto si = session->make_session_info();
|
||||
sslog.info("[Stream #{}] Prepare completed with {}. Receiving {}, sending {}",
|
||||
sslog.debug("[Stream #{}] Prepare completed with {}. Receiving {}, sending {}",
|
||||
session->plan_id(),
|
||||
session->peer,
|
||||
si.get_total_files_to_receive(),
|
||||
@@ -94,7 +94,7 @@ void stream_result_future::handle_session_prepared(shared_ptr<stream_session> se
|
||||
}
|
||||
|
||||
void stream_result_future::handle_session_complete(shared_ptr<stream_session> session) {
|
||||
sslog.info("[Stream #{}] Session with {} is complete, state={}", session->plan_id(), session->peer, session->get_state());
|
||||
sslog.debug("[Stream #{}] Session with {} is complete, state={}", session->plan_id(), session->peer, session->get_state());
|
||||
auto event = session_complete_event(session);
|
||||
fire_stream_event(std::move(event));
|
||||
auto si = session->make_session_info();
|
||||
@@ -120,25 +120,25 @@ void stream_result_future::maybe_complete() {
|
||||
sm.show_streams();
|
||||
}
|
||||
auto duration = std::chrono::duration_cast<std::chrono::duration<float>>(lowres_clock::now() - _start_time).count();
|
||||
sm.get_progress_on_all_shards(plan_id).then([plan_id, duration] (auto sbytes) {
|
||||
auto tx_bw = sstring("+inf");
|
||||
auto rx_bw = sstring("+inf");
|
||||
auto stats = make_lw_shared<sstring>("");
|
||||
sm.get_progress_on_all_shards(plan_id).then([plan_id, duration, stats] (auto sbytes) {
|
||||
auto tx_bw = sstring("0");
|
||||
auto rx_bw = sstring("0");
|
||||
if (std::fabs(duration) > FLT_EPSILON) {
|
||||
tx_bw = sprint("%.3f", sbytes.bytes_sent / duration / (1024 * 1024));
|
||||
rx_bw = sprint("%.3f", sbytes.bytes_received / duration / (1024 * 1024));
|
||||
tx_bw = sprint("%.2f", sbytes.bytes_sent / duration / 1024);
|
||||
rx_bw = sprint("%.2f", sbytes.bytes_received / duration / 1024);
|
||||
}
|
||||
sslog.info("[Stream #{}] bytes_sent = {}, bytes_received = {}, tx_bandwidth = {} MiB/s, rx_bandwidth = {} MiB/s",
|
||||
plan_id, sbytes.bytes_sent, sbytes.bytes_received, tx_bw, rx_bw);
|
||||
*stats = sprint("tx=%ld KiB, %s KiB/s, rx=%ld KiB, %s KiB/s", sbytes.bytes_sent / 1024, tx_bw, sbytes.bytes_received / 1024, rx_bw);
|
||||
}).handle_exception([plan_id] (auto ep) {
|
||||
sslog.warn("[Stream #{}] Fail to get progess on all shards: {}", plan_id, ep);
|
||||
}).finally([this, plan_id, &sm] {
|
||||
}).finally([this, plan_id, stats, &sm] () {
|
||||
sm.remove_stream(plan_id);
|
||||
auto final_state = get_current_state();
|
||||
if (final_state.has_failed_session()) {
|
||||
sslog.warn("[Stream #{}] Stream failed for streaming plan {}, peers={}", plan_id, description, _coordinator->get_peers());
|
||||
sslog.warn("[Stream #{}] Streaming plan for {} failed, peers={}, {}", plan_id, description, _coordinator->get_peers(), *stats);
|
||||
_done.set_exception(stream_exception(final_state, "Stream failed"));
|
||||
} else {
|
||||
sslog.info("[Stream #{}] All sessions completed for streaming plan {}, peers={}", plan_id, description, _coordinator->get_peers());
|
||||
sslog.info("[Stream #{}] Streaming plan for {} succeeded, peers={}, {}", plan_id, description, _coordinator->get_peers(), *stats);
|
||||
_done.set_value(final_state);
|
||||
}
|
||||
});
|
||||
|
||||
@@ -176,11 +176,20 @@ void stream_session::init_messaging_service_handler() {
|
||||
});
|
||||
});
|
||||
});
|
||||
ms().register_complete_message([] (const rpc::client_info& cinfo, UUID plan_id, unsigned dst_cpu_id) {
|
||||
ms().register_complete_message([] (const rpc::client_info& cinfo, UUID plan_id, unsigned dst_cpu_id, rpc::optional<bool> failed) {
|
||||
const auto& from = cinfo.retrieve_auxiliary<gms::inet_address>("baddr");
|
||||
// Be compatible with old version. Do nothing but return a ready future.
|
||||
sslog.debug("[Stream #{}] COMPLETE_MESSAGE from {} dst_cpu_id={}", plan_id, from, dst_cpu_id);
|
||||
return make_ready_future<>();
|
||||
if (failed && *failed) {
|
||||
return smp::submit_to(dst_cpu_id, [plan_id, from, dst_cpu_id] () {
|
||||
auto session = get_session(plan_id, from, "COMPLETE_MESSAGE");
|
||||
sslog.debug("[Stream #{}] COMPLETE_MESSAGE with error flag from {} dst_cpu_id={}", plan_id, from, dst_cpu_id);
|
||||
session->received_failed_complete_message();
|
||||
return make_ready_future<>();
|
||||
});
|
||||
} else {
|
||||
// Be compatible with old version. Do nothing but return a ready future.
|
||||
sslog.debug("[Stream #{}] COMPLETE_MESSAGE from {} dst_cpu_id={}", plan_id, from, dst_cpu_id);
|
||||
return make_ready_future<>();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -227,7 +236,9 @@ future<> stream_session::on_initialization_complete() {
|
||||
for (auto& summary : msg.summaries) {
|
||||
this->prepare_receiving(summary);
|
||||
}
|
||||
_stream_result->handle_session_prepared(this->shared_from_this());
|
||||
if (_stream_result) {
|
||||
_stream_result->handle_session_prepared(this->shared_from_this());
|
||||
}
|
||||
} catch (...) {
|
||||
sslog.warn("[Stream #{}] Fail to send PREPARE_MESSAGE to {}, {}", this->plan_id(), id, std::current_exception());
|
||||
throw;
|
||||
@@ -248,9 +259,19 @@ future<> stream_session::on_initialization_complete() {
|
||||
});
|
||||
}
|
||||
|
||||
// Called when the peer reports, through a COMPLETE_MESSAGE carrying the failure flag, that the
// stream has failed on its side. Logs the event, remembers that the failure notification was
// received and closes the session in the FAILED state.
void stream_session::received_failed_complete_message() {
    const auto id = plan_id();
    sslog.info("[Stream #{}] Received failed complete message, peer={}", id, peer);
    // The flag is raised before the session is closed.
    // NOTE(review): send_failed_complete_message() checks this flag to skip answering the peer;
    // it appears to be invoked from close_session() on failure - confirm.
    _received_failed_complete_message = true;
    close_session(stream_session_state::FAILED);
}
|
||||
|
||||
void stream_session::abort() {
|
||||
sslog.info("[Stream #{}] Aborted stream session={}, peer={}, is_initialized={}", plan_id(), this, peer, is_initialized());
|
||||
close_session(stream_session_state::FAILED);
|
||||
}
|
||||
|
||||
void stream_session::on_error() {
|
||||
sslog.warn("[Stream #{}] Streaming error occurred", plan_id());
|
||||
// fail session
|
||||
sslog.warn("[Stream #{}] Streaming error occurred, peer={}", plan_id(), peer);
|
||||
close_session(stream_session_state::FAILED);
|
||||
}
|
||||
|
||||
@@ -300,7 +321,9 @@ future<prepare_message> stream_session::prepare(std::vector<stream_request> requ
|
||||
}
|
||||
}
|
||||
prepare.dst_cpu_id = engine().cpu_id();;
|
||||
_stream_result->handle_session_prepared(shared_from_this());
|
||||
if (_stream_result) {
|
||||
_stream_result->handle_session_prepared(shared_from_this());
|
||||
}
|
||||
return make_ready_future<prepare_message>(std::move(prepare));
|
||||
}
|
||||
|
||||
@@ -309,10 +332,6 @@ void stream_session::follower_start_sent() {
|
||||
this->start_streaming_files();
|
||||
}
|
||||
|
||||
void stream_session::session_failed() {
|
||||
close_session(stream_session_state::FAILED);
|
||||
}
|
||||
|
||||
session_info stream_session::make_session_info() {
|
||||
std::vector<stream_summary> receiving_summaries;
|
||||
for (auto& receiver : _receivers) {
|
||||
@@ -339,28 +358,41 @@ void stream_session::transfer_task_completed(UUID cf_id) {
|
||||
maybe_completed();
|
||||
}
|
||||
|
||||
void stream_session::send_complete_message() {
|
||||
void stream_session::transfer_task_completed_all() {
|
||||
_transfers.clear();
|
||||
sslog.debug("[Stream #{}] transfer task_completed: all done, stream_receive_task.size={} stream_transfer_task.size={}",
|
||||
plan_id(), _receivers.size(), _transfers.size());
|
||||
maybe_completed();
|
||||
}
|
||||
|
||||
void stream_session::send_failed_complete_message() {
|
||||
if (!is_initialized()) {
|
||||
return;
|
||||
}
|
||||
auto plan_id = this->plan_id();
|
||||
if (_received_failed_complete_message) {
|
||||
sslog.debug("[Stream #{}] Skip sending failed message back to peer", plan_id);
|
||||
return;
|
||||
}
|
||||
if (!_complete_sent) {
|
||||
_complete_sent = true;
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
auto id = msg_addr{this->peer, this->dst_cpu_id};
|
||||
auto plan_id = this->plan_id();
|
||||
sslog.debug("[Stream #{}] SEND COMPLETE_MESSAGE to {}", plan_id, id);
|
||||
auto session = shared_from_this();
|
||||
this->ms().send_complete_message(id, plan_id, this->dst_cpu_id).then([session, id, plan_id] {
|
||||
bool failed = true;
|
||||
this->ms().send_complete_message(id, plan_id, this->dst_cpu_id, failed).then([session, id, plan_id] {
|
||||
sslog.debug("[Stream #{}] GOT COMPLETE_MESSAGE Reply from {}", plan_id, id.addr);
|
||||
}).handle_exception([session, id, plan_id] (auto ep) {
|
||||
sslog.warn("[Stream #{}] COMPLETE_MESSAGE for {} has failed: {}", plan_id, id.addr, ep);
|
||||
session->on_error();
|
||||
sslog.debug("[Stream #{}] COMPLETE_MESSAGE for {} has failed: {}", plan_id, id.addr, ep);
|
||||
});
|
||||
}
|
||||
|
||||
bool stream_session::maybe_completed() {
|
||||
bool completed = _receivers.empty() && _transfers.empty();
|
||||
if (completed) {
|
||||
send_complete_message();
|
||||
sslog.debug("[Stream #{}] maybe_completed: {} -> COMPLETE: session={}, peer={}", plan_id(), _state, this, peer);
|
||||
close_session(stream_session_state::COMPLETE);
|
||||
}
|
||||
@@ -379,11 +411,15 @@ void stream_session::start_streaming_files() {
|
||||
if (!_transfers.empty()) {
|
||||
set_state(stream_session_state::STREAMING);
|
||||
}
|
||||
for (auto it = _transfers.begin(); it != _transfers.end();) {
|
||||
stream_transfer_task& task = it->second;
|
||||
it++;
|
||||
task.start();
|
||||
}
|
||||
do_for_each(_transfers.begin(), _transfers.end(), [this] (auto& item) {
|
||||
sslog.debug("[Stream #{}] Start to send cf_id={}", this->plan_id(), item.first);
|
||||
return item.second.execute();
|
||||
}).then([this] {
|
||||
this->transfer_task_completed_all();
|
||||
}).handle_exception([this] (auto ep) {
|
||||
sslog.warn("[Stream #{}] Failed to send: {}", this->plan_id(), ep);
|
||||
this->on_error();
|
||||
});
|
||||
}
|
||||
|
||||
std::vector<column_family*> stream_session::get_column_family_stores(const sstring& keyspace, const std::vector<sstring>& column_families) {
|
||||
@@ -460,12 +496,15 @@ void stream_session::close_session(stream_session_state final_state) {
|
||||
receiving_failed(x.first);
|
||||
task.abort();
|
||||
}
|
||||
send_failed_complete_message();
|
||||
}
|
||||
|
||||
// Note that we shouldn't block on this close because this method is called on the handler
|
||||
// incoming thread (so we would deadlock).
|
||||
//handler.close();
|
||||
_stream_result->handle_session_complete(shared_from_this());
|
||||
if (_stream_result) {
|
||||
_stream_result->handle_session_complete(shared_from_this());
|
||||
}
|
||||
|
||||
sslog.debug("[Stream #{}] close_session session={}, state={}, cancel keep_alive timer", plan_id(), this, final_state);
|
||||
_keep_alive.cancel();
|
||||
@@ -480,15 +519,19 @@ void stream_session::start() {
|
||||
}
|
||||
auto connecting = netw::get_local_messaging_service().get_preferred_ip(peer);
|
||||
if (peer == connecting) {
|
||||
sslog.info("[Stream #{}] Starting streaming to {}", plan_id(), peer);
|
||||
sslog.debug("[Stream #{}] Starting streaming to {}", plan_id(), peer);
|
||||
} else {
|
||||
sslog.info("[Stream #{}] Starting streaming to {} through {}", plan_id(), peer, connecting);
|
||||
sslog.debug("[Stream #{}] Starting streaming to {} through {}", plan_id(), peer, connecting);
|
||||
}
|
||||
on_initialization_complete().handle_exception([this] (auto ep) {
|
||||
this->on_error();
|
||||
});
|
||||
}
|
||||
|
||||
bool stream_session::is_initialized() const {
|
||||
return bool(_stream_result);
|
||||
}
|
||||
|
||||
void stream_session::init(shared_ptr<stream_result_future> stream_result_) {
|
||||
_stream_result = stream_result_;
|
||||
_keep_alive.set_callback([this] {
|
||||
|
||||
@@ -151,7 +151,7 @@ public:
|
||||
* Each {@code StreamSession} is identified by this InetAddress which is broadcast address of the node streaming.
|
||||
*/
|
||||
inet_address peer;
|
||||
unsigned dst_cpu_id;
|
||||
unsigned dst_cpu_id = 0;
|
||||
private:
|
||||
// should not be null when session is started
|
||||
shared_ptr<stream_result_future> _stream_result;
|
||||
@@ -174,11 +174,12 @@ private:
|
||||
|
||||
stream_session_state _state = stream_session_state::INITIALIZED;
|
||||
bool _complete_sent = false;
|
||||
bool _received_failed_complete_message = false;
|
||||
|
||||
// If the session is idle for 300 minutes, close the session
|
||||
std::chrono::seconds _keep_alive_timeout{60 * 300};
|
||||
// Check every 10 minutes
|
||||
std::chrono::seconds _keep_alive_interval{60 * 10};
|
||||
// If the session is idle for 10 minutes, close the session
|
||||
std::chrono::seconds _keep_alive_timeout{60 * 10};
|
||||
// Check every 1 minute
|
||||
std::chrono::seconds _keep_alive_interval{60};
|
||||
timer<lowres_clock> _keep_alive;
|
||||
stream_bytes _last_stream_bytes;
|
||||
lowres_clock::time_point _last_stream_progress;
|
||||
@@ -231,6 +232,8 @@ public:
|
||||
|
||||
void start();
|
||||
|
||||
bool is_initialized() const;
|
||||
|
||||
/**
|
||||
* Request data fetch task to this session.
|
||||
*
|
||||
@@ -299,6 +302,10 @@ public:
|
||||
*/
|
||||
void on_error();
|
||||
|
||||
void abort();
|
||||
|
||||
void received_failed_complete_message();
|
||||
|
||||
/**
|
||||
* Prepare this session for sending/receiving files.
|
||||
*/
|
||||
@@ -311,11 +318,6 @@ public:
|
||||
*/
|
||||
void complete();
|
||||
|
||||
/**
|
||||
* Call back on receiving {@code StreamMessage.Type.SESSION_FAILED} message.
|
||||
*/
|
||||
void session_failed();
|
||||
|
||||
/**
|
||||
* @return Current snapshot of this session info.
|
||||
*/
|
||||
@@ -333,8 +335,9 @@ public:
|
||||
|
||||
void receive_task_completed(UUID cf_id);
|
||||
void transfer_task_completed(UUID cf_id);
|
||||
void transfer_task_completed_all();
|
||||
private:
|
||||
void send_complete_message();
|
||||
void send_failed_complete_message();
|
||||
bool maybe_completed();
|
||||
void prepare_receiving(stream_summary& summary);
|
||||
void start_streaming_files();
|
||||
|
||||
@@ -134,7 +134,7 @@ future<> send_mutations(lw_shared_ptr<send_info> si) {
|
||||
});
|
||||
}
|
||||
|
||||
void stream_transfer_task::start() {
|
||||
future<> stream_transfer_task::execute() {
|
||||
auto plan_id = session->plan_id();
|
||||
auto cf_id = this->cf_id;
|
||||
auto dst_cpu_id = session->dst_cpu_id;
|
||||
@@ -143,7 +143,7 @@ void stream_transfer_task::start() {
|
||||
sslog.debug("[Stream #{}] stream_transfer_task: cf_id={}", plan_id, cf_id);
|
||||
sort_and_merge_ranges();
|
||||
_shard_ranges = dht::split_ranges_to_shards(_ranges, *schema);
|
||||
parallel_for_each(_shard_ranges, [this, dst_cpu_id, plan_id, cf_id, id] (auto& item) {
|
||||
return parallel_for_each(_shard_ranges, [this, dst_cpu_id, plan_id, cf_id, id] (auto& item) {
|
||||
auto& shard = item.first;
|
||||
auto& prs = item.second;
|
||||
return session->get_db().invoke_on(shard, [plan_id, cf_id, id, dst_cpu_id, prs = std::move(prs)] (database& db) mutable {
|
||||
@@ -160,10 +160,9 @@ void stream_transfer_task::start() {
|
||||
}).then([this, id, plan_id, cf_id] {
|
||||
sslog.debug("[Stream #{}] GOT STREAM_MUTATION_DONE Reply from {}", plan_id, id.addr);
|
||||
session->start_keep_alive_timer();
|
||||
session->transfer_task_completed(cf_id);
|
||||
}).handle_exception([this, plan_id, id] (auto ep){
|
||||
sslog.warn("[Stream #{}] stream_transfer_task: Fail to send to {}: {}", plan_id, id, ep);
|
||||
this->session->on_error();
|
||||
std::rethrow_exception(ep);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -78,7 +78,7 @@ public:
|
||||
return _total_size;
|
||||
}
|
||||
|
||||
void start();
|
||||
future<> execute();
|
||||
|
||||
void append_ranges(const dht::token_range_vector& ranges);
|
||||
void sort_and_merge_ranges();
|
||||
|
||||
9
test.py
9
test.py
@@ -81,7 +81,7 @@ boost_tests = [
|
||||
'virtual_reader_test',
|
||||
'counter_test',
|
||||
'cell_locker_test',
|
||||
'clustering_ranges_walker_test',
|
||||
'view_schema_test',
|
||||
]
|
||||
|
||||
other_tests = [
|
||||
@@ -135,9 +135,9 @@ if __name__ == "__main__":
|
||||
for mode in modes_to_run:
|
||||
prefix = os.path.join('build', mode, 'tests')
|
||||
for test in other_tests:
|
||||
test_to_run.append((os.path.join(prefix, test), 'other'))
|
||||
test_to_run.append((os.path.join(prefix, test), 'other', '-c2 -m4G'.split()))
|
||||
for test in boost_tests:
|
||||
test_to_run.append((os.path.join(prefix, test), 'boost'))
|
||||
test_to_run.append((os.path.join(prefix, test), 'boost', '-c2 -m4G'.split()))
|
||||
|
||||
if 'release' in modes_to_run:
|
||||
test_to_run.append(('build/release/tests/lsa_async_eviction_test', 'other',
|
||||
@@ -151,11 +151,9 @@ if __name__ == "__main__":
|
||||
test_to_run.append(('build/release/tests/row_cache_alloc_stress', 'other',
|
||||
'-c1 -m1G'.split()))
|
||||
test_to_run.append(('build/release/tests/sstable_test', 'boost', ['-c1']))
|
||||
test_to_run.append(('build/release/tests/view_schema_test', 'boost', ['-c1']))
|
||||
test_to_run.append(('build/release/tests/row_cache_stress_test', 'other', '-c1 -m1G --seconds 10'.split()))
|
||||
if 'debug' in modes_to_run:
|
||||
test_to_run.append(('build/debug/tests/sstable_test', 'boost', ['-c1']))
|
||||
test_to_run.append(('build/debug/tests/view_schema_test', 'boost', ['-c1']))
|
||||
|
||||
if args.name:
|
||||
test_to_run = [t for t in test_to_run if args.name in t[0]]
|
||||
@@ -167,6 +165,7 @@ if __name__ == "__main__":
|
||||
# disable false positive due to new (with_alignment(...)) ...
|
||||
env['ASAN_OPTIONS'] = 'alloc_dealloc_mismatch=0'
|
||||
env['UBSAN_OPTIONS'] = 'print_stacktrace=1'
|
||||
env['BOOST_TEST_CATCH_SYSTEM_ERRORS'] = 'no'
|
||||
for n, test in enumerate(test_to_run):
|
||||
path = test[0]
|
||||
exec_args = test[2] if len(test) >= 3 else []
|
||||
|
||||
@@ -120,7 +120,7 @@ public:
|
||||
});
|
||||
}
|
||||
|
||||
virtual future<bytes> prepare(sstring query) override {
|
||||
virtual future<cql3::prepared_cache_key_type> prepare(sstring query) override {
|
||||
return qp().invoke_on_all([query, this] (auto& local_qp) {
|
||||
auto qs = this->make_query_state();
|
||||
return local_qp.prepare(query, *qs).finally([qs] {}).discard_result();
|
||||
@@ -130,7 +130,7 @@ public:
|
||||
}
|
||||
|
||||
virtual future<::shared_ptr<cql_transport::messages::result_message>> execute_prepared(
|
||||
bytes id,
|
||||
cql3::prepared_cache_key_type id,
|
||||
std::vector<cql3::raw_value> values) override
|
||||
{
|
||||
auto prepared = local_qp().get_prepared(id);
|
||||
|
||||
@@ -32,6 +32,7 @@
|
||||
#include "transport/messages/result_message_base.hh"
|
||||
#include "cql3/query_options_fwd.hh"
|
||||
#include "cql3/values.hh"
|
||||
#include "cql3/prepared_statements_cache.hh"
|
||||
#include "bytes.hh"
|
||||
#include "schema.hh"
|
||||
|
||||
@@ -43,7 +44,7 @@ namespace cql3 {
|
||||
|
||||
class not_prepared_exception : public std::runtime_error {
|
||||
public:
|
||||
not_prepared_exception(const bytes& id) : std::runtime_error(sprint("Not prepared: %s", id)) {}
|
||||
not_prepared_exception(const cql3::prepared_cache_key_type& id) : std::runtime_error(sprint("Not prepared: %s", id)) {}
|
||||
};
|
||||
|
||||
namespace db {
|
||||
@@ -59,10 +60,10 @@ public:
|
||||
virtual future<::shared_ptr<cql_transport::messages::result_message>> execute_cql(
|
||||
const sstring& text, std::unique_ptr<cql3::query_options> qo) = 0;
|
||||
|
||||
virtual future<bytes> prepare(sstring query) = 0;
|
||||
virtual future<cql3::prepared_cache_key_type> prepare(sstring query) = 0;
|
||||
|
||||
virtual future<::shared_ptr<cql_transport::messages::result_message>> execute_prepared(
|
||||
bytes id, std::vector<cql3::raw_value> values) = 0;
|
||||
cql3::prepared_cache_key_type id, std::vector<cql3::raw_value> values) = 0;
|
||||
|
||||
virtual future<> create_table(std::function<schema(const sstring&)> schema_maker) = 0;
|
||||
|
||||
|
||||
321
tests/loading_cache_test.cc
Normal file
321
tests/loading_cache_test.cc
Normal file
@@ -0,0 +1,321 @@
|
||||
/*
|
||||
* Copyright (C) 2017 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include "utils/loading_shared_values.hh"
|
||||
#include "utils/loading_cache.hh"
|
||||
#include <seastar/core/file.hh>
|
||||
#include <seastar/core/thread.hh>
|
||||
#include <seastar/core/sstring.hh>
|
||||
#include <seastar/core/reactor.hh>
|
||||
#include <seastar/core/sleep.hh>
|
||||
|
||||
|
||||
#include "seastarx.hh"
|
||||
|
||||
#include "tests/test-utils.hh"
|
||||
#include "tmpdir.hh"
|
||||
#include "log.hh"
|
||||
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
#include <random>
|
||||
|
||||
/// Get a random integer in the [0, max) range.
/// \param max exclusive upper bound of the random value range; must be positive, because the
///            underlying distribution is built over the closed range [0, max - 1]
/// \return The uniformly distributed random integer from the [0, \ref max) range.
static int rand_int(int max) {
    // Seed the engine only once per thread instead of on every call: the engine (Mersenne-Twister
    // in this case) keeps its state between calls, so re-seeding from std::random_device each
    // time is unnecessary work.
    static thread_local std::mt19937 rng{std::random_device{}()};
    std::uniform_int_distribution<int> uni(0, max - 1); // guaranteed unbiased
    return uni(rng);
}
|
||||
|
||||
|
||||
#include "disk-error-handler.hh"
|
||||
|
||||
thread_local disk_error_signal_type general_disk_error;
|
||||
thread_local disk_error_signal_type commit_error;
|
||||
|
||||
static const sstring test_file_name = "loading_cache_test.txt";
|
||||
static const sstring test_string = "1";
|
||||
static bool file_prepared = false;
|
||||
static constexpr int num_loaders = 1000;
|
||||
|
||||
static logging::logger test_logger("loading_cache_test");
|
||||
|
||||
static thread_local int load_count;
|
||||
/// Returns the temporary directory used by the tests in this file.
/// The tmpdir object is a function-local thread_local, so it is created on first use and each
/// thread gets its own instance.
static const tmpdir& get_tmpdir() {
    static thread_local tmpdir per_thread_tmpdir;
    return per_thread_tmpdir;
}
|
||||
|
||||
/// Creates the test file inside the temporary directory and writes test_string (including its
/// terminating NUL) into it. The work is done only once: after a successful write the
/// file_prepared flag is set and subsequent calls return a ready future immediately.
/// \return a future that resolves when the file is ready to be read by loader()
static future<> prepare() {
    if (!file_prepared) {
        const auto file_path = boost::filesystem::path(get_tmpdir().path) / test_file_name.c_str();
        return open_file_dma(file_path.c_str(), open_flags::create | open_flags::wo).then([] (file out) {
            // do_with() keeps the file object alive until the write completes
            return do_with(std::move(out), [] (file& out) {
                return out.dma_write(0, test_string.c_str(), test_string.size() + 1).then([] (size_t written) {
                    BOOST_REQUIRE_EQUAL(written, test_string.size() + 1);
                    file_prepared = true;
                });
            });
        });
    }

    return make_ready_future<>();
}
|
||||
|
||||
/// Loader callback handed to the containers under test. Reads the test file back, verifies that
/// its content equals test_string and increments load_count, so that tests can tell how many
/// times a load has actually been performed.
/// \param k the key being loaded; it is not used, every key yields the same value
/// \return the string read from the test file
static future<sstring> loader(const int& k) {
    const auto file_path = boost::filesystem::path(get_tmpdir().path) / test_file_name.c_str();
    return open_file_dma(file_path.c_str(), open_flags::ro).then([] (file in) -> future<sstring> {
        // do_with() keeps the file object alive until the read completes
        return do_with(std::move(in), [] (file& in) -> future<sstring> {
            return in.dma_read_exactly<char>(0, test_string.size() + 1).then([] (auto data) {
                sstring loaded(data.get());
                BOOST_REQUIRE_EQUAL(loaded, test_string);
                ++load_count;
                return make_ready_future<sstring>(std::move(loaded));
            });
        });
    });
}
|
||||
|
||||
/// Requests the same key from loading_shared_values num_loaders times in parallel and checks
/// that the loader ran exactly once and that the container holds a single entry.
SEASTAR_TEST_CASE(test_loading_shared_values_parallel_loading_same_key) {
    return seastar::async([] {
        using values_type = utils::loading_shared_values<int, sstring>;

        // every request targets key 0
        std::vector<int> keys(num_loaders, 0);
        load_count = 0;
        values_type values;
        // holds the entry pointers returned by get_or_load() until the checks are done
        std::list<values_type::entry_ptr> anchors;

        prepare().get();

        parallel_for_each(keys, [&] (int& key) {
            return values.get_or_load(key, loader).then([&] (auto entry) {
                anchors.emplace_back(std::move(entry));
            });
        }).get();

        // "loader" must be called exactly once
        BOOST_REQUIRE_EQUAL(load_count, 1);
        BOOST_REQUIRE_EQUAL(values.size(), 1);
        anchors.clear();
    });
}
|
||||
|
||||
/// Requests num_loaders distinct keys from loading_shared_values in parallel and checks that
/// the loader ran once per key and that the container holds one entry per key.
SEASTAR_TEST_CASE(test_loading_shared_values_parallel_loading_different_keys) {
    return seastar::async([] {
        using values_type = utils::loading_shared_values<int, sstring>;

        // keys 0 .. num_loaders - 1
        std::vector<int> keys(num_loaders);
        std::iota(keys.begin(), keys.end(), 0);

        load_count = 0;
        values_type values;
        // holds the entry pointers returned by get_or_load() until the checks are done
        std::list<values_type::entry_ptr> anchors;

        prepare().get();

        parallel_for_each(keys, [&] (int& key) {
            return values.get_or_load(key, loader).then([&] (auto entry) {
                anchors.emplace_back(std::move(entry));
            });
        }).get();

        // "loader" must be called once for each key
        BOOST_REQUIRE_EQUAL(load_count, num_loaders);
        BOOST_REQUIRE_EQUAL(values.size(), num_loaders);
        anchors.clear();
    });
}
|
||||
|
||||
/// Checks the bucket count maintenance of loading_shared_values: while entries are added one by
/// one the size must never exceed 3/4 of the bucket count, and while entries are released and
/// rehash() is called the size must never drop below 1/4 of the bucket count.
SEASTAR_TEST_CASE(test_loading_shared_values_rehash) {
    return seastar::async([] {
        load_count = 0;
        utils::loading_shared_values<int, sstring> shared_values;
        // holds the entry pointers returned by get_or_load(); dropping one releases that entry
        std::list<typename utils::loading_shared_values<int, sstring>::entry_ptr> anchors_list;

        prepare().get();

        // verify that the load factor never exceeds 0.75 while the container grows
        for (int k = 0; k < num_loaders; ++k) {
            shared_values.get_or_load(k, loader).then([&] (auto entry_ptr) {
                anchors_list.emplace_back(std::move(entry_ptr));
            }).get();
            BOOST_REQUIRE_LE(shared_values.size(), 3 * shared_values.buckets_count() / 4);
        }

        BOOST_REQUIRE_GE(shared_values.size(), shared_values.buckets_count() / 4);

        // verify that the load factor never drops below 0.25 while the container shrinks;
        // minimum buckets count (by default) is 16, so don't check for less than 4 elements
        for (int k = 0; k < num_loaders - 4; ++k) {
            anchors_list.pop_back();
            shared_values.rehash();
            BOOST_REQUIRE_GE(shared_values.size(), shared_values.buckets_count() / 4);
        }

        anchors_list.clear();
    });
}
|
||||
|
||||
/// Loads num_loaders distinct keys in parallel, then drops the entry pointer of one randomly
/// chosen key and checks that this key can no longer be found in the container.
SEASTAR_TEST_CASE(test_loading_shared_values_parallel_loading_explicit_eviction) {
    return seastar::async([] {
        using values_type = utils::loading_shared_values<int, sstring>;

        // keys 0 .. num_loaders - 1
        std::vector<int> keys(num_loaders);
        std::iota(keys.begin(), keys.end(), 0);

        load_count = 0;
        values_type values;
        // anchors[k] holds the entry pointer returned for key k
        std::vector<values_type::entry_ptr> anchors(num_loaders);

        prepare().get();

        parallel_for_each(keys, [&] (int& key) {
            return values.get_or_load(key, loader).then([&] (auto entry) {
                anchors[key] = std::move(entry);
            });
        }).get();

        const int victim = rand_int(num_loaders);
        BOOST_REQUIRE(values.find(victim) != values.end());

        // releasing the pointer is expected to remove the entry from the container
        anchors[victim] = nullptr;
        BOOST_REQUIRE_MESSAGE(values.find(victim) == values.end(), format("explicit removal for key {} failed", victim));
        anchors.clear();
    });
}
|
||||
|
||||
/// Requests the same key from loading_cache num_loaders times in parallel and checks that the
/// loader ran exactly once and that the cache holds a single entry.
SEASTAR_TEST_CASE(test_loading_cache_loading_same_key) {
    return seastar::async([] {
        using namespace std::chrono;
        using cache_type = utils::loading_cache<int, sstring>;

        // every request targets key 0
        std::vector<int> keys(num_loaders, 0);
        load_count = 0;
        cache_type cache(num_loaders, 1s, test_logger);

        prepare().get();

        parallel_for_each(keys, [&] (int& key) {
            return cache.get_ptr(key, loader).discard_result();
        }).get();

        // "loader" must be called exactly once
        BOOST_REQUIRE_EQUAL(load_count, 1);
        BOOST_REQUIRE_EQUAL(cache.size(), 1);
        cache.stop().get();
    });
}
|
||||
|
||||
/// Requests num_loaders distinct keys from loading_cache in parallel and checks that the loader
/// ran once per key and that the cache holds one entry per key.
SEASTAR_TEST_CASE(test_loading_cache_loading_different_keys) {
    return seastar::async([] {
        using namespace std::chrono;
        using cache_type = utils::loading_cache<int, sstring>;

        // keys 0 .. num_loaders - 1
        std::vector<int> keys(num_loaders);
        std::iota(keys.begin(), keys.end(), 0);

        load_count = 0;
        cache_type cache(num_loaders, 1s, test_logger);

        prepare().get();

        parallel_for_each(keys, [&] (int& key) {
            return cache.get_ptr(key, loader).discard_result();
        }).get();

        // "loader" must be called once for each key
        BOOST_REQUIRE_EQUAL(load_count, num_loaders);
        BOOST_REQUIRE_EQUAL(cache.size(), num_loaders);
        cache.stop().get();
    });
}
|
||||
|
||||
/// Loads a single key into a loading_cache constructed with a 20ms time parameter and checks
/// that the entry is present right after loading and is gone after waiting for a while.
SEASTAR_TEST_CASE(test_loading_cache_loading_expiry_eviction) {
    return seastar::async([] {
        using namespace std::chrono;
        using cache_type = utils::loading_cache<int, sstring>;

        cache_type cache(num_loaders, 20ms, test_logger);

        prepare().get();

        cache.get_ptr(0, loader).discard_result().get();

        BOOST_REQUIRE(cache.find(0) != cache.end());

        // timers get delayed sometimes (especially in debug mode), so instead of a single sleep
        // poll in 40ms steps until the entry disappears or the retry budget is exhausted
        constexpr int max_retry = 10;
        int polls = 0;
        do_until(
            [&] { return polls++ > max_retry || cache.find(0) == cache.end(); },
            [] { return sleep(40ms); }
        ).get();

        BOOST_REQUIRE(cache.find(0) == cache.end());
        cache.stop().get();
    });
}
|
||||
|
||||
/// Loads a single key into a reload-enabled loading_cache and checks that, 60ms later, the
/// loader has run at least twice, i.e. the value was loaded again without being requested.
SEASTAR_TEST_CASE(test_loading_cache_loading_reloading) {
    return seastar::async([] {
        using namespace std::chrono;
        using reloading_cache_type = utils::loading_cache<int, sstring, utils::loading_cache_reload_enabled::yes>;

        load_count = 0;
        // NOTE(review): 100ms / 20ms are presumably the expiry and refresh periods - confirm
        // against the loading_cache constructor.
        reloading_cache_type cache(num_loaders, 100ms, 20ms, test_logger, loader);

        prepare().get();
        cache.get_ptr(0, loader).discard_result().get();

        sleep(60ms).get();

        BOOST_REQUIRE_MESSAGE(load_count >= 2, format("load_count is {}", load_count));
        cache.stop().get();
    });
}
|
||||
|
||||
/// Alternates between two keys on a loading_cache constructed with a size argument of 1 and
/// checks that every request triggered a load and that the cache ends up with one entry.
SEASTAR_TEST_CASE(test_loading_cache_max_size_eviction) {
    return seastar::async([] {
        using namespace std::chrono;
        using cache_type = utils::loading_cache<int, sstring>;

        load_count = 0;
        cache_type cache(1, 1s, test_logger);

        prepare().get();

        // keys alternate 0, 1, 0, 1, ...
        for (int request = 0; request < num_loaders; ++request) {
            const int key = request % 2;
            cache.get_ptr(key, loader).discard_result().get();
        }

        BOOST_REQUIRE_EQUAL(load_count, num_loaders);
        BOOST_REQUIRE_EQUAL(cache.size(), 1);
        cache.stop().get();
    });
}
|
||||
|
||||
/// Keeps alternating between two keys for about a second on a reload-enabled loading_cache
/// constructed with a size argument of 1, then checks that the cache holds exactly one entry.
SEASTAR_TEST_CASE(test_loading_cache_reload_during_eviction) {
    return seastar::async([] {
        using namespace std::chrono;
        using reloading_cache_type = utils::loading_cache<int, sstring, utils::loading_cache_reload_enabled::yes>;

        load_count = 0;
        reloading_cache_type cache(1, 100ms, 10ms, test_logger, loader);

        prepare().get();

        const auto started_at = lowres_clock::now();
        int request = 0;

        // this will cause reloading when values are being actively evicted due to the limited cache size
        do_until(
            [&] { return lowres_clock::now() - started_at > 1s; },
            [&] { return cache.get_ptr(request++ % 2).discard_result(); }
        ).get();

        BOOST_REQUIRE_EQUAL(cache.size(), 1);
        cache.stop().get();
    });
}
|
||||
@@ -1194,3 +1194,39 @@ SEASTAR_TEST_CASE(test_reclaiming_runs_as_long_as_there_is_soft_pressure) {
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/// Fills a region until more than two zones' worth of space is used, frees objects until the
/// used space drops below two segments, and then checks that reclaiming one segment through the
/// shard tracker does not increase the non-LSA used space.
SEASTAR_TEST_CASE(test_zone_reclaiming_preserves_free_size) {
    return seastar::async([] {
        region r;
        with_allocator(r.allocator(), [&] {
            chunked_fifo<managed_bytes> objs;

            // prints the current non-LSA usage and the region occupancy to the test log
            auto report_usage = [] {
                BOOST_TEST_MESSAGE(logalloc::shard_tracker().non_lsa_used_space());
                BOOST_TEST_MESSAGE(logalloc::shard_tracker().region_occupancy());
            };

            auto zone_size = max_zone_segments * segment_size;

            // We need to generate 3 zones, so that at least one zone (not the last one) can be
            // released fully. The first zone would not be, due to the emergency reserve.
            const auto fill_target = zone_size * 2 + zone_size / 4;
            while (logalloc::shard_tracker().region_occupancy().used_space() < fill_target) {
                objs.emplace_back(managed_bytes(managed_bytes::initialized_later(), 1024));
            }

            report_usage();

            while (logalloc::shard_tracker().region_occupancy().used_space() >= logalloc::segment_size * 2) {
                objs.pop_front();
            }

            report_usage();

            auto non_lsa_before = logalloc::shard_tracker().non_lsa_used_space();
            logalloc::shard_tracker().reclaim(logalloc::segment_size);
            auto non_lsa_after = logalloc::shard_tracker().non_lsa_used_space();

            report_usage();

            BOOST_REQUIRE(non_lsa_after <= non_lsa_before);
        });
    });
}
|
||||
|
||||
@@ -259,8 +259,9 @@ static void test_fast_forwarding_across_partitions_to_empty_range(populate_fn po
|
||||
|
||||
mutation_source ms = populate(s, partitions);
|
||||
|
||||
auto pr = dht::partition_range::make({keys[0]}, {keys[1]});
|
||||
mutation_reader rd = ms(s,
|
||||
dht::partition_range::make({keys[0]}, {keys[1]}),
|
||||
pr,
|
||||
query::full_slice,
|
||||
default_priority_class(),
|
||||
nullptr,
|
||||
@@ -280,14 +281,16 @@ static void test_fast_forwarding_across_partitions_to_empty_range(populate_fn po
|
||||
// ...don't finish consumption to leave the reader in the middle of partition
|
||||
}
|
||||
|
||||
rd.fast_forward_to(dht::partition_range::make({missing_key}, {missing_key})).get();
|
||||
pr = dht::partition_range::make({missing_key}, {missing_key});
|
||||
rd.fast_forward_to(pr).get();
|
||||
|
||||
{
|
||||
streamed_mutation_opt smo = rd().get0();
|
||||
BOOST_REQUIRE(!smo);
|
||||
}
|
||||
|
||||
rd.fast_forward_to(dht::partition_range::make({keys[3]}, {keys[3]})).get();
|
||||
pr = dht::partition_range::make({keys[3]}, {keys[3]});
|
||||
rd.fast_forward_to(pr).get();
|
||||
|
||||
{
|
||||
streamed_mutation_opt smo = rd().get0();
|
||||
@@ -303,7 +306,8 @@ static void test_fast_forwarding_across_partitions_to_empty_range(populate_fn po
|
||||
BOOST_REQUIRE(!smo);
|
||||
}
|
||||
|
||||
rd.fast_forward_to(dht::partition_range::make_starting_with({keys[keys.size() - 1]})).get();
|
||||
pr = dht::partition_range::make_starting_with({keys[keys.size() - 1]});
|
||||
rd.fast_forward_to(pr).get();
|
||||
|
||||
{
|
||||
streamed_mutation_opt smo = rd().get0();
|
||||
@@ -314,7 +318,8 @@ static void test_fast_forwarding_across_partitions_to_empty_range(populate_fn po
|
||||
// ...don't finish consumption to leave the reader in the middle of partition
|
||||
}
|
||||
|
||||
rd.fast_forward_to(dht::partition_range::make({key_after_all}, {key_after_all})).get();
|
||||
pr = dht::partition_range::make({key_after_all}, {key_after_all});
|
||||
rd.fast_forward_to(pr).get();
|
||||
|
||||
{
|
||||
streamed_mutation_opt smo = rd().get0();
|
||||
@@ -1274,7 +1279,7 @@ public:
|
||||
set_random_cells(row.cells(), column_kind::regular_column);
|
||||
row.marker() = random_row_marker();
|
||||
} else {
|
||||
m.partition().clustered_row(*_schema, ckey, is_dummy::yes, continuous);
|
||||
m.partition().clustered_row(*_schema, position_in_partition::after_all_clustered_rows(), is_dummy::yes, continuous);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -592,6 +592,24 @@ BOOST_AUTO_TEST_CASE(test_add_overlapping_range_to_range_with_empty_end) {
|
||||
BOOST_REQUIRE(it == l.end());
|
||||
}
|
||||
|
||||
// Reproduces https://github.com/scylladb/scylla/issues/3083
|
||||
BOOST_AUTO_TEST_CASE(test_coalescing_with_end_bound_inclusiveness_change_with_prefix_bound) {
    // Applies two tombstones, then a third one overlapping the first, and
    // expects the list to end up holding exactly rtie(1, 8, 4) followed by
    // the second tombstone, unchanged.
    range_tombstone_list tombstones(*s);

    auto first = rtie(4, 8, 4);
    // Starts at the two-component key {8, 1} and ends (exclusively) at the
    // one-component key {10}.
    auto second = range_tombstone(key({8, 1}), bound_kind::incl_start, key({10}), bound_kind::excl_end, {1, gc_now});

    tombstones.apply(*s, first);
    tombstones.apply(*s, second);
    tombstones.apply(*s, rt(1, 5, 4));

    auto pos = tombstones.begin();

    assert_rt(rtie(1, 8, 4), *pos);
    ++pos;

    assert_rt(second, *pos);
    ++pos;

    BOOST_REQUIRE(pos == tombstones.end());
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_search_with_empty_start) {
|
||||
range_tombstone_list l(*s);
|
||||
|
||||
|
||||
@@ -1886,3 +1886,47 @@ SEASTAR_TEST_CASE(test_concurrent_population_before_latest_version_iterator) {
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_tombstone_merging_of_overlapping_tombstones_in_many_versions) {
    // Scenario: apply `older` to the cache, populate part of it, open a
    // partition stream and leave it unconsumed, then apply `newer`. A fresh
    // reader must produce exactly `older + newer`.
    return seastar::async([] {
        simple_schema table;
        cache_tracker tracker;
        memtable_snapshot_source underlying(table.schema());

        auto key = table.make_pkey(0);
        auto key_range = dht::partition_range::make_singular(key);

        // Range tombstone over [2, 107] plus a live row at ck=5.
        mutation older(key, table.schema());
        older.partition().apply_delete(*table.schema(),
            table.make_range_tombstone(table.make_ckey_range(2, 107), table.new_tombstone()));
        table.add_row(older, table.make_ckey(5), "val");

        // The point of this mutation is its newer range tombstone over
        // [1, 100]: it trims the [2, 107] tombstone of `older` down to
        // (100, 107], which begins after ck=5.
        mutation newer(key, table.schema());
        newer.partition().apply_delete(*table.schema(),
            table.make_range_tombstone(table.make_ckey_range(1, 100), table.new_tombstone()));

        row_cache cache(table.schema(), snapshot_source([&] { return underlying(); }), tracker);

        // Opens the first partition from the cache, limits its buffer to a
        // single element and hands the stream to the caller.
        auto open_partition = [&] {
            auto reader = cache.make_reader(table.schema());
            auto partition = reader().get0();
            BOOST_REQUIRE(partition);
            (*partition).set_max_buffer_size(1);
            return std::move(*partition);
        };

        apply(cache, underlying, older);
        populate_range(cache, key_range, table.make_ckey_range(0, 3));

        // Held (not consumed) until the end of the test; presumably this pins
        // the current partition version so that `newer` goes into another
        // version, as the test name suggests.
        auto pinned = open_partition();

        apply(cache, underlying, newer);

        auto checker = assert_that(cache.make_reader(table.schema()));
        checker.produces(older + newer)
            .produces_end_of_stream();
    });
}
|
||||
|
||||
@@ -408,7 +408,7 @@ SEASTAR_TEST_CASE(test_prepared_statement_is_invalidated_by_schema_change) {
|
||||
logging::logger_registry().set_logger_level("query_processor", logging::log_level::debug);
|
||||
e.execute_cql("create keyspace tests with replication = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 };").get();
|
||||
e.execute_cql("create table tests.table1 (pk int primary key, c1 int, c2 int);").get();
|
||||
bytes id = e.prepare("select * from tests.table1;").get0();
|
||||
auto id = e.prepare("select * from tests.table1;").get0();
|
||||
|
||||
e.execute_cql("alter table tests.table1 add s1 int;").get();
|
||||
|
||||
|
||||
@@ -43,12 +43,16 @@ public:
|
||||
api::timestamp_type new_timestamp() {
|
||||
return _timestamp++;
|
||||
}
|
||||
tombstone new_tombstone() {
|
||||
return {new_timestamp(), gc_clock::now()};
|
||||
}
|
||||
public:
|
||||
simple_schema()
|
||||
using with_static = bool_class<class static_tag>;
|
||||
simple_schema(with_static ws = with_static::yes)
|
||||
: _s(schema_builder("ks", "cf")
|
||||
.with_column("pk", utf8_type, column_kind::partition_key)
|
||||
.with_column("ck", utf8_type, column_kind::clustering_key)
|
||||
.with_column("s1", utf8_type, column_kind::static_column)
|
||||
.with_column("s1", utf8_type, ws ? column_kind::static_column : column_kind::regular_column)
|
||||
.with_column("v", utf8_type)
|
||||
.build())
|
||||
, _v_def(*_s->get_column_definition(to_bytes("v")))
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1002,7 +1002,7 @@ public:
|
||||
|
||||
void execute_prepared_cql3_query(tcxx::function<void(CqlResult const& _return)> cob, tcxx::function<void(::apache::thrift::TDelayedException* _throw)> exn_cob, const int32_t itemId, const std::vector<std::string> & values, const ConsistencyLevel::type consistency) {
|
||||
with_exn_cob(std::move(exn_cob), [&] {
|
||||
auto prepared = _query_processor.local().get_prepared_for_thrift(itemId);
|
||||
auto prepared = _query_processor.local().get_prepared(cql3::prepared_cache_key_type(itemId));
|
||||
if (!prepared) {
|
||||
throw make_exception<InvalidRequestException>("Prepared query with id %d not found", itemId);
|
||||
}
|
||||
|
||||
103
thrift/server.cc
103
thrift/server.cc
@@ -50,6 +50,8 @@ using namespace apache::thrift::protocol;
|
||||
using namespace apache::thrift::async;
|
||||
using namespace ::cassandra;
|
||||
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
class thrift_stats {
|
||||
seastar::metrics::metric_groups _metrics;
|
||||
public:
|
||||
@@ -68,8 +70,10 @@ thrift_server::~thrift_server() {
|
||||
}
|
||||
|
||||
future<> thrift_server::stop() {
|
||||
auto f = _stop_gate.close();
|
||||
std::for_each(_listeners.begin(), _listeners.end(), std::mem_fn(&server_socket::abort_accept));
|
||||
std::for_each(_connections_list.begin(), _connections_list.end(), std::mem_fn(&connection::shutdown));
|
||||
return make_ready_future<>();
|
||||
return f;
|
||||
}
|
||||
|
||||
struct handler_deleter {
|
||||
@@ -101,8 +105,27 @@ thrift_server::connection::connection(thrift_server& server, connected_socket&&
|
||||
}
|
||||
|
||||
thrift_server::connection::~connection() {
|
||||
--_server._current_connections;
|
||||
_server._connections_list.erase(_server._connections_list.iterator_to(*this));
|
||||
if (is_linked()) {
|
||||
--_server._current_connections;
|
||||
_server._connections_list.erase(_server._connections_list.iterator_to(*this));
|
||||
}
|
||||
}
|
||||
|
||||
// Move constructor: takes over every member of `other` and, when `other` is
// currently linked into an intrusive list, takes over its list position too.
thrift_server::connection::connection(connection&& other)
    : _server(other._server)
    , _fd(std::move(other._fd))
    , _read_buf(std::move(other._read_buf))
    , _write_buf(std::move(other._write_buf))
    , _transport(std::move(other._transport))
    , _input(std::move(other._input))
    , _output(std::move(other._output))
    , _in_proto(std::move(other._in_proto))
    , _out_proto(std::move(other._out_proto))
    , _processor(std::move(other._processor))
{
    if (!other.is_linked()) {
        // Nothing to relink: the source was not a list member.
        return;
    }

    using node_ops = boost::intrusive::list<connection>::node_algorithms;

    // Put our own hook into a well-defined state first, then exchange it
    // with the source's hook.
    node_ops::init(this_ptr());
    node_ops::swap_nodes(other.this_ptr(), this_ptr());
}
|
||||
|
||||
future<>
|
||||
@@ -190,29 +213,65 @@ thrift_server::listen(ipv4_addr addr, bool keepalive) {
|
||||
|
||||
void
|
||||
thrift_server::do_accepts(int which, bool keepalive) {
|
||||
_listeners[which].accept().then([this, which, keepalive] (connected_socket fd, socket_address addr) mutable {
|
||||
fd.set_nodelay(true);
|
||||
fd.set_keepalive(keepalive);
|
||||
auto conn = new connection(*this, std::move(fd), addr);
|
||||
conn->process().then_wrapped([this, conn] (future<> f) {
|
||||
conn->shutdown();
|
||||
delete conn;
|
||||
try {
|
||||
f.get();
|
||||
} catch (std::exception& ex) {
|
||||
tlogger.debug("request error {}", ex.what());
|
||||
}
|
||||
if (_stop_gate.is_closed()) {
|
||||
return;
|
||||
}
|
||||
with_gate(_stop_gate, [&, this] {
|
||||
return _listeners[which].accept().then([this, which, keepalive] (connected_socket fd, socket_address addr) {
|
||||
fd.set_nodelay(true);
|
||||
fd.set_keepalive(keepalive);
|
||||
with_gate(_stop_gate, [&, this] {
|
||||
return do_with(connection(*this, std::move(fd), addr), [this] (auto& conn) {
|
||||
return conn.process().then_wrapped([this, &conn] (future<> f) {
|
||||
conn.shutdown();
|
||||
try {
|
||||
f.get();
|
||||
} catch (std::exception& ex) {
|
||||
tlogger.debug("request error {}", ex.what());
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
do_accepts(which, keepalive);
|
||||
}).handle_exception([this, which, keepalive] (auto ex) {
|
||||
tlogger.debug("accept failed {}", ex);
|
||||
this->maybe_retry_accept(which, keepalive, std::move(ex));
|
||||
});
|
||||
do_accepts(which, keepalive);
|
||||
}).then_wrapped([] (future<> f) {
|
||||
try {
|
||||
f.get();
|
||||
} catch (std::exception& ex) {
|
||||
std::cout << "accept failed: " << ex.what() << "\n";
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
void thrift_server::maybe_retry_accept(int which, bool keepalive, std::exception_ptr ex) {
|
||||
auto retry = [this, which, keepalive] {
|
||||
tlogger.debug("retrying accept after failure");
|
||||
do_accepts(which, keepalive);
|
||||
};
|
||||
auto retry_with_backoff = [&] {
|
||||
// FIXME: Consider using exponential backoff
|
||||
sleep(1ms).then([retry = std::move(retry)] { retry(); });
|
||||
};
|
||||
try {
|
||||
std::rethrow_exception(std::move(ex));
|
||||
} catch (const std::system_error& e) {
|
||||
switch (e.code().value()) {
|
||||
// FIXME: Don't retry for other fatal errors
|
||||
case EBADF:
|
||||
break;
|
||||
case ENFILE:
|
||||
case EMFILE:
|
||||
case ENOMEM:
|
||||
retry_with_backoff();
|
||||
default:
|
||||
retry();
|
||||
}
|
||||
} catch (const std::bad_alloc&) {
|
||||
retry_with_backoff();
|
||||
} catch (const seastar::gate_closed_exception&) {
|
||||
return;
|
||||
} catch (...) {
|
||||
retry();
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t
|
||||
thrift_server::total_connections() const {
|
||||
return _total_connections;
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#include "core/reactor.hh"
|
||||
#include "core/distributed.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include <seastar/core/gate.hh>
|
||||
#include <memory>
|
||||
#include <cstdint>
|
||||
#include <boost/intrusive/list.hpp>
|
||||
@@ -79,6 +80,7 @@ class thrift_server {
|
||||
public:
|
||||
connection(thrift_server& server, connected_socket&& fd, socket_address addr);
|
||||
~connection();
|
||||
connection(connection&&);
|
||||
future<> process();
|
||||
future<> read();
|
||||
future<> write();
|
||||
@@ -96,6 +98,7 @@ private:
|
||||
uint64_t _current_connections = 0;
|
||||
uint64_t _requests_served = 0;
|
||||
boost::intrusive::list<connection> _connections_list;
|
||||
seastar::gate _stop_gate;
|
||||
public:
|
||||
thrift_server(distributed<database>& db, distributed<cql3::query_processor>& qp);
|
||||
~thrift_server();
|
||||
@@ -105,6 +108,9 @@ public:
|
||||
uint64_t total_connections() const;
|
||||
uint64_t current_connections() const;
|
||||
uint64_t requests_served() const;
|
||||
|
||||
private:
|
||||
void maybe_retry_accept(int which, bool keepalive, std::exception_ptr ex);
|
||||
};
|
||||
|
||||
#endif /* APPS_SEASTAR_THRIFT_SERVER_HH_ */
|
||||
|
||||
@@ -66,12 +66,12 @@ void cql_server::event_notifier::on_create_keyspace(const sstring& ks_name)
|
||||
{
|
||||
for (auto&& conn : _schema_change_listeners) {
|
||||
using namespace cql_transport;
|
||||
with_gate(conn->_pending_requests_gate, [&] {
|
||||
return conn->write_response(conn->make_schema_change_event(event::schema_change{
|
||||
if (!conn->_pending_requests_gate.is_closed()) {
|
||||
conn->write_response(conn->make_schema_change_event(event::schema_change{
|
||||
event::schema_change::change_type::CREATED,
|
||||
ks_name
|
||||
}));
|
||||
});
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,14 +79,14 @@ void cql_server::event_notifier::on_create_column_family(const sstring& ks_name,
|
||||
{
|
||||
for (auto&& conn : _schema_change_listeners) {
|
||||
using namespace cql_transport;
|
||||
with_gate(conn->_pending_requests_gate, [&] {
|
||||
return conn->write_response(conn->make_schema_change_event(event::schema_change{
|
||||
if (!conn->_pending_requests_gate.is_closed()) {
|
||||
conn->write_response(conn->make_schema_change_event(event::schema_change{
|
||||
event::schema_change::change_type::CREATED,
|
||||
event::schema_change::target_type::TABLE,
|
||||
ks_name,
|
||||
cf_name
|
||||
}));
|
||||
});
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -94,14 +94,14 @@ void cql_server::event_notifier::on_create_user_type(const sstring& ks_name, con
|
||||
{
|
||||
for (auto&& conn : _schema_change_listeners) {
|
||||
using namespace cql_transport;
|
||||
with_gate(conn->_pending_requests_gate, [&] {
|
||||
return conn->write_response(conn->make_schema_change_event(event::schema_change{
|
||||
if (!conn->_pending_requests_gate.is_closed()) {
|
||||
conn->write_response(conn->make_schema_change_event(event::schema_change{
|
||||
event::schema_change::change_type::CREATED,
|
||||
event::schema_change::target_type::TYPE,
|
||||
ks_name,
|
||||
type_name
|
||||
}));
|
||||
});
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -124,12 +124,12 @@ void cql_server::event_notifier::on_update_keyspace(const sstring& ks_name)
|
||||
{
|
||||
for (auto&& conn : _schema_change_listeners) {
|
||||
using namespace cql_transport;
|
||||
with_gate(conn->_pending_requests_gate, [&] {
|
||||
return conn->write_response(conn->make_schema_change_event(event::schema_change{
|
||||
if (!conn->_pending_requests_gate.is_closed()) {
|
||||
conn->write_response(conn->make_schema_change_event(event::schema_change{
|
||||
event::schema_change::change_type::UPDATED,
|
||||
ks_name
|
||||
}));
|
||||
});
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -137,14 +137,14 @@ void cql_server::event_notifier::on_update_column_family(const sstring& ks_name,
|
||||
{
|
||||
for (auto&& conn : _schema_change_listeners) {
|
||||
using namespace cql_transport;
|
||||
with_gate(conn->_pending_requests_gate, [&] {
|
||||
return conn->write_response(conn->make_schema_change_event(event::schema_change{
|
||||
if (!conn->_pending_requests_gate.is_closed()) {
|
||||
conn->write_response(conn->make_schema_change_event(event::schema_change{
|
||||
event::schema_change::change_type::UPDATED,
|
||||
event::schema_change::target_type::TABLE,
|
||||
ks_name,
|
||||
cf_name
|
||||
}));
|
||||
});
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -152,14 +152,14 @@ void cql_server::event_notifier::on_update_user_type(const sstring& ks_name, con
|
||||
{
|
||||
for (auto&& conn : _schema_change_listeners) {
|
||||
using namespace cql_transport;
|
||||
with_gate(conn->_pending_requests_gate, [&] {
|
||||
return conn->write_response(conn->make_schema_change_event(event::schema_change{
|
||||
if (!conn->_pending_requests_gate.is_closed()) {
|
||||
conn->write_response(conn->make_schema_change_event(event::schema_change{
|
||||
event::schema_change::change_type::UPDATED,
|
||||
event::schema_change::target_type::TYPE,
|
||||
ks_name,
|
||||
type_name
|
||||
}));
|
||||
});
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -182,12 +182,12 @@ void cql_server::event_notifier::on_drop_keyspace(const sstring& ks_name)
|
||||
{
|
||||
for (auto&& conn : _schema_change_listeners) {
|
||||
using namespace cql_transport;
|
||||
with_gate(conn->_pending_requests_gate, [&] {
|
||||
return conn->write_response(conn->make_schema_change_event(event::schema_change{
|
||||
if (!conn->_pending_requests_gate.is_closed()) {
|
||||
conn->write_response(conn->make_schema_change_event(event::schema_change{
|
||||
event::schema_change::change_type::DROPPED,
|
||||
ks_name
|
||||
}));
|
||||
});
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -195,14 +195,14 @@ void cql_server::event_notifier::on_drop_column_family(const sstring& ks_name, c
|
||||
{
|
||||
for (auto&& conn : _schema_change_listeners) {
|
||||
using namespace cql_transport;
|
||||
with_gate(conn->_pending_requests_gate, [&] {
|
||||
return conn->write_response(conn->make_schema_change_event(event::schema_change{
|
||||
if (!conn->_pending_requests_gate.is_closed()) {
|
||||
conn->write_response(conn->make_schema_change_event(event::schema_change{
|
||||
event::schema_change::change_type::DROPPED,
|
||||
event::schema_change::target_type::TABLE,
|
||||
ks_name,
|
||||
cf_name
|
||||
}));
|
||||
});
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -210,14 +210,14 @@ void cql_server::event_notifier::on_drop_user_type(const sstring& ks_name, const
|
||||
{
|
||||
for (auto&& conn : _schema_change_listeners) {
|
||||
using namespace cql_transport;
|
||||
with_gate(conn->_pending_requests_gate, [&] {
|
||||
return conn->write_response(conn->make_schema_change_event(event::schema_change{
|
||||
if (!conn->_pending_requests_gate.is_closed()) {
|
||||
conn->write_response(conn->make_schema_change_event(event::schema_change{
|
||||
event::schema_change::change_type::DROPPED,
|
||||
event::schema_change::target_type::TYPE,
|
||||
ks_name,
|
||||
type_name
|
||||
}));
|
||||
});
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -240,9 +240,9 @@ void cql_server::event_notifier::on_join_cluster(const gms::inet_address& endpoi
|
||||
{
|
||||
for (auto&& conn : _topology_change_listeners) {
|
||||
using namespace cql_transport;
|
||||
with_gate(conn->_pending_requests_gate, [&] {
|
||||
return conn->write_response(conn->make_topology_change_event(event::topology_change::new_node(endpoint, conn->_server_addr.port)));
|
||||
});
|
||||
if (!conn->_pending_requests_gate.is_closed()) {
|
||||
conn->write_response(conn->make_topology_change_event(event::topology_change::new_node(endpoint, conn->_server_addr.port)));
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -250,9 +250,9 @@ void cql_server::event_notifier::on_leave_cluster(const gms::inet_address& endpo
|
||||
{
|
||||
for (auto&& conn : _topology_change_listeners) {
|
||||
using namespace cql_transport;
|
||||
with_gate(conn->_pending_requests_gate, [&] {
|
||||
return conn->write_response(conn->make_topology_change_event(event::topology_change::removed_node(endpoint, conn->_server_addr.port)));
|
||||
});
|
||||
if (!conn->_pending_requests_gate.is_closed()) {
|
||||
conn->write_response(conn->make_topology_change_event(event::topology_change::removed_node(endpoint, conn->_server_addr.port)));
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -260,9 +260,9 @@ void cql_server::event_notifier::on_move(const gms::inet_address& endpoint)
|
||||
{
|
||||
for (auto&& conn : _topology_change_listeners) {
|
||||
using namespace cql_transport;
|
||||
with_gate(conn->_pending_requests_gate, [&] {
|
||||
return conn->write_response(conn->make_topology_change_event(event::topology_change::moved_node(endpoint, conn->_server_addr.port)));
|
||||
});
|
||||
if (!conn->_pending_requests_gate.is_closed()) {
|
||||
conn->write_response(conn->make_topology_change_event(event::topology_change::moved_node(endpoint, conn->_server_addr.port)));
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -273,9 +273,9 @@ void cql_server::event_notifier::on_up(const gms::inet_address& endpoint)
|
||||
if (!was_up) {
|
||||
for (auto&& conn : _status_change_listeners) {
|
||||
using namespace cql_transport;
|
||||
with_gate(conn->_pending_requests_gate, [&] {
|
||||
return conn->write_response(conn->make_status_change_event(event::status_change::node_up(endpoint, conn->_server_addr.port)));
|
||||
});
|
||||
if (!conn->_pending_requests_gate.is_closed()) {
|
||||
conn->write_response(conn->make_status_change_event(event::status_change::node_up(endpoint, conn->_server_addr.port)));
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -287,9 +287,9 @@ void cql_server::event_notifier::on_down(const gms::inet_address& endpoint)
|
||||
if (!was_down) {
|
||||
for (auto&& conn : _status_change_listeners) {
|
||||
using namespace cql_transport;
|
||||
with_gate(conn->_pending_requests_gate, [&] {
|
||||
return conn->write_response(conn->make_status_change_event(event::status_change::node_down(endpoint, conn->_server_addr.port)));
|
||||
});
|
||||
if (!conn->_pending_requests_gate.is_closed()) {
|
||||
conn->write_response(conn->make_status_change_event(event::status_change::node_down(endpoint, conn->_server_addr.port)));
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -590,8 +590,8 @@ future<> cql_server::connection::process()
|
||||
return write_response(make_error(0, exceptions::exception_code::SERVER_ERROR, "unknown error", tracing::trace_state_ptr()));
|
||||
}
|
||||
}).finally([this] {
|
||||
_server._notifier->unregister_connection(this);
|
||||
return _pending_requests_gate.close().then([this] {
|
||||
_server._notifier->unregister_connection(this);
|
||||
return _ready_to_respond.finally([this] {
|
||||
return _write_buf.close();
|
||||
});
|
||||
@@ -826,15 +826,14 @@ future<response_type> cql_server::connection::process_prepare(uint16_t stream, b
|
||||
return parallel_for_each(cpus.begin(), cpus.end(), [this, query, cpu_id, &cs] (unsigned int c) mutable {
|
||||
if (c != cpu_id) {
|
||||
return smp::submit_to(c, [this, query, &cs] () mutable {
|
||||
_server._query_processor.local().prepare(query, cs, false);
|
||||
// FIXME: error handling
|
||||
return _server._query_processor.local().prepare(std::move(query), cs, false).discard_result();
|
||||
});
|
||||
} else {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
}).then([this, query, stream, &cs] {
|
||||
}).then([this, query, stream, &cs] () mutable {
|
||||
tracing::trace(cs.get_trace_state(), "Done preparing on remote shards");
|
||||
return _server._query_processor.local().prepare(query, cs, false).then([this, stream, &cs] (auto msg) {
|
||||
return _server._query_processor.local().prepare(std::move(query), cs, false).then([this, stream, &cs] (auto msg) {
|
||||
tracing::trace(cs.get_trace_state(), "Done preparing on a local shard - preparing a result. ID is [{}]", seastar::value_of([&msg] {
|
||||
return messages::result_message::prepared::cql::get_id(msg);
|
||||
}));
|
||||
@@ -848,8 +847,9 @@ future<response_type> cql_server::connection::process_prepare(uint16_t stream, b
|
||||
|
||||
future<response_type> cql_server::connection::process_execute(uint16_t stream, bytes_view buf, service::client_state client_state)
|
||||
{
|
||||
auto id = read_short_bytes(buf);
|
||||
auto prepared = _server._query_processor.local().get_prepared(id);
|
||||
cql3::prepared_cache_key_type cache_key(read_short_bytes(buf));
|
||||
auto& id = cql3::prepared_cache_key_type::cql_id(cache_key);
|
||||
auto prepared = _server._query_processor.local().get_prepared(cache_key);
|
||||
if (!prepared) {
|
||||
throw exceptions::prepared_query_not_found_exception(id);
|
||||
}
|
||||
@@ -925,8 +925,9 @@ cql_server::connection::process_batch(uint16_t stream, bytes_view buf, service::
|
||||
break;
|
||||
}
|
||||
case 1: {
|
||||
auto id = read_short_bytes(buf);
|
||||
ps = _server._query_processor.local().get_prepared(id);
|
||||
cql3::prepared_cache_key_type cache_key(read_short_bytes(buf));
|
||||
auto& id = cql3::prepared_cache_key_type::cql_id(cache_key);
|
||||
ps = _server._query_processor.local().get_prepared(cache_key);
|
||||
if (!ps) {
|
||||
throw exceptions::prepared_query_not_found_exception(id);
|
||||
}
|
||||
|
||||
9
types.cc
9
types.cc
@@ -1963,8 +1963,7 @@ map_type_impl::to_string(const bytes& b) const {
|
||||
|
||||
size_t
|
||||
map_type_impl::hash(bytes_view v) const {
|
||||
// FIXME:
|
||||
abort();
|
||||
return std::hash<bytes_view>()(v);
|
||||
}
|
||||
|
||||
bytes
|
||||
@@ -2448,8 +2447,7 @@ set_type_impl::to_string(const bytes& b) const {
|
||||
|
||||
size_t
|
||||
set_type_impl::hash(bytes_view v) const {
|
||||
// FIXME:
|
||||
abort();
|
||||
return std::hash<bytes_view>()(v);
|
||||
}
|
||||
|
||||
bytes
|
||||
@@ -2637,8 +2635,7 @@ list_type_impl::to_string(const bytes& b) const {
|
||||
|
||||
size_t
|
||||
list_type_impl::hash(bytes_view v) const {
|
||||
// FIXME:
|
||||
abort();
|
||||
return std::hash<bytes_view>()(v);
|
||||
}
|
||||
|
||||
bytes
|
||||
|
||||
@@ -29,77 +29,54 @@
|
||||
#include <seastar/core/timer.hh>
|
||||
#include <seastar/core/gate.hh>
|
||||
|
||||
#include "utils/exceptions.hh"
|
||||
#include "exceptions/exceptions.hh"
|
||||
#include "utils/loading_shared_values.hh"
|
||||
#include "log.hh"
|
||||
|
||||
namespace bi = boost::intrusive;
|
||||
|
||||
namespace utils {
|
||||
// Simple variant of the "LoadingCache" used for permissions in origin.
|
||||
|
||||
typedef lowres_clock loading_cache_clock_type;
|
||||
typedef bi::list_base_hook<bi::link_mode<bi::auto_unlink>> auto_unlink_list_hook;
|
||||
using loading_cache_clock_type = seastar::lowres_clock;
|
||||
using auto_unlink_list_hook = bi::list_base_hook<bi::link_mode<bi::auto_unlink>>;
|
||||
|
||||
template<typename Tp, typename Key, typename Hash, typename EqualPred>
|
||||
class timestamped_val : public auto_unlink_list_hook, public bi::unordered_set_base_hook<bi::store_hash<true>> {
|
||||
template<typename Tp, typename Key, typename EntrySize , typename Hash, typename EqualPred, typename LoadingSharedValuesStats>
|
||||
class timestamped_val {
|
||||
public:
|
||||
typedef bi::list<timestamped_val, bi::constant_time_size<false>> lru_list_type;
|
||||
typedef Key key_type;
|
||||
typedef Tp value_type;
|
||||
using value_type = Tp;
|
||||
using loading_values_type = typename utils::loading_shared_values<Key, timestamped_val, Hash, EqualPred, LoadingSharedValuesStats, 256>;
|
||||
class lru_entry;
|
||||
class value_ptr;
|
||||
|
||||
private:
|
||||
std::experimental::optional<Tp> _opt_value;
|
||||
value_type _value;
|
||||
loading_cache_clock_type::time_point _loaded;
|
||||
loading_cache_clock_type::time_point _last_read;
|
||||
lru_list_type& _lru_list; /// MRU item is at the front, LRU - at the back
|
||||
Key _key;
|
||||
lru_entry* _lru_entry_ptr = nullptr; /// MRU item is at the front, LRU - at the back
|
||||
size_t _size = 0;
|
||||
|
||||
public:
|
||||
struct key_eq {
|
||||
bool operator()(const Key& k, const timestamped_val& c) const {
|
||||
return EqualPred()(k, c.key());
|
||||
}
|
||||
|
||||
bool operator()(const timestamped_val& c, const Key& k) const {
|
||||
return EqualPred()(c.key(), k);
|
||||
}
|
||||
};
|
||||
|
||||
timestamped_val(lru_list_type& lru_list, const Key& key)
|
||||
: _loaded(loading_cache_clock_type::now())
|
||||
timestamped_val(value_type val)
|
||||
: _value(std::move(val))
|
||||
, _loaded(loading_cache_clock_type::now())
|
||||
, _last_read(_loaded)
|
||||
, _lru_list(lru_list)
|
||||
, _key(key) {}
|
||||
|
||||
timestamped_val(lru_list_type& lru_list, Key&& key)
|
||||
: _loaded(loading_cache_clock_type::now())
|
||||
, _last_read(_loaded)
|
||||
, _lru_list(lru_list)
|
||||
, _key(std::move(key)) {}
|
||||
|
||||
timestamped_val(const timestamped_val&) = default;
|
||||
, _size(EntrySize()(_value))
|
||||
{}
|
||||
timestamped_val(timestamped_val&&) = default;
|
||||
|
||||
// Make sure copy/move-assignments don't go through the template below
|
||||
timestamped_val& operator=(const timestamped_val&) = default;
|
||||
timestamped_val& operator=(timestamped_val&) = default;
|
||||
timestamped_val& operator=(timestamped_val&&) = default;
|
||||
timestamped_val& operator=(value_type new_val) {
|
||||
assert(_lru_entry_ptr);
|
||||
|
||||
template <typename U>
|
||||
timestamped_val& operator=(U&& new_val) {
|
||||
_opt_value = std::forward<U>(new_val);
|
||||
_value = std::move(new_val);
|
||||
_loaded = loading_cache_clock_type::now();
|
||||
_lru_entry_ptr->cache_size() -= _size;
|
||||
_size = EntrySize()(_value);
|
||||
_lru_entry_ptr->cache_size() += _size;
|
||||
return *this;
|
||||
}
|
||||
|
||||
const Tp& value() {
|
||||
_last_read = loading_cache_clock_type::now();
|
||||
touch();
|
||||
return _opt_value.value();
|
||||
}
|
||||
|
||||
explicit operator bool() const noexcept {
|
||||
return bool(_opt_value);
|
||||
}
|
||||
value_type& value() noexcept { return _value; }
|
||||
const value_type& value() const noexcept { return _value; }
|
||||
|
||||
loading_cache_clock_type::time_point last_read() const noexcept {
|
||||
return _last_read;
|
||||
@@ -109,163 +86,353 @@ public:
|
||||
return _loaded;
|
||||
}
|
||||
|
||||
const Key& key() const {
|
||||
return _key;
|
||||
size_t size() const {
|
||||
return _size;
|
||||
}
|
||||
|
||||
friend bool operator==(const timestamped_val& a, const timestamped_val& b){
|
||||
return EqualPred()(a.key(), b.key());
|
||||
}
|
||||
|
||||
friend std::size_t hash_value(const timestamped_val& v) {
|
||||
return Hash()(v.key());
|
||||
bool ready() const noexcept {
|
||||
return _lru_entry_ptr;
|
||||
}
|
||||
|
||||
private:
|
||||
void touch() noexcept {
|
||||
assert(_lru_entry_ptr);
|
||||
_last_read = loading_cache_clock_type::now();
|
||||
_lru_entry_ptr->touch();
|
||||
}
|
||||
|
||||
void set_anchor_back_reference(lru_entry* lru_entry_ptr) noexcept {
|
||||
_lru_entry_ptr = lru_entry_ptr;
|
||||
}
|
||||
};
|
||||
|
||||
/// \brief Entry-size functor that reports a size of 1 for every value.
///
/// The value itself is never inspected. The call operator is const and
/// noexcept so that the functor can be invoked through a const instance
/// as well as through a temporary.
template <typename Tp>
struct simple_entry_size {
    size_t operator()(const Tp&) const noexcept {
        return 1;
    }
};
|
||||
|
||||
template<typename Tp, typename Key, typename EntrySize , typename Hash, typename EqualPred, typename LoadingSharedValuesStats>
|
||||
class timestamped_val<Tp, Key, EntrySize, Hash, EqualPred, LoadingSharedValuesStats>::value_ptr {
|
||||
private:
|
||||
using ts_value_type = timestamped_val<Tp, Key, EntrySize, Hash, EqualPred, LoadingSharedValuesStats>;
|
||||
using loading_values_type = typename ts_value_type::loading_values_type;
|
||||
|
||||
public:
|
||||
using timestamped_val_ptr = typename loading_values_type::entry_ptr;
|
||||
using value_type = Tp;
|
||||
|
||||
private:
|
||||
timestamped_val_ptr _ts_val_ptr;
|
||||
|
||||
public:
|
||||
value_ptr(timestamped_val_ptr ts_val_ptr) : _ts_val_ptr(std::move(ts_val_ptr)) { _ts_val_ptr->touch(); }
|
||||
explicit operator bool() const noexcept { return bool(_ts_val_ptr); }
|
||||
value_type& operator*() const noexcept { return _ts_val_ptr->value(); }
|
||||
value_type* operator->() const noexcept { return &_ts_val_ptr->value(); }
|
||||
};
|
||||
|
||||
/// \brief This is an LRU list entry which is also an anchor for a loading_cache value.
|
||||
template<typename Tp, typename Key, typename EntrySize , typename Hash, typename EqualPred, typename LoadingSharedValuesStats>
|
||||
class timestamped_val<Tp, Key, EntrySize, Hash, EqualPred, LoadingSharedValuesStats>::lru_entry : public auto_unlink_list_hook {
|
||||
private:
|
||||
using ts_value_type = timestamped_val<Tp, Key, EntrySize, Hash, EqualPred, LoadingSharedValuesStats>;
|
||||
using loading_values_type = typename ts_value_type::loading_values_type;
|
||||
|
||||
public:
|
||||
using lru_list_type = bi::list<lru_entry, bi::constant_time_size<false>>;
|
||||
using timestamped_val_ptr = typename loading_values_type::entry_ptr;
|
||||
|
||||
private:
|
||||
timestamped_val_ptr _ts_val_ptr;
|
||||
lru_list_type& _lru_list;
|
||||
size_t& _cache_size;
|
||||
|
||||
public:
|
||||
lru_entry(timestamped_val_ptr ts_val, lru_list_type& lru_list, size_t& cache_size)
|
||||
: _ts_val_ptr(std::move(ts_val))
|
||||
, _lru_list(lru_list)
|
||||
, _cache_size(cache_size)
|
||||
{
|
||||
_ts_val_ptr->set_anchor_back_reference(this);
|
||||
_cache_size += _ts_val_ptr->size();
|
||||
}
|
||||
|
||||
~lru_entry() {
|
||||
_cache_size -= _ts_val_ptr->size();
|
||||
_ts_val_ptr->set_anchor_back_reference(nullptr);
|
||||
}
|
||||
|
||||
size_t& cache_size() noexcept {
|
||||
return _cache_size;
|
||||
}
|
||||
|
||||
/// Marks this item as the most recently used one.
|
||||
/// The MRU item is kept at the front of the _lru_list and the LRU item at the back.
|
||||
void touch() noexcept {
|
||||
// Detach from the current position first, then re-insert at the front.
// NOTE(review): presumably unlink() is harmless when the entry is not linked yet - confirm against the hook type.
auto_unlink_list_hook::unlink();
|
||||
_lru_list.push_front(*this);
|
||||
}
|
||||
};
|
||||
|
||||
class shared_mutex {
|
||||
private:
|
||||
lw_shared_ptr<semaphore> _mutex_ptr;
|
||||
|
||||
public:
|
||||
shared_mutex() : _mutex_ptr(make_lw_shared<semaphore>(1)) {}
|
||||
semaphore& get() const noexcept {
|
||||
return *_mutex_ptr;
|
||||
/// Returns the key of the anchored value, obtained via loading_values_type::to_key() from the entry pointer.
const Key& key() const noexcept {
|
||||
return loading_values_type::to_key(_ts_val_ptr);
|
||||
}
|
||||
|
||||
/// Returns a mutable reference to the anchored timestamped value.
timestamped_val& timestamped_value() noexcept { return *_ts_val_ptr; }
|
||||
/// Returns a read-only reference to the anchored timestamped value.
const timestamped_val& timestamped_value() const noexcept { return *_ts_val_ptr; }
|
||||
/// Returns a copy of the pointer to the anchored timestamped value.
timestamped_val_ptr timestamped_value_ptr() noexcept { return _ts_val_ptr; }
|
||||
};
|
||||
|
||||
/// Compile-time switch used as the ReloadEnabled template parameter of loading_cache.
enum class loading_cache_reload_enabled { no, yes };
|
||||
|
||||
/// \brief Loading cache is a cache that loads the value into the cache using the given asynchronous callback.
|
||||
///
|
||||
/// Each cached value if reloading is enabled (\tparam ReloadEnabled == loading_cache_reload_enabled::yes) is reloaded after
|
||||
/// the "refresh" time period since it was loaded for the last time.
|
||||
///
|
||||
/// The values are going to be evicted from the cache if they are not accessed during the "expiration" period or haven't
|
||||
/// been reloaded even once during the same period.
|
||||
///
|
||||
/// If "expiration" is set to zero - the caching is going to be disabled and get_XXX(...) is going to call the "loader" callback
|
||||
/// every time in order to get the requested value.
|
||||
///
|
||||
/// \note In order to avoid the eviction of cached entries due to "aging" of the contained value the user has to choose
|
||||
/// the "expiration" to be at least ("refresh" + "max load latency"). This way the value is going to stay in the cache and is going to be
|
||||
/// read in a non-blocking way as long as it's frequently accessed. Note however that since reloading is an asynchronous
|
||||
/// procedure it may get delayed by other running tasks. Therefore choosing the "expiration" too close to the ("refresh" + "max load latency")
|
||||
/// value risks having the cached values evicted when the system is heavily loaded.
|
||||
///
|
||||
/// The cache is also limited in size and if adding the next value is going
|
||||
/// to exceed the cache size limit the least recently used value(s) is(are) going to be evicted until the size of the cache
|
||||
/// becomes such that adding the new value is not going to break the size limit. If the new entry's size is greater than
|
||||
/// the cache size then the get_XXX(...) method is going to return a future with the loading_cache::entry_is_too_big exception.
|
||||
///
|
||||
/// The size of the cache is defined as a sum of sizes of all cached entries.
|
||||
/// The size of each entry is defined by the value returned by the \tparam EntrySize predicate applied on it.
|
||||
///
|
||||
/// The get(key) or get_ptr(key) methods ensure that the "loader" callback is called only once for each cached entry regardless of how many
|
||||
/// callers are calling for the get_XXX(key) for the same "key" at the same time. Only after the value is evicted from the cache
|
||||
/// it's going to be "loaded" in the context of get_XXX(key). As long as the value is cached get_XXX(key) is going to return the
|
||||
/// cached value immediately and reload it in the background every "refresh" time period as described above.
|
||||
///
|
||||
/// \tparam Key type of the cache key
|
||||
/// \tparam Tp type of the cached value
|
||||
/// \tparam ReloadEnabled if loading_cache_reload_enabled::yes allow reloading the values otherwise don't reload
|
||||
/// \tparam EntrySize predicate to calculate the entry size
|
||||
/// \tparam Hash hash function
|
||||
/// \tparam EqualPred equality predicate
|
||||
/// \tparam LoadingSharedValuesStats statistics incrementing class (see utils::loading_shared_values)
|
||||
/// \tparam Alloc elements allocator
|
||||
template<typename Key,
|
||||
typename Tp,
|
||||
loading_cache_reload_enabled ReloadEnabled = loading_cache_reload_enabled::no,
|
||||
typename EntrySize = simple_entry_size<Tp>,
|
||||
typename Hash = std::hash<Key>,
|
||||
typename EqualPred = std::equal_to<Key>,
|
||||
typename Alloc = std::allocator<timestamped_val<Tp, Key, Hash, EqualPred>>,
|
||||
typename SharedMutexMapAlloc = std::allocator<std::pair<const Key, shared_mutex>>>
|
||||
typename LoadingSharedValuesStats = utils::do_nothing_loading_shared_values_stats,
|
||||
typename Alloc = std::allocator<typename timestamped_val<Tp, Key, EntrySize, Hash, EqualPred, LoadingSharedValuesStats>::lru_entry>>
|
||||
class loading_cache {
|
||||
private:
|
||||
typedef timestamped_val<Tp, Key, Hash, EqualPred> ts_value_type;
|
||||
typedef bi::unordered_set<ts_value_type, bi::power_2_buckets<true>, bi::compare_hash<true>> set_type;
|
||||
typedef std::unordered_map<Key, shared_mutex, Hash, EqualPred, SharedMutexMapAlloc> write_mutex_map_type;
|
||||
typedef typename ts_value_type::lru_list_type lru_list_type;
|
||||
typedef typename set_type::bucket_traits bi_set_bucket_traits;
|
||||
|
||||
static constexpr int initial_num_buckets = 256;
|
||||
static constexpr int max_num_buckets = 1024 * 1024;
|
||||
using ts_value_type = timestamped_val<Tp, Key, EntrySize, Hash, EqualPred, LoadingSharedValuesStats>;
|
||||
using loading_values_type = typename ts_value_type::loading_values_type;
|
||||
using timestamped_val_ptr = typename loading_values_type::entry_ptr;
|
||||
using ts_value_lru_entry = typename ts_value_type::lru_entry;
|
||||
using set_iterator = typename loading_values_type::iterator;
|
||||
using lru_list_type = typename ts_value_lru_entry::lru_list_type;
|
||||
/// Function object that maps a timestamped cache entry to the value it holds.
/// It is the transformation used for the boost::transform_iterator based cache iterator.
struct value_extractor_fn {
|
||||
Tp& operator()(ts_value_type& tv) const {
|
||||
return tv.value();
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
typedef Tp value_type;
|
||||
typedef Key key_type;
|
||||
typedef typename set_type::iterator iterator;
|
||||
using value_type = Tp;
|
||||
using key_type = Key;
|
||||
using value_ptr = typename ts_value_type::value_ptr;
|
||||
|
||||
/// Exception type reported by get_ptr() when the size of a freshly loaded entry exceeds the maximum cache size.
class entry_is_too_big : public std::exception {};
|
||||
using iterator = boost::transform_iterator<value_extractor_fn, set_iterator>;
|
||||
|
||||
private:
|
||||
/// Common (private) constructor: stores the limits and the logger and binds the timer callback to on_timer().
///
/// \param max_size maximum cache size
/// \param expiry expiration period; zero means that caching is disabled (see caching_enabled())
/// \param refresh refresh period
/// \param logger logger used for tracing
///
/// \throws exceptions::configuration_exception if caching is enabled but \p refresh or \p max_size is zero
///
/// The timer is not armed here.
loading_cache(size_t max_size, std::chrono::milliseconds expiry, std::chrono::milliseconds refresh, logging::logger& logger)
|
||||
: _max_size(max_size)
|
||||
, _expiry(expiry)
|
||||
, _refresh(refresh)
|
||||
, _logger(logger)
|
||||
, _timer([this] { on_timer(); })
|
||||
{
|
||||
// Sanity check: if expiration period is given then non-zero refresh period and maximal size are required
|
||||
if (caching_enabled() && (_refresh == std::chrono::milliseconds(0) || _max_size == 0)) {
|
||||
throw exceptions::configuration_exception("loading_cache: caching is enabled but refresh period and/or max_size are zero");
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
template<typename Func>
|
||||
loading_cache(size_t max_size, std::chrono::milliseconds expiry, std::chrono::milliseconds refresh, logging::logger& logger, Func&& load)
|
||||
: _buckets(initial_num_buckets)
|
||||
, _set(bi_set_bucket_traits(_buckets.data(), _buckets.size()))
|
||||
, _max_size(max_size)
|
||||
, _expiry(expiry)
|
||||
, _refresh(refresh)
|
||||
, _logger(logger)
|
||||
, _load(std::forward<Func>(load)) {
|
||||
: loading_cache(max_size, expiry, refresh, logger)
|
||||
{
|
||||
static_assert(ReloadEnabled == loading_cache_reload_enabled::yes, "This constructor should only be invoked when ReloadEnabled == loading_cache_reload_enabled::yes");
|
||||
static_assert(std::is_same<future<value_type>, std::result_of_t<Func(const key_type&)>>::value, "Bad Func signature");
|
||||
|
||||
_load = std::forward<Func>(load);
|
||||
|
||||
// If expiration period is zero - caching is disabled
|
||||
if (!caching_enabled()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Sanity check: if expiration period is given then non-zero refresh period and maximal size are required
|
||||
if (_refresh == std::chrono::milliseconds(0) || _max_size == 0) {
|
||||
throw exceptions::configuration_exception("loading_cache: caching is enabled but refresh period and/or max_size are zero");
|
||||
_timer_period = std::min(_expiry, _refresh);
|
||||
_timer.arm(_timer_period);
|
||||
}
|
||||
|
||||
loading_cache(size_t max_size, std::chrono::milliseconds expiry, logging::logger& logger)
|
||||
: loading_cache(max_size, expiry, loading_cache_clock_type::time_point::max().time_since_epoch(), logger)
|
||||
{
|
||||
static_assert(ReloadEnabled == loading_cache_reload_enabled::no, "This constructor should only be invoked when ReloadEnabled == loading_cache_reload_enabled::no");
|
||||
|
||||
// If expiration period is zero - caching is disabled
|
||||
if (!caching_enabled()) {
|
||||
return;
|
||||
}
|
||||
|
||||
_timer.set_callback([this] { on_timer(); });
|
||||
_timer.arm(_refresh);
|
||||
_timer_period = _expiry;
|
||||
_timer.arm(_timer_period);
|
||||
}
|
||||
|
||||
~loading_cache() {
|
||||
_set.clear_and_dispose([] (ts_value_type* ptr) { loading_cache::destroy_ts_value(ptr); });
|
||||
_lru_list.erase_and_dispose(_lru_list.begin(), _lru_list.end(), [] (ts_value_lru_entry* ptr) { loading_cache::destroy_ts_value(ptr); });
|
||||
}
|
||||
|
||||
/// Returns a pointer to the cached value for the given key, using \p load to obtain the value through
/// _loading_values.get_or_load().
///
/// \param k key to look up
/// \param load callable that, when invoked with a const key_type&, returns future<value_type> (enforced by the static_assert)
///
/// \return a future resolving to a value_ptr for the entry. If the entry is not ready() yet and its size() is
///         greater than _max_size the future fails with entry_is_too_big.
///
/// Must only be called when caching is enabled (asserted below).
template <typename LoadFunc>
|
||||
future<value_ptr> get_ptr(const Key& k, LoadFunc&& load) {
|
||||
static_assert(std::is_same<future<value_type>, std::result_of_t<LoadFunc(const key_type&)>>::value, "Bad LoadFunc signature");
|
||||
// We shouldn't be here if caching is disabled
|
||||
assert(caching_enabled());
|
||||
|
||||
// The wrapper lambda converts the loaded plain value into a timestamped value.
return _loading_values.get_or_load(k, [this, load = std::forward<LoadFunc>(load)] (const Key& k) mutable {
|
||||
return load(k).then([this] (value_type val) {
|
||||
return ts_value_type(std::move(val));
|
||||
});
|
||||
}).then([this, k] (timestamped_val_ptr ts_val_ptr) {
|
||||
// check again since it could have already been inserted and initialized
|
||||
if (!ts_val_ptr->ready()) {
|
||||
_logger.trace("{}: storing the value for the first time", k);
|
||||
|
||||
// An entry bigger than the whole cache is rejected instead of being cached.
if (ts_val_ptr->size() > _max_size) {
|
||||
return make_exception_future<value_ptr>(entry_is_too_big());
|
||||
}
|
||||
|
||||
// Raw allocation followed by placement-new; the entry is later destroyed through destroy_ts_value().
// NOTE(review): if the constructor below can throw, the allocated storage is not released - confirm it cannot.
ts_value_lru_entry* new_lru_entry = Alloc().allocate(1);
|
||||
new(new_lru_entry) ts_value_lru_entry(std::move(ts_val_ptr), _lru_list, _current_size);
|
||||
|
||||
// This will "touch" the entry and add it to the LRU list - we must do this before the shrink() call.
|
||||
value_ptr vp(new_lru_entry->timestamped_value_ptr());
|
||||
|
||||
// Remove the least recently used items if map is too big.
|
||||
shrink();
|
||||
|
||||
return make_ready_future<value_ptr>(std::move(vp));
|
||||
}
|
||||
|
||||
// The entry is ready: a value_ptr is constructed from the entry pointer here (its constructor calls touch()).
return make_ready_future<value_ptr>(std::move(ts_val_ptr));
|
||||
});
|
||||
}
|
||||
|
||||
/// Same as get_ptr(k, load) but uses the loader stored in _load.
/// Only available when ReloadEnabled == loading_cache_reload_enabled::yes (enforced by the static_assert).
future<value_ptr> get_ptr(const Key& k) {
|
||||
static_assert(ReloadEnabled == loading_cache_reload_enabled::yes, "reload must be enabled");
|
||||
return get_ptr(k, _load);
|
||||
}
|
||||
|
||||
future<Tp> get(const Key& k) {
|
||||
static_assert(ReloadEnabled == loading_cache_reload_enabled::yes, "reload must be enabled");
|
||||
|
||||
// If caching is disabled - always load in the foreground
|
||||
if (!caching_enabled()) {
|
||||
return _load(k);
|
||||
return _load(k).then([] (Tp val) {
|
||||
return make_ready_future<Tp>(std::move(val));
|
||||
});
|
||||
}
|
||||
|
||||
// If the key is not in the cache yet, then find_or_create() is going to
|
||||
// create a new uninitialized value in the map. If the value is already
|
||||
// in the cache (the fast path) simply return the value. Otherwise, take
|
||||
// the mutex and try to load the value (the slow path).
|
||||
iterator ts_value_it = find_or_create(k);
|
||||
if (*ts_value_it) {
|
||||
return make_ready_future<Tp>(ts_value_it->value());
|
||||
} else {
|
||||
return slow_load(k);
|
||||
}
|
||||
return get_ptr(k).then([] (value_ptr v_ptr) {
|
||||
return make_ready_future<Tp>(*v_ptr);
|
||||
});
|
||||
}
|
||||
|
||||
/// Closes _timer_reads_gate and, once the close has completed (successfully or not), cancels the timer.
/// The continuation captures `this`, so the cache has to outlive the returned future.
future<> stop() {
|
||||
return _timer_reads_gate.close().finally([this] { _timer.cancel(); });
|
||||
}
|
||||
|
||||
/// Returns an iterator to the value stored under the given key.
/// The lookup is done by set_find(), so when set_find() reports "not found" the result compares equal to end().
iterator find(const Key& k) noexcept {
|
||||
return boost::make_transform_iterator(set_find(k), _value_extractor_fn);
|
||||
}
|
||||
|
||||
/// Returns the past-the-end value iterator: _loading_values.end() wrapped with the value extractor.
iterator end() {
|
||||
return boost::make_transform_iterator(_loading_values.end(), _value_extractor_fn);
|
||||
}
|
||||
|
||||
/// Returns the first value iterator: _loading_values.begin() wrapped with the value extractor.
iterator begin() {
|
||||
return boost::make_transform_iterator(_loading_values.begin(), _value_extractor_fn);
|
||||
}
|
||||
|
||||
/// Removes every entry of the LRU list whose value satisfies the given predicate.
///
/// \param pred callable that returns bool when invoked with a const value_type& (enforced by the static_assert)
///
/// Removed entries are destroyed with destroy_ts_value().
template <typename Pred>
|
||||
void remove_if(Pred&& pred) {
|
||||
static_assert(std::is_same<bool, std::result_of_t<Pred(const value_type&)>>::value, "Bad Pred signature");
|
||||
|
||||
// First lambda: selection based on the cached value; second lambda: disposal of the selected entry.
_lru_list.remove_and_dispose_if([this, &pred] (const ts_value_lru_entry& v) {
|
||||
return pred(v.timestamped_value().value());
|
||||
}, [this] (ts_value_lru_entry* p) {
|
||||
loading_cache::destroy_ts_value(p);
|
||||
});
|
||||
}
|
||||
|
||||
/// Returns the number of elements reported by _loading_values.
size_t size() const {
|
||||
return _loading_values.size();
|
||||
}
|
||||
|
||||
/// \brief Returns the memory size the currently cached entries occupy according to the EntrySize predicate
/// (the current value of the _current_size counter).
|
||||
size_t memory_footprint() const {
|
||||
return _current_size;
|
||||
}
|
||||
|
||||
private:
|
||||
/// Looks the key up in _loading_values.
///
/// \return an iterator to the entry, or set_end() if there is no such entry or the entry is not ready() yet
set_iterator set_find(const Key& k) noexcept {
|
||||
set_iterator it = _loading_values.find(k);
|
||||
set_iterator end_it = set_end();
|
||||
|
||||
// Entries that exist but are not ready yet are reported as missing.
if (it == end_it || !it->ready()) {
|
||||
return end_it;
|
||||
}
|
||||
return it;
|
||||
}
|
||||
|
||||
/// Returns the raw (not value-extracting) past-the-end iterator of _loading_values.
set_iterator set_end() noexcept {
|
||||
return _loading_values.end();
|
||||
}
|
||||
|
||||
/// Returns the raw (not value-extracting) begin iterator of _loading_values.
set_iterator set_begin() noexcept {
|
||||
return _loading_values.begin();
|
||||
}
|
||||
|
||||
/// Caching is considered enabled when the expiration period is non-zero.
bool caching_enabled() const {
|
||||
return _expiry != std::chrono::milliseconds(0);
|
||||
}
|
||||
|
||||
/// Look for the entry with the given key. If it doesn't exist - create a new one and add it to the _set.
|
||||
///
|
||||
/// \param k The key to look for
|
||||
///
|
||||
/// \return An iterator to the value with the given key (always different from _set.end())
|
||||
template <typename KeyType>
|
||||
iterator find_or_create(KeyType&& k) {
|
||||
iterator i = _set.find(k, Hash(), typename ts_value_type::key_eq());
|
||||
if (i == _set.end()) {
|
||||
// Raw allocation followed by placement-new of a value constructed from the LRU list and the key.
ts_value_type* new_ts_val = Alloc().allocate(1);
|
||||
new(new_ts_val) ts_value_type(_lru_list, std::forward<KeyType>(k));
|
||||
auto p = _set.insert(*new_ts_val);
|
||||
i = p.first;
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
static void destroy_ts_value(ts_value_type* val) {
|
||||
val->~ts_value_type();
|
||||
static void destroy_ts_value(ts_value_lru_entry* val) {
|
||||
val->~ts_value_lru_entry();
|
||||
Alloc().deallocate(val, 1);
|
||||
}
|
||||
|
||||
future<Tp> slow_load(const Key& k) {
|
||||
// If the key is not in the cache yet, then _write_mutex_map[k] is going
|
||||
// to create a new value with the initialized mutex. The mutex is going
|
||||
// to serialize the producers and only the first one is going to
|
||||
// actually issue a load operation and initialize the value with the
|
||||
// received result. The rest are going to see (and read) the initialized
|
||||
// value when they enter the critical section.
|
||||
shared_mutex sm = _write_mutex_map[k];
|
||||
return with_semaphore(sm.get(), 1, [this, k] {
|
||||
iterator ts_value_it = find_or_create(k);
|
||||
if (*ts_value_it) {
|
||||
return make_ready_future<Tp>(ts_value_it->value());
|
||||
future<> reload(ts_value_lru_entry& lru_entry) {
|
||||
return _load(lru_entry.key()).then_wrapped([this, key = lru_entry.key()] (auto&& f) mutable {
|
||||
// if the entry has been evicted by now - simply end here
|
||||
set_iterator it = this->set_find(key);
|
||||
if (it == this->set_end()) {
|
||||
this->_logger.trace("{}: entry was dropped during the reload", key);
|
||||
return make_ready_future<>();
|
||||
}
|
||||
_logger.trace("{}: storing the value for the first time", k);
|
||||
return _load(k).then([this, k] (Tp t) {
|
||||
// we have to "re-read" the _set here because the value may have been evicted by now
|
||||
iterator ts_value_it = find_or_create(std::move(k));
|
||||
*ts_value_it = std::move(t);
|
||||
return make_ready_future<Tp>(ts_value_it->value());
|
||||
});
|
||||
}).finally([sm] {});
|
||||
}
|
||||
|
||||
future<> reload(ts_value_type& ts_val) {
|
||||
return _load(ts_val.key()).then_wrapped([this, &ts_val] (auto&& f) {
|
||||
// The exceptions are related to the load operation itself.
|
||||
// We should ignore them for the background reads - if
|
||||
// they persist the value will age and will be reloaded in
|
||||
@@ -273,120 +440,97 @@ private:
|
||||
// will be propagated up to the user and will fail the
|
||||
// corresponding query.
|
||||
try {
|
||||
ts_val = f.get0();
|
||||
*it = f.get0();
|
||||
} catch (std::exception& e) {
|
||||
_logger.debug("{}: reload failed: {}", ts_val.key(), e.what());
|
||||
this->_logger.debug("{}: reload failed: {}", key, e.what());
|
||||
} catch (...) {
|
||||
_logger.debug("{}: reload failed: unknown error", ts_val.key());
|
||||
this->_logger.debug("{}: reload failed: unknown error", key);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
void erase(iterator it) {
|
||||
_set.erase_and_dispose(it, [] (ts_value_type* ptr) { loading_cache::destroy_ts_value(ptr); });
|
||||
// no need to delete the item from _lru_list - it's auto-deleted
|
||||
return make_ready_future<>();
|
||||
});
|
||||
}
|
||||
|
||||
void drop_expired() {
|
||||
auto now = loading_cache_clock_type::now();
|
||||
_lru_list.remove_and_dispose_if([now, this] (const ts_value_type& v) {
|
||||
_lru_list.remove_and_dispose_if([now, this] (const ts_value_lru_entry& lru_entry) {
|
||||
using namespace std::chrono;
|
||||
// An entry should be discarded if it hasn't been reloaded for too long or nobody cares about it anymore
|
||||
const ts_value_type& v = lru_entry.timestamped_value();
|
||||
auto since_last_read = now - v.last_read();
|
||||
auto since_loaded = now - v.loaded();
|
||||
if (_expiry < since_last_read || _expiry < since_loaded) {
|
||||
_logger.trace("drop_expired(): {}: dropping the entry: _expiry {}, ms passed since: loaded {} last_read {}", v.key(), _expiry.count(), duration_cast<milliseconds>(since_loaded).count(), duration_cast<milliseconds>(since_last_read).count());
|
||||
if (_expiry < since_last_read || (ReloadEnabled == loading_cache_reload_enabled::yes && _expiry < since_loaded)) {
|
||||
_logger.trace("drop_expired(): {}: dropping the entry: _expiry {}, ms passed since: loaded {} last_read {}", lru_entry.key(), _expiry.count(), duration_cast<milliseconds>(since_loaded).count(), duration_cast<milliseconds>(since_last_read).count());
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}, [this] (ts_value_type* p) {
|
||||
erase(_set.iterator_to(*p));
|
||||
}, [this] (ts_value_lru_entry* p) {
|
||||
loading_cache::destroy_ts_value(p);
|
||||
});
|
||||
}
|
||||
|
||||
// Shrink the cache to the _max_size discarding the least recently used items
|
||||
void shrink() {
|
||||
if (_set.size() > _max_size) {
|
||||
auto num_items_to_erase = _set.size() - _max_size;
|
||||
for (size_t i = 0; i < num_items_to_erase; ++i) {
|
||||
using namespace std::chrono;
|
||||
ts_value_type& ts_val = *_lru_list.rbegin();
|
||||
_logger.trace("shrink(): {}: dropping the entry: ms since last_read {}", ts_val.key(), duration_cast<milliseconds>(loading_cache_clock_type::now() - ts_val.last_read()).count());
|
||||
erase(_set.iterator_to(ts_val));
|
||||
}
|
||||
while (_current_size > _max_size) {
|
||||
using namespace std::chrono;
|
||||
ts_value_lru_entry& lru_entry = *_lru_list.rbegin();
|
||||
_logger.trace("shrink(): {}: dropping the entry: ms since last_read {}", lru_entry.key(), duration_cast<milliseconds>(loading_cache_clock_type::now() - lru_entry.timestamped_value().last_read()).count());
|
||||
loading_cache::destroy_ts_value(&lru_entry);
|
||||
}
|
||||
}
|
||||
|
||||
void rehash() {
|
||||
size_t new_buckets_count = 0;
|
||||
|
||||
// Don't grow or shrink too fast even if there is a steep drop/growth in the number of elements in the set.
|
||||
// Exponential growth/backoff should be good enough.
|
||||
//
|
||||
// Try to keep the load factor between 0.25 and 1.0.
|
||||
if (_set.size() < _current_buckets_count / 4) {
|
||||
new_buckets_count = _current_buckets_count / 4;
|
||||
} else if (_set.size() > _current_buckets_count) {
|
||||
new_buckets_count = _current_buckets_count * 2;
|
||||
// Try to bring the load factors of the _loading_values into a known range.
|
||||
void periodic_rehash() noexcept {
|
||||
try {
|
||||
_loading_values.rehash();
|
||||
} catch (...) {
|
||||
// if rehashing fails - continue with the current buckets array
|
||||
}
|
||||
|
||||
if (new_buckets_count < initial_num_buckets || new_buckets_count > max_num_buckets) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<typename set_type::bucket_type> new_buckets(new_buckets_count);
|
||||
_set.rehash(bi_set_bucket_traits(new_buckets.data(), new_buckets.size()));
|
||||
_logger.trace("rehash(): buckets count changed: {} -> {}", _current_buckets_count, new_buckets_count);
|
||||
|
||||
_buckets.swap(new_buckets);
|
||||
_current_buckets_count = new_buckets_count;
|
||||
}
|
||||
|
||||
void on_timer() {
|
||||
_logger.trace("on_timer(): start");
|
||||
|
||||
auto timer_start_tp = loading_cache_clock_type::now();
|
||||
|
||||
// Clear all cached mutexes
|
||||
_write_mutex_map.clear();
|
||||
|
||||
// Clean up items that were not touched for the whole _expiry period.
|
||||
drop_expired();
|
||||
|
||||
// Remove the least recently used items if map is too big.
|
||||
shrink();
|
||||
|
||||
// check if rehashing is needed and do it if it is.
|
||||
rehash();
|
||||
periodic_rehash();
|
||||
|
||||
if (ReloadEnabled == loading_cache_reload_enabled::no) {
|
||||
_logger.trace("on_timer(): rearming");
|
||||
_timer.arm(loading_cache_clock_type::now() + _timer_period);
|
||||
return;
|
||||
}
|
||||
|
||||
// Reload all those whose value needs to be reloaded.
|
||||
with_gate(_timer_reads_gate, [this, timer_start_tp] {
|
||||
return parallel_for_each(_set.begin(), _set.end(), [this, curr_time = timer_start_tp] (auto& ts_val) {
|
||||
_logger.trace("on_timer(): {}: checking the value age", ts_val.key());
|
||||
if (ts_val && ts_val.loaded() + _refresh < curr_time) {
|
||||
_logger.trace("on_timer(): {}: reloading the value", ts_val.key());
|
||||
return this->reload(ts_val);
|
||||
with_gate(_timer_reads_gate, [this] {
|
||||
return parallel_for_each(_lru_list.begin(), _lru_list.end(), [this] (ts_value_lru_entry& lru_entry) {
|
||||
_logger.trace("on_timer(): {}: checking the value age", lru_entry.key());
|
||||
if (lru_entry.timestamped_value().loaded() + _refresh < loading_cache_clock_type::now()) {
|
||||
_logger.trace("on_timer(): {}: reloading the value", lru_entry.key());
|
||||
return this->reload(lru_entry);
|
||||
}
|
||||
return now();
|
||||
}).finally([this, timer_start_tp] {
|
||||
}).finally([this] {
|
||||
_logger.trace("on_timer(): rearming");
|
||||
_timer.arm(timer_start_tp + _refresh);
|
||||
_timer.arm(loading_cache_clock_type::now() + _timer_period);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
std::vector<typename set_type::bucket_type> _buckets;
|
||||
size_t _current_buckets_count = initial_num_buckets;
|
||||
set_type _set;
|
||||
write_mutex_map_type _write_mutex_map;
|
||||
loading_values_type _loading_values;
|
||||
lru_list_type _lru_list;
|
||||
size_t _max_size;
|
||||
size_t _current_size = 0;
|
||||
size_t _max_size = 0;
|
||||
std::chrono::milliseconds _expiry;
|
||||
std::chrono::milliseconds _refresh;
|
||||
loading_cache_clock_type::duration _timer_period;
|
||||
logging::logger& _logger;
|
||||
std::function<future<Tp>(const Key&)> _load;
|
||||
timer<lowres_clock> _timer;
|
||||
seastar::gate _timer_reads_gate;
|
||||
value_extractor_fn _value_extractor_fn;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -137,7 +137,11 @@ private:
|
||||
using set_type = bi::unordered_set<entry, bi::power_2_buckets<true>, bi::compare_hash<true>>;
|
||||
using bi_set_bucket_traits = typename set_type::bucket_traits;
|
||||
using set_iterator = typename set_type::iterator;
|
||||
using value_extractor_fn = std::function<value_type& (entry&)>;
|
||||
/// Function object that maps a set entry to the value it holds.
struct value_extractor_fn {
|
||||
value_type& operator()(entry& e) const {
|
||||
return e.value();
|
||||
}
|
||||
};
|
||||
enum class shrinking_is_allowed { no, yes };
|
||||
|
||||
public:
|
||||
@@ -186,7 +190,6 @@ public:
|
||||
loading_shared_values()
|
||||
: _buckets(InitialBucketsCount)
|
||||
, _set(bi_set_bucket_traits(_buckets.data(), _buckets.size()))
|
||||
, _value_extractor_fn([] (entry& e) -> value_type& { return e.value(); })
|
||||
{
|
||||
static_assert(noexcept(Stats::inc_evictions()), "Stats::inc_evictions must be non-throwing");
|
||||
static_assert(noexcept(Stats::inc_hits()), "Stats::inc_hits must be non-throwing");
|
||||
|
||||
@@ -117,6 +117,7 @@ public:
|
||||
void reclaim_all_free_segments();
|
||||
occupancy_stats region_occupancy();
|
||||
occupancy_stats occupancy();
|
||||
size_t non_lsa_used_space();
|
||||
void set_reclamation_step(size_t step_in_segments) { _reclamation_step = step_in_segments; }
|
||||
size_t reclamation_step() const { return _reclamation_step; }
|
||||
void enable_abort_on_bad_alloc() { _abort_on_bad_alloc = true; }
|
||||
@@ -153,6 +154,10 @@ occupancy_stats tracker::occupancy() {
|
||||
return _impl->occupancy();
|
||||
}
|
||||
|
||||
// Forwards to the implementation object and returns its non_lsa_used_space() result.
size_t tracker::non_lsa_used_space() const {
|
||||
return _impl->non_lsa_used_space();
|
||||
}
|
||||
|
||||
// Forwards the full compaction request to the implementation object.
void tracker::full_compaction() {
|
||||
return _impl->full_compaction();
|
||||
}
|
||||
@@ -291,7 +296,7 @@ static inline bool can_allocate_more_memory(size_t size)
|
||||
class segment_zone : public bi::set_base_hook<>, public bi::slist_base_hook<> {
|
||||
struct free_segment : public bi::slist_base_hook<> { };
|
||||
|
||||
static constexpr size_t maximum_size = 256;
|
||||
static constexpr size_t maximum_size = max_zone_segments;
|
||||
static constexpr size_t minimum_size = 16;
|
||||
static thread_local size_t next_attempt_size;
|
||||
|
||||
@@ -574,10 +579,8 @@ size_t segment_pool::reclaim_segments(size_t target) {
|
||||
bi::slist<segment_zone> zones_to_remove;
|
||||
for (auto& zone : _all_zones | boost::adaptors::reversed) {
|
||||
if (zone.empty()) {
|
||||
if (reclaimed_segments < target || !zone.free_segment_count()) {
|
||||
reclaimed_segments += zone.free_segment_count();
|
||||
zones_to_remove.push_front(zone);
|
||||
}
|
||||
reclaimed_segments += zone.free_segment_count();
|
||||
zones_to_remove.push_front(zone);
|
||||
} else if (zone.free_segment_count()) {
|
||||
_free_segments_in_zones += zone.free_segment_count();
|
||||
zone.rebuild_free_segments_list();
|
||||
@@ -1681,6 +1684,11 @@ occupancy_stats tracker::impl::occupancy() {
|
||||
return occ;
|
||||
}
|
||||
|
||||
// Computes: allocated memory (memory::stats()) minus the total space of the region occupancy
// minus the free space held in zones (free segments in zones * segment_size).
// NOTE(review): the subtraction is done on unsigned values - presumably the allocated memory is
// never smaller than the sum of the other two terms; confirm.
size_t tracker::impl::non_lsa_used_space() {
|
||||
auto free_space_in_zones = shard_segment_pool.free_segments_in_zones() * segment_size;
|
||||
return memory::stats().allocated_memory() - region_occupancy().total_space() - free_space_in_zones;
|
||||
}
|
||||
|
||||
void tracker::impl::reclaim_all_free_segments()
|
||||
{
|
||||
llogger.debug("Reclaiming all free segments");
|
||||
@@ -2013,11 +2021,8 @@ tracker::impl::impl() {
|
||||
sm::make_gauge("large_objects_total_space_bytes", [this] { return shard_segment_pool.non_lsa_memory_in_use(); },
|
||||
sm::description("Holds a current size of allocated non-LSA memory.")),
|
||||
|
||||
sm::make_gauge("non_lsa_used_space_bytes",
|
||||
[this] {
|
||||
auto free_space_in_zones = shard_segment_pool.free_segments_in_zones() * segment_size;
|
||||
return memory::stats().allocated_memory() - region_occupancy().total_space() - free_space_in_zones;
|
||||
}, sm::description("Holds a current amount of used non-LSA memory.")),
|
||||
sm::make_gauge("non_lsa_used_space_bytes", [this] { return non_lsa_used_space(); },
|
||||
sm::description("Holds a current amount of used non-LSA memory.")),
|
||||
|
||||
sm::make_gauge("free_space_in_zones", [this] { return shard_segment_pool.free_segments_in_zones() * segment_size; },
|
||||
sm::description("Holds a current amount of free memory in zones.")),
|
||||
|
||||
@@ -43,6 +43,7 @@ class allocating_section;
|
||||
|
||||
constexpr int segment_size_shift = 18; // 256K; see #151, #152
|
||||
constexpr size_t segment_size = 1 << segment_size_shift;
|
||||
constexpr size_t max_zone_segments = 256;
|
||||
|
||||
//
|
||||
// Frees some amount of objects from the region to which it's attached.
|
||||
@@ -455,6 +456,9 @@ public:
|
||||
// Returns statistics for all segments allocated by LSA on this shard.
|
||||
occupancy_stats occupancy();
|
||||
|
||||
// Returns amount of allocated memory not managed by LSA
|
||||
size_t non_lsa_used_space() const;
|
||||
|
||||
impl& get_impl() { return *_impl; }
|
||||
|
||||
// Set the minimum number of segments reclaimed during single reclamation cycle.
|
||||
|
||||
Reference in New Issue
Block a user