Compare commits
47 Commits
branch-2.2
...
branch-2.2
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d27eb734a7 | ||
|
|
e6aeb490b5 | ||
|
|
2e3b09b593 | ||
|
|
92c74f4e0b | ||
|
|
89d835e9e3 | ||
|
|
263a740084 | ||
|
|
7f24b5319e | ||
|
|
fe16c0e985 | ||
|
|
f85badaaac | ||
|
|
2193d41683 | ||
|
|
1e1f0c29bf | ||
|
|
84d4588b5f | ||
|
|
7b43b26709 | ||
|
|
0ed01acf15 | ||
|
|
7ce160f408 | ||
|
|
5017d9b46a | ||
|
|
50b6ab3552 | ||
|
|
b1652823aa | ||
|
|
02b24aec34 | ||
|
|
22eea4d8cf | ||
|
|
d257f6d57c | ||
|
|
6fca92ac3c | ||
|
|
26e3917046 | ||
|
|
3892594a93 | ||
|
|
4b24439841 | ||
|
|
a02a4592d8 | ||
|
|
b6e1c08451 | ||
|
|
9469afcd27 | ||
|
|
240b9f122b | ||
|
|
cb16cd7724 | ||
|
|
c864d198fc | ||
|
|
25125e9c4f | ||
|
|
faf10fe6aa | ||
|
|
f76269cdcf | ||
|
|
a9b0ccf116 | ||
|
|
abc5941f87 | ||
|
|
a152ac12af | ||
|
|
c274fdf2ec | ||
|
|
5b88d6b4d6 | ||
|
|
2d626e1cf8 | ||
|
|
c11bd3e1cf | ||
|
|
9df3df92bc | ||
|
|
8ad9578a6c | ||
|
|
4cb6061a9f | ||
|
|
1940e6bd95 | ||
|
|
044cfde5f3 | ||
|
|
262a246436 |
@@ -1,6 +1,6 @@
|
||||
#!/bin/sh
|
||||
|
||||
VERSION=2.2.rc2
|
||||
VERSION=2.2.2
|
||||
|
||||
if test -f version
|
||||
then
|
||||
@@ -14,7 +14,7 @@ else
|
||||
# where counter starts at 1 and increments for successive versions.
|
||||
# This ensures that the package manager will select your custom
|
||||
# package over the standard release.
|
||||
SCYLLA_BUILD=1.mv
|
||||
SCYLLA_BUILD=0
|
||||
SCYLLA_RELEASE=$SCYLLA_BUILD.$DATE.$GIT_COMMIT
|
||||
fi
|
||||
|
||||
|
||||
@@ -2129,41 +2129,6 @@
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/storage_service/view_build_statuses/{keyspace}/{view}",
|
||||
"operations":[
|
||||
{
|
||||
"method":"GET",
|
||||
"summary":"Gets the progress of a materialized view build",
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"mapper"
|
||||
},
|
||||
"nickname":"view_build_statuses",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"keyspace",
|
||||
"description":"The keyspace",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"path"
|
||||
},
|
||||
{
|
||||
"name":"view",
|
||||
"description":"View name",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"path"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"models":{
|
||||
@@ -2228,11 +2193,11 @@
|
||||
"description":"The column family"
|
||||
},
|
||||
"total":{
|
||||
"type":"int",
|
||||
"type":"long",
|
||||
"description":"The total snapshot size"
|
||||
},
|
||||
"live":{
|
||||
"type":"int",
|
||||
"type":"long",
|
||||
"description":"The live snapshot size"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -852,15 +852,6 @@ void set_storage_service(http_context& ctx, routes& r) {
|
||||
return make_ready_future<json::json_return_type>(map_to_key_value(ownership, res));
|
||||
});
|
||||
});
|
||||
|
||||
ss::view_build_statuses.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
auto keyspace = validate_keyspace(ctx, req->param);
|
||||
auto view = req->param["view"];
|
||||
return service::get_local_storage_service().view_build_statuses(std::move(keyspace), std::move(view)).then([] (std::unordered_map<sstring, sstring> status) {
|
||||
std::vector<storage_service_json::mapper> res;
|
||||
return make_ready_future<json::json_return_type>(map_to_key_value(std::move(status), res));
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -149,7 +149,9 @@ static sstring gensalt() {
|
||||
// blowfish 2011 fix, blowfish, sha512, sha256, md5
|
||||
for (sstring pfx : { "$2y$", "$2a$", "$6$", "$5$", "$1$" }) {
|
||||
salt = pfx + input;
|
||||
if (crypt_r("fisk", salt.c_str(), &tlcrypt)) {
|
||||
const char* e = crypt_r("fisk", salt.c_str(), &tlcrypt);
|
||||
|
||||
if (e && (e[0] != '*')) {
|
||||
prefix = pfx;
|
||||
return salt;
|
||||
}
|
||||
|
||||
@@ -127,7 +127,7 @@ public:
|
||||
|
||||
class compaction_controller : public backlog_controller {
|
||||
public:
|
||||
static constexpr unsigned normalization_factor = 10;
|
||||
static constexpr unsigned normalization_factor = 30;
|
||||
compaction_controller(seastar::scheduling_group sg, const ::io_priority_class& iop, float static_shares) : backlog_controller(sg, iop, static_shares) {}
|
||||
compaction_controller(seastar::scheduling_group sg, const ::io_priority_class& iop, std::chrono::milliseconds interval, std::function<float()> current_backlog)
|
||||
: backlog_controller(sg, iop, std::move(interval),
|
||||
|
||||
@@ -60,6 +60,7 @@ class cache_flat_mutation_reader final : public flat_mutation_reader::impl {
|
||||
// - _next_row_in_range = _next.position() < _upper_bound
|
||||
// - _last_row points at a direct predecessor of the next row which is going to be read.
|
||||
// Used for populating continuity.
|
||||
// - _population_range_starts_before_all_rows is set accordingly
|
||||
reading_from_underlying,
|
||||
|
||||
end_of_stream
|
||||
@@ -86,6 +87,13 @@ class cache_flat_mutation_reader final : public flat_mutation_reader::impl {
|
||||
partition_snapshot_row_cursor _next_row;
|
||||
bool _next_row_in_range = false;
|
||||
|
||||
// True iff current population interval, since the previous clustering row, starts before all clustered rows.
|
||||
// We cannot just look at _lower_bound, because emission of range tombstones changes _lower_bound and
|
||||
// because we mark clustering intervals as continuous when consuming a clustering_row, it would prevent
|
||||
// us from marking the interval as continuous.
|
||||
// Valid when _state == reading_from_underlying.
|
||||
bool _population_range_starts_before_all_rows;
|
||||
|
||||
future<> do_fill_buffer(db::timeout_clock::time_point);
|
||||
void copy_from_cache_to_buffer();
|
||||
future<> process_static_row(db::timeout_clock::time_point);
|
||||
@@ -226,6 +234,7 @@ inline
|
||||
future<> cache_flat_mutation_reader::do_fill_buffer(db::timeout_clock::time_point timeout) {
|
||||
if (_state == state::move_to_underlying) {
|
||||
_state = state::reading_from_underlying;
|
||||
_population_range_starts_before_all_rows = _lower_bound.is_before_all_clustered_rows(*_schema);
|
||||
auto end = _next_row_in_range ? position_in_partition(_next_row.position())
|
||||
: position_in_partition(_upper_bound);
|
||||
return _read_context->fast_forward_to(position_range{_lower_bound, std::move(end)}, timeout).then([this, timeout] {
|
||||
@@ -351,7 +360,7 @@ future<> cache_flat_mutation_reader::read_from_underlying(db::timeout_clock::tim
|
||||
|
||||
inline
|
||||
bool cache_flat_mutation_reader::ensure_population_lower_bound() {
|
||||
if (!_ck_ranges_curr->start()) {
|
||||
if (_population_range_starts_before_all_rows) {
|
||||
return true;
|
||||
}
|
||||
if (!_last_row.refresh(*_snp)) {
|
||||
@@ -406,6 +415,7 @@ inline
|
||||
void cache_flat_mutation_reader::maybe_add_to_cache(const clustering_row& cr) {
|
||||
if (!can_populate()) {
|
||||
_last_row = nullptr;
|
||||
_population_range_starts_before_all_rows = false;
|
||||
_read_context->cache().on_mispopulate();
|
||||
return;
|
||||
}
|
||||
@@ -439,6 +449,7 @@ void cache_flat_mutation_reader::maybe_add_to_cache(const clustering_row& cr) {
|
||||
with_allocator(standard_allocator(), [&] {
|
||||
_last_row = partition_snapshot_row_weakref(*_snp, it, true);
|
||||
});
|
||||
_population_range_starts_before_all_rows = false;
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -228,7 +228,6 @@ scylla_tests = [
|
||||
'tests/memory_footprint',
|
||||
'tests/perf/perf_sstable',
|
||||
'tests/cql_query_test',
|
||||
'tests/secondary_index_test',
|
||||
'tests/storage_proxy_test',
|
||||
'tests/schema_change_test',
|
||||
'tests/mutation_reader_test',
|
||||
@@ -274,8 +273,6 @@ scylla_tests = [
|
||||
'tests/input_stream_test',
|
||||
'tests/virtual_reader_test',
|
||||
'tests/view_schema_test',
|
||||
'tests/view_build_test',
|
||||
'tests/view_complex_test',
|
||||
'tests/counter_test',
|
||||
'tests/cell_locker_test',
|
||||
'tests/row_locker_test',
|
||||
@@ -495,7 +492,6 @@ scylla_core = (['database.cc',
|
||||
'cql3/variable_specifications.cc',
|
||||
'db/consistency_level.cc',
|
||||
'db/system_keyspace.cc',
|
||||
'db/system_distributed_keyspace.cc',
|
||||
'db/schema_tables.cc',
|
||||
'db/cql_type_parser.cc',
|
||||
'db/legacy_schema_migrator.cc',
|
||||
@@ -506,12 +502,12 @@ scylla_core = (['database.cc',
|
||||
'db/config.cc',
|
||||
'db/extensions.cc',
|
||||
'db/heat_load_balance.cc',
|
||||
'db/index/secondary_index.cc',
|
||||
'db/marshal/type_parser.cc',
|
||||
'db/batchlog_manager.cc',
|
||||
'db/view/view.cc',
|
||||
'db/view/row_locking.cc',
|
||||
'index/secondary_index_manager.cc',
|
||||
'index/secondary_index.cc',
|
||||
'utils/UUID_gen.cc',
|
||||
'utils/i_filter.cc',
|
||||
'utils/bloom_filter.cc',
|
||||
|
||||
@@ -22,7 +22,6 @@
|
||||
#include "cql3/column_identifier.hh"
|
||||
#include "exceptions/exceptions.hh"
|
||||
#include "cql3/selection/simple_selector.hh"
|
||||
#include "cql3/util.hh"
|
||||
|
||||
#include <regex>
|
||||
|
||||
@@ -63,11 +62,14 @@ sstring column_identifier::to_string() const {
|
||||
}
|
||||
|
||||
sstring column_identifier::to_cql_string() const {
|
||||
return util::maybe_quote(_text);
|
||||
}
|
||||
|
||||
sstring column_identifier::raw::to_cql_string() const {
|
||||
return util::maybe_quote(_text);
|
||||
static const std::regex unquoted_identifier_re("[a-z][a-z0-9_]*");
|
||||
if (std::regex_match(_text.begin(), _text.end(), unquoted_identifier_re)) {
|
||||
return _text;
|
||||
}
|
||||
static const std::regex double_quote_re("\"");
|
||||
std::string result = _text;
|
||||
std::regex_replace(result, double_quote_re, "\"\"");
|
||||
return '"' + result + '"';
|
||||
}
|
||||
|
||||
column_identifier::raw::raw(sstring raw_text, bool keep_case)
|
||||
|
||||
@@ -123,7 +123,6 @@ public:
|
||||
bool operator!=(const raw& other) const;
|
||||
|
||||
virtual sstring to_string() const;
|
||||
sstring to_cql_string() const;
|
||||
|
||||
friend std::hash<column_identifier::raw>;
|
||||
friend std::ostream& operator<<(std::ostream& out, const column_identifier::raw& id);
|
||||
|
||||
@@ -395,15 +395,18 @@ operator<<(std::ostream& os, const cql3_type::raw& r) {
|
||||
|
||||
namespace util {
|
||||
|
||||
sstring maybe_quote(const sstring& identifier) {
|
||||
static const std::regex unquoted_identifier_re("[a-z][a-z0-9_]*");
|
||||
if (std::regex_match(identifier.begin(), identifier.end(), unquoted_identifier_re)) {
|
||||
return identifier;
|
||||
sstring maybe_quote(const sstring& s) {
|
||||
static const std::regex unquoted("\\w*");
|
||||
static const std::regex double_quote("\"");
|
||||
|
||||
if (std::regex_match(s.begin(), s.end(), unquoted)) {
|
||||
return s;
|
||||
}
|
||||
static const std::regex double_quote_re("\"");
|
||||
std::string result = identifier;
|
||||
std::regex_replace(result, double_quote_re, "\"\"");
|
||||
return '"' + result + '"';
|
||||
std::ostringstream ss;
|
||||
ss << "\"";
|
||||
std::regex_replace(std::ostreambuf_iterator<char>(ss), s.begin(), s.end(), double_quote, "\"\"");
|
||||
ss << "\"";
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -67,6 +67,12 @@ class error_collector : public error_listener<RecognizerType, ExceptionBaseType>
|
||||
*/
|
||||
const sstring_view _query;
|
||||
|
||||
/**
|
||||
* An empty bitset to be used as a workaround for AntLR null dereference
|
||||
* bug.
|
||||
*/
|
||||
static typename ExceptionBaseType::BitsetListType _empty_bit_list;
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
@@ -144,6 +150,14 @@ private:
|
||||
break;
|
||||
}
|
||||
default:
|
||||
// AntLR Exception class has a bug of dereferencing a null
|
||||
// pointer in the displayRecognitionError. The following
|
||||
// if statement makes sure it will not be null before the
|
||||
// call to that function (displayRecognitionError).
|
||||
// bug reference: https://github.com/antlr/antlr3/issues/191
|
||||
if (!ex->get_expectingSet()) {
|
||||
ex->set_expectingSet(&_empty_bit_list);
|
||||
}
|
||||
ex->displayRecognitionError(token_names, msg);
|
||||
}
|
||||
return msg.str();
|
||||
@@ -345,4 +359,8 @@ private:
|
||||
#endif
|
||||
};
|
||||
|
||||
template<typename RecognizerType, typename TokenType, typename ExceptionBaseType>
|
||||
typename ExceptionBaseType::BitsetListType
|
||||
error_collector<RecognizerType,TokenType,ExceptionBaseType>::_empty_bit_list = typename ExceptionBaseType::BitsetListType();
|
||||
|
||||
}
|
||||
|
||||
@@ -209,19 +209,18 @@ void query_options::prepare(const std::vector<::shared_ptr<column_specification>
|
||||
}
|
||||
|
||||
auto& names = *_names;
|
||||
std::vector<cql3::raw_value> ordered_values;
|
||||
std::vector<cql3::raw_value_view> ordered_values;
|
||||
ordered_values.reserve(specs.size());
|
||||
for (auto&& spec : specs) {
|
||||
auto& spec_name = spec->name->text();
|
||||
for (size_t j = 0; j < names.size(); j++) {
|
||||
if (names[j] == spec_name) {
|
||||
ordered_values.emplace_back(_values[j]);
|
||||
ordered_values.emplace_back(_value_views[j]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
_values = std::move(ordered_values);
|
||||
fill_value_views();
|
||||
_value_views = std::move(ordered_values);
|
||||
}
|
||||
|
||||
void query_options::fill_value_views()
|
||||
|
||||
@@ -172,30 +172,7 @@ query_processor::query_processor(distributed<service::storage_proxy>& proxy, dis
|
||||
sm::make_gauge(
|
||||
"prepared_cache_memory_footprint",
|
||||
[this] { return _prepared_cache.memory_footprint(); },
|
||||
sm::description("Size (in bytes) of the prepared statements cache.")),
|
||||
|
||||
sm::make_derive(
|
||||
"secondary_index_creates",
|
||||
_cql_stats.secondary_index_creates,
|
||||
sm::description("Counts a total number of CQL CREATE INDEX requests.")),
|
||||
|
||||
sm::make_derive(
|
||||
"secondary_index_drops",
|
||||
_cql_stats.secondary_index_drops,
|
||||
sm::description("Counts a total number of CQL DROP INDEX requests.")),
|
||||
|
||||
// secondary_index_reads total count is also included in all cql reads
|
||||
sm::make_derive(
|
||||
"secondary_index_reads",
|
||||
_cql_stats.secondary_index_reads,
|
||||
sm::description("Counts a total number of CQL read requests performed using secondary indexes.")),
|
||||
|
||||
// secondary_index_rows_read total count is also included in all cql rows read
|
||||
sm::make_derive(
|
||||
"secondary_index_rows_read",
|
||||
_cql_stats.secondary_index_rows_read,
|
||||
sm::description("Counts a total number of rows read during CQL requests performed using secondary indexes."))
|
||||
});
|
||||
sm::description("Size (in bytes) of the prepared statements cache."))});
|
||||
|
||||
service::get_local_migration_manager().register_listener(_migration_subscriber.get());
|
||||
}
|
||||
|
||||
@@ -64,15 +64,13 @@ class single_column_primary_key_restrictions : public primary_key_restrictions<V
|
||||
using bounds_range_type = typename primary_key_restrictions<ValueType>::bounds_range_type;
|
||||
private:
|
||||
schema_ptr _schema;
|
||||
bool _allow_filtering;
|
||||
::shared_ptr<single_column_restrictions> _restrictions;
|
||||
bool _slice;
|
||||
bool _contains;
|
||||
bool _in;
|
||||
public:
|
||||
single_column_primary_key_restrictions(schema_ptr schema, bool allow_filtering)
|
||||
single_column_primary_key_restrictions(schema_ptr schema)
|
||||
: _schema(schema)
|
||||
, _allow_filtering(allow_filtering)
|
||||
, _restrictions(::make_shared<single_column_restrictions>(schema))
|
||||
, _slice(false)
|
||||
, _contains(false)
|
||||
@@ -112,7 +110,7 @@ public:
|
||||
}
|
||||
|
||||
void do_merge_with(::shared_ptr<single_column_restriction> restriction) {
|
||||
if (!_restrictions->empty() && !_allow_filtering) {
|
||||
if (!_restrictions->empty()) {
|
||||
auto last_column = *_restrictions->last_column();
|
||||
auto new_column = restriction->get_column_def();
|
||||
|
||||
|
||||
@@ -202,6 +202,14 @@ public:
|
||||
const query_options& options,
|
||||
gc_clock::time_point now) const override;
|
||||
|
||||
virtual std::vector<bytes_opt> values_raw(const query_options& options) const = 0;
|
||||
|
||||
virtual std::vector<bytes_opt> values(const query_options& options) const override {
|
||||
std::vector<bytes_opt> ret = values_raw(options);
|
||||
std::sort(ret.begin(),ret.end());
|
||||
ret.erase(std::unique(ret.begin(),ret.end()),ret.end());
|
||||
return ret;
|
||||
}
|
||||
#if 0
|
||||
@Override
|
||||
protected final boolean isSupportedBy(SecondaryIndex index)
|
||||
@@ -224,7 +232,7 @@ public:
|
||||
return abstract_restriction::term_uses_function(_values, ks_name, function_name);
|
||||
}
|
||||
|
||||
virtual std::vector<bytes_opt> values(const query_options& options) const override {
|
||||
virtual std::vector<bytes_opt> values_raw(const query_options& options) const override {
|
||||
std::vector<bytes_opt> ret;
|
||||
for (auto&& v : _values) {
|
||||
ret.emplace_back(to_bytes_opt(v->bind_and_get(options)));
|
||||
@@ -249,7 +257,7 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual std::vector<bytes_opt> values(const query_options& options) const override {
|
||||
virtual std::vector<bytes_opt> values_raw(const query_options& options) const override {
|
||||
auto&& lval = dynamic_pointer_cast<multi_item_terminal>(_marker->bind(options));
|
||||
if (!lval) {
|
||||
throw exceptions::invalid_request_exception("Invalid null value for IN restriction");
|
||||
|
||||
@@ -41,17 +41,14 @@ using boost::adaptors::transformed;
|
||||
|
||||
template<typename T>
|
||||
class statement_restrictions::initial_key_restrictions : public primary_key_restrictions<T> {
|
||||
bool _allow_filtering;
|
||||
public:
|
||||
initial_key_restrictions(bool allow_filtering)
|
||||
: _allow_filtering(allow_filtering) {}
|
||||
using bounds_range_type = typename primary_key_restrictions<T>::bounds_range_type;
|
||||
|
||||
::shared_ptr<primary_key_restrictions<T>> do_merge_to(schema_ptr schema, ::shared_ptr<restriction> restriction) const {
|
||||
if (restriction->is_multi_column()) {
|
||||
throw std::runtime_error(sprint("%s not implemented", __PRETTY_FUNCTION__));
|
||||
}
|
||||
return ::make_shared<single_column_primary_key_restrictions<T>>(schema, _allow_filtering)->merge_to(schema, restriction);
|
||||
return ::make_shared<single_column_primary_key_restrictions<T>>(schema)->merge_to(schema, restriction);
|
||||
}
|
||||
::shared_ptr<primary_key_restrictions<T>> merge_to(schema_ptr schema, ::shared_ptr<restriction> restriction) override {
|
||||
if (restriction->is_multi_column()) {
|
||||
@@ -60,7 +57,7 @@ public:
|
||||
if (restriction->is_on_token()) {
|
||||
return static_pointer_cast<token_restriction>(restriction);
|
||||
}
|
||||
return ::make_shared<single_column_primary_key_restrictions<T>>(schema, _allow_filtering)->merge_to(restriction);
|
||||
return ::make_shared<single_column_primary_key_restrictions<T>>(schema)->merge_to(restriction);
|
||||
}
|
||||
void merge_with(::shared_ptr<restriction> restriction) override {
|
||||
throw exceptions::unsupported_operation_exception();
|
||||
@@ -125,10 +122,9 @@ statement_restrictions::initial_key_restrictions<clustering_key_prefix>::merge_t
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
::shared_ptr<primary_key_restrictions<T>> statement_restrictions::get_initial_key_restrictions(bool allow_filtering) {
|
||||
static thread_local ::shared_ptr<primary_key_restrictions<T>> initial_kr_true = ::make_shared<initial_key_restrictions<T>>(true);
|
||||
static thread_local ::shared_ptr<primary_key_restrictions<T>> initial_kr_false = ::make_shared<initial_key_restrictions<T>>(false);
|
||||
return allow_filtering ? initial_kr_true : initial_kr_false;
|
||||
::shared_ptr<primary_key_restrictions<T>> statement_restrictions::get_initial_key_restrictions() {
|
||||
static thread_local ::shared_ptr<primary_key_restrictions<T>> initial_kr = ::make_shared<initial_key_restrictions<T>>();
|
||||
return initial_kr;
|
||||
}
|
||||
|
||||
std::vector<::shared_ptr<column_identifier>>
|
||||
@@ -145,10 +141,10 @@ statement_restrictions::get_partition_key_unrestricted_components() const {
|
||||
return r;
|
||||
}
|
||||
|
||||
statement_restrictions::statement_restrictions(schema_ptr schema, bool allow_filtering)
|
||||
statement_restrictions::statement_restrictions(schema_ptr schema)
|
||||
: _schema(schema)
|
||||
, _partition_key_restrictions(get_initial_key_restrictions<partition_key>(allow_filtering))
|
||||
, _clustering_columns_restrictions(get_initial_key_restrictions<clustering_key_prefix>(allow_filtering))
|
||||
, _partition_key_restrictions(get_initial_key_restrictions<partition_key>())
|
||||
, _clustering_columns_restrictions(get_initial_key_restrictions<clustering_key_prefix>())
|
||||
, _nonprimary_key_restrictions(::make_shared<single_column_restrictions>(schema))
|
||||
{ }
|
||||
#if 0
|
||||
@@ -166,9 +162,8 @@ statement_restrictions::statement_restrictions(database& db,
|
||||
::shared_ptr<variable_specifications> bound_names,
|
||||
bool selects_only_static_columns,
|
||||
bool select_a_collection,
|
||||
bool for_view,
|
||||
bool allow_filtering)
|
||||
: statement_restrictions(schema, allow_filtering)
|
||||
bool for_view)
|
||||
: statement_restrictions(schema)
|
||||
{
|
||||
/*
|
||||
* WHERE clause. For a given entity, rules are: - EQ relation conflicts with anything else (including a 2nd EQ)
|
||||
@@ -332,17 +327,6 @@ void statement_restrictions::process_partition_key_restrictions(bool has_queriab
|
||||
_is_key_range = true;
|
||||
_uses_secondary_indexing = has_queriable_index;
|
||||
}
|
||||
if (_partition_key_restrictions->is_slice() && !_partition_key_restrictions->is_on_token() && !for_view) {
|
||||
// A SELECT query may not request a slice (range) of partition keys
|
||||
// without using token(). This is because there is no way to do this
|
||||
// query efficiently: mumur3 turns a contiguous range of partition
|
||||
// keys into tokens all over the token space.
|
||||
// However, in a SELECT statement used to define a materialized view,
|
||||
// such a slice is fine - it is used to check whether individual
|
||||
// partitions, match, and does not present a performance problem.
|
||||
throw exceptions::invalid_request_exception(
|
||||
"Only EQ and IN relation are supported on the partition key (unless you use the token() function)");
|
||||
}
|
||||
}
|
||||
|
||||
bool statement_restrictions::has_partition_key_unrestricted_components() const {
|
||||
|
||||
@@ -67,7 +67,7 @@ private:
|
||||
class initial_key_restrictions;
|
||||
|
||||
template<typename T>
|
||||
static ::shared_ptr<primary_key_restrictions<T>> get_initial_key_restrictions(bool allow_filtering);
|
||||
static ::shared_ptr<primary_key_restrictions<T>> get_initial_key_restrictions();
|
||||
|
||||
/**
|
||||
* Restrictions on partitioning columns
|
||||
@@ -108,7 +108,7 @@ public:
|
||||
* @param cfm the column family meta data
|
||||
* @return a new empty <code>StatementRestrictions</code>.
|
||||
*/
|
||||
statement_restrictions(schema_ptr schema, bool allow_filtering);
|
||||
statement_restrictions(schema_ptr schema);
|
||||
|
||||
statement_restrictions(database& db,
|
||||
schema_ptr schema,
|
||||
@@ -117,8 +117,7 @@ public:
|
||||
::shared_ptr<variable_specifications> bound_names,
|
||||
bool selects_only_static_columns,
|
||||
bool select_a_collection,
|
||||
bool for_view = false,
|
||||
bool allow_filtering = false);
|
||||
bool for_view = false);
|
||||
private:
|
||||
void add_restriction(::shared_ptr<restriction> restriction);
|
||||
void add_single_column_restriction(::shared_ptr<single_column_restriction> restriction);
|
||||
|
||||
@@ -105,9 +105,11 @@ public:
|
||||
virtual void reset() = 0;
|
||||
|
||||
virtual assignment_testable::test_result test_assignment(database& db, const sstring& keyspace, ::shared_ptr<column_specification> receiver) override {
|
||||
if (receiver->type == get_type()) {
|
||||
auto t1 = receiver->type->underlying_type();
|
||||
auto t2 = get_type()->underlying_type();
|
||||
if (t1 == t2) {
|
||||
return assignment_testable::test_result::EXACT_MATCH;
|
||||
} else if (receiver->type->is_value_compatible_with(*get_type())) {
|
||||
} else if (t1->is_value_compatible_with(*t2)) {
|
||||
return assignment_testable::test_result::WEAKLY_ASSIGNABLE;
|
||||
} else {
|
||||
return assignment_testable::test_result::NOT_ASSIGNABLE;
|
||||
|
||||
@@ -116,6 +116,18 @@ single_column_relation::to_receivers(schema_ptr schema, const column_definition&
|
||||
throw exceptions::invalid_request_exception(sprint(
|
||||
"IN predicates on non-primary-key columns (%s) is not yet supported", column_def.name_as_text()));
|
||||
}
|
||||
} else if (is_slice()) {
|
||||
// Non EQ relation is not supported without token(), even if we have a 2ndary index (since even those
|
||||
// are ordered by partitioner).
|
||||
// Note: In theory we could allow it for 2ndary index queries with ALLOW FILTERING, but that would
|
||||
// probably require some special casing
|
||||
// Note bis: This is also why we don't bother handling the 'tuple' notation of #4851 for keys. If we
|
||||
// lift the limitation for 2ndary
|
||||
// index with filtering, we'll need to handle it though.
|
||||
if (column_def.is_partition_key()) {
|
||||
throw exceptions::invalid_request_exception(
|
||||
"Only EQ and IN relation are supported on the partition key (unless you use the token() function)");
|
||||
}
|
||||
}
|
||||
|
||||
if (is_contains() && !receiver->type->is_collection()) {
|
||||
|
||||
@@ -134,7 +134,7 @@ protected:
|
||||
#endif
|
||||
|
||||
virtual sstring to_string() const override {
|
||||
auto entity_as_string = _entity->to_cql_string();
|
||||
auto entity_as_string = _entity->to_string();
|
||||
if (_map_key) {
|
||||
entity_as_string = sprint("%s[%s]", std::move(entity_as_string), _map_key->to_string());
|
||||
}
|
||||
|
||||
@@ -247,11 +247,10 @@ future<shared_ptr<cql_transport::event::schema_change>> alter_table_statement::a
|
||||
cfm.with_column(column_name->name(), type, _is_static ? column_kind::static_column : column_kind::regular_column);
|
||||
|
||||
// Adding a column to a table which has an include all view requires the column to be added to the view
|
||||
// as well. If the view has a regular base column in its PK, then the column ID needs to be updated in
|
||||
// view_info; for that, rebuild the schema.
|
||||
// as well
|
||||
if (!_is_static) {
|
||||
for (auto&& view : cf.views()) {
|
||||
if (view->view_info()->include_all_columns() || view->view_info()->base_non_pk_column_in_view_pk()) {
|
||||
if (view->view_info()->include_all_columns()) {
|
||||
schema_builder builder(view);
|
||||
builder.with_column(column_name->name(), type);
|
||||
view_updates.push_back(view_ptr(builder.build()));
|
||||
@@ -306,10 +305,14 @@ future<shared_ptr<cql_transport::event::schema_change>> alter_table_statement::a
|
||||
}
|
||||
}
|
||||
|
||||
if (!cf.views().empty()) {
|
||||
// If a column is dropped which is included in a view, we don't allow the drop to take place.
|
||||
auto view_names = ::join(", ", cf.views()
|
||||
| boost::adaptors::filtered([&] (auto&& v) { return bool(v->get_column_definition(column_name->name())); })
|
||||
| boost::adaptors::transformed([] (auto&& v) { return v->cf_name(); }));
|
||||
if (!view_names.empty()) {
|
||||
throw exceptions::invalid_request_exception(sprint(
|
||||
"Cannot drop column %s on base table %s.%s with materialized views",
|
||||
column_name, keyspace(), column_family()));
|
||||
"Cannot drop column %s, depended on by materialized views (%s.{%s})",
|
||||
column_name, keyspace(), view_names));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -252,7 +252,6 @@ create_index_statement::announce_migration(distributed<service::storage_proxy>&
|
||||
sprint("Index %s is a duplicate of existing index %s", index.name(), existing_index.value().name()));
|
||||
}
|
||||
}
|
||||
++_cql_stats->secondary_index_creates;
|
||||
schema_builder builder{schema};
|
||||
builder.with_index(index);
|
||||
return service::get_local_migration_manager().announce_column_family_update(
|
||||
@@ -268,7 +267,6 @@ create_index_statement::announce_migration(distributed<service::storage_proxy>&
|
||||
|
||||
std::unique_ptr<cql3::statements::prepared_statement>
|
||||
create_index_statement::prepare(database& db, cql_stats& stats) {
|
||||
_cql_stats = &stats;
|
||||
return std::make_unique<prepared_statement>(make_shared<create_index_statement>(*this));
|
||||
}
|
||||
|
||||
@@ -281,7 +279,7 @@ index_metadata create_index_statement::make_index_metadata(schema_ptr schema,
|
||||
index_options_map new_options = options;
|
||||
auto target_option = boost::algorithm::join(targets | boost::adaptors::transformed(
|
||||
[schema](const auto &target) -> sstring {
|
||||
return target->as_string();
|
||||
return target->as_cql_string(schema);
|
||||
}), ",");
|
||||
new_options.emplace(index_target::target_option_name, target_option);
|
||||
return index_metadata{name, new_options, kind};
|
||||
|
||||
@@ -70,7 +70,7 @@ class create_index_statement : public schema_altering_statement {
|
||||
const std::vector<::shared_ptr<index_target::raw>> _raw_targets;
|
||||
const ::shared_ptr<index_prop_defs> _properties;
|
||||
const bool _if_not_exists;
|
||||
cql_stats* _cql_stats = nullptr;
|
||||
|
||||
|
||||
public:
|
||||
create_index_statement(::shared_ptr<cf_name> name, ::shared_ptr<index_name> index_name,
|
||||
|
||||
@@ -127,25 +127,22 @@ static bool validate_primary_key(
|
||||
"Cannot use Static column '%s' in PRIMARY KEY of materialized view", def->name_as_text()));
|
||||
}
|
||||
|
||||
bool new_non_pk_column = false;
|
||||
if (base_pk.find(def) == base_pk.end()) {
|
||||
if (has_non_pk_column) {
|
||||
throw exceptions::invalid_request_exception(sprint(
|
||||
"Cannot include more than one non-primary key column '%s' in materialized view primary key", def->name_as_text()));
|
||||
}
|
||||
new_non_pk_column = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
// We don't need to include the "IS NOT NULL" filter on a non-composite partition key
|
||||
// because we will never allow a single partition key to be NULL
|
||||
bool is_non_composite_partition_key = def->is_partition_key() &&
|
||||
schema->partition_key_columns().size() == 1;
|
||||
if (!is_non_composite_partition_key && !restrictions.is_restricted(def)) {
|
||||
if (schema->partition_key_columns().size() > 1 && !restrictions.is_restricted(def)) {
|
||||
throw exceptions::invalid_request_exception(sprint(
|
||||
"Primary key column '%s' is required to be filtered by 'IS NOT NULL'", def->name_as_text()));
|
||||
}
|
||||
|
||||
return new_non_pk_column;
|
||||
return false;
|
||||
}
|
||||
|
||||
future<shared_ptr<cql_transport::event::schema_change>> create_view_statement::announce_migration(distributed<service::storage_proxy>& proxy, bool is_local_only) {
|
||||
@@ -250,6 +247,13 @@ future<shared_ptr<cql_transport::event::schema_change>> create_view_statement::a
|
||||
boost::range::join(schema->partition_key_columns(), schema->clustering_key_columns())
|
||||
| boost::adaptors::transformed([](auto&& def) { return &def; }));
|
||||
|
||||
if (_partition_keys.empty()) {
|
||||
throw exceptions::invalid_request_exception(sprint("Must select at least a column for a Materialized View"));
|
||||
}
|
||||
if (_clustering_keys.empty()) {
|
||||
throw exceptions::invalid_request_exception(sprint("No columns are defined for Materialized View other than primary key"));
|
||||
}
|
||||
|
||||
// Validate the primary key clause, ensuring only one non-PK base column is used in the view's PK.
|
||||
bool has_non_pk_column = false;
|
||||
std::unordered_set<const column_definition*> target_primary_keys;
|
||||
@@ -291,26 +295,18 @@ future<shared_ptr<cql_transport::event::schema_change>> create_view_statement::a
|
||||
bool def_in_target_pk = std::find(target_primary_keys.begin(), target_primary_keys.end(), &def) != target_primary_keys.end();
|
||||
if (included_def && !def_in_target_pk) {
|
||||
target_non_pk_columns.push_back(&def);
|
||||
}
|
||||
if (def.is_primary_key() && !def_in_target_pk) {
|
||||
} else if (def.is_primary_key() && !def_in_target_pk) {
|
||||
missing_pk_columns.push_back(&def);
|
||||
}
|
||||
}
|
||||
|
||||
if (!missing_pk_columns.empty()) {
|
||||
auto column_names = ::join(", ", missing_pk_columns | boost::adaptors::transformed(std::mem_fn(&column_definition::name_as_text)));
|
||||
auto column_names = ::join(", ", missing_pk_columns | boost::adaptors::transformed(std::mem_fn(&column_definition::name)));
|
||||
throw exceptions::invalid_request_exception(sprint(
|
||||
"Cannot create Materialized View %s without primary key columns from base %s (%s)",
|
||||
column_family(), _base_name->get_column_family(), column_names));
|
||||
}
|
||||
|
||||
if (_partition_keys.empty()) {
|
||||
throw exceptions::invalid_request_exception(sprint("Must select at least a column for a Materialized View"));
|
||||
}
|
||||
if (_clustering_keys.empty()) {
|
||||
throw exceptions::invalid_request_exception(sprint("No columns are defined for Materialized View other than primary key"));
|
||||
}
|
||||
|
||||
schema_builder builder{keyspace(), column_family()};
|
||||
auto add_columns = [this, &builder] (std::vector<const column_definition*>& defs, column_kind kind) mutable {
|
||||
for (auto* def : defs) {
|
||||
|
||||
@@ -86,7 +86,6 @@ future<shared_ptr<cql_transport::event::schema_change>> drop_index_statement::an
|
||||
if (!cfm) {
|
||||
return make_ready_future<::shared_ptr<cql_transport::event::schema_change>>(nullptr);
|
||||
}
|
||||
++_cql_stats->secondary_index_drops;
|
||||
auto builder = schema_builder(cfm);
|
||||
builder.without_index(_index_name);
|
||||
return service::get_local_migration_manager().announce_column_family_update(builder.build(), false, {}, is_local_only).then([cfm] {
|
||||
@@ -103,7 +102,6 @@ future<shared_ptr<cql_transport::event::schema_change>> drop_index_statement::an
|
||||
|
||||
// Builds the prepared form of this DROP INDEX statement.
// Stores the address of `stats` in _cql_stats first, so the copy of *this
// that is wrapped below carries the same pointer (it is dereferenced elsewhere
// in this class to bump secondary_index_drops).
// `db` is not used by this body.
std::unique_ptr<cql3::statements::prepared_statement>
|
||||
drop_index_statement::prepare(database& db, cql_stats& stats) {
|
||||
_cql_stats = &stats;
|
||||
// The prepared statement owns a shared copy of this statement object.
return std::make_unique<prepared_statement>(make_shared<drop_index_statement>(*this));
|
||||
}
|
||||
|
||||
|
||||
@@ -56,7 +56,6 @@ namespace statements {
|
||||
class drop_index_statement : public schema_altering_statement {
|
||||
sstring _index_name;
|
||||
bool _if_exists;
|
||||
cql_stats* _cql_stats = nullptr;
|
||||
public:
|
||||
drop_index_statement(::shared_ptr<index_name> index_name, bool if_exists);
|
||||
|
||||
@@ -75,4 +74,4 @@ private:
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
@@ -41,7 +41,7 @@
|
||||
|
||||
#include <set>
|
||||
#include "index_prop_defs.hh"
|
||||
#include "index/secondary_index.hh"
|
||||
#include "db/index/secondary_index.hh"
|
||||
|
||||
void cql3::statements::index_prop_defs::validate() {
|
||||
static std::set<sstring> keywords({ sstring(KW_OPTIONS) });
|
||||
|
||||
@@ -41,7 +41,7 @@
|
||||
|
||||
#include <stdexcept>
|
||||
#include "index_target.hh"
|
||||
#include "index/secondary_index.hh"
|
||||
#include "db/index/secondary_index.hh"
|
||||
|
||||
namespace cql3 {
|
||||
|
||||
@@ -59,10 +59,6 @@ sstring index_target::as_cql_string(schema_ptr schema) const {
|
||||
return sprint("%s(%s)", to_sstring(type), column->to_cql_string());
|
||||
}
|
||||
|
||||
// Returns the target column rendered with its to_string() method.
// Unlike as_cql_string(), no schema is consulted and the target type is not
// included in the output.
sstring index_target::as_string() const {
|
||||
return column->to_string();
|
||||
}
|
||||
|
||||
index_target::target_type index_target::from_sstring(const sstring& s)
|
||||
{
|
||||
if (s == "keys") {
|
||||
|
||||
@@ -43,6 +43,7 @@
|
||||
|
||||
#include "core/shared_ptr.hh"
|
||||
#include "cql3/column_identifier.hh"
|
||||
#include "db/index/secondary_index.hh"
|
||||
|
||||
namespace cql3 {
|
||||
|
||||
@@ -64,7 +65,6 @@ struct index_target {
|
||||
}
|
||||
|
||||
sstring as_cql_string(schema_ptr schema) const;
|
||||
sstring as_string() const;
|
||||
|
||||
static sstring index_option(target_type type);
|
||||
static target_type from_column_definition(const column_definition& cd);
|
||||
|
||||
@@ -276,14 +276,13 @@ select_statement::do_execute(distributed<service::storage_proxy>& proxy,
|
||||
return do_with(
|
||||
cql3::selection::result_set_builder(*_selection, now,
|
||||
options.get_cql_serialization_format()),
|
||||
[this, p, page_size, now](auto& builder) {
|
||||
[p, page_size, now](auto& builder) {
|
||||
return do_until([p] {return p->is_exhausted();},
|
||||
[p, &builder, page_size, now] {
|
||||
return p->fetch_page(builder, page_size, now);
|
||||
}
|
||||
).then([this, &builder] {
|
||||
).then([&builder] {
|
||||
auto rs = builder.build();
|
||||
update_stats_rows_read(rs->size());
|
||||
auto msg = ::make_shared<cql_transport::messages::result_message::rows>(std::move(rs));
|
||||
return make_ready_future<shared_ptr<cql_transport::messages::result_message>>(std::move(msg));
|
||||
});
|
||||
@@ -303,7 +302,6 @@ select_statement::do_execute(distributed<service::storage_proxy>& proxy,
|
||||
rs->get_metadata().set_has_more_pages(p->state());
|
||||
}
|
||||
|
||||
update_stats_rows_read(rs->size());
|
||||
auto msg = ::make_shared<cql_transport::messages::result_message::rows>(std::move(rs));
|
||||
return make_ready_future<shared_ptr<cql_transport::messages::result_message>>(std::move(msg));
|
||||
});
|
||||
@@ -322,10 +320,10 @@ select_statement::execute(distributed<service::storage_proxy>& proxy,
|
||||
// is no way to tell which of these rows belong to the query result before
|
||||
// doing post-query ordering.
|
||||
if (needs_post_query_ordering() && _limit) {
|
||||
return do_with(std::forward<dht::partition_range_vector>(partition_ranges), [this, &proxy, &state, &options, cmd](auto& prs) {
|
||||
return do_with(std::forward<dht::partition_range_vector>(partition_ranges), [this, &proxy, &state, &options, cmd](auto prs) {
|
||||
assert(cmd->partition_limit == query::max_partitions);
|
||||
query::result_merger merger(cmd->row_limit * prs.size(), query::max_partitions);
|
||||
return map_reduce(prs.begin(), prs.end(), [this, &proxy, &state, &options, cmd] (auto& pr) {
|
||||
return map_reduce(prs.begin(), prs.end(), [this, &proxy, &state, &options, cmd] (auto pr) {
|
||||
dht::partition_range_vector prange { pr };
|
||||
auto command = ::make_lw_shared<query::read_command>(*cmd);
|
||||
return proxy.local().query(_schema,
|
||||
@@ -347,54 +345,6 @@ select_statement::execute(distributed<service::storage_proxy>& proxy,
|
||||
}
|
||||
}
|
||||
|
||||
// Function for fetching the selected columns from a list of clustering rows.
|
||||
// It is currently used only in our Secondary Index implementation - ordinary
|
||||
// CQL SELECT statements do not have the syntax to request a list of rows.
|
||||
// FIXME: The current implementation is very inefficient - it requests each
|
||||
// row separately (and all in parallel). Even multiple rows from a single
|
||||
// partition are requested separately. This last case can be easily improved,
|
||||
// but to implement the general case (multiple rows from multiple partitions)
|
||||
// efficiently, we will need more support from other layers.
|
||||
// Note that currently we do not make any assumptions on the order of the keys
|
||||
// given to this function; for a more efficient implementation with a large
|
||||
// list, we should probably require that the keys be ordered in token order
|
||||
// (see also issue #3423).
|
||||
// Reads the rows identified by an explicit list of primary keys.
// One query is issued per key (at most cmd->row_limit keys are used), the
// partial results are combined by a query::result_merger, and the merged
// result is handed to process_results().
//
// proxy        - storage proxy used to issue each per-key query
// cmd          - template read command; copied per key, never modified here
// primary_keys - keys to fetch; moved into do_with() and iterated as `keys`
// state        - supplies the trace state passed to each query
// options      - supplies the consistency level; forwarded to process_results()
// now          - forwarded unchanged to process_results()
future<shared_ptr<cql_transport::messages::result_message>>
|
||||
select_statement::execute(distributed<service::storage_proxy>& proxy,
|
||||
lw_shared_ptr<query::read_command> cmd,
|
||||
std::vector<primary_key>&& primary_keys,
|
||||
service::query_state& state,
|
||||
const query_options& options,
|
||||
gc_clock::time_point now)
|
||||
{
|
||||
return do_with(std::move(primary_keys), [this, &proxy, &state, &options, cmd] (auto& keys) {
|
||||
assert(cmd->partition_limit == query::max_partitions);
|
||||
// The merger is built with the same row limit as the command.
query::result_merger merger(cmd->row_limit, query::max_partitions);
|
||||
// there is no point to produce rows beyond the first row_limit:
|
||||
auto end = keys.size() <= cmd->row_limit ? keys.end() : keys.begin() + cmd->row_limit;
|
||||
return map_reduce(keys.begin(), end, [this, &proxy, &state, &options, cmd] (auto& key) {
|
||||
// Each key gets its own copy of the command so its row ranges can be
// rewritten without touching the shared `cmd`.
auto command = ::make_lw_shared<query::read_command>(*cmd);
|
||||
// for each partition, read just one clustering row (TODO: can
|
||||
// get all needed rows of one partition at once.)
|
||||
command->slice._row_ranges.clear();
|
||||
// NOTE(review): presumably this is true only for a non-empty clustering
// prefix; otherwise the row ranges are left empty - confirm.
if (key.clustering) {
|
||||
command->slice._row_ranges.push_back(query::clustering_range::make_singular(key.clustering));
|
||||
}
|
||||
return proxy.local().query(_schema,
|
||||
command,
|
||||
{dht::partition_range::make_singular(key.partition)},
|
||||
options.get_consistency(),
|
||||
// The replicas_per_token_range argument is dropped; only the result
// itself is passed on to the merger.
state.get_trace_state()).then([] (foreign_ptr<lw_shared_ptr<query::result>>&& result, service::replicas_per_token_range) {
|
||||
return std::move(result);
|
||||
});
|
||||
}, std::move(merger));
|
||||
}).then([this, &options, now, cmd] (auto result) {
|
||||
// note that cmd here still has the garbage clustering range in slice,
|
||||
// but process_results() ignores this part of the slice setting.
|
||||
return this->process_results(std::move(result), cmd, options, now);
|
||||
});
|
||||
}
|
||||
|
||||
future<::shared_ptr<cql_transport::messages::result_message>>
|
||||
select_statement::execute_internal(distributed<service::storage_proxy>& proxy,
|
||||
service::query_state& state,
|
||||
@@ -415,10 +365,10 @@ select_statement::execute_internal(distributed<service::storage_proxy>& proxy,
|
||||
++_stats.reads;
|
||||
|
||||
if (needs_post_query_ordering() && _limit) {
|
||||
return do_with(std::move(partition_ranges), [this, &proxy, &state, command] (auto& prs) {
|
||||
return do_with(std::move(partition_ranges), [this, &proxy, &state, command] (auto prs) {
|
||||
assert(command->partition_limit == query::max_partitions);
|
||||
query::result_merger merger(command->row_limit * prs.size(), query::max_partitions);
|
||||
return map_reduce(prs.begin(), prs.end(), [this, &proxy, &state, command] (auto& pr) {
|
||||
return map_reduce(prs.begin(), prs.end(), [this, &proxy, &state, command] (auto pr) {
|
||||
dht::partition_range_vector prange { pr };
|
||||
auto cmd = ::make_lw_shared<query::read_command>(*command);
|
||||
return proxy.local().query(_schema, cmd, std::move(prange), db::consistency_level::ONE, state.get_trace_state(),
|
||||
@@ -457,7 +407,6 @@ select_statement::process_results(foreign_ptr<lw_shared_ptr<query::result>> resu
|
||||
}
|
||||
rs->trim(cmd->row_limit);
|
||||
}
|
||||
update_stats_rows_read(rs->size());
|
||||
return ::make_shared<cql_transport::messages::result_message::rows>(std::move(rs));
|
||||
}
|
||||
|
||||
@@ -549,47 +498,10 @@ indexed_table_select_statement::do_execute(distributed<service::storage_proxy>&
|
||||
auto now = gc_clock::now();
|
||||
|
||||
++_stats.reads;
|
||||
++_stats.secondary_index_reads;
|
||||
|
||||
assert(_restrictions->uses_secondary_indexing());
|
||||
|
||||
// Secondary index search has two steps: 1. use the index table to find a
|
||||
// list of primary keys matching the query. 2. read the rows matching
|
||||
// these primary keys from the base table and return the selected columns.
|
||||
// In "whole_partitions" case, we can do the above in whole partition
|
||||
// granularity. "partition_slices" is similar, but we fetch the same
|
||||
// clustering prefix (make_partition_slice()) from a list of partitions.
|
||||
// In other cases we need to list, and retrieve, individual rows and
|
||||
// not entire partitions. See issue #3405 for more details.
|
||||
bool whole_partitions = false;
|
||||
bool partition_slices = false;
|
||||
if (_schema->clustering_key_size() == 0) {
|
||||
// Obviously, if there are no clustering columns, then we can work at
|
||||
// the granularity of whole partitions.
|
||||
whole_partitions = true;
|
||||
} else {
|
||||
if (_index.depends_on(*(_schema->clustering_key_columns().begin()))) {
|
||||
// Searching on the *first* clustering column means that in each
|
||||
// matching partition, we can take the same contiguous clustering
|
||||
// slice (clustering prefix).
|
||||
partition_slices = true;
|
||||
} else {
|
||||
// Search on any partition column means that either all rows
|
||||
// match or all don't, so we can work with whole partitions.
|
||||
for (auto& cdef : _schema->partition_key_columns()) {
|
||||
if (_index.depends_on(cdef)) {
|
||||
whole_partitions = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (whole_partitions || partition_slices) {
|
||||
// In this case, we can use our normal query machinery, which retrieves
|
||||
// entire partitions or the same slice for many partitions.
|
||||
return find_index_partition_ranges(proxy, state, options).then([limit, now, &state, &options, &proxy, this] (dht::partition_range_vector partition_ranges) {
|
||||
auto command = ::make_lw_shared<query::read_command>(
|
||||
return find_index_partition_ranges(proxy, state, options).then([limit, now, &state, &options, &proxy, this] (dht::partition_range_vector partition_ranges) {
|
||||
auto command = ::make_lw_shared<query::read_command>(
|
||||
_schema->id(),
|
||||
_schema->version(),
|
||||
make_partition_slice(options),
|
||||
@@ -599,70 +511,35 @@ indexed_table_select_statement::do_execute(distributed<service::storage_proxy>&
|
||||
query::max_partitions,
|
||||
utils::UUID(),
|
||||
options.get_timestamp(state));
|
||||
return this->execute(proxy, command, std::move(partition_ranges), state, options, now);
|
||||
});
|
||||
} else {
|
||||
// In this case, we need to retrieve a list of rows (not entire
|
||||
// partitions) and then retrieve those specific rows.
|
||||
return find_index_clustering_rows(proxy, state, options).then([limit, now, &state, &options, &proxy, this] (std::vector<primary_key> primary_keys) {
|
||||
auto command = ::make_lw_shared<query::read_command>(
|
||||
_schema->id(),
|
||||
_schema->version(),
|
||||
// Note: the "clustering bounds" set in make_partition_slice()
|
||||
// here is garbage, and will be overridden by execute() anyway
|
||||
make_partition_slice(options),
|
||||
limit,
|
||||
now,
|
||||
tracing::make_trace_info(state.get_trace_state()),
|
||||
query::max_partitions,
|
||||
utils::UUID(),
|
||||
options.get_timestamp(state));
|
||||
return this->execute(proxy, command, std::move(primary_keys), state, options, now);
|
||||
});
|
||||
}
|
||||
return this->execute(proxy, command, std::move(partition_ranges), state, options, now);
|
||||
});
|
||||
}
|
||||
|
||||
// Utility function for getting the schema of the materialized view used for
|
||||
// the secondary index implementation.
|
||||
// The view is looked up by name: the index's metadata name with "_index"
// appended, in the keyspace of `schema`. The same keyspace/table pair is also
// registered on `trace_state`. The result is whatever find_schema() returns
// for that pair.
static schema_ptr
|
||||
get_index_schema(distributed<service::storage_proxy>& proxy,
|
||||
const secondary_index::index& index,
|
||||
const schema_ptr& schema,
|
||||
tracing::trace_state_ptr& trace_state)
|
||||
{
|
||||
const auto& im = index.metadata();
|
||||
// Name of the backing table: "<index name>_index".
sstring index_table_name = im.name() + "_index";
|
||||
tracing::add_table_name(trace_state, schema->ks_name(), index_table_name);
|
||||
return proxy.local().get_db().local().find_schema(schema->ks_name(), index_table_name);
|
||||
}
|
||||
|
||||
// Utility function for reading from the index view (see get_index_schema())
|
||||
// the posting-list for a particular value of the indexed column.
|
||||
// Remember a secondary index can only be created on a single column.
|
||||
//static future<service::storage_proxy::coordinator_query_result>
|
||||
static future<foreign_ptr<lw_shared_ptr<query::result>>, std::unordered_map<nonwrapping_range<dht::token>, std::vector<utils::UUID>>>
|
||||
read_posting_list(distributed<service::storage_proxy>& proxy,
|
||||
schema_ptr view_schema,
|
||||
const std::vector<::shared_ptr<restrictions::restrictions>>& index_restrictions,
|
||||
const query_options& options,
|
||||
int32_t limit,
|
||||
service::query_state& state,
|
||||
gc_clock::time_point now)
|
||||
future<dht::partition_range_vector>
|
||||
indexed_table_select_statement::find_index_partition_ranges(distributed<service::storage_proxy>& proxy,
|
||||
service::query_state& state,
|
||||
const query_options& options)
|
||||
{
|
||||
const auto& im = _index.metadata();
|
||||
sstring index_table_name = sprint("%s_index", im.name());
|
||||
tracing::add_table_name(state.get_trace_state(), keyspace(), index_table_name);
|
||||
auto& db = proxy.local().get_db().local();
|
||||
const auto& view = db.find_column_family(_schema->ks_name(), index_table_name);
|
||||
dht::partition_range_vector partition_ranges;
|
||||
// FIXME: there should be only one index restriction for this index!
|
||||
// Perhaps even one index restriction entirely (do we support
|
||||
// intersection queries?).
|
||||
for (const auto& restriction : index_restrictions) {
|
||||
auto pk = partition_key::from_optional_exploded(*view_schema, restriction->values(options));
|
||||
auto dk = dht::global_partitioner().decorate_key(*view_schema, pk);
|
||||
for (const auto& restriction : _restrictions->index_restrictions()) {
|
||||
auto pk = partition_key::from_optional_exploded(*view.schema(), restriction->values(options));
|
||||
auto dk = dht::global_partitioner().decorate_key(*view.schema(), pk);
|
||||
auto range = dht::partition_range::make_singular(dk);
|
||||
partition_ranges.emplace_back(range);
|
||||
}
|
||||
partition_slice_builder partition_slice_builder{*view_schema};
|
||||
|
||||
auto now = gc_clock::now();
|
||||
int32_t limit = get_limit(options);
|
||||
|
||||
partition_slice_builder partition_slice_builder{*view.schema()};
|
||||
auto cmd = ::make_lw_shared<query::read_command>(
|
||||
view_schema->id(),
|
||||
view_schema->version(),
|
||||
view.schema()->id(),
|
||||
view.schema()->version(),
|
||||
partition_slice_builder.build(),
|
||||
limit,
|
||||
now,
|
||||
@@ -670,111 +547,35 @@ read_posting_list(distributed<service::storage_proxy>& proxy,
|
||||
query::max_partitions,
|
||||
utils::UUID(),
|
||||
options.get_timestamp(state));
|
||||
return proxy.local().query(view_schema,
|
||||
return proxy.local().query(view.schema(),
|
||||
cmd,
|
||||
std::move(partition_ranges),
|
||||
options.get_consistency(),
|
||||
state.get_trace_state());
|
||||
}
|
||||
|
||||
// Note: the partition keys returned by this function will be sorted in
|
||||
// lexicographical order of the partition key columns (in the way that
|
||||
// clustering keys are sorted) - NOT in token order. See issue #3423.
|
||||
future<dht::partition_range_vector>
|
||||
indexed_table_select_statement::find_index_partition_ranges(distributed<service::storage_proxy>& proxy,
|
||||
service::query_state& state,
|
||||
const query_options& options)
|
||||
{
|
||||
schema_ptr view = get_index_schema(proxy, _index, _schema, state.get_trace_state());
|
||||
auto now = gc_clock::now();
|
||||
return read_posting_list(proxy, view, _restrictions->index_restrictions(), options, get_limit(options), state, now).then(
|
||||
[this, now, &options, view] (foreign_ptr<lw_shared_ptr<query::result>> result, service::replicas_per_token_range) {
|
||||
state.get_trace_state()).then([cmd, this, &options, now, &view] (foreign_ptr<lw_shared_ptr<query::result>> result,
|
||||
service::replicas_per_token_range) {
|
||||
std::vector<const column_definition*> columns;
|
||||
for (const column_definition& cdef : _schema->partition_key_columns()) {
|
||||
columns.emplace_back(view->get_column_definition(cdef.name()));
|
||||
columns.emplace_back(view.schema()->get_column_definition(cdef.name()));
|
||||
}
|
||||
auto selection = selection::selection::for_columns(view, columns);
|
||||
auto selection = selection::selection::for_columns(view.schema(), columns);
|
||||
cql3::selection::result_set_builder builder(*selection, now, options.get_cql_serialization_format());
|
||||
// FIXME: read_posting_list already asks to read primary keys only.
|
||||
// why do we need to specify this again?
|
||||
auto slice = partition_slice_builder(*view).build();
|
||||
query::result_view::consume(*result,
|
||||
slice,
|
||||
cql3::selection::result_set_builder::visitor(builder, *view, *selection));
|
||||
cmd->slice,
|
||||
cql3::selection::result_set_builder::visitor(builder, *view.schema(), *selection));
|
||||
auto rs = cql3::untyped_result_set(::make_shared<cql_transport::messages::result_message::rows>(std::move(builder.build())));
|
||||
dht::partition_range_vector partition_ranges;
|
||||
partition_ranges.reserve(rs.size());
|
||||
// We are reading the list of primary keys as rows of a single
|
||||
// partition (in the index view), so they are sorted in
|
||||
// lexicographical order (N.B. this is NOT token order!). We need
|
||||
// to avoid outputting the same partition key twice, but luckily in
|
||||
// the sorted order, these will be adjacent.
|
||||
stdx::optional<dht::decorated_key> last_dk;
|
||||
for (size_t i = 0; i < rs.size(); i++) {
|
||||
const auto& row = rs.at(i);
|
||||
std::vector<bytes> pk_columns;
|
||||
for (const auto& column : row.get_columns()) {
|
||||
pk_columns.push_back(row.get_blob(column->name->to_string()));
|
||||
auto blob = row.get_blob(column->name->to_cql_string());
|
||||
auto pk = partition_key::from_exploded(*_schema, { blob });
|
||||
auto dk = dht::global_partitioner().decorate_key(*_schema, pk);
|
||||
auto range = dht::partition_range::make_singular(dk);
|
||||
partition_ranges.emplace_back(range);
|
||||
}
|
||||
auto pk = partition_key::from_exploded(*_schema, pk_columns);
|
||||
auto dk = dht::global_partitioner().decorate_key(*_schema, pk);
|
||||
if (last_dk && last_dk->equal(*_schema, dk)) {
|
||||
// Another row of the same partition, no need to output the
|
||||
// same partition key again.
|
||||
continue;
|
||||
}
|
||||
last_dk = dk;
|
||||
auto range = dht::partition_range::make_singular(dk);
|
||||
partition_ranges.emplace_back(range);
|
||||
}
|
||||
return partition_ranges;
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
// Note: the partition keys returned by this function will be sorted in
|
||||
// lexicographical order of the partition key columns (in the way that
|
||||
// clustering keys are sorted) - NOT in token order. See issue #3423.
|
||||
future<std::vector<indexed_table_select_statement::primary_key>>
|
||||
indexed_table_select_statement::find_index_clustering_rows(distributed<service::storage_proxy>& proxy, service::query_state& state, const query_options& options)
|
||||
{
|
||||
schema_ptr view = get_index_schema(proxy, _index, _schema, state.get_trace_state());
|
||||
auto now = gc_clock::now();
|
||||
return read_posting_list(proxy, view, _restrictions->index_restrictions(), options, get_limit(options), state, now).then(
|
||||
[this, now, &options, view] (foreign_ptr<lw_shared_ptr<query::result>> result, service::replicas_per_token_range) {
|
||||
std::vector<const column_definition*> columns;
|
||||
for (const column_definition& cdef : _schema->partition_key_columns()) {
|
||||
columns.emplace_back(view->get_column_definition(cdef.name()));
|
||||
}
|
||||
for (const column_definition& cdef : _schema->clustering_key_columns()) {
|
||||
columns.emplace_back(view->get_column_definition(cdef.name()));
|
||||
}
|
||||
auto selection = selection::selection::for_columns(view, columns);
|
||||
cql3::selection::result_set_builder builder(*selection, now, options.get_cql_serialization_format());
|
||||
// FIXME: read_posting_list already asks to read primary keys only.
|
||||
// why do we need to specify this again?
|
||||
auto slice = partition_slice_builder(*view).build();
|
||||
query::result_view::consume(*result,
|
||||
slice,
|
||||
cql3::selection::result_set_builder::visitor(builder, *view, *selection));
|
||||
auto rs = cql3::untyped_result_set(::make_shared<cql_transport::messages::result_message::rows>(std::move(builder.build())));
|
||||
std::vector<primary_key> primary_keys;
|
||||
primary_keys.reserve(rs.size());
|
||||
for (size_t i = 0; i < rs.size(); i++) {
|
||||
const auto& row = rs.at(i);
|
||||
auto pk_columns = _schema->partition_key_columns() | boost::adaptors::transformed([&] (auto& cdef) {
|
||||
return row.get_blob(cdef.name_as_text());
|
||||
});
|
||||
auto pk = partition_key::from_range(pk_columns);
|
||||
auto dk = dht::global_partitioner().decorate_key(*_schema, pk);
|
||||
auto ck_columns = _schema->clustering_key_columns() | boost::adaptors::transformed([&] (auto& cdef) {
|
||||
return row.get_blob(cdef.name_as_text());
|
||||
});
|
||||
auto ck = clustering_key::from_range(ck_columns);
|
||||
primary_keys.emplace_back(primary_key{std::move(dk), std::move(ck)});
|
||||
}
|
||||
return primary_keys;
|
||||
});
|
||||
return make_ready_future<dht::partition_range_vector>(partition_ranges);
|
||||
}).finally([cmd] {});
|
||||
}
|
||||
|
||||
namespace raw {
|
||||
@@ -856,10 +657,8 @@ select_statement::prepare_restrictions(database& db,
|
||||
bool for_view)
|
||||
{
|
||||
try {
|
||||
// FIXME: this method should take a separate allow_filtering parameter
|
||||
// and pass it on. Currently we pass "for_view" as allow_filtering.
|
||||
return ::make_shared<restrictions::statement_restrictions>(db, schema, statement_type::SELECT, std::move(_where_clause), bound_names,
|
||||
selection->contains_only_static_columns(), selection->contains_a_collection(), for_view, for_view);
|
||||
selection->contains_only_static_columns(), selection->contains_a_collection(), for_view);
|
||||
} catch (const exceptions::unrecognized_entity_exception& e) {
|
||||
if (contains_alias(e.entity)) {
|
||||
throw exceptions::invalid_request_exception(sprint("Aliases aren't allowed in the where clause ('%s')", e.relation->to_string()));
|
||||
@@ -1060,22 +859,15 @@ namespace util {
|
||||
shared_ptr<cql3::statements::raw::select_statement> build_select_statement(
|
||||
const sstring_view& cf_name,
|
||||
const sstring_view& where_clause,
|
||||
bool select_all_columns,
|
||||
const std::vector<column_definition>& selected_columns) {
|
||||
std::vector<sstring_view> included_columns) {
|
||||
std::ostringstream out;
|
||||
out << "SELECT ";
|
||||
if (select_all_columns) {
|
||||
if (included_columns.empty()) {
|
||||
out << "*";
|
||||
} else {
|
||||
// If the column name is not entirely lowercase (or digits or _),
|
||||
// when output to CQL it must be quoted to preserve case as well
|
||||
// as non alphanumeric characters.
|
||||
auto cols = boost::copy_range<std::vector<sstring>>(selected_columns
|
||||
| boost::adaptors::transformed(std::mem_fn(&column_definition::name_as_cql_string)));
|
||||
out << join(", ", cols);
|
||||
out << join(", ", included_columns);
|
||||
}
|
||||
// Note that cf_name may need to be quoted, just like column names above.
|
||||
out << " FROM " << util::maybe_quote(cf_name.to_string()) << " WHERE " << where_clause << " ALLOW FILTERING";
|
||||
out << " FROM " << cf_name << " WHERE " << where_clause << " ALLOW FILTERING";
|
||||
return do_with_parser(out.str(), std::mem_fn(&cql3_parser::CqlParser::selectStatement));
|
||||
}
|
||||
|
||||
|
||||
@@ -124,19 +124,6 @@ public:
|
||||
lw_shared_ptr<query::read_command> cmd, dht::partition_range_vector&& partition_ranges, service::query_state& state,
|
||||
const query_options& options, gc_clock::time_point now);
|
||||
|
||||
// Pairs a decorated partition key with a clustering key prefix.
// Used by the execute() overload declared below, which takes a vector of
// these keys.
struct primary_key {
|
||||
// Partition part of the key.
dht::decorated_key partition;
|
||||
// Clustering part of the key.
// NOTE(review): presumably may be empty for tables without clustering
// columns - confirm.
clustering_key_prefix clustering;
|
||||
};
|
||||
|
||||
future<::shared_ptr<cql_transport::messages::result_message>> execute(
|
||||
distributed<service::storage_proxy>& proxy,
|
||||
lw_shared_ptr<query::read_command> cmd,
|
||||
std::vector<primary_key>&& primary_keys,
|
||||
service::query_state& state,
|
||||
const query_options& options,
|
||||
gc_clock::time_point now);
|
||||
|
||||
shared_ptr<cql_transport::messages::result_message> process_results(foreign_ptr<lw_shared_ptr<query::result>> results,
|
||||
lw_shared_ptr<query::read_command> cmd, const query_options& options, gc_clock::time_point now);
|
||||
|
||||
@@ -151,9 +138,6 @@ public:
|
||||
protected:
|
||||
int32_t get_limit(const query_options& options) const;
|
||||
bool needs_post_query_ordering() const;
|
||||
// Adds `rows_read` to the rows_read counter in _stats.
// Virtual so that derived statements can account for additional counters.
virtual void update_stats_rows_read(int64_t rows_read) {
|
||||
_stats.rows_read += rows_read;
|
||||
}
|
||||
};
|
||||
|
||||
class primary_key_select_statement : public select_statement {
|
||||
@@ -205,15 +189,6 @@ private:
|
||||
future<dht::partition_range_vector> find_index_partition_ranges(distributed<service::storage_proxy>& proxy,
|
||||
service::query_state& state,
|
||||
const query_options& options);
|
||||
|
||||
future<std::vector<primary_key>> find_index_clustering_rows(distributed<service::storage_proxy>& proxy,
|
||||
service::query_state& state,
|
||||
const query_options& options);
|
||||
|
||||
// Override that adds `rows_read` to both the general rows_read counter and
// the secondary_index_rows_read counter in _stats.
virtual void update_stats_rows_read(int64_t rows_read) override {
|
||||
_stats.rows_read += rows_read;
|
||||
_stats.secondary_index_rows_read += rows_read;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -35,12 +35,6 @@ struct cql_stats {
|
||||
uint64_t batches_pure_logged = 0;
|
||||
uint64_t batches_pure_unlogged = 0;
|
||||
uint64_t batches_unlogged_from_logged = 0;
|
||||
uint64_t rows_read = 0;
|
||||
|
||||
int64_t secondary_index_creates = 0;
|
||||
int64_t secondary_index_drops = 0;
|
||||
int64_t secondary_index_reads = 0;
|
||||
int64_t secondary_index_rows_read = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -142,7 +142,6 @@ public:
|
||||
using row = untyped_result_set_row;
|
||||
typedef std::vector<row> rows_type;
|
||||
using const_iterator = rows_type::const_iterator;
|
||||
using iterator = rows_type::const_iterator;
|
||||
|
||||
untyped_result_set(::shared_ptr<cql_transport::messages::result_message>);
|
||||
untyped_result_set(untyped_result_set&&) = default;
|
||||
|
||||
@@ -53,6 +53,9 @@ update_parameters::get_prefetched_list(
|
||||
return {};
|
||||
}
|
||||
|
||||
if (column.is_static()) {
|
||||
ckey = clustering_key_view::make_empty();
|
||||
}
|
||||
auto i = _prefetched->rows.find(std::make_pair(std::move(pkey), std::move(ckey)));
|
||||
if (i == _prefetched->rows.end()) {
|
||||
return {};
|
||||
|
||||
14
cql3/util.hh
14
cql3/util.hh
@@ -72,23 +72,11 @@ inline sstring rename_column_in_where_clause(const sstring_view& where_clause, c
|
||||
return relations_to_where_clause(std::move(new_relations));
|
||||
}
|
||||
|
||||
/// build a CQL "select" statement with the desired parameters.
|
||||
/// If select_all_columns==true, all columns are selected and the value of
|
||||
/// selected_columns is ignored.
|
||||
shared_ptr<cql3::statements::raw::select_statement> build_select_statement(
|
||||
const sstring_view& cf_name,
|
||||
const sstring_view& where_clause,
|
||||
bool select_all_columns,
|
||||
const std::vector<column_definition>& selected_columns);
|
||||
std::vector<sstring_view> included_columns);
|
||||
|
||||
/// maybe_quote() takes an identifier - the name of a column, table or
|
||||
/// keyspace name - and transforms it to a string which can be used in CQL
|
||||
/// commands. Namely, if the identifier is not entirely lower-case (including
|
||||
/// digits and underscores), it needs to be quoted to be represented in CQL.
|
||||
/// Without this quoting, CQL folds uppercase letters to lower case, and
|
||||
/// forbids non-alpha-numeric characters in identifier names.
|
||||
/// Quoting involves wrapping the string in double-quotes ("). A double-quote
|
||||
/// character itself is quoted by doubling it.
|
||||
sstring maybe_quote(const sstring& s);
|
||||
|
||||
} // namespace util
|
||||
|
||||
136
database.cc
136
database.cc
@@ -361,9 +361,13 @@ filter_sstable_for_reader(std::vector<sstables::shared_sstable>&& sstables, colu
|
||||
};
|
||||
sstables.erase(boost::remove_if(sstables, sstable_has_not_key), sstables.end());
|
||||
|
||||
// FIXME: Workaround for https://github.com/scylladb/scylla/issues/3552
|
||||
// and https://github.com/scylladb/scylla/issues/3553
|
||||
const bool filtering_broken = true;
|
||||
|
||||
// no clustering filtering is applied if schema defines no clustering key or
|
||||
// compaction strategy thinks it will not benefit from such an optimization.
|
||||
if (!schema->clustering_key_size() || !cf.get_compaction_strategy().use_clustering_key_filter()) {
|
||||
if (filtering_broken || !schema->clustering_key_size() || !cf.get_compaction_strategy().use_clustering_key_filter()) {
|
||||
return sstables;
|
||||
}
|
||||
::cf_stats* stats = cf.cf_stats();
|
||||
@@ -1247,17 +1251,6 @@ void column_family::set_metrics() {
|
||||
ms::make_gauge("live_sstable", ms::description("Live sstable count"), _stats.live_sstable_count)(cf)(ks),
|
||||
ms::make_gauge("pending_compaction", ms::description("Estimated number of compactions pending for this column family"), _stats.pending_compactions)(cf)(ks)
|
||||
});
|
||||
|
||||
// View metrics are created only for base tables, so there's no point in adding them to views (which cannot act as base tables for other views)
|
||||
if (!_schema->is_view()) {
|
||||
_metrics.add_group("column_family", {
|
||||
ms::make_total_operations("view_updates_pushed_remote", _view_stats.view_updates_pushed_remote, ms::description("Number of updates (mutations) pushed to remote view replicas"))(cf)(ks),
|
||||
ms::make_total_operations("view_updates_failed_remote", _view_stats.view_updates_failed_remote, ms::description("Number of updates (mutations) that failed to be pushed to remote view replicas"))(cf)(ks),
|
||||
ms::make_total_operations("view_updates_pushed_local", _view_stats.view_updates_pushed_local, ms::description("Number of updates (mutations) pushed to local view replicas"))(cf)(ks),
|
||||
ms::make_total_operations("view_updates_failed_local", _view_stats.view_updates_failed_local, ms::description("Number of updates (mutations) that failed to be pushed to local view replicas"))(cf)(ks),
|
||||
});
|
||||
}
|
||||
|
||||
if (_schema->ks_name() != db::system_keyspace::NAME && _schema->ks_name() != db::schema_tables::v3::NAME && _schema->ks_name() != "system_traces") {
|
||||
_metrics.add_group("column_family", {
|
||||
ms::make_histogram("read_latency", ms::description("Read latency histogram"), [this] {return _stats.estimated_read.get_histogram(std::chrono::microseconds(100));})(cf)(ks),
|
||||
@@ -1644,9 +1637,9 @@ future<> distributed_loader::open_sstable(distributed<database>& db, sstables::e
|
||||
// to distribute evenly the resource usage among all shards.
|
||||
|
||||
return db.invoke_on(column_family::calculate_shard_from_sstable_generation(comps.generation),
|
||||
[&db, comps = std::move(comps), func = std::move(func), pc] (database& local) {
|
||||
[&db, comps = std::move(comps), func = std::move(func), &pc] (database& local) {
|
||||
|
||||
return with_semaphore(local.sstable_load_concurrency_sem(), 1, [&db, &local, comps = std::move(comps), func = std::move(func), pc] {
|
||||
return with_semaphore(local.sstable_load_concurrency_sem(), 1, [&db, &local, comps = std::move(comps), func = std::move(func), &pc] {
|
||||
auto& cf = local.find_column_family(comps.ks, comps.cf);
|
||||
|
||||
auto f = sstables::sstable::load_shared_components(cf.schema(), cf._config.datadir, comps.generation, comps.version, comps.format, pc);
|
||||
@@ -1981,31 +1974,31 @@ future<> distributed_loader::populate_column_family(distributed<database>& db, s
|
||||
// case is still an invalid case, but it is way easier for us to treat it
|
||||
// by waiting for all files to be loaded, and then checking if we saw a
|
||||
// file during scan_dir, without its corresponding TOC.
|
||||
enum class component_status {
|
||||
enum class status {
|
||||
has_some_file,
|
||||
has_toc_file,
|
||||
has_temporary_toc_file,
|
||||
};
|
||||
|
||||
struct sstable_descriptor {
|
||||
component_status status;
|
||||
sstables::sstable::version_types version;
|
||||
sstables::sstable::format_types format;
|
||||
std::experimental::optional<sstables::sstable::version_types> version;
|
||||
std::experimental::optional<sstables::sstable::format_types> format;
|
||||
};
|
||||
|
||||
auto verifier = make_lw_shared<std::unordered_map<unsigned long, sstable_descriptor>>();
|
||||
auto verifier = make_lw_shared<std::unordered_map<unsigned long, status>>();
|
||||
auto descriptor = make_lw_shared<sstable_descriptor>();
|
||||
|
||||
return do_with(std::vector<future<>>(), [&db, sstdir = std::move(sstdir), verifier, ks, cf] (std::vector<future<>>& futures) {
|
||||
return lister::scan_dir(sstdir, { directory_entry_type::regular }, [&db, verifier, &futures] (lister::path sstdir, directory_entry de) {
|
||||
return do_with(std::vector<future<>>(), [&db, sstdir = std::move(sstdir), verifier, descriptor, ks, cf] (std::vector<future<>>& futures) {
|
||||
return lister::scan_dir(sstdir, { directory_entry_type::regular }, [&db, verifier, descriptor, &futures] (lister::path sstdir, directory_entry de) {
|
||||
// FIXME: The secondary indexes are in this level, but with a directory type, (starting with ".")
|
||||
auto f = distributed_loader::probe_file(db, sstdir.native(), de.name).then([verifier, sstdir, de] (auto entry) {
|
||||
auto f = distributed_loader::probe_file(db, sstdir.native(), de.name).then([verifier, descriptor, sstdir, de] (auto entry) {
|
||||
if (entry.component == sstables::sstable::component_type::TemporaryStatistics) {
|
||||
return remove_file(sstables::sstable::filename(sstdir.native(), entry.ks, entry.cf, entry.version, entry.generation,
|
||||
entry.format, sstables::sstable::component_type::TemporaryStatistics));
|
||||
}
|
||||
|
||||
if (verifier->count(entry.generation)) {
|
||||
if (verifier->at(entry.generation).status == component_status::has_toc_file) {
|
||||
if (verifier->at(entry.generation) == status::has_toc_file) {
|
||||
lister::path file_path(sstdir / de.name.c_str());
|
||||
if (entry.component == sstables::sstable::component_type::TOC) {
|
||||
throw sstables::malformed_sstable_exception("Invalid State encountered. TOC file already processed", file_path.native());
|
||||
@@ -2013,19 +2006,27 @@ future<> distributed_loader::populate_column_family(distributed<database>& db, s
|
||||
throw sstables::malformed_sstable_exception("Invalid State encountered. Temporary TOC file found after TOC file was processed", file_path.native());
|
||||
}
|
||||
} else if (entry.component == sstables::sstable::component_type::TOC) {
|
||||
verifier->at(entry.generation).status = component_status::has_toc_file;
|
||||
verifier->at(entry.generation) = status::has_toc_file;
|
||||
} else if (entry.component == sstables::sstable::component_type::TemporaryTOC) {
|
||||
verifier->at(entry.generation).status = component_status::has_temporary_toc_file;
|
||||
verifier->at(entry.generation) = status::has_temporary_toc_file;
|
||||
}
|
||||
} else {
|
||||
if (entry.component == sstables::sstable::component_type::TOC) {
|
||||
verifier->emplace(entry.generation, sstable_descriptor{component_status::has_toc_file, entry.version, entry.format});
|
||||
verifier->emplace(entry.generation, status::has_toc_file);
|
||||
} else if (entry.component == sstables::sstable::component_type::TemporaryTOC) {
|
||||
verifier->emplace(entry.generation, sstable_descriptor{component_status::has_temporary_toc_file, entry.version, entry.format});
|
||||
verifier->emplace(entry.generation, status::has_temporary_toc_file);
|
||||
} else {
|
||||
verifier->emplace(entry.generation, sstable_descriptor{component_status::has_some_file, entry.version, entry.format});
|
||||
verifier->emplace(entry.generation, status::has_some_file);
|
||||
}
|
||||
}
|
||||
|
||||
// Retrieve both version and format used for this column family.
|
||||
if (!descriptor->version) {
|
||||
descriptor->version = entry.version;
|
||||
}
|
||||
if (!descriptor->format) {
|
||||
descriptor->format = entry.format;
|
||||
}
|
||||
return make_ready_future<>();
|
||||
});
|
||||
|
||||
@@ -2056,12 +2057,14 @@ future<> distributed_loader::populate_column_family(distributed<database>& db, s
|
||||
}
|
||||
return make_ready_future<>();
|
||||
});
|
||||
}).then([verifier, sstdir, ks = std::move(ks), cf = std::move(cf)] {
|
||||
return do_for_each(*verifier, [sstdir = std::move(sstdir), ks = std::move(ks), cf = std::move(cf), verifier] (auto v) {
|
||||
if (v.second.status == component_status::has_temporary_toc_file) {
|
||||
}).then([verifier, sstdir, descriptor, ks = std::move(ks), cf = std::move(cf)] {
|
||||
return do_for_each(*verifier, [sstdir = std::move(sstdir), ks = std::move(ks), cf = std::move(cf), descriptor, verifier] (auto v) {
|
||||
if (v.second == status::has_temporary_toc_file) {
|
||||
unsigned long gen = v.first;
|
||||
sstables::sstable::version_types version = v.second.version;
|
||||
sstables::sstable::format_types format = v.second.format;
|
||||
assert(descriptor->version);
|
||||
sstables::sstable::version_types version = descriptor->version.value();
|
||||
assert(descriptor->format);
|
||||
sstables::sstable::format_types format = descriptor->format.value();
|
||||
|
||||
if (engine().cpu_id() != 0) {
|
||||
dblog.debug("At directory: {}, partial SSTable with generation {} not relevant for this shard, ignoring", sstdir, v.first);
|
||||
@@ -2069,7 +2072,7 @@ future<> distributed_loader::populate_column_family(distributed<database>& db, s
|
||||
}
|
||||
// shard 0 is responsible for removing a partial sstable.
|
||||
return sstables::sstable::remove_sstable_with_temp_toc(ks, cf, sstdir, gen, version, format);
|
||||
} else if (v.second.status != component_status::has_toc_file) {
|
||||
} else if (v.second != status::has_toc_file) {
|
||||
throw sstables::malformed_sstable_exception(sprint("At directory: %s: no TOC found for SSTable with generation %d!. Refusing to boot", sstdir, v.first));
|
||||
}
|
||||
return make_ready_future<>();
|
||||
@@ -2160,6 +2163,11 @@ database::database(const db::config& cfg, database_config dbcfg)
|
||||
void backlog_controller::adjust() {
|
||||
auto backlog = _current_backlog();
|
||||
|
||||
if (backlog >= _control_points.back().input) {
|
||||
update_controller(_control_points.back().output);
|
||||
return;
|
||||
}
|
||||
|
||||
// interpolate to find out which region we are in. This runs infrequently and there are a fixed
|
||||
// number of points so a simple loop will do.
|
||||
size_t idx = 1;
|
||||
@@ -2676,7 +2684,6 @@ future<> database::drop_column_family(const sstring& ks_name, const sstring& cf_
|
||||
auto uuid = find_uuid(ks_name, cf_name);
|
||||
auto cf = _column_families.at(uuid);
|
||||
remove(*cf);
|
||||
cf->clear_views();
|
||||
auto& ks = find_keyspace(ks_name);
|
||||
return truncate(ks, *cf, std::move(tsf), snapshot).finally([this, cf] {
|
||||
return cf->stop();
|
||||
@@ -2810,6 +2817,7 @@ keyspace::make_column_family_config(const schema& s, const db::config& db_config
|
||||
cfg.enable_disk_writes = _config.enable_disk_writes;
|
||||
cfg.enable_commitlog = _config.enable_commitlog;
|
||||
cfg.enable_cache = _config.enable_cache;
|
||||
cfg.compaction_enforce_min_threshold = _config.compaction_enforce_min_threshold;
|
||||
cfg.dirty_memory_manager = _config.dirty_memory_manager;
|
||||
cfg.streaming_dirty_memory_manager = _config.streaming_dirty_memory_manager;
|
||||
cfg.read_concurrency_semaphore = _config.read_concurrency_semaphore;
|
||||
@@ -2928,11 +2936,6 @@ bool database::has_schema(const sstring& ks_name, const sstring& cf_name) const
|
||||
return _ks_cf_to_uuid.count(std::make_pair(ks_name, cf_name)) > 0;
|
||||
}
|
||||
|
||||
std::vector<view_ptr> database::get_views() const {
|
||||
return boost::copy_range<std::vector<view_ptr>>(get_non_system_column_families()
|
||||
| boost::adaptors::filtered([] (auto& cf) { return cf->schema()->is_view(); })
|
||||
| boost::adaptors::transformed([] (auto& cf) { return view_ptr(cf->schema()); }));
|
||||
}
|
||||
|
||||
void database::create_in_memory_keyspace(const lw_shared_ptr<keyspace_metadata>& ksm) {
|
||||
keyspace ks(ksm, std::move(make_keyspace_config(*ksm)));
|
||||
@@ -3278,7 +3281,7 @@ future<mutation> database::do_apply_counter_update(column_family& cf, const froz
|
||||
std::move(regular_columns), { }, { }, cql_serialization_format::internal(), query::max_rows);
|
||||
|
||||
return do_with(std::move(slice), std::move(m), std::vector<locked_cell>(),
|
||||
[this, &cf, timeout, trace_state = std::move(trace_state), op = cf.write_in_progress()] (const query::partition_slice& slice, mutation& m, std::vector<locked_cell>& locks) mutable {
|
||||
[this, &cf, timeout, trace_state = std::move(trace_state)] (const query::partition_slice& slice, mutation& m, std::vector<locked_cell>& locks) mutable {
|
||||
tracing::trace(trace_state, "Acquiring counter locks");
|
||||
return cf.lock_counter_cells(m, timeout).then([&, m_schema = cf.schema(), trace_state = std::move(trace_state), timeout, this] (std::vector<locked_cell> lcs) mutable {
|
||||
locks = std::move(lcs);
|
||||
@@ -3511,19 +3514,16 @@ future<> database::do_apply(schema_ptr s, const frozen_mutation& m, db::timeout_
|
||||
throw std::runtime_error(sprint("attempted to mutate using not synced schema of %s.%s, version=%s",
|
||||
s->ks_name(), s->cf_name(), s->version()));
|
||||
}
|
||||
|
||||
// Signal to view building code that a write is in progress,
|
||||
// so it knows when new writes start being sent to a new view.
|
||||
auto op = cf.write_in_progress();
|
||||
if (cf.views().empty()) {
|
||||
return apply_with_commitlog(std::move(s), cf, std::move(uuid), m, timeout).finally([op = std::move(op)] { });
|
||||
return apply_with_commitlog(std::move(s), cf, std::move(uuid), m, timeout);
|
||||
}
|
||||
future<row_locker::lock_holder> f = cf.push_view_replica_updates(s, m, timeout);
|
||||
return f.then([this, s = std::move(s), uuid = std::move(uuid), &m, timeout, &cf, op = std::move(op)] (row_locker::lock_holder lock) mutable {
|
||||
return f.then([this, s = std::move(s), uuid = std::move(uuid), &m, timeout] (row_locker::lock_holder lock) {
|
||||
auto& cf = find_column_family(uuid);
|
||||
return apply_with_commitlog(std::move(s), cf, std::move(uuid), m, timeout).finally(
|
||||
// Hold the local lock on the base-table partition or row
|
||||
// taken before the read, until the update is done.
|
||||
[lock = std::move(lock), op = std::move(op)] { });
|
||||
[lock = std::move(lock)] { });
|
||||
});
|
||||
}
|
||||
|
||||
@@ -3587,6 +3587,7 @@ database::make_keyspace_config(const keyspace_metadata& ksm) {
|
||||
cfg.enable_commitlog = false;
|
||||
cfg.enable_cache = false;
|
||||
}
|
||||
cfg.compaction_enforce_min_threshold = _cfg->compaction_enforce_min_threshold();
|
||||
cfg.dirty_memory_manager = &_dirty_memory_manager;
|
||||
cfg.streaming_dirty_memory_manager = &_streaming_dirty_memory_manager;
|
||||
cfg.read_concurrency_semaphore = &_read_concurrency_sem;
|
||||
@@ -4293,12 +4294,10 @@ void column_family::set_schema(schema_ptr s) {
|
||||
|
||||
static std::vector<view_ptr>::iterator find_view(std::vector<view_ptr>& views, const view_ptr& v) {
|
||||
return std::find_if(views.begin(), views.end(), [&v] (auto&& e) {
|
||||
return e->id() == v->id();
|
||||
return e->cf_name() == v->cf_name();
|
||||
});
|
||||
}
|
||||
|
||||
void column_family::add_or_update_view(view_ptr v) {
|
||||
v->view_info()->initialize_base_dependent_fields(*schema());
|
||||
auto existing = find_view(_views, v);
|
||||
if (existing != _views.end()) {
|
||||
*existing = std::move(v);
|
||||
@@ -4314,10 +4313,6 @@ void column_family::remove_view(view_ptr v) {
|
||||
}
|
||||
}
|
||||
|
||||
void column_family::clear_views() {
|
||||
_views.clear();
|
||||
}
|
||||
|
||||
const std::vector<view_ptr>& column_family::views() const {
|
||||
return _views;
|
||||
}
|
||||
@@ -4353,8 +4348,8 @@ future<> column_family::generate_and_propagate_view_updates(const schema_ptr& ba
|
||||
flat_mutation_reader_from_mutations({std::move(m)}),
|
||||
std::move(existings)).then([this, timeout, base_token = std::move(base_token)] (auto&& updates) mutable {
|
||||
return seastar::get_units(*_config.view_update_concurrency_semaphore, 1, timeout).then(
|
||||
[this, base_token = std::move(base_token), updates = std::move(updates)] (auto units) mutable {
|
||||
db::view::mutate_MV(std::move(base_token), std::move(updates), _view_stats).handle_exception([units = std::move(units)] (auto ignored) { });
|
||||
[base_token = std::move(base_token), updates = std::move(updates)] (auto units) mutable {
|
||||
db::view::mutate_MV(std::move(base_token), std::move(updates)).handle_exception([units = std::move(units)] (auto ignored) { });
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -4402,7 +4397,7 @@ future<row_locker::lock_holder> column_family::push_view_replica_updates(const s
|
||||
std::move(slice),
|
||||
std::move(m),
|
||||
[base, views = std::move(views), lock = std::move(lock), this, timeout] (auto& pk, auto& slice, auto& m) mutable {
|
||||
auto reader = this->make_reader(
|
||||
auto reader = this->as_mutation_source().make_reader(
|
||||
base,
|
||||
pk,
|
||||
slice,
|
||||
@@ -4494,31 +4489,6 @@ column_family::local_base_lock(const schema_ptr& s, const dht::decorated_key& pk
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Given some updates on the base table and assuming there are no pre-existing, overlapping updates,
|
||||
* generates the mutations to be applied to the base table's views, and sends them to the paired
|
||||
* view replicas. The future resolves when the updates have been acknowledged by the replicas, i.e.,
|
||||
* propagating the view updates to the view replicas happens synchronously.
|
||||
*
|
||||
* @param views the affected views which need to be updated.
|
||||
* @param base_token The token to use to match the base replica with the paired replicas.
|
||||
* @param reader the base table updates being applied, which all correspond to the base token.
|
||||
* @return a future that resolves when the updates have been acknowledged by the view replicas
|
||||
*/
|
||||
future<> column_family::populate_views(
|
||||
std::vector<view_ptr> views,
|
||||
dht::token base_token,
|
||||
flat_mutation_reader&& reader) {
|
||||
auto& schema = reader.schema();
|
||||
return db::view::generate_view_updates(
|
||||
schema,
|
||||
std::move(views),
|
||||
std::move(reader),
|
||||
{ }).then([base_token = std::move(base_token), this] (auto&& updates) {
|
||||
return db::view::mutate_MV(std::move(base_token), std::move(updates), _view_stats);
|
||||
});
|
||||
}
|
||||
|
||||
void column_family::set_hit_rate(gms::inet_address addr, cache_temperature rate) {
|
||||
auto& e = _cluster_cache_hit_rates[addr];
|
||||
e.rate = rate;
|
||||
|
||||
29
database.hh
29
database.hh
@@ -297,6 +297,7 @@ public:
|
||||
bool enable_cache = true;
|
||||
bool enable_commitlog = true;
|
||||
bool enable_incremental_backups = false;
|
||||
bool compaction_enforce_min_threshold = false;
|
||||
::dirty_memory_manager* dirty_memory_manager = &default_dirty_memory_manager;
|
||||
::dirty_memory_manager* streaming_dirty_memory_manager = &default_dirty_memory_manager;
|
||||
reader_concurrency_semaphore* read_concurrency_semaphore;
|
||||
@@ -345,7 +346,6 @@ private:
|
||||
schema_ptr _schema;
|
||||
config _config;
|
||||
mutable stats _stats;
|
||||
mutable db::view::stats _view_stats;
|
||||
|
||||
uint64_t _failed_counter_applies_to_memtable = 0;
|
||||
|
||||
@@ -463,11 +463,6 @@ private:
|
||||
double _cached_percentile = -1;
|
||||
lowres_clock::time_point _percentile_cache_timestamp;
|
||||
std::chrono::milliseconds _percentile_cache_value;
|
||||
|
||||
// Phaser used to synchronize with in-progress writes. This is useful for code that,
|
||||
// after some modification, needs to ensure that new writes will see it before
|
||||
// it can proceed, such as the view building code.
|
||||
utils::phased_barrier _pending_writes_phaser;
|
||||
private:
|
||||
void update_stats_for_new_sstable(uint64_t disk_space_used_by_sstable, const std::vector<unsigned>& shards_for_the_sstable) noexcept;
|
||||
// Adds new sstable to the set of sstables
|
||||
@@ -741,6 +736,10 @@ public:
|
||||
_config.enable_incremental_backups = val;
|
||||
}
|
||||
|
||||
bool compaction_enforce_min_threshold() const {
|
||||
return _config.compaction_enforce_min_threshold;
|
||||
}
|
||||
|
||||
const sstables::sstable_set& get_sstable_set() const;
|
||||
lw_shared_ptr<sstable_list> get_sstables() const;
|
||||
lw_shared_ptr<sstable_list> get_sstables_including_compacted_undeleted() const;
|
||||
@@ -791,17 +790,8 @@ public:
|
||||
|
||||
future<> run_with_compaction_disabled(std::function<future<> ()> func);
|
||||
|
||||
utils::phased_barrier::operation write_in_progress() {
|
||||
return _pending_writes_phaser.start();
|
||||
}
|
||||
|
||||
future<> await_pending_writes() {
|
||||
return _pending_writes_phaser.advance_and_await();
|
||||
}
|
||||
|
||||
void add_or_update_view(view_ptr v);
|
||||
void remove_view(view_ptr v);
|
||||
void clear_views();
|
||||
const std::vector<view_ptr>& views() const;
|
||||
future<row_locker::lock_holder> push_view_replica_updates(const schema_ptr& s, const frozen_mutation& fm, db::timeout_clock::time_point timeout) const;
|
||||
void add_coordinator_read_latency(utils::estimated_histogram::duration latency);
|
||||
@@ -814,12 +804,6 @@ public:
|
||||
uint64_t large_partition_warning_threshold_bytes() const {
|
||||
return _config.large_partition_warning_threshold_bytes;
|
||||
}
|
||||
|
||||
future<> populate_views(
|
||||
std::vector<view_ptr>,
|
||||
dht::token base_token,
|
||||
flat_mutation_reader&&);
|
||||
|
||||
private:
|
||||
std::vector<view_ptr> affected_views(const schema_ptr& base, const mutation& update) const;
|
||||
future<> generate_and_propagate_view_updates(const schema_ptr& base,
|
||||
@@ -1000,6 +984,7 @@ public:
|
||||
bool enable_disk_writes = true;
|
||||
bool enable_cache = true;
|
||||
bool enable_incremental_backups = false;
|
||||
bool compaction_enforce_min_threshold = false;
|
||||
::dirty_memory_manager* dirty_memory_manager = &default_dirty_memory_manager;
|
||||
::dirty_memory_manager* streaming_dirty_memory_manager = &default_dirty_memory_manager;
|
||||
reader_concurrency_semaphore* read_concurrency_semaphore;
|
||||
@@ -1299,8 +1284,6 @@ public:
|
||||
|
||||
std::vector<lw_shared_ptr<column_family>> get_non_system_column_families() const;
|
||||
|
||||
std::vector<view_ptr> get_views() const;
|
||||
|
||||
const std::unordered_map<std::pair<sstring, sstring>, utils::UUID, utils::tuple_hash>&
|
||||
get_column_families_mapping() const {
|
||||
return _ks_cf_to_uuid;
|
||||
|
||||
@@ -125,6 +125,9 @@ public:
|
||||
val(compaction_static_shares, float, 0, Used, \
|
||||
"If set to higher than 0, ignore the controller's output and set the compaction shares statically. Do not set this unless you know what you are doing and suspect a problem in the controller. This option will be retired when the controller reaches more maturity" \
|
||||
) \
|
||||
val(compaction_enforce_min_threshold, bool, false, Used, \
|
||||
"If set to true, enforce the min_threshold option for compactions strictly. If false (default), Scylla may decide to compact even if below min_threshold" \
|
||||
) \
|
||||
/* Initialization properties */ \
|
||||
/* The minimal properties needed for configuring a cluster. */ \
|
||||
val(cluster_name, sstring, "", Used, \
|
||||
@@ -735,7 +738,6 @@ public:
|
||||
val(enable_sstable_data_integrity_check, bool, false, Used, "Enable interposer which checks for integrity of every sstable write." \
|
||||
" Performance is affected to some extent as a result. Useful to help debugging problems that may arise at another layers.") \
|
||||
val(cpu_scheduler, bool, true, Used, "Enable cpu scheduling") \
|
||||
val(view_building, bool, true, Used, "Enable view building; should only be set to false when the node is experience issues due to view building") \
|
||||
/* done! */
|
||||
|
||||
#define _make_value_member(name, type, deflt, status, desc, ...) \
|
||||
|
||||
@@ -42,8 +42,10 @@ const std::string manager::FILENAME_PREFIX("HintsLog" + commitlog::descriptor::S
|
||||
const std::chrono::seconds manager::hint_file_write_timeout = std::chrono::seconds(2);
|
||||
const std::chrono::seconds manager::hints_flush_period = std::chrono::seconds(10);
|
||||
const std::chrono::seconds manager::space_watchdog::_watchdog_period = std::chrono::seconds(1);
|
||||
// TODO: remove this when we switch to C++17
|
||||
constexpr size_t manager::_max_hints_send_queue_length;
|
||||
|
||||
size_t db::hints::resource_manager::max_shard_disk_space_size;
|
||||
size_t db::hints::manager::max_shard_disk_space_size;
|
||||
|
||||
manager::manager(sstring hints_directory, std::vector<sstring> hinted_dcs, int64_t max_hint_window_ms, distributed<database>& db)
|
||||
: _hints_dir(boost::filesystem::path(hints_directory) / format("{:d}", engine().cpu_id()).c_str())
|
||||
@@ -51,8 +53,8 @@ manager::manager(sstring hints_directory, std::vector<sstring> hinted_dcs, int64
|
||||
, _local_snitch_ptr(locator::i_endpoint_snitch::get_local_snitch_ptr())
|
||||
, _max_hint_window_us(max_hint_window_ms * 1000)
|
||||
, _local_db(db.local())
|
||||
, _max_send_in_flight_memory(std::max(memory::stats().total_memory() / 10, resource_manager::max_hints_send_queue_length))
|
||||
, _min_send_hint_budget(_max_send_in_flight_memory / resource_manager::max_hints_send_queue_length)
|
||||
, _max_send_in_flight_memory(std::max(memory::stats().total_memory() / 10, _max_hints_send_queue_length))
|
||||
, _min_send_hint_budget(_max_send_in_flight_memory / _max_hints_send_queue_length)
|
||||
, _send_limiter(_max_send_in_flight_memory)
|
||||
, _space_watchdog(*this)
|
||||
{
|
||||
@@ -254,8 +256,8 @@ future<db::commitlog> manager::end_point_hints_manager::add_store() noexcept {
|
||||
commitlog::config cfg;
|
||||
|
||||
cfg.commit_log_location = _hints_dir.c_str();
|
||||
cfg.commitlog_segment_size_in_mb = resource_manager::hint_segment_size_in_mb;
|
||||
cfg.commitlog_total_space_in_mb = resource_manager::max_hints_per_ep_size_mb;
|
||||
cfg.commitlog_segment_size_in_mb = _hint_segment_size_in_mb;
|
||||
cfg.commitlog_total_space_in_mb = _max_hints_per_ep_size_mb;
|
||||
cfg.fname_prefix = manager::FILENAME_PREFIX;
|
||||
|
||||
return commitlog::create_commitlog(std::move(cfg)).then([this] (commitlog l) {
|
||||
@@ -333,7 +335,7 @@ future<> manager::end_point_hints_manager::sender::do_send_one_mutation(mutation
|
||||
// to be generated as a result of hints sending.
|
||||
if (boost::range::find(natural_endpoints, end_point_key()) != natural_endpoints.end()) {
|
||||
manager_logger.trace("Sending directly to {}", end_point_key());
|
||||
return _proxy.send_to_endpoint(std::move(m), end_point_key(), { }, write_type::SIMPLE);
|
||||
return _proxy.send_to_endpoint(std::move(m), end_point_key(), write_type::SIMPLE);
|
||||
} else {
|
||||
manager_logger.trace("Endpoints set has changed and {} is no longer a replica. Mutating from scratch...", end_point_key());
|
||||
return _proxy.mutate({std::move(m)}, consistency_level::ALL, nullptr);
|
||||
@@ -468,9 +470,9 @@ void manager::space_watchdog::on_timer() {
|
||||
}).then([this] {
|
||||
// Adjust the quota to take into account the space we guarantee to every end point manager
|
||||
size_t adjusted_quota = 0;
|
||||
size_t delta = _shard_manager._ep_managers.size() * resource_manager::hint_segment_size_in_mb * 1024 * 1024;
|
||||
if (resource_manager::max_shard_disk_space_size > delta) {
|
||||
adjusted_quota = resource_manager::max_shard_disk_space_size - delta;
|
||||
size_t delta = _shard_manager._ep_managers.size() * _hint_segment_size_in_mb * 1024 * 1024;
|
||||
if (max_shard_disk_space_size > delta) {
|
||||
adjusted_quota = max_shard_disk_space_size - delta;
|
||||
}
|
||||
|
||||
bool can_hint = _total_size < adjusted_quota;
|
||||
@@ -508,7 +510,7 @@ void manager::space_watchdog::on_timer() {
|
||||
bool manager::too_many_in_flight_hints_for(ep_key_type ep) const noexcept {
|
||||
// There is no need to check the DC here because if there is an in-flight hint for this end point then this means that
|
||||
// its DC has already been checked and found to be ok.
|
||||
return _stats.size_of_hints_in_progress > resource_manager::max_size_of_hints_in_progress && !utils::fb_utilities::is_me(ep) && hints_in_progress_for(ep) > 0 && local_gossiper().get_endpoint_downtime(ep) <= _max_hint_window_us;
|
||||
return _stats.size_of_hints_in_progress > _max_size_of_hints_in_progress && !utils::fb_utilities::is_me(ep) && hints_in_progress_for(ep) > 0 && local_gossiper().get_endpoint_downtime(ep) <= _max_hint_window_us;
|
||||
}
|
||||
|
||||
bool manager::can_hint_for(ep_key_type ep) const noexcept {
|
||||
@@ -525,7 +527,7 @@ bool manager::can_hint_for(ep_key_type ep) const noexcept {
|
||||
// hints is more than the maximum allowed value.
|
||||
//
|
||||
// In the worst case there's going to be (_max_size_of_hints_in_progress + N - 1) in-flight hints, where N is the total number of nodes in the cluster.
|
||||
if (_stats.size_of_hints_in_progress > resource_manager::max_size_of_hints_in_progress && hints_in_progress_for(ep) > 0) {
|
||||
if (_stats.size_of_hints_in_progress > _max_size_of_hints_in_progress && hints_in_progress_for(ep) > 0) {
|
||||
manager_logger.trace("size_of_hints_in_progress {} hints_in_progress_for({}) {}", _stats.size_of_hints_in_progress, ep, hints_in_progress_for(ep));
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -34,7 +34,6 @@
|
||||
#include "gms/gossiper.hh"
|
||||
#include "db/commitlog/commitlog.hh"
|
||||
#include "utils/loading_shared_values.hh"
|
||||
#include "db/hints/resource_manager.hh"
|
||||
|
||||
namespace db {
|
||||
namespace hints {
|
||||
@@ -379,8 +378,13 @@ public:
|
||||
static const std::string FILENAME_PREFIX;
|
||||
static const std::chrono::seconds hints_flush_period;
|
||||
static const std::chrono::seconds hint_file_write_timeout;
|
||||
static size_t max_shard_disk_space_size;
|
||||
|
||||
private:
|
||||
static constexpr uint64_t _max_size_of_hints_in_progress = 10 * 1024 * 1024; // 10MB
|
||||
static constexpr size_t _hint_segment_size_in_mb = 32;
|
||||
static constexpr size_t _max_hints_per_ep_size_mb = 128; // 4 files 32MB each
|
||||
static constexpr size_t _max_hints_send_queue_length = 128;
|
||||
const boost::filesystem::path _hints_dir;
|
||||
|
||||
node_to_hint_store_factory_type _store_factory;
|
||||
|
||||
@@ -1,47 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <seastar/core/semaphore.hh>
|
||||
#include <seastar/core/gate.hh>
|
||||
#include <seastar/core/memory.hh>
|
||||
#include <seastar/core/future.hh>
|
||||
#include "seastarx.hh"
|
||||
#include <unordered_set>
|
||||
#include <boost/filesystem.hpp>
|
||||
#include <gms/inet_address.hh>
|
||||
|
||||
namespace db {
|
||||
namespace hints {
|
||||
|
||||
// Central place for the limits used by the hints code.
// NOTE(review): the meaning of each limit is inferred from its name and the
// original inline comments; confirm against the users of these constants.
class resource_manager {
public:
    // Cap on the amount of hint data "in progress" (10MB).
    static constexpr uint64_t max_size_of_hints_in_progress = 10 * 1024 * 1024; // 10MB

    // Size of one hint segment, expressed in megabytes.
    static constexpr size_t hint_segment_size_in_mb = 32;

    // Per-endpoint hint budget in megabytes: 4 segment files of
    // hint_segment_size_in_mb each. Derived from the segment size so the two
    // values cannot drift apart; evaluates to 128, same as before.
    static constexpr size_t max_hints_per_ep_size_mb = 4 * hint_segment_size_in_mb; // 4 files 32MB each

    // Maximum length of the hints send queue.
    static constexpr size_t max_hints_send_queue_length = 128;

    // Run-time configurable value; only declared here, its definition is not
    // part of this block.
    static size_t max_shard_disk_space_size;
};
|
||||
|
||||
}
|
||||
}
|
||||
389
db/index/secondary_index.hh
Normal file
389
db/index/secondary_index.hh
Normal file
@@ -0,0 +1,389 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 2015 ScyllaDB
|
||||
*
|
||||
* Modified by ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "core/sstring.hh"
|
||||
#include "seastarx.hh"
|
||||
|
||||
namespace db {
|
||||
namespace index {
|
||||
|
||||
/**
|
||||
* Abstract base class for different types of secondary indexes.
|
||||
*
|
||||
* Do not extend this directly, please pick from PerColumnSecondaryIndex or PerRowSecondaryIndex
|
||||
*/
|
||||
class secondary_index {
|
||||
public:
|
||||
static const sstring custom_index_option_name;
|
||||
|
||||
/**
|
||||
* The name of the option used to specify that the index is on the collection keys.
|
||||
*/
|
||||
static const sstring index_keys_option_name;
|
||||
|
||||
/**
|
||||
* The name of the option used to specify that the index is on the collection values.
|
||||
*/
|
||||
static const sstring index_values_option_name;
|
||||
|
||||
/**
|
||||
* The name of the option used to specify that the index is on the collection (map) entries.
|
||||
*/
|
||||
static const sstring index_entries_option_name;
|
||||
|
||||
#if 0 // TODO:
|
||||
|
||||
public static final AbstractType<?> keyComparator = StorageService.getPartitioner().preservesOrder()
|
||||
? BytesType.instance
|
||||
: new LocalByPartionerType(StorageService.getPartitioner());
|
||||
|
||||
/**
|
||||
* Base CF that has many indexes
|
||||
*/
|
||||
protected ColumnFamilyStore baseCfs;
|
||||
|
||||
|
||||
/**
|
||||
* The column definitions which this index is responsible for
|
||||
*/
|
||||
protected final Set<ColumnDefinition> columnDefs = Collections.newSetFromMap(new ConcurrentHashMap<ColumnDefinition,Boolean>());
|
||||
|
||||
/**
|
||||
* Perform any initialization work
|
||||
*/
|
||||
public abstract void init();
|
||||
|
||||
/**
|
||||
* Reload an existing index following a change to its configuration,
|
||||
* or that of the indexed column(s). Differs from init() in that we expect
|
||||
* expect new resources (such as CFS for a KEYS index) to be created by
|
||||
* init() but not here
|
||||
*/
|
||||
public abstract void reload();
|
||||
|
||||
/**
|
||||
* Validates the index_options passed in the ColumnDef
|
||||
* @throws ConfigurationException
|
||||
*/
|
||||
public abstract void validateOptions() throws ConfigurationException;
|
||||
|
||||
/**
|
||||
* @return The name of the index
|
||||
*/
|
||||
abstract public String getIndexName();
|
||||
|
||||
/**
|
||||
* All internal 2ndary indexes will return "_internal_" for this. Custom
|
||||
* 2ndary indexes will return their class name. This only matter for
|
||||
* SecondaryIndexManager.groupByIndexType.
|
||||
*/
|
||||
String indexTypeForGrouping()
|
||||
{
|
||||
// Our internal indexes overwrite this
|
||||
return getClass().getCanonicalName();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the unique name for this index and column
|
||||
* to be stored in the SystemKeyspace that tracks if each column is built
|
||||
*
|
||||
* @param columnName the name of the column
|
||||
* @return the unique name
|
||||
*/
|
||||
abstract public String getNameForSystemKeyspace(ByteBuffer columnName);
|
||||
|
||||
/**
|
||||
* Checks if the index for specified column is fully built
|
||||
*
|
||||
* @param columnName the column
|
||||
* @return true if the index is fully built
|
||||
*/
|
||||
public boolean isIndexBuilt(ByteBuffer columnName)
|
||||
{
|
||||
return SystemKeyspace.isIndexBuilt(baseCfs.keyspace.getName(), getNameForSystemKeyspace(columnName));
|
||||
}
|
||||
|
||||
public void setIndexBuilt()
|
||||
{
|
||||
for (ColumnDefinition columnDef : columnDefs)
|
||||
SystemKeyspace.setIndexBuilt(baseCfs.keyspace.getName(), getNameForSystemKeyspace(columnDef.name.bytes));
|
||||
}
|
||||
|
||||
public void setIndexRemoved()
|
||||
{
|
||||
for (ColumnDefinition columnDef : columnDefs)
|
||||
SystemKeyspace.setIndexRemoved(baseCfs.keyspace.getName(), getNameForSystemKeyspace(columnDef.name.bytes));
|
||||
}
|
||||
|
||||
/**
|
||||
* Called at query time
|
||||
* Creates a implementation specific searcher instance for this index type
|
||||
* @param columns the list of columns which belong to this index type
|
||||
* @return the secondary index search impl
|
||||
*/
|
||||
protected abstract SecondaryIndexSearcher createSecondaryIndexSearcher(Set<ByteBuffer> columns);
|
||||
|
||||
/**
|
||||
* Forces this indexes' in memory data to disk
|
||||
*/
|
||||
public abstract void forceBlockingFlush();
|
||||
|
||||
/**
|
||||
* Allow access to the underlying column family store if there is one
|
||||
* @return the underlying column family store or null
|
||||
*/
|
||||
public abstract ColumnFamilyStore getIndexCfs();
|
||||
|
||||
|
||||
/**
|
||||
* Delete all files and references to this index
|
||||
* @param columnName the indexed column to remove
|
||||
*/
|
||||
public abstract void removeIndex(ByteBuffer columnName);
|
||||
|
||||
/**
|
||||
* Remove the index and unregisters this index's mbean if one exists
|
||||
*/
|
||||
public abstract void invalidate();
|
||||
|
||||
/**
|
||||
* Truncate all the data from the current index
|
||||
*
|
||||
* @param truncatedAt The truncation timestamp, all data before that timestamp should be rejected.
|
||||
*/
|
||||
public abstract void truncateBlocking(long truncatedAt);
|
||||
|
||||
/**
|
||||
* Builds the index using the data in the underlying CFS
|
||||
* Blocks till it's complete
|
||||
*/
|
||||
protected void buildIndexBlocking()
|
||||
{
|
||||
logger.info(String.format("Submitting index build of %s for data in %s",
|
||||
getIndexName(), StringUtils.join(baseCfs.getSSTables(), ", ")));
|
||||
|
||||
try (Refs<SSTableReader> sstables = baseCfs.selectAndReference(ColumnFamilyStore.CANONICAL_SSTABLES).refs)
|
||||
{
|
||||
SecondaryIndexBuilder builder = new SecondaryIndexBuilder(baseCfs,
|
||||
Collections.singleton(getIndexName()),
|
||||
new ReducingKeyIterator(sstables));
|
||||
Future<?> future = CompactionManager.instance.submitIndexBuild(builder);
|
||||
FBUtilities.waitOnFuture(future);
|
||||
forceBlockingFlush();
|
||||
setIndexBuilt();
|
||||
}
|
||||
logger.info("Index build of {} complete", getIndexName());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Builds the index using the data in the underlying CF, non blocking
|
||||
*
|
||||
*
|
||||
* @return A future object which the caller can block on (optional)
|
||||
*/
|
||||
public Future<?> buildIndexAsync()
|
||||
{
|
||||
// if we're just linking in the index to indexedColumns on an already-built index post-restart, we're done
|
||||
boolean allAreBuilt = true;
|
||||
for (ColumnDefinition cdef : columnDefs)
|
||||
{
|
||||
if (!SystemKeyspace.isIndexBuilt(baseCfs.keyspace.getName(), getNameForSystemKeyspace(cdef.name.bytes)))
|
||||
{
|
||||
allAreBuilt = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (allAreBuilt)
|
||||
return null;
|
||||
|
||||
// build it asynchronously; addIndex gets called by CFS open and schema update, neither of which
|
||||
// we want to block for a long period. (actual build is serialized on CompactionManager.)
|
||||
Runnable runnable = new Runnable()
|
||||
{
|
||||
public void run()
|
||||
{
|
||||
baseCfs.forceBlockingFlush();
|
||||
buildIndexBlocking();
|
||||
}
|
||||
};
|
||||
FutureTask<?> f = new FutureTask<Object>(runnable, null);
|
||||
|
||||
new Thread(f, "Creating index: " + getIndexName()).start();
|
||||
return f;
|
||||
}
|
||||
|
||||
public ColumnFamilyStore getBaseCfs()
|
||||
{
|
||||
return baseCfs;
|
||||
}
|
||||
|
||||
private void setBaseCfs(ColumnFamilyStore baseCfs)
|
||||
{
|
||||
this.baseCfs = baseCfs;
|
||||
}
|
||||
|
||||
public Set<ColumnDefinition> getColumnDefs()
|
||||
{
|
||||
return columnDefs;
|
||||
}
|
||||
|
||||
void addColumnDef(ColumnDefinition columnDef)
|
||||
{
|
||||
columnDefs.add(columnDef);
|
||||
}
|
||||
|
||||
void removeColumnDef(ByteBuffer name)
|
||||
{
|
||||
Iterator<ColumnDefinition> it = columnDefs.iterator();
|
||||
while (it.hasNext())
|
||||
{
|
||||
if (it.next().name.bytes.equals(name))
|
||||
it.remove();
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns true if the index supports lookups for the given operator, false otherwise. */
|
||||
public boolean supportsOperator(Operator operator)
|
||||
{
|
||||
return operator == Operator.EQ;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the decoratedKey for a column value. Assumes an index CFS is present.
|
||||
* @param value column value
|
||||
* @return decorated key
|
||||
*/
|
||||
public DecoratedKey getIndexKeyFor(ByteBuffer value)
|
||||
{
|
||||
return getIndexCfs().partitioner.decorateKey(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the provided cell name is indexed by this secondary index.
|
||||
*/
|
||||
public abstract boolean indexes(CellName name);
|
||||
|
||||
/**
|
||||
* This is the primary way to create a secondary index instance for a CF column.
|
||||
* It will validate the index_options before initializing.
|
||||
*
|
||||
* @param baseCfs the source of data for the Index
|
||||
* @param cdef the meta information about this column (index_type, index_options, name, etc...)
|
||||
*
|
||||
* @return The secondary index instance for this column
|
||||
* @throws ConfigurationException
|
||||
*/
|
||||
public static SecondaryIndex createInstance(ColumnFamilyStore baseCfs, ColumnDefinition cdef) throws ConfigurationException
|
||||
{
|
||||
SecondaryIndex index;
|
||||
|
||||
switch (cdef.getIndexType())
|
||||
{
|
||||
case KEYS:
|
||||
index = new KeysIndex();
|
||||
break;
|
||||
case COMPOSITES:
|
||||
index = CompositesIndex.create(cdef);
|
||||
break;
|
||||
case CUSTOM:
|
||||
assert cdef.getIndexOptions() != null;
|
||||
String class_name = cdef.getIndexOptions().get(CUSTOM_INDEX_OPTION_NAME);
|
||||
assert class_name != null;
|
||||
try
|
||||
{
|
||||
index = (SecondaryIndex) Class.forName(class_name).newInstance();
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new RuntimeException("Unknown index type: " + cdef.getIndexName());
|
||||
}
|
||||
|
||||
index.addColumnDef(cdef);
|
||||
index.validateOptions();
|
||||
index.setBaseCfs(baseCfs);
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
public abstract boolean validate(ByteBuffer rowKey, Cell cell);
|
||||
|
||||
public abstract long estimateResultRows();
|
||||
|
||||
/**
|
||||
* Returns the index comparator for index backed by CFS, or null.
|
||||
*
|
||||
* Note: it would be cleaner to have this be a member method. However we need this when opening indexes
|
||||
* sstables, but by then the CFS won't be fully initiated, so the SecondaryIndex object won't be accessible.
|
||||
*/
|
||||
public static CellNameType getIndexComparator(CFMetaData baseMetadata, ColumnDefinition cdef)
|
||||
{
|
||||
switch (cdef.getIndexType())
|
||||
{
|
||||
case KEYS:
|
||||
return new SimpleDenseCellNameType(keyComparator);
|
||||
case COMPOSITES:
|
||||
return CompositesIndex.getIndexComparator(baseMetadata, cdef);
|
||||
case CUSTOM:
|
||||
return null;
|
||||
}
|
||||
throw new AssertionError();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return Objects.toStringHelper(this).add("columnDefs", columnDefs).toString();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
@@ -1,143 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "db/system_distributed_keyspace.hh"
|
||||
|
||||
#include "cql3/untyped_result_set.hh"
|
||||
#include "database.hh"
|
||||
#include "db/consistency_level_type.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "schema_builder.hh"
|
||||
#include "types.hh"
|
||||
|
||||
#include <seastar/core/reactor.hh>
|
||||
#include <seastar/core/shared_ptr.hh>
|
||||
|
||||
#include <boost/range/adaptor/transformed.hpp>
|
||||
|
||||
#include <optional>
|
||||
#include <vector>
|
||||
#include <experimental/optional>
|
||||
|
||||
namespace db {
|
||||
|
||||
// Returns the schema of the view_build_status table of the system_distributed
// keyspace. The schema is constructed once per thread (thread_local static)
// on first call and the cached pointer is returned afterwards.
//
// Layout: partition key (keyspace_name, view_name), clustering key host_id,
// one regular column status.
schema_ptr view_build_status() {
    static thread_local auto schema = [] {
        auto table_id = generate_legacy_id(system_distributed_keyspace::NAME, system_distributed_keyspace::VIEW_BUILD_STATUS);
        schema_builder builder(
                system_distributed_keyspace::NAME,
                system_distributed_keyspace::VIEW_BUILD_STATUS,
                std::experimental::make_optional(table_id));
        builder.with_column("keyspace_name", utf8_type, column_kind::partition_key);
        builder.with_column("view_name", utf8_type, column_kind::partition_key);
        builder.with_column("host_id", uuid_type, column_kind::clustering_key);
        builder.with_column("status", utf8_type);
        // The schema version is derived from the same legacy id as the table.
        builder.with_version(system_keyspace::generate_schema_version(table_id));
        return builder.build();
    }();
    return schema;
}
|
||||
|
||||
static std::vector<schema_ptr> all_tables() {
|
||||
return {
|
||||
view_build_status(),
|
||||
};
|
||||
}
|
||||
|
||||
// Stores references to the query processor and the migration manager; the
// object does not own either of them.
system_distributed_keyspace::system_distributed_keyspace(cql3::query_processor& qp, service::migration_manager& mm)
        : _qp(qp), _mm(mm)
{ }
|
||||
|
||||
// Announces the system_distributed keyspace and then each of its tables,
// tolerating the case where they already exist. Does nothing (returns a ready
// future) on every shard except shard 0.
future<> system_distributed_keyspace::start() {
    const bool on_shard_zero = engine().cpu_id() == 0;
    if (!on_shard_zero) {
        return make_ready_future<>();
    }

    // Runs `func` and turns an already_exists_exception into success; any
    // other failure is propagated unchanged.
    static auto ignore_existing = [] (seastar::noncopyable_function<future<>()> func) {
        return futurize_apply(std::move(func)).handle_exception_type([] (exceptions::already_exists_exception&) { });
    };

    auto announce_keyspace = [this] {
        auto ksm = keyspace_metadata::new_keyspace(
                NAME,
                "org.apache.cassandra.locator.SimpleStrategy",
                {{"replication_factor", "3"}},
                true);
        // We use min_timestamp so that the default keyspace metadata will lose with any manual adjustments.
        // See issue #2129.
        return _mm.announce_new_keyspace(ksm, api::min_timestamp, false);
    };

    auto announce_tables = [this] {
        // do_with keeps the vector alive while the tables are announced one by one.
        return do_with(all_tables(), [this] (std::vector<schema_ptr>& tables) {
            return do_for_each(tables, [this] (schema_ptr table) {
                return ignore_existing([this, table = std::move(table)] {
                    return _mm.announce_new_column_family(std::move(table), false);
                });
            });
        });
    };

    return ignore_existing(std::move(announce_keyspace)).then(std::move(announce_tables));
}
|
||||
|
||||
// Nothing to tear down: completes immediately.
future<> system_distributed_keyspace::stop() {
    auto done = make_ready_future<>();
    return done;
}
|
||||
|
||||
// Reads the build status of view `view_name` in keyspace `ks_name` from the
// view_build_status table (consistency level ONE) and returns it as a map
// from host_id to the status string stored for that host.
future<std::unordered_map<utils::UUID, sstring>> system_distributed_keyspace::view_status(sstring ks_name, sstring view_name) const {
    auto query = sprint("SELECT host_id, status FROM %s.%s WHERE keyspace_name = ? AND view_name = ?", NAME, VIEW_BUILD_STATUS);
    return _qp.process(
            std::move(query),
            db::consistency_level::ONE,
            { std::move(ks_name), std::move(view_name) },
            false).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
        std::unordered_map<utils::UUID, sstring> status_by_host;
        for (const cql3::untyped_result_set::row& row : *cql_result) {
            auto host_id = row.get_as<utils::UUID>("host_id");
            auto status = row.get_as<sstring>("status");
            // emplace keeps the first entry if a host_id were to repeat.
            status_by_host.emplace(std::move(host_id), std::move(status));
        }
        return status_by_host;
    });
}
|
||||
|
||||
// Records that the local node started building view `view_name` of keyspace
// `ks_name`: looks up the local host id, then inserts a row with status
// "STARTED" into the view_build_status table at consistency level ONE.
future<> system_distributed_keyspace::start_view_build(sstring ks_name, sstring view_name) const {
    // The lambda is `mutable` so that the by-copy captures are non-const and the
    // std::move() calls below really move; in a non-mutable lambda they would
    // act on const objects and silently degrade to copies.
    return db::system_keyspace::get_local_host_id().then([this, ks_name = std::move(ks_name), view_name = std::move(view_name)] (utils::UUID host_id) mutable {
        return _qp.process(
                sprint("INSERT INTO %s.%s (keyspace_name, view_name, host_id, status) VALUES (?, ?, ?, ?)", NAME, VIEW_BUILD_STATUS),
                db::consistency_level::ONE,
                { std::move(ks_name), std::move(view_name), std::move(host_id), "STARTED" },
                false).discard_result();
    });
}
|
||||
|
||||
// Records that the local node finished building view `view_name` of keyspace
// `ks_name`: looks up the local host id, then sets the status of the matching
// view_build_status row to "SUCCESS" at consistency level ONE.
future<> system_distributed_keyspace::finish_view_build(sstring ks_name, sstring view_name) const {
    // The lambda is `mutable` so that the by-copy captures are non-const and the
    // std::move() calls below really move; in a non-mutable lambda they would
    // act on const objects and silently degrade to copies.
    return db::system_keyspace::get_local_host_id().then([this, ks_name = std::move(ks_name), view_name = std::move(view_name)] (utils::UUID host_id) mutable {
        return _qp.process(
                sprint("UPDATE %s.%s SET status = ? WHERE keyspace_name = ? AND view_name = ? AND host_id = ?", NAME, VIEW_BUILD_STATUS),
                db::consistency_level::ONE,
                { "SUCCESS", std::move(ks_name), std::move(view_name), std::move(host_id) },
                false).discard_result();
    });
}
|
||||
|
||||
// Deletes the view_build_status partition of view `view_name` in keyspace
// `ks_name` (the rows of all hosts) at consistency level ONE.
future<> system_distributed_keyspace::remove_view(sstring ks_name, sstring view_name) const {
    auto query = sprint("DELETE FROM %s.%s WHERE keyspace_name = ? AND view_name = ?", NAME, VIEW_BUILD_STATUS);
    auto outcome = _qp.process(
            std::move(query),
            db::consistency_level::ONE,
            { std::move(ks_name), std::move(view_name) },
            false);
    return outcome.discard_result();
}
|
||||
|
||||
}
|
||||
@@ -1,58 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bytes.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "schema.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
#include "utils/UUID.hh"
|
||||
|
||||
#include <seastar/core/future.hh>
|
||||
#include <seastar/core/sstring.hh>
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
namespace db {
|
||||
|
||||
class system_distributed_keyspace {
|
||||
public:
|
||||
static constexpr auto NAME = "system_distributed";
|
||||
static constexpr auto VIEW_BUILD_STATUS = "view_build_status";
|
||||
|
||||
private:
|
||||
cql3::query_processor& _qp;
|
||||
service::migration_manager& _mm;
|
||||
|
||||
public:
|
||||
system_distributed_keyspace(cql3::query_processor&, service::migration_manager&);
|
||||
|
||||
future<> start();
|
||||
future<> stop();
|
||||
|
||||
future<std::unordered_map<utils::UUID, sstring>> view_status(sstring ks_name, sstring view_name) const;
|
||||
future<> start_view_build(sstring ks_name, sstring view_name) const;
|
||||
future<> finish_view_build(sstring ks_name, sstring view_name) const;
|
||||
future<> remove_view(sstring ks_name, sstring view_name) const;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -74,7 +74,6 @@
|
||||
#include "db/size_estimates_virtual_reader.hh"
|
||||
#include "db/timeout_clock.hh"
|
||||
#include "sstables/sstables.hh"
|
||||
#include "db/view/build_progress_virtual_reader.hh"
|
||||
#include "db/schema_tables.hh"
|
||||
|
||||
using days = std::chrono::duration<int, std::ratio<24 * 3600>>;
|
||||
@@ -643,22 +642,6 @@ schema_ptr built_views() {
|
||||
return schema;
|
||||
}
|
||||
|
||||
// Returns the schema of the scylla_views_builds_in_progress system table.
// The schema is constructed once per thread (thread_local static) on first
// call and the cached pointer is returned afterwards.
//
// Layout: partition key keyspace_name, clustering key (view_name, cpu_id),
// regular columns next_token, generation_number and first_token.
schema_ptr scylla_views_builds_in_progress() {
    static thread_local auto schema = [] {
        auto table_id = generate_legacy_id(NAME, SCYLLA_VIEWS_BUILDS_IN_PROGRESS);
        schema_builder builder(NAME, SCYLLA_VIEWS_BUILDS_IN_PROGRESS, stdx::make_optional(table_id));
        builder.with_column("keyspace_name", utf8_type, column_kind::partition_key);
        builder.with_column("view_name", utf8_type, column_kind::clustering_key);
        builder.with_column("cpu_id", int32_type, column_kind::clustering_key);
        builder.with_column("next_token", utf8_type);
        builder.with_column("generation_number", int32_type);
        builder.with_column("first_token", utf8_type);
        // The schema version is derived from the same legacy id as the table.
        builder.with_version(generate_schema_version(table_id));
        return builder.build();
    }();
    return schema;
}
|
||||
|
||||
} //</v3>
|
||||
|
||||
namespace legacy {
|
||||
@@ -1558,8 +1541,7 @@ std::vector<schema_ptr> all_tables() {
|
||||
r.insert(r.end(), { built_indexes(), hints(), batchlog(), paxos(), local(),
|
||||
peers(), peer_events(), range_xfers(),
|
||||
compactions_in_progress(), compaction_history(),
|
||||
sstable_activity(), size_estimates(), v3::views_builds_in_progress(), v3::built_views(),
|
||||
v3::scylla_views_builds_in_progress(),
|
||||
sstable_activity(), size_estimates(),
|
||||
});
|
||||
// legacy schema
|
||||
r.insert(r.end(), {
|
||||
@@ -1576,14 +1558,10 @@ static void maybe_add_virtual_reader(schema_ptr s, database& db) {
|
||||
if (s.get() == size_estimates().get()) {
|
||||
db.find_column_family(s).set_virtual_reader(mutation_source(db::size_estimates::virtual_reader()));
|
||||
}
|
||||
if (s.get() == v3::views_builds_in_progress().get()) {
|
||||
db.find_column_family(s).set_virtual_reader(mutation_source(db::view::build_progress_virtual_reader(db)));
|
||||
}
|
||||
}
|
||||
|
||||
static bool maybe_write_in_user_memory(schema_ptr s, database& db) {
|
||||
return (s.get() == batchlog().get())
|
||||
|| s == v3::scylla_views_builds_in_progress();
|
||||
return (s.get() == batchlog().get());
|
||||
}
|
||||
|
||||
void make(database& db, bool durable, bool volatile_testing_only) {
|
||||
@@ -1805,85 +1783,6 @@ mutation make_size_estimates_mutation(const sstring& ks, std::vector<range_estim
|
||||
return m_to_apply;
|
||||
}
|
||||
|
||||
// Inserts a scylla_views_builds_in_progress row for the given view on the
// current shard: generation_number 0, cpu_id = this shard's id, and
// first_token = the string form of `token`.
future<> register_view_for_building(sstring ks_name, sstring view_name, const dht::token& token) {
    return execute_cql(
            sprint("INSERT INTO system.%s (keyspace_name, view_name, generation_number, cpu_id, first_token) VALUES (?, ?, ?, ?, ?)",
                    v3::SCYLLA_VIEWS_BUILDS_IN_PROGRESS),
            std::move(ks_name),
            std::move(view_name),
            0, // generation_number
            int32_t(engine().cpu_id()),
            dht::global_partitioner().to_sstring(token)).discard_result();
}
|
||||
|
||||
// Stores `token` (in string form) as next_token of the
// scylla_views_builds_in_progress row that belongs to the given view and the
// current shard.
future<> update_view_build_progress(sstring ks_name, sstring view_name, const dht::token& token) {
    return execute_cql(
            sprint("INSERT INTO system.%s (keyspace_name, view_name, next_token, cpu_id) VALUES (?, ?, ?, ?)",
                    v3::SCYLLA_VIEWS_BUILDS_IN_PROGRESS),
            std::move(ks_name),
            std::move(view_name),
            dht::global_partitioner().to_sstring(token),
            int32_t(engine().cpu_id())).discard_result();
}
|
||||
|
||||
// Deletes the scylla_views_builds_in_progress rows of the given view. The
// statement does not restrict cpu_id, so the rows of every shard are removed.
future<> remove_view_build_progress_across_all_shards(sstring ks_name, sstring view_name) {
    sstring req = sprint("DELETE FROM system.%s WHERE keyspace_name = ? AND view_name = ?", v3::SCYLLA_VIEWS_BUILDS_IN_PROGRESS);
    return execute_cql(
            std::move(req),
            std::move(ks_name),
            std::move(view_name)).discard_result();
}
|
||||
|
||||
// Adds the given view to the built_views system table.
future<> mark_view_as_built(sstring ks_name, sstring view_name) {
    sstring req = sprint("INSERT INTO system.%s (keyspace_name, view_name) VALUES (?, ?)", v3::BUILT_VIEWS);
    return execute_cql(
            std::move(req),
            std::move(ks_name),
            std::move(view_name)).discard_result();
}
|
||||
|
||||
// Removes the given view from the built_views system table.
future<> remove_built_view(sstring ks_name, sstring view_name) {
    sstring req = sprint("DELETE FROM system.%s WHERE keyspace_name = ? AND view_name = ?", v3::BUILT_VIEWS);
    return execute_cql(
            std::move(req),
            std::move(ks_name),
            std::move(view_name)).discard_result();
}
|
||||
|
||||
// Reads the whole built_views system table and returns one
// (keyspace name, view name) pair per row.
future<std::vector<view_name>> load_built_views() {
    return execute_cql(sprint("SELECT * FROM system.%s", v3::BUILT_VIEWS)).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
        std::vector<view_name> built;
        for (const cql3::untyped_result_set::row& row : *cql_result) {
            auto ks_name = row.get_as<sstring>("keyspace_name");
            auto cf_name = row.get_as<sstring>("view_name");
            built.emplace_back(std::move(ks_name), std::move(cf_name));
        }
        return built;
    });
}
|
||||
|
||||
// Reads every row of scylla_views_builds_in_progress and converts it into a
// view_build_progress record. next_token is optional: it stays empty when the
// column holds no value for that row.
future<std::vector<view_build_progress>> load_view_build_progress() {
    sstring req = sprint("SELECT keyspace_name, view_name, first_token, next_token, cpu_id FROM system.%s",
            v3::SCYLLA_VIEWS_BUILDS_IN_PROGRESS);
    return execute_cql(std::move(req)).then([] (::shared_ptr<cql3::untyped_result_set> cql_result) {
        std::vector<view_build_progress> result;
        for (auto& row : *cql_result) {
            auto keyspace = row.get_as<sstring>("keyspace_name");
            auto view = row.get_as<sstring>("view_name");
            auto first = dht::global_partitioner().from_sstring(row.get_as<sstring>("first_token"));
            // Tokens are stored as strings; parse next_token only when present.
            std::optional<dht::token> next;
            if (auto stored_next = row.get_opt<sstring>("next_token")) {
                next = dht::global_partitioner().from_sstring(std::move(stored_next).value());
            }
            auto shard = row.get_as<int32_t>("cpu_id");
            result.push_back(view_build_progress{
                    view_name(std::move(keyspace), std::move(view)),
                    std::move(first),
                    std::move(next),
                    static_cast<shard_id>(shard)});
        }
        return result;
    });
}
|
||||
|
||||
} // namespace system_keyspace
|
||||
|
||||
sstring system_keyspace_name() {
|
||||
|
||||
@@ -40,10 +40,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "schema.hh"
|
||||
#include "utils/UUID.hh"
|
||||
#include "gms/inet_address.hh"
|
||||
@@ -101,7 +99,6 @@ static constexpr auto SIZE_ESTIMATES = "size_estimates";
|
||||
static constexpr auto AVAILABLE_RANGES = "available_ranges";
|
||||
static constexpr auto VIEWS_BUILDS_IN_PROGRESS = "views_builds_in_progress";
|
||||
static constexpr auto BUILT_VIEWS = "built_views";
|
||||
static constexpr auto SCYLLA_VIEWS_BUILDS_IN_PROGRESS = "scylla_views_builds_in_progress";
|
||||
}
|
||||
|
||||
namespace legacy {
|
||||
@@ -125,14 +122,6 @@ struct range_estimates {
|
||||
int64_t mean_partition_size;
|
||||
};
|
||||
|
||||
using view_name = std::pair<sstring, sstring>;
|
||||
struct view_build_progress {
|
||||
view_name view;
|
||||
dht::token first_token;
|
||||
std::optional<dht::token> next_token;
|
||||
shard_id cpu_id;
|
||||
};
|
||||
|
||||
extern schema_ptr hints();
|
||||
extern schema_ptr batchlog();
|
||||
extern schema_ptr built_indexes(); // TODO (from Cassandra): make private
|
||||
@@ -662,13 +651,5 @@ future<> set_bootstrap_state(bootstrap_state state);
|
||||
*/
|
||||
mutation make_size_estimates_mutation(const sstring& ks, std::vector<range_estimates> estimates);
|
||||
|
||||
future<> register_view_for_building(sstring ks_name, sstring view_name, const dht::token& token);
|
||||
future<> update_view_build_progress(sstring ks_name, sstring view_name, const dht::token& token);
|
||||
future<> remove_view_build_progress_across_all_shards(sstring ks_name, sstring view_name);
|
||||
future<> mark_view_as_built(sstring ks_name, sstring view_name);
|
||||
future<> remove_built_view(sstring ks_name, sstring view_name);
|
||||
future<std::vector<view_name>> load_built_views();
|
||||
future<std::vector<view_build_progress>> load_view_build_progress();
|
||||
|
||||
} // namespace system_keyspace
|
||||
} // namespace db
|
||||
|
||||
@@ -1,195 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "database.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "db/timeout_clock.hh"
|
||||
#include "dht/i_partitioner.hh"
|
||||
#include "flat_mutation_reader.hh"
|
||||
#include "mutation_fragment.hh"
|
||||
#include "mutation_reader.hh"
|
||||
#include "query-request.hh"
|
||||
#include "schema.hh"
|
||||
#include "tracing/tracing.hh"
|
||||
|
||||
#include <boost/range/iterator_range.hpp>
|
||||
|
||||
#include <iterator>
|
||||
#include <memory>
|
||||
|
||||
namespace db::view {
|
||||
|
||||
// Allows a user to query the views_builds_in_progress system table
|
||||
// in terms of the scylla_views_builds_in_progress one, which is
|
||||
// a superset of the former. When querying, we don't have to adjust
|
||||
// the clustering key, but we have to adjust the requested regular
|
||||
// columns. When reading the results from the scylla_views_builds_in_progress
|
||||
// table, we adjust the clustering key (we shed the cpu_id column) and map
|
||||
// back the regular columns.
|
||||
class build_progress_virtual_reader {
|
||||
database& _db;
|
||||
|
||||
struct build_progress_reader : flat_mutation_reader::impl {
|
||||
column_id _scylla_next_token_col;
|
||||
column_id _scylla_generation_number_col;
|
||||
column_id _legacy_last_token_col;
|
||||
column_id _legacy_generation_number_col;
|
||||
const query::partition_slice& _legacy_slice;
|
||||
query::partition_slice _slice;
|
||||
flat_mutation_reader _underlying;
|
||||
|
||||
build_progress_reader(
|
||||
schema_ptr legacy_schema,
|
||||
column_family& scylla_views_build_progress,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
streamed_mutation::forwarding fwd,
|
||||
mutation_reader::forwarding fwd_mr)
|
||||
: flat_mutation_reader::impl(std::move(legacy_schema))
|
||||
, _scylla_next_token_col(scylla_views_build_progress.schema()->get_column_definition("next_token")->id)
|
||||
, _scylla_generation_number_col(scylla_views_build_progress.schema()->get_column_definition("generation_number")->id)
|
||||
, _legacy_last_token_col(_schema->get_column_definition("last_token")->id)
|
||||
, _legacy_generation_number_col(_schema->get_column_definition("generation_number")->id)
|
||||
, _legacy_slice(slice)
|
||||
, _slice(adjust_partition_slice())
|
||||
, _underlying(scylla_views_build_progress.make_reader(
|
||||
scylla_views_build_progress.schema(),
|
||||
range,
|
||||
slice,
|
||||
pc,
|
||||
std::move(trace_state),
|
||||
fwd,
|
||||
fwd_mr)) {
|
||||
}
|
||||
|
||||
const schema& underlying_schema() const {
|
||||
return *_underlying.schema();
|
||||
}
|
||||
|
||||
query::partition_slice adjust_partition_slice() {
|
||||
auto slice = _legacy_slice;
|
||||
std::vector<column_id> adjusted_columns;
|
||||
for (auto col_id : slice.regular_columns) {
|
||||
if (col_id == _legacy_last_token_col) {
|
||||
adjusted_columns.push_back(_scylla_next_token_col);
|
||||
} else if (col_id == _legacy_generation_number_col) {
|
||||
adjusted_columns.push_back(_scylla_generation_number_col);
|
||||
}
|
||||
}
|
||||
slice.regular_columns = std::move(adjusted_columns);
|
||||
return slice;
|
||||
}
|
||||
|
||||
clustering_key adjust_ckey(clustering_key& ck) {
|
||||
if (ck.size(underlying_schema()) < 3) {
|
||||
return std::move(ck);
|
||||
}
|
||||
// Drop the cpu_id from the clustering key
|
||||
auto end = ck.begin(*_schema);
|
||||
std::advance(end, 1);
|
||||
auto r = boost::make_iterator_range(ck.begin(*_schema), std::move(end));
|
||||
return clustering_key_prefix::from_exploded(r);
|
||||
}
|
||||
|
||||
virtual future<> fill_buffer(db::timeout_clock::time_point timeout) override {
|
||||
return _underlying.fill_buffer(timeout).then([this] {
|
||||
_end_of_stream = _underlying.is_end_of_stream();
|
||||
while (!_underlying.is_buffer_empty()) {
|
||||
auto mf = _underlying.pop_mutation_fragment();
|
||||
if (mf.is_clustering_row()) {
|
||||
auto scylla_in_progress_row = std::move(mf).as_clustering_row();
|
||||
auto legacy_in_progress_row = row();
|
||||
// Drop the first_token from the regular columns
|
||||
scylla_in_progress_row.cells().for_each_cell([&, this] (column_id id, atomic_cell_or_collection& c) {
|
||||
if (id == _scylla_next_token_col) {
|
||||
legacy_in_progress_row.append_cell(_legacy_last_token_col, std::move(c));
|
||||
} else if (id == _scylla_generation_number_col) {
|
||||
legacy_in_progress_row.append_cell(_legacy_generation_number_col, std::move(c));
|
||||
}
|
||||
});
|
||||
mf = clustering_row(
|
||||
adjust_ckey(scylla_in_progress_row.key()),
|
||||
std::move(scylla_in_progress_row.tomb()),
|
||||
std::move(scylla_in_progress_row.marker()),
|
||||
std::move(legacy_in_progress_row));
|
||||
} else if (mf.is_range_tombstone()) {
|
||||
auto scylla_in_progress_rt = std::move(mf).as_range_tombstone();
|
||||
mf = range_tombstone(
|
||||
adjust_ckey(scylla_in_progress_rt.start),
|
||||
scylla_in_progress_rt.start_kind,
|
||||
scylla_in_progress_rt.end,
|
||||
scylla_in_progress_rt.end_kind,
|
||||
scylla_in_progress_rt.tomb);
|
||||
}
|
||||
push_mutation_fragment(std::move(mf));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
virtual void next_partition() override {
|
||||
_end_of_stream = false;
|
||||
clear_buffer_to_next_partition();
|
||||
if (is_buffer_empty()) {
|
||||
_underlying.next_partition();
|
||||
}
|
||||
}
|
||||
|
||||
virtual future<> fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) override {
|
||||
clear_buffer();
|
||||
_end_of_stream = false;
|
||||
return _underlying.fast_forward_to(pr, timeout);
|
||||
}
|
||||
|
||||
virtual future<> fast_forward_to(position_range range, db::timeout_clock::time_point timeout) override {
|
||||
forward_buffer_to(range.start());
|
||||
_end_of_stream = false;
|
||||
return _underlying.fast_forward_to(std::move(range), timeout);
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
build_progress_virtual_reader(database& db)
|
||||
: _db(db) {
|
||||
}
|
||||
|
||||
flat_mutation_reader operator()(
|
||||
schema_ptr s,
|
||||
const dht::partition_range& range,
|
||||
const query::partition_slice& slice,
|
||||
const io_priority_class& pc,
|
||||
tracing::trace_state_ptr trace_state,
|
||||
streamed_mutation::forwarding fwd,
|
||||
mutation_reader::forwarding fwd_mr) {
|
||||
return flat_mutation_reader(std::make_unique<build_progress_reader>(
|
||||
std::move(s),
|
||||
_db.find_column_family(s->ks_name(), system_keyspace::v3::SCYLLA_VIEWS_BUILDS_IN_PROGRESS),
|
||||
range,
|
||||
slice,
|
||||
pc,
|
||||
std::move(trace_state),
|
||||
fwd,
|
||||
fwd_mr));
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
998
db/view/view.cc
998
db/view/view.cc
File diff suppressed because it is too large
Load Diff
@@ -33,13 +33,6 @@ namespace db {
|
||||
|
||||
namespace view {
|
||||
|
||||
// View update counters. Every counter starts at zero.
struct stats {
    int64_t view_updates_pushed_local{0};
    int64_t view_updates_pushed_remote{0};
    int64_t view_updates_failed_local{0};
    int64_t view_updates_failed_remote{0};
};
|
||||
|
||||
/**
|
||||
* Whether the view filter considers the specified partition key.
|
||||
*
|
||||
@@ -99,7 +92,8 @@ query::clustering_row_ranges calculate_affected_clustering_ranges(
|
||||
const mutation_partition& mp,
|
||||
const std::vector<view_ptr>& views);
|
||||
|
||||
future<> mutate_MV(const dht::token& base_token, std::vector<mutation> mutations, db::view::stats& stats);
|
||||
future<> mutate_MV(const dht::token& base_token,
|
||||
std::vector<mutation> mutations);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -1,197 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "database_fwd.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "db/system_distributed_keyspace.hh"
|
||||
#include "dht/i_partitioner.hh"
|
||||
#include "keys.hh"
|
||||
#include "query-request.hh"
|
||||
#include "service/migration_listener.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
#include "sstables/sstable_set.hh"
|
||||
#include "utils/exponential_backoff_retry.hh"
|
||||
#include "utils/serialized_action.hh"
|
||||
#include "utils/UUID.hh"
|
||||
|
||||
#include <seastar/core/abort_source.hh>
|
||||
#include <seastar/core/future.hh>
|
||||
#include <seastar/core/lowres_clock.hh>
|
||||
#include <seastar/core/semaphore.hh>
|
||||
#include <seastar/core/sharded.hh>
|
||||
#include <seastar/core/shared_future.hh>
|
||||
#include <seastar/core/shared_ptr.hh>
|
||||
|
||||
#include <optional>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
namespace db::view {
|
||||
|
||||
/**
|
||||
* The view_builder is a sharded service responsible for building all defined materialized views.
|
||||
* This process entails walking over the existing data in a given base table, and using it to
|
||||
* calculate and insert the respective entries for one or more views.
|
||||
*
|
||||
* We employ a flat_mutation_reader for each base table for which we're building views.
|
||||
*
|
||||
* We aim to be resource-conscious. On a given shard, at any given moment, we consume at most
|
||||
* from one reader. We also strive for fairness, in that each build step inserts entries for
|
||||
* the views of a different base. Each build step reads and generates updates for batch_size rows.
|
||||
*
|
||||
* We lack a controller, which could potentially allow us to go faster (to execute multiple steps at
|
||||
* the same time, or consume more rows per batch), and also which would apply backpressure, so we
|
||||
* could, for example, delay executing a build step.
|
||||
*
|
||||
* View building is necessarily a sharded process. That means that on restart, if the number of shards
|
||||
* has changed, we need to calculate the most conservative token range that has been built, and build
|
||||
* the remainder.
|
||||
*
|
||||
* Interaction with the system tables:
|
||||
* - When we start building a view, we add an entry to the scylla_views_builds_in_progress
|
||||
* system table. If the node restarts at this point, we'll consider these newly inserted
|
||||
* views as having made no progress, and we'll treat them as new views;
|
||||
* - When we finish a build step, we update the progress of the views that we built during
|
||||
* this step by writing the next token to the scylla_views_builds_in_progress table. If
|
||||
* the node restarts here, we'll start building the views at the token in the next_token column.
|
||||
* - When we finish building a view, we mark it as completed in the built views system table, and
|
||||
* remove it from the in-progress system table. Under failure, the following can happen:
|
||||
* * When we fail to mark the view as built, we'll redo the last step upon node reboot;
|
||||
* * When we fail to delete the in-progress record, upon reboot we'll remove this record.
|
||||
* A view is marked as completed only when all shards have finished their share of the work, that is,
|
||||
* if a view is not built, then all shards will still have an entry in the in-progress system table;
|
||||
* - A view that a shard finished building, but not all other shards, remains in the in-progress system
|
||||
* table, with first_token == next_token.
|
||||
* Interaction with the distributed system table (view_build_status):
|
||||
* - When we start building a view, we mark the view build as being in-progress;
|
||||
* - When we finish building a view, we mark the view as being built. Upon failure,
|
||||
* we ensure that if the view is in the in-progress system table, then it may not
|
||||
* have been written to this table. We don't load the built views from this table
|
||||
* when starting. When starting, the following happens:
|
||||
* * If the view is in the system.built_views table and not the in-progress
|
||||
* system table, then it will be in view_build_status;
|
||||
* * If the view is in the system.built_views table and not in this one, it
|
||||
* will still be in the in-progress system table - we detect this and mark
|
||||
* it as built in this table too, keeping the invariant;
|
||||
* * If the view is in this table but not in system.built_views, then it will
|
||||
* also be in the in-progress system table - we don't detect this and will
|
||||
* redo the missing step, for simplicity.
|
||||
*/
|
||||
class view_builder final : public service::migration_listener::only_view_notifications, public seastar::peering_sharded_service<view_builder> {
    // Build progress of a single view.
    // Once the view is built, next_token == first_token.
    struct view_build_status final {
        view_ptr view;
        dht::token first_token;
        std::optional<dht::token> next_token;
    };

    // Build progress of every view that hangs off one base table. Each
    // execution of the build step queries the base table for the selected
    // range.
    //
    // The sstables that may hold data to be added to a view are pinned by the
    // flat_mutation_reader. Adding a view v' overwrites that pinned set even
    // when another view v'' is still being built, so the new set may contain
    // data that already reached v'' through the write path. This case is
    // assumed to be rare: the design favours using less disk space, to the
    // detriment of network bandwidth.
    struct build_step final {
        // Pins the column_family: all views may be removed, and the base table
        // with them, before we get a chance to notice.
        lw_shared_ptr<column_family> base;
        query::partition_slice pslice;
        dht::partition_range prange;
        flat_mutation_reader reader{nullptr};
        dht::decorated_key current_key{dht::minimum_token(), partition_key::make_empty()};
        std::vector<view_build_status> build_status;

        // Token of the key the step is currently positioned at.
        const dht::token& current_token() const {
            return current_key.token();
        }
    };

    using base_to_build_step_type = std::unordered_map<utils::UUID, build_step>;

    database& _db;
    db::system_distributed_keyspace& _sys_dist_ks;
    service::migration_manager& _mm;
    base_to_build_step_type _base_to_build_step;
    base_to_build_step_type::iterator _current_step = _base_to_build_step.end();
    // Runs do_build_step() through a serialized_action.
    serialized_action _build_step{[this] { return do_build_step(); }};
    // Serializes bookkeeping operations: while a build step executes, newly
    // added or removed views are not considered, which keeps the algorithms
    // simple. It also synchronizes operations with a call to stop().
    seastar::semaphore _sem{1};
    seastar::abort_source _as;
    future<> _started = make_ready_future<>();
    // Coordinates between shards the conclusion of the build process for a
    // particular view.
    std::unordered_set<utils::UUID> _built_views;
    // Test-only notifiers.
    std::unordered_map<std::pair<sstring, sstring>, seastar::shared_promise<>, utils::tuple_hash> _build_notifiers;

public:
    static constexpr size_t batch_size{128};

    view_builder(database&, db::system_distributed_keyspace&, service::migration_manager&);
    view_builder(view_builder&&) = delete;

    // Loads the state kept in the system tables so that building of the
    // existing views can resume. Requires that all views have been loaded from
    // the system tables and are reachable through the database, and that the
    // commitlog has been replayed.
    future<> start();

    // Stops the view building process.
    future<> stop();

    // Schema change notifications.
    virtual void on_create_view(const sstring& ks_name, const sstring& view_name) override;
    virtual void on_update_view(const sstring& ks_name, const sstring& view_name, bool columns_changed) override;
    virtual void on_drop_view(const sstring& ks_name, const sstring& view_name) override;

    // Test-only helper.
    future<> wait_until_built(const sstring& ks_name, const sstring& view_name, lowres_clock::time_point timeout);

private:
    build_step& get_or_create_build_step(utils::UUID);
    void initialize_reader_at_current_token(build_step&);
    void load_view_status(view_build_status, std::unordered_set<utils::UUID>&);
    void reshard(std::vector<std::vector<view_build_status>>, std::unordered_set<utils::UUID>&);
    future<> calculate_shard_build_step(std::vector<system_keyspace::view_name>, std::vector<system_keyspace::view_build_progress>);
    future<> add_new_view(view_ptr, build_step&);
    future<> do_build_step();
    void execute(build_step&, exponential_backoff_retry);
    future<> maybe_mark_view_as_built(view_ptr, dht::token);

    struct consumer;
};
|
||||
|
||||
}
|
||||
2
dist/ami/files/.bash_profile
vendored
2
dist/ami/files/.bash_profile
vendored
@@ -120,7 +120,7 @@ else
|
||||
fi
|
||||
fi
|
||||
echo -n " "
|
||||
/usr/lib/scylla/scylla_ec2_check
|
||||
/usr/lib/scylla/scylla_ec2_check --nic eth0
|
||||
if [ $? -eq 0 ]; then
|
||||
echo
|
||||
fi
|
||||
|
||||
30
dist/common/scripts/scylla_ec2_check
vendored
30
dist/common/scripts/scylla_ec2_check
vendored
@@ -2,6 +2,12 @@
|
||||
|
||||
. /usr/lib/scylla/scylla_lib.sh
|
||||
|
||||
# Prints the command line synopsis to stdout and terminates with status 1.
print_usage() {
    printf '%s\n' \
        "scylla_ec2_check --nic eth0" \
        " --nic specify NIC"
    exit 1
}
|
||||
|
||||
get_en_interface_type() {
|
||||
TYPE=`curl -s http://169.254.169.254/latest/meta-data/instance-type|cut -d . -f 1`
|
||||
SUBTYPE=`curl -s http://169.254.169.254/latest/meta-data/instance-type|cut -d . -f 2`
|
||||
@@ -18,7 +24,7 @@ get_en_interface_type() {
|
||||
}
|
||||
|
||||
is_vpc_enabled() {
|
||||
MAC=`cat /sys/class/net/eth0/address`
|
||||
MAC=`cat /sys/class/net/$1/address`
|
||||
VPC_AVAIL=`curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/$MAC/|grep vpc-id`
|
||||
[ -n "$VPC_AVAIL" ]
|
||||
}
|
||||
@@ -27,9 +33,27 @@ if ! is_ec2; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Command line handling: --nic <name> is the only supported option.
if [ $# -eq 0 ]; then
    print_usage
fi
while [ $# -gt 0 ]; do
    case "$1" in
        "--nic")
            verify_args "$@"
            NIC="$2"
            shift 2
            ;;
        *)
            # An unrecognized argument is never shifted away; without this
            # branch the loop would spin forever. print_usage exits.
            print_usage
            ;;
    esac
done

# Refuse to continue when the requested interface is not present.
if ! is_valid_nic "$NIC"; then
    echo "NIC $NIC doesn't exist."
    exit 1
fi
|
||||
|
||||
TYPE=`curl -s http://169.254.169.254/latest/meta-data/instance-type`
|
||||
EN=`get_en_interface_type`
|
||||
DRIVER=`ethtool -i eth0|awk '/^driver:/ {print $2}'`
|
||||
DRIVER=`ethtool -i $NIC|awk '/^driver:/ {print $2}'`
|
||||
if [ "$EN" = "" ]; then
|
||||
tput setaf 1
|
||||
tput bold
|
||||
@@ -39,7 +63,7 @@ if [ "$EN" = "" ]; then
|
||||
echo "More documentation available at: "
|
||||
echo "http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/enhanced-networking.html#enabling_enhanced_networking"
|
||||
exit 1
|
||||
elif ! is_vpc_enabled; then
|
||||
elif ! is_vpc_enabled $NIC; then
|
||||
tput setaf 1
|
||||
tput bold
|
||||
echo "VPC is not enabled!"
|
||||
|
||||
4
dist/common/scripts/scylla_lib.sh
vendored
4
dist/common/scripts/scylla_lib.sh
vendored
@@ -91,6 +91,10 @@ create_perftune_conf() {
|
||||
/usr/lib/scylla/perftune.py --tune net --nic "$nic" $mode --dump-options-file > /etc/scylla.d/perftune.yaml
|
||||
}
|
||||
|
||||
# Succeeds when $1 names a network interface, i.e. /sys/class/net/$1 is a
# directory.
#
# An empty name, "." / ".." and anything containing a slash are rejected up
# front: those would resolve to /sys/class/net itself or to a directory
# outside of it, and the -d test alone would report them as valid.
is_valid_nic() {
    case "$1" in
        ""|.|..|*/*)
            return 1
            ;;
    esac
    [ -d "/sys/class/net/$1" ]
}
|
||||
|
||||
. /etc/os-release
|
||||
if is_debian_variant || is_gentoo_variant; then
|
||||
SYSCONFIG=/etc/default
|
||||
|
||||
57
dist/common/scripts/scylla_setup
vendored
57
dist/common/scripts/scylla_setup
vendored
@@ -39,6 +39,27 @@ print_usage() {
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Selects the NIC to work on and stores its name in the global NIC variable.
#
# Every entry of /sys/class/net except "lo" is a candidate (kept in NICS, with
# the count in NR_NICS):
#   - no candidate: print an error and exit 1;
#   - exactly one:  pick it without asking;
#   - several:      prompt until the answer names an existing interface.
interactive_choose_nic() {
    NICS=$(for i in /sys/class/net/*;do nic=`basename $i`; if [ "$nic" != "lo" ]; then echo $nic; fi; done)
    NR_NICS=`echo $NICS|wc -w`
    if [ $NR_NICS -eq 0 ]; then
        echo "NIC not found."
        exit 1
    elif [ $NR_NICS -eq 1 ]; then
        NIC=$NICS
    else
        echo "Please select NIC from following list: "
        while true; do
            echo $NICS
            echo -n "> "
            # A failed read means stdin is exhausted; asking again could never
            # succeed, so give up instead of looping.
            if ! read -r NIC; then
                echo
                echo "No NIC selected."
                exit 1
            fi
            # An empty answer must not count as a choice: /sys/class/net/
            # itself is a directory, so the existence check alone would
            # accept it.
            if [ -n "$NIC" ] && is_valid_nic "$NIC"; then
                break
            fi
        done
    fi
}
|
||||
|
||||
interactive_ask_service() {
|
||||
echo $1
|
||||
echo $2
|
||||
@@ -112,14 +133,20 @@ run_setup_script() {
|
||||
name=$1
|
||||
shift 1
|
||||
$* &&:
|
||||
if [ $? -ne 0 ] && [ $INTERACTIVE -eq 1 ]; then
|
||||
printf "${RED}$name setup failed. press any key to continue...${NO_COLOR}\n"
|
||||
read
|
||||
return 1
|
||||
if [ $? -ne 0 ]; then
|
||||
if [ $INTERACTIVE -eq 1 ]; then
|
||||
printf "${RED}$name setup failed. press any key to continue...${NO_COLOR}\n"
|
||||
read
|
||||
return 1
|
||||
else
|
||||
printf "$name setup failed.\n"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
return 0
|
||||
}
|
||||
|
||||
NIC="eth0"
|
||||
AMI=0
|
||||
SET_NIC=0
|
||||
DEV_MODE=0
|
||||
@@ -260,7 +287,8 @@ if is_ec2; then
|
||||
EC2_CHECK=$?
|
||||
fi
|
||||
if [ $EC2_CHECK -eq 1 ]; then
|
||||
/usr/lib/scylla/scylla_ec2_check
|
||||
interactive_choose_nic
|
||||
/usr/lib/scylla/scylla_ec2_check --nic $NIC
|
||||
fi
|
||||
fi
|
||||
|
||||
@@ -447,24 +475,6 @@ if [ $INTERACTIVE -eq 1 ]; then
|
||||
interactive_ask_service "Do you want to setup sysconfig?" "Answer yes to do system wide configuration customized for Scylla. Answer no to do nothing." "yes" &&:
|
||||
SYSCONFIG_SETUP=$?
|
||||
if [ $SYSCONFIG_SETUP -eq 1 ]; then
|
||||
NICS=$(for i in /sys/class/net/*;do nic=`basename $i`; if [ "$nic" != "lo" ]; then echo $nic; fi; done)
|
||||
NR_NICS=`echo $NICS|wc -w`
|
||||
if [ $NR_NICS -eq 0 ]; then
|
||||
echo "NIC not found."
|
||||
exit 1
|
||||
elif [ $NR_NICS -eq 1 ]; then
|
||||
NIC=$NICS
|
||||
else
|
||||
echo "Please select NIC from following list: "
|
||||
while true; do
|
||||
echo $NICS
|
||||
echo -n "> "
|
||||
read NIC
|
||||
if [ -e /sys/class/net/$NIC ]; then
|
||||
break
|
||||
fi
|
||||
done
|
||||
fi
|
||||
interactive_ask_service "Do you want to optimize NIC queue settings?" "Answer yes to enable network card optimization and improve performance. Answer no to skip this optimization." "yes" &&:
|
||||
SET_NIC=$?
|
||||
fi
|
||||
@@ -474,6 +484,7 @@ if [ $SYSCONFIG_SETUP -eq 1 ]; then
|
||||
if [ $SET_NIC -eq 1 ]; then
|
||||
SETUP_ARGS="--setup-nic"
|
||||
fi
|
||||
interactive_choose_nic
|
||||
run_setup_script "NIC queue" /usr/lib/scylla/scylla_sysconfig_setup --nic $NIC $SETUP_ARGS
|
||||
fi
|
||||
|
||||
|
||||
13
dist/debian/build_deb.sh
vendored
13
dist/debian/build_deb.sh
vendored
@@ -254,19 +254,18 @@ if [ "$TARGET" != "trusty" ]; then
|
||||
cp dist/common/systemd/node-exporter.service debian/scylla-server.node-exporter.service
|
||||
fi
|
||||
|
||||
sudo cp ./dist/debian/pbuilderrc ~root/.pbuilderrc
|
||||
if [ $NO_CLEAN -eq 0 ]; then
|
||||
sudo rm -fv /var/cache/pbuilder/scylla-server-$TARGET.tgz
|
||||
sudo -H DIST=$TARGET /usr/sbin/pbuilder clean
|
||||
sudo -H DIST=$TARGET /usr/sbin/pbuilder create --allow-untrusted
|
||||
sudo DIST=$TARGET /usr/sbin/pbuilder clean --configfile ./dist/debian/pbuilderrc
|
||||
sudo DIST=$TARGET /usr/sbin/pbuilder create --configfile ./dist/debian/pbuilderrc --allow-untrusted
|
||||
fi
|
||||
if [ $JOBS -ne 0 ]; then
|
||||
DEB_BUILD_OPTIONS="parallel=$JOBS"
|
||||
fi
|
||||
sudo -H DIST=$TARGET /usr/sbin/pbuilder update --allow-untrusted
|
||||
sudo -H DIST=$TARGET /usr/sbin/pbuilder update --configfile ./dist/debian/pbuilderrc --allow-untrusted
|
||||
if [ "$TARGET" = "trusty" ] || [ "$TARGET" = "xenial" ] || [ "$TARGET" = "yakkety" ] || [ "$TARGET" = "zesty" ] || [ "$TARGET" = "artful" ] || [ "$TARGET" = "bionic" ]; then
|
||||
sudo -H DIST=$TARGET /usr/sbin/pbuilder execute --save-after-exec dist/debian/ubuntu_enable_ppa.sh
|
||||
sudo DIST=$TARGET /usr/sbin/pbuilder execute --configfile ./dist/debian/pbuilderrc --save-after-exec dist/debian/ubuntu_enable_ppa.sh
|
||||
elif [ "$TARGET" = "jessie" ] || [ "$TARGET" = "stretch" ]; then
|
||||
sudo -H DIST=$TARGET /usr/sbin/pbuilder execute --save-after-exec dist/debian/debian_install_gpgkey.sh
|
||||
sudo DIST=$TARGET /usr/sbin/pbuilder execute --configfile ./dist/debian/pbuilderrc --save-after-exec dist/debian/debian_install_gpgkey.sh
|
||||
fi
|
||||
sudo -H DIST=$TARGET DEB_BUILD_OPTIONS=$DEB_BUILD_OPTIONS pdebuild --buildresult build/debs
|
||||
sudo -H DIST=$TARGET DEB_BUILD_OPTIONS=$DEB_BUILD_OPTIONS pdebuild --configfile ./dist/debian/pbuilderrc --buildresult build/debs
|
||||
|
||||
@@ -183,10 +183,7 @@ flat_mutation_reader make_delegating_reader(flat_mutation_reader& r) {
|
||||
flat_mutation_reader make_forwardable(flat_mutation_reader m) {
|
||||
class reader : public flat_mutation_reader::impl {
|
||||
flat_mutation_reader _underlying;
|
||||
position_range _current = {
|
||||
position_in_partition(position_in_partition::partition_start_tag_t()),
|
||||
position_in_partition(position_in_partition::after_static_row_tag_t())
|
||||
};
|
||||
position_range _current;
|
||||
mutation_fragment_opt _next;
|
||||
// When resolves, _next is engaged or _end_of_stream is set.
|
||||
future<> ensure_next() {
|
||||
@@ -201,7 +198,10 @@ flat_mutation_reader make_forwardable(flat_mutation_reader m) {
|
||||
});
|
||||
}
|
||||
public:
|
||||
reader(flat_mutation_reader r) : impl(r.schema()), _underlying(std::move(r)) { }
|
||||
reader(flat_mutation_reader r) : impl(r.schema()), _underlying(std::move(r)), _current({
|
||||
position_in_partition(position_in_partition::partition_start_tag_t()),
|
||||
position_in_partition(position_in_partition::after_static_row_tag_t())
|
||||
}) { }
|
||||
virtual future<> fill_buffer(db::timeout_clock::time_point timeout) override {
|
||||
return repeat([this] {
|
||||
if (is_buffer_full()) {
|
||||
@@ -621,37 +621,3 @@ make_flat_multi_range_reader(schema_ptr s, mutation_source source, const dht::pa
|
||||
return make_flat_mutation_reader<flat_multi_range_mutation_reader>(std::move(s), std::move(source), ranges,
|
||||
slice, pc, std::move(trace_state), fwd_mr);
|
||||
}
|
||||
|
||||
// Builds a reader over `schema` that emits the given fragments in order,
// consuming them from an in-memory queue. The reader cannot be fast forwarded.
flat_mutation_reader
make_flat_mutation_reader_from_fragments(schema_ptr schema, std::deque<mutation_fragment> fragments) {
    class reader : public flat_mutation_reader::impl {
        std::deque<mutation_fragment> _fragments;

        // Refreshes _end_of_stream from the queue and tells whether any
        // fragment is still pending.
        bool has_pending() {
            _end_of_stream = _fragments.empty();
            return !_end_of_stream;
        }
    public:
        reader(schema_ptr schema, std::deque<mutation_fragment> fragments)
                : flat_mutation_reader::impl(std::move(schema))
                , _fragments(std::move(fragments)) {
        }
        // Moves queued fragments into the buffer until the queue runs dry or
        // the buffer is full.
        virtual future<> fill_buffer(db::timeout_clock::time_point) override {
            while (has_pending() && !is_buffer_full()) {
                push_mutation_fragment(std::move(_fragments.front()));
                _fragments.pop_front();
            }
            return make_ready_future<>();
        }
        // Skips to the next partition: first inside the buffer and, if that
        // empties it, by dropping queued fragments up to the next partition
        // start.
        virtual void next_partition() override {
            clear_buffer_to_next_partition();
            if (!is_buffer_empty()) {
                return;
            }
            while (has_pending() && !_fragments.front().is_partition_start()) {
                _fragments.pop_front();
            }
        }
        // NOTE(review): the wording of the two messages below looks swapped
        // relative to the overloads (position range vs. partition range).
        // Confirm before changing, callers may match on the text.
        virtual future<> fast_forward_to(position_range pr, db::timeout_clock::time_point timeout) override {
            throw std::runtime_error("This reader can't be fast forwarded to another range.");
        }
        virtual future<> fast_forward_to(const dht::partition_range& pr, db::timeout_clock::time_point timeout) override {
            throw std::runtime_error("This reader can't be fast forwarded to another position.");
        }
    };
    return make_flat_mutation_reader<reader>(std::move(schema), std::move(fragments));
}
|
||||
|
||||
@@ -32,8 +32,6 @@
|
||||
#include <seastar/util/gcc6-concepts.hh>
|
||||
#include "db/timeout_clock.hh"
|
||||
|
||||
#include <deque>
|
||||
|
||||
using seastar::future;
|
||||
|
||||
class mutation_source;
|
||||
@@ -557,9 +555,6 @@ make_flat_multi_range_reader(schema_ptr s, mutation_source source, const dht::pa
|
||||
tracing::trace_state_ptr trace_state = nullptr,
|
||||
flat_mutation_reader::partition_range_forwarding fwd_mr = flat_mutation_reader::partition_range_forwarding::yes);
|
||||
|
||||
flat_mutation_reader
|
||||
make_flat_mutation_reader_from_fragments(schema_ptr, std::deque<mutation_fragment>);
|
||||
|
||||
// Calls the consumer for each element of the reader's stream until end of stream
|
||||
// is reached or the consumer requests iteration to stop by returning stop_iteration::yes.
|
||||
// The consumer should accept mutation as the argument and return stop_iteration.
|
||||
|
||||
@@ -1005,7 +1005,7 @@ future<> gossiper::assassinate_endpoint(sstring address) {
|
||||
logger.warn("Assassinating {} via gossip", endpoint);
|
||||
if (es) {
|
||||
auto& ss = service::get_local_storage_service();
|
||||
auto tokens = ss.get_token_metadata().get_tokens(endpoint);
|
||||
tokens = ss.get_token_metadata().get_tokens(endpoint);
|
||||
if (tokens.empty()) {
|
||||
logger.warn("Unable to calculate tokens for {}. Will use a random one", address);
|
||||
throw std::runtime_error(sprint("Unable to calculate tokens for %s", endpoint));
|
||||
|
||||
@@ -1,77 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (C) 2015 ScyllaDB
|
||||
*
|
||||
* Modified by ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "core/sstring.hh"
|
||||
#include "seastarx.hh"
|
||||
|
||||
namespace db {
|
||||
namespace index {
|
||||
|
||||
/**
|
||||
* Abstract base class for different types of secondary indexes.
|
||||
*
|
||||
* Do not extend this directly, please pick from PerColumnSecondaryIndex or PerRowSecondaryIndex
|
||||
*/
|
||||
class secondary_index {
|
||||
public:
|
||||
static const sstring custom_index_option_name;
|
||||
|
||||
/**
|
||||
* The name of the option used to specify that the index is on the collection keys.
|
||||
*/
|
||||
static const sstring index_keys_option_name;
|
||||
|
||||
/**
|
||||
* The name of the option used to specify that the index is on the collection values.
|
||||
*/
|
||||
static const sstring index_values_option_name;
|
||||
|
||||
/**
|
||||
* The name of the option used to specify that the index is on the collection (map) entries.
|
||||
*/
|
||||
static const sstring index_entries_option_name;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
@@ -42,7 +42,6 @@
|
||||
#include "index/secondary_index_manager.hh"
|
||||
|
||||
#include "cql3/statements/index_target.hh"
|
||||
#include "cql3/util.hh"
|
||||
#include "index/target_parser.hh"
|
||||
#include "db/query_context.hh"
|
||||
#include "schema_builder.hh"
|
||||
@@ -94,14 +93,11 @@ void secondary_index_manager::add_index(const index_metadata& im) {
|
||||
_indices.emplace(im.name(), index{index_target_name, im});
|
||||
}
|
||||
|
||||
static sstring index_table_name(const sstring& index_name) {
|
||||
return sprint("%s_index", index_name);
|
||||
}
|
||||
|
||||
view_ptr secondary_index_manager::create_view_for_index(const index_metadata& im) const {
|
||||
auto schema = _cf.schema();
|
||||
sstring index_table_name = sprint("%s_index", im.name());
|
||||
sstring index_target_name = im.options().at(cql3::statements::index_target::target_option_name);
|
||||
schema_builder builder{schema->ks_name(), index_table_name(im.name())};
|
||||
schema_builder builder{schema->ks_name(), index_table_name};
|
||||
auto target = target_parser::parse(schema, im);
|
||||
const auto* index_target = std::get<const column_definition*>(target);
|
||||
auto target_type = std::get<cql3::statements::index_target::target_type>(target);
|
||||
@@ -110,9 +106,6 @@ view_ptr secondary_index_manager::create_view_for_index(const index_metadata& im
|
||||
}
|
||||
builder.with_column(index_target->name(), index_target->type, column_kind::partition_key);
|
||||
for (auto& col : schema->partition_key_columns()) {
|
||||
if (col == *index_target) {
|
||||
continue;
|
||||
}
|
||||
builder.with_column(col.name(), col.type, column_kind::clustering_key);
|
||||
}
|
||||
for (auto& col : schema->clustering_key_columns()) {
|
||||
@@ -121,7 +114,7 @@ view_ptr secondary_index_manager::create_view_for_index(const index_metadata& im
|
||||
}
|
||||
builder.with_column(col.name(), col.type, column_kind::clustering_key);
|
||||
}
|
||||
const sstring where_clause = sprint("%s IS NOT NULL", cql3::util::maybe_quote(index_target_name));
|
||||
const sstring where_clause = sprint("%s IS NOT NULL", index_target_name);
|
||||
builder.with_view_info(*schema, false, where_clause);
|
||||
return view_ptr{builder.build()};
|
||||
}
|
||||
@@ -136,14 +129,4 @@ std::vector<index_metadata> secondary_index_manager::get_dependent_indices(const
|
||||
std::vector<index> secondary_index_manager::list_indexes() const {
|
||||
return boost::copy_range<std::vector<index>>(_indices | boost::adaptors::map_values);
|
||||
}
|
||||
|
||||
bool secondary_index_manager::is_index(view_ptr view) const {
|
||||
for (auto& i : list_indexes()) {
|
||||
if (view->cf_name() == index_table_name(i.metadata().name())) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -70,7 +70,6 @@ public:
|
||||
view_ptr create_view_for_index(const index_metadata& index) const;
|
||||
std::vector<index_metadata> get_dependent_indices(const column_definition& cdef) const;
|
||||
std::vector<index> list_indexes() const;
|
||||
bool is_index(view_ptr) const;
|
||||
private:
|
||||
void add_index(const index_metadata& im);
|
||||
};
|
||||
|
||||
@@ -79,6 +79,18 @@ struct target_parser {
|
||||
target_type = index_target::target_type::values;
|
||||
}
|
||||
|
||||
// in the case of a quoted column name the name in the target string
|
||||
// will be enclosed in quotes, which we need to unwrap. It may also
|
||||
// include quote characters internally, escaped like so:
|
||||
// abc"def -> abc""def.
|
||||
// Because the target string is stored in a CQL compatible form, we
|
||||
// need to un-escape any such quotes to get the actual column name
|
||||
static const sstring quote{"\""};
|
||||
if (boost::starts_with(target, quote)) {
|
||||
column_name = column_name.substr(1, column_name.length()-2);
|
||||
static const std::regex two_quotes("\"\"");
|
||||
column_name = std::regex_replace(std::string{column_name}, two_quotes, std::string{quote});
|
||||
}
|
||||
auto column = schema->get_column_definition(utf8_type->decompose(column_name));
|
||||
if (!column) {
|
||||
return stdx::nullopt;
|
||||
|
||||
4
init.cc
4
init.cc
@@ -34,8 +34,8 @@ logging::logger startlog("init");
|
||||
// duplicated in cql_test_env.cc
|
||||
// until proper shutdown is done.
|
||||
|
||||
void init_storage_service(distributed<database>& db, sharded<auth::service>& auth_service, sharded<db::system_distributed_keyspace>& sys_dist_ks) {
|
||||
service::init_storage_service(db, auth_service, sys_dist_ks).get();
|
||||
void init_storage_service(distributed<database>& db, sharded<auth::service>& auth_service) {
|
||||
service::init_storage_service(db, auth_service).get();
|
||||
// #293 - do not stop anything
|
||||
//engine().at_exit([] { return service::deinit_storage_service(); });
|
||||
}
|
||||
|
||||
3
init.hh
3
init.hh
@@ -25,7 +25,6 @@
|
||||
#include <seastar/core/distributed.hh>
|
||||
#include "auth/service.hh"
|
||||
#include "db/config.hh"
|
||||
#include "db/system_distributed_keyspace.hh"
|
||||
#include "database.hh"
|
||||
#include "log.hh"
|
||||
|
||||
@@ -37,7 +36,7 @@ extern logging::logger startlog;
|
||||
|
||||
class bad_configuration_error : public std::exception {};
|
||||
|
||||
void init_storage_service(distributed<database>& db, sharded<auth::service>&, sharded<db::system_distributed_keyspace>&);
|
||||
void init_storage_service(distributed<database>& db, sharded<auth::service>&);
|
||||
void init_ms_fd_gossiper(sstring listen_address
|
||||
, uint16_t storage_port
|
||||
, uint16_t ssl_storage_port
|
||||
|
||||
40
keys.hh
40
keys.hh
@@ -146,19 +146,6 @@ public:
|
||||
auto components(const schema& s) const {
|
||||
return components();
|
||||
}
|
||||
|
||||
bool is_empty() const {
|
||||
return _bytes.empty();
|
||||
}
|
||||
|
||||
explicit operator bool() const {
|
||||
return !is_empty();
|
||||
}
|
||||
|
||||
// For backward compatibility with existing code.
|
||||
bool is_empty(const schema& s) const {
|
||||
return is_empty();
|
||||
}
|
||||
};
|
||||
|
||||
template <typename TopLevel, typename TopLevelView>
|
||||
@@ -317,19 +304,6 @@ public:
|
||||
return get_compound_type(s)->end(_bytes);
|
||||
}
|
||||
|
||||
bool is_empty() const {
|
||||
return _bytes.empty();
|
||||
}
|
||||
|
||||
explicit operator bool() const {
|
||||
return !is_empty();
|
||||
}
|
||||
|
||||
// For backward compatibility with existing code.
|
||||
bool is_empty(const schema& s) const {
|
||||
return is_empty();
|
||||
}
|
||||
|
||||
// Returns a range of bytes_view
|
||||
auto components() const {
|
||||
return TopLevelView::compound::element_type::components(representation());
|
||||
@@ -542,6 +516,10 @@ public:
|
||||
bool is_full(const schema& s) const {
|
||||
return TopLevel::get_compound_type(s)->is_full(base::_bytes);
|
||||
}
|
||||
|
||||
bool is_empty(const schema& s) const {
|
||||
return TopLevel::get_compound_type(s)->is_empty(base::_bytes);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename TopLevel, typename TopLevelView, typename FullTopLevel>
|
||||
@@ -560,6 +538,10 @@ public:
|
||||
return TopLevel::get_compound_type(s)->is_full(base::_bytes);
|
||||
}
|
||||
|
||||
bool is_empty(const schema& s) const {
|
||||
return TopLevel::get_compound_type(s)->is_empty(base::_bytes);
|
||||
}
|
||||
|
||||
// Can be called only if is_full()
|
||||
FullTopLevel to_full(const schema& s) const {
|
||||
return FullTopLevel::from_exploded(s, base::explode(s));
|
||||
@@ -739,6 +721,10 @@ public:
|
||||
static const compound& get_compound_type(const schema& s) {
|
||||
return s.clustering_key_prefix_type();
|
||||
}
|
||||
|
||||
static clustering_key_prefix_view make_empty() {
|
||||
return { bytes_view() };
|
||||
}
|
||||
};
|
||||
|
||||
class clustering_key_prefix : public prefix_compound_wrapper<clustering_key_prefix, clustering_key_prefix_view, clustering_key> {
|
||||
@@ -817,4 +803,4 @@ struct appending_hash<clustering_key_prefix> {
|
||||
void operator()(Hasher& h, const clustering_key_prefix& ck, const schema& s) const {
|
||||
appending_hash<clustering_key_prefix_view>()(h, ck.view(), s);
|
||||
}
|
||||
};
|
||||
};
|
||||
@@ -119,9 +119,17 @@ insert_token_range_to_sorted_container_while_unwrapping(
|
||||
const dht::token& tok,
|
||||
dht::token_range_vector& ret) {
|
||||
if (prev_tok < tok) {
|
||||
ret.emplace_back(
|
||||
dht::token_range::bound(prev_tok, false),
|
||||
dht::token_range::bound(tok, true));
|
||||
auto pos = ret.end();
|
||||
if (!ret.empty() && !std::prev(pos)->end()) {
|
||||
// We inserted a wrapped range (a, b] previously as
|
||||
// (-inf, b], (a, +inf). So now we insert in the next-to-last
|
||||
// position to keep the last range (a, +inf) at the end.
|
||||
pos = std::prev(pos);
|
||||
}
|
||||
ret.insert(pos,
|
||||
dht::token_range{
|
||||
dht::token_range::bound(prev_tok, false),
|
||||
dht::token_range::bound(tok, true)});
|
||||
} else {
|
||||
ret.emplace_back(
|
||||
dht::token_range::bound(prev_tok, false),
|
||||
|
||||
55
main.cc
55
main.cc
@@ -35,12 +35,10 @@
|
||||
#include "service/load_broadcaster.hh"
|
||||
#include "streaming/stream_session.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "db/system_distributed_keyspace.hh"
|
||||
#include "db/batchlog_manager.hh"
|
||||
#include "db/commitlog/commitlog.hh"
|
||||
#include "db/hints/manager.hh"
|
||||
#include "db/commitlog/commitlog_replayer.hh"
|
||||
#include "db/view/view_builder.hh"
|
||||
#include "utils/runtime.hh"
|
||||
#include "utils/file_lock.hh"
|
||||
#include "log.hh"
|
||||
@@ -471,11 +469,9 @@ int main(int ac, char** av) {
|
||||
ctx.http_server.listen(ipv4_addr{ip, api_port}).get();
|
||||
startlog.info("Scylla API server listening on {}:{} ...", api_address, api_port);
|
||||
static sharded<auth::service> auth_service;
|
||||
static sharded<db::system_distributed_keyspace> sys_dist_ks;
|
||||
supervisor::notify("initializing storage service");
|
||||
init_storage_service(db, auth_service, sys_dist_ks);
|
||||
init_storage_service(db, auth_service);
|
||||
supervisor::notify("starting per-shard database core");
|
||||
|
||||
// Note: changed from using a move here, because we want the config object intact.
|
||||
database_config dbcfg;
|
||||
auto make_sched_group = [&] (sstring name, unsigned shares) {
|
||||
@@ -514,24 +510,18 @@ int main(int ac, char** av) {
|
||||
db.local().get_config().data_file_directories().cend());
|
||||
directories.insert(db.local().get_config().commitlog_directory());
|
||||
|
||||
supervisor::notify("creating hints directories");
|
||||
if (hinted_handoff_enabled) {
|
||||
supervisor::notify("creating hints directories");
|
||||
using namespace boost::filesystem;
|
||||
|
||||
boost::filesystem::path hints_base_dir(db.local().get_config().hints_directory());
|
||||
dirs.touch_and_lock(db.local().get_config().hints_directory()).get();
|
||||
directories.insert(db.local().get_config().hints_directory());
|
||||
for (unsigned i = 0; i < smp::count; ++i) {
|
||||
sstring shard_dir((hints_base_dir / seastar::to_sstring(i).c_str()).native());
|
||||
dirs.touch_and_lock(shard_dir).get();
|
||||
directories.insert(std::move(shard_dir));
|
||||
}
|
||||
boost::filesystem::path view_pending_updates_base_dir = boost::filesystem::path(db.local().get_config().data_file_directories()[0]) / "view_pending_updates";
|
||||
sstring view_pending_updates_base_dir_str = view_pending_updates_base_dir.native();
|
||||
dirs.touch_and_lock(view_pending_updates_base_dir_str).get();
|
||||
directories.insert(view_pending_updates_base_dir_str);
|
||||
for (unsigned i = 0; i < smp::count; ++i) {
|
||||
sstring shard_dir((view_pending_updates_base_dir / seastar::to_sstring(i).c_str()).native());
|
||||
dirs.touch_and_lock(shard_dir).get();
|
||||
directories.insert(std::move(shard_dir));
|
||||
path hints_base_dir(db.local().get_config().hints_directory());
|
||||
dirs.touch_and_lock(db.local().get_config().hints_directory()).get();
|
||||
directories.insert(db.local().get_config().hints_directory());
|
||||
for (unsigned i = 0; i < smp::count; ++i) {
|
||||
sstring shard_dir((hints_base_dir / seastar::to_sstring(i).c_str()).native());
|
||||
dirs.touch_and_lock(shard_dir).get();
|
||||
directories.insert(std::move(shard_dir));
|
||||
}
|
||||
}
|
||||
|
||||
supervisor::notify("verifying directories");
|
||||
@@ -639,7 +629,7 @@ int main(int ac, char** av) {
|
||||
}
|
||||
// If the same sstable is shared by several shards, it cannot be
|
||||
// deleted until all shards decide to compact it. So we want to
|
||||
// start these compactions now. Note we start compacting only after
|
||||
// start thse compactions now. Note we start compacting only after
|
||||
// all sstables in this CF were loaded on all shards - otherwise
|
||||
// we will have races between the compaction and loading processes
|
||||
// We also want to trigger regular compaction on boot.
|
||||
@@ -707,17 +697,10 @@ int main(int ac, char** av) {
|
||||
gms::get_local_gossiper().wait_for_gossip_to_settle().get();
|
||||
api::set_server_gossip_settle(ctx).get();
|
||||
|
||||
supervisor::notify("starting hinted handoff manager");
|
||||
db::hints::manager::rebalance().get();
|
||||
proxy.invoke_on_all([] (service::storage_proxy& local_proxy) {
|
||||
local_proxy.start_hints_manager(gms::get_local_gossiper().shared_from_this());
|
||||
}).get();
|
||||
|
||||
static sharded<db::view::view_builder> view_builder;
|
||||
if (cfg->view_building()) {
|
||||
supervisor::notify("starting the view builder");
|
||||
view_builder.start(std::ref(db), std::ref(sys_dist_ks), std::ref(mm)).get();
|
||||
view_builder.invoke_on_all(&db::view::view_builder::start).get();
|
||||
if (hinted_handoff_enabled) {
|
||||
supervisor::notify("starting hinted handoff manager");
|
||||
db::hints::manager::rebalance().get();
|
||||
proxy.invoke_on_all([] (service::storage_proxy& local_proxy) { local_proxy.start_hints_manager(gms::get_local_gossiper().shared_from_this()); }).get();
|
||||
}
|
||||
|
||||
supervisor::notify("starting native transport");
|
||||
@@ -750,10 +733,6 @@ int main(int ac, char** av) {
|
||||
return service::get_local_storage_service().drain_on_shutdown();
|
||||
});
|
||||
|
||||
engine().at_exit([] {
|
||||
return view_builder.stop();
|
||||
});
|
||||
|
||||
engine().at_exit([&db] {
|
||||
return db.invoke_on_all([](auto& db) {
|
||||
return db.get_compaction_manager().stop();
|
||||
|
||||
@@ -215,7 +215,7 @@ public:
|
||||
}
|
||||
t.apply(current_tombstone);
|
||||
bool is_live = cr.marker().compact_and_expire(t.tomb(), _query_time, _can_gc, _gc_before);
|
||||
is_live |= cr.cells().compact_and_expire(_schema, column_kind::regular_column, t, _query_time, _can_gc, _gc_before, cr.marker());
|
||||
is_live |= cr.cells().compact_and_expire(_schema, column_kind::regular_column, t, _query_time, _can_gc, _gc_before);
|
||||
if (only_live() && is_live) {
|
||||
partition_is_not_empty(consumer);
|
||||
auto stop = consumer.consume(std::move(cr), t, true);
|
||||
|
||||
@@ -75,11 +75,6 @@ public:
|
||||
return !_t && _marker.is_missing() && _cells.empty();
|
||||
}
|
||||
|
||||
bool is_live(const schema& s, tombstone base_tombstone = tombstone(), gc_clock::time_point now = gc_clock::time_point::min()) const {
|
||||
base_tombstone.apply(_t.tomb());
|
||||
return _marker.is_live(base_tombstone, now) || _cells.is_live(s, column_kind::regular_column, base_tombstone, now);
|
||||
}
|
||||
|
||||
void apply(const schema& s, clustering_row&& cr) {
|
||||
_marker.apply(std::move(cr._marker));
|
||||
_t.apply(cr._t, _marker);
|
||||
@@ -143,10 +138,6 @@ public:
|
||||
return _cells.empty();
|
||||
}
|
||||
|
||||
bool is_live(const schema& s, gc_clock::time_point now = gc_clock::time_point::min()) const {
|
||||
return _cells.is_live(s, column_kind::static_column, tombstone(), now);
|
||||
}
|
||||
|
||||
void apply(const schema& s, const row& r) {
|
||||
_cells.apply(s, column_kind::static_column, r);
|
||||
}
|
||||
|
||||
@@ -35,7 +35,6 @@
|
||||
#include "intrusive_set_external_comparator.hh"
|
||||
#include "counters.hh"
|
||||
#include "row_cache.hh"
|
||||
#include "view_info.hh"
|
||||
#include <seastar/core/execution_stage.hh>
|
||||
|
||||
template<bool reversed>
|
||||
@@ -1090,7 +1089,7 @@ row::apply_monotonically(const column_definition& column, atomic_cell_or_collect
|
||||
if (_type == storage_type::vector && id < max_vector_size) {
|
||||
if (id >= _storage.vector.v.size()) {
|
||||
_storage.vector.v.resize(id);
|
||||
_storage.vector.v.emplace_back(cell_and_hash{std::move(value), std::move(hash)});
|
||||
_storage.vector.v.emplace_back(std::move(value), std::move(hash));
|
||||
_storage.vector.present.set(id);
|
||||
_size++;
|
||||
} else if (auto& cell_and_hash = _storage.vector.v[id]; !bool(cell_and_hash.cell)) {
|
||||
@@ -1265,8 +1264,8 @@ uint32_t mutation_partition::do_compact(const schema& s,
|
||||
deletable_row& row = e.row();
|
||||
row_tombstone tomb = tombstone_for_row(s, e);
|
||||
|
||||
bool is_live = row.marker().compact_and_expire(tomb.tomb(), query_time, can_gc, gc_before);
|
||||
is_live |= row.cells().compact_and_expire(s, column_kind::regular_column, tomb, query_time, can_gc, gc_before, row.marker());
|
||||
bool is_live = row.cells().compact_and_expire(s, column_kind::regular_column, tomb, query_time, can_gc, gc_before);
|
||||
is_live |= row.marker().compact_and_expire(tomb.tomb(), query_time, can_gc, gc_before);
|
||||
|
||||
if (should_purge_row_tombstone(row.deleted_at())) {
|
||||
row.remove_tombstone();
|
||||
@@ -1336,12 +1335,8 @@ deletable_row::is_live(const schema& s, tombstone base_tombstone, gc_clock::time
|
||||
// row is live. Otherwise, a row is considered live if it has any cell
|
||||
// which is live.
|
||||
base_tombstone.apply(_deleted_at.tomb());
|
||||
return _marker.is_live(base_tombstone, query_time) || _cells.is_live(s, column_kind::regular_column, base_tombstone, query_time);
|
||||
}
|
||||
|
||||
bool
|
||||
row::is_live(const schema& s, column_kind kind, tombstone base_tombstone, gc_clock::time_point query_time) const {
|
||||
return has_any_live_data(s, kind, *this, base_tombstone, query_time);
|
||||
return _marker.is_live(base_tombstone, query_time)
|
||||
|| has_any_live_data(s, column_kind::regular_column, _cells, base_tombstone, query_time);
|
||||
}
|
||||
|
||||
bool
|
||||
@@ -1553,30 +1548,9 @@ void row::apply_monotonically(const schema& s, column_kind kind, row&& other) {
|
||||
});
|
||||
}
|
||||
|
||||
// When views contain a primary key column that is not part of the base table primary key,
|
||||
// that column determines whether the row is live or not. We need to ensure that when that
|
||||
// cell is dead, and thus the derived row marker, either by normal deletion of by TTL, so
|
||||
// is the rest of the row. To ensure that none of the regular columns keep the row alive,
|
||||
// we erase the live cells according to the shadowable_tombstone rules.
|
||||
static bool dead_marker_shadows_row(const schema& s, column_kind kind, const row_marker& marker) {
|
||||
return s.is_view()
|
||||
&& s.view_info()->base_non_pk_column_in_view_pk()
|
||||
&& !marker.is_live()
|
||||
&& kind == column_kind::regular_column; // not applicable to static rows
|
||||
}
|
||||
|
||||
bool row::compact_and_expire(
|
||||
const schema& s,
|
||||
column_kind kind,
|
||||
row_tombstone tomb,
|
||||
gc_clock::time_point query_time,
|
||||
can_gc_fn& can_gc,
|
||||
gc_clock::time_point gc_before,
|
||||
const row_marker& marker)
|
||||
bool row::compact_and_expire(const schema& s, column_kind kind, row_tombstone tomb, gc_clock::time_point query_time,
|
||||
can_gc_fn& can_gc, gc_clock::time_point gc_before)
|
||||
{
|
||||
if (dead_marker_shadows_row(s, kind, marker)) {
|
||||
tomb.apply(shadowable_tombstone(api::max_timestamp, gc_clock::time_point::max()), row_marker());
|
||||
}
|
||||
bool any_live = false;
|
||||
remove_if([&] (column_id id, atomic_cell_or_collection& c) {
|
||||
bool erase = false;
|
||||
@@ -1618,17 +1592,6 @@ bool row::compact_and_expire(
|
||||
return any_live;
|
||||
}
|
||||
|
||||
bool row::compact_and_expire(
|
||||
const schema& s,
|
||||
column_kind kind,
|
||||
row_tombstone tomb,
|
||||
gc_clock::time_point query_time,
|
||||
can_gc_fn& can_gc,
|
||||
gc_clock::time_point gc_before) {
|
||||
row_marker m;
|
||||
return compact_and_expire(s, kind, tomb, query_time, can_gc, gc_before, m);
|
||||
}
|
||||
|
||||
deletable_row deletable_row::difference(const schema& s, column_kind kind, const deletable_row& other) const
|
||||
{
|
||||
deletable_row dr;
|
||||
@@ -1790,9 +1753,10 @@ void mutation_querier::query_static_row(const row& r, tombstone current_tombston
|
||||
} else if (_short_reads_allowed) {
|
||||
seastar::measuring_output_stream stream;
|
||||
ser::qr_partition__static_row__cells<seastar::measuring_output_stream> out(stream, { });
|
||||
auto start = stream.size();
|
||||
get_compacted_row_slice(_schema, slice, column_kind::static_column,
|
||||
r, slice.static_columns, _static_cells_wr);
|
||||
_memory_accounter.update(stream.size());
|
||||
r, slice.static_columns, out);
|
||||
_memory_accounter.update(stream.size() - start);
|
||||
}
|
||||
if (_pw.requested_digest()) {
|
||||
max_timestamp max_ts{_pw.last_modified()};
|
||||
@@ -1853,8 +1817,9 @@ stop_iteration mutation_querier::consume(clustering_row&& cr, row_tombstone curr
|
||||
} else if (_short_reads_allowed) {
|
||||
seastar::measuring_output_stream stream;
|
||||
ser::qr_partition__rows<seastar::measuring_output_stream> out(stream, { });
|
||||
auto start = stream.size();
|
||||
write_row(out);
|
||||
stop = _memory_accounter.update_and_check(stream.size());
|
||||
stop = _memory_accounter.update_and_check(stream.size() - start);
|
||||
}
|
||||
|
||||
_live_clustering_rows++;
|
||||
|
||||
@@ -74,6 +74,17 @@ using cell_hash_opt = seastar::optimized_optional<cell_hash>;
|
||||
struct cell_and_hash {
|
||||
atomic_cell_or_collection cell;
|
||||
mutable cell_hash_opt hash;
|
||||
|
||||
cell_and_hash() = default;
|
||||
cell_and_hash(cell_and_hash&&) noexcept = default;
|
||||
cell_and_hash& operator=(cell_and_hash&&) noexcept = default;
|
||||
cell_and_hash(const cell_and_hash&) = default;
|
||||
cell_and_hash& operator=(const cell_and_hash&) = default;
|
||||
|
||||
cell_and_hash(atomic_cell_or_collection&& cell, cell_hash_opt hash)
|
||||
: cell(std::move(cell))
|
||||
, hash(hash)
|
||||
{ }
|
||||
};
|
||||
|
||||
//
|
||||
@@ -314,22 +325,8 @@ public:
|
||||
// Expires cells based on query_time. Expires tombstones based on gc_before
|
||||
// and max_purgeable. Removes cells covered by tomb.
|
||||
// Returns true iff there are any live cells left.
|
||||
bool compact_and_expire(
|
||||
const schema& s,
|
||||
column_kind kind,
|
||||
row_tombstone tomb,
|
||||
gc_clock::time_point query_time,
|
||||
can_gc_fn&,
|
||||
gc_clock::time_point gc_before,
|
||||
const row_marker& marker);
|
||||
|
||||
bool compact_and_expire(
|
||||
const schema& s,
|
||||
column_kind kind,
|
||||
row_tombstone tomb,
|
||||
gc_clock::time_point query_time,
|
||||
can_gc_fn&,
|
||||
gc_clock::time_point gc_before);
|
||||
bool compact_and_expire(const schema& s, column_kind kind, row_tombstone tomb, gc_clock::time_point query_time,
|
||||
can_gc_fn&, gc_clock::time_point gc_before);
|
||||
|
||||
row difference(const schema&, column_kind, const row& other) const;
|
||||
|
||||
@@ -341,8 +338,6 @@ public:
|
||||
|
||||
void prepare_hash(const schema& s, column_kind kind) const;
|
||||
|
||||
bool is_live(const schema&, column_kind kind, tombstone tomb = tombstone(), gc_clock::time_point now = gc_clock::time_point::min()) const;
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const row& r);
|
||||
};
|
||||
|
||||
|
||||
@@ -273,6 +273,11 @@ public:
|
||||
return is_partition_end() || (_ck && _ck->is_empty(s) && _bound_weight > 0);
|
||||
}
|
||||
|
||||
bool is_before_all_clustered_rows(const schema& s) const {
|
||||
return _type < partition_region::clustered
|
||||
|| (_type == partition_region::clustered && _ck->is_empty(s) && _bound_weight < 0);
|
||||
}
|
||||
|
||||
template<typename Hasher>
|
||||
void feed_hash(Hasher& hasher, const schema& s) const {
|
||||
::feed_hash(hasher, _bound_weight);
|
||||
|
||||
81
querier.cc
81
querier.cc
@@ -152,34 +152,33 @@ const size_t querier_cache::max_queriers_memory_usage = memory::stats().total_me
|
||||
void querier_cache::scan_cache_entries() {
|
||||
const auto now = lowres_clock::now();
|
||||
|
||||
auto it = _meta_entries.begin();
|
||||
const auto end = _meta_entries.end();
|
||||
auto it = _entries.begin();
|
||||
const auto end = _entries.end();
|
||||
while (it != end && it->is_expired(now)) {
|
||||
if (*it) {
|
||||
++_stats.time_based_evictions;
|
||||
}
|
||||
it = _meta_entries.erase(it);
|
||||
_stats.population = _entries.size();
|
||||
++_stats.time_based_evictions;
|
||||
--_stats.population;
|
||||
it = _entries.erase(it);
|
||||
}
|
||||
}
|
||||
|
||||
querier_cache::entries::iterator querier_cache::find_querier(utils::UUID key, const dht::partition_range& range, tracing::trace_state_ptr trace_state) {
|
||||
const auto queriers = _entries.equal_range(key);
|
||||
const auto queriers = _index.equal_range(key);
|
||||
|
||||
if (queriers.first == _entries.end()) {
|
||||
if (queriers.first == _index.end()) {
|
||||
tracing::trace(trace_state, "Found no cached querier for key {}", key);
|
||||
return _entries.end();
|
||||
}
|
||||
|
||||
const auto it = std::find_if(queriers.first, queriers.second, [&] (const std::pair<const utils::UUID, entry>& elem) {
|
||||
return elem.second.get().matches(range);
|
||||
const auto it = std::find_if(queriers.first, queriers.second, [&] (const entry& e) {
|
||||
return e.value().matches(range);
|
||||
});
|
||||
|
||||
if (it == queriers.second) {
|
||||
tracing::trace(trace_state, "Found cached querier(s) for key {} but none matches the query range {}", key, range);
|
||||
return _entries.end();
|
||||
}
|
||||
tracing::trace(trace_state, "Found cached querier for key {} and range {}", key, range);
|
||||
return it;
|
||||
return it->pos();
|
||||
}
|
||||
|
||||
querier_cache::querier_cache(std::chrono::seconds entry_ttl)
|
||||
@@ -199,8 +198,7 @@ void querier_cache::insert(utils::UUID key, querier&& q, tracing::trace_state_pt
|
||||
|
||||
tracing::trace(trace_state, "Caching querier with key {}", key);
|
||||
|
||||
auto memory_usage = boost::accumulate(
|
||||
_entries | boost::adaptors::map_values | boost::adaptors::transformed(std::mem_fn(&querier_cache::entry::memory_usage)), size_t(0));
|
||||
auto memory_usage = boost::accumulate(_entries | boost::adaptors::transformed(std::mem_fn(&entry::memory_usage)), size_t(0));
|
||||
|
||||
// We add the memory-usage of the to-be added querier to the memory-usage
|
||||
// of all the cached queriers. We now need to makes sure this number is
|
||||
@@ -210,20 +208,20 @@ void querier_cache::insert(utils::UUID key, querier&& q, tracing::trace_state_pt
|
||||
memory_usage += q.memory_usage();
|
||||
|
||||
if (memory_usage >= max_queriers_memory_usage) {
|
||||
auto it = _meta_entries.begin();
|
||||
const auto end = _meta_entries.end();
|
||||
auto it = _entries.begin();
|
||||
const auto end = _entries.end();
|
||||
while (it != end && memory_usage >= max_queriers_memory_usage) {
|
||||
if (*it) {
|
||||
++_stats.memory_based_evictions;
|
||||
memory_usage -= it->get_entry().memory_usage();
|
||||
}
|
||||
it = _meta_entries.erase(it);
|
||||
++_stats.memory_based_evictions;
|
||||
memory_usage -= it->memory_usage();
|
||||
--_stats.population;
|
||||
it = _entries.erase(it);
|
||||
}
|
||||
}
|
||||
|
||||
const auto it = _entries.emplace(key, entry::param{std::move(q), _entry_ttl}).first;
|
||||
_meta_entries.emplace_back(_entries, it);
|
||||
_stats.population = _entries.size();
|
||||
auto& e = _entries.emplace_back(key, std::move(q), lowres_clock::now() + _entry_ttl);
|
||||
e.set_pos(--_entries.end());
|
||||
_index.insert(e);
|
||||
++_stats.population;
|
||||
}
|
||||
|
||||
querier querier_cache::lookup(utils::UUID key,
|
||||
@@ -240,9 +238,9 @@ querier querier_cache::lookup(utils::UUID key,
|
||||
return create_fun();
|
||||
}
|
||||
|
||||
auto q = std::move(it->second).get();
|
||||
auto q = std::move(*it).value();
|
||||
_entries.erase(it);
|
||||
_stats.population = _entries.size();
|
||||
--_stats.population;
|
||||
|
||||
const auto can_be_used = q.can_be_used_for_page(only_live, s, range, slice);
|
||||
if (can_be_used == querier::can_use::yes) {
|
||||
@@ -265,25 +263,24 @@ bool querier_cache::evict_one() {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto it = _meta_entries.begin();
|
||||
const auto end = _meta_entries.end();
|
||||
while (it != end) {
|
||||
const auto is_live = bool(*it);
|
||||
it = _meta_entries.erase(it);
|
||||
_stats.population = _entries.size();
|
||||
if (is_live) {
|
||||
++_stats.resource_based_evictions;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
++_stats.resource_based_evictions;
|
||||
--_stats.population;
|
||||
_entries.pop_front();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void querier_cache::evict_all_for_table(const utils::UUID& schema_id) {
|
||||
_meta_entries.remove_if([&] (const meta_entry& me) {
|
||||
return !me || me.get_entry().get().schema()->id() == schema_id;
|
||||
});
|
||||
_stats.population = _entries.size();
|
||||
auto it = _entries.begin();
|
||||
const auto end = _entries.end();
|
||||
while (it != end) {
|
||||
if (it->schema().id() == schema_id) {
|
||||
--_stats.population;
|
||||
it = _entries.erase(it);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
querier_cache_context::querier_cache_context(querier_cache& cache, utils::UUID key, bool is_first_page)
|
||||
|
||||
103
querier.hh
103
querier.hh
@@ -24,7 +24,8 @@
|
||||
#include "mutation_compactor.hh"
|
||||
#include "mutation_reader.hh"
|
||||
|
||||
#include <seastar/core/weak_ptr.hh>
|
||||
#include <boost/intrusive/set.hpp>
|
||||
|
||||
#include <variant>
|
||||
|
||||
/// One-stop object for serving queries.
|
||||
@@ -264,75 +265,65 @@ public:
|
||||
};
|
||||
|
||||
private:
|
||||
class entry : public weakly_referencable<entry> {
|
||||
querier _querier;
|
||||
lowres_clock::time_point _expires;
|
||||
public:
|
||||
// Since entry cannot be moved and unordered_map::emplace can pass only
|
||||
// a single param to it's mapped-type we need to force a single-param
|
||||
// constructor for entry. Oh C++...
|
||||
struct param {
|
||||
querier q;
|
||||
std::chrono::seconds ttl;
|
||||
};
|
||||
class entry : public boost::intrusive::set_base_hook<boost::intrusive::link_mode<boost::intrusive::auto_unlink>> {
|
||||
// Self reference so that we can remove the entry given an `entry&`.
|
||||
std::list<entry>::iterator _pos;
|
||||
const utils::UUID _key;
|
||||
const lowres_clock::time_point _expires;
|
||||
querier _value;
|
||||
|
||||
explicit entry(param p)
|
||||
: _querier(std::move(p.q))
|
||||
, _expires(lowres_clock::now() + p.ttl) {
|
||||
public:
|
||||
entry(utils::UUID key, querier q, lowres_clock::time_point expires)
|
||||
: _key(key)
|
||||
, _expires(expires)
|
||||
, _value(std::move(q)) {
|
||||
}
|
||||
|
||||
std::list<entry>::iterator pos() const {
|
||||
return _pos;
|
||||
}
|
||||
|
||||
void set_pos(std::list<entry>::iterator pos) {
|
||||
_pos = pos;
|
||||
}
|
||||
|
||||
const utils::UUID& key() const {
|
||||
return _key;
|
||||
}
|
||||
|
||||
const ::schema& schema() const {
|
||||
return *_value.schema();
|
||||
}
|
||||
|
||||
bool is_expired(const lowres_clock::time_point& now) const {
|
||||
return _expires <= now;
|
||||
}
|
||||
|
||||
const querier& get() const & {
|
||||
return _querier;
|
||||
}
|
||||
|
||||
querier&& get() && {
|
||||
return std::move(_querier);
|
||||
}
|
||||
|
||||
size_t memory_usage() const {
|
||||
return _querier.memory_usage();
|
||||
return _value.memory_usage();
|
||||
}
|
||||
|
||||
const querier& value() const & {
|
||||
return _value;
|
||||
}
|
||||
|
||||
querier value() && {
|
||||
return std::move(_value);
|
||||
}
|
||||
};
|
||||
|
||||
using entries = std::unordered_map<utils::UUID, entry>;
|
||||
|
||||
class meta_entry {
|
||||
entries& _entries;
|
||||
weak_ptr<entry> _entry_ptr;
|
||||
entries::iterator _entry_it;
|
||||
|
||||
public:
|
||||
meta_entry(entries& e, entries::iterator it)
|
||||
: _entries(e)
|
||||
, _entry_ptr(it->second.weak_from_this())
|
||||
, _entry_it(it) {
|
||||
}
|
||||
|
||||
~meta_entry() {
|
||||
if (_entry_ptr) {
|
||||
_entries.erase(_entry_it);
|
||||
}
|
||||
}
|
||||
|
||||
bool is_expired(const lowres_clock::time_point& now) const {
|
||||
return !_entry_ptr || _entry_ptr->is_expired(now);
|
||||
}
|
||||
|
||||
explicit operator bool() const {
|
||||
return bool(_entry_ptr);
|
||||
}
|
||||
|
||||
const entry& get_entry() const {
|
||||
return *_entry_ptr;
|
||||
}
|
||||
struct key_of_entry {
|
||||
using type = utils::UUID;
|
||||
const type& operator()(const entry& e) { return e.key(); }
|
||||
};
|
||||
|
||||
using entries = std::list<entry>;
|
||||
using index = boost::intrusive::multiset<entry, boost::intrusive::key_of_value<key_of_entry>,
|
||||
boost::intrusive::constant_time_size<false>>;
|
||||
|
||||
private:
|
||||
entries _entries;
|
||||
std::list<meta_entry> _meta_entries;
|
||||
index _index;
|
||||
timer<lowres_clock> _expiry_timer;
|
||||
std::chrono::seconds _entry_ttl;
|
||||
stats _stats;
|
||||
|
||||
@@ -871,11 +871,6 @@ schema_builder& schema_builder::without_index(const sstring& name) {
|
||||
return *this;
|
||||
}
|
||||
|
||||
schema_builder& schema_builder::without_indexes() {
|
||||
_raw._indices_by_name.clear();
|
||||
return *this;
|
||||
}
|
||||
|
||||
schema_ptr schema_builder::build() {
|
||||
schema::raw_schema new_raw = _raw; // Copy so that build() remains idempotent.
|
||||
|
||||
|
||||
@@ -261,7 +261,6 @@ public:
|
||||
|
||||
schema_builder& with_index(const index_metadata& im);
|
||||
schema_builder& without_index(const sstring& name);
|
||||
schema_builder& without_indexes();
|
||||
|
||||
default_names get_default_names() const {
|
||||
return default_names(_raw);
|
||||
|
||||
2
seastar
2
seastar
Submodule seastar updated: a9812cc5a9...88cb58cfbf
@@ -73,26 +73,6 @@ public:
|
||||
virtual void on_drop_function(const sstring& ks_name, const sstring& function_name) = 0;
|
||||
virtual void on_drop_aggregate(const sstring& ks_name, const sstring& aggregate_name) = 0;
|
||||
virtual void on_drop_view(const sstring& ks_name, const sstring& view_name) = 0;
|
||||
|
||||
class only_view_notifications;
|
||||
};
|
||||
|
||||
class migration_listener::only_view_notifications : public migration_listener {
|
||||
virtual void on_create_keyspace(const sstring& ks_name) { }
|
||||
virtual void on_create_column_family(const sstring& ks_name, const sstring& cf_name) { }
|
||||
virtual void on_create_user_type(const sstring& ks_name, const sstring& type_name) { }
|
||||
virtual void on_create_function(const sstring& ks_name, const sstring& function_name) { }
|
||||
virtual void on_create_aggregate(const sstring& ks_name, const sstring& aggregate_name) { }
|
||||
virtual void on_update_keyspace(const sstring& ks_name) { }
|
||||
virtual void on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool columns_changed) { }
|
||||
virtual void on_update_user_type(const sstring& ks_name, const sstring& type_name) { }
|
||||
virtual void on_update_function(const sstring& ks_name, const sstring& function_name) { }
|
||||
virtual void on_update_aggregate(const sstring& ks_name, const sstring& aggregate_name) { }
|
||||
virtual void on_drop_keyspace(const sstring& ks_name) { }
|
||||
virtual void on_drop_column_family(const sstring& ks_name, const sstring& cf_name) { }
|
||||
virtual void on_drop_user_type(const sstring& ks_name, const sstring& type_name) { }
|
||||
virtual void on_drop_function(const sstring& ks_name, const sstring& function_name) { }
|
||||
virtual void on_drop_aggregate(const sstring& ks_name, const sstring& aggregate_name) { }
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -48,8 +48,6 @@
|
||||
#include "service/migration_task.hh"
|
||||
#include "utils/runtime.hh"
|
||||
#include "gms/gossiper.hh"
|
||||
#include "view_info.hh"
|
||||
#include "schema_builder.hh"
|
||||
|
||||
namespace service {
|
||||
|
||||
@@ -666,33 +664,19 @@ future<> migration_manager::announce_column_family_drop(const sstring& ks_name,
|
||||
throw exceptions::invalid_request_exception("Cannot use DROP TABLE on Materialized View");
|
||||
}
|
||||
auto&& views = old_cfm.views();
|
||||
if (views.size() > schema->all_indices().size()) {
|
||||
auto explicit_view_names = views
|
||||
| boost::adaptors::filtered([&old_cfm](const view_ptr& v) { return !old_cfm.get_index_manager().is_index(v); })
|
||||
| boost::adaptors::transformed([](const view_ptr& v) { return v->cf_name(); });
|
||||
if (!views.empty()) {
|
||||
throw exceptions::invalid_request_exception(sprint(
|
||||
"Cannot drop table when materialized views still depend on it (%s.{%s})",
|
||||
ks_name, ::join(", ", explicit_view_names)));
|
||||
ks_name, ::join(", ", views | boost::adaptors::transformed([](auto&& v) { return v->cf_name(); }))));
|
||||
}
|
||||
mlogger.info("Drop table '{}.{}'", schema->ks_name(), schema->cf_name());
|
||||
|
||||
auto maybe_drop_secondary_indexes = make_ready_future<std::vector<mutation>>();
|
||||
if (!schema->all_indices().empty()) {
|
||||
auto builder = schema_builder(schema).without_indexes();
|
||||
maybe_drop_secondary_indexes = db::schema_tables::make_update_table_mutations(db.find_keyspace(ks_name).metadata(), schema, builder.build(), api::new_timestamp(), false);
|
||||
}
|
||||
|
||||
return maybe_drop_secondary_indexes.then([announce_locally, ks_name, schema, &db, &old_cfm] (auto&& drop_si_mutations) {
|
||||
return db::schema_tables::make_drop_table_mutations(db.find_keyspace(ks_name).metadata(), schema, api::new_timestamp())
|
||||
.then([drop_si_mutations = std::move(drop_si_mutations), announce_locally] (auto&& mutations) mutable {
|
||||
mutations.insert(mutations.end(), std::make_move_iterator(drop_si_mutations.begin()), std::make_move_iterator(drop_si_mutations.end()));
|
||||
return announce(std::move(mutations), announce_locally);
|
||||
});
|
||||
});
|
||||
return db::schema_tables::make_drop_table_mutations(db.find_keyspace(ks_name).metadata(), schema, api::new_timestamp())
|
||||
.then([announce_locally] (auto&& mutations) {
|
||||
return announce(std::move(mutations), announce_locally);
|
||||
});
|
||||
} catch (const no_such_column_family& e) {
|
||||
throw exceptions::configuration_exception(sprint("Cannot drop non existing table '%s' in keyspace '%s'.", cf_name, ks_name));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
future<> migration_manager::announce_type_drop(user_type dropped_type, bool announce_locally)
|
||||
@@ -763,9 +747,6 @@ future<> migration_manager::announce_view_drop(const sstring& ks_name,
|
||||
if (!view->is_view()) {
|
||||
throw exceptions::invalid_request_exception("Cannot use DROP MATERIALIZED VIEW on Table");
|
||||
}
|
||||
if (db.find_column_family(view->view_info()->base_id()).get_index_manager().is_index(view_ptr(view))) {
|
||||
throw exceptions::invalid_request_exception("Cannot use DROP MATERIALIZED VIEW on Index");
|
||||
}
|
||||
auto keyspace = db.find_keyspace(ks_name).metadata();
|
||||
mlogger.info("Drop view '{}.{}'", view->ks_name(), view->cf_name());
|
||||
return db::schema_tables::make_drop_view_mutations(std::move(keyspace), view_ptr(std::move(view)), api::new_timestamp())
|
||||
|
||||
@@ -144,7 +144,11 @@ future<lowres_clock::duration> cache_hitrate_calculator::recalculate_hitrates()
|
||||
return _db.invoke_on_all([this, rates = std::move(rates), cpuid = engine().cpu_id()] (database& db) {
|
||||
sstring gstate;
|
||||
for (auto& cf : db.get_column_families() | boost::adaptors::filtered(non_system_filter)) {
|
||||
stat s = rates.at(cf.first);
|
||||
auto it = rates.find(cf.first);
|
||||
if (it == rates.end()) { // a table may be added before map/reduce completes and this code runs
|
||||
continue;
|
||||
}
|
||||
stat s = it->second;
|
||||
float rate = 0;
|
||||
if (s.h) {
|
||||
rate = s.h / (s.h + s.m);
|
||||
|
||||
@@ -83,7 +83,7 @@ private:
|
||||
_last_replicas = state->get_last_replicas();
|
||||
} else {
|
||||
// Reusing readers is currently only supported for singular queries.
|
||||
if (_ranges.front().is_singular()) {
|
||||
if (!_ranges.empty() && query::is_single_partition(_ranges.front())) {
|
||||
_cmd->query_uuid = utils::make_random_uuid();
|
||||
}
|
||||
_cmd->is_first_page = true;
|
||||
|
||||
@@ -457,7 +457,7 @@ storage_proxy::response_id_type storage_proxy::register_response_handler(shared_
|
||||
// we are here because either cl was achieved, but targets left in the handler are not
|
||||
// responding, so a hint should be written for them, or cl == any in which case
|
||||
// hints are counted towards consistency, so we need to write hints and count how many were written
|
||||
auto hints = hint_to_dead_endpoints(e.handler->_mutation_holder, e.handler->get_targets(), e.handler->_type, e.handler->get_trace_state());
|
||||
auto hints = hint_to_dead_endpoints(e.handler->_mutation_holder, e.handler->get_targets(), e.handler->get_trace_state());
|
||||
e.handler->signal(hints);
|
||||
if (e.handler->_cl == db::consistency_level::ANY && hints) {
|
||||
slogger.trace("Wrote hint to satisfy CL.ANY after no replicas acknowledged the write");
|
||||
@@ -665,17 +665,15 @@ storage_proxy::storage_proxy(distributed<database>& db, stdx::optional<std::vect
|
||||
|
||||
});
|
||||
|
||||
_hints_enabled_for_user_writes = bool(hinted_handoff_enabled);
|
||||
if (!hinted_handoff_enabled) {
|
||||
hinted_handoff_enabled.emplace();
|
||||
}
|
||||
supervisor::notify("creating hints manager");
|
||||
slogger.trace("hinted DCs: {}", *hinted_handoff_enabled);
|
||||
if (hinted_handoff_enabled) {
|
||||
supervisor::notify("creating hints manager");
|
||||
slogger.trace("hinted DCs: {}", *hinted_handoff_enabled);
|
||||
|
||||
const db::config& cfg = _db.local().get_config();
|
||||
// Give each hints manager 10% of the available disk space. Give each shard an equal share of the available space.
|
||||
db::hints::resource_manager::max_shard_disk_space_size = boost::filesystem::space(cfg.hints_directory().c_str()).capacity / (10 * smp::count);
|
||||
_hints_manager.emplace(cfg.hints_directory(), *hinted_handoff_enabled, cfg.max_hint_window_in_ms(), _db);
|
||||
const db::config& cfg = _db.local().get_config();
|
||||
// Give each hints manager 10% of the available disk space. Give each shard an equal share of the available space.
|
||||
db::hints::manager::max_shard_disk_space_size = boost::filesystem::space(cfg.hints_directory().c_str()).capacity / (10 * smp::count);
|
||||
_hints_manager.emplace(cfg.hints_directory(), *hinted_handoff_enabled, cfg.max_hint_window_in_ms(), _db);
|
||||
}
|
||||
}
|
||||
|
||||
storage_proxy::rh_entry::rh_entry(shared_ptr<abstract_write_response_handler>&& h, std::function<void()>&& cb) : handler(std::move(h)), expire_timer(std::move(cb)) {}
|
||||
@@ -1160,7 +1158,7 @@ storage_proxy::create_write_response_handler(const mutation& m, db::consistency_
|
||||
|
||||
auto all = boost::range::join(natural_endpoints, pending_endpoints);
|
||||
|
||||
if (cannot_hint(all, type)) {
|
||||
if (std::find_if(all.begin(), all.end(), std::bind1st(std::mem_fn(&storage_proxy::cannot_hint), this)) != all.end()) {
|
||||
// avoid OOMing due to excess hints. we need to do this check even for "live" nodes, since we can
|
||||
// still generate hints for those if it's overloaded or simply dead but not yet known-to-be-dead.
|
||||
// The idea is that if we have over maxHintsInProgress hints in flight, this is probably due to
|
||||
@@ -1204,7 +1202,7 @@ void
|
||||
storage_proxy::hint_to_dead_endpoints(response_id_type id, db::consistency_level cl) {
|
||||
auto& h = *get_write_response_handler(id);
|
||||
|
||||
size_t hints = hint_to_dead_endpoints(h._mutation_holder, h.get_dead_endpoints(), h._type, h.get_trace_state());
|
||||
size_t hints = hint_to_dead_endpoints(h._mutation_holder, h.get_dead_endpoints(), h.get_trace_state());
|
||||
|
||||
if (cl == db::consistency_level::ANY) {
|
||||
// for cl==ANY hints are counted towards consistency
|
||||
@@ -1560,41 +1558,21 @@ storage_proxy::mutate_atomically(std::vector<mutation> mutations, db::consistenc
|
||||
});
|
||||
}
|
||||
|
||||
template<typename Range>
|
||||
bool storage_proxy::cannot_hint(const Range& targets, db::write_type type) {
|
||||
bool storage_proxy::cannot_hint(gms::inet_address target) {
|
||||
// if hints are disabled we "can always hint" since there's going to be no hint generated in this case
|
||||
return hints_enabled(type) && boost::algorithm::any_of(targets, std::bind(&db::hints::manager::too_many_in_flight_hints_for, &*_hints_manager, std::placeholders::_1));
|
||||
return hints_enabled() && _hints_manager->too_many_in_flight_hints_for(target);
|
||||
}
|
||||
|
||||
future<> storage_proxy::send_to_endpoint(
|
||||
mutation m,
|
||||
gms::inet_address target,
|
||||
std::vector<gms::inet_address> pending_endpoints,
|
||||
db::write_type type) {
|
||||
future<> storage_proxy::send_to_endpoint(mutation m, gms::inet_address target, db::write_type type) {
|
||||
utils::latency_counter lc;
|
||||
lc.start();
|
||||
|
||||
// View updates use consistency level ANY in order to fall back to hinted handoff in case of a failed update
|
||||
db::consistency_level cl = (type == db::write_type::VIEW) ? db::consistency_level::ANY : db::consistency_level::ONE;
|
||||
std::unordered_set<gms::inet_address> targets(pending_endpoints.begin(), pending_endpoints.end());
|
||||
targets.insert(std::move(target));
|
||||
return mutate_prepare(std::array<mutation, 1>{std::move(m)}, cl, type,
|
||||
[this, targets = std::move(targets), pending_endpoints = std::move(pending_endpoints)] (
|
||||
const mutation& m,
|
||||
db::consistency_level cl,
|
||||
db::write_type type) mutable {
|
||||
return mutate_prepare(std::array<mutation, 1>{std::move(m)}, db::consistency_level::ONE, type,
|
||||
[this, target] (const mutation& m, db::consistency_level cl, db::write_type type) {
|
||||
auto& ks = _db.local().find_keyspace(m.schema()->ks_name());
|
||||
return create_write_response_handler(
|
||||
ks,
|
||||
cl,
|
||||
type,
|
||||
std::make_unique<shared_mutation>(m),
|
||||
std::move(targets),
|
||||
pending_endpoints,
|
||||
{ },
|
||||
nullptr);
|
||||
}).then([this, cl] (std::vector<unique_response_handler> ids) {
|
||||
return mutate_begin(std::move(ids), cl);
|
||||
return create_write_response_handler(ks, cl, type, std::make_unique<shared_mutation>(m), {target}, {}, {}, nullptr);
|
||||
}).then([this] (std::vector<unique_response_handler> ids) {
|
||||
return mutate_begin(std::move(ids), db::consistency_level::ONE);
|
||||
}).then_wrapped([p = shared_from_this(), lc] (future<>&& f) {
|
||||
return p->mutate_end(std::move(f), lc, nullptr);
|
||||
});
|
||||
@@ -1715,9 +1693,9 @@ void storage_proxy::send_to_live_endpoints(storage_proxy::response_id_type respo
|
||||
|
||||
// returns number of hints stored
|
||||
template<typename Range>
|
||||
size_t storage_proxy::hint_to_dead_endpoints(std::unique_ptr<mutation_holder>& mh, const Range& targets, db::write_type type, tracing::trace_state_ptr tr_state) noexcept
|
||||
size_t storage_proxy::hint_to_dead_endpoints(std::unique_ptr<mutation_holder>& mh, const Range& targets, tracing::trace_state_ptr tr_state) noexcept
|
||||
{
|
||||
if (hints_enabled(type)) {
|
||||
if (hints_enabled()) {
|
||||
return boost::count_if(targets, [this, &mh, tr_state = std::move(tr_state)] (gms::inet_address target) mutable -> bool {
|
||||
return _hints_manager->store_hint(target, mh->schema(), mh->get_mutation_for(target), tr_state);
|
||||
});
|
||||
@@ -3242,9 +3220,22 @@ storage_proxy::query_partition_key_range(lw_shared_ptr<query::read_command> cmd,
|
||||
slogger.debug("Estimated result rows per range: {}; requested rows: {}, ranges.size(): {}; concurrent range requests: {}",
|
||||
result_rows_per_range, cmd->row_limit, ranges.size(), concurrency_factor);
|
||||
|
||||
// The call to `query_partition_key_range_concurrent()` below
|
||||
// updates `cmd` directly when processing the results. Under
|
||||
// some circumstances, when the query executes without deferring,
|
||||
// this updating will happen before the lambda object is constructed
|
||||
// and hence the updates will be visible to the lambda. This will
|
||||
// result in the merger below trimming the results according to the
|
||||
// updated (decremented) limits and causing the paging logic to
|
||||
// declare the query exhausted due to the non-full page. To avoid
|
||||
// this, save the original values of the limits here and pass these
|
||||
// to the lambda below.
|
||||
const auto row_limit = cmd->row_limit;
|
||||
const auto partition_limit = cmd->partition_limit;
|
||||
|
||||
return query_partition_key_range_concurrent(timeout, std::move(results), cmd, cl, ranges.begin(), std::move(ranges), concurrency_factor,
|
||||
std::move(trace_state), cmd->row_limit, cmd->partition_limit)
|
||||
.then([row_limit = cmd->row_limit, partition_limit = cmd->partition_limit](std::vector<foreign_ptr<lw_shared_ptr<query::result>>> results) {
|
||||
.then([row_limit, partition_limit](std::vector<foreign_ptr<lw_shared_ptr<query::result>>> results) {
|
||||
query::result_merger merger(row_limit, partition_limit);
|
||||
merger.reserve(results.size());
|
||||
|
||||
@@ -3573,8 +3564,8 @@ get_restricted_ranges(locator::token_metadata& tm, const schema& s, dht::partiti
|
||||
return ranges;
|
||||
}
|
||||
|
||||
bool storage_proxy::hints_enabled(db::write_type type) noexcept {
|
||||
return _hints_enabled_for_user_writes || (type == db::write_type::VIEW && bool(_hints_manager));
|
||||
bool storage_proxy::hints_enabled() noexcept {
|
||||
return bool(_hints_manager);
|
||||
}
|
||||
|
||||
future<> storage_proxy::truncate_blocking(sstring keyspace, sstring cfname) {
|
||||
@@ -3607,6 +3598,7 @@ future<> storage_proxy::truncate_blocking(sstring keyspace, sstring cfname) {
|
||||
std::rethrow_exception(ep);
|
||||
} catch (rpc::timeout_error& e) {
|
||||
slogger.trace("Truncation of {} timed out: {}", cfname, e.what());
|
||||
throw;
|
||||
} catch (...) {
|
||||
throw;
|
||||
}
|
||||
|
||||
@@ -221,7 +221,6 @@ private:
|
||||
// just skip an entry if request no longer exists.
|
||||
circular_buffer<response_id_type> _throttled_writes;
|
||||
stdx::optional<db::hints::manager> _hints_manager;
|
||||
bool _hints_enabled_for_user_writes = false;
|
||||
stats _stats;
|
||||
static constexpr float CONCURRENT_SUBREQUESTS_MARGIN = 0.10;
|
||||
// for read repair chance calculation
|
||||
@@ -248,11 +247,10 @@ private:
|
||||
response_id_type create_write_response_handler(const std::unordered_map<gms::inet_address, std::experimental::optional<mutation>>&, db::consistency_level cl, db::write_type type, tracing::trace_state_ptr tr_state);
|
||||
void send_to_live_endpoints(response_id_type response_id, clock_type::time_point timeout);
|
||||
template<typename Range>
|
||||
size_t hint_to_dead_endpoints(std::unique_ptr<mutation_holder>& mh, const Range& targets, db::write_type type, tracing::trace_state_ptr tr_state) noexcept;
|
||||
size_t hint_to_dead_endpoints(std::unique_ptr<mutation_holder>& mh, const Range& targets, tracing::trace_state_ptr tr_state) noexcept;
|
||||
void hint_to_dead_endpoints(response_id_type, db::consistency_level);
|
||||
template<typename Range>
|
||||
bool cannot_hint(const Range& targets, db::write_type type);
|
||||
bool hints_enabled(db::write_type type) noexcept;
|
||||
bool cannot_hint(gms::inet_address target);
|
||||
bool hints_enabled() noexcept;
|
||||
std::vector<gms::inet_address> get_live_endpoints(keyspace& ks, const dht::token& token);
|
||||
std::vector<gms::inet_address> get_live_sorted_endpoints(keyspace& ks, const dht::token& token);
|
||||
db::read_repair_decision new_read_repair_decision(const schema& s);
|
||||
@@ -382,7 +380,7 @@ public:
|
||||
// Inspired by Cassandra's StorageProxy.sendToHintedEndpoints but without
|
||||
// hinted handoff support, and just one target. See also
|
||||
// send_to_live_endpoints() - another take on the same original function.
|
||||
future<> send_to_endpoint(mutation m, gms::inet_address target, std::vector<gms::inet_address> pending_endpoints, db::write_type type);
|
||||
future<> send_to_endpoint(mutation m, gms::inet_address target, db::write_type type);
|
||||
|
||||
/**
|
||||
* Performs the truncate operation, which effectively deletes all data from
|
||||
|
||||
@@ -104,12 +104,11 @@ int get_generation_number() {
|
||||
return generation_number;
|
||||
}
|
||||
|
||||
storage_service::storage_service(distributed<database>& db, sharded<auth::service>& auth_service, sharded<db::system_distributed_keyspace>& sys_dist_ks)
|
||||
storage_service::storage_service(distributed<database>& db, sharded<auth::service>& auth_service)
|
||||
: _db(db)
|
||||
, _auth_service(auth_service)
|
||||
, _replicate_action([this] { return do_replicate_to_all_cores(); })
|
||||
, _update_pending_ranges_action([this] { return do_update_pending_ranges(); })
|
||||
, _sys_dist_ks(sys_dist_ks) {
|
||||
, _update_pending_ranges_action([this] { return do_update_pending_ranges(); }) {
|
||||
sstable_read_error.connect([this] { isolate_on_error(); });
|
||||
sstable_write_error.connect([this] { isolate_on_error(); });
|
||||
general_disk_error.connect([this] { isolate_on_error(); });
|
||||
@@ -547,12 +546,6 @@ void storage_service::join_token_ring(int delay) {
|
||||
|
||||
supervisor::notify("starting tracing");
|
||||
tracing::tracing::start_tracing().get();
|
||||
|
||||
supervisor::notify("starting system distributed keyspace");
|
||||
_sys_dist_ks.start(
|
||||
std::ref(cql3::get_query_processor()),
|
||||
std::ref(service::get_migration_manager())).get();
|
||||
_sys_dist_ks.invoke_on_all(&db::system_distributed_keyspace::start).get();
|
||||
} else {
|
||||
slogger.info("Startup complete, but write survey mode is active, not becoming an active ring member. Use JMX (StorageService->joinRing()) to finalize ring joining.");
|
||||
}
|
||||
@@ -1261,9 +1254,6 @@ future<> storage_service::drain_on_shutdown() {
|
||||
tracing::tracing::tracing_instance().stop().get();
|
||||
slogger.info("Drain on shutdown: tracing is stopped");
|
||||
|
||||
ss._sys_dist_ks.invoke_on_all(&db::system_distributed_keyspace::stop).get();
|
||||
slogger.info("Drain on shutdown: system distributed keyspace stopped");
|
||||
|
||||
get_storage_proxy().invoke_on_all([] (storage_proxy& local_proxy) {
|
||||
return local_proxy.stop_hints_manager();
|
||||
}).get();
|
||||
@@ -3428,18 +3418,5 @@ storage_service::get_natural_endpoints(const sstring& keyspace, const token& pos
|
||||
return _db.local().find_keyspace(keyspace).get_replication_strategy().get_natural_endpoints(pos);
|
||||
}
|
||||
|
||||
future<std::unordered_map<sstring, sstring>>
|
||||
storage_service::view_build_statuses(sstring keyspace, sstring view_name) const {
|
||||
return _sys_dist_ks.local().view_status(std::move(keyspace), std::move(view_name)).then([this] (std::unordered_map<utils::UUID, sstring> status) {
|
||||
auto& endpoint_to_host_id = get_token_metadata().get_endpoint_to_host_id_map_for_reading();
|
||||
return boost::copy_range<std::unordered_map<sstring, sstring>>(endpoint_to_host_id
|
||||
| boost::adaptors::transformed([&status] (const std::pair<inet_address, utils::UUID>& p) {
|
||||
auto it = status.find(p.second);
|
||||
auto s = it != status.end() ? std::move(it->second) : "UNKNOWN";
|
||||
return std::pair(p.first.to_sstring(), std::move(s));
|
||||
}));
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace service
|
||||
|
||||
|
||||
@@ -51,7 +51,6 @@
|
||||
#include "dht/token_range_endpoints.hh"
|
||||
#include "core/sleep.hh"
|
||||
#include "gms/application_state.hh"
|
||||
#include "db/system_distributed_keyspace.hh"
|
||||
#include "core/semaphore.hh"
|
||||
#include "utils/fb_utilities.hh"
|
||||
#include "utils/serialized_action.hh"
|
||||
@@ -131,7 +130,7 @@ private:
|
||||
bool _ms_stopped = false;
|
||||
bool _stream_manager_stopped = false;
|
||||
public:
|
||||
storage_service(distributed<database>& db, sharded<auth::service>&, sharded<db::system_distributed_keyspace>&);
|
||||
storage_service(distributed<database>& db, sharded<auth::service>&);
|
||||
void isolate_on_error();
|
||||
void isolate_on_commit_error();
|
||||
|
||||
@@ -728,7 +727,6 @@ private:
|
||||
future<> do_replicate_to_all_cores();
|
||||
serialized_action _replicate_action;
|
||||
serialized_action _update_pending_ranges_action;
|
||||
sharded<db::system_distributed_keyspace>& _sys_dist_ks;
|
||||
private:
|
||||
/**
|
||||
* Replicates token_metadata contents on shard0 instance to other shards.
|
||||
@@ -2023,8 +2021,6 @@ public:
|
||||
}
|
||||
#endif
|
||||
|
||||
future<std::unordered_map<sstring, sstring>> view_build_statuses(sstring keyspace, sstring view_name) const;
|
||||
|
||||
private:
|
||||
/**
|
||||
* Seed data to the endpoints that will be responsible for it at the future
|
||||
@@ -2302,8 +2298,8 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
inline future<> init_storage_service(distributed<database>& db, sharded<auth::service>& auth_service, sharded<db::system_distributed_keyspace>& sys_dist_ks) {
|
||||
return service::get_storage_service().start(std::ref(db), std::ref(auth_service), std::ref(sys_dist_ks));
|
||||
inline future<> init_storage_service(distributed<database>& db, sharded<auth::service>& auth_service) {
|
||||
return service::get_storage_service().start(std::ref(db), std::ref(auth_service));
|
||||
}
|
||||
|
||||
inline future<> deinit_storage_service() {
|
||||
|
||||
@@ -33,6 +33,7 @@
|
||||
#include "unimplemented.hh"
|
||||
#include "stdx.hh"
|
||||
#include "segmented_compress_params.hh"
|
||||
#include "utils/class_registrator.hh"
|
||||
|
||||
namespace sstables {
|
||||
|
||||
@@ -299,7 +300,8 @@ size_t local_compression::compress_max_size(size_t input_len) const {
|
||||
|
||||
void compression::set_compressor(compressor_ptr c) {
|
||||
if (c) {
|
||||
auto& cn = c->name();
|
||||
unqualified_name uqn(compressor::namespace_prefix, c->name());
|
||||
const sstring& cn = uqn;
|
||||
name.value = bytes(cn.begin(), cn.end());
|
||||
for (auto& p : c->options()) {
|
||||
if (p.first != compression_parameters::SSTABLE_COMPRESSION) {
|
||||
|
||||
@@ -294,6 +294,12 @@ size_tiered_compaction_strategy::get_sstables_for_compaction(column_family& cfs,
|
||||
return sstables::compaction_descriptor(std::move(most_interesting));
|
||||
}
|
||||
|
||||
// If we are not enforcing min_threshold explicitly, try any pair of SStables in the same tier.
|
||||
if (!cfs.compaction_enforce_min_threshold() && is_any_bucket_interesting(buckets, 2)) {
|
||||
std::vector<sstables::shared_sstable> most_interesting = most_interesting_bucket(std::move(buckets), 2, max_threshold);
|
||||
return sstables::compaction_descriptor(std::move(most_interesting));
|
||||
}
|
||||
|
||||
// if there is no sstable to compact in standard way, try compacting single sstable whose droppable tombstone
|
||||
// ratio is greater than threshold.
|
||||
// prefer oldest sstables from biggest size tiers because they will be easier to satisfy conditions for
|
||||
|
||||
3
test.py
3
test.py
@@ -40,7 +40,6 @@ boost_tests = [
|
||||
'mutation_reader_test',
|
||||
'serialized_action_test',
|
||||
'cql_query_test',
|
||||
'secondary_index_test',
|
||||
'storage_proxy_test',
|
||||
'schema_change_test',
|
||||
'sstable_mutation_test',
|
||||
@@ -87,8 +86,6 @@ boost_tests = [
|
||||
'counter_test',
|
||||
'cell_locker_test',
|
||||
'view_schema_test',
|
||||
'view_build_test',
|
||||
'view_complex_test',
|
||||
'clustering_ranges_walker_test',
|
||||
'vint_serialization_test',
|
||||
'duration_test',
|
||||
|
||||
@@ -215,3 +215,22 @@ SEASTAR_TEST_CASE(test_aggregate_count) {
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_reverse_type_aggregation) {
|
||||
return do_with_cql_env_thread([&] (auto& e) {
|
||||
e.execute_cql("CREATE TABLE test(p int, c timestamp, v int, primary key (p, c)) with clustering order by (c desc)").get();
|
||||
e.execute_cql("INSERT INTO test(p, c, v) VALUES (1, 1, 1)").get();
|
||||
e.execute_cql("INSERT INTO test(p, c, v) VALUES (1, 2, 1)").get();
|
||||
|
||||
{
|
||||
auto tp = db_clock::from_time_t({ 0 }) + std::chrono::milliseconds(1);
|
||||
auto msg = e.execute_cql("SELECT min(c) FROM test").get0();
|
||||
assert_that(msg).is_rows().with_size(1).with_row({{timestamp_type->decompose(tp)}});
|
||||
}
|
||||
{
|
||||
auto tp = db_clock::from_time_t({ 0 }) + std::chrono::milliseconds(2);
|
||||
auto msg = e.execute_cql("SELECT max(c) FROM test").get0();
|
||||
assert_that(msg).is_rows().with_size(1).with_row({{timestamp_type->decompose(tp)}});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@@ -54,15 +54,6 @@ rows_assertions::is_empty() {
|
||||
return {*this};
|
||||
}
|
||||
|
||||
rows_assertions
|
||||
rows_assertions::is_not_empty() {
|
||||
auto row_count = _rows->rs().size();
|
||||
if (row_count == 0) {
|
||||
fail("Expected some rows, but was result was empty");
|
||||
}
|
||||
return {*this};
|
||||
}
|
||||
|
||||
rows_assertions
|
||||
rows_assertions::with_row(std::initializer_list<bytes_opt> values) {
|
||||
std::vector<bytes_opt> expected_row(values);
|
||||
|
||||
@@ -33,7 +33,6 @@ public:
|
||||
rows_assertions(shared_ptr<cql_transport::messages::result_message::rows> rows);
|
||||
rows_assertions with_size(size_t size);
|
||||
rows_assertions is_empty();
|
||||
rows_assertions is_not_empty();
|
||||
rows_assertions with_row(std::initializer_list<bytes_opt> values);
|
||||
|
||||
// Verifies that the result has the following rows and only those rows, in that order.
|
||||
|
||||
@@ -81,29 +81,6 @@ SEASTAR_TEST_CASE(test_create_table_with_id_statement) {
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_drop_table_with_si_and_mv) {
|
||||
return do_with_cql_env([](cql_test_env& e) {
|
||||
return seastar::async([&e] {
|
||||
e.execute_cql("CREATE TABLE tbl (a int, b int, c float, PRIMARY KEY (a))").get();
|
||||
e.execute_cql("CREATE INDEX idx1 ON tbl (b)").get();
|
||||
e.execute_cql("CREATE INDEX idx2 ON tbl (c)").get();
|
||||
e.execute_cql("CREATE MATERIALIZED VIEW tbl_view AS SELECT c FROM tbl WHERE c IS NOT NULL PRIMARY KEY (c, a)").get();
|
||||
// dropping a table with materialized views is prohibited
|
||||
assert_that_failed(e.execute_cql("DROP TABLE tbl"));
|
||||
e.execute_cql("DROP MATERIALIZED VIEW tbl_view").get();
|
||||
// dropping a table with secondary indexes is fine
|
||||
e.execute_cql("DROP TABLE tbl").get();
|
||||
|
||||
e.execute_cql("CREATE TABLE tbl (a int, b int, c float, PRIMARY KEY (a))").get();
|
||||
e.execute_cql("CREATE INDEX idx1 ON tbl (b)").get();
|
||||
e.execute_cql("CREATE INDEX idx2 ON tbl (c)").get();
|
||||
e.execute_cql("CREATE MATERIALIZED VIEW tbl_view AS SELECT c FROM tbl WHERE c IS NOT NULL PRIMARY KEY (c, a)").get();
|
||||
// dropping whole keyspace with MV and SI is fine too
|
||||
e.execute_cql("DROP KEYSPACE ks").get();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_insert_statement) {
|
||||
return do_with_cql_env([] (cql_test_env& e) {
|
||||
return e.execute_cql("create table cf (p1 varchar, c1 int, r1 int, PRIMARY KEY (p1, c1));").discard_result().then([&e] {
|
||||
@@ -2070,10 +2047,9 @@ SEASTAR_TEST_CASE(test_in_restriction) {
|
||||
assert_that(msg).is_rows().with_size(0);
|
||||
return e.execute_cql("select r1 from tir where p1 in (2, 0, 2, 1);");
|
||||
}).then([&e] (shared_ptr<cql_transport::messages::result_message> msg) {
|
||||
assert_that(msg).is_rows().with_rows({
|
||||
assert_that(msg).is_rows().with_rows_ignore_order({
|
||||
{int32_type->decompose(4)},
|
||||
{int32_type->decompose(0)},
|
||||
{int32_type->decompose(4)},
|
||||
{int32_type->decompose(1)},
|
||||
{int32_type->decompose(2)},
|
||||
{int32_type->decompose(3)},
|
||||
@@ -2095,6 +2071,22 @@ SEASTAR_TEST_CASE(test_in_restriction) {
|
||||
{int32_type->decompose(2)},
|
||||
{int32_type->decompose(1)},
|
||||
});
|
||||
return e.prepare("select r1 from tir where p1 in ?");
|
||||
}).then([&e] (cql3::prepared_cache_key_type prepared_id){
|
||||
auto my_list_type = list_type_impl::get_instance(int32_type, true);
|
||||
std::vector<cql3::raw_value> raw_values;
|
||||
auto in_values_list = my_list_type->decompose(make_list_value(my_list_type,
|
||||
list_type_impl::native_type{{int(2), int(0), int(2), int(1)}}));
|
||||
raw_values.emplace_back(cql3::raw_value::make_value(in_values_list));
|
||||
return e.execute_prepared(prepared_id,raw_values);
|
||||
}).then([&e] (shared_ptr<cql_transport::messages::result_message> msg) {
|
||||
assert_that(msg).is_rows().with_rows_ignore_order({
|
||||
{int32_type->decompose(4)},
|
||||
{int32_type->decompose(0)},
|
||||
{int32_type->decompose(1)},
|
||||
{int32_type->decompose(2)},
|
||||
{int32_type->decompose(3)},
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -2538,6 +2530,54 @@ SEASTAR_TEST_CASE(test_pg_style_string_literal) {
|
||||
});
|
||||
}
|
||||
|
||||
// Checks that an equality restriction on an indexed regular column (`country`)
// selects exactly the rows whose value matches.
SEASTAR_TEST_CASE(test_secondary_index_regular_column_query) {
    return do_with_cql_env_thread([] (cql_test_env& e) {
        // Schema, indexes and fixture rows, executed strictly in this order.
        for (const char* statement : {
                "CREATE TABLE users (userid int, name text, email text, country text, PRIMARY KEY (userid));",
                "CREATE INDEX ON users (email);",
                "CREATE INDEX ON users (country);",
                "INSERT INTO users (userid, name, email, country) VALUES (0, 'Bondie Easseby', 'beassebyv@house.gov', 'France');",
                "INSERT INTO users (userid, name, email, country) VALUES (1, 'Demetri Curror', 'dcurrorw@techcrunch.com', 'France');",
                "INSERT INTO users (userid, name, email, country) VALUES (2, 'Langston Paulisch', 'lpaulischm@reverbnation.com', 'United States');",
                "INSERT INTO users (userid, name, email, country) VALUES (3, 'Channa Devote', 'cdevote14@marriott.com', 'Denmark');"}) {
            e.execute_cql(statement).get();
        }

        auto msg = e.execute_cql("SELECT email FROM users WHERE country = 'France';").get0();

        // The two matching rows belong to different partitions (userid 0 and 1)
        // and the query has no ORDER BY, so the relative order of the results is
        // not what this test is about. Compare the rows as an unordered set to
        // avoid depending on the order in which partitions happen to be returned.
        assert_that(msg).is_rows().with_rows_ignore_order({
            { utf8_type->decompose(sstring("beassebyv@house.gov")) },
            { utf8_type->decompose(sstring("dcurrorw@techcrunch.com")) },
        });
    });
}
|
||||
|
||||
// Checks that an equality restriction on an indexed clustering-key column
// (`country`) selects exactly the rows whose value matches.
SEASTAR_TEST_CASE(test_secondary_index_clustering_key_query) {
    return do_with_cql_env_thread([] (cql_test_env& e) {
        // Schema, index and fixture rows, executed strictly in this order.
        for (const char* statement : {
                "CREATE TABLE users (userid int, name text, email text, country text, PRIMARY KEY (userid, country));",
                "CREATE INDEX ON users (country);",
                "INSERT INTO users (userid, name, email, country) VALUES (0, 'Bondie Easseby', 'beassebyv@house.gov', 'France');",
                "INSERT INTO users (userid, name, email, country) VALUES (1, 'Demetri Curror', 'dcurrorw@techcrunch.com', 'France');",
                "INSERT INTO users (userid, name, email, country) VALUES (2, 'Langston Paulisch', 'lpaulischm@reverbnation.com', 'United States');",
                "INSERT INTO users (userid, name, email, country) VALUES (3, 'Channa Devote', 'cdevote14@marriott.com', 'Denmark');"}) {
            e.execute_cql(statement).get();
        }

        auto msg = e.execute_cql("SELECT email FROM users WHERE country = 'France';").get0();

        // Both matches sit in different partitions (userid 0 and 1) and the
        // query carries no ORDER BY; assert on the set of rows rather than on
        // the order in which the partitions happen to be returned.
        assert_that(msg).is_rows().with_rows_ignore_order({
            { utf8_type->decompose(sstring("beassebyv@house.gov")) },
            { utf8_type->decompose(sstring("dcurrorw@techcrunch.com")) },
        });
    });
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_insert_large_collection_values) {
|
||||
return do_with_cql_env([] (cql_test_env& e) {
|
||||
return seastar::async([&e] {
|
||||
@@ -2582,3 +2622,81 @@ SEASTAR_TEST_CASE(test_insert_large_collection_values) {
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// Corner-case test that checks for the paging code's preparedness for an empty
|
||||
// range list.
|
||||
SEASTAR_TEST_CASE(test_empty_partition_range_scan) {
|
||||
return do_with_cql_env_thread([] (cql_test_env& e) {
|
||||
e.execute_cql("create keyspace empty_partition_range_scan with replication = {'class': 'SimpleStrategy', 'replication_factor': 1};").get();
|
||||
e.execute_cql("create table empty_partition_range_scan.tb (a int, b int, c int, val int, PRIMARY KEY ((a,b),c) );").get();
|
||||
|
||||
|
||||
auto qo = std::make_unique<cql3::query_options>(db::consistency_level::LOCAL_ONE, std::vector<cql3::raw_value>{},
|
||||
cql3::query_options::specific_options{1, nullptr, {}, api::new_timestamp()});
|
||||
auto res = e.execute_cql("select * from empty_partition_range_scan.tb where token (a,b) > 1 and token(a,b) <= 1;", std::move(qo)).get0();
|
||||
assert_that(res).is_rows().is_empty();
|
||||
});
|
||||
}
|
||||
|
||||
// Runs a sequence of modifications against a static list<int> column (`slist`)
// using statements that also restrict or touch the clustering row, and after
// each one checks both the static list and the regular column `v` of row
// (p = 1, c = 1).
SEASTAR_TEST_CASE(test_static_multi_cell_static_lists_with_ckey) {
    return do_with_cql_env_thread([] (cql_test_env& e) {
        e.execute_cql("CREATE TABLE t (p int, c int, slist list<int> static, v int, PRIMARY KEY (p, c));").get();
        e.execute_cql("INSERT INTO t (p, c, slist, v) VALUES (1, 1, [1], 1); ").get();

        auto slist_type = list_type_impl::get_instance(int32_type, true);
        using list_values = list_type_impl::native_type;

        // Executes `statement`, then reads back (slist, v) for p = 1, c = 1 and
        // asserts that the row matches the expected list contents and value.
        auto apply_and_expect = [&] (const sstring& statement, list_values expected_slist, int32_t expected_v) {
            e.execute_cql(statement).get();
            auto msg = e.execute_cql("SELECT slist, v FROM t WHERE p = 1 AND c = 1;").get0();
            assert_that(msg).is_rows().with_row({
                { slist_type->decompose(make_list_value(slist_type, std::move(expected_slist))) },
                { int32_type->decompose(expected_v) }
            });
        };

        // Single-element expectations are written with double braces so the
        // argument unambiguously forms a one-element initializer list and can
        // never be matched against the vector's size constructor.

        // Overwrite one element by index.
        apply_and_expect("UPDATE t SET slist[0] = 3, v = 3 WHERE p = 1 AND c = 1;", list_values({{3}}), 3);
        // Replace the whole list.
        apply_and_expect("UPDATE t SET slist = [4], v = 4 WHERE p = 1 AND c = 1;", list_values({{4}}), 4);
        // Prepend.
        apply_and_expect("UPDATE t SET slist = [3] + slist , v = 5 WHERE p = 1 AND c = 1;", list_values({3, 4}), 5);
        // Append.
        apply_and_expect("UPDATE t SET slist = slist + [5] , v = 6 WHERE p = 1 AND c = 1;", list_values({3, 4, 5}), 6);
        // Delete one element by index; `v` is left untouched by this statement.
        apply_and_expect("DELETE slist[2] from t WHERE p = 1;", list_values({3, 4}), 6);
        // Remove by value.
        apply_and_expect("UPDATE t SET slist = slist - [4] , v = 7 WHERE p = 1 AND c = 1;", list_values({{3}}), 7);
    });
}
|
||||
|
||||
|
||||
@@ -39,7 +39,6 @@
|
||||
#include "tmpdir.hh"
|
||||
#include "db/query_context.hh"
|
||||
#include "test_services.hh"
|
||||
#include "db/view/view_builder.hh"
|
||||
|
||||
// TODO: remove (#293)
|
||||
#include "message/messaging_service.hh"
|
||||
@@ -48,7 +47,6 @@
|
||||
#include "service/storage_service.hh"
|
||||
#include "auth/service.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "db/system_distributed_keyspace.hh"
|
||||
|
||||
namespace sstables {
|
||||
|
||||
@@ -90,7 +88,6 @@ public:
|
||||
private:
|
||||
::shared_ptr<distributed<database>> _db;
|
||||
::shared_ptr<sharded<auth::service>> _auth_service;
|
||||
::shared_ptr<sharded<db::view::view_builder>> _view_builder;
|
||||
lw_shared_ptr<tmpdir> _data_dir;
|
||||
private:
|
||||
struct core_local_state {
|
||||
@@ -115,13 +112,7 @@ private:
|
||||
return ::make_shared<service::query_state>(_core_local.local().client_state);
|
||||
}
|
||||
public:
|
||||
single_node_cql_env(
|
||||
::shared_ptr<distributed<database>> db,
|
||||
::shared_ptr<sharded<auth::service>> auth_service,
|
||||
::shared_ptr<sharded<db::view::view_builder>> view_builder)
|
||||
: _db(db)
|
||||
, _auth_service(std::move(auth_service))
|
||||
, _view_builder(std::move(view_builder))
|
||||
single_node_cql_env(::shared_ptr<distributed<database>> db, ::shared_ptr<sharded<auth::service>> auth_service) : _db(db), _auth_service(std::move(auth_service))
|
||||
{ }
|
||||
|
||||
virtual future<::shared_ptr<cql_transport::messages::result_message>> execute_cql(const sstring& text) override {
|
||||
@@ -264,10 +255,6 @@ public:
|
||||
return _auth_service->local();
|
||||
}
|
||||
|
||||
virtual db::view::view_builder& local_view_builder() override {
|
||||
return _view_builder->local();
|
||||
}
|
||||
|
||||
future<> start() {
|
||||
return _core_local.start(std::ref(*_auth_service));
|
||||
}
|
||||
@@ -307,14 +294,12 @@ public:
|
||||
cfg->data_file_directories() = {data_dir.path};
|
||||
}
|
||||
cfg->commitlog_directory() = data_dir.path + "/commitlog.dir";
|
||||
cfg->hints_directory() = data_dir.path + "/hints.dir";
|
||||
cfg->num_tokens() = 256;
|
||||
cfg->ring_delay_ms() = 500;
|
||||
cfg->experimental() = true;
|
||||
cfg->shutdown_announce_in_ms() = 0;
|
||||
boost::filesystem::create_directories((data_dir.path + "/system").c_str());
|
||||
boost::filesystem::create_directories(cfg->commitlog_directory().c_str());
|
||||
boost::filesystem::create_directories(cfg->hints_directory().c_str());
|
||||
|
||||
const gms::inet_address listen("127.0.0.1");
|
||||
auto& ms = netw::get_messaging_service();
|
||||
@@ -322,11 +307,9 @@ public:
|
||||
auto stop_ms = defer([&ms] { ms.stop().get(); });
|
||||
|
||||
auto auth_service = ::make_shared<sharded<auth::service>>();
|
||||
auto sys_dist_ks = seastar::sharded<db::system_distributed_keyspace>();
|
||||
auto stop_sys_dist_ks = defer([&sys_dist_ks] { sys_dist_ks.stop().get(); });
|
||||
|
||||
auto& ss = service::get_storage_service();
|
||||
ss.start(std::ref(*db), std::ref(*auth_service), std::ref(sys_dist_ks)).get();
|
||||
ss.start(std::ref(*db), std::ref(*auth_service)).get();
|
||||
auto stop_storage_service = defer([&ss] { ss.stop().get(); });
|
||||
|
||||
db->start(std::move(*cfg), database_config()).get();
|
||||
@@ -385,13 +368,6 @@ public:
|
||||
auth_service->stop().get();
|
||||
});
|
||||
|
||||
auto view_builder = ::make_shared<seastar::sharded<db::view::view_builder>>();
|
||||
view_builder->start(std::ref(*db), std::ref(sys_dist_ks), std::ref(mm)).get();
|
||||
view_builder->invoke_on_all(&db::view::view_builder::start).get();
|
||||
auto stop_view_builder = defer([view_builder] {
|
||||
view_builder->stop().get();
|
||||
});
|
||||
|
||||
// Create the testing user.
|
||||
try {
|
||||
auth::role_config config;
|
||||
@@ -407,7 +383,7 @@ public:
|
||||
// The default user may already exist if this `cql_test_env` is starting with previously populated data.
|
||||
}
|
||||
|
||||
single_node_cql_env env(db, auth_service, view_builder);
|
||||
single_node_cql_env env(db, auth_service);
|
||||
env.start().get();
|
||||
auto stop_env = defer([&env] { env.stop().get(); });
|
||||
|
||||
@@ -446,13 +422,12 @@ future<> do_with_cql_env_thread(std::function<void(cql_test_env&)> func) {
|
||||
class storage_service_for_tests::impl {
|
||||
distributed<database> _db;
|
||||
sharded<auth::service> _auth_service;
|
||||
sharded<db::system_distributed_keyspace> _sys_dist_ks;
|
||||
public:
|
||||
impl() {
|
||||
auto thread = seastar::thread_impl::get();
|
||||
assert(thread);
|
||||
netw::get_messaging_service().start(gms::inet_address("127.0.0.1"), 7000, false).get();
|
||||
service::get_storage_service().start(std::ref(_db), std::ref(_auth_service), std::ref(_sys_dist_ks)).get();
|
||||
netw::get_messaging_service().start(gms::inet_address("127.0.0.1")).get();
|
||||
service::get_storage_service().start(std::ref(_db), std::ref(_auth_service)).get();
|
||||
service::get_storage_service().invoke_on_all([] (auto& ss) {
|
||||
ss.enable_all_features();
|
||||
}).get();
|
||||
|
||||
@@ -38,10 +38,6 @@
|
||||
|
||||
class database;
|
||||
|
||||
namespace db::view {
|
||||
class view_builder;
|
||||
}
|
||||
|
||||
namespace auth {
|
||||
class service;
|
||||
}
|
||||
@@ -99,28 +95,9 @@ public:
|
||||
virtual distributed<cql3::query_processor> & qp() = 0;
|
||||
|
||||
virtual auth::service& local_auth_service() = 0;
|
||||
|
||||
virtual db::view::view_builder& local_view_builder() = 0;
|
||||
};
|
||||
|
||||
future<> do_with_cql_env(std::function<future<>(cql_test_env&)> func);
|
||||
future<> do_with_cql_env(std::function<future<>(cql_test_env&)> func, const db::config&);
|
||||
future<> do_with_cql_env_thread(std::function<void(cql_test_env&)> func);
|
||||
future<> do_with_cql_env_thread(std::function<void(cql_test_env&)> func, const db::config&);
|
||||
|
||||
template<typename EventuallySucceedingFunction>
|
||||
static void eventually(EventuallySucceedingFunction&& f, size_t max_attempts = 10) {
|
||||
size_t attempts = 0;
|
||||
while (true) {
|
||||
try {
|
||||
f();
|
||||
break;
|
||||
} catch (...) {
|
||||
if (++attempts < max_attempts) {
|
||||
sleep(std::chrono::milliseconds(1 << attempts)).get0();
|
||||
} else {
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -29,6 +29,9 @@
|
||||
#include "database.hh"
|
||||
#include "partition_slice_builder.hh"
|
||||
#include "frozen_mutation.hh"
|
||||
#include "mutation_source_test.hh"
|
||||
#include "schema_registry.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
|
||||
SEASTAR_TEST_CASE(test_querying_with_limits) {
|
||||
return do_with_cql_env([](cql_test_env& e) {
|
||||
@@ -74,3 +77,33 @@ SEASTAR_TEST_CASE(test_querying_with_limits) {
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// Runs the generic mutation_source test suite against a real table: for every
// (schema, partitions) pair handed out by the suite, the table is (re)created,
// the partitions are applied and flushed, the row cache is invalidated, and a
// mutation_source reading from that table is returned.
SEASTAR_THREAD_TEST_CASE(test_database_with_data_in_sstables_is_a_mutation_source) {
    do_with_cql_env([] (cql_test_env& e) {
        auto populate = [&e] (schema_ptr s, const std::vector<mutation>& partitions) -> mutation_source {
            // Drop a table left over from a previous invocation with the same name, if any.
            try {
                e.local_db().find_column_family(s->ks_name(), s->cf_name());
                service::get_local_migration_manager().announce_column_family_drop(s->ks_name(), s->cf_name(), true).get();
            } catch (const no_such_column_family&) {
                // expected: nothing to drop
            }

            service::get_local_migration_manager().announce_new_column_family(s, true).get();
            column_family& cf = e.local_db().find_column_family(s);

            for (const auto& partition : partitions) {
                e.local_db().apply(cf.schema(), freeze(partition)).get();
            }

            // Flush, then invalidate the row cache after the flush.
            cf.flush().get();
            cf.get_row_cache().invalidate([] {}).get();

            // The returned source keeps referring to the table object itself.
            return mutation_source([&cf] (schema_ptr query_schema,
                    const dht::partition_range& range,
                    const query::partition_slice& slice,
                    const io_priority_class& pc,
                    tracing::trace_state_ptr trace_state,
                    streamed_mutation::forwarding fwd,
                    mutation_reader::forwarding fwd_mr) {
                return cf.make_reader(query_schema, range, slice, pc, std::move(trace_state), fwd, fwd_mr);
            });
        };

        run_mutation_source_tests(populate);
        return make_ready_future<>();
    }).get();
}
|
||||
|
||||
@@ -22,7 +22,6 @@
|
||||
|
||||
#include "core/reactor.hh"
|
||||
#include "core/app-template.hh"
|
||||
#include "db/system_distributed_keyspace.hh"
|
||||
#include "message/messaging_service.hh"
|
||||
#include "gms/failure_detector.hh"
|
||||
#include "gms/gossiper.hh"
|
||||
@@ -70,8 +69,7 @@ int main(int ac, char ** av) {
|
||||
utils::fb_utilities::set_broadcast_rpc_address(listen);
|
||||
auto vv = std::make_shared<gms::versioned_value::factory>();
|
||||
locator::i_endpoint_snitch::create_snitch("SimpleSnitch").then([&auth_service, &db] {
|
||||
sharded<db::system_distributed_keyspace> sys_dist_ks;
|
||||
return service::init_storage_service(db, auth_service, sys_dist_ks);
|
||||
return service::init_storage_service(db, auth_service);
|
||||
}).then([vv, listen, config] {
|
||||
return netw::get_messaging_service().start(listen);
|
||||
}).then([config] {
|
||||
|
||||
@@ -31,16 +31,14 @@
|
||||
#include "service/storage_service.hh"
|
||||
#include "core/distributed.hh"
|
||||
#include "database.hh"
|
||||
#include "db/system_distributed_keyspace.hh"
|
||||
|
||||
SEASTAR_TEST_CASE(test_boot_shutdown){
|
||||
return seastar::async([] {
|
||||
distributed<database> db;
|
||||
sharded<auth::service> auth_service;
|
||||
sharded<db::system_distributed_keyspace> sys_dist_ks;
|
||||
utils::fb_utilities::set_broadcast_address(gms::inet_address("127.0.0.1"));
|
||||
locator::i_endpoint_snitch::create_snitch("SimpleSnitch").get();
|
||||
service::get_storage_service().start(std::ref(db), std::ref(auth_service), std::ref(sys_dist_ks)).get();
|
||||
service::get_storage_service().start(std::ref(db), std::ref(auth_service)).get();
|
||||
db.start().get();
|
||||
netw::get_messaging_service().start(gms::inet_address("127.0.0.1")).get();
|
||||
gms::get_failure_detector().start().get();
|
||||
|
||||
@@ -26,11 +26,13 @@
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <query-result-set.hh>
|
||||
#include <query-result-writer.hh>
|
||||
|
||||
#include "tests/test_services.hh"
|
||||
#include "tests/test-utils.hh"
|
||||
#include "tests/mutation_assertions.hh"
|
||||
#include "tests/result_set_assertions.hh"
|
||||
#include "tests/mutation_source_test.hh"
|
||||
|
||||
#include "mutation_query.hh"
|
||||
#include "core/do_with.hh"
|
||||
@@ -525,3 +527,22 @@ SEASTAR_TEST_CASE(test_partition_limit) {
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Runs the same unrestricted query twice over one generated mutation — once
// building only a digest, once building result plus digest — and requires both
// builders' memory accounters to report the same amount of used memory.
SEASTAR_THREAD_TEST_CASE(test_result_size_calculation) {
    random_mutation_generator gen(random_mutation_generator::generate_counters::no);
    std::vector<mutation> mutations = gen(1);
    schema_ptr s = gen.schema();
    mutation_source source = make_source(std::move(mutations));

    query::result_memory_limiter limiter;
    query::partition_slice slice = make_full_slice(*s);
    slice.options.set<query::partition_slice::option::allow_short_read>();

    const auto no_limit = std::numeric_limits<uint32_t>::max();

    // Queries the full partition range with no row or partition limit into `builder`.
    auto run_query = [&] (query::result::builder& builder) {
        data_query(s, source, query::full_partition_range, slice, no_limit, no_limit, gc_clock::now(), builder).get0();
    };

    query::result::builder digest_only_builder(
            slice,
            query::result_options{query::result_request::only_digest, query::digest_algorithm::xxHash},
            limiter.new_digest_read(query::result_memory_limiter::maximum_result_size).get0());
    run_query(digest_only_builder);

    query::result::builder result_and_digest_builder(
            slice,
            query::result_options{query::result_request::result_and_digest, query::digest_algorithm::xxHash},
            limiter.new_data_read(query::result_memory_limiter::maximum_result_size).get0());
    run_query(result_and_digest_builder);

    BOOST_REQUIRE_EQUAL(digest_only_builder.memory_accounter().used_memory(), result_and_digest_builder.memory_accounter().used_memory());
}
|
||||
|
||||
|
||||
@@ -659,6 +659,46 @@ void test_mutation_reader_fragments_have_monotonic_positions(populate_fn populat
|
||||
});
|
||||
}
|
||||
|
||||
// Populates a source with a single partition (static row, partition tombstone
// and one clustering row at key 0) under a schema that uses the date-tiered
// compaction strategy, then reads it back through two clustering slices: one
// that lies outside the existing rows and one that hits the row exactly.
static void test_date_tiered_clustering_slicing(populate_fn populate) {
    BOOST_TEST_MESSAGE(__PRETTY_FUNCTION__);

    simple_schema table;

    auto s = schema_builder(table.schema())
        .set_compaction_strategy(sstables::compaction_strategy_type::date_tiered)
        .build();

    auto pkey = table.make_pkey();

    mutation m1(s, pkey);
    table.add_static_row(m1, "s");
    m1.partition().apply(table.new_tombstone());
    table.add_row(m1, table.make_ckey(0), "v1");

    mutation_source ms = populate(s, {m1});

    // Both reads target the one partition that was written.
    auto prange = dht::partition_range::make_singular(pkey);

    // query row outside the range of existing rows to exercise sstable clustering key filter
    {
        auto outside_rows = partition_slice_builder(*s)
            .with_range(table.make_ckey_range(1, 2))
            .build();
        auto reader_check = assert_that(ms.make_reader(s, prange, outside_rows));
        reader_check.produces(m1, outside_rows.row_ranges(*s, pkey.key()))
            .produces_end_of_stream();
    }

    // query exactly the clustering key that exists
    {
        auto exact_row = partition_slice_builder(*s)
            .with_range(query::clustering_range::make_singular(table.make_ckey(0)))
            .build();
        auto reader_check = assert_that(ms.make_reader(s, prange, exact_row));
        reader_check.produces(m1)
            .produces_end_of_stream();
    }
}
|
||||
|
||||
static void test_clustering_slices(populate_fn populate) {
|
||||
BOOST_TEST_MESSAGE(__PRETTY_FUNCTION__);
|
||||
auto s = schema_builder("ks", "cf")
|
||||
@@ -1012,6 +1052,7 @@ void test_slicing_with_overlapping_range_tombstones(populate_fn populate) {
|
||||
}
|
||||
|
||||
void run_mutation_reader_tests(populate_fn populate) {
|
||||
test_date_tiered_clustering_slicing(populate);
|
||||
test_fast_forwarding_across_partitions_to_empty_range(populate);
|
||||
test_clustering_slices(populate);
|
||||
test_mutation_reader_fragments_have_monotonic_positions(populate);
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user