Compare commits
47 Commits
next
...
scylla-3.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0f3a21f0bb | ||
|
|
976db7e9e0 | ||
|
|
996b86b804 | ||
|
|
b7b217cc43 | ||
|
|
c274430933 | ||
|
|
893a18a7c4 | ||
|
|
39b39058fc | ||
|
|
6bf4a73d88 | ||
|
|
ca4846dd63 | ||
|
|
2663ff7bc1 | ||
|
|
043a575fcd | ||
|
|
00dc400993 | ||
|
|
522a48a244 | ||
|
|
5faa28ce45 | ||
|
|
52be02558e | ||
|
|
a7cbfbe63f | ||
|
|
28fd2044d2 | ||
|
|
76ff2e5c3d | ||
|
|
7b34d54a96 | ||
|
|
26c31f6798 | ||
|
|
28fa66591a | ||
|
|
0fee1d9e43 | ||
|
|
76e72e28f4 | ||
|
|
f969e80965 | ||
|
|
2029134063 | ||
|
|
f30fe7bd17 | ||
|
|
aeb418af9e | ||
|
|
714e6d741f | ||
|
|
95c5872450 | ||
|
|
87f8968553 | ||
|
|
2895428d44 | ||
|
|
e18f182cfc | ||
|
|
cf8cdbf87d | ||
|
|
eb2814067d | ||
|
|
0c722d4547 | ||
|
|
54cf463430 | ||
|
|
d2a0622edd | ||
|
|
60edaec757 | ||
|
|
5802532cb3 | ||
|
|
83ea91055e | ||
|
|
e7863d3d54 | ||
|
|
57f124b905 | ||
|
|
40d8de5784 | ||
|
|
1468ec62de | ||
|
|
c6ef56ae1e | ||
|
|
ad62313b86 | ||
|
|
de87f798e1 |
2
.gitmodules
vendored
2
.gitmodules
vendored
@@ -1,6 +1,6 @@
|
||||
[submodule "seastar"]
|
||||
path = seastar
|
||||
url = ../seastar
|
||||
url = ../scylla-seastar
|
||||
ignore = dirty
|
||||
[submodule "swagger-ui"]
|
||||
path = swagger-ui
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/bin/sh
|
||||
|
||||
VERSION=666.development
|
||||
VERSION=3.0.rc1
|
||||
|
||||
if test -f version
|
||||
then
|
||||
|
||||
@@ -2228,11 +2228,11 @@
|
||||
"description":"The column family"
|
||||
},
|
||||
"total":{
|
||||
"type":"int",
|
||||
"type":"long",
|
||||
"description":"The total snapshot size"
|
||||
},
|
||||
"live":{
|
||||
"type":"int",
|
||||
"type":"long",
|
||||
"description":"The live snapshot size"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -200,8 +200,9 @@ public:
|
||||
return _current_start;
|
||||
}
|
||||
|
||||
position_in_partition_view upper_bound() const {
|
||||
return _current_end;
|
||||
// Returns the upper bound of the last range in provided ranges set
|
||||
position_in_partition_view uppermost_bound() const {
|
||||
return position_in_partition_view::for_range_end(_ranges.back());
|
||||
}
|
||||
|
||||
// When lower_bound() changes, this also does
|
||||
|
||||
@@ -67,6 +67,12 @@ class error_collector : public error_listener<RecognizerType, ExceptionBaseType>
|
||||
*/
|
||||
const sstring_view _query;
|
||||
|
||||
/**
|
||||
* An empty bitset to be used as a workaround for AntLR null dereference
|
||||
* bug.
|
||||
*/
|
||||
static typename ExceptionBaseType::BitsetListType _empty_bit_list;
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
@@ -144,6 +150,14 @@ private:
|
||||
break;
|
||||
}
|
||||
default:
|
||||
// AntLR Exception class has a bug of dereferencing a null
|
||||
// pointer in the displayRecognitionError. The following
|
||||
// if statement makes sure it will not be null before the
|
||||
// call to that function (displayRecognitionError).
|
||||
// bug reference: https://github.com/antlr/antlr3/issues/191
|
||||
if (!ex->get_expectingSet()) {
|
||||
ex->set_expectingSet(&_empty_bit_list);
|
||||
}
|
||||
ex->displayRecognitionError(token_names, msg);
|
||||
}
|
||||
return msg.str();
|
||||
@@ -345,4 +359,8 @@ private:
|
||||
#endif
|
||||
};
|
||||
|
||||
template<typename RecognizerType, typename TokenType, typename ExceptionBaseType>
|
||||
typename ExceptionBaseType::BitsetListType
|
||||
error_collector<RecognizerType,TokenType,ExceptionBaseType>::_empty_bit_list = typename ExceptionBaseType::BitsetListType();
|
||||
|
||||
}
|
||||
|
||||
@@ -106,6 +106,11 @@ public:
|
||||
virtual size_t prefix_size() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t prefix_size(const schema_ptr schema) const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template<>
|
||||
@@ -129,5 +134,23 @@ inline bool primary_key_restrictions<clustering_key>::needs_filtering(const sche
|
||||
return false;
|
||||
}
|
||||
|
||||
template<>
|
||||
inline size_t primary_key_restrictions<clustering_key>::prefix_size(const schema_ptr schema) const {
|
||||
size_t count = 0;
|
||||
if (schema->clustering_key_columns().empty()) {
|
||||
return count;
|
||||
}
|
||||
auto column_defs = get_column_defs();
|
||||
column_id expected_column_id = schema->clustering_key_columns().begin()->id;
|
||||
for (auto&& cdef : column_defs) {
|
||||
if (schema->position(*cdef) != expected_column_id) {
|
||||
return count;
|
||||
}
|
||||
expected_column_id++;
|
||||
count++;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -166,19 +166,7 @@ public:
|
||||
}
|
||||
|
||||
virtual size_t prefix_size() const override {
|
||||
size_t count = 0;
|
||||
if (_schema->clustering_key_columns().empty()) {
|
||||
return count;
|
||||
}
|
||||
column_id expected_column_id = _schema->clustering_key_columns().begin()->id;
|
||||
for (const auto& restriction_entry : _restrictions->restrictions()) {
|
||||
if (_schema->position(*restriction_entry.first) != expected_column_id) {
|
||||
return count;
|
||||
}
|
||||
expected_column_id++;
|
||||
count++;
|
||||
}
|
||||
return count;
|
||||
return primary_key_restrictions<ValueType>::prefix_size(_schema);
|
||||
}
|
||||
|
||||
::shared_ptr<single_column_primary_key_restrictions<clustering_key>> get_longest_prefix_restrictions() {
|
||||
|
||||
@@ -337,6 +337,52 @@ const std::vector<::shared_ptr<restrictions>>& statement_restrictions::index_res
|
||||
return _index_restrictions;
|
||||
}
|
||||
|
||||
std::optional<secondary_index::index> statement_restrictions::find_idx(secondary_index::secondary_index_manager& sim) const {
|
||||
for (::shared_ptr<cql3::restrictions::restrictions> restriction : index_restrictions()) {
|
||||
for (const auto& cdef : restriction->get_column_defs()) {
|
||||
for (auto index : sim.list_indexes()) {
|
||||
if (index.depends_on(*cdef)) {
|
||||
return std::make_optional<secondary_index::index>(std::move(index));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
std::vector<const column_definition*> statement_restrictions::get_column_defs_for_filtering(database& db) const {
|
||||
std::vector<const column_definition*> column_defs_for_filtering;
|
||||
if (need_filtering()) {
|
||||
auto& sim = db.find_column_family(_schema).get_index_manager();
|
||||
std::optional<secondary_index::index> opt_idx = find_idx(sim);
|
||||
auto column_uses_indexing = [&opt_idx] (const column_definition* cdef) {
|
||||
return opt_idx && opt_idx->depends_on(*cdef);
|
||||
};
|
||||
if (_partition_key_restrictions->needs_filtering(*_schema)) {
|
||||
for (auto&& cdef : _partition_key_restrictions->get_column_defs()) {
|
||||
if (!column_uses_indexing(cdef)) {
|
||||
column_defs_for_filtering.emplace_back(cdef);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (_clustering_columns_restrictions->needs_filtering(*_schema)) {
|
||||
column_id first_non_prefix_id = _schema->clustering_key_columns().begin()->id +
|
||||
_clustering_columns_restrictions->prefix_size(_schema);
|
||||
for (auto&& cdef : _clustering_columns_restrictions->get_column_defs()) {
|
||||
if ((cdef->id >= first_non_prefix_id) && (!column_uses_indexing(cdef))) {
|
||||
column_defs_for_filtering.emplace_back(cdef);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto&& cdef : _nonprimary_key_restrictions->get_column_defs()) {
|
||||
if (!column_uses_indexing(cdef)) {
|
||||
column_defs_for_filtering.emplace_back(cdef);
|
||||
}
|
||||
}
|
||||
}
|
||||
return column_defs_for_filtering;
|
||||
}
|
||||
|
||||
void statement_restrictions::process_partition_key_restrictions(bool has_queriable_index, bool for_view, bool allow_filtering) {
|
||||
// If there is a queriable index, no special conditions are required on the other restrictions.
|
||||
// But we still need to know 2 things:
|
||||
|
||||
@@ -163,6 +163,20 @@ public:
|
||||
return _clustering_columns_restrictions;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a possibly empty collection of column definitions that will be used for filtering
|
||||
* @param db - the database context
|
||||
* @return A list with the column definitions needed for filtering.
|
||||
*/
|
||||
std::vector<const column_definition*> get_column_defs_for_filtering(database& db) const;
|
||||
|
||||
/**
|
||||
* Determines the index to be used with the restriction.
|
||||
* @param sim - the secondary index manager whose indexes are searched
|
||||
* @return If an index can be used, an optional containing this index, otherwise an empty optional.
|
||||
*/
|
||||
std::optional<secondary_index::index> find_idx(secondary_index::secondary_index_manager& sim) const;
|
||||
|
||||
/**
|
||||
* Checks if the partition key has some unrestricted components.
|
||||
* @return <code>true</code> if the partition key has some unrestricted components, <code>false</code> otherwise.
|
||||
|
||||
@@ -156,9 +156,9 @@ public:
|
||||
return _factories->uses_function(ks_name, function_name);
|
||||
}
|
||||
|
||||
virtual uint32_t add_column_for_ordering(const column_definition& c) override {
|
||||
uint32_t index = selection::add_column_for_ordering(c);
|
||||
_factories->add_selector_for_ordering(c, index);
|
||||
virtual uint32_t add_column_for_post_processing(const column_definition& c) override {
|
||||
uint32_t index = selection::add_column_for_post_processing(c);
|
||||
_factories->add_selector_for_post_processing(c, index);
|
||||
return index;
|
||||
}
|
||||
|
||||
@@ -227,7 +227,7 @@ protected:
|
||||
return simple_selection::make(schema, std::move(columns), false);
|
||||
}
|
||||
|
||||
uint32_t selection::add_column_for_ordering(const column_definition& c) {
|
||||
uint32_t selection::add_column_for_post_processing(const column_definition& c) {
|
||||
_columns.push_back(&c);
|
||||
_metadata->add_non_serialized_column(c.column_specification);
|
||||
return _columns.size() - 1;
|
||||
|
||||
@@ -176,7 +176,7 @@ public:
|
||||
static ::shared_ptr<selection> wildcard(schema_ptr schema);
|
||||
static ::shared_ptr<selection> for_columns(schema_ptr schema, std::vector<const column_definition*> columns);
|
||||
|
||||
virtual uint32_t add_column_for_ordering(const column_definition& c);
|
||||
virtual uint32_t add_column_for_post_processing(const column_definition& c);
|
||||
|
||||
virtual bool uses_function(const sstring &ks_name, const sstring& function_name) const {
|
||||
return false;
|
||||
|
||||
@@ -53,6 +53,7 @@ selector_factories::selector_factories(std::vector<::shared_ptr<selectable>> sel
|
||||
: _contains_write_time_factory(false)
|
||||
, _contains_ttl_factory(false)
|
||||
, _number_of_aggregate_factories(0)
|
||||
, _number_of_factories_for_post_processing(0)
|
||||
{
|
||||
_factories.reserve(selectables.size());
|
||||
|
||||
@@ -76,8 +77,9 @@ bool selector_factories::uses_function(const sstring& ks_name, const sstring& fu
|
||||
return false;
|
||||
}
|
||||
|
||||
void selector_factories::add_selector_for_ordering(const column_definition& def, uint32_t index) {
|
||||
void selector_factories::add_selector_for_post_processing(const column_definition& def, uint32_t index) {
|
||||
_factories.emplace_back(simple_selector::new_factory(def.name_as_text(), index, def.type));
|
||||
++_number_of_factories_for_post_processing;
|
||||
}
|
||||
|
||||
std::vector<::shared_ptr<selector>> selector_factories::new_instances() const {
|
||||
|
||||
@@ -74,6 +74,11 @@ private:
|
||||
*/
|
||||
uint32_t _number_of_aggregate_factories;
|
||||
|
||||
/**
|
||||
* The number of factories that are only for post processing.
|
||||
*/
|
||||
uint32_t _number_of_factories_for_post_processing;
|
||||
|
||||
public:
|
||||
/**
|
||||
* Creates a new <code>SelectorFactories</code> instance and collect the column definitions.
|
||||
@@ -97,11 +102,12 @@ public:
|
||||
bool uses_function(const sstring& ks_name, const sstring& function_name) const;
|
||||
|
||||
/**
|
||||
* Adds a new <code>Selector.Factory</code> for a column that is needed only for ORDER BY purposes.
|
||||
* Adds a new <code>Selector.Factory</code> for a column that is needed only for ORDER BY or post
|
||||
* processing purposes.
|
||||
* @param def the column that is needed for ordering or post processing
|
||||
* @param index the index of the column definition in the Selection's list of columns
|
||||
*/
|
||||
void add_selector_for_ordering(const column_definition& def, uint32_t index);
|
||||
void add_selector_for_post_processing(const column_definition& def, uint32_t index);
|
||||
|
||||
/**
|
||||
* Checks if this <code>SelectorFactories</code> contains only factories for aggregates.
|
||||
@@ -111,7 +117,7 @@ public:
|
||||
*/
|
||||
bool contains_only_aggregate_functions() const {
|
||||
auto size = _factories.size();
|
||||
return size != 0 && _number_of_aggregate_factories == size;
|
||||
return size != 0 && _number_of_aggregate_factories == (size - _number_of_factories_for_post_processing);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -137,10 +137,15 @@ create_index_statement::validate(service::storage_proxy& proxy, const service::c
|
||||
|
||||
bool is_map = dynamic_cast<const collection_type_impl *>(cd->type.get()) != nullptr
|
||||
&& dynamic_cast<const collection_type_impl *>(cd->type.get())->is_map();
|
||||
bool is_frozen_collection = cd->type->is_collection() && !cd->type->is_multi_cell();
|
||||
bool is_collection = cd->type->is_collection();
|
||||
bool is_frozen_collection = is_collection && !cd->type->is_multi_cell();
|
||||
|
||||
if (is_frozen_collection) {
|
||||
validate_for_frozen_collection(target);
|
||||
} else if (is_collection) {
|
||||
// NOTICE(sarna): should be lifted after #2962 (indexes on non-frozen collections) is implemented
|
||||
throw exceptions::invalid_request_exception(
|
||||
sprint("Cannot create secondary index on non-frozen collection column %s", cd->name_as_text()));
|
||||
} else {
|
||||
validate_not_full_index(target);
|
||||
validate_is_values_index_if_target_column_not_collection(cd, target);
|
||||
|
||||
@@ -315,6 +315,27 @@ future<shared_ptr<cql_transport::event::schema_change>> create_view_statement::a
|
||||
throw exceptions::invalid_request_exception(sprint("No columns are defined for Materialized View other than primary key"));
|
||||
}
|
||||
|
||||
// The unique feature of a filter by a non-key column is that the
|
||||
// value of such column can be updated - and also be expired with TTL
|
||||
// and cause the view row to appear and disappear. We don't currently
|
||||
// support this case - see issue #3430, and neither does
|
||||
// Cassandra - see CASSANDRA-13798 and CASSANDRA-13832.
|
||||
// Actually, as CASSANDRA-13798 explains, the problem is "the liveness of
|
||||
// view row is now depending on multiple base columns (multiple filtered
|
||||
// non-pk base column + base column used in view pk)". When the filtered
|
||||
// column *is* the base column added to the view pk, we don't have this
|
||||
// problem. And this case actually works correctly.
|
||||
auto non_pk_restrictions = restrictions->get_non_pk_restriction();
|
||||
if (non_pk_restrictions.size() == 1 && has_non_pk_column &&
|
||||
std::find(target_primary_keys.begin(), target_primary_keys.end(), non_pk_restrictions.cbegin()->first) != target_primary_keys.end()) {
|
||||
// This case (filter by new PK column of the view) works, as explained above
|
||||
} else if (!non_pk_restrictions.empty()) {
|
||||
auto column_names = ::join(", ", non_pk_restrictions | boost::adaptors::map_keys | boost::adaptors::transformed(std::mem_fn(&column_definition::name_as_text)));
|
||||
throw exceptions::invalid_request_exception(sprint(
|
||||
"Non-primary key columns cannot be restricted in the SELECT statement used for materialized view %s creation (got restrictions on: %s)",
|
||||
column_family(), column_names));
|
||||
}
|
||||
|
||||
schema_builder builder{keyspace(), column_family()};
|
||||
auto add_columns = [this, &builder] (std::vector<const column_definition*>& defs, column_kind kind) mutable {
|
||||
for (auto* def : defs) {
|
||||
|
||||
@@ -141,6 +141,10 @@ private:
|
||||
/** If ALLOW FILTERING was not specified, this verifies that it is not needed */
|
||||
void check_needs_filtering(::shared_ptr<restrictions::statement_restrictions> restrictions);
|
||||
|
||||
void ensure_filtering_columns_retrieval(database& db,
|
||||
::shared_ptr<selection::selection> selection,
|
||||
::shared_ptr<restrictions::statement_restrictions> restrictions);
|
||||
|
||||
bool contains_alias(::shared_ptr<column_identifier> name);
|
||||
|
||||
::shared_ptr<column_specification> limit_receiver();
|
||||
|
||||
@@ -410,7 +410,7 @@ select_statement::do_execute(service::storage_proxy& proxy,
|
||||
}
|
||||
|
||||
command->slice.options.set<query::partition_slice::option::allow_short_read>();
|
||||
auto timeout = db::timeout_clock::now() + options.get_timeout_config().*get_timeout_config_selector();
|
||||
auto timeout_duration = options.get_timeout_config().*get_timeout_config_selector();
|
||||
auto p = service::pager::query_pagers::pager(_schema, _selection,
|
||||
state, options, command, std::move(key_ranges), _stats, _restrictions->need_filtering() ? _restrictions : nullptr);
|
||||
|
||||
@@ -418,9 +418,10 @@ select_statement::do_execute(service::storage_proxy& proxy,
|
||||
return do_with(
|
||||
cql3::selection::result_set_builder(*_selection, now,
|
||||
options.get_cql_serialization_format()),
|
||||
[this, p, page_size, now, timeout](auto& builder) {
|
||||
[this, p, page_size, now, timeout_duration](auto& builder) {
|
||||
return do_until([p] {return p->is_exhausted();},
|
||||
[p, &builder, page_size, now, timeout] {
|
||||
[p, &builder, page_size, now, timeout_duration] {
|
||||
auto timeout = db::timeout_clock::now() + timeout_duration;
|
||||
return p->fetch_page(builder, page_size, now, timeout);
|
||||
}
|
||||
).then([this, &builder] {
|
||||
@@ -439,6 +440,7 @@ select_statement::do_execute(service::storage_proxy& proxy,
|
||||
" you must either remove the ORDER BY or the IN and sort client side, or disable paging for this query");
|
||||
}
|
||||
|
||||
auto timeout = db::timeout_clock::now() + timeout_duration;
|
||||
if (_selection->is_trivial() && !_restrictions->need_filtering()) {
|
||||
return p->fetch_page_generator(page_size, now, timeout, _stats).then([this, p, limit] (result_generator generator) {
|
||||
auto meta = [&] () -> shared_ptr<const cql3::metadata> {
|
||||
@@ -492,15 +494,9 @@ generate_base_key_from_index_pk(const partition_key& index_pk, const clustering_
|
||||
return KeyType::from_range(exploded_base_key);
|
||||
}
|
||||
|
||||
future<shared_ptr<cql_transport::messages::result_message>>
|
||||
indexed_table_select_statement::execute_base_query(
|
||||
service::storage_proxy& proxy,
|
||||
dht::partition_range_vector&& partition_ranges,
|
||||
service::query_state& state,
|
||||
const query_options& options,
|
||||
gc_clock::time_point now,
|
||||
::shared_ptr<const service::pager::paging_state> paging_state) {
|
||||
auto cmd = ::make_lw_shared<query::read_command>(
|
||||
lw_shared_ptr<query::read_command>
|
||||
indexed_table_select_statement::prepare_command_for_base_query(const query_options& options, service::query_state& state, gc_clock::time_point now, bool use_paging) {
|
||||
lw_shared_ptr<query::read_command> cmd = ::make_lw_shared<query::read_command>(
|
||||
_schema->id(),
|
||||
_schema->version(),
|
||||
make_partition_slice(options),
|
||||
@@ -510,9 +506,25 @@ indexed_table_select_statement::execute_base_query(
|
||||
query::max_partitions,
|
||||
utils::UUID(),
|
||||
options.get_timestamp(state));
|
||||
if (options.get_page_size() > 0) {
|
||||
if (use_paging) {
|
||||
cmd->slice.options.set<query::partition_slice::option::allow_short_read>();
|
||||
cmd->slice.options.set<query::partition_slice::option::send_partition_key>();
|
||||
if (_schema->clustering_key_size() > 0) {
|
||||
cmd->slice.options.set<query::partition_slice::option::send_clustering_key>();
|
||||
}
|
||||
}
|
||||
return cmd;
|
||||
}
|
||||
|
||||
future<shared_ptr<cql_transport::messages::result_message>>
|
||||
indexed_table_select_statement::execute_base_query(
|
||||
service::storage_proxy& proxy,
|
||||
dht::partition_range_vector&& partition_ranges,
|
||||
service::query_state& state,
|
||||
const query_options& options,
|
||||
gc_clock::time_point now,
|
||||
::shared_ptr<const service::pager::paging_state> paging_state) {
|
||||
auto cmd = prepare_command_for_base_query(options, state, now, bool(paging_state));
|
||||
auto timeout = db::timeout_clock::now() + options.get_timeout_config().*get_timeout_config_selector();
|
||||
dht::partition_range_vector per_vnode_ranges;
|
||||
per_vnode_ranges.reserve(partition_ranges.size());
|
||||
@@ -586,19 +598,7 @@ indexed_table_select_statement::execute_base_query(
|
||||
const query_options& options,
|
||||
gc_clock::time_point now,
|
||||
::shared_ptr<const service::pager::paging_state> paging_state) {
|
||||
auto cmd = make_lw_shared<query::read_command>(
|
||||
_schema->id(),
|
||||
_schema->version(),
|
||||
make_partition_slice(options),
|
||||
get_limit(options),
|
||||
now,
|
||||
tracing::make_trace_info(state.get_trace_state()),
|
||||
query::max_partitions,
|
||||
utils::UUID(),
|
||||
options.get_timestamp(state));
|
||||
if (options.get_page_size() > 0) {
|
||||
cmd->slice.options.set<query::partition_slice::option::allow_short_read>();
|
||||
}
|
||||
auto cmd = prepare_command_for_base_query(options, state, now, bool(paging_state));
|
||||
auto timeout = db::timeout_clock::now() + options.get_timeout_config().*get_timeout_config_selector();
|
||||
|
||||
struct base_query_state {
|
||||
@@ -774,7 +774,8 @@ indexed_table_select_statement::prepare(database& db,
|
||||
ordering_comparator_type ordering_comparator,
|
||||
::shared_ptr<term> limit, cql_stats &stats)
|
||||
{
|
||||
auto index_opt = find_idx(db, schema, restrictions);
|
||||
auto& sim = db.find_column_family(schema).get_index_manager();
|
||||
auto index_opt = restrictions->find_idx(sim);
|
||||
if (!index_opt) {
|
||||
throw std::runtime_error("No index found.");
|
||||
}
|
||||
@@ -798,24 +799,6 @@ indexed_table_select_statement::prepare(database& db,
|
||||
|
||||
}
|
||||
|
||||
|
||||
stdx::optional<secondary_index::index> indexed_table_select_statement::find_idx(database& db,
|
||||
schema_ptr schema,
|
||||
::shared_ptr<restrictions::statement_restrictions> restrictions)
|
||||
{
|
||||
auto& sim = db.find_column_family(schema).get_index_manager();
|
||||
for (::shared_ptr<cql3::restrictions::restrictions> restriction : restrictions->index_restrictions()) {
|
||||
for (const auto& cdef : restriction->get_column_defs()) {
|
||||
for (auto index : sim.list_indexes()) {
|
||||
if (index.depends_on(*cdef)) {
|
||||
return stdx::make_optional<secondary_index::index>(std::move(index));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return stdx::nullopt;
|
||||
}
|
||||
|
||||
indexed_table_select_statement::indexed_table_select_statement(schema_ptr schema, uint32_t bound_terms,
|
||||
::shared_ptr<parameters> parameters,
|
||||
::shared_ptr<selection::selection> selection,
|
||||
@@ -1219,6 +1202,7 @@ std::unique_ptr<prepared_statement> select_statement::prepare(database& db, cql_
|
||||
}
|
||||
|
||||
check_needs_filtering(restrictions);
|
||||
ensure_filtering_columns_retrieval(db, selection, restrictions);
|
||||
|
||||
::shared_ptr<cql3::statements::select_statement> stmt;
|
||||
if (restrictions->uses_secondary_indexing()) {
|
||||
@@ -1357,7 +1341,7 @@ select_statement::get_ordering_comparator(schema_ptr schema,
|
||||
}
|
||||
auto index = selection->index_of(*def);
|
||||
if (index < 0) {
|
||||
index = selection->add_column_for_ordering(*def);
|
||||
index = selection->add_column_for_post_processing(*def);
|
||||
}
|
||||
|
||||
sorters.emplace_back(index, def->type);
|
||||
@@ -1444,6 +1428,23 @@ void select_statement::check_needs_filtering(::shared_ptr<restrictions::statemen
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds columns that are needed for the purpose of filtering to the selection.
|
||||
* The columns that are added to the selection are columns that
|
||||
* are needed for filtering on the coordinator but are not part of the selection.
|
||||
* The columns are added with metadata indicating they are not to be returned
|
||||
* to the user.
|
||||
*/
|
||||
void select_statement::ensure_filtering_columns_retrieval(database& db,
|
||||
::shared_ptr<selection::selection> selection,
|
||||
::shared_ptr<restrictions::statement_restrictions> restrictions) {
|
||||
for (auto&& cdef : restrictions->get_column_defs_for_filtering(db)) {
|
||||
if (!selection->has_column(*cdef)) {
|
||||
selection->add_column_for_post_processing(*cdef);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool select_statement::contains_alias(::shared_ptr<column_identifier> name) {
|
||||
return std::any_of(_select_clause.begin(), _select_clause.end(), [name] (auto raw) {
|
||||
return raw->alias && *name == *raw->alias;
|
||||
|
||||
@@ -186,10 +186,6 @@ public:
|
||||
schema_ptr view_schema);
|
||||
|
||||
private:
|
||||
static stdx::optional<secondary_index::index> find_idx(database& db,
|
||||
schema_ptr schema,
|
||||
::shared_ptr<restrictions::statement_restrictions> restrictions);
|
||||
|
||||
virtual future<::shared_ptr<cql_transport::messages::result_message>> do_execute(service::storage_proxy& proxy,
|
||||
service::query_state& state, const query_options& options) override;
|
||||
|
||||
@@ -214,6 +210,9 @@ private:
|
||||
gc_clock::time_point now,
|
||||
::shared_ptr<const service::pager::paging_state> paging_state);
|
||||
|
||||
lw_shared_ptr<query::read_command>
|
||||
prepare_command_for_base_query(const query_options& options, service::query_state& state, gc_clock::time_point now, bool use_paging);
|
||||
|
||||
future<shared_ptr<cql_transport::messages::result_message>>
|
||||
execute_base_query(
|
||||
service::storage_proxy& proxy,
|
||||
|
||||
@@ -1673,14 +1673,14 @@ const db::commitlog::config& db::commitlog::active_config() const {
|
||||
// No commit_io_check needed in the log reader since the database will fail
|
||||
// on error at startup if required
|
||||
future<std::unique_ptr<subscription<temporary_buffer<char>, db::replay_position>>>
|
||||
db::commitlog::read_log_file(const sstring& filename, commit_load_reader_func next, position_type off, const db::extensions* exts) {
|
||||
db::commitlog::read_log_file(const sstring& filename, seastar::io_priority_class read_io_prio_class, commit_load_reader_func next, position_type off, const db::extensions* exts) {
|
||||
struct work {
|
||||
private:
|
||||
file_input_stream_options make_file_input_stream_options() {
|
||||
file_input_stream_options make_file_input_stream_options(seastar::io_priority_class read_io_prio_class) {
|
||||
file_input_stream_options fo;
|
||||
fo.buffer_size = db::commitlog::segment::default_size;
|
||||
fo.read_ahead = 10;
|
||||
fo.io_priority_class = service::get_local_commitlog_priority();
|
||||
fo.io_priority_class = read_io_prio_class;
|
||||
return fo;
|
||||
}
|
||||
public:
|
||||
@@ -1699,8 +1699,8 @@ db::commitlog::read_log_file(const sstring& filename, commit_load_reader_func ne
|
||||
bool header = true;
|
||||
bool failed = false;
|
||||
|
||||
work(file f, position_type o = 0)
|
||||
: f(f), fin(make_file_input_stream(f, 0, make_file_input_stream_options())), start_off(o) {
|
||||
work(file f, seastar::io_priority_class read_io_prio_class, position_type o = 0)
|
||||
: f(f), fin(make_file_input_stream(f, 0, make_file_input_stream_options(read_io_prio_class))), start_off(o) {
|
||||
}
|
||||
work(work&&) = default;
|
||||
|
||||
@@ -1918,9 +1918,9 @@ db::commitlog::read_log_file(const sstring& filename, commit_load_reader_func ne
|
||||
return fut;
|
||||
});
|
||||
|
||||
return fut.then([off, next](file f) {
|
||||
return fut.then([off, next, read_io_prio_class] (file f) {
|
||||
f = make_checked_file(commit_error_handler, std::move(f));
|
||||
auto w = make_lw_shared<work>(std::move(f), off);
|
||||
auto w = make_lw_shared<work>(std::move(f), read_io_prio_class, off);
|
||||
auto ret = w->s.listen(next);
|
||||
|
||||
w->s.started().then(std::bind(&work::read_file, w.get())).then([w] {
|
||||
|
||||
@@ -355,7 +355,7 @@ public:
|
||||
};
|
||||
|
||||
static future<std::unique_ptr<subscription<temporary_buffer<char>, replay_position>>> read_log_file(
|
||||
const sstring&, commit_load_reader_func, position_type = 0, const db::extensions* = nullptr);
|
||||
const sstring&, seastar::io_priority_class read_io_prio_class, commit_load_reader_func, position_type = 0, const db::extensions* = nullptr);
|
||||
private:
|
||||
commitlog(config);
|
||||
|
||||
|
||||
@@ -58,6 +58,7 @@
|
||||
#include "converting_mutation_partition_applier.hh"
|
||||
#include "schema_registry.hh"
|
||||
#include "commitlog_entry.hh"
|
||||
#include "service/priority_manager.hh"
|
||||
|
||||
static logging::logger rlogger("commitlog_replayer");
|
||||
|
||||
@@ -223,7 +224,7 @@ db::commitlog_replayer::impl::recover(sstring file, const sstring& fname_prefix)
|
||||
auto s = make_lw_shared<stats>();
|
||||
auto& exts = _qp.local().db().local().get_config().extensions();
|
||||
|
||||
return db::commitlog::read_log_file(file,
|
||||
return db::commitlog::read_log_file(file, service::get_local_commitlog_priority(),
|
||||
std::bind(&impl::process, this, s.get(), std::placeholders::_1,
|
||||
std::placeholders::_2), p, &exts).then([](auto s) {
|
||||
auto f = s->done();
|
||||
|
||||
@@ -453,7 +453,7 @@ public:
|
||||
"The maximum number of tombstones a query can scan before aborting." \
|
||||
) \
|
||||
/* Network timeout settings */ \
|
||||
val(range_request_timeout_in_ms, uint32_t, 10000, Unused, \
|
||||
val(range_request_timeout_in_ms, uint32_t, 10000, Used, \
|
||||
"The time in milliseconds that the coordinator waits for sequential or index scans to complete." \
|
||||
) \
|
||||
val(read_request_timeout_in_ms, uint32_t, 5000, Used, \
|
||||
@@ -472,7 +472,7 @@ public:
|
||||
"The time in milliseconds that the coordinator waits for write operations to complete.\n" \
|
||||
"Related information: About hinted handoff writes" \
|
||||
) \
|
||||
val(request_timeout_in_ms, uint32_t, 10000, Unused, \
|
||||
val(request_timeout_in_ms, uint32_t, 10000, Used, \
|
||||
"The default timeout for other, miscellaneous operations.\n" \
|
||||
"Related information: About hinted handoff writes" \
|
||||
) \
|
||||
@@ -621,7 +621,7 @@ public:
|
||||
val(thrift_framed_transport_size_in_mb, uint32_t, 15, Unused, \
|
||||
"Frame size (maximum field length) for Thrift. The frame is the row or part of the row the application is inserting." \
|
||||
) \
|
||||
val(thrift_max_message_length_in_mb, uint32_t, 16, Unused, \
|
||||
val(thrift_max_message_length_in_mb, uint32_t, 16, Used, \
|
||||
"The maximum length of a Thrift message in megabytes, including all fields and internal Thrift overhead (1 byte of overhead for each frame). Message length is usually used in conjunction with batches. A frame length greater than or equal to 24 accommodates a batch with four inserts, each of which is 24 bytes. The required message length is greater than or equal to 24+24+24+24+4 (number of frames)." \
|
||||
) \
|
||||
/* Security properties */ \
|
||||
|
||||
@@ -35,6 +35,7 @@
|
||||
#include "disk-error-handler.hh"
|
||||
#include "lister.hh"
|
||||
#include "db/timeout_clock.hh"
|
||||
#include "service/priority_manager.hh"
|
||||
|
||||
using namespace std::literals::chrono_literals;
|
||||
|
||||
@@ -95,6 +96,7 @@ future<> manager::start(shared_ptr<service::storage_proxy> proxy_ptr, shared_ptr
|
||||
return compute_hints_dir_device_id();
|
||||
}).then([this] {
|
||||
_strorage_service_anchor->register_subscriber(this);
|
||||
set_started();
|
||||
});
|
||||
}
|
||||
|
||||
@@ -105,7 +107,7 @@ future<> manager::stop() {
|
||||
_strorage_service_anchor->unregister_subscriber(this);
|
||||
}
|
||||
|
||||
_stopping = true;
|
||||
set_stopping();
|
||||
|
||||
return _draining_eps_gate.close().finally([this] {
|
||||
return parallel_for_each(_ep_managers, [] (auto& pair) {
|
||||
@@ -277,7 +279,7 @@ inline bool manager::have_ep_manager(ep_key_type ep) const noexcept {
|
||||
}
|
||||
|
||||
bool manager::store_hint(ep_key_type ep, schema_ptr s, lw_shared_ptr<const frozen_mutation> fm, tracing::trace_state_ptr tr_state) noexcept {
|
||||
if (_stopping || !can_hint_for(ep)) {
|
||||
if (stopping() || !started() || !can_hint_for(ep)) {
|
||||
manager_logger.trace("Can't store a hint to {}", ep);
|
||||
++_stats.dropped;
|
||||
return false;
|
||||
@@ -502,7 +504,7 @@ bool manager::check_dc_for(ep_key_type ep) const noexcept {
|
||||
}
|
||||
|
||||
void manager::drain_for(gms::inet_address endpoint) {
|
||||
if (_stopping) {
|
||||
if (stopping()) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -543,6 +545,7 @@ manager::end_point_hints_manager::sender::sender(end_point_hints_manager& parent
|
||||
, _resource_manager(_shard_manager._resource_manager)
|
||||
, _proxy(local_storage_proxy)
|
||||
, _db(local_db)
|
||||
, _hints_cpu_sched_group(_db.get_streaming_scheduling_group())
|
||||
, _gossiper(local_gossiper)
|
||||
, _file_update_mutex(_ep_manager.file_update_mutex())
|
||||
{}
|
||||
@@ -555,6 +558,7 @@ manager::end_point_hints_manager::sender::sender(const sender& other, end_point_
|
||||
, _resource_manager(_shard_manager._resource_manager)
|
||||
, _proxy(other._proxy)
|
||||
, _db(other._db)
|
||||
, _hints_cpu_sched_group(other._hints_cpu_sched_group)
|
||||
, _gossiper(other._gossiper)
|
||||
, _file_update_mutex(_ep_manager.file_update_mutex())
|
||||
{}
|
||||
@@ -610,7 +614,10 @@ manager::end_point_hints_manager::sender::clock::duration manager::end_point_hin
|
||||
}
|
||||
|
||||
void manager::end_point_hints_manager::sender::start() {
|
||||
_stopped = seastar::async([this] {
|
||||
seastar::thread_attributes attr;
|
||||
|
||||
attr.sched_group = _hints_cpu_sched_group;
|
||||
_stopped = seastar::async(std::move(attr), [this] {
|
||||
manager_logger.trace("ep_manager({})::sender: started", end_point_key());
|
||||
while (!stopping()) {
|
||||
try {
|
||||
@@ -693,7 +700,7 @@ bool manager::end_point_hints_manager::sender::send_one_file(const sstring& fnam
|
||||
lw_shared_ptr<send_one_file_ctx> ctx_ptr = make_lw_shared<send_one_file_ctx>();
|
||||
|
||||
try {
|
||||
auto s = commitlog::read_log_file(fname, [this, secs_since_file_mod, &fname, ctx_ptr] (temporary_buffer<char> buf, db::replay_position rp) mutable {
|
||||
auto s = commitlog::read_log_file(fname, service::get_local_streaming_read_priority(), [this, secs_since_file_mod, &fname, ctx_ptr] (temporary_buffer<char> buf, db::replay_position rp) mutable {
|
||||
// Check that we can still send the next hint. Don't try to send it if the destination host
|
||||
// is DOWN or if we have already failed to send some of the previous hints.
|
||||
if (!draining() && ctx_ptr->state.contains(send_state::segment_replay_failed)) {
|
||||
@@ -759,7 +766,7 @@ void manager::end_point_hints_manager::sender::send_hints_maybe() noexcept {
|
||||
int replayed_segments_count = 0;
|
||||
|
||||
try {
|
||||
while (have_segments()) {
|
||||
while (replay_allowed() && have_segments()) {
|
||||
if (!send_one_file(*_segments_to_replay.begin())) {
|
||||
break;
|
||||
}
|
||||
@@ -936,5 +943,15 @@ future<> manager::rebalance(sstring hints_directory) {
|
||||
});
|
||||
}
|
||||
|
||||
void manager::update_backlog(size_t backlog, size_t max_backlog) {
|
||||
_backlog_size = backlog;
|
||||
_max_backlog_size = max_backlog;
|
||||
if (backlog < max_backlog) {
|
||||
allow_hints();
|
||||
} else {
|
||||
forbid_hints_for_eps_with_pending_hints();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,6 +69,8 @@ private:
|
||||
class drain_tag {};
|
||||
using drain = seastar::bool_class<drain_tag>;
|
||||
|
||||
friend class space_watchdog;
|
||||
|
||||
public:
|
||||
class end_point_hints_manager {
|
||||
public:
|
||||
@@ -119,6 +121,7 @@ public:
|
||||
resource_manager& _resource_manager;
|
||||
service::storage_proxy& _proxy;
|
||||
database& _db;
|
||||
seastar::scheduling_group _hints_cpu_sched_group;
|
||||
gms::gossiper& _gossiper;
|
||||
seastar::shared_mutex& _file_update_mutex;
|
||||
|
||||
@@ -179,6 +182,10 @@ public:
|
||||
return _state.contains(state::stopping);
|
||||
}
|
||||
|
||||
bool replay_allowed() const noexcept {
|
||||
return _ep_manager.replay_allowed();
|
||||
}
|
||||
|
||||
/// \brief Try to send one hint read from the file.
|
||||
/// - Limit the maximum memory size of hints "in the air" and the maximum total number of hints "in the air".
|
||||
/// - Discard the hints that are older than the grace seconds value of the corresponding table.
|
||||
@@ -328,6 +335,10 @@ public:
|
||||
return _hints_in_progress;
|
||||
}
|
||||
|
||||
bool replay_allowed() const noexcept {
|
||||
return _shard_manager.replay_allowed();
|
||||
}
|
||||
|
||||
bool can_hint() const noexcept {
|
||||
return _state.contains(state::can_hint);
|
||||
}
|
||||
@@ -393,6 +404,17 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
enum class state {
|
||||
started, // hinting is currently allowed (start() call is complete)
|
||||
replay_allowed, // replaying (hints sending) is allowed
|
||||
stopping // hinting is not allowed - stopping is in progress (stop() method has been called)
|
||||
};
|
||||
|
||||
using state_set = enum_set<super_enum<state,
|
||||
state::started,
|
||||
state::replay_allowed,
|
||||
state::stopping>>;
|
||||
|
||||
private:
|
||||
using ep_key_type = typename end_point_hints_manager::key_type;
|
||||
using ep_managers_map_type = std::unordered_map<ep_key_type, end_point_hints_manager>;
|
||||
@@ -403,6 +425,7 @@ public:
|
||||
static const std::chrono::seconds hint_file_write_timeout;
|
||||
|
||||
private:
|
||||
state_set _state;
|
||||
const boost::filesystem::path _hints_dir;
|
||||
dev_t _hints_dir_device_id = 0;
|
||||
|
||||
@@ -414,7 +437,7 @@ private:
|
||||
locator::snitch_ptr& _local_snitch_ptr;
|
||||
int64_t _max_hint_window_us = 0;
|
||||
database& _local_db;
|
||||
bool _stopping = false;
|
||||
|
||||
seastar::gate _draining_eps_gate; // gate used to control the progress of ep_managers stopping not in the context of manager::stop() call
|
||||
|
||||
resource_manager& _resource_manager;
|
||||
@@ -424,9 +447,14 @@ private:
|
||||
seastar::metrics::metric_groups _metrics;
|
||||
std::unordered_set<ep_key_type> _eps_with_pending_hints;
|
||||
|
||||
size_t _max_backlog_size;
|
||||
size_t _backlog_size;
|
||||
|
||||
public:
|
||||
manager(sstring hints_directory, std::vector<sstring> hinted_dcs, int64_t max_hint_window_ms, resource_manager&res_manager, distributed<database>& db);
|
||||
virtual ~manager();
|
||||
manager(manager&&) = delete;
|
||||
manager& operator=(manager&&) = delete;
|
||||
void register_metrics(const sstring& group_name);
|
||||
future<> start(shared_ptr<service::storage_proxy> proxy_ptr, shared_ptr<gms::gossiper> gossiper_ptr, shared_ptr<service::storage_service> ss_ptr);
|
||||
future<> stop();
|
||||
@@ -503,6 +531,18 @@ public:
|
||||
void forbid_hints();
|
||||
void forbid_hints_for_eps_with_pending_hints();
|
||||
|
||||
size_t max_backlog_size() const {
|
||||
return _max_backlog_size;
|
||||
}
|
||||
|
||||
size_t backlog_size() const {
|
||||
return _backlog_size;
|
||||
}
|
||||
|
||||
void allow_replaying() noexcept {
|
||||
_state.set(state::replay_allowed);
|
||||
}
|
||||
|
||||
/// \brief Rebalance hints segments among all present shards.
|
||||
///
|
||||
/// The difference between the number of segments on every two shards will not be greater than 1 after the
|
||||
@@ -616,6 +656,28 @@ private:
|
||||
/// \param endpoint node that left the cluster
|
||||
void drain_for(gms::inet_address endpoint);
|
||||
|
||||
void update_backlog(size_t backlog, size_t max_backlog);
|
||||
|
||||
bool stopping() const noexcept {
|
||||
return _state.contains(state::stopping);
|
||||
}
|
||||
|
||||
void set_stopping() noexcept {
|
||||
_state.set(state::stopping);
|
||||
}
|
||||
|
||||
bool started() const noexcept {
|
||||
return _state.contains(state::started);
|
||||
}
|
||||
|
||||
void set_started() noexcept {
|
||||
_state.set(state::started);
|
||||
}
|
||||
|
||||
bool replay_allowed() const noexcept {
|
||||
return _state.contains(state::replay_allowed);
|
||||
}
|
||||
|
||||
public:
|
||||
ep_managers_map_type::iterator find_ep_manager(ep_key_type ep_key) noexcept {
|
||||
return _ep_managers.find(ep_key);
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#include "lister.hh"
|
||||
#include "disk-error-handler.hh"
|
||||
#include "seastarx.hh"
|
||||
#include <seastar/core/sleep.hh>
|
||||
|
||||
namespace db {
|
||||
namespace hints {
|
||||
@@ -65,19 +66,28 @@ const std::chrono::seconds space_watchdog::_watchdog_period = std::chrono::secon
|
||||
space_watchdog::space_watchdog(shard_managers_set& managers, per_device_limits_map& per_device_limits_map)
|
||||
: _shard_managers(managers)
|
||||
, _per_device_limits_map(per_device_limits_map)
|
||||
, _timer([this] { on_timer(); })
|
||||
{}
|
||||
|
||||
void space_watchdog::start() {
|
||||
_timer.arm(timer_clock_type::now());
|
||||
_started = seastar::async([this] {
|
||||
while (!_as.abort_requested()) {
|
||||
try {
|
||||
on_timer();
|
||||
} catch (...) {
|
||||
resource_manager_logger.trace("space_watchdog: unexpected exception - stop all hints generators");
|
||||
// Stop all hint generators if space_watchdog callback failed
|
||||
for (manager& shard_manager : _shard_managers) {
|
||||
shard_manager.forbid_hints();
|
||||
}
|
||||
}
|
||||
seastar::sleep_abortable(_watchdog_period, _as).get();
|
||||
}
|
||||
}).handle_exception_type([] (const seastar::sleep_aborted& ignored) { });
|
||||
}
|
||||
|
||||
future<> space_watchdog::stop() noexcept {
|
||||
try {
|
||||
return _gate.close().finally([this] { _timer.cancel(); });
|
||||
} catch (...) {
|
||||
return make_exception_future<>(std::current_exception());
|
||||
}
|
||||
_as.request_abort();
|
||||
return std::move(_started);
|
||||
}
|
||||
|
||||
future<> space_watchdog::scan_one_ep_dir(boost::filesystem::path path, manager& shard_manager, ep_key_type ep_key) {
|
||||
@@ -94,83 +104,62 @@ future<> space_watchdog::scan_one_ep_dir(boost::filesystem::path path, manager&
|
||||
});
|
||||
}
|
||||
|
||||
// Called from the context of a seastar::thread.
|
||||
void space_watchdog::on_timer() {
|
||||
with_gate(_gate, [this] {
|
||||
return futurize_apply([this] {
|
||||
_total_size = 0;
|
||||
// The hints directories are organized as follows:
|
||||
// <hints root>
|
||||
// |- <shard1 ID>
|
||||
// | |- <EP1 address>
|
||||
// | |- <hints file1>
|
||||
// | |- <hints file2>
|
||||
// | |- ...
|
||||
// | |- <EP2 address>
|
||||
// | |- ...
|
||||
// | |-...
|
||||
// |- <shard2 ID>
|
||||
// | |- ...
|
||||
// ...
|
||||
// |- <shardN ID>
|
||||
// | |- ...
|
||||
//
|
||||
|
||||
return do_for_each(_shard_managers, [this] (manager& shard_manager) {
|
||||
shard_manager.clear_eps_with_pending_hints();
|
||||
|
||||
// The hints directories are organized as follows:
|
||||
// <hints root>
|
||||
// |- <shard1 ID>
|
||||
// | |- <EP1 address>
|
||||
// | |- <hints file1>
|
||||
// | |- <hints file2>
|
||||
// | |- ...
|
||||
// | |- <EP2 address>
|
||||
// | |- ...
|
||||
// | |-...
|
||||
// |- <shard2 ID>
|
||||
// | |- ...
|
||||
// ...
|
||||
// |- <shardN ID>
|
||||
// | |- ...
|
||||
for (auto& per_device_limits : _per_device_limits_map | boost::adaptors::map_values) {
|
||||
_total_size = 0;
|
||||
for (manager& shard_manager : per_device_limits.managers) {
|
||||
shard_manager.clear_eps_with_pending_hints();
|
||||
lister::scan_dir(shard_manager.hints_dir(), {directory_entry_type::directory}, [this, &shard_manager] (lister::path dir, directory_entry de) {
|
||||
_files_count = 0;
|
||||
// Let's scan per-end-point directories and enumerate hints files...
|
||||
//
|
||||
return lister::scan_dir(shard_manager.hints_dir(), {directory_entry_type::directory}, [this, &shard_manager] (lister::path dir, directory_entry de) {
|
||||
_files_count = 0;
|
||||
// Let's scan per-end-point directories and enumerate hints files...
|
||||
//
|
||||
// Let's check if there is a corresponding end point manager (may not exist if the corresponding DC is
|
||||
// not hintable).
|
||||
// If exists - let's take a file update lock so that files are not changed under our feet. Otherwise, simply
|
||||
// continue to enumeration - there is no one to change them.
|
||||
auto it = shard_manager.find_ep_manager(de.name);
|
||||
if (it != shard_manager.ep_managers_end()) {
|
||||
return with_lock(it->second.file_update_mutex(), [this, &shard_manager, dir = std::move(dir), ep_name = std::move(de.name)]() mutable {
|
||||
return scan_one_ep_dir(dir / ep_name.c_str(), shard_manager, ep_key_type(ep_name));
|
||||
});
|
||||
} else {
|
||||
return scan_one_ep_dir(dir / de.name.c_str(), shard_manager, ep_key_type(de.name));
|
||||
}
|
||||
});
|
||||
}).then([this] {
|
||||
return do_for_each(_per_device_limits_map, [this](per_device_limits_map::value_type& per_device_limits_entry) {
|
||||
space_watchdog::per_device_limits& per_device_limits = per_device_limits_entry.second;
|
||||
|
||||
size_t adjusted_quota = 0;
|
||||
size_t delta = boost::accumulate(per_device_limits.managers, 0, [] (size_t sum, manager& shard_manager) {
|
||||
return sum + shard_manager.ep_managers_size() * resource_manager::hint_segment_size_in_mb * 1024 * 1024;
|
||||
// Let's check if there is a corresponding end point manager (may not exist if the corresponding DC is
|
||||
// not hintable).
|
||||
// If exists - let's take a file update lock so that files are not changed under our feet. Otherwise, simply
|
||||
// continue to enumeration - there is no one to change them.
|
||||
auto it = shard_manager.find_ep_manager(de.name);
|
||||
if (it != shard_manager.ep_managers_end()) {
|
||||
return with_lock(it->second.file_update_mutex(), [this, &shard_manager, dir = std::move(dir), ep_name = std::move(de.name)]() mutable {
|
||||
return scan_one_ep_dir(dir / ep_name.c_str(), shard_manager, ep_key_type(ep_name));
|
||||
});
|
||||
if (per_device_limits.max_shard_disk_space_size > delta) {
|
||||
adjusted_quota = per_device_limits.max_shard_disk_space_size - delta;
|
||||
}
|
||||
} else {
|
||||
return scan_one_ep_dir(dir / de.name.c_str(), shard_manager, ep_key_type(de.name));
|
||||
}
|
||||
}).get();
|
||||
}
|
||||
|
||||
bool can_hint = _total_size < adjusted_quota;
|
||||
resource_manager_logger.trace("space_watchdog: total_size ({}) {} max_shard_disk_space_size ({})", _total_size, can_hint ? "<" : ">=", adjusted_quota);
|
||||
|
||||
if (!can_hint) {
|
||||
for (manager& shard_manager : per_device_limits.managers) {
|
||||
shard_manager.forbid_hints_for_eps_with_pending_hints();
|
||||
}
|
||||
} else {
|
||||
for (manager& shard_manager : per_device_limits.managers) {
|
||||
shard_manager.allow_hints();
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
}).handle_exception([this] (auto eptr) {
|
||||
resource_manager_logger.trace("space_watchdog: unexpected exception - stop all hints generators");
|
||||
// Stop all hint generators if space_watchdog callback failed
|
||||
for (manager& shard_manager : _shard_managers) {
|
||||
shard_manager.forbid_hints();
|
||||
}
|
||||
}).finally([this] {
|
||||
_timer.arm(_watchdog_period);
|
||||
// Adjust the quota to take into account the space we guarantee to every end point manager
|
||||
size_t adjusted_quota = 0;
|
||||
size_t delta = boost::accumulate(per_device_limits.managers, 0, [] (size_t sum, manager& shard_manager) {
|
||||
return sum + shard_manager.ep_managers_size() * resource_manager::hint_segment_size_in_mb * 1024 * 1024;
|
||||
});
|
||||
});
|
||||
if (per_device_limits.max_shard_disk_space_size > delta) {
|
||||
adjusted_quota = per_device_limits.max_shard_disk_space_size - delta;
|
||||
}
|
||||
|
||||
resource_manager_logger.trace("space_watchdog: consuming {}/{} bytes", _total_size, adjusted_quota);
|
||||
for (manager& shard_manager : per_device_limits.managers) {
|
||||
shard_manager.update_backlog(_total_size, adjusted_quota);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
future<> resource_manager::start(shared_ptr<service::storage_proxy> proxy_ptr, shared_ptr<gms::gossiper> gossiper_ptr, shared_ptr<service::storage_service> ss_ptr) {
|
||||
@@ -183,6 +172,10 @@ future<> resource_manager::start(shared_ptr<service::storage_proxy> proxy_ptr, s
|
||||
});
|
||||
}
|
||||
|
||||
void resource_manager::allow_replaying() noexcept {
|
||||
boost::for_each(_shard_managers, [] (manager& m) { m.allow_replaying(); });
|
||||
}
|
||||
|
||||
future<> resource_manager::stop() noexcept {
|
||||
return parallel_for_each(_shard_managers, [](manager& m) {
|
||||
return m.stop();
|
||||
@@ -201,14 +194,18 @@ future<> resource_manager::prepare_per_device_limits() {
|
||||
auto it = _per_device_limits_map.find(device_id);
|
||||
if (it == _per_device_limits_map.end()) {
|
||||
return is_mountpoint(shard_manager.hints_dir().parent_path()).then([this, device_id, &shard_manager](bool is_mountpoint) {
|
||||
// By default, give each group of managers 10% of the available disk space. Give each shard an equal share of the available space.
|
||||
size_t max_size = boost::filesystem::space(shard_manager.hints_dir().c_str()).capacity / (10 * smp::count);
|
||||
// If hints directory is a mountpoint, we assume it's on dedicated (i.e. not shared with data/commitlog/etc) storage.
|
||||
// Then, reserve 90% of all space instead of 10% above.
|
||||
if (is_mountpoint) {
|
||||
max_size *= 9;
|
||||
auto [it, inserted] = _per_device_limits_map.emplace(device_id, space_watchdog::per_device_limits{});
|
||||
// Since we possibly deferred, we need to recheck the _per_device_limits_map.
|
||||
if (inserted) {
|
||||
// By default, give each group of managers 10% of the available disk space. Give each shard an equal share of the available space.
|
||||
it->second.max_shard_disk_space_size = boost::filesystem::space(shard_manager.hints_dir().c_str()).capacity / (10 * smp::count);
|
||||
// If hints directory is a mountpoint, we assume it's on dedicated (i.e. not shared with data/commitlog/etc) storage.
|
||||
// Then, reserve 90% of all space instead of 10% above.
|
||||
if (is_mountpoint) {
|
||||
it->second.max_shard_disk_space_size *= 9;
|
||||
}
|
||||
}
|
||||
_per_device_limits_map.emplace(device_id, space_watchdog::per_device_limits{{std::ref(shard_manager)}, max_size});
|
||||
it->second.managers.emplace_back(std::ref(shard_manager));
|
||||
});
|
||||
} else {
|
||||
it->second.managers.emplace_back(std::ref(shard_manager));
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <seastar/core/abort_source.hh>
|
||||
#include <seastar/core/semaphore.hh>
|
||||
#include <seastar/core/gate.hh>
|
||||
#include <seastar/core/memory.hh>
|
||||
@@ -78,8 +79,8 @@ private:
|
||||
shard_managers_set& _shard_managers;
|
||||
per_device_limits_map& _per_device_limits_map;
|
||||
|
||||
seastar::gate _gate;
|
||||
seastar::timer<timer_clock_type> _timer;
|
||||
future<> _started = make_ready_future<>();
|
||||
seastar::abort_source _as;
|
||||
int _files_count = 0;
|
||||
|
||||
public:
|
||||
@@ -137,6 +138,9 @@ public:
|
||||
, _space_watchdog(_shard_managers, _per_device_limits_map)
|
||||
{}
|
||||
|
||||
resource_manager(resource_manager&&) = delete;
|
||||
resource_manager& operator=(resource_manager&&) = delete;
|
||||
|
||||
future<semaphore_units<semaphore_default_exception_factory>> get_send_units_for(size_t buf_size);
|
||||
|
||||
bool too_many_hints_in_progress() const {
|
||||
@@ -156,6 +160,7 @@ public:
|
||||
}
|
||||
|
||||
future<> start(shared_ptr<service::storage_proxy> proxy_ptr, shared_ptr<gms::gossiper> gossiper_ptr, shared_ptr<service::storage_service> ss_ptr);
|
||||
void allow_replaying() noexcept;
|
||||
future<> stop() noexcept;
|
||||
void register_manager(manager& m);
|
||||
future<> prepare_per_device_limits();
|
||||
|
||||
@@ -1226,6 +1226,20 @@ future<> view_builder::calculate_shard_build_step(
|
||||
}
|
||||
}
|
||||
|
||||
// All shards need to arrive at the same decisions on whether or not to
|
||||
// restart a view build at some common token (reshard), and which token
|
||||
// to restart at. So we need to wait until all shards have read the view
|
||||
// build statuses before they can all proceed to make the (same) decision.
|
||||
// If we don't synchronize here, a fast shard may make a decision, start
|
||||
// building and finish a build step - before the slowest shard even read
|
||||
// the view build information.
|
||||
container().invoke_on(0, [] (view_builder& builder) {
|
||||
if (++builder._shards_finished_read == smp::count) {
|
||||
builder._shards_finished_read_promise.set_value();
|
||||
}
|
||||
return builder._shards_finished_read_promise.get_shared_future();
|
||||
}).get();
|
||||
|
||||
std::unordered_set<utils::UUID> loaded_views;
|
||||
if (view_build_status_per_shard.size() != smp::count) {
|
||||
reshard(std::move(view_build_status_per_shard), loaded_views);
|
||||
|
||||
@@ -151,6 +151,10 @@ class view_builder final : public service::migration_listener::only_view_notific
|
||||
future<> _started = make_ready_future<>();
|
||||
// Used to coordinate between shards the conclusion of the build process for a particular view.
|
||||
std::unordered_set<utils::UUID> _built_views;
|
||||
// Counter and promise (both on shard 0 only!) allowing to wait for all
|
||||
// shards to have read the view build statuses
|
||||
unsigned _shards_finished_read = 0;
|
||||
seastar::shared_promise<> _shards_finished_read_promise;
|
||||
// Used for testing.
|
||||
std::unordered_map<std::pair<sstring, sstring>, seastar::shared_promise<>, utils::tuple_hash> _build_notifiers;
|
||||
|
||||
|
||||
2
dist/common/sysctl.d/99-scylla-aio.conf
vendored
Normal file
2
dist/common/sysctl.d/99-scylla-aio.conf
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
# Raise max AIO events
|
||||
fs.aio-max-nr = 1048576
|
||||
@@ -1 +1,2 @@
|
||||
dist/common/sysctl.d/99-scylla-sched.conf /etc/sysctl.d
|
||||
dist/common/sysctl.d/99-scylla-aio.conf /etc/sysctl.d
|
||||
|
||||
@@ -9,6 +9,7 @@ if [[ $KVER =~ 3\.13\.0\-([0-9]+)-generic ]]; then
|
||||
else
|
||||
# expect failures in virtualized environments
|
||||
sysctl -p/etc/sysctl.d/99-scylla-sched.conf || :
|
||||
sysctl -p/etc/sysctl.d/99-scylla-aio.conf || :
|
||||
fi
|
||||
|
||||
#DEBHELPER#
|
||||
|
||||
1
dist/redhat/scylla.spec.mustache
vendored
1
dist/redhat/scylla.spec.mustache
vendored
@@ -283,6 +283,7 @@ if Scylla is the main application on your server and you wish to optimize its la
|
||||
# We cannot use the sysctl_apply rpm macro because it is not present in 7.0
|
||||
# following is a "manual" expansion
|
||||
/usr/lib/systemd/systemd-sysctl 99-scylla-sched.conf >/dev/null 2>&1 || :
|
||||
/usr/lib/systemd/systemd-sysctl 99-scylla-aio.conf >/dev/null 2>&1 || :
|
||||
|
||||
%files kernel-conf
|
||||
%defattr(-,root,root)
|
||||
|
||||
@@ -129,26 +129,8 @@ public:
|
||||
update_is_normal();
|
||||
}
|
||||
|
||||
void apply_application_state(application_state key, versioned_value&& value) {
|
||||
auto&& e = _application_state[key];
|
||||
if (e.version < value.version) {
|
||||
e = std::move(value);
|
||||
}
|
||||
update_is_normal();
|
||||
}
|
||||
|
||||
void apply_application_state(application_state key, const versioned_value& value) {
|
||||
auto&& e = _application_state[key];
|
||||
if (e.version < value.version) {
|
||||
e = value;
|
||||
}
|
||||
update_is_normal();
|
||||
}
|
||||
|
||||
void apply_application_state(const endpoint_state& es) {
|
||||
for (auto&& e : es._application_state) {
|
||||
apply_application_state(e.first, e.second);
|
||||
}
|
||||
void add_application_state(const endpoint_state& es) {
|
||||
_application_state = es._application_state;
|
||||
update_is_normal();
|
||||
}
|
||||
|
||||
|
||||
@@ -930,7 +930,7 @@ void gossiper::make_random_gossip_digest(utils::chunked_vector<gossip_digest>& g
|
||||
future<> gossiper::replicate(inet_address ep, const endpoint_state& es) {
|
||||
return container().invoke_on_all([ep, es, orig = engine().cpu_id(), self = shared_from_this()] (gossiper& g) {
|
||||
if (engine().cpu_id() != orig) {
|
||||
g.endpoint_state_map[ep].apply_application_state(es);
|
||||
g.endpoint_state_map[ep].add_application_state(es);
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -939,7 +939,7 @@ future<> gossiper::replicate(inet_address ep, const std::map<application_state,
|
||||
return container().invoke_on_all([ep, &src, &changed, orig = engine().cpu_id(), self = shared_from_this()] (gossiper& g) {
|
||||
if (engine().cpu_id() != orig) {
|
||||
for (auto&& key : changed) {
|
||||
g.endpoint_state_map[ep].apply_application_state(key, src.at(key));
|
||||
g.endpoint_state_map[ep].add_application_state(key, src.at(key));
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -948,7 +948,7 @@ future<> gossiper::replicate(inet_address ep, const std::map<application_state,
|
||||
future<> gossiper::replicate(inet_address ep, application_state key, const versioned_value& value) {
|
||||
return container().invoke_on_all([ep, key, &value, orig = engine().cpu_id(), self = shared_from_this()] (gossiper& g) {
|
||||
if (engine().cpu_id() != orig) {
|
||||
g.endpoint_state_map[ep].apply_application_state(key, value);
|
||||
g.endpoint_state_map[ep].add_application_state(key, value);
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -1175,11 +1175,13 @@ stdx::optional<endpoint_state> gossiper::get_endpoint_state_for_endpoint(inet_ad
|
||||
}
|
||||
}
|
||||
|
||||
void gossiper::reset_endpoint_state_map() {
|
||||
endpoint_state_map.clear();
|
||||
future<> gossiper::reset_endpoint_state_map() {
|
||||
_unreachable_endpoints.clear();
|
||||
_live_endpoints.clear();
|
||||
_live_endpoints_just_added.clear();
|
||||
return container().invoke_on_all([] (gossiper& g) {
|
||||
g.endpoint_state_map.clear();
|
||||
});
|
||||
}
|
||||
|
||||
std::unordered_map<inet_address, endpoint_state>& gms::gossiper::get_endpoint_states() {
|
||||
@@ -1662,6 +1664,7 @@ void gossiper::maybe_initialize_local_state(int generation_nbr) {
|
||||
}
|
||||
}
|
||||
|
||||
// Runs inside seastar::async context
|
||||
void gossiper::add_saved_endpoint(inet_address ep) {
|
||||
if (ep == get_broadcast_address()) {
|
||||
logger.debug("Attempt to add self as saved endpoint");
|
||||
@@ -1687,6 +1690,7 @@ void gossiper::add_saved_endpoint(inet_address ep) {
|
||||
}
|
||||
ep_state.mark_dead();
|
||||
endpoint_state_map[ep] = ep_state;
|
||||
replicate(ep, ep_state).get();
|
||||
_unreachable_endpoints[ep] = now();
|
||||
logger.trace("Adding saved endpoint {} {}", ep, ep_state.get_heart_beat_state().get_generation());
|
||||
}
|
||||
@@ -1924,6 +1928,7 @@ void gossiper::mark_as_shutdown(const inet_address& endpoint) {
|
||||
auto& ep_state = *es;
|
||||
ep_state.add_application_state(application_state::STATUS, storage_service_value_factory().shutdown(true));
|
||||
ep_state.get_heart_beat_state().force_highest_possible_version_unsafe();
|
||||
replicate(endpoint, ep_state).get();
|
||||
mark_dead(endpoint, ep_state);
|
||||
get_local_failure_detector().force_conviction(endpoint);
|
||||
}
|
||||
|
||||
@@ -417,7 +417,7 @@ public:
|
||||
stdx::optional<endpoint_state> get_endpoint_state_for_endpoint(inet_address ep) const;
|
||||
|
||||
// removes ALL endpoint states; should only be called after shadow gossip
|
||||
void reset_endpoint_state_map();
|
||||
future<> reset_endpoint_state_map();
|
||||
|
||||
std::unordered_map<inet_address, endpoint_state>& get_endpoint_states();
|
||||
|
||||
|
||||
@@ -119,9 +119,17 @@ insert_token_range_to_sorted_container_while_unwrapping(
|
||||
const dht::token& tok,
|
||||
dht::token_range_vector& ret) {
|
||||
if (prev_tok < tok) {
|
||||
ret.emplace_back(
|
||||
dht::token_range::bound(prev_tok, false),
|
||||
dht::token_range::bound(tok, true));
|
||||
auto pos = ret.end();
|
||||
if (!ret.empty() && !std::prev(pos)->end()) {
|
||||
// We inserted a wrapped range (a, b] previously as
|
||||
// (-inf, b], (a, +inf). So now we insert in the next-to-last
|
||||
// position to keep the last range (a, +inf) at the end.
|
||||
pos = std::prev(pos);
|
||||
}
|
||||
ret.insert(pos,
|
||||
dht::token_range{
|
||||
dht::token_range::bound(prev_tok, false),
|
||||
dht::token_range::bound(tok, true)});
|
||||
} else {
|
||||
ret.emplace_back(
|
||||
dht::token_range::bound(prev_tok, false),
|
||||
|
||||
20
main.cc
20
main.cc
@@ -703,6 +703,17 @@ int main(int ac, char** av) {
|
||||
supervisor::notify("starting streaming service");
|
||||
streaming::stream_session::init_streaming_service(db).get();
|
||||
api::set_server_stream_manager(ctx).get();
|
||||
|
||||
supervisor::notify("starting hinted handoff manager");
|
||||
if (hinted_handoff_enabled) {
|
||||
db::hints::manager::rebalance(cfg->hints_directory()).get();
|
||||
}
|
||||
db::hints::manager::rebalance(cfg->data_file_directories()[0] + "/view_pending_updates").get();
|
||||
|
||||
proxy.invoke_on_all([] (service::storage_proxy& local_proxy) {
|
||||
local_proxy.start_hints_manager(gms::get_local_gossiper().shared_from_this(), service::get_local_storage_service().shared_from_this());
|
||||
}).get();
|
||||
|
||||
supervisor::notify("starting messaging service");
|
||||
// Start handling REPAIR_CHECKSUM_RANGE messages
|
||||
netw::get_messaging_service().invoke_on_all([&db] (auto& ms) {
|
||||
@@ -739,14 +750,9 @@ int main(int ac, char** av) {
|
||||
gms::get_local_gossiper().wait_for_gossip_to_settle().get();
|
||||
api::set_server_gossip_settle(ctx).get();
|
||||
|
||||
supervisor::notify("starting hinted handoff manager");
|
||||
if (hinted_handoff_enabled) {
|
||||
db::hints::manager::rebalance(cfg->hints_directory()).get();
|
||||
}
|
||||
db::hints::manager::rebalance(cfg->data_file_directories()[0] + "/view_pending_updates").get();
|
||||
|
||||
supervisor::notify("allow replaying hints");
|
||||
proxy.invoke_on_all([] (service::storage_proxy& local_proxy) {
|
||||
local_proxy.start_hints_manager(gms::get_local_gossiper().shared_from_this(), service::get_local_storage_service().shared_from_this());
|
||||
local_proxy.allow_replaying_hints();
|
||||
}).get();
|
||||
|
||||
static sharded<db::view::view_builder> view_builder;
|
||||
|
||||
@@ -214,7 +214,9 @@ private:
|
||||
|
||||
void update(const schema& s, const deletable_row& dr) {
|
||||
update(dr.marker());
|
||||
update(dr.deleted_at().tomb());
|
||||
row_tombstone row_tomb = dr.deleted_at();
|
||||
update(row_tomb.regular());
|
||||
update(row_tomb.tomb());
|
||||
update(s, dr.cells(), column_kind::regular_column);
|
||||
}
|
||||
|
||||
|
||||
@@ -135,12 +135,14 @@ struct messaging_service::rpc_protocol_wrapper : public rpc_protocol { using rpc
|
||||
// This should be integrated into messaging_service proper.
|
||||
class messaging_service::rpc_protocol_client_wrapper {
|
||||
std::unique_ptr<rpc_protocol::client> _p;
|
||||
::shared_ptr<seastar::tls::server_credentials> _credentials;
|
||||
public:
|
||||
rpc_protocol_client_wrapper(rpc_protocol& proto, rpc::client_options opts, ipv4_addr addr, ipv4_addr local = ipv4_addr())
|
||||
: _p(std::make_unique<rpc_protocol::client>(proto, std::move(opts), addr, local)) {
|
||||
}
|
||||
rpc_protocol_client_wrapper(rpc_protocol& proto, rpc::client_options opts, ipv4_addr addr, ipv4_addr local, ::shared_ptr<seastar::tls::server_credentials> c)
|
||||
: _p(std::make_unique<rpc_protocol::client>(proto, std::move(opts), seastar::tls::socket(c), addr, local))
|
||||
, _credentials(c)
|
||||
{}
|
||||
auto get_stats() const { return _p->get_stats(); }
|
||||
future<> stop() { return _p->stop(); }
|
||||
@@ -148,6 +150,19 @@ public:
|
||||
return _p->error();
|
||||
}
|
||||
operator rpc_protocol::client&() { return *_p; }
|
||||
|
||||
/**
|
||||
* #3787 Must ensure we use the right type of socket. I.e. tls or not.
|
||||
* See above: we retain the credentials object so that here we can know whether we
|
||||
* are tls or not.
|
||||
*/
|
||||
template<typename Serializer, typename... Out>
|
||||
future<rpc::sink<Out...>> make_stream_sink() {
|
||||
if (_credentials) {
|
||||
return _p->make_stream_sink<Serializer, Out...>(seastar::tls::socket(_credentials));
|
||||
}
|
||||
return _p->make_stream_sink<Serializer, Out...>();
|
||||
}
|
||||
};
|
||||
|
||||
struct messaging_service::rpc_protocol_server_wrapper : public rpc_protocol::server { using rpc_protocol::server::server; };
|
||||
@@ -639,8 +654,9 @@ rpc::sink<int32_t> messaging_service::make_sink_for_stream_mutation_fragments(rp
|
||||
|
||||
future<rpc::sink<frozen_mutation_fragment>, rpc::source<int32_t>>
|
||||
messaging_service::make_sink_and_source_for_stream_mutation_fragments(utils::UUID schema_id, utils::UUID plan_id, utils::UUID cf_id, uint64_t estimated_partitions, msg_addr id) {
|
||||
rpc_protocol::client& rpc_client = *get_rpc_client(messaging_verb::STREAM_MUTATION_FRAGMENTS, id);
|
||||
return rpc_client.make_stream_sink<netw::serializer, frozen_mutation_fragment>().then([this, plan_id, schema_id, cf_id, estimated_partitions, &rpc_client] (rpc::sink<frozen_mutation_fragment> sink) mutable {
|
||||
auto wrapper = get_rpc_client(messaging_verb::STREAM_MUTATION_FRAGMENTS, id);
|
||||
rpc_protocol::client& rpc_client = *wrapper;
|
||||
return wrapper->make_stream_sink<netw::serializer, frozen_mutation_fragment>().then([this, plan_id, schema_id, cf_id, estimated_partitions, &rpc_client] (rpc::sink<frozen_mutation_fragment> sink) mutable {
|
||||
auto rpc_handler = rpc()->make_client<rpc::source<int32_t> (utils::UUID, utils::UUID, utils::UUID, uint64_t, rpc::sink<frozen_mutation_fragment>)>(messaging_verb::STREAM_MUTATION_FRAGMENTS);
|
||||
return rpc_handler(rpc_client , plan_id, schema_id, cf_id, estimated_partitions, sink).then([sink] (rpc::source<int32_t> source) mutable {
|
||||
return make_ready_future<rpc::sink<frozen_mutation_fragment>, rpc::source<int32_t>>(std::move(sink), std::move(source));
|
||||
|
||||
@@ -470,6 +470,9 @@ read_context::ready_to_save_state* read_context::prepare_reader_for_saving(
|
||||
if (stopped_reader_fut.failed()) {
|
||||
mmq_log.debug("Failed to stop reader on shard {}: {}", shard, stopped_reader_fut.get_exception());
|
||||
++_db.local().get_stats().multishard_query_failed_reader_stops;
|
||||
// We don't want to leave the reader in dismantling state, lest stop()
|
||||
// will try to wait on the reader_fut again and crash the application.
|
||||
rs = {};
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@@ -609,9 +612,17 @@ future<> read_context::save_readers(circular_buffer<mutation_fragment> unconsume
|
||||
}
|
||||
|
||||
if (auto* maybe_future_dismantling_state = std::get_if<future_dismantling_state>(&rs)) {
|
||||
return maybe_future_dismantling_state->fut.then([this, &rs,
|
||||
finish_saving = std::move(finish_saving)] (dismantling_state&& next_state) mutable {
|
||||
rs = std::move(next_state);
|
||||
return maybe_future_dismantling_state->fut.then_wrapped([this, &rs,
|
||||
finish_saving = std::move(finish_saving)] (future<dismantling_state>&& next_state_fut) mutable {
|
||||
if (next_state_fut.failed()) {
|
||||
mmq_log.debug("Failed to stop reader: {}", next_state_fut.get_exception());
|
||||
++_db.local().get_stats().multishard_query_failed_reader_stops;
|
||||
// We don't want to leave the reader in future dismantling state, lest
|
||||
// stop() will try to wait on the fut again and crash the application.
|
||||
rs = {};
|
||||
return make_ready_future<>();
|
||||
}
|
||||
rs = next_state_fut.get0();
|
||||
return finish_saving(std::get<dismantling_state>(rs));
|
||||
});
|
||||
}
|
||||
|
||||
15
schema.cc
15
schema.cc
@@ -1121,6 +1121,21 @@ schema::has_static_columns() const {
|
||||
return !static_columns().empty();
|
||||
}
|
||||
|
||||
// Returns the number of columns of the requested kind by dispatching to the
// matching per-kind accessor (partition_key_size(), clustering_key_size(),
// static_columns_count() or regular_columns_count()).
column_count_type
|
||||
schema::columns_count(column_kind kind) const {
|
||||
switch (kind) {
|
||||
case column_kind::partition_key:
|
||||
return partition_key_size();
|
||||
case column_kind::clustering_key:
|
||||
return clustering_key_size();
|
||||
case column_kind::static_column:
|
||||
return static_columns_count();
|
||||
case column_kind::regular_column:
|
||||
return regular_columns_count();
|
||||
// Any other column_kind value is treated as fatal.
default:
|
||||
std::abort();
|
||||
}
|
||||
}
|
||||
column_count_type
|
||||
schema::partition_key_size() const {
|
||||
return column_offset(column_kind::clustering_key);
|
||||
|
||||
@@ -701,6 +701,7 @@ public:
|
||||
bool is_last_partition_key(const column_definition& def) const;
|
||||
bool has_multi_cell_collections() const;
|
||||
bool has_static_columns() const;
|
||||
column_count_type columns_count(column_kind kind) const;
|
||||
column_count_type partition_key_size() const;
|
||||
column_count_type clustering_key_size() const;
|
||||
column_count_type static_columns_count() const;
|
||||
|
||||
2
seastar
2
seastar
Submodule seastar updated: 57128167aa...39b89de259
@@ -379,7 +379,7 @@ public:
|
||||
}
|
||||
|
||||
::shared_ptr<const paging_state> query_pager::state() const {
|
||||
return ::make_shared<paging_state>(*_last_pkey, _last_ckey, _exhausted ? 0 : _max, _cmd->query_uuid, _last_replicas, _query_read_repair_decision);
|
||||
return ::make_shared<paging_state>(_last_pkey.value_or(partition_key::make_empty()), _last_ckey, _exhausted ? 0 : _max, _cmd->query_uuid, _last_replicas, _query_read_repair_decision);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -3074,8 +3074,9 @@ storage_proxy::query_result_local(schema_ptr s, lw_shared_ptr<query::read_comman
|
||||
unsigned shard = _db.local().shard_of(pr.start()->value().token());
|
||||
_stats.replica_cross_shard_ops += shard != engine().cpu_id();
|
||||
return _db.invoke_on(shard, [max_size, gs = global_schema_ptr(s), prv = dht::partition_range_vector({pr}) /* FIXME: pr is copied */, cmd, opts, timeout, gt = tracing::global_trace_state_ptr(std::move(trace_state))] (database& db) mutable {
|
||||
tracing::trace(gt, "Start querying the token range that starts with {}", seastar::value_of([&prv] { return prv.begin()->start()->value().token(); }));
|
||||
return db.query(gs, *cmd, opts, prv, gt, max_size, timeout).then([trace_state = gt.get()](auto&& f, cache_temperature ht) {
|
||||
auto trace_state = gt.get();
|
||||
tracing::trace(trace_state, "Start querying the token range that starts with {}", seastar::value_of([&prv] { return prv.begin()->start()->value().token(); }));
|
||||
return db.query(gs, *cmd, opts, prv, trace_state, max_size, timeout).then([trace_state](auto&& f, cache_temperature ht) {
|
||||
tracing::trace(trace_state, "Querying is done");
|
||||
return make_ready_future<foreign_ptr<lw_shared_ptr<query::result>>, cache_temperature>(make_foreign(std::move(f)), ht);
|
||||
});
|
||||
@@ -4218,6 +4219,10 @@ future<> storage_proxy::start_hints_manager(shared_ptr<gms::gossiper> gossiper_p
|
||||
return _hints_resource_manager.start(shared_from_this(), gossiper_ptr, ss_ptr);
|
||||
}
|
||||
|
||||
// Forwards to _hints_resource_manager.allow_replaying(); declared noexcept.
void storage_proxy::allow_replaying_hints() noexcept {
|
||||
return _hints_resource_manager.allow_replaying();
|
||||
}
|
||||
|
||||
// Stops the hints resource manager and returns the future produced by its stop().
future<> storage_proxy::stop_hints_manager() {
|
||||
return _hints_resource_manager.stop();
|
||||
}
|
||||
|
||||
@@ -390,6 +390,7 @@ public:
|
||||
future<> stop();
|
||||
future<> stop_hints_manager();
|
||||
future<> start_hints_manager(shared_ptr<gms::gossiper> gossiper_ptr, shared_ptr<service::storage_service> ss_ptr);
|
||||
void allow_replaying_hints() noexcept;
|
||||
|
||||
const stats& get_stats() const {
|
||||
return _stats;
|
||||
|
||||
@@ -353,7 +353,7 @@ void storage_service::prepare_to_join(std::vector<inet_address> loaded_endpoints
|
||||
gossiper.check_knows_remote_features(local_features, peer_features);
|
||||
}
|
||||
|
||||
gossiper.reset_endpoint_state_map();
|
||||
gossiper.reset_endpoint_state_map().get();
|
||||
for (auto ep : loaded_endpoints) {
|
||||
gossiper.add_saved_endpoint(ep);
|
||||
}
|
||||
@@ -367,7 +367,7 @@ void storage_service::prepare_to_join(std::vector<inet_address> loaded_endpoints
|
||||
slogger.info("Checking remote features with gossip");
|
||||
gossiper.do_shadow_round().get();
|
||||
gossiper.check_knows_remote_features(local_features);
|
||||
gossiper.reset_endpoint_state_map();
|
||||
gossiper.reset_endpoint_state_map().get();
|
||||
for (auto ep : loaded_endpoints) {
|
||||
gossiper.add_saved_endpoint(ep);
|
||||
}
|
||||
@@ -1570,7 +1570,7 @@ future<> storage_service::check_for_endpoint_collision() {
|
||||
throw std::runtime_error("Other bootstrapping/leaving/moving nodes detected, cannot bootstrap while consistent_rangemovement is true (check_for_endpoint_collision)");
|
||||
} else {
|
||||
gossiper.goto_shadow_round();
|
||||
gossiper.reset_endpoint_state_map();
|
||||
gossiper.reset_endpoint_state_map().get();
|
||||
found_bootstrapping_node = true;
|
||||
auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(gms::gossiper::clk::now() - t).count();
|
||||
slogger.info("Checking bootstrapping/leaving/moving nodes: node={}, status={}, sleep 1 second and check again ({} seconds elapsed) (check_for_endpoint_collision)", addr, state, elapsed);
|
||||
@@ -1582,7 +1582,7 @@ future<> storage_service::check_for_endpoint_collision() {
|
||||
}
|
||||
} while (found_bootstrapping_node);
|
||||
slogger.info("Checking bootstrapping/leaving/moving nodes: ok (check_for_endpoint_collision)");
|
||||
gossiper.reset_endpoint_state_map();
|
||||
gossiper.reset_endpoint_state_map().get();
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1632,8 +1632,9 @@ future<std::unordered_set<token>> storage_service::prepare_replacement_info() {
|
||||
auto tokens = get_tokens_for(replace_address);
|
||||
// use the replacee's host Id as our own so we receive hints, etc
|
||||
return db::system_keyspace::set_local_host_id(host_id).discard_result().then([replace_address, tokens = std::move(tokens)] {
|
||||
gms::get_local_gossiper().reset_endpoint_state_map(); // clean up since we have what we need
|
||||
return make_ready_future<std::unordered_set<token>>(std::move(tokens));
|
||||
return gms::get_local_gossiper().reset_endpoint_state_map().then([tokens = std::move(tokens)] { // clean up since we have what we need
|
||||
return make_ready_future<std::unordered_set<token>>(std::move(tokens));
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -2046,6 +2047,7 @@ future<> storage_service::start_rpc_server() {
|
||||
auto keepalive = cfg.rpc_keepalive();
|
||||
thrift_server_config tsc;
|
||||
tsc.timeout_config = make_timeout_config(cfg);
|
||||
tsc.max_request_size = cfg.thrift_max_message_length_in_mb() * (uint64_t(1) << 20);
|
||||
return seastar::net::dns::resolve_name(addr).then([&ss, tserver, addr, port, keepalive, tsc] (seastar::net::inet_address ip) {
|
||||
return tserver->start(std::ref(ss._db), std::ref(cql3::get_query_processor()), std::ref(ss._auth_service), tsc).then([tserver, port, addr, ip, keepalive] {
|
||||
// #293 - do not stop anything
|
||||
|
||||
@@ -53,63 +53,76 @@ inline column_values_fixed_lengths get_clustering_values_fixed_lengths(const ser
|
||||
* This way we don't need to look them up by column name every time.
|
||||
*/
|
||||
class column_translation {
|
||||
public:
|
||||
struct column_info {
|
||||
// Disengaged 'id' means the column is missing from the current schema
|
||||
std::optional<column_id> id;
|
||||
std::optional<uint32_t> value_length;
|
||||
bool is_collection;
|
||||
bool is_counter;
|
||||
};
|
||||
|
||||
private:
|
||||
|
||||
struct state {
|
||||
|
||||
static std::tuple<std::vector<std::optional<column_id>>,
|
||||
std::vector<std::optional<uint32_t>>,
|
||||
std::vector<bool>,
|
||||
std::vector<bool>> build(
|
||||
static std::vector<column_info> build(
|
||||
const schema& s,
|
||||
const utils::chunked_vector<serialization_header::column_desc>& src,
|
||||
bool is_static) {
|
||||
std::vector<std::optional<column_id>> ids;
|
||||
std::vector<std::optional<column_id>> lens;
|
||||
std::vector<bool> is_collection;
|
||||
std::vector<bool> is_counter;
|
||||
std::vector<column_info> cols;
|
||||
if (s.is_dense()) {
|
||||
if (is_static) {
|
||||
ids.push_back(s.static_begin()->id);
|
||||
lens.push_back(s.static_begin()->type->value_length_if_fixed());
|
||||
is_collection.push_back(s.static_begin()->is_multi_cell());
|
||||
is_counter.push_back(s.static_begin()->is_counter());
|
||||
cols.push_back(column_info{
|
||||
s.static_begin()->id,
|
||||
s.static_begin()->type->value_length_if_fixed(),
|
||||
s.static_begin()->is_multi_cell(),
|
||||
s.static_begin()->is_counter()
|
||||
});
|
||||
} else {
|
||||
ids.push_back(s.regular_begin()->id);
|
||||
lens.push_back(s.regular_begin()->type->value_length_if_fixed());
|
||||
is_collection.push_back(s.regular_begin()->is_multi_cell());
|
||||
is_counter.push_back(s.regular_begin()->is_counter());
|
||||
cols.push_back(column_info{
|
||||
s.regular_begin()->id,
|
||||
s.regular_begin()->type->value_length_if_fixed(),
|
||||
s.regular_begin()->is_multi_cell(),
|
||||
s.regular_begin()->is_counter()
|
||||
});
|
||||
}
|
||||
} else {
|
||||
ids.reserve(src.size());
|
||||
lens.reserve(src.size());
|
||||
cols.reserve(src.size());
|
||||
for (auto&& desc : src) {
|
||||
const bytes& type_name = desc.type_name.value;
|
||||
data_type type = db::marshal::type_parser::parse(to_sstring_view(type_name));
|
||||
const column_definition* def = s.get_column_definition(desc.name.value);
|
||||
std::optional<column_id> id;
|
||||
if (def) {
|
||||
ids.push_back(def->id);
|
||||
lens.push_back(def->type->value_length_if_fixed());
|
||||
is_collection.push_back(def->is_multi_cell());
|
||||
is_counter.push_back(def->is_counter());
|
||||
} else {
|
||||
ids.push_back(std::nullopt);
|
||||
lens.push_back(std::nullopt);
|
||||
is_collection.push_back(false);
|
||||
is_counter.push_back(false);
|
||||
if (def->is_multi_cell() != type->is_multi_cell() || def->is_counter() != type->is_counter()) {
|
||||
throw malformed_sstable_exception(sprint(
|
||||
"{} definition in serialization header does not match schema. "
|
||||
"Schema collection = {}, counter = {}. Header collection = {}, counter = {}",
|
||||
def->name(),
|
||||
def->is_multi_cell(),
|
||||
def->is_counter(),
|
||||
type->is_multi_cell(),
|
||||
type->is_counter()));
|
||||
}
|
||||
id = def->id;
|
||||
}
|
||||
cols.push_back(column_info{
|
||||
id,
|
||||
type->value_length_if_fixed(),
|
||||
type->is_multi_cell(),
|
||||
type->is_counter()
|
||||
});
|
||||
}
|
||||
boost::range::stable_partition(cols, [](const column_info& column) { return !column.is_collection; });
|
||||
}
|
||||
return std::make_tuple(std::move(ids), std::move(lens), std::move(is_collection), std::move(is_counter));
|
||||
return cols;
|
||||
}
|
||||
|
||||
utils::UUID schema_uuid;
|
||||
std::vector<std::optional<column_id>> regular_schema_column_id_from_sstable;
|
||||
std::vector<std::optional<column_id>> static_schema_column_id_from_sstable;
|
||||
column_values_fixed_lengths regular_column_value_fix_lengths;
|
||||
column_values_fixed_lengths static_column_value_fix_lengths;
|
||||
std::vector<column_info> regular_schema_columns_from_sstable;
|
||||
std::vector<column_info> static_schema_columns_from_sstable;
|
||||
column_values_fixed_lengths clustering_column_value_fix_lengths;
|
||||
std::vector<bool> static_column_is_collection;
|
||||
std::vector<bool> regular_column_is_collection;
|
||||
std::vector<bool> static_column_is_counter;
|
||||
std::vector<bool> regular_column_is_counter;
|
||||
|
||||
state() = default;
|
||||
state(const state&) = delete;
|
||||
@@ -118,19 +131,11 @@ class column_translation {
|
||||
state& operator=(state&&) = default;
|
||||
|
||||
state(const schema& s, const serialization_header& header)
|
||||
: schema_uuid(s.version()) {
|
||||
std::tie(regular_schema_column_id_from_sstable,
|
||||
regular_column_value_fix_lengths,
|
||||
regular_column_is_collection,
|
||||
regular_column_is_counter) =
|
||||
build(s, header.regular_columns.elements, false);
|
||||
std::tie(static_schema_column_id_from_sstable,
|
||||
static_column_value_fix_lengths,
|
||||
static_column_is_collection,
|
||||
static_column_is_counter) =
|
||||
build(s, header.static_columns.elements, true);
|
||||
clustering_column_value_fix_lengths = get_clustering_values_fixed_lengths(header);
|
||||
}
|
||||
: schema_uuid(s.version())
|
||||
, regular_schema_columns_from_sstable(build(s, header.regular_columns.elements, false))
|
||||
, static_schema_columns_from_sstable(build(s, header.static_columns.elements, true))
|
||||
, clustering_column_value_fix_lengths (get_clustering_values_fixed_lengths(header))
|
||||
{}
|
||||
};
|
||||
|
||||
lw_shared_ptr<const state> _state = make_lw_shared<const state>();
|
||||
@@ -143,33 +148,15 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
const std::vector<std::optional<column_id>>& regular_columns() const {
|
||||
return _state->regular_schema_column_id_from_sstable;
|
||||
const std::vector<column_info>& regular_columns() const {
|
||||
return _state->regular_schema_columns_from_sstable;
|
||||
}
|
||||
const std::vector<std::optional<column_id>>& static_columns() const {
|
||||
return _state->static_schema_column_id_from_sstable;
|
||||
}
|
||||
const std::vector<std::optional<uint32_t>>& regular_column_value_fix_legths() const {
|
||||
return _state->regular_column_value_fix_lengths;
|
||||
}
|
||||
const std::vector<std::optional<uint32_t>>& static_column_value_fix_legths() const {
|
||||
return _state->static_column_value_fix_lengths;
|
||||
const std::vector<column_info>& static_columns() const {
|
||||
return _state->static_schema_columns_from_sstable;
|
||||
}
|
||||
const std::vector<std::optional<uint32_t>>& clustering_column_value_fix_legths() const {
|
||||
return _state->clustering_column_value_fix_lengths;
|
||||
}
|
||||
const std::vector<bool>& static_column_is_collection() const {
|
||||
return _state->static_column_is_collection;
|
||||
}
|
||||
const std::vector<bool>& regular_column_is_collection() const {
|
||||
return _state->regular_column_is_collection;
|
||||
}
|
||||
const std::vector<bool>& static_column_is_counter() const {
|
||||
return _state->static_column_is_counter;
|
||||
}
|
||||
const std::vector<bool>& regular_column_is_counter() const {
|
||||
return _state->regular_column_is_counter;
|
||||
}
|
||||
};
|
||||
|
||||
}; // namespace sstables
|
||||
|
||||
@@ -531,11 +531,11 @@ public:
|
||||
}
|
||||
|
||||
void report_start(const sstring& formatted_msg) const override {
|
||||
clogger.debug("Compacting {}", formatted_msg);
|
||||
clogger.info("Compacting {}", formatted_msg);
|
||||
}
|
||||
|
||||
void report_finish(const sstring& formatted_msg, std::chrono::time_point<db_clock> ended_at) const override {
|
||||
clogger.debug("Compacted {}", formatted_msg);
|
||||
clogger.info("Compacted {}", formatted_msg);
|
||||
}
|
||||
|
||||
void backlog_tracker_adjust_charges() override {
|
||||
|
||||
@@ -65,9 +65,13 @@ public:
|
||||
if (!is_set()) {
|
||||
return row_marker();
|
||||
}
|
||||
return _ttl != gc_clock::duration::zero() || _local_deletion_time != gc_clock::time_point::max()
|
||||
? row_marker(_timestamp, _ttl, _local_deletion_time)
|
||||
: row_marker(_timestamp);
|
||||
if (is_expired_liveness_ttl(_ttl.count())) {
|
||||
return row_marker{tombstone{_timestamp, _local_deletion_time}};
|
||||
} else if (_ttl != gc_clock::duration::zero() || _local_deletion_time != gc_clock::time_point::max()) {
|
||||
return row_marker{_timestamp, _ttl, _local_deletion_time};
|
||||
}
|
||||
|
||||
return row_marker{_timestamp};
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -131,7 +131,7 @@ inline api::timestamp_type parse_timestamp(const serialization_header& header,
|
||||
}
|
||||
|
||||
inline gc_clock::duration parse_ttl(uint32_t value) {
|
||||
if (value > std::numeric_limits<gc_clock::duration::rep>::max()) {
|
||||
if (value > max_ttl.count() && ! is_expired_liveness_ttl(value)) {
|
||||
throw malformed_sstable_exception(format("Too big ttl: {}", value));
|
||||
}
|
||||
return gc_clock::duration(value);
|
||||
|
||||
@@ -203,10 +203,10 @@ void write_clustering_prefix(file_writer& out, const schema& s,
|
||||
class missing_columns_input_range
|
||||
: public input_range_base<missing_columns_input_range, uint64_t> {
|
||||
private:
|
||||
const schema& _schema;
|
||||
const indexed_columns& _columns;
|
||||
const row& _row;
|
||||
mutable uint64_t _current_value = 0;
|
||||
mutable column_id _current_id = 0;
|
||||
mutable size_t _current_index = 0;
|
||||
mutable bool _large_mode_produced_size = false;
|
||||
|
||||
enum class encoding_mode {
|
||||
@@ -216,35 +216,35 @@ private:
|
||||
} _mode;
|
||||
|
||||
public:
|
||||
missing_columns_input_range(const schema& s, const row& row)
|
||||
: _schema(s)
|
||||
missing_columns_input_range(const indexed_columns& columns, const row& row)
|
||||
: _columns(columns)
|
||||
, _row(row) {
|
||||
|
||||
auto row_size = _row.size();
|
||||
auto total_size = _schema.regular_columns_count();
|
||||
auto total_size = _columns.size();
|
||||
|
||||
_current_id = row_size < total_size ? 0 : total_size;
|
||||
_current_index = row_size < total_size ? 0 : total_size;
|
||||
_mode = (total_size < 64) ? encoding_mode::small :
|
||||
(row_size < total_size / 2) ? encoding_mode::large_encode_present :
|
||||
encoding_mode::large_encode_missing;
|
||||
}
|
||||
|
||||
bool next() const {
|
||||
auto total_size = _schema.regular_columns_count();
|
||||
if (_current_id == total_size) {
|
||||
auto total_size = _columns.size();
|
||||
if (_current_index == total_size) {
|
||||
// No more values to encode
|
||||
return false;
|
||||
}
|
||||
|
||||
if (_mode == encoding_mode::small) {
|
||||
// Set bit for every missing column
|
||||
for (column_id id = 0; id < total_size; ++id) {
|
||||
auto cell = _row.find_cell(id);
|
||||
for (const auto& element: _columns | boost::adaptors::indexed()) {
|
||||
auto cell = _row.find_cell(element.value().get().id);
|
||||
if (!cell) {
|
||||
_current_value |= (uint64_t(1) << id);
|
||||
_current_value |= (uint64_t(1) << element.index());
|
||||
}
|
||||
}
|
||||
_current_id = total_size;
|
||||
_current_index = total_size;
|
||||
return true;
|
||||
} else {
|
||||
// For either of large modes, output the difference between total size and row size first
|
||||
@@ -255,25 +255,25 @@ public:
|
||||
}
|
||||
|
||||
if (_mode == encoding_mode::large_encode_present) {
|
||||
while (_current_id < total_size) {
|
||||
auto cell = _row.find_cell(_current_id);
|
||||
while (_current_index < total_size) {
|
||||
auto cell = _row.find_cell(_columns[_current_index].get().id);
|
||||
if (cell) {
|
||||
_current_value = _current_id;
|
||||
++_current_id;
|
||||
_current_value = _current_index;
|
||||
++_current_index;
|
||||
return true;
|
||||
}
|
||||
++_current_id;
|
||||
++_current_index;
|
||||
}
|
||||
} else {
|
||||
assert(_mode == encoding_mode::large_encode_missing);
|
||||
while (_current_id < total_size) {
|
||||
auto cell = _row.find_cell(_current_id);
|
||||
while (_current_index < total_size) {
|
||||
auto cell = _row.find_cell(_columns[_current_index].get().id);
|
||||
if (!cell) {
|
||||
_current_value = _current_id;
|
||||
++_current_id;
|
||||
_current_value = _current_index;
|
||||
++_current_index;
|
||||
return true;
|
||||
}
|
||||
++_current_id;
|
||||
++_current_index;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -285,12 +285,12 @@ public:
|
||||
|
||||
explicit operator bool() const
|
||||
{
|
||||
return (_current_id < _schema.regular_columns_count());
|
||||
return (_current_index < _columns.size());
|
||||
}
|
||||
};
|
||||
|
||||
void write_missing_columns(file_writer& out, const schema& s, const row& row) {
|
||||
for (const auto value: missing_columns_input_range{s, row}) {
|
||||
void write_missing_columns(file_writer& out, const indexed_columns& columns, const row& row) {
|
||||
for (const auto value: missing_columns_input_range{columns, row}) {
|
||||
write_vint(out, value);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -37,6 +37,8 @@ namespace sstables {
|
||||
|
||||
class file_writer;
|
||||
|
||||
using indexed_columns = std::vector<std::reference_wrapper<const column_definition>>;
|
||||
|
||||
// Utilities for writing integral values in variable-length format
|
||||
// See vint-serialization.hh for more details
|
||||
void write_unsigned_vint(file_writer& out, uint64_t value);
|
||||
@@ -75,7 +77,7 @@ void write_clustering_prefix(file_writer& out, const schema& s,
|
||||
const clustering_key_prefix& prefix, ephemerally_full_prefix is_ephemerally_full);
|
||||
|
||||
// Writes encoded information about missing columns in the given row
|
||||
void write_missing_columns(file_writer& out, const schema& s, const row& row);
|
||||
void write_missing_columns(file_writer& out, const indexed_columns& columns, const row& row);
|
||||
|
||||
// Helper functions for writing delta-encoded time-related values
|
||||
void write_delta_timestamp(file_writer& out, api::timestamp_type timestamp, const encoding_stats& enc_stats);
|
||||
|
||||
@@ -810,7 +810,8 @@ class mp_row_consumer_m : public consumer_m {
|
||||
streamed_mutation::forwarding _fwd;
|
||||
|
||||
std::optional<clustering_row> _in_progress_row;
|
||||
std::variant<std::monostate, clustering_row, range_tombstone> _stored;
|
||||
std::optional<clustering_row> _stored_row;
|
||||
std::optional<range_tombstone> _stored_tombstone;
|
||||
static_row _in_progress_static_row;
|
||||
bool _inside_static_row = false;
|
||||
|
||||
@@ -825,6 +826,10 @@ class mp_row_consumer_m : public consumer_m {
|
||||
clustering_key_prefix ck;
|
||||
bound_kind kind;
|
||||
tombstone tomb;
|
||||
|
||||
// Builds a range-tagged position_in_partition_view from this entry's
// clustering prefix (ck) and bound kind (kind).
position_in_partition_view position() {
|
||||
return position_in_partition_view(position_in_partition_view::range_tag_t{}, bound_view(ck, kind));
|
||||
}
|
||||
};
|
||||
|
||||
inline friend std::ostream& operator<<(std::ostream& o, const sstables::mp_row_consumer_m::range_tombstone_start& rt_start) {
|
||||
@@ -872,49 +877,6 @@ class mp_row_consumer_m : public consumer_m {
|
||||
return _schema->column_at(column_type, *column_id);
|
||||
}
|
||||
|
||||
inline proceed maybe_push_row(clustering_row&& cr) {
|
||||
sstlog.trace("mp_row_consumer_m {}: maybe_push_row({})", this, cr);
|
||||
auto action = _mf_filter->apply(cr);
|
||||
switch (action) {
|
||||
case mutation_fragment_filter::result::emit:
|
||||
if (_opened_range_tombstone) {
|
||||
/* We have an opened range tombstone which means that the current row is spanned by that RT.
|
||||
* Since the row is to be emitted, so is the range tombstone that we form from the opened start
|
||||
* and the end built from the row position because it also overlaps with query ranges.
|
||||
*/
|
||||
auto ck = cr.key();
|
||||
bool was_non_full_key = clustering_key::make_full(*_schema, ck);
|
||||
auto end_kind = was_non_full_key ? bound_kind::excl_end : bound_kind::incl_end;
|
||||
_reader->push_mutation_fragment(range_tombstone(std::move(_opened_range_tombstone->ck),
|
||||
_opened_range_tombstone->kind,
|
||||
ck,
|
||||
end_kind,
|
||||
_opened_range_tombstone->tomb));
|
||||
_opened_range_tombstone->ck = std::move(ck);
|
||||
_opened_range_tombstone->kind = was_non_full_key ? bound_kind::incl_start : bound_kind::excl_start;
|
||||
}
|
||||
_reader->push_mutation_fragment(std::move(cr));
|
||||
break;
|
||||
case mutation_fragment_filter::result::ignore:
|
||||
if (_opened_range_tombstone) {
|
||||
// Trim the opened range up to the clustering key of the current row
|
||||
auto& ck = cr.key();
|
||||
bool was_non_full_key = clustering_key::make_full(*_schema, ck);
|
||||
_opened_range_tombstone->ck = std::move(ck);
|
||||
_opened_range_tombstone->kind = was_non_full_key ? bound_kind::incl_start : bound_kind::excl_start;
|
||||
}
|
||||
if (_mf_filter->is_current_range_changed()) {
|
||||
return proceed::no;
|
||||
}
|
||||
break;
|
||||
case mutation_fragment_filter::result::store_and_finish:
|
||||
_stored.emplace<clustering_row>(std::move(cr));
|
||||
return proceed::no;
|
||||
}
|
||||
|
||||
return proceed(!_reader->is_buffer_full());
|
||||
}
|
||||
|
||||
inline proceed maybe_push_range_tombstone(range_tombstone&& rt) {
|
||||
const auto action = _mf_filter->apply(rt);
|
||||
switch (action) {
|
||||
@@ -927,13 +889,23 @@ class mp_row_consumer_m : public consumer_m {
|
||||
}
|
||||
break;
|
||||
case mutation_fragment_filter::result::store_and_finish:
|
||||
_stored.emplace<range_tombstone>(std::move(rt));
|
||||
_stored_tombstone = std::move(rt);
|
||||
_reader->on_end_of_stream();
|
||||
return proceed::no;
|
||||
}
|
||||
|
||||
return proceed(!_reader->is_buffer_full());
|
||||
}
|
||||
|
||||
// Clears per-partition consumer state: sets _is_mutation_end and resets the
// in-progress row, the stored row, the stored tombstone, the fragment filter
// and any opened range tombstone.
inline void reset_for_new_partition() {
|
||||
_is_mutation_end = true;
|
||||
_in_progress_row.reset();
|
||||
_stored_row.reset();
|
||||
_stored_tombstone.reset();
|
||||
_mf_filter.reset();
|
||||
_opened_range_tombstone.reset();
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
/*
|
||||
@@ -979,16 +951,32 @@ public:
|
||||
return proceed::no;
|
||||
}
|
||||
|
||||
std::visit(overloaded_functor{
|
||||
[this] (clustering_row&& cr) {
|
||||
maybe_push_row(std::move(cr));
|
||||
},
|
||||
[this] (range_tombstone&& rt) {
|
||||
maybe_push_range_tombstone(std::move(rt));
|
||||
},
|
||||
[] (std::monostate) {
|
||||
auto maybe_push = [this] (auto&& mfopt) {
|
||||
if (mfopt) {
|
||||
switch (_mf_filter->apply(*mfopt)) {
|
||||
case mutation_fragment_filter::result::emit:
|
||||
_reader->push_mutation_fragment(*std::exchange(mfopt, {}));
|
||||
break;
|
||||
case mutation_fragment_filter::result::ignore:
|
||||
mfopt.reset();
|
||||
if (_mf_filter->is_current_range_changed()) {
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
case mutation_fragment_filter::result::store_and_finish:
|
||||
_reader->on_end_of_stream();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}, std::exchange(_stored, std::monostate{}));
|
||||
return false;
|
||||
};
|
||||
|
||||
if (maybe_push(_stored_tombstone)) {
|
||||
return proceed::no;
|
||||
}
|
||||
if (maybe_push(_stored_row)) {
|
||||
return proceed::no;
|
||||
}
|
||||
|
||||
return proceed::yes;
|
||||
}
|
||||
@@ -1057,11 +1045,15 @@ public:
|
||||
return proceed::yes;
|
||||
}
|
||||
|
||||
virtual proceed consume_row_marker_and_tombstone(const liveness_info& info, tombstone t) override {
|
||||
sstlog.trace("mp_row_consumer_m {}: consume_row_marker_and_tombstone({}, {}), key={}",
|
||||
this, info.to_row_marker(), t, _in_progress_row->position());
|
||||
_in_progress_row->apply(t);
|
||||
virtual proceed consume_row_marker_and_tombstone(
|
||||
const liveness_info& info, tombstone tomb, tombstone shadowable_tomb) override {
|
||||
sstlog.trace("mp_row_consumer_m {}: consume_row_marker_and_tombstone({}, {}, {}), key={}",
|
||||
this, info.to_row_marker(), tomb, shadowable_tomb, _in_progress_row->position());
|
||||
_in_progress_row->apply(info.to_row_marker());
|
||||
_in_progress_row->apply(tomb);
|
||||
if (shadowable_tomb) {
|
||||
_in_progress_row->apply(shadowable_tombstone{shadowable_tomb});
|
||||
}
|
||||
return proceed::yes;
|
||||
}
|
||||
|
||||
@@ -1212,41 +1204,61 @@ public:
|
||||
if (!_cells.empty()) {
|
||||
fill_cells(column_kind::regular_column, _in_progress_row->cells());
|
||||
}
|
||||
return maybe_push_row(*std::exchange(_in_progress_row, {}));
|
||||
if (_opened_range_tombstone) {
|
||||
/* We have an opened range tombstone which means that the current row is spanned by that RT.
|
||||
*/
|
||||
auto ck = _in_progress_row->key();
|
||||
bool was_non_full_key = clustering_key::make_full(*_schema, ck);
|
||||
auto end_kind = was_non_full_key ? bound_kind::excl_end : bound_kind::incl_end;
|
||||
assert(!_stored_tombstone);
|
||||
_stored_tombstone = range_tombstone(std::move(_opened_range_tombstone->ck),
|
||||
_opened_range_tombstone->kind,
|
||||
ck,
|
||||
end_kind,
|
||||
_opened_range_tombstone->tomb);
|
||||
_opened_range_tombstone->ck = std::move(ck);
|
||||
_opened_range_tombstone->kind = was_non_full_key ? bound_kind::incl_start : bound_kind::excl_start;
|
||||
}
|
||||
_stored_row = *std::exchange(_in_progress_row, {});
|
||||
return proceed(push_ready_fragments() == proceed::yes && !_reader->is_buffer_full());
|
||||
}
|
||||
|
||||
return proceed(!_reader->is_buffer_full());
|
||||
}
|
||||
|
||||
virtual proceed consume_partition_end() override {
|
||||
sstlog.trace("mp_row_consumer_m {}: consume_partition_end()", this);
|
||||
virtual void on_end_of_stream() override {
|
||||
if (_opened_range_tombstone) {
|
||||
if (!_mf_filter || _mf_filter->out_of_range()) {
|
||||
throw sstables::malformed_sstable_exception("Unclosed range tombstone.");
|
||||
}
|
||||
auto range_end = _mf_filter->upper_bound();
|
||||
auto range_end = _mf_filter->uppermost_bound();
|
||||
position_in_partition::less_compare less(*_schema);
|
||||
auto start_pos = position_in_partition_view(position_in_partition_view::range_tag_t{},
|
||||
bound_view(_opened_range_tombstone->ck, _opened_range_tombstone->kind));
|
||||
if (!less(range_end, start_pos)) {
|
||||
auto end_bound = range_end.as_end_bound_view();
|
||||
consume_range_tombstone_end(end_bound.prefix(), end_bound.kind(), _opened_range_tombstone->tomb);
|
||||
auto rt = range_tombstone {std::move(_opened_range_tombstone->ck),
|
||||
_opened_range_tombstone->kind,
|
||||
end_bound.prefix(),
|
||||
end_bound.kind(),
|
||||
_opened_range_tombstone->tomb};
|
||||
_opened_range_tombstone.reset();
|
||||
_reader->push_mutation_fragment(std::move(rt));
|
||||
}
|
||||
}
|
||||
_is_mutation_end = true;
|
||||
_in_progress_row.reset();
|
||||
_stored.emplace<std::monostate>();
|
||||
_mf_filter.reset();
|
||||
consume_partition_end();
|
||||
}
|
||||
|
||||
virtual proceed consume_partition_end() override {
|
||||
sstlog.trace("mp_row_consumer_m {}: consume_partition_end()", this);
|
||||
reset_for_new_partition();
|
||||
return proceed::no;
|
||||
}
|
||||
|
||||
virtual void reset(sstables::indexable_element el) override {
|
||||
sstlog.trace("mp_row_consumer_m {}: reset({})", this, static_cast<int>(el));
|
||||
if (el == indexable_element::partition) {
|
||||
_is_mutation_end = true;
|
||||
_in_progress_row.reset();
|
||||
_stored.emplace<std::monostate>();
|
||||
_mf_filter.reset();
|
||||
reset_for_new_partition();
|
||||
} else {
|
||||
_is_mutation_end = false;
|
||||
}
|
||||
|
||||
@@ -154,8 +154,8 @@ public:
|
||||
return _walker.lower_bound();
|
||||
}
|
||||
|
||||
position_in_partition_view upper_bound() const {
|
||||
return _walker.upper_bound();
|
||||
position_in_partition_view uppermost_bound() const {
|
||||
return _walker.uppermost_bound();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -361,7 +361,7 @@ private:
|
||||
});
|
||||
}
|
||||
future<> advance_context(std::optional<position_in_partition_view> pos) {
|
||||
if (!pos) {
|
||||
if (!pos || pos->is_before_all_fragments(*_schema)) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
assert (_current_partition_key);
|
||||
|
||||
142
sstables/row.hh
142
sstables/row.hh
@@ -162,7 +162,8 @@ public:
|
||||
|
||||
virtual proceed consume_row_start(const std::vector<temporary_buffer<char>>& ecp) = 0;
|
||||
|
||||
virtual proceed consume_row_marker_and_tombstone(const sstables::liveness_info& info, tombstone t) = 0;
|
||||
virtual proceed consume_row_marker_and_tombstone(
|
||||
const sstables::liveness_info& info, tombstone tomb, tombstone shadowable_tomb) = 0;
|
||||
|
||||
virtual proceed consume_static_row_start() = 0;
|
||||
|
||||
@@ -193,6 +194,8 @@ public:
|
||||
|
||||
virtual proceed consume_row_end() = 0;
|
||||
|
||||
virtual void on_end_of_stream() = 0;
|
||||
|
||||
// Called when the reader is fast forwarded to given element.
|
||||
virtual void reset(sstables::indexable_element) = 0;
|
||||
|
||||
@@ -239,6 +242,7 @@ private:
|
||||
} _state = state::ROW_START;
|
||||
|
||||
row_consumer& _consumer;
|
||||
shared_sstable _sst;
|
||||
|
||||
temporary_buffer<char> _key;
|
||||
temporary_buffer<char> _val;
|
||||
@@ -268,6 +272,14 @@ public:
|
||||
// leave only the unprocessed part. The caller must handle calling
|
||||
// process() again, and/or refilling the buffer, as needed.
|
||||
data_consumer::processing_result process_state(temporary_buffer<char>& data) {
|
||||
try {
|
||||
return do_process_state(data);
|
||||
} catch (malformed_sstable_exception& exp) {
|
||||
throw malformed_sstable_exception(exp.what(), _sst->get_filename());
|
||||
}
|
||||
}
|
||||
private:
|
||||
data_consumer::processing_result do_process_state(temporary_buffer<char>& data) {
|
||||
#if 0
|
||||
// Testing hack: call process() for tiny chunks separately, to verify
|
||||
// that primitive types crossing input buffer are handled correctly.
|
||||
@@ -506,13 +518,15 @@ public:
|
||||
|
||||
return row_consumer::proceed::yes;
|
||||
}
|
||||
public:
|
||||
|
||||
data_consume_rows_context(const schema&,
|
||||
const shared_sstable&,
|
||||
const shared_sstable& sst,
|
||||
row_consumer& consumer,
|
||||
input_stream<char>&& input, uint64_t start, uint64_t maxlen)
|
||||
: continuous_data_consumer(std::move(input), start, maxlen)
|
||||
, _consumer(consumer) {
|
||||
, _consumer(consumer)
|
||||
, _sst(sst) {
|
||||
}
|
||||
|
||||
void verify_end_state() {
|
||||
@@ -571,6 +585,9 @@ private:
|
||||
ROW_BODY_DELETION,
|
||||
ROW_BODY_DELETION_2,
|
||||
ROW_BODY_DELETION_3,
|
||||
ROW_BODY_SHADOWABLE_DELETION,
|
||||
ROW_BODY_SHADOWABLE_DELETION_2,
|
||||
ROW_BODY_SHADOWABLE_DELETION_3,
|
||||
ROW_BODY_MARKER,
|
||||
ROW_BODY_MISSING_COLUMNS,
|
||||
ROW_BODY_MISSING_COLUMNS_2,
|
||||
@@ -608,8 +625,10 @@ private:
|
||||
} _state = state::PARTITION_START;
|
||||
|
||||
consumer_m& _consumer;
|
||||
shared_sstable _sst;
|
||||
const serialization_header& _header;
|
||||
column_translation _column_translation;
|
||||
const bool _has_shadowable_tombstones;
|
||||
|
||||
temporary_buffer<char> _pk;
|
||||
|
||||
@@ -620,16 +639,14 @@ private:
|
||||
|
||||
std::vector<temporary_buffer<char>> _row_key;
|
||||
|
||||
boost::iterator_range<std::vector<std::optional<column_id>>::const_iterator> _column_ids;
|
||||
boost::iterator_range<std::vector<std::optional<uint32_t>>::const_iterator> _column_value_fix_lengths;
|
||||
boost::iterator_range<std::vector<bool>::const_iterator> _column_is_collection;
|
||||
boost::iterator_range<std::vector<bool>::const_iterator> _column_is_counter;
|
||||
boost::iterator_range<std::vector<column_translation::column_info>::const_iterator> _columns;
|
||||
boost::dynamic_bitset<uint64_t> _columns_selector;
|
||||
uint64_t _missing_columns_to_read;
|
||||
|
||||
boost::iterator_range<std::vector<std::optional<uint32_t>>::const_iterator> _ck_column_value_fix_lengths;
|
||||
|
||||
tombstone _row_tombstone;
|
||||
tombstone _row_shadowable_tombstone;
|
||||
|
||||
column_flags_m _column_flags{0};
|
||||
api::timestamp_type _column_timestamp;
|
||||
@@ -656,46 +673,34 @@ private:
|
||||
*/
|
||||
tombstone _left_range_tombstone;
|
||||
tombstone _right_range_tombstone;
|
||||
void setup_columns(const std::vector<std::optional<column_id>>& column_ids,
|
||||
const std::vector<std::optional<uint32_t>>& column_value_fix_lengths,
|
||||
const std::vector<bool>& column_is_collection,
|
||||
const std::vector<bool>& column_is_counter) {
|
||||
_column_ids = boost::make_iterator_range(column_ids);
|
||||
_column_value_fix_lengths = boost::make_iterator_range(column_value_fix_lengths);
|
||||
_column_is_collection = boost::make_iterator_range(column_is_collection);
|
||||
_column_is_counter = boost::make_iterator_range(column_is_counter);
|
||||
void setup_columns(const std::vector<column_translation::column_info>& columns) {
|
||||
_columns = boost::make_iterator_range(columns);
|
||||
}
|
||||
bool is_current_column_present() {
|
||||
return _columns_selector.test(_columns_selector.size() - _column_ids.size());
|
||||
return _columns_selector.test(_columns_selector.size() - _columns.size());
|
||||
}
|
||||
void skip_absent_columns() {
|
||||
size_t pos = _columns_selector.find_first();
|
||||
if (pos == boost::dynamic_bitset<uint64_t>::npos) {
|
||||
pos = _column_ids.size();
|
||||
pos = _columns.size();
|
||||
}
|
||||
_column_ids.advance_begin(pos);
|
||||
_column_value_fix_lengths.advance_begin(pos);
|
||||
_column_is_collection.advance_begin(pos);
|
||||
_column_is_counter.advance_begin(pos);
|
||||
_columns.advance_begin(pos);
|
||||
}
|
||||
bool no_more_columns() { return _column_ids.empty(); }
|
||||
bool no_more_columns() { return _columns.empty(); }
|
||||
void move_to_next_column() {
|
||||
size_t current_pos = _columns_selector.size() - _column_ids.size();
|
||||
size_t current_pos = _columns_selector.size() - _columns.size();
|
||||
size_t next_pos = _columns_selector.find_next(current_pos);
|
||||
size_t jump_to_next = (next_pos == boost::dynamic_bitset<uint64_t>::npos) ? _column_ids.size()
|
||||
size_t jump_to_next = (next_pos == boost::dynamic_bitset<uint64_t>::npos) ? _columns.size()
|
||||
: next_pos - current_pos;
|
||||
_column_ids.advance_begin(jump_to_next);
|
||||
_column_value_fix_lengths.advance_begin(jump_to_next);
|
||||
_column_is_collection.advance_begin(jump_to_next);
|
||||
_column_is_counter.advance_begin(jump_to_next);
|
||||
_columns.advance_begin(jump_to_next);
|
||||
}
|
||||
bool is_column_simple() { return !_column_is_collection.front(); }
|
||||
bool is_column_counter() { return _column_is_counter.front(); }
|
||||
bool is_column_simple() { return !_columns.front().is_collection; }
|
||||
bool is_column_counter() { return _columns.front().is_counter; }
|
||||
std::optional<column_id> get_column_id() {
|
||||
return _column_ids.front();
|
||||
return _columns.front().id;
|
||||
}
|
||||
std::optional<uint32_t> get_column_value_length() {
|
||||
return _column_value_fix_lengths.front();
|
||||
return _columns.front().value_length;
|
||||
}
|
||||
void setup_ck(const std::vector<std::optional<uint32_t>>& column_value_fix_lengths) {
|
||||
_row_key.clear();
|
||||
@@ -751,6 +756,14 @@ public:
|
||||
}
|
||||
|
||||
data_consumer::processing_result process_state(temporary_buffer<char>& data) {
|
||||
try {
|
||||
return do_process_state(data);
|
||||
} catch (malformed_sstable_exception& exp) {
|
||||
throw malformed_sstable_exception(exp.what(), _sst->get_filename());
|
||||
}
|
||||
}
|
||||
private:
|
||||
data_consumer::processing_result do_process_state(temporary_buffer<char>& data) {
|
||||
switch (_state) {
|
||||
case state::PARTITION_START:
|
||||
partition_start_label:
|
||||
@@ -786,6 +799,7 @@ public:
|
||||
flags_label:
|
||||
_liveness.reset();
|
||||
_row_tombstone = {};
|
||||
_row_shadowable_tombstone = {};
|
||||
if (read_8(data) != read_status::ready) {
|
||||
_state = state::FLAGS_2;
|
||||
break;
|
||||
@@ -804,10 +818,7 @@ public:
|
||||
} else if (!_flags.has_extended_flags()) {
|
||||
_extended_flags = unfiltered_extended_flags_m(uint8_t{0u});
|
||||
_state = state::CLUSTERING_ROW;
|
||||
setup_columns(_column_translation.regular_columns(),
|
||||
_column_translation.regular_column_value_fix_legths(),
|
||||
_column_translation.regular_column_is_collection(),
|
||||
_column_translation.regular_column_is_counter());
|
||||
setup_columns(_column_translation.regular_columns());
|
||||
_ck_size = _column_translation.clustering_column_value_fix_legths().size();
|
||||
goto clustering_row_label;
|
||||
}
|
||||
@@ -817,12 +828,12 @@ public:
|
||||
}
|
||||
case state::EXTENDED_FLAGS:
|
||||
_extended_flags = unfiltered_extended_flags_m(_u8);
|
||||
if (_extended_flags.has_cassandra_shadowable_deletion()) {
|
||||
throw std::runtime_error("SSTables with Cassandra-style shadowable deletion cannot be read by Scylla");
|
||||
}
|
||||
if (_extended_flags.is_static()) {
|
||||
if (_is_first_unfiltered) {
|
||||
setup_columns(_column_translation.static_columns(),
|
||||
_column_translation.static_column_value_fix_legths(),
|
||||
_column_translation.static_column_is_collection(),
|
||||
_column_translation.static_column_is_counter());
|
||||
setup_columns(_column_translation.static_columns());
|
||||
_is_first_unfiltered = false;
|
||||
_consumer.consume_static_row_start();
|
||||
goto row_body_label;
|
||||
@@ -830,10 +841,7 @@ public:
|
||||
throw malformed_sstable_exception("static row should be a first unfiltered in a partition");
|
||||
}
|
||||
}
|
||||
setup_columns(_column_translation.regular_columns(),
|
||||
_column_translation.regular_column_value_fix_legths(),
|
||||
_column_translation.regular_column_is_collection(),
|
||||
_column_translation.regular_column_is_counter());
|
||||
setup_columns(_column_translation.regular_columns());
|
||||
_ck_size = _column_translation.clustering_column_value_fix_legths().size();
|
||||
case state::CLUSTERING_ROW:
|
||||
clustering_row_label:
|
||||
@@ -943,8 +951,8 @@ public:
|
||||
case state::ROW_BODY_DELETION:
|
||||
row_body_deletion_label:
|
||||
if (!_flags.has_deletion()) {
|
||||
_state = state::ROW_BODY_MARKER;
|
||||
goto row_body_marker_label;
|
||||
_state = state::ROW_BODY_SHADOWABLE_DELETION;
|
||||
goto row_body_shadowable_deletion_label;
|
||||
}
|
||||
if (read_unsigned_vint(data) != read_status::ready) {
|
||||
_state = state::ROW_BODY_DELETION_2;
|
||||
@@ -958,9 +966,32 @@ public:
|
||||
}
|
||||
case state::ROW_BODY_DELETION_3:
|
||||
_row_tombstone.deletion_time = parse_expiry(_header, _u64);
|
||||
case state::ROW_BODY_SHADOWABLE_DELETION:
|
||||
row_body_shadowable_deletion_label:
|
||||
if (_extended_flags.has_scylla_shadowable_deletion()) {
|
||||
if (!_has_shadowable_tombstones) {
|
||||
throw malformed_sstable_exception("Scylla shadowable tombstone flag is set but not supported on this SSTables");
|
||||
}
|
||||
} else {
|
||||
_state = state::ROW_BODY_MARKER;
|
||||
goto row_body_marker_label;
|
||||
}
|
||||
if (read_unsigned_vint(data) != read_status::ready) {
|
||||
_state = state::ROW_BODY_SHADOWABLE_DELETION_2;
|
||||
break;
|
||||
}
|
||||
case state::ROW_BODY_SHADOWABLE_DELETION_2:
|
||||
_row_shadowable_tombstone.timestamp = parse_timestamp(_header, _u64);
|
||||
if (read_unsigned_vint(data) != read_status::ready) {
|
||||
_state = state::ROW_BODY_SHADOWABLE_DELETION_3;
|
||||
break;
|
||||
}
|
||||
case state::ROW_BODY_SHADOWABLE_DELETION_3:
|
||||
_row_shadowable_tombstone.deletion_time = parse_expiry(_header, _u64);
|
||||
case state::ROW_BODY_MARKER:
|
||||
row_body_marker_label:
|
||||
if (_consumer.consume_row_marker_and_tombstone(_liveness, std::move(_row_tombstone)) == consumer_m::proceed::no) {
|
||||
if (_consumer.consume_row_marker_and_tombstone(
|
||||
_liveness, std::move(_row_tombstone), std::move(_row_shadowable_tombstone)) == consumer_m::proceed::no) {
|
||||
_state = state::ROW_BODY_MISSING_COLUMNS;
|
||||
break;
|
||||
}
|
||||
@@ -972,7 +1003,7 @@ public:
|
||||
}
|
||||
goto row_body_missing_columns_2_label;
|
||||
} else {
|
||||
_columns_selector = boost::dynamic_bitset<uint64_t>(_column_ids.size());
|
||||
_columns_selector = boost::dynamic_bitset<uint64_t>(_columns.size());
|
||||
_columns_selector.set();
|
||||
}
|
||||
case state::COLUMN:
|
||||
@@ -1110,17 +1141,17 @@ public:
|
||||
case state::ROW_BODY_MISSING_COLUMNS_2:
|
||||
row_body_missing_columns_2_label: {
|
||||
uint64_t missing_column_bitmap_or_count = _u64;
|
||||
if (_column_ids.size() < 64) {
|
||||
if (_columns.size() < 64) {
|
||||
_columns_selector.clear();
|
||||
_columns_selector.append(missing_column_bitmap_or_count);
|
||||
_columns_selector.flip();
|
||||
_columns_selector.resize(_column_ids.size());
|
||||
_columns_selector.resize(_columns.size());
|
||||
skip_absent_columns();
|
||||
goto column_label;
|
||||
}
|
||||
_columns_selector.resize(_column_ids.size());
|
||||
if (_column_ids.size() - missing_column_bitmap_or_count < _column_ids.size() / 2) {
|
||||
_missing_columns_to_read = _column_ids.size() - missing_column_bitmap_or_count;
|
||||
_columns_selector.resize(_columns.size());
|
||||
if (_columns.size() - missing_column_bitmap_or_count < _columns.size() / 2) {
|
||||
_missing_columns_to_read = _columns.size() - missing_column_bitmap_or_count;
|
||||
_columns_selector.reset();
|
||||
} else {
|
||||
_missing_columns_to_read = missing_column_bitmap_or_count;
|
||||
@@ -1275,6 +1306,7 @@ public:
|
||||
|
||||
return row_consumer::proceed::yes;
|
||||
}
|
||||
public:
|
||||
|
||||
data_consume_rows_context_m(const schema& s,
|
||||
const shared_sstable& sst,
|
||||
@@ -1284,8 +1316,10 @@ public:
|
||||
uint64_t maxlen)
|
||||
: continuous_data_consumer(std::move(input), start, maxlen)
|
||||
, _consumer(consumer)
|
||||
, _sst(sst)
|
||||
, _header(sst->get_serialization_header())
|
||||
, _column_translation(sst->get_column_translation(s, _header))
|
||||
, _has_shadowable_tombstones(sst->has_shadowable_tombstones())
|
||||
, _liveness(_header)
|
||||
{ }
|
||||
|
||||
@@ -1294,7 +1328,7 @@ public:
|
||||
// filter and using a promoted index), we may be in FLAGS or FLAGS_2
|
||||
// state instead of PARTITION_START.
|
||||
if (_state == state::FLAGS || _state == state::FLAGS_2) {
|
||||
_consumer.consume_partition_end();
|
||||
_consumer.on_end_of_stream();
|
||||
return;
|
||||
}
|
||||
if (_state != state::PARTITION_START || _prestate != prestate::NONE) {
|
||||
|
||||
@@ -2632,10 +2632,14 @@ enum class row_extended_flags : uint8_t {
|
||||
none = 0x00,
|
||||
// Whether the encoded row is static. If there is no extended flag, the row is assumed not static.
|
||||
is_static = 0x01,
|
||||
// Whether the row deletion is shadowable. If there is no extended flag (or no row deletion)
|
||||
// the deletion is assumed not shadowable.
|
||||
// Cassandra-specific flag, indicates whether the row deletion is shadowable.
|
||||
// This flag is deprecated in Origin - see CASSANDRA-11500.
|
||||
has_shadowable_deletion = 0x02,
|
||||
// Scylla never sets this flag and fails to read files that have it set.
|
||||
has_shadowable_deletion_cassandra = 0x02,
|
||||
// Scylla-specific flag, indicates whether the row deletion is shadowable.
|
||||
// If set, the shadowable tombstone is written right after the row deletion.
|
||||
// This is only used by Materialized Views that are not supposed to be exported.
|
||||
has_shadowable_deletion_scylla = 0x80,
|
||||
};
|
||||
|
||||
// A range tombstone marker (RT marker) represents a bound of a range tombstone
|
||||
@@ -2680,6 +2684,18 @@ GCC6_CONCEPT(
|
||||
};
|
||||
)
|
||||
|
||||
static indexed_columns get_indexed_columns_partitioned_by_atomicity(schema::const_iterator_range_type columns) {
|
||||
indexed_columns result;
|
||||
result.reserve(columns.size());
|
||||
for (const auto& col: columns) {
|
||||
result.emplace_back(col);
|
||||
}
|
||||
boost::range::stable_partition(
|
||||
result,
|
||||
[](const std::reference_wrapper<const column_definition>& column) { return column.get().is_atomic();});
|
||||
return result;
|
||||
}
|
||||
|
||||
// Used for writing SSTables in 'mc' format.
|
||||
class sstable_writer_m : public sstable_writer::writer_impl {
|
||||
private:
|
||||
@@ -2704,6 +2720,19 @@ private:
|
||||
index_sampling_state _index_sampling_state;
|
||||
range_tombstone_stream _range_tombstones;
|
||||
|
||||
// For static and regular columns, we write all simple columns first followed by collections
|
||||
// These containers have columns partitioned by atomicity
|
||||
const indexed_columns _static_columns;
|
||||
const indexed_columns _regular_columns;
|
||||
|
||||
struct cdef_and_collection {
|
||||
const column_definition* cdef;
|
||||
std::reference_wrapper<const atomic_cell_or_collection> collection;
|
||||
};
|
||||
|
||||
// Used to defer writing collections until all atomic cells are written
|
||||
std::vector<cdef_and_collection> _collections;
|
||||
|
||||
std::optional<rt_marker> _end_open_marker;
|
||||
|
||||
struct clustering_info {
|
||||
@@ -2791,7 +2820,7 @@ private:
|
||||
void write_collection(file_writer& writer, const column_definition& cdef, collection_mutation_view collection,
|
||||
const row_time_properties& properties, bool has_complex_deletion);
|
||||
|
||||
void write_cells(file_writer& writer, column_kind kind, const row& row_body, const row_time_properties& properties, bool has_complex_deletion = false);
|
||||
void write_cells(file_writer& writer, column_kind kind, const row& row_body, const row_time_properties& properties, bool has_complex_deletion);
|
||||
void write_row_body(file_writer& writer, const clustering_row& row, bool has_complex_deletion);
|
||||
void write_static_row(const row& static_row);
|
||||
|
||||
@@ -2828,6 +2857,8 @@ public:
|
||||
, _enc_stats(enc_stats)
|
||||
, _shard(shard)
|
||||
, _range_tombstones(_schema)
|
||||
, _static_columns(get_indexed_columns_partitioned_by_atomicity(s.static_columns()))
|
||||
, _regular_columns(get_indexed_columns_partitioned_by_atomicity(s.regular_columns()))
|
||||
{
|
||||
_sst.generate_toc(_schema.get_compressor_params().get_compressor(), _schema.bloom_filter_fp_chance());
|
||||
_sst.write_toc(_pc);
|
||||
@@ -3150,39 +3181,43 @@ void sstable_writer_m::write_liveness_info(file_writer& writer, const row_marker
|
||||
uint64_t timestamp = marker.timestamp();
|
||||
_c_stats.update_timestamp(timestamp);
|
||||
write_delta_timestamp(writer, timestamp);
|
||||
if (marker.is_expiring()) {
|
||||
auto ttl = marker.ttl().count();
|
||||
auto ldt = marker.expiry().time_since_epoch().count();
|
||||
|
||||
auto write_expiring_liveness_info = [this, &writer] (uint32_t ttl, uint64_t ldt) {
|
||||
_c_stats.update_ttl(ttl);
|
||||
_c_stats.update_local_deletion_time(ldt);
|
||||
write_delta_ttl(writer, ttl);
|
||||
write_delta_local_deletion_time(writer, ldt);
|
||||
};
|
||||
if (!marker.is_live()) {
|
||||
write_expiring_liveness_info(expired_liveness_ttl, marker.deletion_time().time_since_epoch().count());
|
||||
} else if (marker.is_expiring()) {
|
||||
write_expiring_liveness_info(marker.ttl().count(), marker.expiry().time_since_epoch().count());
|
||||
}
|
||||
}
|
||||
|
||||
void sstable_writer_m::write_collection(file_writer& writer, const column_definition& cdef,
|
||||
collection_mutation_view collection, const row_time_properties& properties, bool has_complex_deletion) {
|
||||
auto& ctype = *static_pointer_cast<const collection_type_impl>(cdef.type);
|
||||
collection.data.with_linearized([&] (bytes_view collection_bv) {
|
||||
auto mview = ctype.deserialize_mutation_form(collection_bv);
|
||||
if (has_complex_deletion) {
|
||||
auto dt = to_deletion_time(mview.tomb);
|
||||
write_delta_deletion_time(writer, dt);
|
||||
if (mview.tomb) {
|
||||
_c_stats.update_timestamp(dt.marked_for_delete_at);
|
||||
_c_stats.update_local_deletion_time(dt.local_deletion_time);
|
||||
collection.data.with_linearized([&] (bytes_view collection_bv) {
|
||||
auto mview = ctype.deserialize_mutation_form(collection_bv);
|
||||
if (has_complex_deletion) {
|
||||
auto dt = to_deletion_time(mview.tomb);
|
||||
write_delta_deletion_time(writer, dt);
|
||||
if (mview.tomb) {
|
||||
_c_stats.update_timestamp(dt.marked_for_delete_at);
|
||||
_c_stats.update_local_deletion_time(dt.local_deletion_time);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
write_vint(writer, mview.cells.size());
|
||||
if (!mview.cells.empty()) {
|
||||
++_c_stats.column_count;
|
||||
}
|
||||
for (const auto& [cell_path, cell]: mview.cells) {
|
||||
++_c_stats.cells_count;
|
||||
write_cell(writer, cell, cdef, properties, cell_path);
|
||||
}
|
||||
});
|
||||
write_vint(writer, mview.cells.size());
|
||||
if (!mview.cells.empty()) {
|
||||
++_c_stats.column_count;
|
||||
}
|
||||
for (const auto& [cell_path, cell]: mview.cells) {
|
||||
++_c_stats.cells_count;
|
||||
write_cell(writer, cell, cdef, properties, cell_path);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
void sstable_writer_m::write_cells(file_writer& writer, column_kind kind, const row& row_body,
|
||||
@@ -3191,11 +3226,11 @@ void sstable_writer_m::write_cells(file_writer& writer, column_kind kind, const
|
||||
// This differs from Origin where all updated columns are tracked and the set of filled columns of a row
|
||||
// is compared with the set of all columns filled in the memtable. So our encoding may be less optimal in some cases
|
||||
// but still valid.
|
||||
write_missing_columns(writer, _schema, row_body);
|
||||
write_missing_columns(writer, kind == column_kind::static_column ? _static_columns : _regular_columns, row_body);
|
||||
row_body.for_each_cell([this, &writer, kind, &properties, has_complex_deletion] (column_id id, const atomic_cell_or_collection& c) {
|
||||
auto&& column_definition = _schema.column_at(kind, id);
|
||||
if (!column_definition.is_atomic()) {
|
||||
write_collection(writer, column_definition, c.as_collection_mutation(), properties, has_complex_deletion);
|
||||
_collections.push_back({&column_definition, c});
|
||||
return;
|
||||
}
|
||||
atomic_cell_view cell = c.as_atomic_cell(column_definition);
|
||||
@@ -3203,15 +3238,26 @@ void sstable_writer_m::write_cells(file_writer& writer, column_kind kind, const
|
||||
++_c_stats.column_count;
|
||||
write_cell(writer, cell, column_definition, properties);
|
||||
});
|
||||
|
||||
for (const auto& col: _collections) {
|
||||
write_collection(writer, *col.cdef, col.collection.get().as_collection_mutation(), properties, has_complex_deletion);
|
||||
}
|
||||
_collections.clear();
|
||||
}
|
||||
|
||||
void sstable_writer_m::write_row_body(file_writer& writer, const clustering_row& row, bool has_complex_deletion) {
|
||||
write_liveness_info(writer, row.marker());
|
||||
if (row.tomb()) {
|
||||
auto dt = to_deletion_time(row.tomb().tomb());
|
||||
auto write_tombstone_and_update_stats = [this, &writer] (const tombstone& t) {
|
||||
auto dt = to_deletion_time(t);
|
||||
_c_stats.update_timestamp(dt.marked_for_delete_at);
|
||||
_c_stats.update_local_deletion_time(dt.local_deletion_time);
|
||||
write_delta_deletion_time(writer, dt);
|
||||
};
|
||||
if (row.tomb().regular()) {
|
||||
write_tombstone_and_update_stats(row.tomb().regular());
|
||||
}
|
||||
if (row.tomb().is_shadowable()) {
|
||||
write_tombstone_and_update_stats(row.tomb().tomb());
|
||||
}
|
||||
row_time_properties properties;
|
||||
if (!row.marker().is_missing()) {
|
||||
@@ -3237,6 +3283,27 @@ uint64_t calculate_write_size(Func&& func) {
|
||||
return written_size;
|
||||
}
|
||||
|
||||
// Find if any collection in the row contains a collection-wide tombstone
|
||||
static bool row_has_complex_deletion(const schema& s, const row& r, column_kind kind) {
|
||||
bool result = false;
|
||||
r.for_each_cell_until([&] (column_id id, const atomic_cell_or_collection& c) {
|
||||
auto&& cdef = s.column_at(kind, id);
|
||||
if (cdef.is_atomic()) {
|
||||
return stop_iteration::no;
|
||||
}
|
||||
auto t = static_pointer_cast<const collection_type_impl>(cdef.type);
|
||||
return c.as_collection_mutation().data.with_linearized([&] (bytes_view c_bv) {
|
||||
auto mview = t->deserialize_mutation_form(c_bv);
|
||||
if (mview.tomb) {
|
||||
result = true;
|
||||
}
|
||||
return stop_iteration(static_cast<bool>(mview.tomb));
|
||||
});
|
||||
});
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void sstable_writer_m::write_static_row(const row& static_row) {
|
||||
assert(_schema.is_compound());
|
||||
|
||||
@@ -3246,13 +3313,16 @@ void sstable_writer_m::write_static_row(const row& static_row) {
|
||||
if (static_row.size() == _schema.static_columns_count()) {
|
||||
flags |= row_flags::has_all_columns;
|
||||
}
|
||||
|
||||
bool has_complex_deletion = row_has_complex_deletion(_schema, static_row, column_kind::static_column);
|
||||
if (has_complex_deletion) {
|
||||
flags |= row_flags::has_complex_deletion;
|
||||
}
|
||||
write(_sst.get_version(), *_data_writer, flags);
|
||||
write(_sst.get_version(), *_data_writer, row_extended_flags::is_static);
|
||||
|
||||
// Calculate the size of the row body
|
||||
auto write_row = [this, &static_row] (file_writer& writer) {
|
||||
write_cells(writer, column_kind::static_column, static_row, row_time_properties{});
|
||||
auto write_row = [this, &static_row, has_complex_deletion] (file_writer& writer) {
|
||||
write_cells(writer, column_kind::static_column, static_row, row_time_properties{}, has_complex_deletion);
|
||||
};
|
||||
|
||||
uint64_t row_body_size = calculate_write_size(write_row) + unsigned_vint::serialized_size(0);
|
||||
@@ -3273,48 +3343,29 @@ stop_iteration sstable_writer_m::consume(static_row&& sr) {
|
||||
return stop_iteration::no;
|
||||
}
|
||||
|
||||
// Find if any collection in the row contains a collection-wide tombstone
|
||||
static bool row_has_complex_deletion(const schema& s, const row& r) {
|
||||
bool result = false;
|
||||
r.for_each_cell_until([&] (column_id id, const atomic_cell_or_collection& c) {
|
||||
auto&& cdef = s.column_at(column_kind::regular_column, id);
|
||||
if (cdef.is_atomic()) {
|
||||
return stop_iteration::no;
|
||||
}
|
||||
auto t = static_pointer_cast<const collection_type_impl>(cdef.type);
|
||||
return c.as_collection_mutation().data.with_linearized([&] (bytes_view c_bv) {
|
||||
auto mview = t->deserialize_mutation_form(c_bv);
|
||||
if (mview.tomb) {
|
||||
result = true;
|
||||
}
|
||||
return stop_iteration(static_cast<bool>(mview.tomb));
|
||||
});
|
||||
});
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void sstable_writer_m::write_clustered(const clustering_row& clustered_row, uint64_t prev_row_size) {
|
||||
row_flags flags = row_flags::none;
|
||||
row_extended_flags ext_flags = row_extended_flags::none;
|
||||
if (clustered_row.marker().is_live()) {
|
||||
const row_marker& marker = clustered_row.marker();
|
||||
if (!marker.is_missing()) {
|
||||
flags |= row_flags::has_timestamp;
|
||||
if (clustered_row.marker().is_expiring()) {
|
||||
if (!marker.is_live() || marker.is_expiring()) {
|
||||
flags |= row_flags::has_ttl;
|
||||
}
|
||||
}
|
||||
|
||||
if (clustered_row.tomb().tomb()) {
|
||||
if (clustered_row.tomb().regular()) {
|
||||
flags |= row_flags::has_deletion;
|
||||
if (clustered_row.tomb().tomb() && clustered_row.tomb().is_shadowable()) {
|
||||
ext_flags = row_extended_flags::has_shadowable_deletion;
|
||||
}
|
||||
}
|
||||
if (clustered_row.tomb().is_shadowable()) {
|
||||
flags |= row_flags::extension_flag;
|
||||
ext_flags = row_extended_flags::has_shadowable_deletion_scylla;
|
||||
}
|
||||
|
||||
if (clustered_row.cells().size() == _schema.regular_columns_count()) {
|
||||
flags |= row_flags::has_all_columns;
|
||||
}
|
||||
bool has_complex_deletion = row_has_complex_deletion(_schema, clustered_row.cells());
|
||||
bool has_complex_deletion = row_has_complex_deletion(_schema, clustered_row.cells(), column_kind::regular_column);
|
||||
if (has_complex_deletion) {
|
||||
flags |= row_flags::has_complex_deletion;
|
||||
}
|
||||
@@ -3689,7 +3740,8 @@ const sstring sstable::filename(sstring dir, sstring ks, sstring cf, version_typ
|
||||
format_types format, sstring component) {
|
||||
static std::unordered_map<version_types, const char*, enum_hash<version_types>> fmtmap = {
|
||||
{ sstable::version_types::ka, "{0}-{1}-{2}-{3}-{5}" },
|
||||
{ sstable::version_types::la, "{2}-{3}-{4}-{5}" }
|
||||
{ sstable::version_types::la, "{2}-{3}-{4}-{5}" },
|
||||
{ sstable::version_types::mc, "{2}-{3}-{4}-{5}" }
|
||||
};
|
||||
|
||||
return dir + "/" + seastar::format(fmtmap[version], ks, cf, _version_string.at(version), to_sstring(generation), _format_string.at(format), component);
|
||||
|
||||
@@ -611,6 +611,10 @@ public:
|
||||
return _schema->is_compound() || !has_scylla_component() || _components->scylla_metadata->has_feature(sstable_feature::NonCompoundRangeTombstones);
|
||||
}
|
||||
|
||||
// Returns true when this sstable has a Scylla component and that component's
// metadata advertises the sstable_feature::ShadowableTombstones feature.
bool has_shadowable_tombstones() const {
|
||||
return has_scylla_component() && _components->scylla_metadata->has_feature(sstable_feature::ShadowableTombstones);
|
||||
}
|
||||
|
||||
// Reports whether the max deletion time can be trusted; the answer is simply
// whether the sstable has a Scylla component.
// NOTE(review): the reason a Scylla component implies a correct value is not
// visible here -- confirm against the writer code.
bool has_correct_max_deletion_time() const {
|
||||
return has_scylla_component();
|
||||
}
|
||||
|
||||
@@ -442,7 +442,8 @@ struct sharding_metadata {
|
||||
enum sstable_feature : uint8_t {
|
||||
NonCompoundPIEntries = 0, // See #2993
|
||||
NonCompoundRangeTombstones = 1, // See #2986
|
||||
End = 2
|
||||
ShadowableTombstones = 2, // See #3885
|
||||
End = 4,
|
||||
};
|
||||
|
||||
// Scylla-specific features enabled for a particular sstable.
|
||||
@@ -529,6 +530,13 @@ struct hash<sstables::metadata_type> : enum_hash<sstables::metadata_type> {};
|
||||
|
||||
namespace sstables {
|
||||
|
||||
// Special value to represent expired (i.e., 'dead') liveness info
|
||||
constexpr static uint32_t expired_liveness_ttl = std::numeric_limits<uint32_t>::max();
|
||||
|
||||
// Returns true when ttl equals the expired_liveness_ttl sentinel, i.e. the
// special value used to represent expired ('dead') liveness info.
inline bool is_expired_liveness_ttl(uint32_t ttl) {
|
||||
return ttl == expired_liveness_ttl;
|
||||
}
|
||||
|
||||
struct statistics {
|
||||
disk_hash<uint32_t, metadata_type, uint32_t> hash;
|
||||
std::unordered_map<metadata_type, std::unique_ptr<metadata>> contents;
|
||||
@@ -595,6 +603,12 @@ public:
|
||||
|
||||
class unfiltered_extended_flags_m final {
|
||||
static const uint8_t IS_STATIC = 0x01u;
|
||||
// This flag is used by Cassandra but not supported by Scylla because
|
||||
// Scylla's representation of shadowable tombstones is different.
|
||||
// We only check it on reading and error out if set but never set ourselves.
|
||||
static const uint8_t HAS_CASSANDRA_SHADOWABLE_DELETION = 0x02u;
|
||||
// This flag is Scylla-specific and used for writing shadowable tombstones.
|
||||
static const uint8_t HAS_SCYLLA_SHADOWABLE_DELETION = 0x80u;
|
||||
uint8_t _flags;
|
||||
bool check_flag(const uint8_t flag) const {
|
||||
return (_flags & flag) != 0u;
|
||||
@@ -604,6 +618,12 @@ public:
|
||||
// Returns true when the IS_STATIC bit (0x01) is set in the extended flags.
bool is_static() const {
|
||||
return check_flag(IS_STATIC);
|
||||
}
|
||||
// Returns true when the Cassandra-style shadowable deletion bit (0x02) is set.
// Per the flag's declaration, this bit is only checked on reading and is
// never set by Scylla itself.
bool has_cassandra_shadowable_deletion() const {
|
||||
return check_flag(HAS_CASSANDRA_SHADOWABLE_DELETION);
|
||||
}
|
||||
// Returns true when the Scylla-specific shadowable deletion bit (0x80) is set,
// i.e. the flag used when writing shadowable tombstones.
bool has_scylla_shadowable_deletion() const {
|
||||
return check_flag(HAS_SCYLLA_SHADOWABLE_DELETION);
|
||||
}
|
||||
};
|
||||
|
||||
class column_flags_m final {
|
||||
|
||||
@@ -205,7 +205,7 @@ void stream_session::init_messaging_service_handler() {
|
||||
};
|
||||
distribute_reader_and_consume_on_shards(s, dht::global_partitioner(),
|
||||
make_flat_mutation_reader<generating_reader>(s, std::move(get_next_mutation_fragment)),
|
||||
[cf_id, plan_id, s, estimated_partitions] (flat_mutation_reader reader) {
|
||||
[cf_id, plan_id, estimated_partitions] (flat_mutation_reader reader) {
|
||||
auto& cf = service::get_local_storage_service().db().local().find_column_family(cf_id);
|
||||
sstables::sstable_writer_config sst_cfg;
|
||||
sst_cfg.large_partition_handler = cf.get_large_partition_handler();
|
||||
|
||||
@@ -40,6 +40,7 @@
|
||||
#include "db/commitlog/commitlog.hh"
|
||||
#include "db/commitlog/rp_set.hh"
|
||||
#include "log.hh"
|
||||
#include "service/priority_manager.hh"
|
||||
|
||||
using namespace db;
|
||||
|
||||
@@ -290,7 +291,7 @@ SEASTAR_TEST_CASE(test_commitlog_delete_when_over_disk_limit) {
|
||||
SEASTAR_TEST_CASE(test_commitlog_reader){
|
||||
static auto count_mutations_in_segment = [] (sstring path) -> future<size_t> {
|
||||
auto count = make_lw_shared<size_t>(0);
|
||||
return db::commitlog::read_log_file(path, [count](temporary_buffer<char> buf, db::replay_position rp) {
|
||||
return db::commitlog::read_log_file(path, service::get_local_commitlog_priority(), [count](temporary_buffer<char> buf, db::replay_position rp) {
|
||||
sstring str(buf.get(), buf.size());
|
||||
BOOST_CHECK_EQUAL(str, "hej bubba cow");
|
||||
(*count)++;
|
||||
@@ -392,7 +393,7 @@ SEASTAR_TEST_CASE(test_commitlog_entry_corruption){
|
||||
BOOST_REQUIRE(!segments.empty());
|
||||
auto seg = segments[0];
|
||||
return corrupt_segment(seg, rps->at(1).pos + 4, 0x451234ab).then([seg, rps, &log] {
|
||||
return db::commitlog::read_log_file(seg, [rps](temporary_buffer<char> buf, db::replay_position rp) {
|
||||
return db::commitlog::read_log_file(seg, service::get_local_commitlog_priority(), [rps](temporary_buffer<char> buf, db::replay_position rp) {
|
||||
BOOST_CHECK_EQUAL(rp, rps->at(0));
|
||||
return make_ready_future<>();
|
||||
}).then([](auto s) {
|
||||
@@ -435,7 +436,7 @@ SEASTAR_TEST_CASE(test_commitlog_chunk_corruption){
|
||||
BOOST_REQUIRE(!segments.empty());
|
||||
auto seg = segments[0];
|
||||
return corrupt_segment(seg, rps->at(0).pos - 4, 0x451234ab).then([seg, rps, &log] {
|
||||
return db::commitlog::read_log_file(seg, [rps](temporary_buffer<char> buf, db::replay_position rp) {
|
||||
return db::commitlog::read_log_file(seg, service::get_local_commitlog_priority(), [rps](temporary_buffer<char> buf, db::replay_position rp) {
|
||||
BOOST_FAIL("Should not reach");
|
||||
return make_ready_future<>();
|
||||
}).then([](auto s) {
|
||||
@@ -477,7 +478,7 @@ SEASTAR_TEST_CASE(test_commitlog_reader_produce_exception){
|
||||
auto segments = log.get_active_segment_names();
|
||||
BOOST_REQUIRE(!segments.empty());
|
||||
auto seg = segments[0];
|
||||
return db::commitlog::read_log_file(seg, [](temporary_buffer<char> buf, db::replay_position rp) {
|
||||
return db::commitlog::read_log_file(seg, service::get_local_commitlog_priority(), [](temporary_buffer<char> buf, db::replay_position rp) {
|
||||
return make_exception_future(std::runtime_error("I am in a throwing mode"));
|
||||
}).then([](auto s) {
|
||||
return do_with(std::move(s), [](auto& s) {
|
||||
|
||||
@@ -143,3 +143,11 @@ rows_assertions result_msg_assertions::is_rows() {
|
||||
result_msg_assertions assert_that(shared_ptr<cql_transport::messages::result_message> msg) {
|
||||
return result_msg_assertions(msg);
|
||||
}
|
||||
|
||||
// Asserts that the column count reported by the result set's metadata equals
// columns_count, and calls fail() with a descriptive message otherwise.
// Returns a copy of this assertion object so further checks can be chained.
// NOTE(review): the failure message reads "columns(s)"; it was probably meant
// to be "column(s)".
rows_assertions rows_assertions::with_serialized_columns_count(size_t columns_count) {
|
||||
size_t serialized_column_count = _rows->rs().get_metadata().column_count();
|
||||
if (serialized_column_count != columns_count) {
|
||||
fail(sprint("Expected %d serialized columns(s) but got %d", columns_count, serialized_column_count));
|
||||
}
|
||||
return {*this};
|
||||
}
|
||||
|
||||
@@ -40,6 +40,7 @@ public:
|
||||
rows_assertions with_rows(std::initializer_list<std::initializer_list<bytes_opt>> rows);
|
||||
// Verifies that the result has the following rows and only those rows.
|
||||
rows_assertions with_rows_ignore_order(std::vector<std::vector<bytes_opt>> rows);
|
||||
rows_assertions with_serialized_columns_count(size_t columns_count);
|
||||
};
|
||||
|
||||
class result_msg_assertions {
|
||||
|
||||
@@ -4040,3 +4040,81 @@ SEASTAR_TEST_CASE(test_select_with_mixed_order_table) {
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_filtering) {
|
||||
return do_with_cql_env_thread([] (cql_test_env& e) {
|
||||
e.execute_cql("CREATE TABLE cf (k int, v int,m int,n int,o int,p int static, PRIMARY KEY ((k,v),m,n));").get();
|
||||
e.execute_cql(
|
||||
"BEGIN UNLOGGED BATCH \n"
|
||||
"INSERT INTO cf (k, v, m, n, o, p) VALUES (1, 1, 1, 1, 1 ,1 ); \n"
|
||||
"INSERT INTO cf (k, v, m, n, o, p) VALUES (2, 1, 2, 1, 2 ,2 ); \n"
|
||||
"INSERT INTO cf (k, v, m, n, o, p) VALUES (3, 1, 3, 1, 3 ,3 ); \n"
|
||||
"INSERT INTO cf (k, v, m, n, o, p) VALUES (4, 2, 1, 2, 4 ,4 ); \n"
|
||||
"INSERT INTO cf (k, v, m, n, o, p) VALUES (5, 2, 2, 2, 5 ,5 ); \n"
|
||||
"INSERT INTO cf (k, v, m, n, o, p) VALUES (6, 2, 3, 2, 6 ,6 ); \n"
|
||||
"INSERT INTO cf (k, v, m, n, o, p) VALUES (7, 3, 1, 3, 7 ,7 ); \n"
|
||||
"INSERT INTO cf (k, v, m, n, o, p) VALUES (8, 3, 2, 3, 8 ,8 ); \n"
|
||||
"INSERT INTO cf (k, v, m, n, o, p) VALUES (9, 3, 3, 3, 9 ,9 ); \n"
|
||||
"INSERT INTO cf (k, v, m, n, o, p) VALUES (10, 4, 1, 4,10,10); \n"
|
||||
"INSERT INTO cf (k, v, m, n, o, p) VALUES (11, 4, 2, 4,11,11); \n"
|
||||
"INSERT INTO cf (k, v, m, n, o, p) VALUES (12, 5, 3, 5,12,12); \n"
|
||||
"INSERT INTO cf (k, v, m, n, o, p) VALUES (12, 5, 4, 5,13,13); \n"
|
||||
"APPLY BATCH;"
|
||||
).get();
|
||||
|
||||
// Notice the with_serialized_columns_count() check before the set comparison.
|
||||
// Since we are dealing with the result set before serializing to the client,
|
||||
// there is an extra column that is used for the filtering, this column will
|
||||
// not be present in the response to the client and with_serialized_columns_count()
|
||||
// verifies exactly that.
|
||||
|
||||
// test filtering on partition keys
|
||||
{
|
||||
auto msg = e.execute_cql("SELECT k FROM cf WHERE v=3 ALLOW FILTERING;").get0();
|
||||
assert_that(msg).is_rows().with_serialized_columns_count(1).with_rows_ignore_order({
|
||||
{ int32_type->decompose(7), int32_type->decompose(3)},
|
||||
{ int32_type->decompose(8), int32_type->decompose(3) },
|
||||
{ int32_type->decompose(9), int32_type->decompose(3) },
|
||||
});
|
||||
}
|
||||
|
||||
// test filtering on clustering keys
|
||||
{
|
||||
auto msg = e.execute_cql("SELECT k FROM cf WHERE n=4 ALLOW FILTERING;").get0();
|
||||
assert_that(msg).is_rows().with_serialized_columns_count(1).with_rows_ignore_order({
|
||||
{ int32_type->decompose(10), int32_type->decompose(4) },
|
||||
{ int32_type->decompose(11), int32_type->decompose(4) },
|
||||
});
|
||||
}
|
||||
|
||||
//test filtering on regular columns
|
||||
{
|
||||
auto msg = e.execute_cql("SELECT k FROM cf WHERE o>7 ALLOW FILTERING;").get0();
|
||||
assert_that(msg).is_rows().with_serialized_columns_count(1).with_rows_ignore_order({
|
||||
{ int32_type->decompose(8), int32_type->decompose(8) },
|
||||
{ int32_type->decompose(9), int32_type->decompose(9) },
|
||||
{ int32_type->decompose(10), int32_type->decompose(10) },
|
||||
{ int32_type->decompose(11), int32_type->decompose(11) },
|
||||
{ int32_type->decompose(12), int32_type->decompose(12) },
|
||||
{ int32_type->decompose(12), int32_type->decompose(13) },
|
||||
});
|
||||
}
|
||||
|
||||
//test filtering on static columns
|
||||
{
|
||||
auto msg = e.execute_cql("SELECT k FROM cf WHERE p>=10 AND p<=12 ALLOW FILTERING;").get0();
|
||||
assert_that(msg).is_rows().with_serialized_columns_count(1).with_rows_ignore_order({
|
||||
{ int32_type->decompose(10), int32_type->decompose(10) },
|
||||
{ int32_type->decompose(11), int32_type->decompose(11) },
|
||||
});
|
||||
}
|
||||
//test filtering with count
|
||||
{
|
||||
auto msg = e.execute_cql("SELECT COUNT(k) FROM cf WHERE n>3 ALLOW FILTERING;").get0();
|
||||
assert_that(msg).is_rows().with_serialized_columns_count(1).with_size(1).with_rows_ignore_order({
|
||||
{ long_type->decompose(4L), int32_type->decompose(4) },
|
||||
});
|
||||
}
|
||||
|
||||
});
|
||||
}
|
||||
|
||||
@@ -700,6 +700,46 @@ static void test_date_tiered_clustering_slicing(populate_fn populate) {
|
||||
}
|
||||
}
|
||||
|
||||
static void test_dropped_column_handling(populate_fn populate) {
|
||||
BOOST_TEST_MESSAGE(__PRETTY_FUNCTION__);
|
||||
schema_ptr write_schema = schema_builder("ks", "cf")
|
||||
.with_column("pk", int32_type, column_kind::partition_key)
|
||||
.with_column("ck", int32_type, column_kind::clustering_key)
|
||||
.with_column("val1", int32_type)
|
||||
.with_column("val2", int32_type)
|
||||
.build();
|
||||
schema_ptr read_schema = schema_builder("ks", "cf")
|
||||
.with_column("pk", int32_type, column_kind::partition_key)
|
||||
.with_column("ck", int32_type, column_kind::clustering_key)
|
||||
.with_column("val2", int32_type)
|
||||
.build();
|
||||
auto val2_cdef = read_schema->get_column_definition(to_bytes("val2"));
|
||||
auto to_ck = [write_schema] (int ck) {
|
||||
return clustering_key::from_single_value(*write_schema, int32_type->decompose(ck));
|
||||
};
|
||||
auto bytes = int32_type->decompose(int32_t(0));
|
||||
auto pk = partition_key::from_single_value(*write_schema, bytes);
|
||||
auto dk = dht::global_partitioner().decorate_key(*write_schema, pk);
|
||||
mutation partition(write_schema, pk);
|
||||
auto add_row = [&partition, &to_ck, write_schema] (int ck, int v1, int v2) {
|
||||
static constexpr api::timestamp_type write_timestamp = 1525385507816568;
|
||||
clustering_key ckey = to_ck(ck);
|
||||
partition.partition().apply_insert(*write_schema, ckey, write_timestamp);
|
||||
partition.set_cell(ckey, "val1", data_value{v1}, write_timestamp);
|
||||
partition.set_cell(ckey, "val2", data_value{v2}, write_timestamp);
|
||||
};
|
||||
add_row(1, 101, 201);
|
||||
add_row(2, 102, 202);
|
||||
add_row(3, 103, 203);
|
||||
assert_that(populate(write_schema, {partition}).make_reader(read_schema))
|
||||
.produces_partition_start(dk)
|
||||
.produces_row(to_ck(1), {{val2_cdef, int32_type->decompose(int32_t(201))}})
|
||||
.produces_row(to_ck(2), {{val2_cdef, int32_type->decompose(int32_t(202))}})
|
||||
.produces_row(to_ck(3), {{val2_cdef, int32_type->decompose(int32_t(203))}})
|
||||
.produces_partition_end()
|
||||
.produces_end_of_stream();
|
||||
}
|
||||
|
||||
static void test_clustering_slices(populate_fn populate) {
|
||||
BOOST_TEST_MESSAGE(__PRETTY_FUNCTION__);
|
||||
auto s = schema_builder("ks", "cf")
|
||||
@@ -807,16 +847,14 @@ static void test_clustering_slices(populate_fn populate) {
|
||||
.produces_row_with_key(ck2)
|
||||
.produces_end_of_stream();
|
||||
}
|
||||
|
||||
{
|
||||
auto slice = partition_slice_builder(*s)
|
||||
.with_range(query::clustering_range::make_singular(make_ck(1)))
|
||||
.build();
|
||||
assert_that(ds.make_reader(s, pr, slice))
|
||||
.produces(row1 + row2 + row3 + row4 + row5 + del_1)
|
||||
.produces(row1 + row2 + row3 + row4 + row5 + del_1, slice.row_ranges(*s, pk.key()))
|
||||
.produces_end_of_stream();
|
||||
}
|
||||
|
||||
{
|
||||
auto slice = partition_slice_builder(*s)
|
||||
.with_range(query::clustering_range::make_singular(make_ck(2)))
|
||||
@@ -831,7 +869,7 @@ static void test_clustering_slices(populate_fn populate) {
|
||||
.with_range(query::clustering_range::make_singular(make_ck(1, 2)))
|
||||
.build();
|
||||
assert_that(ds.make_reader(s, pr, slice))
|
||||
.produces(row3 + row4 + del_1)
|
||||
.produces(row3 + row4 + del_1, slice.row_ranges(*s, pk.key()))
|
||||
.produces_end_of_stream();
|
||||
}
|
||||
|
||||
@@ -840,7 +878,7 @@ static void test_clustering_slices(populate_fn populate) {
|
||||
.with_range(query::clustering_range::make_singular(make_ck(3)))
|
||||
.build();
|
||||
assert_that(ds.make_reader(s, pr, slice))
|
||||
.produces(row8 + del_3)
|
||||
.produces(row8 + del_3, slice.row_ranges(*s, pk.key()))
|
||||
.produces_end_of_stream();
|
||||
}
|
||||
|
||||
@@ -1064,6 +1102,7 @@ void run_mutation_reader_tests(populate_fn populate) {
|
||||
test_range_queries(populate);
|
||||
test_query_only_static_row(populate);
|
||||
test_query_no_clustering_ranges_no_static_columns(populate);
|
||||
test_dropped_column_handling(populate);
|
||||
}
|
||||
|
||||
void test_next_partition(populate_fn populate) {
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
#include <map>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <boost/range/algorithm/adjacent_find.hpp>
|
||||
|
||||
static logging::logger nlogger("NetworkTopologyStrategyLogger");
|
||||
|
||||
@@ -52,6 +53,27 @@ void print_natural_endpoints(double point, const std::vector<inet_address> v) {
|
||||
nlogger.debug("{}", strm.str());
|
||||
}
|
||||
|
||||
#ifndef SEASTAR_DEBUG
|
||||
// Checks (via BOOST_CHECK) that no adjacent pair of ranges in trv is out of
// order or touching: the check fails if adjacent_find locates a pair (a, b)
// for which the predicate below holds.
static void verify_sorted(const dht::token_range_vector& trv) {
|
||||
// True when range a is NOT strictly before range b: b has no start bound,
// a has no end bound, a ends after b starts, or both bounds share the same
// value and are both inclusive.
// NOTE(review): both ranges are taken by value, so each comparison copies
// them; const references would avoid the copies.
auto not_strictly_before = [] (const dht::token_range a, const dht::token_range b) {
|
||||
return !b.start()
|
||||
|| !a.end()
|
||||
|| a.end()->value() > b.start()->value()
|
||||
|| (a.end()->value() == b.start()->value() && a.end()->is_inclusive() && b.start()->is_inclusive());
|
||||
};
|
||||
BOOST_CHECK(boost::adjacent_find(trv, not_strictly_before) == trv.end());
|
||||
}
|
||||
#endif
|
||||
|
||||
// Runs verify_sorted() on the three range sets the replication strategy
// returns for endpoint ep: get_ranges, get_primary_ranges and
// get_primary_ranges_within_dc. The body is compiled out (the function becomes
// a no-op) when SEASTAR_DEBUG is defined.
static void check_ranges_are_sorted(abstract_replication_strategy* ars, gms::inet_address ep) {
|
||||
// Too slow in debug mode
|
||||
#ifndef SEASTAR_DEBUG
|
||||
verify_sorted(ars->get_ranges(ep));
|
||||
verify_sorted(ars->get_primary_ranges(ep));
|
||||
verify_sorted(ars->get_primary_ranges_within_dc(ep));
|
||||
#endif
|
||||
}
|
||||
|
||||
void strategy_sanity_check(
|
||||
abstract_replication_strategy* ars_ptr,
|
||||
const std::map<sstring, sstring>& options) {
|
||||
@@ -150,6 +172,7 @@ void full_ring_check(const std::vector<ring_point>& ring_points,
|
||||
auto endpoints2 = ars_ptr->get_natural_endpoints(t2);
|
||||
|
||||
endpoints_check(ars_ptr, endpoints2);
|
||||
check_ranges_are_sorted(ars_ptr, rp.host);
|
||||
BOOST_CHECK(cache_hit_count + 1 == ars_ptr->get_cache_hits_count());
|
||||
BOOST_CHECK(endpoints1 == endpoints2);
|
||||
}
|
||||
|
||||
@@ -342,7 +342,7 @@ SEASTAR_TEST_CASE(test_index_with_partition_key) {
|
||||
|
||||
// Queries that restrict only a part of the partition key and an index require filtering, because we need to compute token
|
||||
// in order to create a valid index view query
|
||||
BOOST_REQUIRE_THROW(e.execute_cql("SELECT * from tab WHERE a = 1 and e = 5"), exceptions::invalid_request_exception);
|
||||
BOOST_REQUIRE_THROW(e.execute_cql("SELECT * from tab WHERE a = 1 and e = 5").get(), exceptions::invalid_request_exception);
|
||||
|
||||
// Indexed queries with full primary key are allowed without filtering as well
|
||||
eventually([&] {
|
||||
@@ -362,7 +362,7 @@ SEASTAR_TEST_CASE(test_index_with_partition_key) {
|
||||
});
|
||||
|
||||
// This query needs filtering, because clustering key restrictions do not form a prefix
|
||||
BOOST_REQUIRE_THROW(e.execute_cql("SELECT * from tab WHERE a = 1 and b = 2 and d = 4 and e = 5"), exceptions::invalid_request_exception);
|
||||
BOOST_REQUIRE_THROW(e.execute_cql("SELECT * from tab WHERE a = 1 and b = 2 and d = 4 and e = 5").get(), exceptions::invalid_request_exception);
|
||||
eventually([&] {
|
||||
auto res = e.execute_cql("SELECT * from tab WHERE a = 1 and b = 2 and d = 4 and e = 5 ALLOW FILTERING").get0();
|
||||
assert_that(res).is_rows().with_rows({
|
||||
@@ -461,3 +461,45 @@ SEASTAR_TEST_CASE(test_index_on_pk_ck_with_paging) {
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_secondary_index_collections) {
|
||||
return do_with_cql_env_thread([] (cql_test_env& e) {
|
||||
e.execute_cql("create table t (p int primary key, s1 set<int>, m1 map<int, text>, l1 list<int>, s2 frozen<set<int>>, m2 frozen<map<int, text>>, l2 frozen<list<int>>)").get();
|
||||
|
||||
//NOTICE(sarna): should be lifted after issue #2962 is resolved
|
||||
BOOST_REQUIRE_THROW(e.execute_cql("create index on t(s1)").get(), exceptions::invalid_request_exception);
|
||||
BOOST_REQUIRE_THROW(e.execute_cql("create index on t(m1)").get(), exceptions::invalid_request_exception);
|
||||
BOOST_REQUIRE_THROW(e.execute_cql("create index on t(l1)").get(), exceptions::invalid_request_exception);
|
||||
|
||||
e.execute_cql("create index on t(FULL(s2))").get();
|
||||
e.execute_cql("create index on t(FULL(m2))").get();
|
||||
e.execute_cql("create index on t(FULL(l2))").get();
|
||||
|
||||
e.execute_cql("insert into t(p, s2, m2, l2) values (1, {1}, {1: 'one', 2: 'two'}, [2])").get();
|
||||
e.execute_cql("insert into t(p, s2, m2, l2) values (2, {2}, {3: 'three'}, [3, 4, 5])").get();
|
||||
e.execute_cql("insert into t(p, s2, m2, l2) values (3, {3}, {5: 'five', 7: 'seven'}, [7, 8, 9])").get();
|
||||
|
||||
auto set_type = set_type_impl::get_instance(int32_type, true);
|
||||
auto map_type = map_type_impl::get_instance(int32_type, utf8_type, true);
|
||||
auto list_type = list_type_impl::get_instance(int32_type, true);
|
||||
|
||||
eventually([&] {
|
||||
auto res = e.execute_cql("SELECT p from t where s2 = {2}").get0();
|
||||
assert_that(res).is_rows().with_rows({{{int32_type->decompose(2)}}});
|
||||
res = e.execute_cql("SELECT p from t where s2 = {}").get0();
|
||||
assert_that(res).is_rows().with_size(0);
|
||||
});
|
||||
eventually([&] {
|
||||
auto res = e.execute_cql("SELECT p from t where m2 = {5: 'five', 7: 'seven'}").get0();
|
||||
assert_that(res).is_rows().with_rows({{{int32_type->decompose(3)}}});
|
||||
res = e.execute_cql("SELECT p from t where m2 = {1: 'one', 2: 'three'}").get0();
|
||||
assert_that(res).is_rows().with_size(0);
|
||||
});
|
||||
eventually([&] {
|
||||
auto res = e.execute_cql("SELECT p from t where l2 = [2]").get0();
|
||||
assert_that(res).is_rows().with_rows({{{int32_type->decompose(1)}}});
|
||||
res = e.execute_cql("SELECT p from t where l2 = [3]").get0();
|
||||
assert_that(res).is_rows().with_size(0);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -4463,3 +4463,290 @@ SEASTAR_THREAD_TEST_CASE(test_complex_column_zero_subcolumns_read) {
|
||||
r.produces_end_of_stream();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_uncompressed_read_two_rows_fast_forwarding) {
|
||||
auto abj = defer([] { await_background_jobs().get(); });
|
||||
// The following tests run on files in tests/sstables/3.x/uncompressed/read_two_rows_fast_forwarding
|
||||
// They were created using the following CQL statements:
|
||||
//
|
||||
// CREATE TABLE two_rows_fast_forwarding (pk int, ck int, rc int, PRIMARY KEY (pk, ck)) WITH compression = {'sstable_compression': ''};
|
||||
// INSERT INTO two_rows_fast_forwarding (pk, ck, rc) VALUES (0, 7, 7);
|
||||
// INSERT INTO two_rows_fast_forwarding (pk, ck, rc) VALUES (0, 8, 8);
|
||||
|
||||
static const sstring path = "tests/sstables/3.x/uncompressed/read_two_rows_fast_forwarding";
|
||||
static thread_local const schema_ptr s =
|
||||
schema_builder("test_ks", "two_rows_fast_forwarding")
|
||||
.with_column("pk", int32_type, column_kind::partition_key)
|
||||
.with_column("ck", int32_type, column_kind::clustering_key)
|
||||
.with_column("rc", int32_type)
|
||||
.build();
|
||||
sstable_assertions sst(s, path);
|
||||
sst.load();
|
||||
|
||||
auto to_pkey = [&] (int key) {
|
||||
auto bytes = int32_type->decompose(int32_t(key));
|
||||
auto pk = partition_key::from_single_value(*s, bytes);
|
||||
return dht::global_partitioner().decorate_key(*s, pk);
|
||||
};
|
||||
|
||||
auto to_ckey = [&] (int key) {
|
||||
auto bytes = int32_type->decompose(int32_t(key));
|
||||
return clustering_key::from_single_value(*s, bytes);
|
||||
};
|
||||
|
||||
auto rc_cdef = s->get_column_definition(to_bytes("rc"));
|
||||
BOOST_REQUIRE(rc_cdef);
|
||||
|
||||
auto to_expected = [rc_cdef] (int val) {
|
||||
return std::vector<flat_reader_assertions::expected_column>{{rc_cdef, int32_type->decompose(int32_t(val))}};
|
||||
};
|
||||
|
||||
auto r = assert_that(sst.read_range_rows_flat(query::full_partition_range,
|
||||
s->full_slice(),
|
||||
default_priority_class(),
|
||||
no_resource_tracking(),
|
||||
streamed_mutation::forwarding::yes));
|
||||
r.produces_partition_start(to_pkey(0))
|
||||
.produces_end_of_stream();
|
||||
|
||||
r.fast_forward_to(to_ckey(2), to_ckey(3));
|
||||
r.produces_end_of_stream();
|
||||
|
||||
r.fast_forward_to(to_ckey(4), to_ckey(5));
|
||||
r.produces_end_of_stream();
|
||||
|
||||
r.fast_forward_to(to_ckey(6), to_ckey(9));
|
||||
r.produces_row(to_ckey(7), to_expected(7))
|
||||
.produces_row(to_ckey(8), to_expected(8))
|
||||
.produces_end_of_stream();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_dead_row_marker) {
|
||||
auto abj = defer([] { await_background_jobs().get(); });
|
||||
sstring table_name = "dead_row_marker";
|
||||
// CREATE TABLE dead_row_marker (pk int, ck int, st int static, rc int , PRIMARY KEY (pk, ck)) WITH compression = {'sstable_compression': ''};
|
||||
schema_builder builder("sst3", table_name);
|
||||
builder.with_column("pk", int32_type, column_kind::partition_key);
|
||||
builder.with_column("ck", int32_type, column_kind::clustering_key);
|
||||
builder.with_column("st", int32_type, column_kind::static_column);
|
||||
builder.with_column("rc", int32_type);
|
||||
builder.set_compressor_params(compression_parameters());
|
||||
schema_ptr s = builder.build(schema_builder::compact_storage::no);
|
||||
|
||||
lw_shared_ptr<memtable> mt = make_lw_shared<memtable>(s);
|
||||
|
||||
auto key = partition_key::from_deeply_exploded(*s, { 1 });
|
||||
mutation mut{s, key};
|
||||
mut.set_static_cell("st", data_value{1135}, write_timestamp);
|
||||
|
||||
clustering_key ckey = clustering_key::from_deeply_exploded(*s, { 2 });
|
||||
auto& clustered_row = mut.partition().clustered_row(*s, ckey);
|
||||
clustered_row.apply(row_marker{tombstone{write_timestamp, write_time_point}});
|
||||
|
||||
mut.set_cell(ckey, "rc", data_value{777}, write_timestamp);
|
||||
|
||||
mt->apply(mut);
|
||||
|
||||
tmpdir tmp = write_and_compare_sstables(s, mt, table_name);
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_shadowable_deletion) {
|
||||
/* The created SSTables content should match that of
|
||||
* an MV filled with the following queries:
|
||||
*
|
||||
* CREATE TABLE cf (p int PRIMARY KEY, v int) WITH compression = {'sstable_compression': ''};
|
||||
* CREATE MATERIALIZED VIEW mv AS SELECT * FROM cf WHERE p IS NOT NULL AND v IS NOT NULL PRIMARY KEY (v, p);
|
||||
* INSERT INTO cf (p, v) VALUES (1, 0);
|
||||
* UPDATE cf SET v = 1 WHERE p = 1;
|
||||
*/
|
||||
auto abj = defer([] { await_background_jobs().get(); });
|
||||
sstring table_name = "shadowable_deletion";
|
||||
schema_builder builder("sst3", table_name);
|
||||
builder.with_column("pk", int32_type, column_kind::partition_key);
|
||||
builder.with_column("ck", int32_type, column_kind::clustering_key);
|
||||
builder.set_compressor_params(compression_parameters());
|
||||
schema_ptr s = builder.build(schema_builder::compact_storage::no);
|
||||
|
||||
lw_shared_ptr<memtable> mt = make_lw_shared<memtable>(s);
|
||||
|
||||
clustering_key ckey = clustering_key::from_deeply_exploded(*s, { 1 });
|
||||
mutation mut1{s, partition_key::from_deeply_exploded(*s, {1})};
|
||||
{
|
||||
auto& clustered_row = mut1.partition().clustered_row(*s, ckey);
|
||||
clustered_row.apply(row_marker{api::timestamp_type{1540230880370422}});
|
||||
mt->apply(mut1);
|
||||
}
|
||||
|
||||
mutation mut2{s, partition_key::from_deeply_exploded(*s, {0})};
|
||||
{
|
||||
auto& clustered_row = mut2.partition().clustered_row(*s, ckey);
|
||||
api::timestamp_type ts {1540230874370065};
|
||||
gc_clock::time_point tp {gc_clock::duration(1540230880)};
|
||||
clustered_row.apply(row_marker{api::timestamp_type{ts}});
|
||||
clustered_row.apply(shadowable_tombstone(ts, tp));
|
||||
mt->apply(mut2);
|
||||
}
|
||||
|
||||
tmpdir tmp = write_and_compare_sstables(s, mt, table_name);
|
||||
validate_read(s, tmp.path, {mut1, mut2});
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_regular_and_shadowable_deletion) {
|
||||
/* The created SSTables content should match that of
|
||||
* an MV filled with the following queries:
|
||||
*
|
||||
* CREATE TABLE cf (p INT, c INT, v INT, PRIMARY KEY (p, c));
|
||||
* CREATE MATERIALIZED VIEW mvf AS SELECT * FROM cf WHERE p IS NOT NULL AND c IS NOT NULL AND v IS NOT NULL PRIMARY KEY (v, p, c);
|
||||
* INSERT INTO cf (p, c, v) VALUES (1, 1, 0) USING TIMESTAMP 1540230874370001;
|
||||
* DELETE FROM cf USING TIMESTAMP 1540230874370001 WHERE p = 1 AND c = 1;
|
||||
* UPDATE cf USING TIMESTAMP 1540230874370002 SET v = 0 WHERE p = 1 AND c = 1;
|
||||
* UPDATE cf USING TIMESTAMP 1540230874370003 SET v = 1 WHERE p = 1 AND c = 1;
|
||||
*/
|
||||
auto abj = defer([] { await_background_jobs().get(); });
|
||||
sstring table_name = "regular_and_shadowable_deletion";
|
||||
schema_builder builder("sst3", table_name);
|
||||
builder.with_column("v", int32_type, column_kind::partition_key);
|
||||
builder.with_column("p", int32_type, column_kind::clustering_key);
|
||||
builder.with_column("c", int32_type, column_kind::clustering_key);
|
||||
builder.set_compressor_params(compression_parameters());
|
||||
schema_ptr s = builder.build(schema_builder::compact_storage::no);
|
||||
|
||||
auto make_tombstone = [] (int64_t ts, int32_t tp) {
|
||||
return tombstone{api::timestamp_type{ts}, gc_clock::time_point(gc_clock::duration(tp))};
|
||||
};
|
||||
|
||||
lw_shared_ptr<memtable> mt = make_lw_shared<memtable>(s);
|
||||
|
||||
clustering_key ckey = clustering_key::from_deeply_exploded(*s, { {1}, {1} });
|
||||
mutation mut1{s, partition_key::from_deeply_exploded(*s, {1})};
|
||||
{
|
||||
auto& clustered_row = mut1.partition().clustered_row(*s, ckey);
|
||||
clustered_row.apply(row_marker{api::timestamp_type{1540230874370003}});
|
||||
clustered_row.apply(make_tombstone(1540230874370001, 1540251167));
|
||||
mt->apply(mut1);
|
||||
}
|
||||
|
||||
mutation mut2{s, partition_key::from_deeply_exploded(*s, {0})};
|
||||
{
|
||||
auto& clustered_row = mut2.partition().clustered_row(*s, ckey);
|
||||
gc_clock::time_point tp {gc_clock::duration(1540230880)};
|
||||
clustered_row.apply(row_marker{api::timestamp_type{1540230874370002}});
|
||||
clustered_row.apply(make_tombstone(1540230874370001, 1540251167));
|
||||
clustered_row.apply(shadowable_tombstone(make_tombstone(1540230874370002, 1540251216)));
|
||||
mt->apply(mut2);
|
||||
}
|
||||
|
||||
tmpdir tmp = write_and_compare_sstables(s, mt, table_name);
|
||||
validate_read(s, tmp.path, {mut1, mut2});
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_write_static_row_with_missing_columns) {
|
||||
auto abj = defer([] { await_background_jobs().get(); });
|
||||
sstring table_name = "static_row_with_missing_columns";
|
||||
// CREATE TABLE static_row (pk int, ck int, st1 int static, st2 int static, rc int, PRIMARY KEY (pk, ck)) WITH compression = {'sstable_compression': ''};
|
||||
schema_builder builder("sst3", table_name);
|
||||
builder.with_column("pk", int32_type, column_kind::partition_key);
|
||||
builder.with_column("ck", int32_type, column_kind::clustering_key);
|
||||
builder.with_column("st1", int32_type, column_kind::static_column);
|
||||
builder.with_column("st2", int32_type, column_kind::static_column);
|
||||
builder.with_column("rc", int32_type);
|
||||
builder.set_compressor_params(compression_parameters());
|
||||
schema_ptr s = builder.build(schema_builder::compact_storage::no);
|
||||
|
||||
lw_shared_ptr<memtable> mt = make_lw_shared<memtable>(s);
|
||||
|
||||
// INSERT INTO static_row (pk, ck, st1, rc) VALUES (0, 1, 2, 3);
|
||||
auto key = partition_key::from_deeply_exploded(*s, {0});
|
||||
mutation mut{s, key};
|
||||
clustering_key ckey = clustering_key::from_deeply_exploded(*s, { 1 });
|
||||
mut.partition().apply_insert(*s, ckey, write_timestamp);
|
||||
mut.set_static_cell("st1", data_value{2}, write_timestamp);
|
||||
mut.set_cell(ckey, "rc", data_value{3}, write_timestamp);
|
||||
mt->apply(mut);
|
||||
|
||||
tmpdir tmp = write_and_compare_sstables(s, mt, table_name);
|
||||
validate_read(s, tmp.path, {mut});
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_write_interleaved_atomic_and_collection_columns) {
|
||||
auto abj = defer([] { await_background_jobs().get(); });
|
||||
sstring table_name = "interleaved_atomic_and_collection_columns";
|
||||
// CREATE TABLE interleaved_atomic_and_collection_columns ( pk int, ck int, rc1 int, rc2 set<int>, rc3 int, rc4 set<int>,
|
||||
// rc5 int, rc6 set<int>, PRIMARY KEY (pk, ck)) WITH compression = {'sstable_compression': ''};
|
||||
auto set_of_ints_type = set_type_impl::get_instance(int32_type, true);
|
||||
schema_builder builder("sst3", table_name);
|
||||
builder.with_column("pk", int32_type, column_kind::partition_key);
|
||||
builder.with_column("ck", int32_type, column_kind::clustering_key);
|
||||
builder.with_column("rc1", int32_type);
|
||||
builder.with_column("rc2", set_of_ints_type);
|
||||
builder.with_column("rc3", int32_type);
|
||||
builder.with_column("rc4", set_of_ints_type);
|
||||
builder.with_column("rc5", int32_type);
|
||||
builder.with_column("rc6", set_of_ints_type);
|
||||
builder.set_compressor_params(compression_parameters());
|
||||
schema_ptr s = builder.build(schema_builder::compact_storage::no);
|
||||
|
||||
lw_shared_ptr<memtable> mt = make_lw_shared<memtable>(s);
|
||||
|
||||
// INSERT INTO interleaved_atomic_and_collection_columns (pk, ck, rc1, rc4, rc5)
|
||||
// VALUES (0, 1, 2, {3, 4}, 5) USING TIMESTAMP 1525385507816568;
|
||||
auto key = partition_key::from_deeply_exploded(*s, {0});
|
||||
mutation mut{s, key};
|
||||
clustering_key ckey = clustering_key::from_deeply_exploded(*s, { 1 });
|
||||
mut.partition().apply_insert(*s, ckey, write_timestamp);
|
||||
mut.set_cell(ckey, "rc1", data_value{2}, write_timestamp);
|
||||
|
||||
set_type_impl::mutation set_values;
|
||||
set_values.tomb = tombstone {write_timestamp - 1, write_time_point};
|
||||
set_values.cells.emplace_back(int32_type->decompose(3), atomic_cell::make_live(*bytes_type, write_timestamp, bytes_view{}));
|
||||
set_values.cells.emplace_back(int32_type->decompose(4), atomic_cell::make_live(*bytes_type, write_timestamp, bytes_view{}));
|
||||
mut.set_clustered_cell(ckey, *s->get_column_definition("rc4"), set_of_ints_type->serialize_mutation_form(set_values));
|
||||
|
||||
mut.set_cell(ckey, "rc5", data_value{5}, write_timestamp);
|
||||
mt->apply(mut);
|
||||
|
||||
tmpdir tmp = write_and_compare_sstables(s, mt, table_name);
|
||||
validate_read(s, tmp.path, {mut});
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_write_static_interleaved_atomic_and_collection_columns) {
|
||||
auto abj = defer([] { await_background_jobs().get(); });
|
||||
sstring table_name = "static_interleaved_atomic_and_collection_columns";
|
||||
// CREATE TABLE static_interleaved_atomic_and_collection_columns ( pk int, ck int, st1 int static,
|
||||
// st2 set<int> static, st3 int static, st4 set<int> static, st5 int static, st6 set<int> static,
|
||||
// PRIMARY KEY (pk, ck)) WITH compression = {'sstable_compression': ''};
|
||||
auto set_of_ints_type = set_type_impl::get_instance(int32_type, true);
|
||||
schema_builder builder("sst3", table_name);
|
||||
builder.with_column("pk", int32_type, column_kind::partition_key);
|
||||
builder.with_column("ck", int32_type, column_kind::clustering_key);
|
||||
builder.with_column("st1", int32_type, column_kind::static_column);
|
||||
builder.with_column("st2", set_of_ints_type, column_kind::static_column);
|
||||
builder.with_column("st3", int32_type, column_kind::static_column);
|
||||
builder.with_column("st4", set_of_ints_type, column_kind::static_column);
|
||||
builder.with_column("st5", int32_type, column_kind::static_column);
|
||||
builder.with_column("st6", set_of_ints_type, column_kind::static_column);
|
||||
builder.set_compressor_params(compression_parameters());
|
||||
schema_ptr s = builder.build(schema_builder::compact_storage::no);
|
||||
|
||||
lw_shared_ptr<memtable> mt = make_lw_shared<memtable>(s);
|
||||
|
||||
// INSERT INTO static_interleaved_atomic_and_collection_columns (pk, ck, st1, st4, st5)
|
||||
// VALUES (0, 1, 2, {3, 4}, 5) USING TIMESTAMP 1525385507816568;
|
||||
auto key = partition_key::from_deeply_exploded(*s, {0});
|
||||
mutation mut{s, key};
|
||||
clustering_key ckey = clustering_key::from_deeply_exploded(*s, { 1 });
|
||||
mut.partition().apply_insert(*s, ckey, write_timestamp);
|
||||
mut.set_static_cell("st1", data_value{2}, write_timestamp);
|
||||
|
||||
set_type_impl::mutation set_values;
|
||||
set_values.tomb = tombstone {write_timestamp - 1, write_time_point};
|
||||
set_values.cells.emplace_back(int32_type->decompose(3), atomic_cell::make_live(*bytes_type, write_timestamp, bytes_view{}));
|
||||
set_values.cells.emplace_back(int32_type->decompose(4), atomic_cell::make_live(*bytes_type, write_timestamp, bytes_view{}));
|
||||
mut.set_static_cell(*s->get_column_definition("st4"), set_of_ints_type->serialize_mutation_form(set_values));
|
||||
|
||||
mut.set_static_cell("st5", data_value{5}, write_timestamp);
|
||||
mt->apply(mut);
|
||||
|
||||
tmpdir tmp = write_and_compare_sstables(s, mt, table_name);
|
||||
validate_read(s, tmp.path, {mut});
|
||||
}
|
||||
|
||||
|
||||
@@ -2510,38 +2510,34 @@ SEASTAR_TEST_CASE(check_multi_schema) {
|
||||
// e blob
|
||||
//);
|
||||
return for_each_sstable_version([] (const sstables::sstable::version_types version) {
|
||||
// We prohibit altering types for SSTables in 'mc' format.
|
||||
// This is compliant with the Origin behaviour - see CASSANDRA-12443
|
||||
if (version != sstables::sstable::version_types::mc) {
|
||||
auto set_of_ints_type = set_type_impl::get_instance(int32_type, true);
|
||||
auto builder = schema_builder("test", "test_multi_schema")
|
||||
.with_column("a", int32_type, column_kind::partition_key)
|
||||
.with_column("c", set_of_ints_type)
|
||||
.with_column("d", int32_type)
|
||||
.with_column("e", int32_type);
|
||||
auto s = builder.build();
|
||||
auto set_of_ints_type = set_type_impl::get_instance(int32_type, true);
|
||||
auto builder = schema_builder("test", "test_multi_schema")
|
||||
.with_column("a", int32_type, column_kind::partition_key)
|
||||
.with_column("c", set_of_ints_type)
|
||||
.with_column("d", int32_type)
|
||||
.with_column("e", bytes_type);
|
||||
auto s = builder.build();
|
||||
|
||||
auto sst = make_sstable(s, get_test_dir("multi_schema_test", s), 1, version, big);
|
||||
auto f = sst->load();
|
||||
return f.then([sst, s] {
|
||||
auto reader = make_lw_shared(sstable_reader(sst, s));
|
||||
return read_mutation_from_flat_mutation_reader(*reader, db::no_timeout).then([reader, s] (mutation_opt m) {
|
||||
BOOST_REQUIRE(m);
|
||||
BOOST_REQUIRE(m->key().equal(*s, partition_key::from_singular(*s, 0)));
|
||||
auto& rows = m->partition().clustered_rows();
|
||||
BOOST_REQUIRE_EQUAL(rows.calculate_size(), 1);
|
||||
auto& row = rows.begin()->row();
|
||||
BOOST_REQUIRE(!row.deleted_at());
|
||||
auto& cells = row.cells();
|
||||
BOOST_REQUIRE_EQUAL(cells.size(), 1);
|
||||
auto& cdef = *s->get_column_definition("e");
|
||||
BOOST_REQUIRE_EQUAL(cells.cell_at(cdef.id).as_atomic_cell(cdef).value(), int32_type->decompose(5));
|
||||
return (*reader)(db::no_timeout);
|
||||
}).then([reader, s] (mutation_fragment_opt m) {
|
||||
BOOST_REQUIRE(!m);
|
||||
});
|
||||
auto sst = make_sstable(s, get_test_dir("multi_schema_test", s), 1, version, big);
|
||||
auto f = sst->load();
|
||||
return f.then([sst, s] {
|
||||
auto reader = make_lw_shared(sstable_reader(sst, s));
|
||||
return read_mutation_from_flat_mutation_reader(*reader, db::no_timeout).then([reader, s] (mutation_opt m) {
|
||||
BOOST_REQUIRE(m);
|
||||
BOOST_REQUIRE(m->key().equal(*s, partition_key::from_singular(*s, 0)));
|
||||
auto& rows = m->partition().clustered_rows();
|
||||
BOOST_REQUIRE_EQUAL(rows.calculate_size(), 1);
|
||||
auto& row = rows.begin()->row();
|
||||
BOOST_REQUIRE(!row.deleted_at());
|
||||
auto& cells = row.cells();
|
||||
BOOST_REQUIRE_EQUAL(cells.size(), 1);
|
||||
auto& cdef = *s->get_column_definition("e");
|
||||
BOOST_REQUIRE_EQUAL(cells.cell_at(cdef.id).as_atomic_cell(cdef).value(), int32_type->decompose(5));
|
||||
return (*reader)(db::no_timeout);
|
||||
}).then([reader, s] (mutation_fragment_opt m) {
|
||||
BOOST_REQUIRE(!m);
|
||||
});
|
||||
}
|
||||
});
|
||||
return make_ready_future<>();
|
||||
});
|
||||
}
|
||||
|
||||
@@ -410,7 +410,7 @@ SEASTAR_TEST_CASE(test_sstable_conforms_to_mutation_source) {
|
||||
return seastar::async([] {
|
||||
auto wait_bg = seastar::defer([] { sstables::await_background_jobs().get(); });
|
||||
storage_service_for_tests ssft;
|
||||
for (auto version : {sstables::sstable::version_types::ka, sstables::sstable::version_types::la}) {
|
||||
for (auto version : all_sstable_versions) {
|
||||
for (auto index_block_size : {1, 128, 64*1024}) {
|
||||
sstable_writer_config cfg;
|
||||
cfg.promoted_index_block_size = index_block_size;
|
||||
|
||||
Binary file not shown.
Binary file not shown.
@@ -0,0 +1 @@
|
||||
2712473259
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,8 @@
|
||||
Data.db
|
||||
Digest.crc32
|
||||
Index.db
|
||||
TOC.txt
|
||||
Filter.db
|
||||
Statistics.db
|
||||
Summary.db
|
||||
CRC.db
|
||||
Binary file not shown.
Binary file not shown.
@@ -0,0 +1 @@
|
||||
2548599407
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,8 @@
|
||||
CRC.db
|
||||
Filter.db
|
||||
Statistics.db
|
||||
TOC.txt
|
||||
Digest.crc32
|
||||
Index.db
|
||||
Summary.db
|
||||
Data.db
|
||||
Binary file not shown.
@@ -0,0 +1 @@
|
||||
1117317764
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1 @@
|
||||
476890539
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1 @@
|
||||
1657818438
|
||||
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user