/*
 * Copyright (C) 2016 ScyllaDB
 */

/*
 * This file is part of Scylla.
 *
 * Scylla is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Scylla is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
 */

#pragma once

#include "mutation_partition.hh"
#include "utils/optimized_optional.hh"

#include <experimental/optional>

namespace stdx = std::experimental;

// mutation_fragments are the objects that streamed_mutation are going to
// stream. They can represent:
//  - a static row
//  - a clustering row
//  - start of range tombstone
//  - end of range tombstone
//
// There exists an ordering (implemented in position_in_partition class) between
// mutation_fragment objects. It reflects the order in which content of
// partition appears in the sstables.
class position_in_partition; class clustering_row { clustering_key_prefix _ck; tombstone _t; row_marker _marker; row _cells; public: explicit clustering_row(clustering_key_prefix ck) : _ck(std::move(ck)) { } clustering_row(clustering_key_prefix ck, tombstone t, row_marker marker, row cells) : _ck(std::move(ck)), _t(t), _marker(std::move(marker)), _cells(std::move(cells)) { } clustering_row(const rows_entry& re) : _ck(re.key()), _t(re.row().deleted_at()), _marker(re.row().marker()), _cells(re.row().cells()) { } clustering_row(rows_entry&& re) : _ck(std::move(re.key())), _t(re.row().deleted_at()), _marker(re.row().marker()), _cells(std::move(re.row().cells())) { } clustering_key_prefix& key() { return _ck; } const clustering_key_prefix& key() const { return _ck; } tombstone tomb() const { return _t; } void remove_tombstone() { _t = tombstone(); } const row_marker& marker() const { return _marker; } row_marker& marker() { return _marker; } const row& cells() const { return _cells; } row& cells() { return _cells; } bool empty() const { return !_t && _marker.is_missing() && _cells.empty(); } void apply(const schema& s, clustering_row&& cr) { _t.apply(cr._t); _marker.apply(std::move(cr._marker)); _cells.apply(s, column_kind::regular_column, std::move(cr._cells)); } void set_cell(const column_definition& def, atomic_cell_or_collection&& value) { _cells.apply(def, std::move(value)); } void apply(row_marker rm) { _marker.apply(std::move(rm)); } void apply(tombstone t) { _t.apply(t); } void apply(const schema& s, const rows_entry& r) { _t.apply(r.row().deleted_at()); _marker.apply(r.row().marker()); _cells.apply(s, column_kind::regular_column, r.row().cells()); } position_in_partition position() const; size_t memory_usage() const { return _ck.memory_usage() + _cells.memory_usage(); } }; class static_row { row _cells; public: static_row() = default; explicit static_row(const row& r) : _cells(r) { } explicit static_row(row&& r) : _cells(std::move(r)) { } row& cells() { return 
_cells; } const row& cells() const { return _cells; } bool empty() const { return _cells.empty(); } void apply(const schema& s, const row& r) { _cells.apply(s, column_kind::static_column, r); } void apply(const schema& s, static_row&& sr) { _cells.apply(s, column_kind::static_column, std::move(sr._cells)); } void set_cell(const column_definition& def, atomic_cell_or_collection&& value) { _cells.apply(def, std::move(value)); } position_in_partition position() const; size_t memory_usage() const { return _cells.memory_usage(); } }; class mutation_fragment { public: enum class kind { static_row, clustering_row, range_tombstone, }; private: union data { data() { } ~data() { } static_row _static_row; clustering_row _clustering_row; range_tombstone _range_tombstone; }; private: kind _kind; std::unique_ptr _data; mutation_fragment() = default; explicit operator bool() const noexcept { return bool(_data); } void destroy_data() noexcept; friend class optimized_optional; int row_type_weight() const { return !is_static_row(); } int bound_kind_weight() const; friend class position_in_partition; public: mutation_fragment(static_row&& r); mutation_fragment(clustering_row&& r); mutation_fragment(range_tombstone&& r); mutation_fragment(const mutation_fragment&) = delete; mutation_fragment(mutation_fragment&& other) = default; mutation_fragment& operator=(const mutation_fragment&) = delete; mutation_fragment& operator=(mutation_fragment&& other) noexcept { if (this != &other) { this->~mutation_fragment(); new (this) mutation_fragment(std::move(other)); } return *this; } ~mutation_fragment() { if (_data) { destroy_data(); } } position_in_partition position() const; bool has_key() const { return !is_static_row(); } // Requirements: has_key() == true const clustering_key_prefix& key() const; kind mutation_fragment_kind() const { return _kind; } bool is_static_row() const { return _kind == kind::static_row; } bool is_clustering_row() const { return _kind == kind::clustering_row; } bool 
is_range_tombstone() const { return _kind == kind::range_tombstone; } static_row& as_static_row() { return _data->_static_row; } clustering_row& as_clustering_row() { return _data->_clustering_row; } range_tombstone& as_range_tombstone() { return _data->_range_tombstone; } const static_row& as_static_row() const { return _data->_static_row; } const clustering_row& as_clustering_row() const { return _data->_clustering_row; } const range_tombstone& as_range_tombstone() const { return _data->_range_tombstone; } // Requirements: mutation_fragment_kind() == mf.mutation_fragment_kind() && !is_range_tombstone() void apply(const schema& s, mutation_fragment&& mf); /* template concept bool MutationFragmentConsumer() { return requires(T t, static_row sr, clustering_row cr, range_tombstone rt) { { t.consume(std::move(sr)) } -> ReturnType; { t.consume(std::move(cr)) } -> ReturnType; { t.consume(std::move(rt)) } -> ReturnType; }; } */ template decltype(auto) consume(Consumer& consumer) && { switch (_kind) { case kind::static_row: return consumer.consume(std::move(_data->_static_row)); case kind::clustering_row: return consumer.consume(std::move(_data->_clustering_row)); case kind::range_tombstone: return consumer.consume(std::move(_data->_range_tombstone)); } abort(); } /* template concept bool MutationFragmentVisitor() { return requires(T t, const static_row& sr, const clustering_row& cr, const range_tombstone& rt) { { t(sr) } -> ReturnType; { t(cr) } -> ReturnType; { t(rt) } -> ReturnType; }; } */ template decltype(auto) visit(Visitor&& visitor) const { switch (_kind) { case kind::static_row: return visitor(as_static_row()); case kind::clustering_row: return visitor(as_clustering_row()); case kind::range_tombstone: return visitor(as_range_tombstone()); } abort(); } size_t memory_usage() const { return sizeof(data) + visit([] (auto& mf) { return mf.memory_usage(); }); } }; class position_in_partition { int _bound_weight = 0; stdx::optional _ck; private: // 0 for static row, 1 
for clustering row and range tombstones begin/end int row_type_weight() const { return !!_ck; } int bound_kind_weight() const { return _bound_weight; } public: struct static_row_tag_t { }; struct clustering_row_tag_t { }; struct range_tombstone_tag_t { }; explicit position_in_partition(static_row_tag_t) { } position_in_partition(clustering_row_tag_t, clustering_key_prefix ck) : _ck(std::move(ck)) { } position_in_partition(range_tombstone_tag_t, bound_view bv) : _bound_weight(weight(bv.kind)), _ck(bv.prefix) { } clustering_key_prefix& key() { return *_ck; } const clustering_key_prefix& key() const { return *_ck; } class less_compare { bound_view::compare _cmp; private: template bool compare(const T& a, const U& b) const { auto a_rt_weight = a.row_type_weight(); auto b_rt_weight = b.row_type_weight(); if (!a_rt_weight || !b_rt_weight) { return a_rt_weight < b_rt_weight; } return _cmp(a.key(), a.bound_kind_weight(), b.key(), b.bound_kind_weight()); } public: less_compare(const schema& s) : _cmp(s) { } bool operator()(const position_in_partition& a, const position_in_partition& b) const { return compare(a, b); } bool operator()(const mutation_fragment& a, const mutation_fragment& b) const { return compare(a, b); } bool operator()(const position_in_partition& a, const mutation_fragment& b) const { return compare(a, b); } bool operator()(const mutation_fragment& a, const position_in_partition& b) const { return compare(a, b); } bool operator()(const position_in_partition& a, const rows_entry& b) const { return !a.row_type_weight() || _cmp(a.key(), a.bound_kind_weight(), b.key(), 0); } bool operator()(const rows_entry& a, const position_in_partition& b) const { return b.row_type_weight() && _cmp(a.key(), 0, b.key(), b.bound_kind_weight()); } bool operator()(const rows_entry& a, const rows_entry& b) const { return _cmp(a.key(), 0, b.key(), 0); } bool operator()(const range_tombstone& a, const mutation_fragment& b) const { return b.row_type_weight() && _cmp(a.start, 
weight(a.start_kind), b.key(), b.bound_kind_weight()); } bool operator()(const mutation_fragment& a, const range_tombstone& b) const { return !a.row_type_weight() || _cmp(a.key(), a.bound_kind_weight(), b.start, weight(b.start_kind)); } bool operator()(const bound_view& a, const rows_entry& b) const { return _cmp(a.prefix, weight(a.kind), b.key(), 0); } bool operator()(const rows_entry& a, const bound_view& b) const { return _cmp(a.key(), 0, b.prefix, weight(b.kind)); } bool operator()(const bound_view& a, const mutation_fragment& b) const { return b.row_type_weight() && _cmp(a.prefix, weight(a.kind), b.key(), b.bound_kind_weight()); } }; class equal_compare { clustering_key_prefix::equality _equal; template bool compare(const T& a, const U& b) const { return a.row_type_weight() == b.row_type_weight() && (!a.row_type_weight() || (_equal(a.key(), b.key()) && a.bound_kind_weight() == b.bound_kind_weight())); } public: equal_compare(const schema& s) : _equal(s) { } bool operator()(const position_in_partition& a, const position_in_partition& b) const { return compare(a, b); } bool operator()(const mutation_fragment& a, const mutation_fragment& b) const { return compare(a, b); } bool operator()(const position_in_partition& a, const mutation_fragment& b) const { return compare(a, b); } bool operator()(const mutation_fragment& a, const position_in_partition& b) const { return compare(a, b); } bool operator()(const rows_entry& a, const clustering_row& b) const { return _equal(a.key(), b.key()); } bool operator()(const clustering_row& a, const clustering_row& b) const { return _equal(a.key(), b.key()); } bool operator()(const clustering_row& a, const rows_entry& b) const { return _equal(a.key(), b.key()); } }; }; inline position_in_partition static_row::position() const { return position_in_partition(position_in_partition::static_row_tag_t()); } inline position_in_partition clustering_row::position() const { return 
position_in_partition(position_in_partition::clustering_row_tag_t(), _ck); } template<> struct move_constructor_disengages { enum { value = true }; }; using mutation_fragment_opt = optimized_optional; // streamed_mutation represents a mutation in a form of a stream of // mutation_fragments. streamed_mutation emits mutation fragments in the order // they should appear in the sstables, i.e. static row is always the first one, // then clustering rows and range tombstones are emitted according to the // lexicographical ordering of their clustering keys and bounds of the range // tombstones. // // Range tombstones are disjoint, i.e. after emitting // range_tombstone_begin it is guaranteed that there is going to be a single // range_tombstone_end before another range_tombstone_begin is emitted. // // The ordering of mutation_fragments also guarantees that by the time the // consumer sees a clustering row it has already received all relevant tombstones. // // Partition key and partition tombstone are not streamed and is part of the // streamed_mutation itself. class streamed_mutation { public: // streamed_mutation uses batching. The mutation implementations are // supposed to fill a buffer with mutation fragments until is_buffer_full() // or end of stream is encountered. class impl { protected: // FIXME: use size in bytes of the mutation_fragments static constexpr size_t buffer_size = 16; schema_ptr _schema; dht::decorated_key _key; tombstone _partition_tombstone; bool _end_of_stream = false; circular_buffer _buffer; friend class streamed_mutation; protected: template void push_mutation_fragment(Args&&... 
args) { _buffer.emplace_back(std::forward(args)...); } public: explicit impl(schema_ptr s, dht::decorated_key dk, tombstone pt) : _schema(std::move(s)), _key(std::move(dk)), _partition_tombstone(pt) { _buffer.reserve(buffer_size); } virtual ~impl() { } virtual future<> fill_buffer() = 0; bool is_end_of_stream() const { return _end_of_stream; } bool is_buffer_empty() const { return _buffer.empty(); } bool is_buffer_full() const { return _buffer.size() >= buffer_size; } mutation_fragment pop_mutation_fragment() { auto mf = std::move(_buffer.front()); _buffer.pop_front(); return mf; } future operator()() { if (is_buffer_empty()) { if (is_end_of_stream()) { return make_ready_future(); } return fill_buffer().then([this] { return operator()(); }); } return make_ready_future(pop_mutation_fragment()); } }; private: std::unique_ptr _impl; streamed_mutation() = default; explicit operator bool() const { return bool(_impl); } friend class optimized_optional; public: explicit streamed_mutation(std::unique_ptr i) : _impl(std::move(i)) { } const partition_key& key() const { return _impl->_key.key(); } const dht::decorated_key& decorated_key() const { return _impl->_key; } schema_ptr schema() const { return _impl->_schema; } tombstone partition_tombstone() const { return _impl->_partition_tombstone; } bool is_end_of_stream() const { return _impl->is_end_of_stream(); } bool is_buffer_empty() const { return _impl->is_buffer_empty(); } bool is_buffer_full() const { return _impl->is_buffer_full(); } mutation_fragment pop_mutation_fragment() { return _impl->pop_mutation_fragment(); } future<> fill_buffer() { return _impl->fill_buffer(); } future operator()() { return _impl->operator()(); } }; std::ostream& operator<<(std::ostream& os, const streamed_mutation& sm); template streamed_mutation make_streamed_mutation(Args&&... 
args) { return streamed_mutation(std::make_unique(std::forward(args)...)); } template<> struct move_constructor_disengages { enum { value = true }; }; using streamed_mutation_opt = optimized_optional; /* template concept bool StreamedMutationConsumer() { return MutationFragmentConsumer && requires(T t, tombstone tomb) { { t.consume(tomb) } -> stop_iteration; t.consume_end_of_stream(); }; } */ template auto consume(streamed_mutation& m, Consumer consumer) { return do_with(std::move(consumer), [&m] (Consumer& c) { if (c.consume(m.partition_tombstone()) == stop_iteration::yes) { return make_ready_future().then([&] { return c.consume_end_of_stream(); }); } return repeat([&m, &c] { if (m.is_buffer_empty()) { if (m.is_end_of_stream()) { return make_ready_future(stop_iteration::yes); } return m.fill_buffer().then([] { return stop_iteration::no; }); } return make_ready_future(m.pop_mutation_fragment().consume(c)); }).then([&c] { return c.consume_end_of_stream(); }); }); } class mutation; streamed_mutation streamed_mutation_from_mutation(mutation); //Requires all streamed_mutations to have the same schema. streamed_mutation merge_mutations(std::vector); streamed_mutation reverse_streamed_mutation(streamed_mutation); // range_tombstone_stream is a helper object that simplifies producing a stream // of range tombstones and merging it with a stream of clustering rows. // Tombstones are added using apply() and retrieved using get_next(). // // get_next(const rows_entry&) and get_next(const mutation_fragment&) allow // merging the stream of tombstones with a stream of clustering rows. If these // overloads return disengaged optional it means that there is no tombstone // in the stream that should be emitted before the object given as an argument. // (And, consequently, if the optional is engaged that tombstone should be // emitted first). 
After calling any of these overloads with a mutation_fragment // which is at some position in partition P no range tombstone can be added to // the stream which start bound is before that position. // // get_next() overload which doesn't take any arguments is used to return the // remaining tombstones. After it was called no new tombstones can be added // to the stream. class range_tombstone_stream { const schema& _schema; position_in_partition::less_compare _cmp; range_tombstone_list _list; bool _inside_range_tombstone = false; private: mutation_fragment_opt do_get_next(); public: range_tombstone_stream(const schema& s) : _schema(s), _cmp(s), _list(s) { } mutation_fragment_opt get_next(const rows_entry&); mutation_fragment_opt get_next(const mutation_fragment&); mutation_fragment_opt get_next(); void apply(range_tombstone&& rt) { _list.apply(_schema, std::move(rt)); } void apply(const range_tombstone_list& list) { _list.apply(_schema, list); } }; // mutation_hasher is an equivalent of hashing_partition_visitor for // streamed mutations. // // mutation_hasher *IS NOT* compatible with hashing_partition_visitor. // // streamed_mutations do not guarantee that the emitted range tombstones // are disjoint. However, we need to hash them after they are made disjoint // because only in such form the hash won't depend on the unpredictable // factors (e.g. which sstables contain which parts of the mutation). 
template class mutation_hasher { const schema& _schema; Hasher& _hasher; bound_view::compare _cmp; range_tombstone_list _rt_list; bool _inside_range_tombstone = false; private: void consume_cell(const column_definition& col, const atomic_cell_or_collection& cell) { feed_hash(_hasher, col.name()); feed_hash(_hasher, col.type->name()); if (col.is_atomic()) { feed_hash(_hasher, cell.as_atomic_cell()); } else { feed_hash(_hasher, cell.as_collection_mutation()); } } void consume_range_tombstone_start(const range_tombstone& rt) { rt.start.feed_hash(_hasher, _schema); feed_hash(_hasher, rt.start_kind); feed_hash(_hasher, rt.tomb); } void consume_range_tombstone_end(const range_tombstone& rt) { rt.end.feed_hash(_hasher, _schema); feed_hash(_hasher, rt.end_kind); } void pop_rt_front() { auto& rt = *_rt_list.tombstones().begin(); _rt_list.tombstones().erase(_rt_list.begin()); current_deleter()(&rt); } void consume_range_tombstones_until(const clustering_row& cr) { while (!_rt_list.empty()) { auto it = _rt_list.begin(); if (_inside_range_tombstone) { if (_cmp(it->end_bound(), cr.key())) { consume_range_tombstone_end(*it); _inside_range_tombstone = false; pop_rt_front(); } else { break; } } else { if (_cmp(it->start_bound(), cr.key())) { consume_range_tombstone_start(*it); _inside_range_tombstone = true; } else { break; } } } } void consume_range_tombstones_until_end() { if (_inside_range_tombstone) { consume_range_tombstone_end(*_rt_list.begin()); pop_rt_front(); } for (auto&& rt : _rt_list) { consume_range_tombstone_start(rt); consume_range_tombstone_end(rt); } } public: mutation_hasher(const schema& s, Hasher& h) : _schema(s), _hasher(h), _cmp(s), _rt_list(s) { } stop_iteration consume(tombstone t) { feed_hash(_hasher, t); return stop_iteration::no; } stop_iteration consume(const static_row& sr) { sr.cells().for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) { auto&& col = _schema.static_column_at(id); consume_cell(col, cell); }); return 
stop_iteration::no; } stop_iteration consume(const clustering_row& cr) { consume_range_tombstones_until(cr); cr.key().feed_hash(_hasher, _schema); feed_hash(_hasher, cr.tomb()); feed_hash(_hasher, cr.marker()); cr.cells().for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) { auto&& col = _schema.regular_column_at(id); consume_cell(col, cell); }); return stop_iteration::no; } stop_iteration consume(range_tombstone&& rt) { _rt_list.apply(_schema, std::move(rt)); return stop_iteration::no; } void consume_end_of_stream() { consume_range_tombstones_until_end(); } };