/* * Copyright (C) 2016 ScyllaDB */ /* * This file is part of Scylla. * * Scylla is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Scylla is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Scylla. If not, see . */ #pragma once #include "mutation_fragment.hh" static inline bool has_ck_selector(const query::clustering_row_ranges& ranges) { // Like PK range, an empty row range, should be considered an "exclude all" restriction return ranges.empty() || std::any_of(ranges.begin(), ranges.end(), [](auto& r) { return !r.is_full(); }); } enum class emit_only_live_rows { no, yes, }; enum class compact_for_sstables { no, yes, }; GCC6_CONCEPT( template concept bool CompactedFragmentsConsumer = requires(T obj, tombstone t, const dht::decorated_key& dk, static_row sr, clustering_row cr, range_tombstone rt, tombstone current_tombstone, row_tombstone current_row_tombstone, bool is_alive) { obj.consume_new_partition(dk); obj.consume(t); { obj.consume(std::move(sr), current_tombstone, is_alive) } -> stop_iteration; { obj.consume(std::move(cr), current_row_tombstone, is_alive) } -> stop_iteration; { obj.consume(std::move(rt)) } -> stop_iteration; { obj.consume_end_of_partition() } -> stop_iteration; obj.consume_end_of_stream(); }; ) // emit_only_live::yes will cause compact_for_query to emit only live // static and clustering rows. It doesn't affect the way range tombstones are // emitted. template class compact_mutation_state { const schema& _schema; gc_clock::time_point _query_time; gc_clock::time_point _gc_before; std::function _get_max_purgeable; can_gc_fn _can_gc; api::timestamp_type _max_purgeable = api::missing_timestamp; const query::partition_slice& _slice; uint32_t _row_limit{}; uint32_t _partition_limit{}; uint32_t _partition_row_limit{}; range_tombstone_accumulator _range_tombstones; bool _static_row_live{}; uint32_t _rows_in_current_partition; uint32_t _current_partition_limit; bool _empty_partition{}; const dht::decorated_key* _dk{}; dht::decorated_key _last_dk; bool _has_ck_selector{}; std::optional _last_static_row; private: static constexpr bool only_live() { return OnlyLive == emit_only_live_rows::yes; } static constexpr bool sstable_compaction() { return SSTableCompaction == compact_for_sstables::yes; } template void partition_is_not_empty(Consumer& consumer) { if (_empty_partition) { _empty_partition = false; consumer.consume_new_partition(*_dk); auto pt = _range_tombstones.get_partition_tombstone(); if (pt && !can_purge_tombstone(pt)) { consumer.consume(pt); } } } bool can_purge_tombstone(const tombstone& t) { return t.deletion_time < _gc_before && can_gc(t); }; bool can_purge_tombstone(const row_tombstone& t) { return t.max_deletion_time() < _gc_before && can_gc(t.tomb()); }; bool can_gc(tombstone t) { if (!sstable_compaction()) { return true; } if (!t) { return false; } if (_max_purgeable == api::missing_timestamp) { _max_purgeable = _get_max_purgeable(*_dk); } return t.timestamp < _max_purgeable; }; public: struct parameters { static constexpr emit_only_live_rows only_live = OnlyLive; static constexpr compact_for_sstables sstable_compaction = SSTableCompaction; }; compact_mutation_state(compact_mutation_state&&) = delete; // Because 'this' is captured compact_mutation_state(const schema& s, gc_clock::time_point query_time, const query::partition_slice& slice, uint32_t limit, uint32_t partition_limit) : _schema(s) , _query_time(query_time) , _gc_before(saturating_subtract(query_time, s.gc_grace_seconds())) , _can_gc(always_gc) , _slice(slice) , _row_limit(limit) , _partition_limit(partition_limit) , _partition_row_limit(_slice.options.contains(query::partition_slice::option::distinct) ? 1 : slice.partition_row_limit()) , _range_tombstones(s, _slice.options.contains(query::partition_slice::option::reversed)) , _last_dk({dht::token(), partition_key::make_empty()}) { static_assert(!sstable_compaction(), "This constructor cannot be used for sstable compaction."); } compact_mutation_state(const schema& s, gc_clock::time_point compaction_time, std::function get_max_purgeable) : _schema(s) , _query_time(compaction_time) , _gc_before(saturating_subtract(_query_time, s.gc_grace_seconds())) , _get_max_purgeable(std::move(get_max_purgeable)) , _can_gc([this] (tombstone t) { return can_gc(t); }) , _slice(s.full_slice()) , _range_tombstones(s, false) , _last_dk({dht::token(), partition_key::make_empty()}) { static_assert(sstable_compaction(), "This constructor can only be used for sstable compaction."); static_assert(!only_live(), "SSTable compaction cannot be run with emit_only_live_rows::yes."); } void consume_new_partition(const dht::decorated_key& dk) { auto& pk = dk.key(); _dk = &dk; _has_ck_selector = has_ck_selector(_slice.row_ranges(_schema, pk)); _empty_partition = true; _rows_in_current_partition = 0; _static_row_live = false; _range_tombstones.clear(); _current_partition_limit = std::min(_row_limit, _partition_row_limit); _max_purgeable = api::missing_timestamp; _last_static_row.reset(); } template GCC6_CONCEPT( requires CompactedFragmentsConsumer ) void consume(tombstone t, Consumer& consumer) { _range_tombstones.set_partition_tombstone(t); if (!only_live() && !can_purge_tombstone(t)) { partition_is_not_empty(consumer); } } template GCC6_CONCEPT( requires CompactedFragmentsConsumer ) stop_iteration consume(static_row&& sr, Consumer& consumer) { _last_static_row = static_row(_schema, sr); auto current_tombstone = _range_tombstones.get_partition_tombstone(); bool is_live = sr.cells().compact_and_expire(_schema, column_kind::static_column, row_tombstone(current_tombstone), _query_time, _can_gc, _gc_before); _static_row_live = is_live; if (is_live || (!only_live() && !sr.empty())) { partition_is_not_empty(consumer); return consumer.consume(std::move(sr), current_tombstone, is_live); } return stop_iteration::no; } template GCC6_CONCEPT( requires CompactedFragmentsConsumer ) stop_iteration consume(clustering_row&& cr, Consumer& consumer) { auto current_tombstone = _range_tombstones.tombstone_for_row(cr.key()); auto t = cr.tomb(); if (t.tomb() <= current_tombstone || can_purge_tombstone(t)) { cr.remove_tombstone(); } t.apply(current_tombstone); bool is_live = cr.marker().compact_and_expire(t.tomb(), _query_time, _can_gc, _gc_before); is_live |= cr.cells().compact_and_expire(_schema, column_kind::regular_column, t, _query_time, _can_gc, _gc_before, cr.marker()); if (only_live() && is_live) { partition_is_not_empty(consumer); auto stop = consumer.consume(std::move(cr), t, true); if (++_rows_in_current_partition == _current_partition_limit) { return stop_iteration::yes; } return stop; } else if (!only_live()) { auto stop = stop_iteration::no; if (!cr.empty()) { partition_is_not_empty(consumer); stop = consumer.consume(std::move(cr), t, is_live); } if (!sstable_compaction() && is_live && ++_rows_in_current_partition == _current_partition_limit) { return stop_iteration::yes; } return stop; } return stop_iteration::no; } template GCC6_CONCEPT( requires CompactedFragmentsConsumer ) stop_iteration consume(range_tombstone&& rt, Consumer& consumer) { _range_tombstones.apply(rt); // FIXME: drop tombstone if it is fully covered by other range tombstones if (!can_purge_tombstone(rt.tomb) && rt.tomb > _range_tombstones.get_partition_tombstone()) { partition_is_not_empty(consumer); return consumer.consume(std::move(rt)); } return stop_iteration::no; } template GCC6_CONCEPT( requires CompactedFragmentsConsumer ) stop_iteration consume_end_of_partition(Consumer& consumer) { if (!_empty_partition) { // #589 - Do not add extra row for statics unless we did a CK range-less query. // See comment in query if (_rows_in_current_partition == 0 && _static_row_live && !_has_ck_selector) { ++_rows_in_current_partition; } _row_limit -= _rows_in_current_partition; _partition_limit -= _rows_in_current_partition > 0; auto stop = consumer.consume_end_of_partition(); if (!sstable_compaction()) { return _row_limit && _partition_limit && stop != stop_iteration::yes ? stop_iteration::no : stop_iteration::yes; } } return stop_iteration::no; } template GCC6_CONCEPT( requires CompactedFragmentsConsumer ) auto consume_end_of_stream(Consumer& consumer) { if (_dk) { _last_dk = *_dk; _dk = &_last_dk; } return consumer.consume_end_of_stream(); } /// The decorated key of the partition the compaction is positioned in. /// Can be null if the compaction wasn't started yet. const dht::decorated_key* current_partition() const { return _dk; } /// Reset limits and query-time to the new page's ones and re-emit the /// partition-header and static row if there are clustering rows or range /// tombstones left in the partition. template GCC6_CONCEPT( requires CompactedFragmentsConsumer ) void start_new_page(uint32_t row_limit, uint32_t partition_limit, gc_clock::time_point query_time, mutation_fragment::kind next_fragment_kind, Consumer& consumer) { _empty_partition = true; _static_row_live = false; _row_limit = row_limit; _partition_limit = partition_limit; _rows_in_current_partition = 0; _current_partition_limit = std::min(_row_limit, _partition_row_limit); _query_time = query_time; _gc_before = saturating_subtract(query_time, _schema.gc_grace_seconds()); if ((next_fragment_kind == mutation_fragment::kind::clustering_row || next_fragment_kind == mutation_fragment::kind::range_tombstone) && _last_static_row) { // Stopping here would cause an infinite loop so ignore return value. consume(*std::exchange(_last_static_row, {}), consumer); } } bool are_limits_reached() const { return _row_limit == 0 || _partition_limit == 0; } }; template GCC6_CONCEPT( requires CompactedFragmentsConsumer ) class compact_mutation { lw_shared_ptr> _state; Consumer _consumer; public: compact_mutation(const schema& s, gc_clock::time_point query_time, const query::partition_slice& slice, uint32_t limit, uint32_t partition_limit, Consumer consumer) : _state(make_lw_shared>(s, query_time, slice, limit, partition_limit)) , _consumer(std::move(consumer)) { } compact_mutation(const schema& s, gc_clock::time_point compaction_time, Consumer consumer, std::function get_max_purgeable) : _state(make_lw_shared>(s, compaction_time, get_max_purgeable)) , _consumer(std::move(consumer)) { } compact_mutation(lw_shared_ptr> state, Consumer consumer) : _state(std::move(state)) , _consumer(std::move(consumer)) { } void consume_new_partition(const dht::decorated_key& dk) { _state->consume_new_partition(dk); } void consume(tombstone t) { _state->consume(std::move(t), _consumer); } stop_iteration consume(static_row&& sr) { return _state->consume(std::move(sr), _consumer); } stop_iteration consume(clustering_row&& cr) { return _state->consume(std::move(cr), _consumer); } stop_iteration consume(range_tombstone&& rt) { return _state->consume(std::move(rt), _consumer); } stop_iteration consume_end_of_partition() { return _state->consume_end_of_partition(_consumer); } auto consume_end_of_stream() { return _state->consume_end_of_stream(_consumer); } }; template GCC6_CONCEPT( requires CompactedFragmentsConsumer ) struct compact_for_query : compact_mutation { using compact_mutation::compact_mutation; }; template using compact_for_query_state = compact_mutation_state; using compact_for_mutation_query_state = compact_for_query_state; using compact_for_data_query_state = compact_for_query_state; template GCC6_CONCEPT( requires CompactedFragmentsConsumer ) struct compact_for_compaction : compact_mutation { using compact_mutation::compact_mutation; };