From cec9b2d1141a4fc35bfeee793d2371b541fbb21a Mon Sep 17 00:00:00 2001
From: Tomasz Grabiec
Date: Mon, 19 Dec 2022 16:54:23 +0100
Subject: [PATCH] mutation_partition_v2: Implement compact()

For convenience, will be used in unit tests.
---
 mutation_partition_v2.cc | 39 +++++++++++++++++++++++++++++++++++++++
 mutation_partition_v2.hh | 10 ++++++++++
 2 files changed, 49 insertions(+)

diff --git a/mutation_partition_v2.cc b/mutation_partition_v2.cc
index 35655c957d..2f76aff9f7 100644
--- a/mutation_partition_v2.cc
+++ b/mutation_partition_v2.cc
@@ -1166,3 +1166,42 @@ mutation_partition_v2::maybe_drop(const schema& s,
     del(&e);
     return next_i;
 }
+
+void mutation_partition_v2::compact(const schema& s, cache_tracker* tracker) {
+    mutation_application_stats stats; // Passed to maybe_drop(); not read by this function.
+    auto i = _rows.begin();
+    rows_type::iterator prev_i; // Entry compacted in the previous iteration. NOTE(review): presumably tests false until assigned - confirm.
+    while (i != _rows.end()) {
+        i->compact(s, _tombstone);
+        if (prev_i) {
+            // maybe_drop() is applied to the previous entry rather than to i, because an entry may become redundant
+            // only after its successor has been compacted, e.g. when the successor's range tombstone is dropped.
+            maybe_drop(s, tracker, prev_i, stats);
+        }
+        prev_i = i++;
+    }
+    if (prev_i) { // The last entry has no successor, so it is examined once the loop is done.
+        maybe_drop(s, tracker, prev_i, stats);
+    }
+}
+
+bool has_redundant_dummies(const mutation_partition_v2& p) {
+    bool last_dummy = false; // last_* describe the previously visited entry; nothing has been visited yet.
+    bool last_cont = false;
+    tombstone last_rt;
+    auto i = p.clustered_rows().begin();
+    while (i != p.clustered_rows().end()) {
+        const rows_entry& e = *i;
+        if (last_dummy) { // The previous entry is a dummy: report it when e repeats its continuity and range tombstone.
+            bool redundant = last_cont == bool(e.continuous()) && last_rt == e.range_tombstone();
+            if (redundant) {
+                return true;
+            }
+        }
+        last_dummy = bool(e.dummy());
+        last_rt = e.range_tombstone();
+        last_cont = bool(e.continuous());
+        ++i;
+    }
+    return false;
+}
diff --git a/mutation_partition_v2.hh b/mutation_partition_v2.hh
index d5d5d924ed..cd81ebb37d 100644
--- a/mutation_partition_v2.hh
+++ b/mutation_partition_v2.hh
@@ -226,6 +226,12 @@ public:
     // Strong exception guarantees.
     void upgrade(const schema& old_schema, const schema& new_schema);
 
+    // Compacts this instance in place into a minimal form which still represents the same set of writes.
+    // Expired data is not garbage-collected, so the outcome does not depend on the clock and
+    // should be identical on all replicas.
+    // Postcondition: has_redundant_dummies(*this) is false.
+    void compact(const schema&, cache_tracker*);
+
     mutation_partition as_mutation_partition(const schema&) const;
 private:
     // Erases the entry if it's safe to do so without changing the logical state of the partition.
@@ -292,3 +298,7 @@ inline
 mutation_partition_v2& mutation_partition_v2::container_of(rows_type& rows) {
     return *boost::intrusive::get_parent_from_member(&rows, &mutation_partition_v2::_rows);
 }
+
+// Returns true iff the partition contains at least one redundant dummy row,
+// i.e. a dummy which could be removed without changing the set of writes the partition represents.
+bool has_redundant_dummies(const mutation_partition_v2&);