From 6a729b4e9e514de8ee8a44c18d0aea68a8eead60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Botond=20D=C3=A9nes?= Date: Tue, 17 Mar 2026 16:45:22 +0200 Subject: [PATCH] mutation/collection_mutation: collection_mutation_view: add methods to inspect content Add size, tombstone getters and begin()/end() to iterate over the cells. Allows inspecting the content of the collection in-place, without deserializing into an intermediate representation (collection_mutation_description[_view]). This will be used to gradually replace all usage of collection_mutation_description[_view]. Deliberately avoiding the use of collection_mutation_input_stream as that one is also a target for elimination. The names for the accessors are tomb() and size(), following existing conventions in mutation/. Also rename is_empty() -> empty() to align with this convention. There is only a single caller to update. All accessors (new or pre-existing) are made to work with default-constructed `collection_mutation` (i.e. one containing an empty buffer). No users yet. --- mutation/collection_mutation.cc | 59 +++++++++++++++++++++++++++++- mutation/collection_mutation.hh | 64 ++++++++++++++++++++++++++++++++- mutation/mutation_partition.cc | 2 +- 3 files changed, 122 insertions(+), 3 deletions(-) diff --git a/mutation/collection_mutation.cc b/mutation/collection_mutation.cc index 6c7dc896a9..57aaeb5567 100644 --- a/mutation/collection_mutation.cc +++ b/mutation/collection_mutation.cc @@ -54,7 +54,28 @@ collection_mutation_view atomic_cell_or_collection::as_collection_mutation() con return collection_mutation_view{managed_bytes_view(_data)}; } -bool collection_mutation_view::is_empty() const { +namespace { + +// Reads (and consumes) the tombstone prefix from `v`. Returns the tombstone, +// which is empty if the has_tombstone flag was not set. 
+tombstone read_collection_tombstone(managed_bytes_view& v) { + if (read_simple(v)) { + auto timestamp = read_simple(v); + auto deletion_time = read_simple(v); + return tombstone{timestamp, gc_clock::time_point(gc_clock::duration(deletion_time))}; + } + return tombstone{}; +} + +// Reads (and consumes) the cell-count field from `v`, assuming the tombstone +// prefix has already been consumed. +uint32_t read_collection_size(managed_bytes_view& v) { + return read_simple(v); +} + +} // anonymous namespace + +bool collection_mutation_view::empty() const { auto in = collection_mutation_input_stream(data); auto has_tomb = in.read_trivial(); return !has_tomb && in.read_trivial() == 0; @@ -104,6 +125,42 @@ api::timestamp_type collection_mutation_view::last_update(const abstract_type& t return max; } +tombstone collection_mutation_view::tomb() const { + auto v = data; + return read_collection_tombstone(v); +} + +uint32_t collection_mutation_view::size() const { + auto v = data; + read_collection_tombstone(v); // skip tombstone if present + return read_collection_size(v); +} + +collection_mutation_view::iterator::iterator(managed_bytes_view data) { + read_collection_tombstone(data); // skip tombstone if present + _remaining_count = read_collection_size(data); + _remaining = data; + ++*this; +} + +void collection_mutation_view::iterator::advance() { + auto key_size = read_simple(_remaining); + auto key = _remaining.prefix(key_size); + _remaining.remove_prefix(key_size); + auto vsize = read_simple(_remaining); + auto value = _remaining.prefix(vsize); + _remaining.remove_prefix(vsize); + _current = value_type{key, atomic_cell_view::from_bytes(value)}; +} + +collection_mutation_view::iterator collection_mutation_view::begin() const { + return iterator(data); +} + +collection_mutation_view::iterator collection_mutation_view::end() const { + return iterator{}; +} + auto fmt::formatter::format(const collection_mutation_view::printer& cmvp, fmt::format_context& ctx) const -> 
decltype(ctx.out()) { auto out = ctx.out(); diff --git a/mutation/collection_mutation.hh b/mutation/collection_mutation.hh index e0c5f94314..e324aa22c4 100644 --- a/mutation/collection_mutation.hh +++ b/mutation/collection_mutation.hh @@ -16,6 +16,8 @@ #include "compaction/compaction_garbage_collector.hh" #include #include +#include +#include class abstract_type; class compaction_garbage_collector; @@ -83,7 +85,7 @@ public: managed_bytes_view data; // Is this a noop mutation? - bool is_empty() const; + bool empty() const; // Is any of the stored cells live (not deleted nor expired) at the time point `tp`, // given the later of the tombstones `t` and the one stored in the mutation (if any)? @@ -101,6 +103,66 @@ public: return f(deserialize_collection_mutation(type, stream)); } + // Returns the collection-level tombstone, or an empty tombstone if none is present. + tombstone tomb() const; + + // Returns the number of cells stored in the mutation. + uint32_t size() const; + + // Forward iterator that deserializes cells on the fly. + // Each element is a (key, value) pair where key is a managed_bytes_view of the serialized + // cell key (path) and value is an atomic_cell_view of the serialized cell value. + // The iterator does not require type information to advance. + // The underlying collection_mutation_view must outlive the iterator. + class iterator { + public: + using iterator_category = std::forward_iterator_tag; + using iterator_concept = std::forward_iterator_tag; + using value_type = std::pair; + using difference_type = std::ptrdiff_t; + using pointer = const value_type*; + using reference = const value_type&; + private: + managed_bytes_view _remaining; + uint32_t _remaining_count = 0; + std::optional _current; + + void advance(); + explicit iterator(managed_bytes_view data); + public: + // Default-constructs an end iterator. 
+ iterator() = default; + + reference operator*() const { return *_current; } + pointer operator->() const { return &*_current; } + + iterator& operator++() { + if (_remaining_count) { + advance(); + --_remaining_count; + } else { + _current.reset(); + } + return *this; + } + + iterator operator++(int) { + auto tmp = *this; + ++*this; + return tmp; + } + + bool operator==(const iterator& o) const { + // End iterator has _remaining_count = 0 and _current = nullopt. + return _remaining_count == o._remaining_count && bool(_current) == bool(o._current); + } + + friend class collection_mutation_view; + }; + + iterator begin() const; + iterator end() const; + class printer { const abstract_type& _type; const collection_mutation_view& _cmv; diff --git a/mutation/mutation_partition.cc b/mutation/mutation_partition.cc index d4e027345c..27ec5e7cc8 100644 --- a/mutation/mutation_partition.cc +++ b/mutation/mutation_partition.cc @@ -1782,7 +1782,7 @@ row row::difference(const schema& s, column_kind kind, const row& other) const } else { auto diff = ::difference(*cdef.type, c->cell.as_collection_mutation(), it->cell.as_collection_mutation()); - if (!static_cast(diff).is_empty()) { + if (!static_cast(diff).empty()) { r.append_cell(c.key(), std::move(diff)); } }