mutation/collection_mutation: collection_mutation_view: add methods to inspect content

Add size, tombstone getters and begin()/end() to iterate over the cells.
Allows inspecting the content of the collection in-place, without
deserializing into an intermediate representation
(collection_mutation_description[_view]). This will be used to gradually
replace all usage of collection_mutation_description[_view].
Deliberately avoiding the use of collection_mutation_input_stream as that
one is also a target for elimination.

The names for the accessors are tomb() and size(), following existing
conventions in mutation/. Also rename is_empty() -> empty() to align
with this convention. There is a single caller to update only.

All accessors (new or pre-existing) are made to work with
default-constructed `collection_mutation` (i.e. one containing empty
buffer).

No users yet.
This commit is contained in:
Botond Dénes
2026-03-17 16:45:22 +02:00
parent 11d7128bf6
commit 6a729b4e9e
3 changed files with 122 additions and 3 deletions

View File

@@ -54,7 +54,28 @@ collection_mutation_view atomic_cell_or_collection::as_collection_mutation() con
return collection_mutation_view{managed_bytes_view(_data)};
}
bool collection_mutation_view::is_empty() const {
namespace {

// Consumes the tombstone prefix at the front of `v`: a one-byte flag which,
// when non-zero, is followed by the tombstone's timestamp and deletion time.
// Returns the decoded tombstone, or an empty tombstone when the flag is zero.
tombstone read_collection_tombstone(managed_bytes_view& v) {
    const bool has_tombstone = read_simple<uint8_t>(v);
    if (!has_tombstone) {
        return tombstone{};
    }
    const auto timestamp = read_simple<api::timestamp_type>(v);
    const auto deletion_time = read_simple<gc_clock::duration::rep>(v);
    return tombstone{timestamp, gc_clock::time_point(gc_clock::duration(deletion_time))};
}

// Consumes the cell-count field at the front of `v` and returns it.
// The caller must already have consumed the tombstone prefix.
uint32_t read_collection_size(managed_bytes_view& v) {
    const auto cell_count = read_simple<uint32_t>(v);
    return cell_count;
}

} // anonymous namespace
bool collection_mutation_view::empty() const {
auto in = collection_mutation_input_stream(data);
auto has_tomb = in.read_trivial<uint8_t>();
return !has_tomb && in.read_trivial<uint32_t>() == 0;
@@ -104,6 +125,42 @@ api::timestamp_type collection_mutation_view::last_update(const abstract_type& t
return max;
}
// Returns the collection-level tombstone stored in the serialized mutation,
// or an empty tombstone if none is stored.
tombstone collection_mutation_view::tomb() const {
    // An empty buffer (e.g. the view of a default-constructed
    // collection_mutation) has no header at all, so there is no
    // has_tombstone flag to read: report "no tombstone".
    if (data.empty()) {
        return tombstone{};
    }
    auto v = data; // local cursor; reading consumes the copy, not `data`
    return read_collection_tombstone(v);
}
// Returns the number of cells stored in the serialized mutation.
uint32_t collection_mutation_view::size() const {
    // An empty buffer (e.g. the view of a default-constructed
    // collection_mutation) has neither a tombstone prefix nor a cell-count
    // field: it holds no cells.
    if (data.empty()) {
        return 0;
    }
    auto v = data; // local cursor; reading consumes the copy, not `data`
    read_collection_tombstone(v); // skip tombstone if present
    return read_collection_size(v);
}
// Builds an iterator positioned on the first cell serialized in `data`.
// If `data` holds no cells, the result compares equal to the end iterator.
collection_mutation_view::iterator::iterator(managed_bytes_view data) {
    // An empty buffer (e.g. the view of a default-constructed
    // collection_mutation) has no header to parse. Leaving every member at
    // its default makes this iterator identical to a default-constructed
    // (end) iterator.
    if (data.empty()) {
        return;
    }
    read_collection_tombstone(data); // skip tombstone if present
    _remaining_count = read_collection_size(data);
    _remaining = data;
    // Decode the first cell, or become the end iterator if the count is zero.
    ++*this;
}
void collection_mutation_view::iterator::advance() {
auto key_size = read_simple<uint32_t>(_remaining);
auto key = _remaining.prefix(key_size);
_remaining.remove_prefix(key_size);
auto vsize = read_simple<uint32_t>(_remaining);
auto value = _remaining.prefix(vsize);
_remaining.remove_prefix(vsize);
_current = value_type{key, atomic_cell_view::from_bytes(value)};
}
// Returns an iterator over the cells serialized in this view's buffer,
// positioned on the first cell.
collection_mutation_view::iterator collection_mutation_view::begin() const {
    iterator first(data);
    return first;
}
// Returns the past-the-end iterator, which is a default-constructed iterator.
collection_mutation_view::iterator collection_mutation_view::end() const {
    return {};
}
auto fmt::formatter<collection_mutation_view::printer>::format(const collection_mutation_view::printer& cmvp, fmt::format_context& ctx) const
-> decltype(ctx.out()) {
auto out = ctx.out();

View File

@@ -16,6 +16,8 @@
#include "compaction/compaction_garbage_collector.hh"
#include <iosfwd>
#include <forward_list>
#include <iterator>
#include <optional>
class abstract_type;
class compaction_garbage_collector;
@@ -83,7 +85,7 @@ public:
managed_bytes_view data;
// Is this a noop mutation?
bool is_empty() const;
bool empty() const;
// Is any of the stored cells live (not deleted nor expired) at the time point `tp`,
// given the later of the tombstones `t` and the one stored in the mutation (if any)?
@@ -101,6 +103,66 @@ public:
return f(deserialize_collection_mutation(type, stream));
}
// Returns the collection-level tombstone, or an empty tombstone if none is present.
tombstone tomb() const;
// Returns the number of cells stored in the mutation.
uint32_t size() const;
// Forward iterator that deserializes cells on the fly.
// Each element is a (key, value) pair where key is a managed_bytes_view of the serialized
// cell key (path) and value is an atomic_cell_view of the serialized cell value.
// The iterator does not require type information to advance.
// The underlying collection_mutation_view must outlive the iterator.
class iterator {
public:
using iterator_category = std::forward_iterator_tag;
using iterator_concept = std::forward_iterator_tag;
using value_type = std::pair<managed_bytes_view, atomic_cell_view>;
using difference_type = std::ptrdiff_t;
using pointer = const value_type*;
using reference = const value_type&;
private:
managed_bytes_view _remaining;
uint32_t _remaining_count = 0;
std::optional<value_type> _current;
void advance();
explicit iterator(managed_bytes_view data);
public:
// Default-constructs an end iterator.
iterator() = default;
reference operator*() const { return *_current; }
pointer operator->() const { return &*_current; }
iterator& operator++() {
if (_remaining_count) {
advance();
--_remaining_count;
} else {
_current.reset();
}
return *this;
}
iterator operator++(int) {
auto tmp = *this;
++*this;
return tmp;
}
bool operator==(const iterator& o) const {
// End iterator has _remaining = 0 and _current = nullopt.
return _remaining_count == o._remaining_count && bool(_current) == bool(o._current);
}
friend class collection_mutation_view;
};
iterator begin() const;
iterator end() const;
class printer {
const abstract_type& _type;
const collection_mutation_view& _cmv;

View File

@@ -1782,7 +1782,7 @@ row row::difference(const schema& s, column_kind kind, const row& other) const
} else {
auto diff = ::difference(*cdef.type,
c->cell.as_collection_mutation(), it->cell.as_collection_mutation());
if (!static_cast<collection_mutation_view>(diff).is_empty()) {
if (!static_cast<collection_mutation_view>(diff).empty()) {
r.append_cell(c.key(), std::move(diff));
}
}