Files
scylladb/mutation/collection_mutation.hh
Botond Dénes d1cb102bd2 mutation/collection_mutation: make collection_mutation::_data private
Nobody should be looking at the raw data storage directly. The
collection can be inspected via collection_mutation_view.
Added a data() && accessor, to be able to extract the raw data for
storage in atomic_cell_or_collection.
2026-05-21 10:36:59 +03:00

211 lines
7.5 KiB
C++

/*
* Copyright (C) 2019-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
*/
#pragma once
#include "utils/chunked_vector.hh"
#include "schema/schema_fwd.hh"
#include "gc_clock.hh"
#include "mutation/atomic_cell.hh"
#include "mutation/compact_and_expire_result.hh"
#include "compaction/compaction_garbage_collector.hh"
#include <iosfwd>
#include <forward_list>
#include <iterator>
#include <optional>
class abstract_type;
class compaction_garbage_collector;
class row_tombstone;
namespace ser {
class collection_cell_view;
}
class collection_mutation_view {
public:
managed_bytes_view data;
// Is this a noop mutation?
bool empty() const;
// Is any of the stored cells live (not deleted nor expired) at the time point `tp`,
// given the later of the tombstones `t` and the one stored in the mutation (if any)?
// Requires a type to reconstruct the structural information.
bool is_any_live(const abstract_type&, tombstone t = tombstone(), gc_clock::time_point tp = gc_clock::time_point::min()) const;
// The maximum of timestamps of the mutation's cells and tombstone.
api::timestamp_type last_update(const abstract_type&) const;
// Returns the collection-level tombstone, or an empty tombstone if none is present.
tombstone tomb() const;
// Returns the number of cells stored in the mutation.
uint32_t size() const;
// Forward iterator that deserializes cells on the fly.
// Each element is a (key, value) pair where key is a managed_bytes_view of the serialized
// cell key (path) and value is an atomic_cell_view of the serialized cell value.
// The iterator does not require type information to advance.
// The underlying collection_mutation_view must outlive the iterator.
class iterator {
public:
using iterator_category = std::forward_iterator_tag;
using iterator_concept = std::forward_iterator_tag;
using value_type = std::pair<managed_bytes_view, atomic_cell_view>;
using difference_type = std::ptrdiff_t;
using pointer = const value_type*;
using reference = const value_type&;
private:
managed_bytes_view _remaining;
uint32_t _remaining_count = 0;
std::optional<value_type> _current;
void advance();
explicit iterator(managed_bytes_view data);
public:
// Default-constructs an end iterator.
iterator() = default;
reference operator*() const { return *_current; }
pointer operator->() const { return &*_current; }
iterator& operator++() {
if (_remaining_count) {
advance();
--_remaining_count;
} else {
_current.reset();
}
return *this;
}
iterator operator++(int) {
auto tmp = *this;
++*this;
return tmp;
}
bool operator==(const iterator& o) const {
// End iterator has _remaining = 0 and _current = nullopt.
return _remaining_count == o._remaining_count && bool(_current) == bool(o._current);
}
friend class collection_mutation_view;
};
iterator begin() const;
iterator end() const;
class printer {
const abstract_type& _type;
const collection_mutation_view& _cmv;
public:
printer(const abstract_type& type, const collection_mutation_view& cmv)
: _type(type), _cmv(cmv) {}
friend fmt::formatter<printer>;
};
};
// A serialized mutation of a collection of cells.
// Used to represent mutations of collections (lists, maps, sets) or non-frozen user defined types.
// It contains a sequence of cells, each representing a mutation of a single entry (element or field) of the collection.
// Each cell has an associated 'key' (or 'path'). The meaning of each (key, cell) pair is:
// for sets: the key is the serialized set element, the cell contains no data (except liveness information),
// for maps: the key is the serialized map element's key, the cell contains the serialized map element's value,
// for lists: the key is a timeuuid identifying the list entry, the cell contains the serialized value,
// for user types: the key is an index identifying the field, the cell contains the value of the field.
// The mutation may also contain a collection-wide tombstone.
class collection_mutation {
managed_bytes _data;
public:
collection_mutation();
collection_mutation(collection_mutation_view);
collection_mutation(managed_bytes);
operator collection_mutation_view() const;
managed_bytes&& data() && { return std::move(_data); }
};
class collection_mutation_writer {
public:
using value_type = std::pair<managed_bytes_view, atomic_cell_view>;
private:
bytes_ostream _out;
bytes::value_type* _size_buffer;
tombstone _tomb;
int32_t _size{0};
public:
explicit collection_mutation_writer(tombstone tomb);
bool empty() const {
return !_tomb && _size == 0;
}
tombstone tombstone() const {
return _tomb;
}
void push_back(managed_bytes_view key, atomic_cell_view value);
void push_back(managed_bytes_view key, atomic_cell value) {
push_back(std::move(key), atomic_cell_view(value));
}
void push_back(value_type kv) {
push_back(std::move(kv.first), std::move(kv.second));
}
collection_mutation finish() &&;
};
struct collection_mutation_compact_and_expire_result {
collection_mutation collection; // can be empty
compact_and_expire_result result;
};
// Expires cells based on query_time. Expires tombstones based on max_purgeable and gc_before.
// Removes cells covered by base_tomb or cmv.tomb.
collection_mutation_compact_and_expire_result compact_and_expire(
collection_mutation_view cmv,
column_id id,
const abstract_type& type,
row_tombstone base_tomb,
gc_clock::time_point query_time,
can_gc_fn& can_gc,
gc_clock::time_point gc_before,
compaction_garbage_collector* collector);
collection_mutation merge(const abstract_type&, collection_mutation_view, collection_mutation_view);
collection_mutation difference(const abstract_type&, collection_mutation_view, collection_mutation_view);
// Transcode a collection from the IDL representation directly into the
// collection_mutation serialization format, without using any intermediary representation.
// Only the final collection-mutation blob is allocated, no intermediate allocations needed.
// Safe to use in LSA, it won't produce garbage.
collection_mutation read_from_collection_cell_view(const abstract_type&, const ser::collection_cell_view&);
// Serializes the given collection of cells to a sequence of bytes ready to be sent over the CQL protocol.
bytes_ostream serialize_for_cql(const abstract_type&, collection_mutation_view);
// Like serialize_for_cql, but uses an extended format that embeds per-element
// timestamps and expiries, for use with WRITETIME(col[key]) / TTL(col[key])
// and WRITETIME(col.field) / TTL(col.field) selectors.
// The format is: [cql-bytes-length as uint32][regular CQL bytes][count as int32]
// [per-element: (key-len as int32)(key bytes)(timestamp as int64)(expiry as int64 in gc_clock ticks, -1 if none)]
bytes_ostream serialize_for_cql_with_timestamps(const abstract_type&, collection_mutation_view);
template <>
struct fmt::formatter<collection_mutation_view::printer> : fmt::formatter<string_view> {
auto format(const collection_mutation_view::printer&, fmt::format_context& ctx) const
-> decltype(ctx.out());
};