mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-30 19:46:48 +00:00
Nobody should be looking at the raw data storage directly. The collection can be inspected via collection_mutation_view. Added a data() && accessor, to be able to extract the raw data for storage in atomic_cell_or_collection.
211 lines
7.5 KiB
C++
211 lines
7.5 KiB
C++
/*
 * Copyright (C) 2019-present ScyllaDB
 */

/*
 * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
 */
|
|
|
|
#pragma once
|
|
|
|
#include "utils/chunked_vector.hh"
|
|
#include "schema/schema_fwd.hh"
|
|
#include "gc_clock.hh"
|
|
#include "mutation/atomic_cell.hh"
|
|
#include "mutation/compact_and_expire_result.hh"
|
|
#include "compaction/compaction_garbage_collector.hh"
|
|
#include <iosfwd>
|
|
#include <forward_list>
|
|
#include <iterator>
|
|
#include <optional>
|
|
|
|
// Types this header refers to without needing their full definitions.
class abstract_type;
class compaction_garbage_collector;
class row_tombstone;

namespace ser { class collection_cell_view; }
|
|
|
|
class collection_mutation_view {
|
|
public:
|
|
managed_bytes_view data;
|
|
|
|
// Is this a noop mutation?
|
|
bool empty() const;
|
|
|
|
// Is any of the stored cells live (not deleted nor expired) at the time point `tp`,
|
|
// given the later of the tombstones `t` and the one stored in the mutation (if any)?
|
|
// Requires a type to reconstruct the structural information.
|
|
bool is_any_live(const abstract_type&, tombstone t = tombstone(), gc_clock::time_point tp = gc_clock::time_point::min()) const;
|
|
|
|
// The maximum of timestamps of the mutation's cells and tombstone.
|
|
api::timestamp_type last_update(const abstract_type&) const;
|
|
|
|
// Returns the collection-level tombstone, or an empty tombstone if none is present.
|
|
tombstone tomb() const;
|
|
|
|
// Returns the number of cells stored in the mutation.
|
|
uint32_t size() const;
|
|
|
|
// Forward iterator that deserializes cells on the fly.
|
|
// Each element is a (key, value) pair where key is a managed_bytes_view of the serialized
|
|
// cell key (path) and value is an atomic_cell_view of the serialized cell value.
|
|
// The iterator does not require type information to advance.
|
|
// The underlying collection_mutation_view must outlive the iterator.
|
|
class iterator {
|
|
public:
|
|
using iterator_category = std::forward_iterator_tag;
|
|
using iterator_concept = std::forward_iterator_tag;
|
|
using value_type = std::pair<managed_bytes_view, atomic_cell_view>;
|
|
using difference_type = std::ptrdiff_t;
|
|
using pointer = const value_type*;
|
|
using reference = const value_type&;
|
|
private:
|
|
managed_bytes_view _remaining;
|
|
uint32_t _remaining_count = 0;
|
|
std::optional<value_type> _current;
|
|
|
|
void advance();
|
|
explicit iterator(managed_bytes_view data);
|
|
public:
|
|
// Default-constructs an end iterator.
|
|
iterator() = default;
|
|
|
|
reference operator*() const { return *_current; }
|
|
pointer operator->() const { return &*_current; }
|
|
|
|
iterator& operator++() {
|
|
if (_remaining_count) {
|
|
advance();
|
|
--_remaining_count;
|
|
} else {
|
|
_current.reset();
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
iterator operator++(int) {
|
|
auto tmp = *this;
|
|
++*this;
|
|
return tmp;
|
|
}
|
|
|
|
bool operator==(const iterator& o) const {
|
|
// End iterator has _remaining = 0 and _current = nullopt.
|
|
return _remaining_count == o._remaining_count && bool(_current) == bool(o._current);
|
|
}
|
|
|
|
friend class collection_mutation_view;
|
|
};
|
|
|
|
iterator begin() const;
|
|
iterator end() const;
|
|
|
|
class printer {
|
|
const abstract_type& _type;
|
|
const collection_mutation_view& _cmv;
|
|
public:
|
|
printer(const abstract_type& type, const collection_mutation_view& cmv)
|
|
: _type(type), _cmv(cmv) {}
|
|
friend fmt::formatter<printer>;
|
|
};
|
|
};
|
|
|
|
// A serialized mutation of a collection of cells.
|
|
// Used to represent mutations of collections (lists, maps, sets) or non-frozen user defined types.
|
|
// It contains a sequence of cells, each representing a mutation of a single entry (element or field) of the collection.
|
|
// Each cell has an associated 'key' (or 'path'). The meaning of each (key, cell) pair is:
|
|
// for sets: the key is the serialized set element, the cell contains no data (except liveness information),
|
|
// for maps: the key is the serialized map element's key, the cell contains the serialized map element's value,
|
|
// for lists: the key is a timeuuid identifying the list entry, the cell contains the serialized value,
|
|
// for user types: the key is an index identifying the field, the cell contains the value of the field.
|
|
// The mutation may also contain a collection-wide tombstone.
|
|
class collection_mutation {
|
|
managed_bytes _data;
|
|
|
|
public:
|
|
collection_mutation();
|
|
collection_mutation(collection_mutation_view);
|
|
collection_mutation(managed_bytes);
|
|
operator collection_mutation_view() const;
|
|
|
|
managed_bytes&& data() && { return std::move(_data); }
|
|
};
|
|
|
|
class collection_mutation_writer {
|
|
public:
|
|
using value_type = std::pair<managed_bytes_view, atomic_cell_view>;
|
|
|
|
private:
|
|
bytes_ostream _out;
|
|
bytes::value_type* _size_buffer;
|
|
|
|
tombstone _tomb;
|
|
int32_t _size{0};
|
|
public:
|
|
explicit collection_mutation_writer(tombstone tomb);
|
|
|
|
bool empty() const {
|
|
return !_tomb && _size == 0;
|
|
}
|
|
|
|
tombstone tombstone() const {
|
|
return _tomb;
|
|
}
|
|
|
|
void push_back(managed_bytes_view key, atomic_cell_view value);
|
|
void push_back(managed_bytes_view key, atomic_cell value) {
|
|
push_back(std::move(key), atomic_cell_view(value));
|
|
}
|
|
|
|
void push_back(value_type kv) {
|
|
push_back(std::move(kv.first), std::move(kv.second));
|
|
}
|
|
|
|
collection_mutation finish() &&;
|
|
};
|
|
|
|
struct collection_mutation_compact_and_expire_result {
|
|
collection_mutation collection; // can be empty
|
|
compact_and_expire_result result;
|
|
};
|
|
|
|
// Expires cells based on query_time. Expires tombstones based on max_purgeable and gc_before.
|
|
// Removes cells covered by base_tomb or cmv.tomb.
|
|
collection_mutation_compact_and_expire_result compact_and_expire(
|
|
collection_mutation_view cmv,
|
|
column_id id,
|
|
const abstract_type& type,
|
|
row_tombstone base_tomb,
|
|
gc_clock::time_point query_time,
|
|
can_gc_fn& can_gc,
|
|
gc_clock::time_point gc_before,
|
|
compaction_garbage_collector* collector);
|
|
|
|
// Builds one collection mutation out of the two mutations `a` and `b`, interpreted
// according to `type`.
// NOTE(review): the reconciliation rules are in the definition, which is not visible
// from this header.
collection_mutation merge(const abstract_type& type, collection_mutation_view a, collection_mutation_view b);
|
|
|
|
// Builds the collection mutation that is the difference of `a` and `b`, interpreted
// according to `type`.
// NOTE(review): presumably the part of `a` that is not covered by `b` - confirm against
// the definition, which is not visible from this header.
collection_mutation difference(const abstract_type& type, collection_mutation_view a, collection_mutation_view b);
|
|
|
|
// Transcode a collection from the IDL representation directly into the
|
|
// collection_mutation serialization format, without using any intermediary representation.
|
|
// Only the final collection-mutation blob is allocated, no intermediate allocations needed.
|
|
// Safe to use in LSA, it won't produce garbage.
|
|
collection_mutation read_from_collection_cell_view(const abstract_type&, const ser::collection_cell_view&);
|
|
|
|
// Serializes the given collection of cells to a sequence of bytes ready to be sent over the CQL protocol.
|
|
bytes_ostream serialize_for_cql(const abstract_type&, collection_mutation_view);
|
|
|
|
// Like serialize_for_cql, but uses an extended format that embeds per-element
|
|
// timestamps and expiries, for use with WRITETIME(col[key]) / TTL(col[key])
|
|
// and WRITETIME(col.field) / TTL(col.field) selectors.
|
|
// The format is: [cql-bytes-length as uint32][regular CQL bytes][count as int32]
|
|
// [per-element: (key-len as int32)(key bytes)(timestamp as int64)(expiry as int64 in gc_clock ticks, -1 if none)]
|
|
bytes_ostream serialize_for_cql_with_timestamps(const abstract_type&, collection_mutation_view);
|
|
|
|
template <>
|
|
struct fmt::formatter<collection_mutation_view::printer> : fmt::formatter<string_view> {
|
|
auto format(const collection_mutation_view::printer&, fmt::format_context& ctx) const
|
|
-> decltype(ctx.out());
|
|
};
|