Files
scylladb/bytes_ostream.hh
Avi Kivity 11d651b606 utils/managed_bytes, serializer: add conversion between buffer_view<bytes_ostream> and managed_bytes_view
The codebase evolved to have several different ways to hold a fragmented
buffer: fragmented_temporary_buffer (for data received from the network;
not relevant for this discussion); bytes_ostream (for fragmented data that
is built incrementally; also used for a serialized result_set), and
managed_bytes (used for lsa and serialized individual values in
expression evaluation).

One problem with this state of affairs is that using data in one
fragmented form with functions that accept another fragmented form
requires either a copy, or templating everything. The former is
unpalatable for fast-path code, and the latter is undesirable for
compile time and run-time code footprint. So we'd like to make
the various forms compatible.

In 53e0dc7530 ("bytes_ostream: base on managed_bytes") we changed
bytes_ostream to have the same underlying data structure as
managed_bytes, so all that remains is to add the right API. This
is somewhat difficult as the data is hidden in multiple layers:
ser::buffer_view<> is used to abstract a slice of bytes_ostream,
and this is further abstracted by using iterators into bytes_ostream
rather than directly using the internals. Likewise, it's impossible
to construct a managed_bytes_view from the internals.

Hack through all of these by adding extract_implementation() methods,
and a build_managed_bytes_view_from_internals() helper. These are all
used by new APIs buffer_view_to_managed_bytes_view() that extract
the internals and put them back together again.

Ideally we wouldn't need any of this, but unifying the type system
in this area is quite an undertaking, so we need some shortcuts.
2023-05-07 17:17:34 +03:00

491 lines
15 KiB
C++

/*
* Copyright (C) 2015-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#pragma once
#include <boost/range/iterator_range.hpp>
#include "bytes.hh"
#include "utils/managed_bytes.hh"
#include "utils/hashing.hh"
#include <seastar/core/simple-stream.hh>
#include <seastar/core/loop.hh>
#include <bit>
#include <concepts>
/**
* Utility for writing data into a buffer when its final size is not known up front.
*
* Internally the data is written into a chain of chunks allocated on-demand.
* No resizing of previously written data happens.
*
*/
class bytes_ostream {
public:
using size_type = bytes::size_type;
using value_type = bytes::value_type;
using fragment_type = bytes_view;
static constexpr size_type max_chunk_size() { return max_alloc_size() - sizeof(chunk); }
private:
static_assert(sizeof(value_type) == 1, "value_type is assumed to be one byte long");
// Note: while appending data, chunk::size refers to the allocated space in the chunk,
// and chunk::frag_size refers to the currently occupied space in the chunk.
// After building, the first chunk::size is the whole object size, and chunk::frag_size
// doesn't change. This fits with managed_bytes interpretation.
using chunk = blob_storage;
static constexpr size_type default_chunk_size{512};
static constexpr size_type max_alloc_size() { return 128 * 1024; }
private:
blob_storage::ref_type _begin;
chunk* _current;
size_type _size;
size_type _initial_chunk_size = default_chunk_size;
public:
class fragment_iterator {
public:
using iterator_category = std::input_iterator_tag;
using value_type = bytes_view;
using difference_type = std::ptrdiff_t;
using pointer = bytes_view*;
using reference = bytes_view&;
struct implementation {
blob_storage* current_chunk;
};
private:
chunk* _current = nullptr;
public:
fragment_iterator() = default;
fragment_iterator(chunk* current) : _current(current) {}
fragment_iterator(const fragment_iterator&) = default;
fragment_iterator& operator=(const fragment_iterator&) = default;
bytes_view operator*() const {
return { _current->data, _current->frag_size };
}
bytes_view operator->() const {
return *(*this);
}
fragment_iterator& operator++() {
_current = _current->next;
return *this;
}
fragment_iterator operator++(int) {
fragment_iterator tmp(*this);
++(*this);
return tmp;
}
bool operator==(const fragment_iterator&) const = default;
implementation extract_implementation() const {
return implementation {
.current_chunk = _current,
};
}
};
using const_iterator = fragment_iterator;
class output_iterator {
public:
using iterator_category = std::output_iterator_tag;
using difference_type = std::ptrdiff_t;
using value_type = bytes_ostream::value_type;
using pointer = bytes_ostream::value_type*;
using reference = bytes_ostream::value_type&;
friend class bytes_ostream;
private:
bytes_ostream* _ostream = nullptr;
private:
explicit output_iterator(bytes_ostream& os) : _ostream(&os) { }
public:
reference operator*() const { return *_ostream->write_place_holder(1); }
output_iterator& operator++() { return *this; }
output_iterator operator++(int) { return *this; }
};
private:
inline size_type current_space_left() const {
if (!_current) {
return 0;
}
return _current->size - _current->frag_size;
}
// Figure out next chunk size.
// - must be enough for data_size + sizeof(chunk)
// - must be at least _initial_chunk_size
// - try to double each time to prevent too many allocations
// - should not exceed max_alloc_size, unless data_size requires so
// - will be power-of-two so the allocated memory can be fully utilized.
size_type next_alloc_size(size_t data_size) const {
auto next_size = _current
? _current->size * 2
: _initial_chunk_size;
next_size = std::min(next_size, max_alloc_size());
auto r = std::max<size_type>(next_size, data_size + sizeof(chunk));
return std::bit_ceil(r);
}
// Makes room for a contiguous region of given size.
// The region is accounted for as already written.
// size must not be zero.
[[gnu::always_inline]]
value_type* alloc(size_type size) {
if (__builtin_expect(size <= current_space_left(), true)) {
auto ret = _current->data + _current->frag_size;
_current->frag_size += size;
_size += size;
return ret;
} else {
return alloc_new(size);
}
}
[[gnu::noinline]]
value_type* alloc_new(size_type size) {
auto alloc_size = next_alloc_size(size);
auto space = malloc(alloc_size);
if (!space) {
throw std::bad_alloc();
}
auto backref = _current ? &_current->next : &_begin;
auto new_chunk = new (space) chunk(backref, alloc_size - sizeof(chunk), size);
_current = new_chunk;
_size += size;
return _current->data;
}
[[gnu::noinline]]
void free_chain(chunk* c) noexcept {
while (c) {
auto n = c->next;
c->~chunk();
::free(c);
c = n;
}
}
public:
explicit bytes_ostream(size_t initial_chunk_size) noexcept
: _begin()
, _current(nullptr)
, _size(0)
, _initial_chunk_size(initial_chunk_size)
{ }
bytes_ostream() noexcept : bytes_ostream(default_chunk_size) {}
bytes_ostream(bytes_ostream&& o) noexcept
: _begin(std::exchange(o._begin, {}))
, _current(o._current)
, _size(o._size)
, _initial_chunk_size(o._initial_chunk_size)
{
o._current = nullptr;
o._size = 0;
}
bytes_ostream(const bytes_ostream& o)
: _begin()
, _current(nullptr)
, _size(0)
, _initial_chunk_size(o._initial_chunk_size)
{
append(o);
}
~bytes_ostream() {
free_chain(_begin.ptr);
}
bytes_ostream& operator=(const bytes_ostream& o) {
if (this != &o) {
auto x = bytes_ostream(o);
*this = std::move(x);
}
return *this;
}
bytes_ostream& operator=(bytes_ostream&& o) noexcept {
if (this != &o) {
this->~bytes_ostream();
new (this) bytes_ostream(std::move(o));
}
return *this;
}
template <typename T>
struct place_holder {
value_type* ptr;
// makes the place_holder looks like a stream
seastar::simple_output_stream get_stream() {
return seastar::simple_output_stream(reinterpret_cast<char*>(ptr), sizeof(T));
}
};
// Returns a place holder for a value to be written later.
template <std::integral T>
inline
place_holder<T>
write_place_holder() {
return place_holder<T>{alloc(sizeof(T))};
}
[[gnu::always_inline]]
value_type* write_place_holder(size_type size) {
return alloc(size);
}
// Writes given sequence of bytes
[[gnu::always_inline]]
inline void write(bytes_view v) {
if (v.empty()) {
return;
}
auto this_size = std::min(v.size(), size_t(current_space_left()));
if (__builtin_expect(this_size, true)) {
memcpy(_current->data + _current->frag_size, v.begin(), this_size);
_current->frag_size += this_size;
_size += this_size;
v.remove_prefix(this_size);
}
while (!v.empty()) {
auto this_size = std::min(v.size(), size_t(max_chunk_size()));
std::copy_n(v.begin(), this_size, alloc_new(this_size));
v.remove_prefix(this_size);
}
}
[[gnu::always_inline]]
void write(const char* ptr, size_t size) {
write(bytes_view(reinterpret_cast<const signed char*>(ptr), size));
}
bool is_linearized() const {
return !_begin || !_begin->next;
}
// Call only when is_linearized()
bytes_view view() const {
assert(is_linearized());
if (!_current) {
return bytes_view();
}
return bytes_view(_current->data, _size);
}
// Makes the underlying storage contiguous and returns a view to it.
// Invalidates all previously created placeholders.
bytes_view linearize() {
if (is_linearized()) {
return view();
}
auto space = malloc(_size + sizeof(chunk));
if (!space) {
throw std::bad_alloc();
}
auto old_begin = _begin;
auto new_chunk = new (space) chunk(&_begin, _size, _size);
auto dst = new_chunk->data;
auto r = old_begin.ptr;
while (r) {
auto next = r->next;
dst = std::copy_n(r->data, r->frag_size, dst);
r->~chunk();
::free(r);
r = next;
}
_current = new_chunk;
_begin = std::move(new_chunk);
return bytes_view(_current->data, _size);
}
// Returns the amount of bytes written so far
size_type size() const {
return _size;
}
// For the FragmentRange concept
size_type size_bytes() const {
return _size;
}
bool empty() const {
return _size == 0;
}
void reserve(size_t size) {
// FIXME: implement
}
void append(const bytes_ostream& o) {
for (auto&& bv : o.fragments()) {
write(bv);
}
}
// Removes n bytes from the end of the bytes_ostream.
// Beware of O(n) algorithm.
void remove_suffix(size_t n) {
_size -= n;
auto left = _size;
auto current = _begin.ptr;
while (current) {
if (current->frag_size >= left) {
current->frag_size = left;
_current = current;
free_chain(current->next);
current->next = nullptr;
return;
}
left -= current->frag_size;
current = current->next;
}
}
// begin() and end() form an input range to bytes_view representing fragments.
// Any modification of this instance invalidates iterators.
fragment_iterator begin() const { return { _begin.ptr }; }
fragment_iterator end() const { return { nullptr }; }
output_iterator write_begin() { return output_iterator(*this); }
boost::iterator_range<fragment_iterator> fragments() const {
return { begin(), end() };
}
struct position {
chunk* _chunk;
size_type _offset;
};
position pos() const {
return { _current, _current ? _current->frag_size : 0 };
}
// Returns the amount of bytes written since given position.
// "pos" must be valid.
size_type written_since(position pos) {
chunk* c = pos._chunk;
if (!c) {
return _size;
}
size_type total = c->frag_size - pos._offset;
c = c->next;
while (c) {
total += c->frag_size;
c = c->next;
}
return total;
}
// Rollbacks all data written after "pos".
// Invalidates all placeholders and positions created after "pos".
void retract(position pos) {
if (!pos._chunk) {
*this = {};
return;
}
_size -= written_since(pos);
_current = pos._chunk;
free_chain(_current->next);
_current->next = nullptr;
_current->frag_size = pos._offset;
}
void reduce_chunk_count() {
// FIXME: This is a simplified version. It linearizes the whole buffer
// if its size is below max_chunk_size. We probably could also gain
// some read performance by doing "real" reduction, i.e. merging
// all chunks until all but the last one is max_chunk_size.
if (size() < max_chunk_size()) {
linearize();
}
}
bool operator==(const bytes_ostream& other) const {
auto as = fragments().begin();
auto as_end = fragments().end();
auto bs = other.fragments().begin();
auto bs_end = other.fragments().end();
auto a = *as++;
auto b = *bs++;
while (!a.empty() || !b.empty()) {
auto now = std::min(a.size(), b.size());
if (!std::equal(a.begin(), a.begin() + now, b.begin(), b.begin() + now)) {
return false;
}
a.remove_prefix(now);
if (a.empty() && as != as_end) {
a = *as++;
}
b.remove_prefix(now);
if (b.empty() && bs != bs_end) {
b = *bs++;
}
}
return true;
}
// Makes this instance empty.
//
// The first buffer is not deallocated, so callers may rely on the
// fact that if they write less than the initial chunk size between
// the clear() calls then writes will not involve any memory allocations,
// except for the first write made on this instance.
void clear() {
if (_begin.ptr) {
_begin.ptr->frag_size = 0;
_size = 0;
free_chain(_begin.ptr->next);
_begin.ptr->next = nullptr;
_current = _begin.ptr;
}
}
managed_bytes to_managed_bytes() && {
if (_size) {
_begin.ptr->size = _size;
_current = nullptr;
_size = 0;
auto begin_ptr = _begin.ptr;
_begin.ptr = nullptr;
return managed_bytes(begin_ptr);
} else {
return managed_bytes();
}
}
// Makes this instance empty using async continuations, while allowing yielding.
//
// The first buffer is not deallocated, so callers may rely on the
// fact that if they write less than the initial chunk size between
// the clear() calls then writes will not involve any memory allocations,
// except for the first write made on this instance.
future<> clear_gently() noexcept {
if (!_begin.ptr) {
return make_ready_future<>();
}
_begin->frag_size = 0;
_current = _begin.ptr;
_size = 0;
return do_until([this] { return !_begin.ptr->next; }, [this] {
auto second_chunk = _begin.ptr->next;
auto next = second_chunk->next;
second_chunk->~chunk();
::free(second_chunk);
_begin->next = std::move(next);
return make_ready_future<>();
});
}
};