/* * Copyright (C) 2015-present ScyllaDB */ /* * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 */ #pragma once #include #include "bytes.hh" #include "utils/allocation_strategy.hh" #include "utils/fragment_range.hh" #include #include #include class bytes_ostream; template class managed_bytes_basic_view; using managed_bytes_view = managed_bytes_basic_view; using managed_bytes_mutable_view = managed_bytes_basic_view; // Used to store managed_bytes data in layout 3. (See the doc comment of managed_bytes). // Also used as the underlying storage for bytes_ostream. // // The storage for these "fragmented buffer" types is provided by a chain // (linked list) of multi_chunk_blob_storage objects. struct multi_chunk_blob_storage { // Stored inline in managed_bytes. struct [[gnu::packed]] ref_type { multi_chunk_blob_storage* ptr = nullptr; ref_type() {} ref_type(multi_chunk_blob_storage* ptr) : ptr(ptr) {} operator multi_chunk_blob_storage*() const { return ptr; } multi_chunk_blob_storage* operator->() const { return ptr; } multi_chunk_blob_storage& operator*() const { return *ptr; } }; using size_type = uint32_t; using char_type = bytes_view::value_type; // Backref is needed to update the parent's pointer to us when we are // migrated during memory defragmentation. // (See the docs of allocation_strategy). ref_type* backref; // These fields have two different meanings: // 1. In bytes_ostream: // - `size` is the size of this fragment (== the size of the trailing data[] below). // - `frag_size` is the number of *used* (written) bytes in fragment. // 2. In managed_bytes: // - `size` in the first multi_chunk_blob_storage in the list is the size of the entire fragmented // buffer (the sum of all data[]s in the chain). // - `frag_size` is the data[] size of the current fragment (this multi_chunk_blob_storage). size_type size; size_type frag_size; // Pointer to the next fragment in the list. If we are the last fragment, it's null. ref_type next; // The storage provided by this fragment. char_type data[]; multi_chunk_blob_storage(ref_type* backref, size_type size, size_type frag_size) noexcept : backref(backref) , size(size) , frag_size(frag_size) , next(nullptr) { *backref = this; } multi_chunk_blob_storage(multi_chunk_blob_storage&& o) noexcept : backref(o.backref) , size(o.size) , frag_size(o.frag_size) , next(o.next) { *backref = this; o.next = nullptr; if (next) { next->backref = &next; } memcpy(data, o.data, frag_size); } // Valid only in the managed_bytes interpretation. // As long as bytes_ostream is always allocated in the standard allocator, // and storage_size() is never called on objects in the standard allocator, // it's okay. size_t storage_size() const noexcept { return sizeof(*this) + frag_size; } } __attribute__((packed)); // Used to store managed_bytes data in layout 2. (See the docs of managed_bytes). struct [[gnu::packed]] single_chunk_blob_storage { using size_type = uint32_t; using char_type = bytes_view::value_type; // Stored inline in managed_bytes. // Note the [[packed]]. It allows ref_type to be stored unaligned // in the `union` in managed_bytes. (It wouldn't fit otherwise). struct [[gnu::packed]] ref_type { // managed_bytes has enough spare bytes to store the size inline, // so we do that to save a few bytes in the external allocation. single_chunk_blob_storage* ptr = nullptr; size_type size = 0; }; // Backref is needed to update the parent's pointer to us when we are // migrated during memory defragmentation. // (See the docs of allocation_strategy). ref_type* backref; // The storage provided by this fragment. char_type data[]; single_chunk_blob_storage(ref_type* backref, size_type size) noexcept : backref(backref) { backref->ptr = this; backref->size = size; } single_chunk_blob_storage(single_chunk_blob_storage&& o) noexcept : backref(o.backref) { backref->ptr = this; memcpy(data, o.data, backref->size); } size_t storage_size() const noexcept { return sizeof(*this) + backref->size; } }; // A managed version of "bytes" (can be used with LSA). // // Sometimes also used as a general-purpose fragmented buffer outside of LSA context, // but this is not recommended, because it's too easy to accidentally destroy it // in a different allocator than it was allocated in, which can break the program // in a hard-to-predict way. // // managed_bytes has three storage layouts: // 1. Inline. // Used for data which fits into max_inline_size. // 2. External contiguous. (Single-allocation). // Used for data which fits into preferred_max_contiguous_allocation(). // (At the moment of writing: 128 kiB and 12.8 kiB in LSA). // The storage is a single single_chunk_blob_storage object. // 3. External fragmented. (Multi-allocation). // Used for everything else. // The storage is a chain of multi_chunk_blob_storage objects. // // Layout 2 exists as an optimization for the most common allocation sizes (several bytes). // There is nothing which prevents implementing these with layout 3, but layout 3 stores slightly // more metadata in the allocated buffer (pointer to the next fragment, size of the current fragment), // which adds up to a big overhead when used with small allocations. // E.g. 8-byte values are allocated externally -- each has additional 1 byte of flags and 8 bytes // of timestamp, so it's 17 bytes in total and that doesn't fit into inline storage. // And adding 16 bytes to each 17-byte cell is a big waste. // // The code of `class managed_bytes` is responsible for allocating and freeing the storage. // Code responsible for reading and writing it is in managed_bytes_basic_view. // The implementation details of these two classes are entangled. class managed_bytes { friend class bytes_ostream; static constexpr size_t max_inline_size = 15; // The current layout is discerned by `inline_size`: // >0 -> layout 1 (inline). In this case, the value of `inline_size` holds the data size. // -1 -> layout 2 (single_chunk_blob_storage) // -2 -> layout 3 (multi_chunk_blob_storage) union u { constexpr u() {} constexpr ~u() {} bytes_view::value_type inline_data[max_inline_size]; // Stores the data directly. Size is in inline_size. single_chunk_blob_storage::ref_type single_chunk_ref; // Points to external storage and stores the data size. multi_chunk_blob_storage::ref_type multi_chunk_ref; // Points to external storage. } _u; int8_t _inline_size = 0; private: constexpr bool is_multi_chunk() const noexcept { return _inline_size < -1; } constexpr bool is_single_chunk() const noexcept { return _inline_size == -1; } bool is_inline() const noexcept { return _inline_size >= 0; } size_t max_seg(allocation_strategy& alctr) { return alctr.preferred_max_contiguous_allocation() - std::max(sizeof(multi_chunk_blob_storage), sizeof(single_chunk_blob_storage)); } void free_chain(multi_chunk_blob_storage* p) noexcept { auto& alctr = current_allocator(); while (p) { auto n = p->next; alctr.destroy(p); p = n; } } explicit managed_bytes(multi_chunk_blob_storage* data) { _inline_size = -2; _u.multi_chunk_ref.ptr = data; data->backref = &_u.multi_chunk_ref; } public: using size_type = multi_chunk_blob_storage::size_type; struct initialized_later {}; constexpr managed_bytes() = default; managed_bytes(const multi_chunk_blob_storage::char_type* ptr, size_type size) : managed_bytes(bytes_view(ptr, size)) {} explicit managed_bytes(const bytes& b) : managed_bytes(static_cast(b)) {} template explicit managed_bytes(View v); managed_bytes(initialized_later, size_type size) { memory::on_alloc_point(); if (size <= max_inline_size) { _inline_size = size; } else { auto& alctr = current_allocator(); auto maxseg = max_seg(alctr); if (size < maxseg) { _inline_size = -1; void* p = alctr.alloc(sizeof(single_chunk_blob_storage) + size); new (p) single_chunk_blob_storage(&_u.single_chunk_ref, size); } else { _inline_size = -2; auto maxseg = max_seg(alctr); auto now = std::min(size_t(size), maxseg); void* p = alctr.alloc(sizeof(multi_chunk_blob_storage) + now); auto first = new (p) multi_chunk_blob_storage(&_u.multi_chunk_ref, size, now); auto last = first; size -= now; try { while (size) { auto now = std::min(size_t(size), maxseg); void* p = alctr.alloc(sizeof(multi_chunk_blob_storage) + now); last = new (p) multi_chunk_blob_storage(&last->next, 0, now); size -= now; } } catch (...) { free_chain(first); throw; } } } } explicit managed_bytes(bytes_view v) : managed_bytes(single_fragmented_view(v)) {}; managed_bytes(std::initializer_list b) : managed_bytes(b.begin(), b.size()) {} constexpr ~managed_bytes() noexcept { if (is_multi_chunk()) { free_chain(_u.multi_chunk_ref); } else if (is_single_chunk()) { auto& alctr = current_allocator(); alctr.destroy(_u.single_chunk_ref.ptr); } } // Defined later in the file because it depends on managed_bytes_mutable_view. managed_bytes(const managed_bytes& o); constexpr managed_bytes(managed_bytes&& o) noexcept { // Microoptimization: we use memcpy instead of assignments because // the compiler refuses the merge the load/stores otherwise for some reason. if (!std::is_constant_evaluated()) { std::memcpy(reinterpret_cast(this), &o, sizeof(managed_bytes)); } else { // constexpr-friendly version. _u = o._u; _inline_size = o._inline_size; } o._inline_size = 0; if (is_multi_chunk()) { _u.multi_chunk_ref.ptr->backref = &_u.multi_chunk_ref; } else if (is_single_chunk()) { _u.single_chunk_ref.ptr->backref = &_u.single_chunk_ref; } } managed_bytes& operator=(managed_bytes&& o) noexcept { if (this != &o) { this->~managed_bytes(); new (this) managed_bytes(std::move(o)); } return *this; } managed_bytes& operator=(const managed_bytes& o) { if (this != &o) { managed_bytes tmp(o); this->~managed_bytes(); new (this) managed_bytes(std::move(tmp)); } return *this; } // Defined later in the file because these depend on managed_bytes_mutable_view. bool operator==(const managed_bytes& o) const; bytes_view::value_type& operator[](size_type index); const bytes_view::value_type& operator[](size_type index) const; size_type size() const { if (is_multi_chunk()) { return _u.multi_chunk_ref->size; } else if (is_single_chunk()) { return _u.single_chunk_ref.size; } else { return _inline_size; } } bool empty() const { return _inline_size == 0; } // Returns the amount of external memory used. size_t external_memory_usage() const noexcept { if (is_multi_chunk()) { size_t mem = 0; multi_chunk_blob_storage* blob = _u.multi_chunk_ref; while (blob) { mem += blob->frag_size + sizeof(multi_chunk_blob_storage); blob = blob->next; } return mem; } else if (is_single_chunk()) { return _u.single_chunk_ref.size + sizeof(single_chunk_blob_storage); } return 0; } // Returns the minimum possible amount of external memory used by a managed_bytes // of the same size as us. // In other words, it returns the amount of external memory that would used by this // managed_bytes if all data was allocated in one big fragment. size_t minimal_external_memory_usage() const noexcept { if (is_inline()) { return 0; } else { return sizeof(single_chunk_blob_storage) + size(); } } // Defined later in the file because it depends on managed_bytes_mutable_view. template Func> std::invoke_result_t with_linearized(Func&& func) const; template friend class managed_bytes_basic_view; }; // Sanity check. static_assert(sizeof(managed_bytes) == 16); template class managed_bytes_basic_view { public: using fragment_type = std::conditional_t; using owning_type = std::conditional_t; using value_type = typename fragment_type::value_type; using value_type_maybe_const = std::conditional_t; private: fragment_type _current_fragment = {}; multi_chunk_blob_storage* _next_fragments = nullptr; size_t _size = 0; private: managed_bytes_basic_view(fragment_type current_fragment, multi_chunk_blob_storage* next_fragments, size_t size) : _current_fragment(current_fragment) , _next_fragments(next_fragments) , _size(size) { } public: managed_bytes_basic_view() = default; managed_bytes_basic_view(const managed_bytes_basic_view&) = default; managed_bytes_basic_view(owning_type& mb) { if (mb.is_inline()) { _current_fragment = fragment_type(mb._u.inline_data, mb._inline_size); _size = mb._inline_size; } else if (mb.is_single_chunk()) { auto p = mb._u.single_chunk_ref.ptr; _current_fragment = fragment_type(p->data, mb._u.single_chunk_ref.size); _next_fragments = nullptr; _size = _current_fragment.size(); } else { multi_chunk_blob_storage* p = mb._u.multi_chunk_ref; _current_fragment = fragment_type(p->data, p->frag_size); _next_fragments = p->next; _size = p->size; } } managed_bytes_basic_view(fragment_type bv) : _current_fragment(bv) , _size(bv.size()) { } size_t size() const { return _size; } size_t size_bytes() const { return _size; } bool empty() const { return _size == 0; } fragment_type current_fragment() const { return _current_fragment; } void remove_prefix(size_t n) { while (n >= _current_fragment.size() && n > 0) { n -= _current_fragment.size(); remove_current(); } _size -= n; _current_fragment.remove_prefix(n); } void remove_current() { _size -= _current_fragment.size(); if (_size) { _current_fragment = fragment_type(_next_fragments->data, _next_fragments->frag_size); _next_fragments = _next_fragments->next; _current_fragment = _current_fragment.substr(0, _size); } else { _current_fragment = fragment_type(); } } managed_bytes_basic_view prefix(size_t len) const { managed_bytes_basic_view v = *this; v._size = len; v._current_fragment = v._current_fragment.substr(0, len); return v; } managed_bytes_basic_view substr(size_t offset, size_t len) const { size_t end = std::min(offset + len, _size); managed_bytes_basic_view v = prefix(end); v.remove_prefix(offset); return v; } value_type_maybe_const& front() const { return _current_fragment.front(); } value_type_maybe_const& operator[](size_t index) const { auto v = *this; v.remove_prefix(index); return v.current_fragment().front(); } bytes linearize() const { return linearized(*this); } bool is_linearized() const { return _current_fragment.size() == _size; } // Allow casting mutable views to immutable views. template friend class managed_bytes_basic_view; template managed_bytes_basic_view(const managed_bytes_basic_view& other) requires (is_mutable == mutable_view::no) && (Other == mutable_view::yes) : _current_fragment(other._current_fragment.data(), other._current_fragment.size()) , _next_fragments(other._next_fragments) , _size(other._size) {} template Func> std::invoke_result_t with_linearized(Func&& func) const { bytes b; auto bv = std::invoke([&] () -> bytes_view { if (is_linearized()) { return _current_fragment; } else { b = linearize(); return b; } }); return func(bv); } friend managed_bytes_basic_view build_managed_bytes_view_from_internals(bytes_view current_fragment, multi_chunk_blob_storage* next_fragment, size_t size); }; static_assert(FragmentedView); static_assert(FragmentedMutableView); inline bool operator==(const managed_bytes_view& a, const managed_bytes_view& b) { return a.size_bytes() == b.size_bytes() && compare_unsigned(a, b) == 0; } using managed_bytes_opt = std::optional; using managed_bytes_view_opt = std::optional; inline bytes to_bytes(const managed_bytes& v) { return linearized(managed_bytes_view(v)); } inline bytes to_bytes(managed_bytes_view v) { return linearized(v); } /// Converts a possibly fragmented managed_bytes_opt to a /// linear bytes_opt. /// /// \note copies data bytes_opt to_bytes_opt(const managed_bytes_opt&); /// Converts a linear bytes_opt to a possibly fragmented /// managed_bytes_opt. /// /// \note copies data managed_bytes_opt to_managed_bytes_opt(const bytes_opt&); template inline managed_bytes::managed_bytes(View v) : managed_bytes(initialized_later(), v.size_bytes()) { managed_bytes_mutable_view self(*this); write_fragmented(self, v); } inline managed_bytes_view build_managed_bytes_view_from_internals(bytes_view current_fragment, multi_chunk_blob_storage* next_fragment, size_t size) { return managed_bytes_view(current_fragment, next_fragment, size); } inline bytes_view::value_type& managed_bytes::operator[](size_type index) { return const_cast(std::as_const(*this)[index]); } inline const bytes_view::value_type& managed_bytes::operator[](size_type index) const { if (is_inline()) { return _u.inline_data[index]; } else if (is_single_chunk()) { return _u.single_chunk_ref.ptr->data[index]; } else { managed_bytes_view self(*this); return self[index]; } } template Func> std::invoke_result_t managed_bytes::with_linearized(Func&& func) const { return ::with_linearized(managed_bytes_view(*this), func); } inline bool managed_bytes::operator==(const managed_bytes& o) const { return managed_bytes_view(*this) == managed_bytes_view(o); } inline managed_bytes::managed_bytes(const managed_bytes& o) { if (o.is_inline()) { _inline_size = o._inline_size; _u = o._u; } else if (o.is_single_chunk() && o.size() <= max_seg(current_allocator())) { memory::on_alloc_point(); auto& alctr = current_allocator(); void* p = alctr.alloc(sizeof(single_chunk_blob_storage) + o._u.single_chunk_ref.size); new (p) single_chunk_blob_storage(&_u.single_chunk_ref, o._u.single_chunk_ref.size); memcpy(_u.single_chunk_ref.ptr->data, o._u.single_chunk_ref.ptr->data, o._u.single_chunk_ref.size); _inline_size = -1; } else { *this = managed_bytes(initialized_later(), o.size()); managed_bytes_mutable_view self(*this); write_fragmented(self, managed_bytes_view(o)); } } inline void write_fragmented(managed_bytes_mutable_view& out, std::string_view val) { while (val.size() > 0) { size_t current_n = std::min(val.size(), out.current_fragment().size()); memcpy(out.current_fragment().data(), val.data(), current_n); val.remove_prefix(current_n); out.remove_prefix(current_n); } } template<> struct appending_hash { template void operator()(Hasher& h, managed_bytes_view v) const { feed_hash(h, v.size_bytes()); for (bytes_view frag : fragment_range(v)) { h.update(reinterpret_cast(frag.data()), frag.size()); } } }; namespace std { template <> struct hash { size_t operator()(managed_bytes_view v) const { bytes_view_hasher h; appending_hash{}(h, v); return h.finalize(); } }; template <> struct hash { size_t operator()(const managed_bytes& v) const { return hash{}(v); } }; } // namespace std sstring to_hex(const managed_bytes& b); sstring to_hex(const managed_bytes_opt& b); // The formatters below are used only by tests. template <> struct fmt::formatter : fmt::formatter { template auto format(const managed_bytes_view& v, FormatContext& ctx) const { auto out = ctx.out(); for (bytes_view frag : fragment_range(v)) { out = fmt::format_to(out, "{}", fmt_hex(frag)); } return out; } }; inline std::ostream& operator<<(std::ostream& os, const managed_bytes_view& v) { fmt::print(os, "{}", v); return os; } template <> struct fmt::formatter : fmt::formatter { template auto format(const managed_bytes& b, FormatContext& ctx) const { return fmt::format_to(ctx.out(), "{}", managed_bytes_view(b)); } }; inline std::ostream& operator<<(std::ostream& os, const managed_bytes& b) { fmt::print(os, "{}", b); return os; } template <> struct fmt::formatter : fmt::formatter { template auto format(const managed_bytes_opt& opt, FormatContext& ctx) const { if (opt) { return fmt::format_to(ctx.out(), "{}", *opt); } return fmt::format_to(ctx.out(), "null"); } };