Files
scylladb/utils/managed_bytes.hh
Avi Kivity d4e454b5bc Merge 'Fix bad performance for densely populated partition index pages' from Tomasz Grabiec
This applies to small-partition workloads where index pages have a high partition count and the index doesn't fit in cache. It was observed that the count can be on the order of hundreds. In such a workload, pages undergo constant population, LSA compaction, and LSA eviction, which has a severe impact on CPU utilization.

Refs https://scylladb.atlassian.net/browse/SCYLLADB-620

This PR reduces the impact by several changes:

  - reducing memory footprint in the partition index. Assuming partition key size is 16 bytes, the cost dropped from 96 bytes to 36 bytes per partition.

  - flattening the object graph and amortizing storage. Storing entries directly in the vector. Storing all key values in a single managed_bytes. Making index_entry a trivial struct.

  - index entries and key storage are now trivially moveable, and batched inside vector storage
    so LSA migration can use memcpy(), which amortizes the cost per key. This reduces the cost of LSA segment compaction.

  - LSA eviction is now pretty much constant time for the whole page
    regardless of the number of entries, because elements are trivial and batched inside vectors.
    Page eviction cost dropped from 50 us to 1 us.

Performance evaluated with:

   scylla perf-simple-query -c1 -m200M --partitions=1000000

Before:

```
7774.96 tps (166.0 allocs/op, 521.7 logallocs/op,  54.0 tasks/op,  802428 insns/op,  430457 cycles/op,        0 errors)
7511.08 tps (166.1 allocs/op, 527.2 logallocs/op,  54.0 tasks/op,  804185 insns/op,  430752 cycles/op,        0 errors)
7740.44 tps (166.3 allocs/op, 526.2 logallocs/op,  54.2 tasks/op,  805347 insns/op,  432117 cycles/op,        0 errors)
7818.72 tps (165.2 allocs/op, 517.6 logallocs/op,  53.7 tasks/op,  794965 insns/op,  427751 cycles/op,        0 errors)
7865.49 tps (165.1 allocs/op, 513.3 logallocs/op,  53.6 tasks/op,  788898 insns/op,  425171 cycles/op,        0 errors)
```

After (+318%):

```
32492.40 tps (130.7 allocs/op,  12.8 logallocs/op,  36.1 tasks/op,  109236 insns/op,  103203 cycles/op,        0 errors)
32591.99 tps (130.4 allocs/op,  12.8 logallocs/op,  36.0 tasks/op,  108947 insns/op,  102889 cycles/op,        0 errors)
32514.52 tps (130.6 allocs/op,  12.8 logallocs/op,  36.0 tasks/op,  109118 insns/op,  103219 cycles/op,        0 errors)
32491.14 tps (130.6 allocs/op,  12.8 logallocs/op,  36.0 tasks/op,  109349 insns/op,  103272 cycles/op,        0 errors)
32582.90 tps (130.5 allocs/op,  12.8 logallocs/op,  36.0 tasks/op,  109269 insns/op,  102872 cycles/op,        0 errors)
32479.43 tps (130.6 allocs/op,  12.8 logallocs/op,  36.0 tasks/op,  109313 insns/op,  103242 cycles/op,        0 errors)
32418.48 tps (130.7 allocs/op,  12.8 logallocs/op,  36.1 tasks/op,  109201 insns/op,  103301 cycles/op,        0 errors)
31394.14 tps (130.7 allocs/op,  12.8 logallocs/op,  36.1 tasks/op,  109267 insns/op,  103301 cycles/op,        0 errors)
32298.55 tps (130.7 allocs/op,  12.8 logallocs/op,  36.1 tasks/op,  109323 insns/op,  103551 cycles/op,        0 errors)
```

When the workload is miss-only, with both row cache and index cache disabled (no cache maintenance cost):

  perf-simple-query -c1 -m200M --duration 6000 --partitions=100000 --enable-index-cache=0 --enable-cache=0

Before:

```
9124.57 tps (146.2 allocs/op, 789.0 logallocs/op,  45.3 tasks/op,  889320 insns/op,  357937 cycles/op,        0 errors)
9437.23 tps (146.1 allocs/op, 789.3 logallocs/op,  45.3 tasks/op,  889613 insns/op,  357782 cycles/op,        0 errors)
9455.65 tps (146.0 allocs/op, 787.4 logallocs/op,  45.2 tasks/op,  887606 insns/op,  357167 cycles/op,        0 errors)
9451.22 tps (146.0 allocs/op, 787.4 logallocs/op,  45.3 tasks/op,  887627 insns/op,  357357 cycles/op,        0 errors)
9429.50 tps (146.0 allocs/op, 787.4 logallocs/op,  45.3 tasks/op,  887761 insns/op,  358148 cycles/op,        0 errors)
9430.29 tps (146.1 allocs/op, 788.2 logallocs/op,  45.3 tasks/op,  888501 insns/op,  357679 cycles/op,        0 errors)
9454.08 tps (146.0 allocs/op, 787.3 logallocs/op,  45.3 tasks/op,  887545 insns/op,  357132 cycles/op,        0 errors)
```

After (+55%):

```
14484.84 tps (150.7 allocs/op,   6.5 logallocs/op,  44.7 tasks/op,  396164 insns/op,  229490 cycles/op,        0 errors)
14526.21 tps (150.8 allocs/op,   6.5 logallocs/op,  44.8 tasks/op,  396401 insns/op,  228824 cycles/op,        0 errors)
14567.53 tps (150.7 allocs/op,   6.5 logallocs/op,  44.7 tasks/op,  396319 insns/op,  228701 cycles/op,        0 errors)
14545.63 tps (150.6 allocs/op,   6.5 logallocs/op,  44.7 tasks/op,  395889 insns/op,  228493 cycles/op,        0 errors)
14626.06 tps (150.5 allocs/op,   6.5 logallocs/op,  44.7 tasks/op,  395254 insns/op,  227891 cycles/op,        0 errors)
14593.74 tps (150.5 allocs/op,   6.5 logallocs/op,  44.7 tasks/op,  395480 insns/op,  227993 cycles/op,        0 errors)
14538.10 tps (150.8 allocs/op,   6.5 logallocs/op,  44.8 tasks/op,  397035 insns/op,  228831 cycles/op,        0 errors)
14527.18 tps (150.8 allocs/op,   6.5 logallocs/op,  44.8 tasks/op,  396992 insns/op,  228839 cycles/op,        0 errors)
```

Same as above, but with summary ratio increased from 0.0005 to 0.005 (smaller pages):

Before:

```
33906.70 tps (146.1 allocs/op,  83.6 logallocs/op,  45.1 tasks/op,  170553 insns/op,   98104 cycles/op,        0 errors)
32696.16 tps (146.0 allocs/op,  83.5 logallocs/op,  45.1 tasks/op,  170369 insns/op,   98405 cycles/op,        0 errors)
33889.05 tps (146.1 allocs/op,  83.6 logallocs/op,  45.1 tasks/op,  170551 insns/op,   98135 cycles/op,        0 errors)
33893.24 tps (146.1 allocs/op,  83.5 logallocs/op,  45.1 tasks/op,  170488 insns/op,   98168 cycles/op,        0 errors)
33836.73 tps (146.1 allocs/op,  83.6 logallocs/op,  45.1 tasks/op,  170528 insns/op,   98226 cycles/op,        0 errors)
33897.61 tps (146.0 allocs/op,  83.5 logallocs/op,  45.1 tasks/op,  170428 insns/op,   98081 cycles/op,        0 errors)
33834.73 tps (146.1 allocs/op,  83.5 logallocs/op,  45.1 tasks/op,  170438 insns/op,   98178 cycles/op,        0 errors)
33776.31 tps (146.3 allocs/op,  83.9 logallocs/op,  45.2 tasks/op,  170958 insns/op,   98418 cycles/op,        0 errors)
33808.08 tps (146.3 allocs/op,  83.9 logallocs/op,  45.2 tasks/op,  170940 insns/op,   98388 cycles/op,        0 errors)
```

After (+18%):

```
40081.51 tps (148.2 allocs/op,   4.4 logallocs/op,  45.0 tasks/op,  121047 insns/op,   82231 cycles/op,        0 errors)
40005.85 tps (148.6 allocs/op,   4.4 logallocs/op,  45.2 tasks/op,  121327 insns/op,   82545 cycles/op,        0 errors)
39816.75 tps (148.3 allocs/op,   4.4 logallocs/op,  45.1 tasks/op,  121067 insns/op,   82419 cycles/op,        0 errors)
39953.11 tps (148.1 allocs/op,   4.4 logallocs/op,  45.0 tasks/op,  121027 insns/op,   82258 cycles/op,        0 errors)
40073.96 tps (148.2 allocs/op,   4.4 logallocs/op,  45.0 tasks/op,  121006 insns/op,   82313 cycles/op,        0 errors)
39882.25 tps (148.2 allocs/op,   4.4 logallocs/op,  45.0 tasks/op,  120925 insns/op,   82320 cycles/op,        0 errors)
39916.08 tps (148.3 allocs/op,   4.4 logallocs/op,  45.1 tasks/op,  121054 insns/op,   82393 cycles/op,        0 errors)
39786.30 tps (148.2 allocs/op,   4.4 logallocs/op,  45.0 tasks/op,  121027 insns/op,   82465 cycles/op,        0 errors)
38662.45 tps (148.3 allocs/op,   4.4 logallocs/op,  45.0 tasks/op,  121108 insns/op,   82312 cycles/op,        0 errors)
39849.42 tps (148.3 allocs/op,   4.4 logallocs/op,  45.1 tasks/op,  121098 insns/op,   82447 cycles/op,        0 errors)
```

Closes scylladb/scylladb#28603

* github.com:scylladb/scylladb:
  sstables: mx: index_reader: Optimize parsing for no promoted index case
  vint: Use std::countl_zero()
  test: sstable_partition_index_cache_test: Validate scenario of pages with sparse promoted index placement
  sstables: mx: index_reader: Amoritze partition key storage
  managed_bytes: Hoist write_fragmented() to common header
  utils: managed_vector: Use std::uninitialized_move() to move objects
  sstables: mx: index_reader: Keep promoted_index info next to index_entry
  sstables: mx: index_reader: Extract partition_index_page::clear_gently()
  sstables: mx: index_reader: Shave-off 16 bytes from index_entry by using raw_token
  sstables: mx: index_reader: Reduce allocation_section overhead during index page parsing by batching allocation
  sstables: mx: index_reader: Keep index_entry directly in the vector
  dht: Introduce raw_token
  test: perf_simple_query: Add 'sstable-format' command-line option
  test: perf_simple_query: Add 'sstable-summary-ratio' command-line option
  test: perf-simple-query: Add option to disable index cache
  test: cql_test_env: Respect enable-index-cache config

(cherry picked from commit 5e7fb08bf3)

Closes scylladb/scylladb#29136
2026-03-20 01:21:49 +01:00

646 lines
23 KiB
C++

/*
* Copyright (C) 2015-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#pragma once
#include <stdint.h>
#include "bytes.hh"
#include "utils/allocation_strategy.hh"
#include "utils/fragment_range.hh"
#include <seastar/util/alloc_failure_injector.hh>
#include <type_traits>
#include <utility>
// Forward declaration; bytes_ostream is named as a friend by managed_bytes.
class bytes_ostream;
// View over the bytes held by a managed_bytes. The template argument selects
// whether the view hands out mutable or read-only fragments.
template <mutable_view is_mutable_view>
class managed_bytes_basic_view;
// Read-only view.
using managed_bytes_view = managed_bytes_basic_view<mutable_view::no>;
// View through which the underlying bytes can be written.
using managed_bytes_mutable_view = managed_bytes_basic_view<mutable_view::yes>;
// Used to store managed_bytes data in layout 3. (See the doc comment of managed_bytes).
// Also used as the underlying storage for bytes_ostream.
//
// The storage for these "fragmented buffer" types is provided by a chain
// (linked list) of multi_chunk_blob_storage objects.
// Header of one fragment in a singly linked chain; the payload bytes follow
// the header in memory (see data[] below).
struct multi_chunk_blob_storage {
// Stored inline in managed_bytes.
// Packed wrapper around a raw fragment pointer. Converts implicitly from and
// to multi_chunk_blob_storage*, and forwards -> and * to the pointee.
struct [[gnu::packed]] ref_type {
multi_chunk_blob_storage* ptr = nullptr;
ref_type() {}
ref_type(multi_chunk_blob_storage* ptr) : ptr(ptr) {}
operator multi_chunk_blob_storage*() const { return ptr; }
multi_chunk_blob_storage* operator->() const { return ptr; }
multi_chunk_blob_storage& operator*() const { return *ptr; }
};
using size_type = uint32_t;
using char_type = bytes_view::value_type;
// Backref is needed to update the parent's pointer to us when we are
// migrated during memory defragmentation.
// (See the docs of allocation_strategy).
ref_type* backref;
// These fields have two different meanings:
// 1. In bytes_ostream:
// - `size` is the size of this fragment (== the size of the trailing data[] below).
// - `frag_size` is the number of *used* (written) bytes in fragment.
// 2. In managed_bytes:
// - `size` in the first multi_chunk_blob_storage in the list is the size of the entire fragmented
// buffer (the sum of all data[]s in the chain).
// - `frag_size` is the data[] size of the current fragment (this multi_chunk_blob_storage).
size_type size;
size_type frag_size;
// Pointer to the next fragment in the list. If we are the last fragment, it's null.
ref_type next;
// The storage provided by this fragment.
char_type data[];
// Initializes the header of a fragment with no successor and registers it
// with its parent by storing `this` through `backref`.
// The payload in data[] is left uninitialized.
multi_chunk_blob_storage(ref_type* backref, size_type size, size_type frag_size) noexcept
: backref(backref)
, size(size)
, frag_size(frag_size)
, next(nullptr)
{
*backref = this;
}
// Relocating move: takes over o's header fields and its successor, then
// repairs both links that refer to this fragment's address.
multi_chunk_blob_storage(multi_chunk_blob_storage&& o) noexcept
: backref(o.backref)
, size(o.size)
, frag_size(o.frag_size)
, next(o.next)
{
// The parent must now point at the new location.
*backref = this;
// Detach the rest of the chain from the moved-from fragment.
o.next = nullptr;
if (next) {
// The successor's backref pointed at o.next; it must point at our `next` field instead.
next->backref = &next;
}
// Copy the payload (frag_size bytes) from the old location.
memcpy(data, o.data, frag_size);
}
// Valid only in the managed_bytes interpretation.
// As long as bytes_ostream is always allocated in the standard allocator,
// and storage_size() is never called on objects in the standard allocator,
// it's okay.
// Returns the header size plus frag_size.
size_t storage_size() const noexcept {
return sizeof(*this) + frag_size;
}
} __attribute__((packed));
// Used to store managed_bytes data in layout 2. (See the docs of managed_bytes).
// Header of a single contiguous external buffer. The payload size is not kept
// here; it lives in the parent's ref_type and is reached through `backref`.
struct [[gnu::packed]] single_chunk_blob_storage {
using size_type = uint32_t;
using char_type = bytes_view::value_type;
// Stored inline in managed_bytes.
// Note the [[packed]]. It allows ref_type to be stored unaligned
// in the `union` in managed_bytes. (It wouldn't fit otherwise).
struct [[gnu::packed]] ref_type {
// managed_bytes has enough spare bytes to store the size inline,
// so we do that to save a few bytes in the external allocation.
single_chunk_blob_storage* ptr = nullptr;
size_type size = 0;
};
// Backref is needed to update the parent's pointer to us when we are
// migrated during memory defragmentation.
// (See the docs of allocation_strategy).
ref_type* backref;
// The storage provided by this fragment.
char_type data[];
// Registers this buffer with its parent: stores both `this` and `size`
// into *backref. The payload in data[] is left uninitialized.
single_chunk_blob_storage(ref_type* backref, size_type size) noexcept
: backref(backref)
{
backref->ptr = this;
backref->size = size;
}
// Relocating move: re-points the parent at the new location and copies the
// payload. The size is read from the parent (backref->size), which is unchanged.
single_chunk_blob_storage(single_chunk_blob_storage&& o) noexcept
: backref(o.backref)
{
backref->ptr = this;
memcpy(data, o.data, backref->size);
}
// Returns the header size plus the payload size recorded in the parent.
size_t storage_size() const noexcept {
return sizeof(*this) + backref->size;
}
};
// A managed version of "bytes" (can be used with LSA).
//
// Sometimes also used as a general-purpose fragmented buffer outside of LSA context,
// but this is not recommended, because it's too easy to accidentally destroy it
// in a different allocator than it was allocated in, which can break the program
// in a hard-to-predict way.
//
// managed_bytes has three storage layouts:
// 1. Inline.
// Used for data which fits into max_inline_size.
// 2. External contiguous. (Single-allocation).
// Used for data which fits into preferred_max_contiguous_allocation().
// (At the moment of writing: 128 kiB and 12.8 kiB in LSA).
// The storage is a single single_chunk_blob_storage object.
// 3. External fragmented. (Multi-allocation).
// Used for everything else.
// The storage is a chain of multi_chunk_blob_storage objects.
//
// Layout 2 exists as an optimization for the most common allocation sizes (several bytes).
// There is nothing which prevents implementing these with layout 3, but layout 3 stores slightly
// more metadata in the allocated buffer (pointer to the next fragment, size of the current fragment),
// which adds up to a big overhead when used with small allocations.
// E.g. 8-byte values are allocated externally -- each has additional 1 byte of flags and 8 bytes
// of timestamp, so it's 17 bytes in total and that doesn't fit into inline storage.
// And adding 16 bytes to each 17-byte cell is a big waste.
//
// The code of `class managed_bytes` is responsible for allocating and freeing the storage.
// Code responsible for reading and writing it is in managed_bytes_basic_view.
// The implementation details of these two classes are entangled.
class managed_bytes {
    friend class bytes_ostream;
    // Largest payload (in bytes) that is stored directly inside the object.
    static constexpr size_t max_inline_size = 15;
    // The current layout is discerned by `_inline_size`:
    // >=0 -> layout 1 (inline). In this case, the value of `_inline_size` holds the data size.
    //  -1 -> layout 2 (single_chunk_blob_storage)
    //  -2 -> layout 3 (multi_chunk_blob_storage)
    union u {
        constexpr u() {}
        constexpr ~u() {}
        bytes_view::value_type inline_data[max_inline_size]; // Stores the data directly. Size is in _inline_size.
        single_chunk_blob_storage::ref_type single_chunk_ref; // Points to external storage and stores the data size.
        multi_chunk_blob_storage::ref_type multi_chunk_ref; // Points to external storage.
    } _u;
    int8_t _inline_size = 0;
private:
    // Layout predicates; exactly one of them holds for any value of _inline_size.
    constexpr bool is_multi_chunk() const noexcept {
        return _inline_size < -1;
    }
    constexpr bool is_single_chunk() const noexcept {
        return _inline_size == -1;
    }
    constexpr bool is_inline() const noexcept {
        return _inline_size >= 0;
    }
    // Largest payload placed in one external allocation: the allocator's
    // preferred contiguous allocation size minus the larger of the two storage headers.
    static size_t max_seg(allocation_strategy& alctr) {
        return alctr.preferred_max_contiguous_allocation() - std::max(sizeof(multi_chunk_blob_storage), sizeof(single_chunk_blob_storage));
    }
    // Destroys every fragment of the chain starting at `p` using the current allocator.
    static void free_chain(multi_chunk_blob_storage* p) noexcept {
        auto& alctr = current_allocator();
        while (p) {
            // Read the successor before the fragment is destroyed.
            auto n = p->next;
            alctr.destroy(p);
            p = n;
        }
    }
    // Adopts an existing fragment chain (layout 3) and makes its head point back at this object.
    // `data` must not be null: it is dereferenced unconditionally.
    explicit managed_bytes(multi_chunk_blob_storage* data) {
        _inline_size = -2;
        _u.multi_chunk_ref.ptr = data;
        data->backref = &_u.multi_chunk_ref;
    }
public:
    using size_type = multi_chunk_blob_storage::size_type;
    // Tag type: selects the constructor which allocates storage without filling it.
    struct initialized_later {};
    // An empty, inline managed_bytes.
    constexpr managed_bytes() = default;
    managed_bytes(const multi_chunk_blob_storage::char_type* ptr, size_type size)
        : managed_bytes(bytes_view(ptr, size)) {}
    explicit managed_bytes(const bytes& b) : managed_bytes(static_cast<bytes_view>(b)) {}
    // Defined later in the file.
    template <FragmentedView View>
    explicit managed_bytes(View v);
    // Allocates storage for `size` bytes and leaves the contents uninitialized.
    // The layout is chosen from `size`:
    //   size <= max_inline_size -> inline,
    //   size <  max_seg()       -> one external buffer,
    //   otherwise               -> a chain of fragments of at most max_seg() bytes each.
    // If an allocation fails while building the chain, the fragments allocated so far
    // are freed and the exception is rethrown.
    managed_bytes(initialized_later, size_type size) {
        memory::on_alloc_point();
        if (size <= max_inline_size) {
            _inline_size = size;
            return;
        }
        auto& alctr = current_allocator();
        const auto maxseg = max_seg(alctr);
        if (size < maxseg) {
            _inline_size = -1;
            void* p = alctr.alloc<single_chunk_blob_storage>(sizeof(single_chunk_blob_storage) + size);
            new (p) single_chunk_blob_storage(&_u.single_chunk_ref, size);
            return;
        }
        _inline_size = -2;
        // size >= maxseg on this path, so the first fragment is always a full one.
        // The head fragment records the total size of the buffer.
        void* head_mem = alctr.alloc<multi_chunk_blob_storage>(sizeof(multi_chunk_blob_storage) + maxseg);
        auto first = new (head_mem) multi_chunk_blob_storage(&_u.multi_chunk_ref, size, maxseg);
        auto last = first;
        size -= maxseg;
        try {
            while (size) {
                const auto frag_len = std::min(size_t(size), maxseg);
                void* frag_mem = alctr.alloc<multi_chunk_blob_storage>(sizeof(multi_chunk_blob_storage) + frag_len);
                // Constructing the fragment links it in through last->next.
                last = new (frag_mem) multi_chunk_blob_storage(&last->next, 0, frag_len);
                size -= frag_len;
            }
        } catch (...) {
            free_chain(first);
            throw;
        }
    }
    explicit managed_bytes(bytes_view v) : managed_bytes(single_fragmented_view(v)) {}
    managed_bytes(std::initializer_list<bytes::value_type> b) : managed_bytes(b.begin(), b.size()) {}
    // Releases external storage, if any, through the current allocator.
    constexpr ~managed_bytes() noexcept {
        if (is_multi_chunk()) {
            free_chain(_u.multi_chunk_ref);
        } else if (is_single_chunk()) {
            auto& alctr = current_allocator();
            alctr.destroy(_u.single_chunk_ref.ptr);
        }
    }
    // Defined later in the file because it depends on managed_bytes_mutable_view.
    managed_bytes(const managed_bytes& o);
    // Takes over o's representation and leaves o empty (inline, size 0).
    constexpr managed_bytes(managed_bytes&& o) noexcept {
        // Microoptimization: we use memcpy instead of assignments because
        // the compiler refuses to merge the loads/stores otherwise for some reason.
        if (!std::is_constant_evaluated()) {
            std::memcpy(reinterpret_cast<char*>(this), &o, sizeof(managed_bytes));
        } else {
            // constexpr-friendly version.
            _u = o._u;
            _inline_size = o._inline_size;
        }
        o._inline_size = 0;
        // External storage keeps a pointer back to its owner's ref; re-point it at us.
        if (is_multi_chunk()) {
            _u.multi_chunk_ref.ptr->backref = &_u.multi_chunk_ref;
        } else if (is_single_chunk()) {
            _u.single_chunk_ref.ptr->backref = &_u.single_chunk_ref;
        }
    }
    // Destroy-and-reconstruct; relies on the move constructor being noexcept.
    managed_bytes& operator=(managed_bytes&& o) noexcept {
        if (this != &o) {
            this->~managed_bytes();
            new (this) managed_bytes(std::move(o));
        }
        return *this;
    }
    // The copy is made first, so *this is untouched if copying throws.
    managed_bytes& operator=(const managed_bytes& o) {
        if (this != &o) {
            managed_bytes tmp(o);
            this->~managed_bytes();
            new (this) managed_bytes(std::move(tmp));
        }
        return *this;
    }
    // Defined later in the file because these depend on managed_bytes_mutable_view.
    bool operator==(const managed_bytes& o) const;
    bytes_view::value_type& operator[](size_type index);
    const bytes_view::value_type& operator[](size_type index) const;
    // Number of data bytes held, whichever layout is in use.
    size_type size() const noexcept {
        if (is_multi_chunk()) {
            return _u.multi_chunk_ref->size;
        } else if (is_single_chunk()) {
            return _u.single_chunk_ref.size;
        } else {
            return _inline_size;
        }
    }
    // True only for an inline value of size 0. The constructors in this class
    // create external layouts only for sizes above max_inline_size.
    bool empty() const noexcept {
        return _inline_size == 0;
    }
    // Returns the amount of external memory used.
    size_t external_memory_usage() const noexcept {
        if (is_multi_chunk()) {
            size_t mem = 0;
            multi_chunk_blob_storage* blob = _u.multi_chunk_ref;
            while (blob) {
                mem += blob->frag_size + sizeof(multi_chunk_blob_storage);
                blob = blob->next;
            }
            return mem;
        } else if (is_single_chunk()) {
            return _u.single_chunk_ref.size + sizeof(single_chunk_blob_storage);
        }
        return 0;
    }
    // Returns the minimum possible amount of external memory used by a managed_bytes
    // of the same size as us.
    // In other words, it returns the amount of external memory that would be used by this
    // managed_bytes if all data was allocated in one big fragment.
    size_t minimal_external_memory_usage() const noexcept {
        if (is_inline()) {
            return 0;
        } else {
            return sizeof(single_chunk_blob_storage) + size();
        }
    }
    // Defined later in the file because it depends on managed_bytes_mutable_view.
    template <std::invocable<bytes_view> Func>
    std::invoke_result_t<Func, bytes_view> with_linearized(Func&& func) const;
    template <mutable_view is_mutable_view>
    friend class managed_bytes_basic_view;
};
// Sanity check: max_inline_size (15) bytes of union storage plus the
// one-byte _inline_size tag must add up to exactly 16 bytes.
static_assert(sizeof(managed_bytes) == 16);
// A non-owning view over a run of bytes which may be split across fragments.
// It tracks the fragment currently at the front, a pointer to the chain of
// fragments that follow it, and the total number of bytes still in the view.
template <mutable_view is_mutable>
class managed_bytes_basic_view {
public:
    using fragment_type = std::conditional_t<is_mutable == mutable_view::yes, bytes_mutable_view, bytes_view>;
    using owning_type = std::conditional_t<is_mutable == mutable_view::yes, managed_bytes, const managed_bytes>;
    using value_type = typename fragment_type::value_type;
    using value_type_maybe_const = std::conditional_t<is_mutable == mutable_view::yes, value_type, const value_type>;
private:
    fragment_type _current_fragment = {};
    multi_chunk_blob_storage* _next_fragments = nullptr;
    // Bytes remaining in the whole view, including _current_fragment.
    size_t _size = 0;

    // Builds a view directly from its three components (see build_managed_bytes_view_from_internals).
    managed_bytes_basic_view(fragment_type current_fragment, multi_chunk_blob_storage* next_fragments, size_t size)
        : _current_fragment(current_fragment)
        , _next_fragments(next_fragments)
        , _size(size) {
    }
public:
    managed_bytes_basic_view() = default;
    managed_bytes_basic_view(const managed_bytes_basic_view&) = default;
    // Views the whole contents of `mb`, whichever storage layout it uses.
    managed_bytes_basic_view(owning_type& mb) {
        if (mb.is_inline()) {
            _current_fragment = fragment_type(mb._u.inline_data, mb._inline_size);
            _size = mb._inline_size;
        } else if (mb.is_single_chunk()) {
            auto p = mb._u.single_chunk_ref.ptr;
            _current_fragment = fragment_type(p->data, mb._u.single_chunk_ref.size);
            _next_fragments = nullptr;
            _size = _current_fragment.size();
        } else {
            multi_chunk_blob_storage* p = mb._u.multi_chunk_ref;
            _current_fragment = fragment_type(p->data, p->frag_size);
            _next_fragments = p->next;
            // The head fragment carries the size of the entire buffer.
            _size = p->size;
        }
    }
    // Views a single contiguous fragment.
    managed_bytes_basic_view(fragment_type bv)
        : _current_fragment(bv)
        , _size(bv.size()) {
    }
    size_t size() const { return _size; }
    size_t size_bytes() const { return _size; }
    bool empty() const { return _size == 0; }
    fragment_type current_fragment() const { return _current_fragment; }
    // Drops the first `n` bytes. `n` must not exceed size().
    void remove_prefix(size_t n) {
        // Skip whole fragments first; the `n > 0` test stops the loop on an empty fragment.
        while (n >= _current_fragment.size() && n > 0) {
            n -= _current_fragment.size();
            remove_current();
        }
        _size -= n;
        _current_fragment.remove_prefix(n);
    }
    // Drops the current fragment and advances to the next one, if any bytes remain.
    void remove_current() {
        _size -= _current_fragment.size();
        if (_size) {
            _current_fragment = fragment_type(_next_fragments->data, _next_fragments->frag_size);
            _next_fragments = _next_fragments->next;
            // The view may end before the fragment does (e.g. after prefix()).
            _current_fragment = _current_fragment.substr(0, _size);
        } else {
            _current_fragment = fragment_type();
        }
    }
    // Returns a view of the first `len` bytes. `len` is stored as the new size
    // without being checked against size().
    managed_bytes_basic_view prefix(size_t len) const {
        managed_bytes_basic_view v = *this;
        v._size = len;
        v._current_fragment = v._current_fragment.substr(0, len);
        return v;
    }
    // Returns the view of [offset, offset + len), with the end clamped to size().
    // `offset` must not exceed size().
    managed_bytes_basic_view substr(size_t offset, size_t len) const {
        // Same value as min(offset + len, _size), but offset + len is only formed
        // when it is known not to exceed _size, so a huge `len` cannot wrap around.
        const size_t end = (len > _size || offset > _size - len) ? _size : offset + len;
        managed_bytes_basic_view v = prefix(end);
        v.remove_prefix(offset);
        return v;
    }
    value_type_maybe_const& front() const { return _current_fragment.front(); }
    // Walks a copy of the view forward by `index` bytes; the returned reference
    // refers to the viewed storage, not to the temporary copy.
    value_type_maybe_const& operator[](size_t index) const {
        auto v = *this;
        v.remove_prefix(index);
        return v.current_fragment().front();
    }
    bytes linearize() const {
        return linearized(*this);
    }
    // True when all remaining bytes are in the current fragment.
    bool is_linearized() const {
        return _current_fragment.size() == _size;
    }
    // Allow casting mutable views to immutable views.
    template <mutable_view Other>
    friend class managed_bytes_basic_view;
    template <mutable_view Other>
    managed_bytes_basic_view(const managed_bytes_basic_view<Other>& other)
    requires (is_mutable == mutable_view::no) && (Other == mutable_view::yes)
        : _current_fragment(other._current_fragment.data(), other._current_fragment.size())
        , _next_fragments(other._next_fragments)
        , _size(other._size)
    {}
    // Calls `func` with a contiguous bytes_view of the viewed data and returns its result.
    // The data is copied into a temporary `bytes` only when it spans more than one fragment;
    // that temporary lives until `func` returns.
    template <std::invocable<bytes_view> Func>
    std::invoke_result_t<Func, bytes_view> with_linearized(Func&& func) const {
        bytes b;
        auto bv = std::invoke([&] () -> bytes_view {
            if (is_linearized()) {
                return _current_fragment;
            } else {
                b = linearize();
                return b;
            }
        });
        return func(bv);
    }
    friend managed_bytes_basic_view<mutable_view::no> build_managed_bytes_view_from_internals(bytes_view current_fragment, multi_chunk_blob_storage* next_fragment, size_t size);
};
// Compile-time checks that the two view aliases satisfy the fragmented-view concepts.
static_assert(FragmentedView<managed_bytes_view>);
static_assert(FragmentedMutableView<managed_bytes_mutable_view>);
// Views compare equal when their lengths match and compare_unsigned() reports no difference.
// The length is checked first, so compare_unsigned() only runs for equally sized views.
inline bool operator==(const managed_bytes_view& a, const managed_bytes_view& b) {
    if (a.size_bytes() != b.size_bytes()) {
        return false;
    }
    return compare_unsigned(a, b) == 0;
}
// Optional owning buffer.
using managed_bytes_opt = std::optional<managed_bytes>;
// Optional read-only view.
using managed_bytes_view_opt = std::optional<managed_bytes_view>;
// Produces a `bytes` from the contents of `v` by linearizing a read-only view of it.
inline bytes to_bytes(const managed_bytes& v) {
    managed_bytes_view view(v);
    return linearized(view);
}
// Produces a `bytes` from the data referenced by the view `v`.
inline bytes to_bytes(managed_bytes_view v) {
    bytes flat = linearized(v);
    return flat;
}
/// Converts a possibly fragmented managed_bytes_opt to a
/// linear bytes_opt.
///
/// \note copies data
bytes_opt to_bytes_opt(const managed_bytes_opt&);
/// Converts a linear bytes_opt to a possibly fragmented
/// managed_bytes_opt.
///
/// \note copies data
managed_bytes_opt to_managed_bytes_opt(const bytes_opt&);
// Builds a managed_bytes holding a copy of the data in `v`: storage for
// v.size_bytes() bytes is allocated first and then filled through a mutable view.
template<FragmentedView View>
inline managed_bytes::managed_bytes(View v) : managed_bytes(initialized_later(), v.size_bytes()) {
    auto dst = managed_bytes_mutable_view(*this);
    write_fragmented(dst, v);
}
// Assembles a read-only view from its raw parts (front fragment, rest of the
// chain, total size) using the view's private constructor.
inline managed_bytes_view
build_managed_bytes_view_from_internals(bytes_view current_fragment, multi_chunk_blob_storage* next_fragment, size_t size) {
    managed_bytes_view assembled(current_fragment, next_fragment, size);
    return assembled;
}
// Mutable element access. Reuses the const overload and strips the const from
// its result, which is safe because *this is not const here.
inline bytes_view::value_type& managed_bytes::operator[](size_type index) {
    const managed_bytes& self = *this;
    const bytes_view::value_type& elem = self[index];
    return const_cast<bytes_view::value_type&>(elem);
}
// Read-only element access. The two contiguous layouts index their buffer
// directly; the fragmented layout goes through a view. `index` is not range-checked.
inline const bytes_view::value_type& managed_bytes::operator[](size_type index) const {
    if (is_multi_chunk()) {
        // The returned reference points into the fragment storage, not into the local view.
        managed_bytes_view whole(*this);
        return whole[index];
    }
    if (is_single_chunk()) {
        return _u.single_chunk_ref.ptr->data[index];
    }
    return _u.inline_data[index];
}
// Invokes `func` with a bytes_view and returns its result. The work is
// delegated to the free function ::with_linearized, which is given a
// read-only view of *this.
// NOTE(review): ::with_linearized is defined outside this file; presumably it
// copies the data only when the view is fragmented -- confirm there.
template <std::invocable<bytes_view> Func>
std::invoke_result_t<Func, bytes_view> managed_bytes::with_linearized(Func&& func) const {
return ::with_linearized(managed_bytes_view(*this), func);
}
// Equality of two owning buffers is the equality of read-only views over them.
inline bool managed_bytes::operator==(const managed_bytes& o) const {
    const managed_bytes_view lhs(*this);
    const managed_bytes_view rhs(o);
    return lhs == rhs;
}
// Copy constructor. Storage for the copy is obtained from the current allocator:
//  - inline source: the union and the size tag are copied as-is;
//  - single-chunk source that also fits in one chunk of the current allocator:
//    one buffer is allocated and the payload copied with memcpy;
//  - anything else: storage is allocated for o.size() bytes and filled fragment by fragment.
inline managed_bytes::managed_bytes(const managed_bytes& o) {
    if (o.is_inline()) {
        _u = o._u;
        _inline_size = o._inline_size;
        return;
    }
    if (o.is_single_chunk() && o.size() <= max_seg(current_allocator())) {
        const auto len = o._u.single_chunk_ref.size;
        memory::on_alloc_point();
        auto& alctr = current_allocator();
        void* mem = alctr.alloc<single_chunk_blob_storage>(sizeof(single_chunk_blob_storage) + len);
        // The storage constructor fills in _u.single_chunk_ref (pointer and size).
        new (mem) single_chunk_blob_storage(&_u.single_chunk_ref, len);
        memcpy(_u.single_chunk_ref.ptr->data, o._u.single_chunk_ref.ptr->data, len);
        _inline_size = -1;
        return;
    }
    *this = managed_bytes(initialized_later(), o.size());
    managed_bytes_mutable_view dst(*this);
    write_fragmented(dst, managed_bytes_view(o));
}
// Copies `val` into `out`, advancing `out` past the written bytes.
// Each pass fills as much of out's current fragment as `val` still has data for.
// Precondition: `out` has room for val.size() bytes. If `out` runs out first,
// its current fragment is empty, nothing is copied, and the loop makes no progress.
inline
void write_fragmented(managed_bytes_mutable_view& out, std::string_view val) {
    while (!val.empty()) {
        auto dst = out.current_fragment();
        const size_t chunk = std::min(val.size(), dst.size());
        memcpy(dst.data(), val.data(), chunk);
        val.remove_prefix(chunk);
        out.remove_prefix(chunk);
    }
}
// Feeds a view into a hasher: first its total byte count, then the raw bytes
// of every fragment in order.
template<>
struct appending_hash<managed_bytes_view> {
    template<Hasher Hasher>
    void operator()(Hasher& h, managed_bytes_view v) const {
        feed_hash(h, v.size_bytes());
        for (bytes_view piece : fragment_range(v)) {
            const auto* raw = reinterpret_cast<const char*>(piece.data());
            h.update(raw, piece.size());
        }
    }
};
namespace std {
// std::hash for a view: runs appending_hash over a bytes_view_hasher and
// returns what the hasher's finalize() yields.
template <>
struct hash<managed_bytes_view> {
    size_t operator()(managed_bytes_view v) const {
        bytes_view_hasher hasher;
        appending_hash<managed_bytes_view> feed;
        feed(hasher, v);
        return hasher.finalize();
    }
};
// std::hash for an owning buffer: hashes a read-only view of it, so a buffer
// and a view over the same bytes hash identically.
template <>
struct hash<managed_bytes> {
    size_t operator()(const managed_bytes& v) const {
        const managed_bytes_view view(v);
        return hash<managed_bytes_view>{}(view);
    }
};
} // namespace std
// Declarations only; the definitions are not in this header.
sstring to_hex(const managed_bytes& b);
sstring to_hex(const managed_bytes_opt& b);
// The formatters below are used only by tests.
// Formats a view by writing fmt_hex() of each fragment, in order, with nothing in between.
template <> struct fmt::formatter<managed_bytes_view> : fmt::formatter<string_view> {
    template <typename FormatContext>
    auto format(const managed_bytes_view& v, FormatContext& ctx) const {
        auto sink = ctx.out();
        for (bytes_view piece : fragment_range(v)) {
            sink = fmt::format_to(sink, "{}", fmt_hex(piece));
        }
        return sink;
    }
};
// Stream insertion for views: prints `v` using its fmt formatter and returns the stream.
inline std::ostream& operator<<(std::ostream& os, const managed_bytes_view& v) {
fmt::print(os, "{}", v);
return os;
}
// Formats an owning buffer exactly as a read-only view over it would be formatted.
template <> struct fmt::formatter<managed_bytes> : fmt::formatter<string_view> {
    template <typename FormatContext>
    auto format(const managed_bytes& b, FormatContext& ctx) const {
        const managed_bytes_view view(b);
        return fmt::format_to(ctx.out(), "{}", view);
    }
};
// Stream insertion for owning buffers: prints `b` using its fmt formatter and returns the stream.
inline std::ostream& operator<<(std::ostream& os, const managed_bytes& b) {
fmt::print(os, "{}", b);
return os;
}
// Formats an optional buffer: the literal text "null" when disengaged,
// otherwise the contained managed_bytes.
template <> struct fmt::formatter<managed_bytes_opt> : fmt::formatter<string_view> {
    template <typename FormatContext>
    auto format(const managed_bytes_opt& opt, FormatContext& ctx) const {
        if (!opt) {
            return fmt::format_to(ctx.out(), "null");
        }
        return fmt::format_to(ctx.out(), "{}", *opt);
    }
};