/*
 * Copyright (C) 2015-present ScyllaDB
 */

/*
 * SPDX-License-Identifier: AGPL-3.0-or-later
 */
#pragma once

#include <stdint.h>
#include <memory>
#include <algorithm>    // std::min, std::equal, std::copy
#include <cassert>      // assert
#include <concepts>     // std::invocable
#include <cstring>      // memcpy
#include <optional>     // std::optional (managed_bytes_opt)
#include "bytes.hh"
#include "utils/allocation_strategy.hh"
#include "utils/fragment_range.hh"
#include <seastar/util/alloc_failure_injector.hh>
#include <unordered_map>
#include <type_traits>

template <mutable_view is_mutable_view>
class managed_bytes_basic_view;
using managed_bytes_view = managed_bytes_basic_view<mutable_view::no>;
using managed_bytes_mutable_view = managed_bytes_basic_view<mutable_view::yes>;

// A single fragment of an externally allocated (LSA-managed) value.
// Fragments form a singly-linked list via `next`; each fragment keeps a
// back-pointer (`backref`) to the location that points at it, so the chain
// can be relinked when the allocator moves a fragment.
struct blob_storage {
    struct [[gnu::packed]] ref_type {
        blob_storage* ptr;

        ref_type() {}
        ref_type(blob_storage* ptr) : ptr(ptr) {}
        operator blob_storage*() const { return ptr; }
        blob_storage* operator->() const { return ptr; }
        blob_storage& operator*() const { return *ptr; }
    };
    using size_type = uint32_t;
    using char_type = bytes_view::value_type;

    ref_type* backref;
    size_type size;       // total size of the value (set on the first fragment)
    size_type frag_size;  // size of this fragment's payload
    ref_type next;
    char_type data[];

    blob_storage(ref_type* backref, size_type size, size_type frag_size) noexcept
        : backref(backref)
        , size(size)
        , frag_size(frag_size)
        , next(nullptr)
    {
        *backref = this;
    }

    blob_storage(blob_storage&& o) noexcept
        : backref(o.backref)
        , size(o.size)
        , frag_size(o.frag_size)
        , next(o.next)
    {
        *backref = this;
        o.next = nullptr;
        if (next) {
            next->backref = &next;
        }
        memcpy(data, o.data, frag_size);
    }

    size_t storage_size() const noexcept {
        return sizeof(*this) + frag_size;
    }
} __attribute__((packed));
// A managed version of "bytes" (can be used with LSA).
class managed_bytes {
static constexpr size_t max_inline_size = 15;
struct small_blob {
bytes_view::value_type data[max_inline_size];
int8_t size; // -1 -> use blob_storage
};
union u {
u() {}
~u() {}
blob_storage::ref_type ptr;
small_blob small;
} _u;
static_assert(sizeof(small_blob) > sizeof(blob_storage*), "inline size too small");
private:
bool external() const noexcept {
return _u.small.size < 0;
}
size_t max_seg(allocation_strategy& alctr) {
return alctr.preferred_max_contiguous_allocation() - sizeof(blob_storage);
}
void free_chain(blob_storage* p) noexcept {
auto& alctr = current_allocator();
while (p) {
auto n = p->next;
alctr.destroy(p);
p = n;
}
}
bytes_view::value_type& value_at_index(blob_storage::size_type index) {
if (!external()) {
return _u.small.data[index];
}
blob_storage* a = _u.ptr;
while (index >= a->frag_size) {
index -= a->frag_size;
a = a->next;
}
return a->data[index];
}
std::unique_ptr<bytes_view::value_type[]> do_linearize_pure() const;
public:
using size_type = blob_storage::size_type;
struct initialized_later {};
managed_bytes() {
_u.small.size = 0;
}
managed_bytes(const blob_storage::char_type* ptr, size_type size)
: managed_bytes(bytes_view(ptr, size)) {}
explicit managed_bytes(const bytes& b) : managed_bytes(static_cast<bytes_view>(b)) {}
template <FragmentedView View>
explicit managed_bytes(View v);

    managed_bytes(initialized_later, size_type size) {
        memory::on_alloc_point();
        if (size <= max_inline_size) {
            _u.small.size = size;
        } else {
            _u.small.size = -1;
            auto& alctr = current_allocator();
            auto maxseg = max_seg(alctr);
            auto now = std::min(size_t(size), maxseg);
            void* p = alctr.alloc<blob_storage>(sizeof(blob_storage) + now);
            auto first = new (p) blob_storage(&_u.ptr, size, now);
            auto last = first;
            size -= now;
            try {
                while (size) {
                    auto now = std::min(size_t(size), maxseg);
                    void* p = alctr.alloc<blob_storage>(sizeof(blob_storage) + now);
                    last = new (p) blob_storage(&last->next, 0, now);
                    size -= now;
                }
            } catch (...) {
                free_chain(first);
                throw;
            }
        }
    }
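
    // Illustrative sketch of how the constructor above fragments a large value
    // (the numbers are hypothetical; the actual fragment size is the allocator's
    // preferred_max_contiguous_allocation() minus sizeof(blob_storage)):
    //
    //   assume maxseg == 128 KiB and size == 300 KiB:
    //     fragment 1: size = 300 KiB (total), frag_size = 128 KiB
    //     fragment 2: size = 0,               frag_size = 128 KiB
    //     fragment 3: size = 0,               frag_size = 44 KiB
    //
    //   The fragments are linked through blob_storage::next; on allocation
    //   failure the partially built chain is released with free_chain().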

    explicit managed_bytes(bytes_view v) : managed_bytes(initialized_later(), v.size()) {
        if (!external()) {
            // Workaround for https://github.com/scylladb/scylla/issues/4086
            #pragma GCC diagnostic push
            #pragma GCC diagnostic ignored "-Warray-bounds"
            std::copy(v.begin(), v.end(), _u.small.data);
            #pragma GCC diagnostic pop
            return;
        }
        auto p = v.data();
        auto s = v.size();
        auto b = _u.ptr;
        while (s) {
            memcpy(b->data, p, b->frag_size);
            p += b->frag_size;
            s -= b->frag_size;
            b = b->next;
        }
        assert(!b);
    }

    managed_bytes(std::initializer_list<bytes::value_type> b) : managed_bytes(b.begin(), b.size()) {}

    ~managed_bytes() noexcept {
        if (external()) {
            free_chain(_u.ptr);
        }
    }

    managed_bytes(const managed_bytes& o) : managed_bytes(initialized_later(), o.size()) {
        if (!o.external()) {
            _u.small = o._u.small;
            return;
        }
        auto s = size();
        const blob_storage::ref_type* next_src = &o._u.ptr;
        blob_storage* blob_src = nullptr;
        size_type size_src = 0;
        size_type offs_src = 0;
        blob_storage::ref_type* next_dst = &_u.ptr;
        blob_storage* blob_dst = nullptr;
        size_type size_dst = 0;
        size_type offs_dst = 0;
        while (s) {
            if (!size_src) {
                blob_src = *next_src;
                next_src = &blob_src->next;
                size_src = blob_src->frag_size;
                offs_src = 0;
            }
            if (!size_dst) {
                blob_dst = *next_dst;
                next_dst = &blob_dst->next;
                size_dst = blob_dst->frag_size;
                offs_dst = 0;
            }
            auto now = std::min(size_src, size_dst);
            memcpy(blob_dst->data + offs_dst, blob_src->data + offs_src, now);
            s -= now;
            offs_src += now; size_src -= now;
            offs_dst += now; size_dst -= now;
        }
        assert(size_src == 0 && size_dst == 0);
    }

    managed_bytes(managed_bytes&& o) noexcept
        : _u(o._u)
    {
        if (external()) {
            // _u.ptr cannot be null
            _u.ptr->backref = &_u.ptr;
        }
        o._u.small.size = 0;
    }

    managed_bytes& operator=(managed_bytes&& o) noexcept {
        if (this != &o) {
            this->~managed_bytes();
            new (this) managed_bytes(std::move(o));
        }
        return *this;
    }

    managed_bytes& operator=(const managed_bytes& o) {
        if (this != &o) {
            managed_bytes tmp(o);
            this->~managed_bytes();
            new (this) managed_bytes(std::move(tmp));
        }
        return *this;
    }

    bool operator==(const managed_bytes& o) const {
        if (size() != o.size()) {
            return false;
        }
        if (!external()) {
            return std::equal(_u.small.data, _u.small.data + _u.small.size, o._u.small.data);
        } else {
            auto a = _u.ptr;
            auto a_data = a->data;
            auto a_remain = a->frag_size;
            a = a->next;
            auto b = o._u.ptr;
            auto b_data = b->data;
            auto b_remain = b->frag_size;
            b = b->next;
            while (a_remain || b_remain) {
                auto now = std::min(a_remain, b_remain);
                if (bytes_view(a_data, now) != bytes_view(b_data, now)) {
                    return false;
                }
                a_data += now;
                a_remain -= now;
                if (!a_remain && a) {
                    a_data = a->data;
                    a_remain = a->frag_size;
                    a = a->next;
                }
                b_data += now;
                b_remain -= now;
                if (!b_remain && b) {
                    b_data = b->data;
                    b_remain = b->frag_size;
                    b = b->next;
                }
            }
            return true;
        }
    }

    bool operator!=(const managed_bytes& o) const {
        return !(*this == o);
    }

    bytes_view::value_type& operator[](size_type index) {
        return value_at_index(index);
    }

    const bytes_view::value_type& operator[](size_type index) const {
        return const_cast<const bytes_view::value_type&>(
                const_cast<managed_bytes*>(this)->value_at_index(index));
    }

    size_type size() const {
        if (external()) {
            return _u.ptr->size;
        } else {
            return _u.small.size;
        }
    }

    bool empty() const {
        return _u.small.size == 0;
    }

    // Returns the amount of external memory used.
    size_t external_memory_usage() const noexcept {
        if (external()) {
            size_t mem = 0;
            blob_storage* blob = _u.ptr;
            while (blob) {
                mem += blob->frag_size + sizeof(blob_storage);
                blob = blob->next;
            }
            return mem;
        }
        return 0;
    }

    // Returns the minimum possible amount of external memory used by a managed_bytes
    // of the same size as this one.
    // In other words, it returns the amount of external memory that would be used by
    // this managed_bytes if all the data were allocated in one big fragment.
    size_t minimal_external_memory_usage() const noexcept {
        if (external()) {
            return sizeof(blob_storage) + _u.ptr->size;
        } else {
            return 0;
        }
    }
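
    // Illustrative arithmetic (hypothetical numbers): a value split into two
    // fragments of 100 KiB and 28 KiB reports
    //   external_memory_usage()         == 128 KiB + 2 * sizeof(blob_storage)
    //   minimal_external_memory_usage() == 128 KiB + 1 * sizeof(blob_storage)
    // The difference is the per-fragment header overhead that linearizing the
    // value into a single fragment would save.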

    // Invokes func with a bytes_view spanning the entire value. If the value is
    // fragmented, it is first linearized into a temporary contiguous buffer.
    template <std::invocable<bytes_view> Func>
    std::invoke_result_t<Func, bytes_view> with_linearized(Func&& func) const {
        const bytes_view::value_type* start = nullptr;
        size_t size = 0;
        if (!external()) {
            start = _u.small.data;
            size = _u.small.size;
        } else if (!_u.ptr->next) {
            start = _u.ptr->data;
            size = _u.ptr->size;
        }
        if (start) {
            return func(bytes_view(start, size));
        } else {
            auto data = do_linearize_pure();
            return func(bytes_view(data.get(), _u.ptr->size));
        }
    }

    template <mutable_view is_mutable_view>
    friend class managed_bytes_basic_view;
};
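
// Usage sketch (illustrative only; assumes an allocator context is already
// established for current_allocator(), and `data`/`len` stand in for
// caller-provided input):
//
//   managed_bytes mb(bytes_view(data, len));   // copies, fragmenting large values
//   mb.with_linearized([] (bytes_view v) {
//       // v spans the whole value; for fragmented values it points into a
//       // temporary buffer that lives only for the duration of this call.
//   });
//   bytes flat = to_bytes(mb);                 // explicit linearized copy (defined below)
//   size_t ext = mb.external_memory_usage();   // 0 for inline (<= 15 byte) values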

template <mutable_view is_mutable>
class managed_bytes_basic_view {
public:
    using fragment_type = std::conditional_t<is_mutable == mutable_view::yes, bytes_mutable_view, bytes_view>;
    using owning_type = std::conditional_t<is_mutable == mutable_view::yes, managed_bytes, const managed_bytes>;
    using value_type = typename fragment_type::value_type;
private:
    fragment_type _current_fragment = {};
    blob_storage* _next_fragments = nullptr;
    size_t _size = 0;
public:
    managed_bytes_basic_view() = default;
    managed_bytes_basic_view(const managed_bytes_basic_view&) = default;

    managed_bytes_basic_view(owning_type& mb) {
        if (mb._u.small.size != -1) {
            _current_fragment = fragment_type(mb._u.small.data, mb._u.small.size);
            _size = mb._u.small.size;
        } else {
            auto p = mb._u.ptr;
            _current_fragment = fragment_type(p->data, p->frag_size);
            _next_fragments = p->next;
            _size = p->size;
        }
    }

    managed_bytes_basic_view(fragment_type bv)
        : _current_fragment(bv)
        , _size(bv.size()) {
    }

    size_t size() const { return _size; }
    size_t size_bytes() const { return _size; }
    bool empty() const { return _size == 0; }
    fragment_type current_fragment() const { return _current_fragment; }

    void remove_prefix(size_t n) {
        while (n >= _current_fragment.size() && n > 0) {
            n -= _current_fragment.size();
            remove_current();
        }
        _size -= n;
        _current_fragment.remove_prefix(n);
    }

    void remove_current() {
        _size -= _current_fragment.size();
        if (_size) {
            _current_fragment = fragment_type(_next_fragments->data, _next_fragments->frag_size);
            _next_fragments = _next_fragments->next;
            _current_fragment = _current_fragment.substr(0, _size);
        } else {
            _current_fragment = fragment_type();
        }
    }

    managed_bytes_basic_view prefix(size_t len) const {
        managed_bytes_basic_view v = *this;
        v._size = len;
        v._current_fragment = v._current_fragment.substr(0, len);
        return v;
    }

    managed_bytes_basic_view substr(size_t offset, size_t len) const {
        size_t end = std::min(offset + len, _size);
        managed_bytes_basic_view v = prefix(end);
        v.remove_prefix(offset);
        return v;
    }

    const auto& front() const { return _current_fragment.front(); }
    auto& front() { return _current_fragment.front(); }

    const value_type& operator[](size_t index) const {
        auto v = *this;
        v.remove_prefix(index);
        return v.current_fragment().front();
    }

    bytes linearize() const {
        return linearized(*this);
    }

    bool is_linearized() {
        return _current_fragment.size() == _size;
    }

    // Allow casting mutable views to immutable views.
    template <mutable_view Other>
    friend class managed_bytes_basic_view;

    template <mutable_view Other>
    managed_bytes_basic_view(const managed_bytes_basic_view<Other>& other)
    requires (is_mutable == mutable_view::no) && (Other == mutable_view::yes)
        : _current_fragment(other._current_fragment.data(), other._current_fragment.size())
        , _next_fragments(other._next_fragments)
        , _size(other._size)
    {}
};
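
// Illustrative sketch of fragment-wise iteration over a view (hypothetical
// variable names; fragment_range() is assumed to come from
// utils/fragment_range.hh, which this header includes and which the code
// below also relies on):
//
//   managed_bytes_view mv(mb);
//   size_t total = 0;
//   for (bytes_view frag : fragment_range(mv)) {
//       total += frag.size();        // process one contiguous fragment at a time
//   }
//   // total == mv.size_bytes()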

static_assert(FragmentedView<managed_bytes_view>);
static_assert(FragmentedMutableView<managed_bytes_mutable_view>);

using managed_bytes_opt = std::optional<managed_bytes>;
using managed_bytes_view_opt = std::optional<managed_bytes_view>;

inline bytes to_bytes(const managed_bytes& v) {
    return linearized(managed_bytes_view(v));
}

inline bytes to_bytes(managed_bytes_view v) {
    return linearized(v);
}

template<FragmentedView View>
inline managed_bytes::managed_bytes(View v) : managed_bytes(initialized_later(), v.size_bytes()) {
    managed_bytes_mutable_view self(*this);
    write_fragmented(self, v);
}

template<>
struct appending_hash<managed_bytes_view> {
    template<Hasher Hasher>
    void operator()(Hasher& h, managed_bytes_view v) const {
        feed_hash(h, v.size_bytes());
        for (bytes_view frag : fragment_range(v)) {
            h.update(reinterpret_cast<const char*>(frag.data()), frag.size());
        }
    }
};

namespace std {

template <>
struct hash<managed_bytes_view> {
    size_t operator()(managed_bytes_view v) const {
        bytes_view_hasher h;
        appending_hash<managed_bytes_view>{}(h, v);
        return h.finalize();
    }
};

template <>
struct hash<managed_bytes> {
    size_t operator()(const managed_bytes& v) const {
        return hash<managed_bytes_view>{}(v);
    }
};

} // namespace std
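
// Because std::hash is specialized above and managed_bytes defines operator==,
// managed_bytes can key standard hash containers. A minimal sketch
// (hypothetical container and input names):
//
//   std::unordered_map<managed_bytes, int> counts;
//   counts[managed_bytes(bytes_view(data, len))] += 1;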

sstring to_hex(const managed_bytes& b);
sstring to_hex(const managed_bytes_opt& b);

// The operators below are used only by tests.

inline bool operator==(const managed_bytes_view& a, const managed_bytes_view& b) {
    return a.size_bytes() == b.size_bytes() && compare_unsigned(a, b) == 0;
}

inline std::ostream& operator<<(std::ostream& os, const managed_bytes_view& v) {
    for (bytes_view frag : fragment_range(v)) {
        os << to_hex(frag);
    }
    return os;
}

inline std::ostream& operator<<(std::ostream& os, const managed_bytes& b) {
    return (os << managed_bytes_view(b));
}

std::ostream& operator<<(std::ostream& os, const managed_bytes_opt& b);