Files
scylladb/utils/reusable_buffer.hh
Kefu Chai a1dcddd300 utils: do not include unused headers
these unused includes were identified by clangd. see
https://clangd.llvm.org/guides/include-cleaner#unused-include-warning
for more details on the "Unused include" warning.

Signed-off-by: Kefu Chai <kefu.chai@scylladb.com>

Closes scylladb/scylladb#16833
2024-01-18 12:50:06 +02:00

320 lines
13 KiB
C++

/*
* Copyright (C) 2018-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#pragma once
#include "utils/fragmented_temporary_buffer.hh"
#include <seastar/core/timer.hh>
#include <seastar/core/memory.hh>
#include <bit>
namespace utils {
// Users of reusable_buffer don't need the templated parts,
// so the non-templated implementation parts are extracted
// to a separate class.
// This should prevent some unnecessary template instantiations.
class reusable_buffer_impl {
protected:
friend class reusable_buffer_guard;
// Max size observed since the last decay(), rounded up.
// Currently we round to the smallest power of 2 greater or equal to
// to the observed size.
size_t _high_watermark = 0;
std::unique_ptr<bytes::value_type[]> _buf; // The underlying contiguous buffer.
size_t _buf_size = 0;
size_t _refcount = 0; // Used purely for anti-misuse checks.
size_t _reallocs = 0; // Number of size changes.
public:
size_t size() const noexcept { return _buf_size; }
size_t reallocs() const noexcept { return _reallocs; }
protected:
// The guard keeps a reference to the buffer, so it must have a stable address.
reusable_buffer_impl(const reusable_buffer_impl&) = delete;
reusable_buffer_impl& operator=(const reusable_buffer_impl&) = delete;
reusable_buffer_impl() = default;
~reusable_buffer_impl() {
assert(_refcount == 0);
}
void resize(size_t new_size) & {
// Reusable buffers are expected to be used when large contiguous
// allocations are unavoidable. There is no point in warning
// about them since there isn't much that can be done.
seastar::memory::scoped_large_allocation_warning_disable g;
// Clear before shrinking so that the old buffer
// is freed before allocating the new buffer.
// This lesses the pressure on the allocator and should guarantee
// a success if the new size is smaller than the old size.
_buf = nullptr;
_buf_size = 0;
_reallocs += 1;
_buf.reset(new bytes::value_type[new_size]);
_buf_size = new_size;
}
// The below methods can be considered "public", but they are only accessible
// through their guard wrappers, to make misuse harder.
bytes_mutable_view get_temporary_buffer(size_t size) & {
_high_watermark = std::max(_high_watermark, std::bit_ceil(size));
if (_high_watermark > _buf_size) {
resize(_high_watermark);
}
return {_buf.get(), size};
}
// The helpers below only interact with the rest of the class through
// get_temporary_buffer().
// They could as well be free functions.
// Returns a linearized view onto the provided data.
// If the input view is already linearized, it is returned as-is.
// Otherwise the contents are copied to the reusable buffer
// and a view into the buffer is returned.
bytes_view get_linearized_view(fragmented_temporary_buffer::view ftb) & {
if (ftb.current_fragment().size() == ftb.size_bytes()) {
return {ftb.current_fragment().data(), ftb.size_bytes()};
}
const auto out = get_temporary_buffer(ftb.size_bytes()).data();
auto dst = out;
for (bytes_view fragment : fragmented_temporary_buffer::view(ftb)) {
dst = std::copy(fragment.begin(), fragment.end(), dst);
}
return {out, ftb.size_bytes()};
}
// Returns a linearized view onto the provided data.
// If the input view is already linearized, it is returned as-is.
// Otherwise the contents are copied to the reusable buffer
// and a view into the buffer is returned.
bytes_view get_linearized_view(bytes_ostream& bo) & {
if (bo.is_linearized()) {
return bo.view();
}
const auto out = get_temporary_buffer(bo.size()).data();
auto dst = out;
for (bytes_view fragment : bo) {
dst = std::copy(fragment.begin(), fragment.end(), dst);
}
return {out, bo.size()};
}
// Provides a contiguous buffer of size `maximum_length` to `fn`.
// `fn` writes to the buffer and returns the number of bytes written.
// A fragmented buffer containing the data written by `fn` is returned.
//
// If the data fits into a single fragment, `fn` is ran directly on
// the only fragment of a newly allocated fragmented_buffer.
// Otherwise the contiguous buffer for `fn` is allocated from the reusable
// buffer, and its contents are copied to a new fragmented buffer after `fn`
// returns.
// This way a copy is avoided in the small case.
template<typename Function>
requires std::is_invocable_r_v<size_t, Function, bytes_mutable_view>
bytes_ostream make_bytes_ostream(size_t maximum_length, Function&& fn) & {
bytes_ostream output;
if (maximum_length && maximum_length <= bytes_ostream::max_chunk_size()) {
auto ptr = output.write_place_holder(maximum_length);
size_t actual_length = fn(bytes_mutable_view(ptr, maximum_length));
output.remove_suffix(output.size() - actual_length);
} else {
auto view = get_temporary_buffer(maximum_length);
size_t actual_length = fn(view);
output.write(bytes_view(view.data(), actual_length));
}
return output;
}
// Provides a contiguous buffer of size `maximum_length` to `fn`.
// `fn` writes to the buffer and returns the number of bytes written.
// A fragmented buffer containing the data written by `fn` is returned.
//
// If the data fits into a single fragment, `fn` is ran directly on
// the only fragment of a newly allocated fragmented_buffer.
// Otherwise the contiguous buffer for `fn` is allocated from the reusable
// buffer, and its contents are copied to a new fragmented buffer after `fn`
// returns.
// This way a copy is avoided in the small case.
template<typename Function>
requires std::is_invocable_r_v<size_t, Function, bytes_mutable_view>
fragmented_temporary_buffer make_fragmented_temporary_buffer(size_t maximum_length, Function&& fn) & {
if (maximum_length <= fragmented_temporary_buffer::default_fragment_size) {
seastar::temporary_buffer<char> buf(maximum_length);
auto view = bytes_mutable_view(reinterpret_cast<bytes::value_type*>(buf.get_write()), buf.size());
size_t actual_length = fn(view);
buf.trim(actual_length);
std::vector<seastar::temporary_buffer<char>> chunks;
chunks.push_back(std::move(buf));
return fragmented_temporary_buffer(std::move(chunks), actual_length);
} else {
auto view = get_temporary_buffer(maximum_length);
size_t actual_length = fn(view);
return fragmented_temporary_buffer(reinterpret_cast<const char*>(view.data()), actual_length);
}
}
};
/* Sometimes (e.g. for compression), we need a big contiguous buffer
* of a given size.
*
* Big buffers are a problem for the allocator, so if such a buffer
* is needed regularly, we want to reuse it. In the optimum, we
* only want to use one buffer per concurrent task (if all uses
* of the buffer are synchronous, one buffer per shard is enough),
* and only resize the buffer when it's too small for the current
* request.
*
* At the same time we might not want to keep the buffer around forever.
* A pathological request might cause the buffer to grow to some
* unreasonable size. After the source of the pathology (e.g. bad compression
* chunk size) is fixed, we want the buffer to free the capacity that will
* most likely not be used again.
*
* This class provides a buffer which balances both tasks by periodically
* shrinking its size to the largest recently seen size.
* The buffer always has a power-of-2 size, to minimize the number of
* allocations for small size changes.
*
* Even though the views returned by the buffer's methods have exactly the same
* requested size, the entire underlying capacity (of size size()) is safe to use.
* The returned views are always a prefix of the underlying capacity.
*
* Under a stable workload, the buffer's size will be stable.
* If a pathological request comes, the buffer will grow to accommodate it.
* 1-2 periods after the pathology ceases, the buffer will shrink back
* to its steady-state size.
*
* Be very careful with this buffer.
* Its content might disappear during a preemption point. This can be
* checked by comparing reallocs() before and after the preemption point.
* The content might be also easily invalidated by an unrelated future
* sharing the same buffer, if you are not careful.
*
* To make this buffer slightly harder to misuse, it's only accessible
* by reusable_buffer_guard.
* There can only exist one guard at a time.
* The guard can be used to obtain a view at most once.
*/
template <typename Clock>
class reusable_buffer : public reusable_buffer_impl {
public:
using period_type = typename Clock::duration;
private:
seastar::timer<Clock> _decay_timer;
period_type _decay_period;
void decay() & {
assert(_refcount == 0);
if (_high_watermark <= _buf_size / 16) {
// We shrink when the size falls at least by four power-of-2
// notches, instead of just one notch. This adds hysteresis:
// it prevents small oscillations from inducing shrink/grow cycles.
// With the factor of 16 above, only outliers at least 8x bigger than
// the otherwise stable size might cause reallocations.
//
// In other words, we "define" a pathological request as one that is
// at least 8x bigger than the stable size, where the stable size
// is the max of all sizes seen within a decay period.
resize(_high_watermark);
}
_high_watermark = 0;
}
public:
reusable_buffer(period_type period)
: _decay_period(period)
{
_decay_timer.set_callback([this] {decay();});
_decay_timer.arm_periodic(_decay_period);
}
};
/* Exists only to assert that there exists at most one reference to the
* reusable_buffer, to hopefully make it less of a footgun.
*
* The reference/use counts exist only for assert purposes.
* They don't influence the program otherwise.
*
* Never keep the guard across preemption points.
*
* Don't let views obtained through the guard outlive the guard,
* that defeats its purpose and is begging for trouble.
*
* The guard only accesses _refcount and its "public" methods.
* It doesn't mess with its internals.
*/
class reusable_buffer_guard {
private:
reusable_buffer_impl& _buf;
bool used = false;
private:
void mark_used() {
assert(!used);
used = true;
}
public:
reusable_buffer_guard(const reusable_buffer_guard&) = delete;
reusable_buffer_guard& operator=(const reusable_buffer_guard&) = delete;
reusable_buffer_guard(reusable_buffer_impl& _buf)
: _buf(_buf)
{
assert(_buf._refcount == 0);
_buf._refcount += 1;
}
~reusable_buffer_guard() {
_buf._refcount -= 1;
}
// The result mustn't outlive `this`.
// No method of `this` may be called again.
bytes_mutable_view get_temporary_buffer(size_t size) & {
mark_used();
return _buf.get_temporary_buffer(size);
}
// The result mustn't outlive `this`.
// No method of `this` may be called again.
bytes_view get_linearized_view(fragmented_temporary_buffer::view ftb) & {
mark_used();
return _buf.get_linearized_view(std::move(ftb));
}
// The result mustn't outlive `this`.
// No method of `this` may be called again.
bytes_view get_linearized_view(bytes_ostream& bo) & {
mark_used();
return _buf.get_linearized_view(bo);
}
// The result mustn't outlive `this`.
// No method of `this` may be called again.
template<typename Function>
requires std::is_invocable_r_v<size_t, Function, bytes_mutable_view>
bytes_ostream make_bytes_ostream(size_t maximum_length, Function&& fn) & {
mark_used();
return _buf.make_bytes_ostream(maximum_length, std::forward<Function>(fn));
}
// The result mustn't outlive `this`.
// No method of `this` may be called again.
template<typename Function>
requires std::is_invocable_r_v<size_t, Function, bytes_mutable_view>
fragmented_temporary_buffer make_fragmented_temporary_buffer(size_t maximum_length, Function&& fn) & {
mark_used();
return _buf.make_fragmented_temporary_buffer(maximum_length, std::forward<Function>(fn));
}
};
} // namespace utils