Large contiguous buffers put large pressure on the allocator and are a common source of reactor stalls. Therefore, Scylla avoids their use, replacing it with fragmented buffers whenever possible. However, the use of large contiguous buffers is impossible to avoid when dealing with some external libraries (e.g. some compression libraries, like LZ4). Fortunately, calls to external libraries are synchronous, so we can minimize the allocator impact by reusing a single buffer between calls. An implementation of such a reusable buffer has two conflicting goals: to allocate as rarely as possible, and to waste as little memory as possible. The bigger the buffer, the more likely that it will be able to handle future requests without reallocation, but also the more memory it ties up. If request sizes are repetitive, the near-optimal solution is to simply resize the buffer up to match the biggest seen request, and never resize down. However, if we anticipate pathologically large requests, which are caused by an application/configuration bug and are never repeated again after they are fixed, we might want to resize down after such pathological requests stop, so that the memory they took isn't tied up forever. The current implementation of reusable buffers handles this by resizing down to 0 every 100'000 requests. This patch attempts to solve a few shortcomings of the current implementation. 1. Resizing to 0 is too aggressive. During regular operation, we will surely need to resize it back to the previous size again. If something is allocated in the hole left by the old buffer, this might cause a stall. We prefer to resize down only after pathological requests. 2. When resizing, the current implementation allocates the new buffer before freeing the old one. This increases allocator pressure for no reason. 3. When resizing up, the buffer is resized to exactly the requested size. 
That is, if the current size is 1MiB, following requests of 1MiB+1B and 1MiB+2B will both cause a resize. It's preferable to limit the set of possible sizes so that every reset doesn't tend to cause multiple resizes of almost the same size. The natural set of sizes is powers of 2, because that's what the underlying buddy allocator uses. No waste is caused by rounding up the allocation to a power of 2. 4. The interval of 100'000 uses is both too low and too arbitrary. This is up for discussion, but I think that it's preferable to base the dynamics of the buffer on time, rather than the number of uses. It's more predictable to humans. The implementation proposed in this patch addresses these as follows: 1. Instead of resizing down to 0, we resize to the biggest size seen in the last period. As long as at least one maximal (up to a power of 2) "normal" request appears each period, the buffer will never have to be resized. 2. The capacity of the buffer is always rounded up to the nearest power of 2. 3. The resize down period is no longer measured in number of requests but in real time. Additionally, since a shared buffer in asynchronous code is quite a footgun, some rudimentary refcounting is added to assert that only one reference to the buffer exists at a time, and that the buffer isn't downsized while a reference to it exists. Fixes #13437
154 lines
5.2 KiB
C++
154 lines
5.2 KiB
C++
/*
|
|
* Copyright (C) 2018-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
|
*/
|
|
|
|
#include "test/lib/random_utils.hh"
|
|
#include "test/lib/log.hh"
|
|
|
|
#include <boost/range/algorithm/copy.hpp>
|
|
|
|
#include "utils/reusable_buffer.hh"
|
|
#include <seastar/core/manual_clock.hh>
|
|
#include <seastar/testing/test_case.hh>
|
|
#include <seastar/util/later.hh>
|
|
#include <seastar/core/coroutine.hh>
|
|
#include <bit>
|
|
|
|
using namespace seastar;
|
|
|
|
// Verifies that reusable_buffer_guard::get_linearized_view() returns a view
// equal to the source data — both for a bytes_ostream input and for a
// fragmented_temporary_buffer input — and that the source object itself is
// left intact. Exercised over fixed and random sizes, with the buffer reused
// across calls so that both grow and reuse paths are hit.
SEASTAR_TEST_CASE(test_get_linearized_view) {
    auto check = [] (size_t len, utils::reusable_buffer<manual_clock>& buffer) {
        testlog.info("Testing buffer size {}", len);
        auto expected = tests::random::get_bytes(len);

        bytes_ostream os;
        os.write(expected);

        {
            // Linearize a bytes_ostream; the ostream's own content must be
            // unchanged afterwards.
            auto guard = utils::reusable_buffer_guard(buffer);
            auto view = guard.get_linearized_view(os);
            BOOST_REQUIRE_EQUAL(view.size(), len);
            BOOST_REQUIRE(view == expected);
            BOOST_REQUIRE(os.linearize() == expected);
        }

        {
            // Chop the data into default-sized fragments and linearize the
            // resulting fragmented_temporary_buffer.
            std::vector<temporary_buffer<char>> fragments;
            for (bytes_view rest = expected; !rest.empty(); ) {
                auto chunk = std::min<size_t>(rest.size(), fragmented_temporary_buffer::default_fragment_size);
                fragments.emplace_back(reinterpret_cast<const char*>(rest.data()), chunk);
                rest.remove_prefix(chunk);
            }

            auto guard = utils::reusable_buffer_guard(buffer);
            auto fbuf = fragmented_temporary_buffer(std::move(fragments), expected.size());
            auto view = guard.get_linearized_view(fragmented_temporary_buffer::view(fbuf));
            BOOST_REQUIRE_EQUAL(view.size(), len);
            BOOST_REQUIRE(view == expected);
            BOOST_REQUIRE(linearized(fragmented_temporary_buffer::view(fbuf)) == expected);
        }
    };

    for (auto round = 0; round < 2; round++) {
        utils::reusable_buffer<manual_clock> buffer(std::chrono::milliseconds(1));

        // Deterministic sizes: empty, large, small, medium.
        check(0, buffer);
        check(1'000'000, buffer);
        check(1'000, buffer);
        check(100'000, buffer);

        // Plus a batch of random sizes up to 512 KiB.
        for (auto i = 0; i < 25; i++) {
            check(tests::random::get_int(512 * 1024), buffer);
        }
    }

    return make_ready_future<>();
}
|
|
|
|
// Verifies the make_bytes_ostream()/make_fragmented_temporary_buffer() paths:
// the filler callback is handed a mutable view of exactly `maximum` bytes,
// writes `actual` bytes into it, and the produced object must contain exactly
// those `actual` bytes. Exercised over fixed and random (maximum, actual)
// pairs, reusing the buffer across calls.
SEASTAR_TEST_CASE(test_make_buffer) {
    auto check = [] (size_t maximum, size_t actual, utils::reusable_buffer<manual_clock>& buffer) {
        testlog.info("Testing maximum buffer size {}, actual: {} ", maximum, actual);

        bytes expected;
        auto filler = [&] (bytes_mutable_view out) {
            expected = tests::random::get_bytes(actual);
            // The callback must be offered the full requested capacity...
            BOOST_REQUIRE_EQUAL(maximum, out.size());
            // ...which is enough for what we are about to write.
            BOOST_REQUIRE_LE(actual, out.size());
            boost::range::copy(expected, out.begin());
            return actual;
        };

        {
            // bytes_ostream flavor.
            auto guard = utils::reusable_buffer_guard(buffer);
            auto os = guard.make_bytes_ostream(maximum, filler);

            BOOST_REQUIRE_EQUAL(os.size(), actual);
            BOOST_REQUIRE(os.linearize() == expected);
        }

        {
            // fragmented_temporary_buffer flavor.
            auto guard = utils::reusable_buffer_guard(buffer);
            auto fbuf = guard.make_fragmented_temporary_buffer(maximum, filler);
            auto view = fragmented_temporary_buffer::view(fbuf);

            BOOST_REQUIRE_EQUAL(view.size_bytes(), actual);
            BOOST_REQUIRE(linearized(view) == expected);
        }
    };

    for (auto round = 0; round < 2; round++) {
        utils::reusable_buffer<manual_clock> buffer(std::chrono::milliseconds(1));

        // Deterministic cases, including empty output and full-capacity output.
        check(0, 0, buffer);
        check(100'000, 0, buffer);
        check(200'000, 200'000, buffer);
        check(400'000, 100'000, buffer);

        // Random cases: maximum >= actual, both up to 512 KiB.
        for (auto i = 0; i < 25; i++) {
            auto a = tests::random::get_int(512 * 1024);
            auto b = tests::random::get_int(512 * 1024);
            check(std::max(a, b), std::min(a, b), buffer);
        }
    }

    return make_ready_future<>();
}
|
|
|
|
// Verifies the time-based decay of the reusable buffer: the capacity is
// rounded up (to a power of 2 in the current implementation), repeated
// requests that fit the rounded capacity cause no reallocation, and after a
// full idle decay period the buffer shrinks to the largest size seen in the
// last period — eventually all the way to 0.
SEASTAR_TEST_CASE(test_decay) {
    using namespace std::chrono_literals;
    utils::reusable_buffer<manual_clock> buffer(1s);
    auto use_buffer = [&buffer] (size_t size) {
        auto guard = utils::reusable_buffer_guard(buffer);
        guard.get_temporary_buffer(size);
    };
    auto tick = [] (manual_clock::duration d) {
        manual_clock::advance(d);
        // Yield so the timer-driven decay logic gets a chance to run.
        return yield();
    };
    BOOST_REQUIRE(buffer.reallocs() == 0);
    use_buffer(1'000'000);
    use_buffer(1'000'001);
    use_buffer(1'000'000);
    use_buffer(1'000);
    // All four requests fit within bit_ceil(1'000'000), so only the initial
    // allocation happened.
    BOOST_REQUIRE_EQUAL(buffer.reallocs(), 1);
    // It isn't strictly required from the implementation to use
    // power-of-2 sizes, just sizes coarse enough to limit the number
    // of allocations.
    // If the implementation is modified, this assert can be freely changed.
    BOOST_REQUIRE_EQUAL(buffer.size(), std::bit_ceil(size_t(1'000'001)));
    co_await tick(1500ms);
    use_buffer(1'000);
    // The small request keeps the buffer in use, but doesn't resize it yet.
    BOOST_REQUIRE_EQUAL(buffer.reallocs(), 1);
    co_await tick(1000ms);
    // A full period passed with nothing bigger than 1'000 seen: shrink to it.
    BOOST_REQUIRE_EQUAL(buffer.reallocs(), 2);
    BOOST_REQUIRE_EQUAL(buffer.size(), std::bit_ceil(size_t(1'000)));
    co_await tick(1000ms);
    // Another idle period with no requests at all: shrink to nothing.
    BOOST_REQUIRE_EQUAL(buffer.reallocs(), 3);
    BOOST_REQUIRE_EQUAL(buffer.size(), 0);
}
|