mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-28 18:50:53 +00:00
logalloc: add hold_reserve
mutation_partition_v2::apply_monotonically() needs to perform some allocations in a destructor, to ensure that the invariants of the data structure are restored before returning. But it is usually called with reclaiming disabled, so those allocations might fail even on a perfectly healthy node with plenty of reclaimable memory. This patch adds a mechanism that makes it possible to reserve some LSA memory in advance (by asking the allocator to keep it unused) and to make it available for allocation at exactly the point where allocation success has to be guaranteed.
This commit is contained in:
@@ -519,6 +519,73 @@ SEASTAR_TEST_CASE(test_zone_reclaiming_preserves_free_size) {
|
||||
});
|
||||
}
|
||||
|
||||
// Tests the intended usage of hold_reserve.
|
||||
//
|
||||
// Sets up a reserve, exhausts memory, opens the reserve,
|
||||
// checks that this allows us to do multiple additional allocations
|
||||
// without failing.
|
||||
SEASTAR_THREAD_TEST_CASE(test_hold_reserve) {
    logalloc::region region;
    logalloc::allocating_section as;

    // Size of every reserve requested in this test. The test treats it as
    // "one segment" worth of memory (presumably the LSA segment size --
    // re-check this value if the segment size ever changes).
    static constexpr size_t reserve_size = 128 * 1024;

    // We will fill LSA with an intrusive list of small entries.
    // We make it intrusive to avoid any containers which do std allocations,
    // since it could make the test imprecise.
    struct entry {
        using link = boost::intrusive::list_member_hook<boost::intrusive::link_mode<boost::intrusive::auto_unlink>>;
        link _link;
        // We are going to fill the entire memory with this.
        // Padding makes the entries bigger to speed up the test.
        std::array<char, 8192> _padding;
    };
    using list = boost::intrusive::list<entry,
        boost::intrusive::member_hook<entry, entry::link, &entry::_link>,
        boost::intrusive::constant_time_size<false>>;

    as.with_reserve(region, [&] {
        with_allocator(region.allocator(), [&] {
            // Precondition of the test: an entry (plus some slack) has to be smaller
            // than the allocator's preferred maximum contiguous allocation.
            // Checked through Boost.Test instead of assert() so that the check
            // is still evaluated in builds which define NDEBUG.
            BOOST_REQUIRE(sizeof(entry) + 128 < current_allocator().preferred_max_contiguous_allocation());
            logalloc::reclaim_lock rl(region);

            // Reserve a segment.
            auto guard = std::make_optional<hold_reserve>(reserve_size);

            // Fill the entire available memory with LSA objects.
            list entries;
            auto clean_up = defer([&entries] {
                entries.clear_and_dispose([] (entry *e) {current_allocator().destroy(e);});
            });
            auto alloc_entry = [] () {
                return current_allocator().construct<entry>();
            };
            try {
                while (true) {
                    entries.push_back(*alloc_entry());
                }
            } catch (const std::bad_alloc&) {
                // expected
            }

            // Sanity check. We should be OOM at this point.
            //
            // Note: the temporaries are brace-constructed on purpose. Inside the
            // Boost macros the expression becomes a statement, and
            // `hold_reserve(reserve_size);` would be parsed as a declaration.
            BOOST_REQUIRE_THROW(hold_reserve{reserve_size}, std::bad_alloc);
            BOOST_REQUIRE_THROW(alloc_entry(), std::bad_alloc);

            // Release the reserve.
            guard.reset();

            // Sanity check. Each temporary reserve is released again at the end
            // of its statement, so the same memory can be reserved repeatedly.
            BOOST_REQUIRE_NO_THROW(hold_reserve{reserve_size});
            BOOST_REQUIRE_NO_THROW(hold_reserve{reserve_size});
            BOOST_REQUIRE_NO_THROW(hold_reserve{reserve_size});

            // Freeing up a segment should be enough to allocate multiple small entries;
            for (int i = 0; i < 10; ++i) {
                entries.push_back(*alloc_entry());
            }
        });
    });
}
|
||||
|
||||
// No point in testing contiguous memory allocation in debug mode
|
||||
#ifndef SEASTAR_DEFAULT_ALLOCATOR
|
||||
SEASTAR_THREAD_TEST_CASE(test_can_reclaim_contiguous_memory_with_mixed_allocations) {
|
||||
|
||||
@@ -188,6 +188,24 @@ public:
|
||||
void invalidate_references() noexcept {
|
||||
++_invalidate_counter;
|
||||
}
|
||||
|
||||
// Asks the allocator to set aside some free memory,
|
||||
// preventing it from being allocated until the matching
|
||||
// unreserve() call. Can be used to preallocate some memory
|
||||
// for a critical section where allocations can't fail.
|
||||
//
|
||||
// This is a hack designed with the implementation details of the
|
||||
// log-structured allocator in mind. In other allocators,
|
||||
// it doesn't do anything useful.
|
||||
//
|
||||
// Don't use this unless you understand exactly what you are doing.
|
||||
// Default implementation: sets nothing aside and returns 0 as the opaque
// token. Strategies that can actually hold memory back override this.
virtual uintptr_t reserve(size_t memory) {
    constexpr uintptr_t nothing_reserved = 0;
    return nothing_reserved;
}
|
||||
|
||||
// As the argument to this function, you must pass the *return value* of the matching reserve().
|
||||
// Default implementation: does nothing with the token.
virtual void unreserve(uintptr_t opaque) noexcept {
    // Intentionally empty.
}
|
||||
};
|
||||
|
||||
class standard_allocation_strategy : public allocation_strategy {
|
||||
@@ -257,6 +275,16 @@ struct alloc_strategy_deleter {
|
||||
}
|
||||
};
|
||||
|
||||
// RAII for allocation_strategy::reserve().
|
||||
class hold_reserve {
|
||||
uintptr_t _opaque;
|
||||
public:
|
||||
hold_reserve(size_t memory) : _opaque(current_allocator().reserve(memory)) {}
|
||||
~hold_reserve() { current_allocator().unreserve(_opaque); }
|
||||
// Disallow copying and moving. They *could* be implemented, but I just didn't bother.
|
||||
hold_reserve(hold_reserve&&) = delete;
|
||||
};
|
||||
|
||||
// std::unique_ptr which can be used for owning an object allocated using allocation_strategy.
|
||||
// Must be destroyed before the pointer is invalidated. For compacting allocators, that
|
||||
// means it must not escape outside allocating_section or reclaim lock.
|
||||
|
||||
@@ -1007,7 +1007,17 @@ class segment_pool {
|
||||
utils::dynamic_bitset _lsa_owned_segments_bitmap; // owned by this
|
||||
utils::dynamic_bitset _lsa_free_segments_bitmap; // owned by this, but not in use
|
||||
size_t _free_segments = 0;
|
||||
|
||||
// Invariant: _free_segments > _current_emergency_reserve_goal.
|
||||
// Used to ensure that some critical allocations won't fail.
|
||||
// (We grow _current_emergency_reserve_goal in advance and shrink it right
|
||||
// before the critical allocations, which allows them to utilize the pre-reserved
|
||||
// segments).
|
||||
size_t _current_emergency_reserve_goal = 1;
|
||||
// Used by allocating_section to request a certain number of free segments
|
||||
// to be prepared for usage when the section is entered.
|
||||
// This is more of a side-channel argument to refill_emergency_reserve() than a real piece of state.
|
||||
// Passing it via a variable makes it easier to debug.
|
||||
size_t _emergency_reserve_max = 30;
|
||||
bool _allocation_failure_flag = false;
|
||||
bool _allocation_enabled = true;
|
||||
@@ -2347,6 +2357,44 @@ public:
|
||||
return _eviction_fn;
|
||||
}
|
||||
|
||||
// LSA holds an internal "emergency reserve" of free segments that
|
||||
// is only "opened" for usage before some critical allocations
|
||||
// (in particular: the ones performed during memory compaction)
|
||||
// to ensure that they won't fail.
|
||||
//
|
||||
// Here we hijack this mechanism to let the rest of the application implement
|
||||
// some critical sections with infallible LSA allocations.
|
||||
//
|
||||
// reserve() increments the size of the internal emergency reserve,
|
||||
// unreserve() decrements it.
|
||||
//
|
||||
// When you want to have some critical section that has to do some LSA
|
||||
// allocations infallibly (e.g. to restore some invariants
|
||||
// of an LSA-managed data structure in a destructor), you can call reserve()
|
||||
// beforehand to ensure that some extra memory will be held unused,
|
||||
// and then call unreserve() (with reserve()'s return value as the argument)
|
||||
// to make the reserved free segments available to the critical section.
|
||||
//
|
||||
uintptr_t reserve(size_t memory) override {
|
||||
// We round up the requested reserve to full segments.
|
||||
size_t n_segments = (memory + segment::size - 1) >> segment::size_shift;
|
||||
|
||||
auto& pool = segment_pool();
|
||||
size_t new_goal = pool.current_emergency_reserve_goal() + n_segments;
|
||||
pool.ensure_free_segments(new_goal);
|
||||
pool.set_current_emergency_reserve_goal(new_goal);
|
||||
|
||||
static_assert(sizeof(uintptr_t) >= sizeof(size_t));
|
||||
return n_segments;
|
||||
}
|
||||
|
||||
void unreserve(uintptr_t n_segments) noexcept override {
|
||||
auto& pool = segment_pool();
|
||||
assert(pool.current_emergency_reserve_goal() >= n_segments);
|
||||
size_t new_goal = pool.current_emergency_reserve_goal() - n_segments;
|
||||
pool.set_current_emergency_reserve_goal(new_goal);
|
||||
}
|
||||
|
||||
friend class region;
|
||||
friend class lsa_buffer;
|
||||
friend class region_evictable_occupancy_ascending_less_comparator;
|
||||
|
||||
Reference in New Issue
Block a user