dirty_memory_manager tracks lsa regions (memtables) under region_group:s, in order to be able to pick up the largest memtable as a candidate for flushing. Just as region_group:s contain regions, they can also contain other region_group:s in a nested structure. It also tracks the nested region_group that contains the largest region in a binomial heap. This latter facility is no longer used. It saw use when we had the system dirty_memory_manager nested under the user dirty_memory_manager, but that proved too complicated so it was undone. We still nest a virtual region_group under the real region_group, and in fact it is the virtual region_group that holds the memtables, but it is accessed directly to find the largest memtable (region_group::get_largest_region) and so all the mechanism that sorts region_group:s is bypassed. Start to dismantle this house of cards by removing the subgroup sorting. Since the hierarchy has exactly one parent and one child, it's clearly useless. This is seen by the fact that we can just remove everything related. We still need the _subgroups member to hold the virtual region_group; it's replaced by a vector. I verified that the non-intrusive vector is exception safe since push_back() happens at the very end; in any case this is early during setup where we aren't under memory pressure. A few tests that check the removed functionality are deleted. Closes #11515
194 lines
6.2 KiB
C++
194 lines
6.2 KiB
C++
// Copyright (C) 2012-present ScyllaDB
|
|
// SPDX-License-Identifier: AGPL-3.0-or-later
|
|
|
|
|
|
#include "dirty_memory_manager.hh"
|
|
#include <seastar/util/later.hh>
|
|
#include <seastar/core/with_scheduling_group.hh>
|
|
#include "seastarx.hh"
|
|
|
|
// Code previously under logalloc namespace
|
|
namespace dirty_memory_manager_logalloc {
|
|
|
|
using namespace logalloc;
|
|
|
|
inline void
|
|
region_group_binomial_group_sanity_check(const region_group::region_heap& bh) {
|
|
#ifdef SEASTAR_DEBUG
|
|
bool failed = false;
|
|
size_t last = std::numeric_limits<size_t>::max();
|
|
for (auto b = bh.ordered_begin(); b != bh.ordered_end(); b++) {
|
|
auto t = (*b)->evictable_occupancy().total_space();
|
|
if (!(t <= last)) {
|
|
failed = true;
|
|
break;
|
|
}
|
|
last = t;
|
|
}
|
|
if (!failed) {
|
|
return;
|
|
}
|
|
|
|
fmt::print("Sanity checking FAILED, size {}\n", bh.size());
|
|
for (auto b = bh.ordered_begin(); b != bh.ordered_end(); b++) {
|
|
auto r = (*b);
|
|
auto t = r->evictable_occupancy().total_space();
|
|
fmt::print(" r = {} (id={}), occupancy = {}\n", fmt::ptr(r), r->id(), t);
|
|
}
|
|
assert(0);
|
|
#endif
|
|
}
|
|
|
|
bool
|
|
region_evictable_occupancy_ascending_less_comparator::operator()(size_tracked_region* r1, size_tracked_region* r2) const {
|
|
return r1->evictable_occupancy().total_space() < r2->evictable_occupancy().total_space();
|
|
}
|
|
|
|
region_group_reclaimer region_group::no_reclaimer;
|
|
|
|
uint64_t region_group::top_region_evictable_space() const noexcept {
|
|
return _regions.empty() ? 0 : _regions.top()->evictable_occupancy().total_space();
|
|
}
|
|
|
|
dirty_memory_manager_logalloc::size_tracked_region* region_group::get_largest_region() noexcept {
|
|
return _regions.empty() ? nullptr : _regions.top();
|
|
}
|
|
|
|
void
|
|
region_group::add(region_group* child) {
|
|
_subgroups.push_back(child);
|
|
update(child->_total_memory);
|
|
}
|
|
|
|
void
|
|
region_group::del(region_group* child) {
|
|
_subgroups.erase(std::find(_subgroups.begin(), _subgroups.end(), child));
|
|
update(-child->_total_memory);
|
|
}
|
|
|
|
void
|
|
region_group::add(region* child_r) {
|
|
auto child = static_cast<size_tracked_region*>(child_r);
|
|
assert(!child->_heap_handle);
|
|
child->_heap_handle = std::make_optional(_regions.push(child));
|
|
region_group_binomial_group_sanity_check(_regions);
|
|
update(child_r->occupancy().total_space());
|
|
}
|
|
|
|
void
|
|
region_group::del(region* child_r) {
|
|
auto child = static_cast<size_tracked_region*>(child_r);
|
|
if (child->_heap_handle) {
|
|
_regions.erase(*std::exchange(child->_heap_handle, std::nullopt));
|
|
region_group_binomial_group_sanity_check(_regions);
|
|
update(-child_r->occupancy().total_space());
|
|
}
|
|
}
|
|
|
|
void
|
|
region_group::moved(region* old_address, region* new_address) {
|
|
auto old_child = static_cast<size_tracked_region*>(old_address);
|
|
if (old_child->_heap_handle) {
|
|
_regions.erase(*std::exchange(old_child->_heap_handle, std::nullopt));
|
|
}
|
|
|
|
auto new_child = static_cast<size_tracked_region*>(new_address);
|
|
|
|
// set the old child handle since it's going to be moved
|
|
// to the new child's handle by the respective move constructor /
|
|
// assignment operator.
|
|
old_child->_heap_handle = std::make_optional(_regions.push(new_child));
|
|
region_group_binomial_group_sanity_check(_regions);
|
|
}
|
|
|
|
bool
|
|
region_group::execution_permitted() noexcept {
|
|
return do_for_each_parent(this, [] (auto rg) noexcept {
|
|
return rg->under_pressure() ? stop_iteration::yes : stop_iteration::no;
|
|
}) == nullptr;
|
|
}
|
|
|
|
future<>
|
|
region_group::start_releaser(scheduling_group deferred_work_sg) {
|
|
return with_scheduling_group(deferred_work_sg, [this] {
|
|
return yield().then([this] {
|
|
return repeat([this] () noexcept {
|
|
if (_shutdown_requested) {
|
|
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
|
}
|
|
|
|
if (!_blocked_requests.empty() && execution_permitted()) {
|
|
auto req = std::move(_blocked_requests.front());
|
|
_blocked_requests.pop_front();
|
|
req->allocate();
|
|
return make_ready_future<stop_iteration>(stop_iteration::no);
|
|
} else {
|
|
// Block reclaiming to prevent signal() from being called by reclaimer inside wait()
|
|
// FIXME: handle allocation failures (not very likely) like allocating_section does
|
|
tracker_reclaimer_lock rl(logalloc::shard_tracker());
|
|
return _relief.wait().then([] {
|
|
return stop_iteration::no;
|
|
});
|
|
}
|
|
});
|
|
});
|
|
});
|
|
}
|
|
|
|
region_group::region_group(sstring name, region_group *parent,
|
|
region_group_reclaimer& reclaimer, scheduling_group deferred_work_sg)
|
|
: _parent(parent)
|
|
, _reclaimer(reclaimer)
|
|
, _blocked_requests(on_request_expiry{std::move(name)})
|
|
, _releaser(reclaimer_can_block() ? start_releaser(deferred_work_sg) : make_ready_future<>())
|
|
{
|
|
if (_parent) {
|
|
_parent->add(this);
|
|
}
|
|
}
|
|
|
|
bool region_group::reclaimer_can_block() const {
|
|
return _reclaimer.throttle_threshold() != std::numeric_limits<size_t>::max();
|
|
}
|
|
|
|
void region_group::notify_relief() {
|
|
_relief.signal();
|
|
for (region_group* child : _subgroups) {
|
|
child->notify_relief();
|
|
}
|
|
}
|
|
|
|
void region_group::update(ssize_t delta) {
|
|
// Most-enclosing group which was relieved.
|
|
region_group* top_relief = nullptr;
|
|
|
|
do_for_each_parent(this, [&top_relief, delta] (region_group* rg) mutable {
|
|
rg->_total_memory += delta;
|
|
|
|
if (rg->_total_memory >= rg->_reclaimer.soft_limit_threshold()) {
|
|
rg->_reclaimer.notify_soft_pressure();
|
|
} else {
|
|
rg->_reclaimer.notify_soft_relief();
|
|
}
|
|
|
|
if (rg->_total_memory > rg->_reclaimer.throttle_threshold()) {
|
|
rg->_reclaimer.notify_pressure();
|
|
} else if (rg->_reclaimer.under_pressure()) {
|
|
rg->_reclaimer.notify_relief();
|
|
top_relief = rg;
|
|
}
|
|
|
|
return stop_iteration::no;
|
|
});
|
|
|
|
if (top_relief) {
|
|
top_relief->notify_relief();
|
|
}
|
|
}
|
|
|
|
void region_group::on_request_expiry::operator()(std::unique_ptr<allocating_function>& func) noexcept {
|
|
func->fail(std::make_exception_ptr(blocked_requests_timed_out_error{_name}));
|
|
}
|
|
|
|
}
|