Files
scylladb/dirty_memory_manager.cc
Avi Kivity 6c797587c7 dirty_memory_manager: region_group: remove sorting of subgroups
dirty_memory_manager tracks lsa regions (memtables) under region_group:s,
in order to be able to pick up the largest memtable as a candidate for
flushing.

Just as region_group:s contain regions, they can also contain other
region_group:s in a nested structure. Each region_group also tracks, in a
binomial heap, the nested region_group that contains the largest region.

This latter facility is no longer used. It saw use when we had the system
dirty_memory_manager nested under the user dirty_memory_manager, but
that proved too complicated so it was undone. We still nest a virtual
region_group under the real region_group, and in fact it is the
virtual region_group that holds the memtables, but it is accessed
directly to find the largest memtable (region_group::get_largest_region)
and so all the mechanism that sorts region_group:s is bypassed.

Start to dismantle this house of cards by removing the subgroup
sorting. Since the hierarchy has exactly one parent and one child,
it's clearly useless. This is seen by the fact that we can just remove
everything related.

We still need the _subgroups member to hold the virtual region_group;
it's replaced by a vector. I verified that the non-intrusive vector
is exception safe since push_back() happens at the very end; in any
case this is early during setup where we aren't under memory pressure.

A few tests that check the removed functionality are deleted.

Closes #11515
2022-09-12 09:29:08 +03:00

194 lines
6.2 KiB
C++

// Copyright (C) 2012-present ScyllaDB
// SPDX-License-Identifier: AGPL-3.0-or-later
#include "dirty_memory_manager.hh"
#include <seastar/util/later.hh>
#include <seastar/core/with_scheduling_group.hh>
#include "seastarx.hh"
// Code previously under logalloc namespace
namespace dirty_memory_manager_logalloc {
using namespace logalloc;
// Debug-only consistency check for the region heap.
//
// When SEASTAR_DEBUG is defined, walks `bh` in ordered-iteration order and
// verifies that the evictable total space of successive regions never
// increases. On the first violation the whole heap is dumped through
// fmt::print and the process is aborted via assert(0). Without SEASTAR_DEBUG
// the function body is empty.
inline void
region_group_binomial_group_sanity_check(const region_group::region_heap& bh) {
#ifdef SEASTAR_DEBUG
    size_t previous = std::numeric_limits<size_t>::max();
    bool ordered = true;
    auto it = bh.ordered_begin();
    while (it != bh.ordered_end()) {
        const auto space = (*it)->evictable_occupancy().total_space();
        if (space > previous) {
            // Found an element larger than its predecessor: ordering is broken.
            ordered = false;
            break;
        }
        previous = space;
        ++it;
    }
    if (ordered) {
        return;
    }

    // Ordering violated: print every entry so the failure can be diagnosed.
    fmt::print("Sanity checking FAILED, size {}\n", bh.size());
    for (auto entry = bh.ordered_begin(); entry != bh.ordered_end(); ++entry) {
        auto region_ptr = (*entry);
        auto space = region_ptr->evictable_occupancy().total_space();
        fmt::print(" r = {} (id={}), occupancy = {}\n", fmt::ptr(region_ptr), region_ptr->id(), space);
    }
    assert(0);
#endif
}
// Strict "less than" on the evictable total space of two regions:
// returns true when r1's evictable occupancy is smaller than r2's.
bool
region_evictable_occupancy_ascending_less_comparator::operator()(size_tracked_region* r1, size_tracked_region* r2) const {
    const auto lhs_space = r1->evictable_occupancy().total_space();
    const auto rhs_space = r2->evictable_occupancy().total_space();
    return lhs_space < rhs_space;
}
// Out-of-line definition of the static data member region_group::no_reclaimer,
// default-constructed here.
// NOTE(review): presumably the shared placeholder reclaimer for groups that
// need no reclaim/throttle behaviour — confirm against the header.
region_group_reclaimer region_group::no_reclaimer;
// Returns the evictable total space of the region at the top of the
// _regions heap, or 0 when this group holds no regions.
uint64_t region_group::top_region_evictable_space() const noexcept {
    if (_regions.empty()) {
        return 0;
    }
    return _regions.top()->evictable_occupancy().total_space();
}
// Returns the region currently at the top of the _regions heap, or nullptr
// when the heap is empty.
// NOTE(review): "largest" presumably means largest evictable occupancy, per
// the heap comparator — confirm against the region_heap declaration.
dirty_memory_manager_logalloc::size_tracked_region* region_group::get_largest_region() noexcept {
    if (_regions.empty()) {
        return nullptr;
    }
    return _regions.top();
}
// Registers `child` as a subgroup of this group and accounts for the memory
// the child already tracks by feeding its _total_memory into update().
void
region_group::add(region_group* child) {
    // Record the subgroup first; the accounting update follows.
    _subgroups.emplace_back(child);
    update(child->_total_memory);
}
// Unregisters `child` from this group's subgroups and subtracts the child's
// _total_memory from the accounting via update().
//
// If `child` is not currently registered, the call is a no-op: passing the
// end() iterator to erase() is undefined behaviour, and there is no
// previously added usage to subtract. This mirrors del(region*), which only
// adjusts the accounting when the region is actually tracked.
void
region_group::del(region_group* child) {
    const auto pos = std::find(_subgroups.begin(), _subgroups.end(), child);
    if (pos == _subgroups.end()) {
        return;
    }
    _subgroups.erase(pos);
    update(-child->_total_memory);
}
// Starts tracking a region in this group: pushes it onto the _regions heap,
// remembers the resulting heap handle inside the region, and adds the
// region's total occupied space to the accounting.
//
// The region must not already hold a heap handle (asserted).
void
region_group::add(region* child_r) {
    auto tracked = static_cast<size_tracked_region*>(child_r);
    assert(!tracked->_heap_handle);
    tracked->_heap_handle = std::make_optional(_regions.push(tracked));
    region_group_binomial_group_sanity_check(_regions);

    const auto added_space = child_r->occupancy().total_space();
    update(added_space);
}
// Stops tracking a region: removes it from the _regions heap, clears the
// handle stored in the region, and subtracts the region's total occupied
// space from the accounting. Does nothing when the region holds no heap
// handle.
void
region_group::del(region* child_r) {
    auto tracked = static_cast<size_tracked_region*>(child_r);
    if (!tracked->_heap_handle) {
        return;
    }
    // Take the handle out of the region (leaving it empty) before erasing.
    auto handle = *std::exchange(tracked->_heap_handle, std::nullopt);
    _regions.erase(handle);
    region_group_binomial_group_sanity_check(_regions);
    update(-child_r->occupancy().total_space());
}
// Re-targets the heap entry of a region whose object is being relocated from
// `old_address` to `new_address`: the entry for the old address (if any) is
// erased and a fresh entry pointing at the new address is pushed.
void
region_group::moved(region* old_address, region* new_address) {
    auto source = static_cast<size_tracked_region*>(old_address);
    auto destination = static_cast<size_tracked_region*>(new_address);

    if (source->_heap_handle) {
        _regions.erase(*std::exchange(source->_heap_handle, std::nullopt));
    }

    // The new handle is deliberately stored in the *old* object: the region's
    // move constructor / move assignment operator is expected to carry it over
    // to the object at the new address.
    source->_heap_handle = std::make_optional(_regions.push(destination));
    region_group_binomial_group_sanity_check(_regions);
}
// Returns true when none of the groups visited by do_for_each_parent(),
// starting from this one, reports under_pressure().
// NOTE(review): relies on do_for_each_parent() returning nullptr when the
// callback never requested a stop — confirm against its definition.
bool
region_group::execution_permitted() noexcept {
    const auto pressured_group = do_for_each_parent(this, [] (auto rg) noexcept {
        if (rg->under_pressure()) {
            return stop_iteration::yes;
        }
        return stop_iteration::no;
    });
    return pressured_group == nullptr;
}
// Starts the loop that releases blocked allocation requests and returns the
// future representing that loop.
//
// The loop runs under the `deferred_work_sg` scheduling group. On each
// iteration it:
//  - stops if _shutdown_requested is set;
//  - otherwise, if there is a blocked request and execution_permitted() holds,
//    pops the front request and calls allocate() on it;
//  - otherwise waits on _relief before iterating again.
//
// NOTE(review): the initial yield() presumably defers the first iteration
// because this function is invoked from the constructor's initializer list,
// before the object is fully constructed — confirm.
future<>
region_group::start_releaser(scheduling_group deferred_work_sg) {
return with_scheduling_group(deferred_work_sg, [this] {
return yield().then([this] {
return repeat([this] () noexcept {
if (_shutdown_requested) {
return make_ready_future<stop_iteration>(stop_iteration::yes);
}
if (!_blocked_requests.empty() && execution_permitted()) {
// Take ownership of the oldest blocked request before removing it
// from the queue, then let it proceed.
auto req = std::move(_blocked_requests.front());
_blocked_requests.pop_front();
req->allocate();
return make_ready_future<stop_iteration>(stop_iteration::no);
} else {
// Block reclaiming to prevent signal() from being called by reclaimer inside wait()
// FIXME: handle allocation failures (not very likely) like allocating_section does
// `rl` lives only until this branch returns, i.e. it covers the call
// to wait() itself, not the time spent waiting.
tracker_reclaimer_lock rl(logalloc::shard_tracker());
return _relief.wait().then([] {
return stop_iteration::no;
});
}
});
});
});
}
// Constructs a region_group.
//
// name             - moved into the on_request_expiry handler attached to
//                    _blocked_requests (used when reporting timed-out requests).
// parent           - enclosing group, or nullptr; when non-null this group
//                    registers itself with it via parent->add(this).
// reclaimer        - reclaimer consulted for thresholds and pressure
//                    notifications; stored by reference.
// deferred_work_sg - scheduling group passed to start_releaser().
region_group::region_group(sstring name, region_group *parent,
region_group_reclaimer& reclaimer, scheduling_group deferred_work_sg)
: _parent(parent)
, _reclaimer(reclaimer)
, _blocked_requests(on_request_expiry{std::move(name)})
// The releaser loop is only started when the reclaimer can actually block
// requests; otherwise _releaser is an already-resolved future.
// NOTE(review): reclaimer_can_block() reads _reclaimer, so this relies on
// _reclaimer being declared before _releaser — confirm in the header.
, _releaser(reclaimer_can_block() ? start_releaser(deferred_work_sg) : make_ready_future<>())
{
// Registration with the parent is the last step of construction.
if (_parent) {
_parent->add(this);
}
}
// Reports whether the reclaimer has a finite throttle threshold, i.e. one
// different from the maximum size_t value.
bool region_group::reclaimer_can_block() const {
    const auto threshold = _reclaimer.throttle_threshold();
    return threshold != std::numeric_limits<size_t>::max();
}
void region_group::notify_relief() {
_relief.signal();
for (region_group* child : _subgroups) {
child->notify_relief();
}
}
void region_group::update(ssize_t delta) {
// Most-enclosing group which was relieved.
region_group* top_relief = nullptr;
do_for_each_parent(this, [&top_relief, delta] (region_group* rg) mutable {
rg->_total_memory += delta;
if (rg->_total_memory >= rg->_reclaimer.soft_limit_threshold()) {
rg->_reclaimer.notify_soft_pressure();
} else {
rg->_reclaimer.notify_soft_relief();
}
if (rg->_total_memory > rg->_reclaimer.throttle_threshold()) {
rg->_reclaimer.notify_pressure();
} else if (rg->_reclaimer.under_pressure()) {
rg->_reclaimer.notify_relief();
top_relief = rg;
}
return stop_iteration::no;
});
if (top_relief) {
top_relief->notify_relief();
}
}
// Expiry handler for blocked requests: fails the request with a
// blocked_requests_timed_out_error carrying this handler's _name.
void region_group::on_request_expiry::operator()(std::unique_ptr<allocating_function>& func) noexcept {
    auto timeout_error = std::make_exception_ptr(blocked_requests_timed_out_error{_name});
    func->fail(std::move(timeout_error));
}
}