From ee720fa23b181a9fa53c0980ce4eaa5bdf69f0cc Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 22 Jun 2022 18:57:03 +0300 Subject: [PATCH] logalloc: relax lifetime rules around region_listener Currently, a region_listener is added during construction and removed during destruction. This was done to mimic the old region(region_group&) constructor, as region_listener replaces region_group. However, this makes moving the binomial heap handle outside logalloc difficult. The natural place for the handle is in a derived class of logalloc::region (e.g. memtable), but members of this derived class will be destroyed earlier than the logalloc::region here. We could play tricks with an earlier base class but it's better to just decouple region lifecycle from listener lifecycle. Do that by adding listen()/unlisten() methods. Some small awkwardness remains in that merge() implicitly unlistens (see comment in region::unlisten). Unit tests are adjusted. --- replica/memtable.cc | 4 +- test/boost/dirty_memory_manager_test.cc | 59 ++++++++++++++++--------- utils/logalloc.cc | 30 ++++++++++--- utils/logalloc.hh | 4 +- 4 files changed, 70 insertions(+), 27 deletions(-) diff --git a/replica/memtable.cc b/replica/memtable.cc index 21e570deb3..95d9c4d251 100644 --- a/replica/memtable.cc +++ b/replica/memtable.cc @@ -116,7 +116,7 @@ void memtable::memtable_encoding_stats_collector::update(const ::schema& s, cons memtable::memtable(schema_ptr schema, dirty_memory_manager& dmm, replica::table_stats& table_stats, memtable_list* memtable_list, seastar::scheduling_group compaction_scheduling_group) - : logalloc::region(dmm.region_group()) + : logalloc::region() , _dirty_mgr(dmm) , _cleaner(*this, no_cache_tracker, table_stats.memtable_app_stats, compaction_scheduling_group, [this] (size_t freed) { remove_flushed_memory(freed); }) @@ -124,6 +124,7 @@ memtable::memtable(schema_ptr schema, dirty_memory_manager& dmm, replica::table_ , _schema(std::move(schema)) ,
partitions(dht::raw_token_less_comparator{}) , _table_stats(table_stats) { + logalloc::region::listen(&dmm.region_group()); } static thread_local dirty_memory_manager mgr_for_tests; @@ -136,6 +137,7 @@ memtable::memtable(schema_ptr schema) memtable::~memtable() { revert_flushed_memory(); clear(); + logalloc::region::unlisten(); } uint64_t memtable::dirty_size() const { diff --git a/test/boost/dirty_memory_manager_test.cc b/test/boost/dirty_memory_manager_test.cc index 3c97ebfd5b..7ae3eeb8be 100644 --- a/test/boost/dirty_memory_manager_test.cc +++ b/test/boost/dirty_memory_manager_test.cc @@ -53,10 +53,14 @@ SEASTAR_TEST_CASE(test_region_groups) { region_group all; region_group one_and_two("one_and_two", &all); - auto one = std::make_unique(one_and_two); - auto two = std::make_unique(one_and_two); - auto three = std::make_unique(all); - auto four = std::make_unique(just_four); + auto one = std::make_unique(); + one->listen(&one_and_two); + auto two = std::make_unique(); + two->listen(&one_and_two); + auto three = std::make_unique(); + three->listen(&all); + auto four = std::make_unique(); + four->listen(&just_four); auto five = std::make_unique(); constexpr size_t base_count = 16 * 1024; @@ -176,7 +180,7 @@ struct test_region_group: public region_group { }; struct test_region: public logalloc::region { - test_region(test_region_group& rg) : logalloc::region(rg) {} + test_region() : logalloc::region() {} ~test_region() { clear(); } @@ -212,7 +216,8 @@ SEASTAR_TEST_CASE(test_region_groups_basic_throttling) { // singleton hierarchy, only one segment allowed test_region_group simple(simple_reclaimer); - auto simple_region = std::make_unique(simple); + auto simple_region = std::make_unique(); + simple_region->listen(&simple); // Expectation: after first allocation region will have one segment, // memory_used() == throttle_threshold and we are good to go, future @@ -228,7 +233,8 @@ SEASTAR_TEST_CASE(test_region_groups_basic_throttling) { 
BOOST_REQUIRE_EQUAL(fut.available(), true); BOOST_REQUIRE_EQUAL(simple.memory_used(), logalloc::segment_size); - auto big_region = std::make_unique(simple); + auto big_region = std::make_unique(); + big_region->listen(&simple); // Allocate a big chunk, that will certainly get us over the threshold big_region->alloc(); @@ -270,8 +276,10 @@ SEASTAR_TEST_CASE(test_region_groups_linear_hierarchy_throttling_child_alloc) { test_region_group parent(parent_reclaimer); test_region_group child(&parent, child_reclaimer); - auto child_region = std::make_unique(child); - auto parent_region = std::make_unique(parent); + auto child_region = std::make_unique(); + child_region->listen(&child); + auto parent_region = std::make_unique(); + parent_region->listen(&parent); child_region->alloc(); BOOST_REQUIRE_GE(parent.memory_used(), logalloc::segment_size); @@ -301,7 +309,8 @@ SEASTAR_TEST_CASE(test_region_groups_linear_hierarchy_throttling_parent_alloc) { test_region_group parent(simple_reclaimer); test_region_group child(&parent, simple_reclaimer); - auto parent_region = std::make_unique(parent); + auto parent_region = std::make_unique(); + parent_region->listen(&parent); parent_region->alloc(); BOOST_REQUIRE_GE(parent.memory_used(), logalloc::segment_size); @@ -321,7 +330,8 @@ SEASTAR_TEST_CASE(test_region_groups_fifo_order) { test_region_group rg(simple_reclaimer); - auto region = std::make_unique(rg); + auto region = std::make_unique(); + region->listen(&rg); // fill the parent. Try allocating at child level. Should not be allowed. 
region->alloc(); @@ -355,8 +365,10 @@ SEASTAR_TEST_CASE(test_region_groups_linear_hierarchy_throttling_moving_restrict test_region_group inner(&root, simple_reclaimer); test_region_group child(&inner, simple_reclaimer); - auto inner_region = std::make_unique(inner); - auto root_region = std::make_unique(root); + auto inner_region = std::make_unique(); + inner_region->listen(&inner); + auto root_region = std::make_unique(); + root_region->listen(&root); // fill the inner node. Try allocating at child level. Should not be allowed. circular_buffer big_alloc; @@ -407,8 +419,10 @@ SEASTAR_TEST_CASE(test_region_groups_tree_hierarchy_throttling_leaf_alloc) { leaf(test_region_group& parent) : _leaf_reclaimer(logalloc::segment_size) , _rg(&parent, _leaf_reclaimer) - , _region(std::make_unique(_rg)) - {} + , _region(std::make_unique()) + { + _region->listen(&_rg); + } void alloc(size_t size) { _region->alloc(size); @@ -420,7 +434,8 @@ SEASTAR_TEST_CASE(test_region_groups_tree_hierarchy_throttling_leaf_alloc) { }, db::no_timeout); } void reset() { - _region.reset(new test_region(_rg)); + _region.reset(new test_region()); + _region->listen(&_rg); } }; @@ -467,10 +482,11 @@ class test_async_reclaim_region { region_group& _rg; public: test_async_reclaim_region(region_group& rg, size_t alloc_size) - : _region(rg) + : _region() , _alloc_size(alloc_size) , _rg(rg) { + _region.listen(&rg); with_allocator(_region.allocator(), [this] { _alloc.push_back(managed_bytes(bytes(bytes::initialized_later(), this->_alloc_size))); }); @@ -488,7 +504,8 @@ public: with_allocator(_region.allocator(), [this] { std::vector().swap(_alloc); }); - _region = logalloc::region(_rg); + _region = logalloc::region(); + _region.listen(&_rg); return this->_alloc_size; } static test_async_reclaim_region& from_region(region* region_ptr) { @@ -724,7 +741,8 @@ SEASTAR_TEST_CASE(test_no_crash_when_a_lot_of_requests_released_which_change_reg region_group_reclaimer recl(threshold, threshold); region_group 
gr(test_name, recl); auto close_gr = defer([&gr] () noexcept { gr.shutdown().get(); }); - region r(gr); + region r; + r.listen(&gr); with_allocator(r.allocator(), [&] { std::vector objs; @@ -792,7 +810,8 @@ SEASTAR_TEST_CASE(test_reclaiming_runs_as_long_as_there_is_soft_pressure) { reclaimer recl(hard_threshold, soft_threshold); region_group gr(test_name, recl); auto close_gr = defer([&gr] () noexcept { gr.shutdown().get(); }); - region r(gr); + region r; + r.listen(&gr); with_allocator(r.allocator(), [&] { std::vector objs; diff --git a/utils/logalloc.cc b/utils/logalloc.cc index 653f382f2c..a634647ed1 100644 --- a/utils/logalloc.cc +++ b/utils/logalloc.cc @@ -1741,8 +1741,8 @@ private: }; public: - explicit region_impl(region* region, region_listener* listener = nullptr) - : _region(region), _listener(listener), _id(next_id()) + explicit region_impl(region* region) + : _region(region), _id(next_id()) { _buf_ptrs_for_compact_segment.reserve(segment::size / buf_align); _preferred_max_contiguous_allocation = max_managed_object_size; @@ -1780,6 +1780,21 @@ public: return occupancy().used_space() == 0; } + void listen(region_listener* listener) { + _listener = listener; + _listener->add(_region); + } + + void unlisten() { + // _listener may have been removed by merge(), so check for that. + // Yes, it's awkward, we should have the caller unlisten before merge + // to remove implicit behavior.
+ if (_listener) { + _listener->del(_region); + _listener = nullptr; + } + } + occupancy_stats occupancy() const { occupancy_stats total = _non_lsa_occupancy; total += _closed_occupancy; @@ -2111,9 +2126,14 @@ region::region() : _impl(make_shared(this)) { } -region::region(region_listener& listener) - : _impl(make_shared(this, &listener)) { - listener.add(this); +void +region::listen(region_listener* listener) { + get_impl().listen(listener); +} + +void +region::unlisten() { + get_impl().unlisten(); } region_impl& region::get_impl() { diff --git a/utils/logalloc.hh b/utils/logalloc.hh index c934ed2d7e..df4ae71b9a 100644 --- a/utils/logalloc.hh +++ b/utils/logalloc.hh @@ -301,12 +301,14 @@ private: const region_impl& get_impl() const; public: region(); - explicit region(region_listener& listener); ~region(); region(region&& other); region& operator=(region&& other); region(const region& other) = delete; + void listen(region_listener* listener); + void unlisten(); + occupancy_stats occupancy() const; allocation_strategy& allocator() noexcept {