From 5f4927eaaf5d4c05e4d707ba02a4504db1bdd898 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Chojnowski?= Date: Tue, 6 May 2025 18:43:42 +0200 Subject: [PATCH 1/2] logalloc: make background_reclaimer::free_memory_threshold publicly visible Wanted by the change to the background_reclaim test in the next patch. (cherry picked from commit c47f438db313be54760a0836636ae38511fbeda0) --- utils/logalloc.cc | 2 +- utils/logalloc.hh | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/utils/logalloc.cc b/utils/logalloc.cc index f57d639647..b94ba40f4b 100644 --- a/utils/logalloc.cc +++ b/utils/logalloc.cc @@ -405,7 +405,7 @@ class background_reclaimer { promise<>* _main_loop_wait = nullptr; future<> _done; bool _stopping = false; - static constexpr size_t free_memory_threshold = 60'000'000; + static constexpr size_t free_memory_threshold = background_reclaim_free_memory_threshold; private: bool have_work() const { #ifndef SEASTAR_DEFAULT_ALLOCATOR diff --git a/utils/logalloc.hh b/utils/logalloc.hh index 1685adff4b..21e11e6440 100644 --- a/utils/logalloc.hh +++ b/utils/logalloc.hh @@ -29,6 +29,8 @@ constexpr int segment_size_shift = 17; // 128K; see #151, #152 constexpr size_t segment_size = 1 << segment_size_shift; constexpr size_t max_zone_segments = 256; +constexpr size_t background_reclaim_free_memory_threshold = 60'000'000; + // // Frees some amount of objects from the region to which it's attached. // From c339f464b67284f9e98d55e76a70fa328f67f5a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Chojnowski?= Date: Tue, 6 May 2025 18:45:46 +0200 Subject: [PATCH 2/2] logalloc_test: don't test performance in test `background_reclaim` The test is failing in CI sometimes due to performance reasons. There are at least two problems: 1. The initial 500ms (wall time) sleep might be too short. If the reclaimer doesn't manage to evict enough memory during this time, the test will fail. 2. During the 100ms (thread CPU time) window given by the test to background reclaim, the `background_reclaim` scheduling group isn't actually guaranteed to get any CPU, regardless of shares. If the process is switched out inside the `background_reclaim` group, it might accumulate so much vruntime that it won't get any more CPU again for a long time. We have seen both. This kind of timing test can't be run reliably on overcommitted machines without modifying the Seastar scheduler to support that (by e.g. using thread clock instead of wall time clock in the scheduler), and that would require an amount of effort disproportionate to the value of the test. So for now, to unflake the test, this patch removes the performance test part. (And the tradeoff is a weakening of the test). (cherry picked from commit 1c1741cfbc400538bf2769489f087c3667b7f5d5) --- test/boost/logalloc_test.cc | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/test/boost/logalloc_test.cc b/test/boost/logalloc_test.cc index 38a2dd54fc..d7240d9e1a 100644 --- a/test/boost/logalloc_test.cc +++ b/test/boost/logalloc_test.cc @@ -812,27 +812,20 @@ SEASTAR_THREAD_TEST_CASE(background_reclaim) { logalloc::shard_tracker().stop().get(); }); - sleep(500ms).get(); // sleep a little, to give the reclaimer a head start - std::vector std_allocs; size_t std_alloc_size = 1000000; // note that managed_bytes fragments these, even in std for (int i = 0; i < 50; ++i) { + // Background reclaim is supposed to eventually ensure a certain amount of free memory. + while (memory::free_memory() < background_reclaim_free_memory_threshold) { + thread::maybe_yield(); + } + auto compacted_pre = logalloc::shard_tracker().statistics().memory_compacted; fmt::print("compacted {} items {} (pre)\n", compacted_pre, evictable_allocs.size()); std_allocs.emplace_back(managed_bytes::initialized_later(), std_alloc_size); auto compacted_post = logalloc::shard_tracker().statistics().memory_compacted; fmt::print("compacted {} items {} (post)\n", compacted_post, evictable_allocs.size()); BOOST_REQUIRE_EQUAL(compacted_pre, compacted_post); - - // Pretend to do some work. Sleeping would be too easy, as the background reclaim group would use - // all that time. - // - // Use thread_cputime_clock to prevent overcommitted test machines from stealing CPU time - // and causing test failures. - auto deadline = thread_cputime_clock::now() + 100ms; - while (thread_cputime_clock::now() < deadline) { - thread::maybe_yield(); - } } }