Compare commits
7 Commits
debug_form
...
copilot/us
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f9f7b282fc | ||
|
|
ba9122a374 | ||
|
|
c0e85178e1 | ||
|
|
5d8f122cb7 | ||
|
|
02c631a8ea | ||
|
|
481d5ae2e5 | ||
|
|
76c6354f9f |
@@ -597,6 +597,7 @@ scylla_tests = set([
|
||||
'test/boost/logalloc_standard_allocator_segment_pool_backend_test',
|
||||
'test/boost/logalloc_test',
|
||||
'test/boost/lru_string_map_test',
|
||||
'test/boost/lru_test',
|
||||
'test/boost/managed_bytes_test',
|
||||
'test/boost/managed_vector_test',
|
||||
'test/boost/map_difference_test',
|
||||
@@ -1583,6 +1584,7 @@ pure_boost_tests = set([
|
||||
'test/boost/like_matcher_test',
|
||||
'test/boost/linearizing_input_stream_test',
|
||||
'test/boost/lru_string_map_test',
|
||||
'test/boost/lru_test',
|
||||
'test/boost/map_difference_test',
|
||||
'test/boost/nonwrapping_interval_test',
|
||||
'test/boost/observable_test',
|
||||
|
||||
@@ -214,11 +214,7 @@ void cache_tracker::clear() {
|
||||
}
|
||||
|
||||
void cache_tracker::touch(rows_entry& e) {
|
||||
// last dummy may not be linked if evicted
|
||||
if (e.is_linked()) {
|
||||
_lru.remove(e);
|
||||
}
|
||||
_lru.add(e);
|
||||
_lru.touch(e);
|
||||
}
|
||||
|
||||
void cache_tracker::insert(cache_entry& entry) {
|
||||
|
||||
@@ -10,7 +10,27 @@ Cache is always paired with its underlying mutation source which it mirrors. Tha
|
||||
|
||||
Eviction is about removing parts of the data from memory and recording the fact that information about those parts is missing. Eviction doesn't change the set of writes represented by cache as part of its `mutation_source` interface.
|
||||
|
||||
The smallest object which can be evicted, called eviction unit, is currently a single row (`rows_entry`). Eviction units are linked in an LRU owned by a `cache_tracker`. The LRU determines eviction order. The LRU is shared among many tables. Currently, there is one per `database`.
|
||||
The smallest object which can be evicted, called eviction unit, is currently a single row (`rows_entry`). Eviction units are managed by a W-TinyLFU policy owned by a `cache_tracker`. The W-TinyLFU policy determines eviction order. It is shared among many tables. Currently, there is one per `database`.
|
||||
|
||||
### W-TinyLFU Eviction Policy
|
||||
|
||||
The cache uses a W-TinyLFU (Window Tiny Least Frequently Used) eviction policy,
|
||||
which combines recency and frequency information for better hit rates than plain LRU.
|
||||
|
||||
The policy organizes entries into three segments:
|
||||
|
||||
- **Window** (~1% of cache): A small LRU that admits all new entries. This allows
|
||||
new entries to build up frequency information before competing for main cache space.
|
||||
- **Probation** (~19% of cache): Part of the main SLRU cache. Entries from the window
|
||||
compete with probation victims for admission using a TinyLFU frequency filter.
|
||||
- **Protected** (~80% of cache): The other part of the main SLRU cache. Entries are
|
||||
promoted here from probation when accessed again.
|
||||
|
||||
The TinyLFU frequency filter uses a Count-Min Sketch to compactly estimate access
frequency. When eviction is needed and the window is above its target size, the window
victim competes with the probation victim: the entry with the higher estimated
frequency survives in probation while the other is evicted. On a tie the window victim
is admitted and the probation victim is evicted. The sketch is periodically aged
(all counts halved) to adapt to changing access patterns.
|
||||
|
||||
All `rows_entry` objects which are owned by a `cache_tracker` are assumed to be either contained in a cache (in some `row_cache::partitions_type`) or
|
||||
be owned by a (detached) `partition_snapshot`. When the last row from a `partition_entry` is evicted, the containing `cache_entry` is evicted from the cache.
|
||||
|
||||
287
test/boost/lru_test.cc
Normal file
287
test/boost/lru_test.cc
Normal file
@@ -0,0 +1,287 @@
|
||||
/*
|
||||
* Copyright (C) 2024-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#define BOOST_TEST_MODULE lru
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
|
||||
#include "utils/count_min_sketch.hh"
|
||||
#include "utils/lru.hh"
|
||||
|
||||
// A concrete evictable for testing.
|
||||
struct test_evictable final : public evictable {
|
||||
int id;
|
||||
bool was_evicted = false;
|
||||
|
||||
explicit test_evictable(int id) : id(id) {}
|
||||
|
||||
void on_evicted() noexcept override {
|
||||
was_evicted = true;
|
||||
}
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Count-Min Sketch Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Width = 2^test_sketch_width_log2 = 1024 counters per row.
// Passed as the width_log2 constructor argument by the sketch tests below.
static constexpr size_t test_sketch_width_log2 = 10;
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_count_min_sketch_basic) {
    constexpr int first_key = 42;
    constexpr int second_key = 100;
    utils::count_min_sketch cms(test_sketch_width_log2);

    // Nothing has been recorded yet, so the estimate starts at zero.
    BOOST_REQUIRE_EQUAL(cms.estimate(first_key), 0);

    // One increment, then two more: the estimate follows the count.
    cms.increment(first_key);
    BOOST_REQUIRE_EQUAL(cms.estimate(first_key), 1);

    for (int extra = 0; extra < 2; ++extra) {
        cms.increment(first_key);
    }
    BOOST_REQUIRE_EQUAL(cms.estimate(first_key), 3);

    // A second key is tracked separately and leaves the first one untouched.
    BOOST_REQUIRE_EQUAL(cms.estimate(second_key), 0);
    cms.increment(second_key);
    BOOST_REQUIRE_EQUAL(cms.estimate(second_key), 1);
    BOOST_REQUIRE_EQUAL(cms.estimate(first_key), 3);
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_count_min_sketch_max_counter) {
    utils::count_min_sketch cms(test_sketch_width_log2);

    // Record more accesses than a 4-bit counter can represent.
    int remaining = 20;
    while (remaining > 0) {
        cms.increment(1);
        --remaining;
    }

    // The counter saturates at 15 instead of wrapping.
    BOOST_REQUIRE_EQUAL(cms.estimate(1), 15);
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_count_min_sketch_decay) {
    utils::count_min_sketch cms(test_sketch_width_log2);

    // Bring the key to a frequency of 4.
    for (int i = 0; i < 4; ++i) {
        cms.increment(1);
    }
    BOOST_REQUIRE_EQUAL(cms.estimate(1), 4);

    // Each decay halves the counter: 4 -> 2 -> 1 -> 0.
    const int expected_after_decay[] = {2, 1, 0};
    for (int expected : expected_after_decay) {
        cms.decay();
        BOOST_REQUIRE_EQUAL(cms.estimate(1), expected);
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// W-TinyLFU LRU Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_lru_add_and_evict) {
    lru cache;
    test_evictable first(1), second(2), third(3);
    test_evictable* const all[] = {&first, &second, &third};

    for (test_evictable* entry : all) {
        cache.add(*entry);
    }
    for (test_evictable* entry : all) {
        BOOST_REQUIRE(entry->is_linked());
    }

    // A single evict() call on a non-empty cache must reclaim something.
    const auto outcome = cache.evict();
    BOOST_REQUIRE(outcome == seastar::memory::reclaiming_result::reclaimed_something);

    // Count how many entries saw their eviction callback; at least one must have.
    int evicted_count = 0;
    for (test_evictable* entry : all) {
        if (entry->was_evicted) {
            ++evicted_count;
        }
    }
    BOOST_REQUIRE_GE(evicted_count, 1);

    // Unlink the survivors before they go out of scope.
    for (test_evictable* entry : all) {
        if (entry->is_linked()) {
            cache.remove(*entry);
        }
    }
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_lru_evict_empty) {
    // Evicting from a cache with no entries reports that nothing was reclaimed.
    lru empty_cache;
    BOOST_REQUIRE(empty_cache.evict() == seastar::memory::reclaiming_result::reclaimed_nothing);
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_lru_touch_keeps_entry_alive) {
    // The entries are declared before the lru so that ~lru() unlinks anything
    // still linked before the entries are destroyed, even if a REQUIRE fails.
    test_evictable hot(1), cold1(2), cold2(3);
    lru l;

    l.add(hot);
    l.add(cold1);
    l.add(cold2);

    // Touch 'hot' many times: this builds its frequency and keeps moving it
    // to the most-recently-used end of the window.
    for (int i = 0; i < 10; ++i) {
        l.touch(hot);
    }

    // Evict exactly two entries (not all three). Each call must reclaim one.
    BOOST_REQUIRE(l.evict() == seastar::memory::reclaiming_result::reclaimed_something);
    BOOST_REQUIRE(l.evict() == seastar::memory::reclaiming_result::reclaimed_something);

    // The hot entry survives ...
    BOOST_REQUIRE(hot.is_linked());
    BOOST_REQUIRE(!hot.was_evicted);

    // ... and the two evictions were paid for by the cold entries.
    BOOST_REQUIRE(cold1.was_evicted);
    BOOST_REQUIRE(cold2.was_evicted);
    BOOST_REQUIRE(!cold1.is_linked());
    BOOST_REQUIRE(!cold2.is_linked());
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_lru_evict_all) {
    lru cache;
    test_evictable first(1), second(2), third(3);
    test_evictable* const all[] = {&first, &second, &third};

    for (test_evictable* entry : all) {
        cache.add(*entry);
    }

    cache.evict_all();

    // Every entry must have been unlinked ...
    for (test_evictable* entry : all) {
        BOOST_REQUIRE(!entry->is_linked());
    }
    // ... and every entry must have received its eviction callback.
    for (test_evictable* entry : all) {
        BOOST_REQUIRE(entry->was_evicted);
    }
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_lru_remove) {
    lru cache;
    test_evictable kept_a(1), removed(2), kept_b(3);

    cache.add(kept_a);
    cache.add(removed);
    cache.add(kept_b);

    // remove() only unlinks the entry; it does not fire on_evicted().
    cache.remove(removed);
    BOOST_REQUIRE(!removed.is_linked());
    BOOST_REQUIRE(!removed.was_evicted);

    // The entries that stayed in the cache are still evictable afterwards.
    cache.evict_all();
    BOOST_REQUIRE(kept_a.was_evicted);
    BOOST_REQUIRE(kept_b.was_evicted);
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_lru_add_before) {
    // The entries are declared before the lru so that ~lru() unlinks anything
    // still linked before the entries are destroyed, even if a REQUIRE fails.
    test_evictable e1(1), e2(2), e3(3);
    lru l;

    l.add(e1);
    l.add(e2);

    // Insert e3 before e2 so e3 is evicted before e2.
    l.add_before(e2, e3);

    BOOST_REQUIRE(e1.is_linked());
    BOOST_REQUIRE(e2.is_linked());
    BOOST_REQUIRE(e3.is_linked());

    // Verify the ordering instead of only the linkage: with eviction order
    // e1, e3, e2, two evictions must take e1 and e3 and leave e2 in place.
    // If e3 had been placed after e2, e2 would be evicted here instead of e3.
    BOOST_REQUIRE(l.evict() == seastar::memory::reclaiming_result::reclaimed_something);
    BOOST_REQUIRE(l.evict() == seastar::memory::reclaiming_result::reclaimed_something);

    BOOST_REQUIRE(e1.was_evicted);
    BOOST_REQUIRE(e3.was_evicted);
    BOOST_REQUIRE(!e2.was_evicted);
    BOOST_REQUIRE(e2.is_linked());

    // Clean up.
    l.evict_all();
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_lru_frequency_based_eviction) {
    lru cache;

    // Entries live behind unique_ptr in a fixed-size array, so the evictables
    // themselves are never moved.
    static constexpr int N = 20;
    static constexpr int first_hot = 15;
    std::unique_ptr<test_evictable> entries[N];
    for (int i = 0; i < N; ++i) {
        entries[i] = std::make_unique<test_evictable>(i);
    }
    for (auto& entry : entries) {
        cache.add(*entry);
    }

    // Entries 15-19 are the "hot" set: touch each of them ten times.
    for (int round = 0; round < 10; ++round) {
        for (int i = first_hot; i < N; ++i) {
            cache.touch(*entries[i]);
        }
    }

    // Ask for ten evictions, i.e. half of the entries.
    int evictions_left = 10;
    while (evictions_left-- > 0) {
        cache.evict();
    }

    // None of the hot entries may have been chosen as a victim.
    for (int i = first_hot; i < N; ++i) {
        BOOST_REQUIRE_MESSAGE(entries[i]->is_linked(),
            "Hot entry " << i << " should survive eviction");
    }

    // Unlink whatever is still in the cache before the entries are destroyed.
    for (auto& entry : entries) {
        if (entry->is_linked()) {
            cache.remove(*entry);
        }
    }
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE(test_lru_touch_promotes_from_probation) {
    // The entries are declared before the lru so that ~lru() unlinks anything
    // still linked before the entries are destroyed, even if a REQUIRE fails.
    test_evictable a(1), b(2), c(3), d(4), e(5);
    lru l;

    l.add(a);
    l.add(b);
    l.add(c);

    // Step 1: get an entry into probation. The first eviction drains the
    // window: `a` moves into the empty probation segment, then `b` competes
    // with it. Exactly one of the two is evicted and the other one is left in
    // probation; `c` stays in the window.
    BOOST_REQUIRE(l.evict() == seastar::memory::reclaiming_result::reclaimed_something);
    BOOST_REQUIRE(a.is_linked() != b.is_linked());
    BOOST_REQUIRE(c.is_linked());
    test_evictable& promoted = a.is_linked() ? a : b;

    // Step 2: touching an entry that sits in probation must promote it to the
    // protected segment.
    l.touch(promoted);

    // Step 3: add fresh entries and evict three times. Probation and window
    // entries are chosen as victims before protected ones, so c, d and e must
    // all go while the promoted entry stays. Without promotion the touched
    // entry would still be in probation and would be evicted within these
    // three calls.
    l.add(d);
    l.add(e);
    for (int i = 0; i < 3; ++i) {
        BOOST_REQUIRE(l.evict() == seastar::memory::reclaiming_result::reclaimed_something);
    }

    BOOST_REQUIRE(c.was_evicted);
    BOOST_REQUIRE(d.was_evicted);
    BOOST_REQUIRE(e.was_evicted);
    BOOST_REQUIRE_MESSAGE(promoted.is_linked(),
        "Entry " << promoted.id << " promoted from probation should outlive window and probation entries");
    BOOST_REQUIRE(!promoted.was_evicted);
}
|
||||
108
utils/count_min_sketch.hh
Normal file
108
utils/count_min_sketch.hh
Normal file
@@ -0,0 +1,108 @@
|
||||
/*
|
||||
* Copyright (C) 2024-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
namespace utils {
|
||||
|
||||
/// A Count-Min Sketch with 4-bit counters for frequency estimation.
///
/// Used by the W-TinyLFU cache admission policy to estimate access frequency.
/// Each counter is 4 bits (max value 15), and counters are packed 16 per
/// uint64_t word. The sketch uses 4 hash rows and returns the minimum count
/// across all rows for frequency estimation.
///
/// Note: mix() multiplies the key by the row seed, so key 0 hashes to column 0
/// in every row and gets no benefit from the multiple rows.
class count_min_sketch {
    static constexpr size_t depth = 4;
    static constexpr size_t counters_per_word = 16;
    static constexpr size_t bits_per_counter = 4;
    static constexpr uint8_t max_count = 15;
    // Keeps the low three bits of every 4-bit counter; used by decay().
    static constexpr uint64_t reset_mask = 0x7777777777777777ULL;

    // Per-row hash seeds.
    static constexpr uint64_t seeds[depth] = {
        0x9e3779b97f4a7c15ULL,
        0xbf58476d1ce4e5b9ULL,
        0x94d049bb133111ebULL,
        0xd6e8feb86659fd93ULL,
    };

    std::vector<uint64_t> _table;
    size_t _width;
    size_t _width_mask;
    size_t _words_per_row;

    // Multiply/xor-shift mixer producing the per-row hash of a key.
    static uint64_t mix(uint64_t key, uint64_t seed) noexcept {
        uint64_t h = key * seed;
        h ^= h >> 32;
        h *= 0xd6e8feb86659fd93ULL;
        h ^= h >> 32;
        return h;
    }

    // Column (counter number within the row) for `key` in `row`.
    size_t counter_index(size_t row, uint64_t key) const noexcept {
        return mix(key, seeds[row]) & _width_mask;
    }

    // Extracts the 4-bit counter at position `pos` (0..15) of a packed word.
    static uint8_t get_counter(uint64_t word, size_t pos) noexcept {
        return static_cast<uint8_t>((word >> (pos * bits_per_counter)) & 0x0FULL);
    }

    // Index into _table of the word that holds counter `col` of `row`.
    size_t word_index(size_t row, size_t col) const noexcept {
        return row * _words_per_row + col / counters_per_word;
    }

public:
    /// Construct a sketch with the given number of counters per row.
    /// \param width_log2 Log base 2 of the number of counters per row.
    ///        Must be smaller than the bit width of size_t.
    ///        Total memory is approximately depth * 2^width_log2 / 2 bytes,
    ///        with a minimum of one 64-bit word per row.
    explicit count_min_sketch(size_t width_log2 = 16)
        : _width(size_t(1) << width_log2)
        , _width_mask(_width - 1)
        // Round up: a row narrower than 16 counters still needs one word.
        // Plain division would give 0 words and an empty table that
        // increment()/estimate() would then index out of bounds.
        , _words_per_row((_width + counters_per_word - 1) / counters_per_word)
    {
        _table.resize(depth * _words_per_row, 0);
    }

    /// Records one access to `key`. Counters saturate at 15.
    void increment(uint64_t key) noexcept {
        for (size_t row = 0; row < depth; ++row) {
            const size_t col = counter_index(row, key);
            const size_t wi = word_index(row, col);
            const size_t pos = col % counters_per_word;
            // Only add while below the cap, so the addition can never carry
            // into the neighbouring counter.
            if (get_counter(_table[wi], pos) < max_count) {
                _table[wi] += (uint64_t(1) << (pos * bits_per_counter));
            }
        }
    }

    /// Returns the estimated access count of `key` (0..15): the minimum of
    /// the key's counters over all rows.
    uint8_t estimate(uint64_t key) const noexcept {
        uint8_t min_val = max_count;
        for (size_t row = 0; row < depth; ++row) {
            const size_t col = counter_index(row, key);
            const size_t wi = word_index(row, col);
            const size_t pos = col % counters_per_word;
            min_val = std::min(min_val, get_counter(_table[wi], pos));
        }
        return min_val;
    }

    /// Halve all counters (frequency decay / aging).
    /// This is NOT a full clear — it preserves relative frequency ordering
    /// while allowing the sketch to adapt to changing access patterns.
    void decay() noexcept {
        for (auto& word : _table) {
            // Shifting the whole word halves every counter at once; the mask
            // drops the bit that leaked in from the neighbouring counter.
            word = (word >> 1) & reset_mask;
        }
    }

    /// Number of counters per row.
    size_t width() const noexcept { return _width; }
};
|
||||
|
||||
} // namespace utils
|
||||
310
utils/lru.hh
310
utils/lru.hh
@@ -9,8 +9,18 @@
|
||||
#pragma once
|
||||
|
||||
#include "utils/assert.hh"
|
||||
#include "utils/count_min_sketch.hh"
|
||||
#include <boost/intrusive/list.hpp>
|
||||
#include <seastar/core/memory.hh>
|
||||
#include <algorithm>
|
||||
|
||||
// Identifies which W-TinyLFU segment an evictable belongs to.
// `none` is the initial value of evictable::_segment and marks an entry that
// is not in any segment list.
enum class lru_segment : uint8_t {
    none = 0,
    window = 1,
    probation = 2,
    protected_ = 3,
};
|
||||
|
||||
class evictable {
|
||||
friend class lru;
|
||||
@@ -32,6 +42,11 @@ protected:
|
||||
static_assert(std::is_nothrow_constructible_v<lru_link_type, lru_link_type&&>);
|
||||
private:
|
||||
lru_link_type _lru_link;
|
||||
// Stable key for frequency estimation in the Count-Min Sketch.
|
||||
// Assigned when the entry is first added to the LRU, preserved across LSA
|
||||
// compaction moves (which change the object address but preserve members).
|
||||
uint32_t _frequency_hash = 0;
|
||||
lru_segment _segment = lru_segment::none;
|
||||
protected:
|
||||
// Prevent destruction via evictable pointer. LRU is not aware of allocation strategy.
|
||||
// Prevent destruction of a linked evictable. While we could unlink the evictable here
|
||||
@@ -54,6 +69,8 @@ public:
|
||||
|
||||
// Exchanges the LRU identity of two evictables: their list position, their
// frequency key and their segment tag.
void swap(evictable& o) noexcept {
    std::swap(_segment, o._segment);
    std::swap(_frequency_hash, o._frequency_hash);
    _lru_link.swap_nodes(o._lru_link);
}
|
||||
|
||||
virtual bool is_index() const noexcept {
|
||||
@@ -76,13 +93,27 @@ class index_evictable : public evictable {
|
||||
}
|
||||
};
|
||||
|
||||
// Implements LRU cache replacement for row cache and sstable index cache.
|
||||
// Implements W-TinyLFU cache replacement for row cache and sstable index cache.
|
||||
//
|
||||
// W-TinyLFU uses a small admission window backed by an LRU and a main cache
|
||||
// organized as a Segmented LRU (SLRU) with probation and protected segments.
|
||||
// Admission to the main cache is controlled by a TinyLFU frequency filter
|
||||
// implemented via a Count-Min Sketch.
|
||||
//
|
||||
// New entries enter the window. When eviction is needed, the window victim
|
||||
// competes with the probation victim: the entry with higher estimated
|
||||
// frequency survives in probation while the other is evicted.
|
||||
// Touching an entry in probation promotes it to the protected segment.
|
||||
// When the protected segment exceeds its target size, the least-recently-used
|
||||
// protected entry is demoted back to probation.
|
||||
class lru {
|
||||
private:
|
||||
using lru_type = boost::intrusive::list<evictable,
|
||||
boost::intrusive::member_hook<evictable, evictable::lru_link_type, &evictable::_lru_link>,
|
||||
boost::intrusive::constant_time_size<false>>; // we need this to have bi::auto_unlink on hooks.
|
||||
lru_type _list;
|
||||
lru_type _window;
|
||||
lru_type _probation;
|
||||
lru_type _protected;
|
||||
|
||||
// See the comment to index_evictable.
|
||||
using index_lru_type = boost::intrusive::list<index_evictable,
|
||||
@@ -92,24 +123,225 @@ private:
|
||||
|
||||
using reclaiming_result = seastar::memory::reclaiming_result;
|
||||
|
||||
public:
|
||||
~lru() {
|
||||
while (!_list.empty()) {
|
||||
evictable& e = _list.front();
|
||||
remove(e);
|
||||
e.on_evicted();
|
||||
// Log2 of the number of counters per sketch row; passed to the _sketch
// constructor below.
static constexpr size_t sketch_width_log2 = 16;
static constexpr size_t sketch_width = size_t(1) << sketch_width_log2;
// Number of recorded accesses after which record_access() halves the sketch.
static constexpr size_t sample_threshold = sketch_width * 10;
// Window segment target: ~1% of total cache entries.
static constexpr size_t window_percent = 1;
// Protected segment target: ~80% of total cache entries.
static constexpr size_t protected_percent = 80;

// Frequency estimator consulted when a window victim competes with a
// probation victim.
utils::count_min_sketch _sketch{sketch_width_log2};
// Per-segment entry counts, maintained through increment_size() and
// decrement_size().
size_t _window_size = 0;
size_t _probation_size = 0;
size_t _protected_size = 0;
// Running count of recorded accesses; halved together with the sketch in
// record_access().
size_t _sample_count = 0;
// Monotonic counter for assigning stable frequency hash keys.
// Using object addresses as sketch keys is incorrect because LSA
// relocates objects during compaction, changing their address.
uint32_t _next_hash = 0;
|
||||
|
||||
size_t total_size() const noexcept {
|
||||
return _window_size + _probation_size + _protected_size;
|
||||
}
|
||||
|
||||
size_t max_window_size() const noexcept {
|
||||
return std::max(size_t(1), total_size() * window_percent / 100);
|
||||
}
|
||||
|
||||
size_t max_protected_size() const noexcept {
|
||||
return total_size() * protected_percent / 100;
|
||||
}
|
||||
|
||||
static uint64_t entry_key(const evictable& e) noexcept {
|
||||
return e._frequency_hash;
|
||||
}
|
||||
|
||||
// Gives `e` a sketch key unless it already has one. An entry that is added
// again after remove() keeps its key, so its frequency history carries over.
void assign_frequency_hash(evictable& e) noexcept {
    if (e._frequency_hash != 0) {
        return;
    }
    ++_next_hash;
    if (_next_hash == 0) {
        // The counter wrapped around; 0 is reserved as the "unassigned" marker.
        _next_hash = 1;
    }
    e._frequency_hash = _next_hash;
}
|
||||
|
||||
void record_access(const evictable& e) noexcept {
|
||||
_sketch.increment(entry_key(e));
|
||||
if (++_sample_count >= sample_threshold) {
|
||||
_sketch.decay();
|
||||
_sample_count /= 2;
|
||||
}
|
||||
}
|
||||
|
||||
// Maps a segment tag to the list that stores that segment's entries.
// Must not be called with lru_segment::none.
lru_type& segment_list(lru_segment seg) noexcept {
    if (seg == lru_segment::window) {
        return _window;
    }
    if (seg == lru_segment::probation) {
        return _probation;
    }
    if (seg == lru_segment::protected_) {
        return _protected;
    }
    SCYLLA_ASSERT(false && "segment_list called with none");
    __builtin_unreachable();
}
|
||||
|
||||
// Bumps the entry counter of the given segment; `none` has no counter.
void increment_size(lru_segment seg) noexcept {
    if (seg == lru_segment::window) {
        ++_window_size;
    } else if (seg == lru_segment::probation) {
        ++_probation_size;
    } else if (seg == lru_segment::protected_) {
        ++_protected_size;
    }
}
|
||||
|
||||
// Lowers the entry counter of the given segment; `none` has no counter.
void decrement_size(lru_segment seg) noexcept {
    if (seg == lru_segment::window) {
        --_window_size;
    } else if (seg == lru_segment::probation) {
        --_probation_size;
    } else if (seg == lru_segment::protected_) {
        --_protected_size;
    }
}
|
||||
|
||||
// Unlinks `e` from the segment it is currently in, fixes that segment's
// counter and marks the entry as belonging to no segment.
void remove_from_segment(evictable& e) noexcept {
    const lru_segment current = e._segment;
    lru_type& owner = segment_list(current);
    owner.erase(owner.iterator_to(e));
    e._segment = lru_segment::none;
    decrement_size(current);
}
|
||||
|
||||
// Appends `e` at the most-recently-used end of segment `seg` and updates
// the entry's tag and the segment's counter.
void add_to_segment(evictable& e, lru_segment seg) noexcept {
    lru_type& target = segment_list(seg);
    target.push_back(e);
    e._segment = seg;
    increment_size(seg);
}
|
||||
|
||||
// Move excess protected entries to probation.
|
||||
// Bounded to avoid reactor stalls when the protected segment is
|
||||
// significantly oversized (e.g. after many promotions without eviction).
|
||||
static constexpr size_t max_rebalance_per_call = 128;
|
||||
void rebalance_protected() noexcept {
|
||||
size_t max_prot = max_protected_size();
|
||||
size_t moved = 0;
|
||||
while (_protected_size > max_prot && !_protected.empty() && moved < max_rebalance_per_call) {
|
||||
evictable& victim = _protected.front();
|
||||
remove_from_segment(victim);
|
||||
add_to_segment(victim, lru_segment::probation);
|
||||
++moved;
|
||||
}
|
||||
}
|
||||
|
||||
// Evicts a single element using W-TinyLFU policy.
|
||||
template <bool Shallow = false>
|
||||
reclaiming_result do_evict(bool should_evict_index) noexcept {
|
||||
// Index eviction path: evict the least recently used index entry.
|
||||
if (should_evict_index && !_index_list.empty()) {
|
||||
evictable& e = _index_list.front();
|
||||
remove(e);
|
||||
if constexpr (!Shallow) {
|
||||
e.on_evicted();
|
||||
} else {
|
||||
e.on_evicted_shallow();
|
||||
}
|
||||
return reclaiming_result::reclaimed_something;
|
||||
}
|
||||
|
||||
if (_window.empty() && _probation.empty() && _protected.empty()) {
|
||||
return reclaiming_result::reclaimed_nothing;
|
||||
}
|
||||
|
||||
rebalance_protected();
|
||||
|
||||
// Drain excess from window using TinyLFU admission.
|
||||
while (_window_size > max_window_size() && !_window.empty()) {
|
||||
evictable& w_victim = _window.front();
|
||||
|
||||
if (!_probation.empty()) {
|
||||
// Competition: window victim vs. probation victim.
|
||||
evictable& p_victim = _probation.front();
|
||||
uint8_t w_freq = _sketch.estimate(entry_key(w_victim));
|
||||
uint8_t p_freq = _sketch.estimate(entry_key(p_victim));
|
||||
|
||||
if (w_freq >= p_freq) {
|
||||
// Admit window victim to probation; evict probation victim.
|
||||
remove_from_segment(w_victim);
|
||||
add_to_segment(w_victim, lru_segment::probation);
|
||||
remove(p_victim);
|
||||
if constexpr (!Shallow) {
|
||||
p_victim.on_evicted();
|
||||
} else {
|
||||
p_victim.on_evicted_shallow();
|
||||
}
|
||||
} else {
|
||||
// Reject window victim.
|
||||
remove(w_victim);
|
||||
if constexpr (!Shallow) {
|
||||
w_victim.on_evicted();
|
||||
} else {
|
||||
w_victim.on_evicted_shallow();
|
||||
}
|
||||
}
|
||||
return reclaiming_result::reclaimed_something;
|
||||
}
|
||||
|
||||
// Probation is empty: move window victim to probation and retry.
|
||||
remove_from_segment(w_victim);
|
||||
add_to_segment(w_victim, lru_segment::probation);
|
||||
}
|
||||
|
||||
// Window is within target. Evict from probation, then window, then protected.
|
||||
evictable* victim = nullptr;
|
||||
if (!_probation.empty()) {
|
||||
victim = &_probation.front();
|
||||
} else if (!_window.empty()) {
|
||||
victim = &_window.front();
|
||||
} else if (!_protected.empty()) {
|
||||
victim = &_protected.front();
|
||||
} else {
|
||||
return reclaiming_result::reclaimed_nothing;
|
||||
}
|
||||
remove(*victim);
|
||||
if constexpr (!Shallow) {
|
||||
victim->on_evicted();
|
||||
} else {
|
||||
victim->on_evicted_shallow();
|
||||
}
|
||||
return reclaiming_result::reclaimed_something;
|
||||
}
|
||||
|
||||
public:
|
||||
// Empties the window, probation and protected segments in that order: every
// remaining entry is unlinked and then notified through on_evicted().
~lru() {
    lru_type* const segments[] = {&_window, &_probation, &_protected};
    for (lru_type* segment : segments) {
        while (!segment->empty()) {
            evictable& entry = segment->front();
            remove(entry);
            entry.on_evicted();
        }
    }
}
|
||||
|
||||
// Unlinks `e` from its segment and, for index entries, from the index list.
// Does not invoke any eviction callback.
void remove(evictable& e) noexcept {
    remove_from_segment(e);
    if (e.is_index()) {
        auto& as_index = static_cast<index_evictable&>(e);
        _index_list.erase(_index_list.iterator_to(as_index));
    }
}
|
||||
|
||||
void add(evictable& e) noexcept {
|
||||
_list.push_back(e);
|
||||
assign_frequency_hash(e);
|
||||
record_access(e);
|
||||
add_to_segment(e, lru_segment::window);
|
||||
if (e.is_index()) {
|
||||
_index_list.push_back(static_cast<index_evictable&>(e));
|
||||
}
|
||||
@@ -117,36 +349,52 @@ public:
|
||||
|
||||
// Like add(e) but makes sure that e is evicted right before "more_recent" in the absence of later touches.
|
||||
void add_before(evictable& more_recent, evictable& e) noexcept {
|
||||
_list.insert(_list.iterator_to(more_recent), e);
|
||||
assign_frequency_hash(e);
|
||||
record_access(e);
|
||||
lru_segment seg = more_recent._segment;
|
||||
auto& list = segment_list(seg);
|
||||
list.insert(list.iterator_to(more_recent), e);
|
||||
e._segment = seg;
|
||||
increment_size(seg);
|
||||
}
|
||||
|
||||
// Handles access to an entry:
|
||||
// - In window: moves to back of window.
|
||||
// - In probation: promotes to protected.
|
||||
// - In protected: moves to back of protected.
|
||||
// - Not linked: adds to window.
|
||||
void touch(evictable& e) noexcept {
|
||||
remove(e);
|
||||
add(e);
|
||||
record_access(e);
|
||||
|
||||
switch (e._segment) {
|
||||
case lru_segment::none:
|
||||
assign_frequency_hash(e);
|
||||
add_to_segment(e, lru_segment::window);
|
||||
break;
|
||||
case lru_segment::window:
|
||||
_window.erase(_window.iterator_to(e));
|
||||
_window.push_back(e);
|
||||
break;
|
||||
case lru_segment::probation:
|
||||
_probation.erase(_probation.iterator_to(e));
|
||||
--_probation_size;
|
||||
e._segment = lru_segment::protected_;
|
||||
_protected.push_back(e);
|
||||
++_protected_size;
|
||||
break;
|
||||
case lru_segment::protected_:
|
||||
_protected.erase(_protected.iterator_to(e));
|
||||
_protected.push_back(e);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Evicts a single element from the LRU
|
||||
template <bool Shallow = false>
|
||||
reclaiming_result do_evict(bool should_evict_index) noexcept {
|
||||
if (_list.empty()) {
|
||||
return reclaiming_result::reclaimed_nothing;
|
||||
}
|
||||
evictable& e = (should_evict_index && !_index_list.empty()) ? _index_list.front() : _list.front();
|
||||
remove(e);
|
||||
if constexpr (!Shallow) {
|
||||
e.on_evicted();
|
||||
} else {
|
||||
e.on_evicted_shallow();
|
||||
}
|
||||
return reclaiming_result::reclaimed_something;
|
||||
}
|
||||
|
||||
// Evicts a single element from the LRU.
|
||||
// Evicts a single element using the W-TinyLFU policy.
|
||||
reclaiming_result evict(bool should_evict_index = false) noexcept {
|
||||
return do_evict<false>(should_evict_index);
|
||||
}
|
||||
|
||||
// Evicts a single element from the LRU.
|
||||
// Evicts a single element using the W-TinyLFU policy.
|
||||
// Will call on_evicted_shallow() instead of on_evicted().
|
||||
reclaiming_result evict_shallow() noexcept {
|
||||
return do_evict<true>(false);
|
||||
|
||||
Reference in New Issue
Block a user