diff --git a/cache_flat_mutation_reader.hh b/cache_flat_mutation_reader.hh index 901cf0bf93..d5cccdc916 100644 --- a/cache_flat_mutation_reader.hh +++ b/cache_flat_mutation_reader.hh @@ -737,7 +737,9 @@ void cache_flat_mutation_reader::maybe_drop_last_entry() noexcept { && _snp->at_oldest_version()) { with_allocator(_snp->region().allocator(), [&] { - _last_row->on_evicted(_read_context.cache()._tracker); + cache_tracker& tracker = _read_context.cache()._tracker; + tracker.get_lru().remove(*_last_row); + _last_row->on_evicted(tracker); }); _last_row = nullptr; diff --git a/db/cache_tracker.hh b/db/cache_tracker.hh index 434072dcb7..c484a555c7 100644 --- a/db/cache_tracker.hh +++ b/db/cache_tracker.hh @@ -92,7 +92,7 @@ public: void insert(partition_entry&) noexcept; void insert(partition_version&) noexcept; void insert(rows_entry&) noexcept; - void on_remove() noexcept; + void remove(rows_entry&) noexcept; void clear_continuity(cache_entry& ce) noexcept; void on_partition_erase() noexcept; void on_partition_merge() noexcept; @@ -123,9 +123,12 @@ public: }; inline -void cache_tracker::on_remove() noexcept { +void cache_tracker::remove(rows_entry& entry) noexcept { --_stats.rows; ++_stats.row_removals; + if (entry.is_linked()) { + _lru.remove(entry); + } } inline diff --git a/mutation_partition.cc b/mutation_partition.cc index a1aa120a34..c0bf77c18a 100644 --- a/mutation_partition.cc +++ b/mutation_partition.cc @@ -336,7 +336,7 @@ stop_iteration mutation_partition::apply_monotonically(const schema& s, mutation ++app_stats.rows_dropped_by_tombstones; i = _rows.erase(i); if (tracker) { - tracker->on_remove(); + tracker->remove(e); } del(&e); } else { @@ -428,7 +428,7 @@ stop_iteration mutation_partition::apply_monotonically(const schema& s, mutation if (src_e.dummy()) { p_i = p._rows.erase(p_i); if (tracker) { - tracker->on_remove(); + tracker->remove(src_e); } del(&src_e); insert = false; @@ -448,9 +448,9 @@ stop_iteration mutation_partition::apply_monotonically(const 
schema& s, mutation // violating exception guarantees. src_e.set_continuous(false); if (tracker) { - tracker->on_remove(); // Newer evictable versions store complete rows i->replace_with(std::move(src_e)); + tracker->remove(src_e); } else { memory::on_alloc_point(); i->apply_monotonically(s, std::move(src_e)); @@ -2394,7 +2394,7 @@ stop_iteration mutation_partition::clear_gently(cache_tracker* tracker) noexcept auto end = _rows.end(); while (i != end) { if (tracker) { - tracker->on_remove(); + tracker->remove(*i); } i = _rows.erase_and_dispose(i, del); diff --git a/row_cache.cc b/row_cache.cc index 516ed7f1f5..2ca5a56037 100644 --- a/row_cache.cc +++ b/row_cache.cc @@ -156,9 +156,10 @@ void cache_tracker::clear() { } void cache_tracker::touch(rows_entry& e) { - // last dummy may not be linked if evicted, but - // the unlink_from_lru() handles it - e.unlink_from_lru(); + // last dummy may not be linked if evicted + if (e.is_linked()) { + _lru.remove(e); + } _lru.add(e); } @@ -1084,7 +1085,10 @@ void row_cache::unlink_from_lru(const dht::decorated_key& dk) { if (i != _partitions.end()) { for (partition_version& pv : i->partition().versions_from_oldest()) { for (rows_entry& row : pv.partition().clustered_rows()) { - row.unlink_from_lru(); + // Last dummy may already be unlinked. + if (row.is_linked()) { + _tracker.get_lru().remove(row); + } } } } @@ -1219,7 +1223,6 @@ void rows_entry::on_evicted(cache_tracker& tracker) noexcept { // so don't remove it, just unlink from the LRU. // That dummy is linked in the LRU, because there may be partitions // with no regular rows, and we need to track them. - unlink_from_lru(); } else { // When evicting a dummy with both sides continuous we don't need to break continuity. 
// diff --git a/sstables/partition_index_cache.hh b/sstables/partition_index_cache.hh index 15046d9727..764979a25c 100644 --- a/sstables/partition_index_cache.hh +++ b/sstables/partition_index_cache.hh @@ -111,14 +111,16 @@ public: explicit entry_ptr(lsa::weak_ptr ref) : _ref(std::move(ref)) { - _ref->unlink_from_lru(); + if (_ref->is_linked()) { + _ref->_parent->_lru.remove(*_ref); + } } ~entry_ptr() { *this = nullptr; } entry_ptr(entry_ptr&&) noexcept = default; entry_ptr(const entry_ptr&) noexcept = default; entry_ptr& operator=(std::nullptr_t) noexcept { if (_ref) { - if (_ref.unique()) { + if (_ref.unique() && _ref->ready()) { _ref->_parent->_lru.add(*_ref); } _ref = nullptr; @@ -172,6 +174,7 @@ public: ~partition_index_cache() { with_allocator(_region.allocator(), [&] { _cache.clear_and_dispose([this] (entry* e) noexcept { + _lru.remove(*e); on_evicted(*e); }); }); @@ -260,6 +263,7 @@ public: if (i->is_referenced()) { ++i; } else { + _lru.remove(*i); on_evicted(*i); i = i.erase(key_less_comparator()); } diff --git a/test/boost/mvcc_test.cc b/test/boost/mvcc_test.cc index 3631c785ab..a0c17152fb 100644 --- a/test/boost/mvcc_test.cc +++ b/test/boost/mvcc_test.cc @@ -320,6 +320,7 @@ public: mvcc_partition(mvcc_partition&&) = default; ~mvcc_partition() { + evict(); with_allocator(region().allocator(), [&] { _e = {}; }); @@ -569,8 +570,6 @@ SEASTAR_TEST_CASE(test_eviction_with_active_reader) { BOOST_REQUIRE(cursor.continuous()); BOOST_REQUIRE(cursor.key().equal(s, ck1)); - e.evict(); - { logalloc::reclaim_lock rl(ms.region()); cursor.maybe_refresh(); @@ -769,6 +768,7 @@ SEASTAR_TEST_CASE(test_continuity_merging_in_evictable) { assert_that(s, actual2) .has_same_continuity(expected) .is_equal_to_compacted(expected); + e.evict(tracker.cleaner()); } }); }); @@ -940,6 +940,7 @@ SEASTAR_TEST_CASE(test_partition_snapshot_row_cursor) { BOOST_REQUIRE(eq(cur.position(), table.make_ckey(5))); BOOST_REQUIRE(cur.continuous()); } + e.evict(tracker.cleaner()); }); }); } @@ 
-1238,6 +1239,7 @@ SEASTAR_TEST_CASE(test_cursor_tracks_continuity_in_reversed_mode) { BOOST_REQUIRE(!cur.next()); } + e.evict(tracker.cleaner()); }); }); } @@ -1293,6 +1295,7 @@ SEASTAR_TEST_CASE(test_ensure_entry_in_latest_in_reversed_mode) { BOOST_REQUIRE(!res.inserted); } } + e.evict(tracker.cleaner()); }); }); } @@ -1344,6 +1347,7 @@ SEASTAR_TEST_CASE(test_ensure_entry_in_latest_does_not_set_continuity_in_reverse // the entry for ckey 2 in latest version should not be marked as continuous. BOOST_REQUIRE(!cur.continuous()); } + e.evict(tracker.cleaner()); }); }); } diff --git a/utils/cached_file.hh b/utils/cached_file.hh index 2de749001e..6348943fa6 100644 --- a/utils/cached_file.hh +++ b/utils/cached_file.hh @@ -79,7 +79,9 @@ private: // because it will not be linked in the LRU. ptr_type share() noexcept { if (_use_count++ == 0) { - unlink_from_lru(); + if (is_linked()) { + parent->_lru.remove(*this); + } } return std::unique_ptr(this); } @@ -332,6 +334,7 @@ public: while (start != end) { if (start->is_linked()) { ++count; + _lru.remove(*start); on_evicted(*start); start = start.erase_and_dispose(disposer, page_idx_less_comparator()); } else { @@ -449,6 +452,7 @@ public: auto i = _cache.begin(); while (i != _cache.end()) { if (i->is_linked()) { + _lru.remove(*i); on_evicted(*i); i = i.erase(page_idx_less_comparator()); } else { diff --git a/utils/lru.hh b/utils/lru.hh index 7d4fb40d5b..89af7eed99 100644 --- a/utils/lru.hh +++ b/utils/lru.hh @@ -13,12 +13,24 @@ class evictable { friend class lru; + // For bookkeeping, we want the unlinking of evictables to be explicit. + // E.g. if the cache's internal data structure consists of multiple lists, we would + // like to know which list is an element being removed from. + // Therefore, we are using auto_unlink only to be able to call unlink() in the move constructor + // and we do NOT rely on automatic unlinking in _lru_link's destructor. + // It's the programmer's responsibility
to call lru::remove on the evictable before its destruction. + // Failure to do so is a bug, and it will trigger an assertion in the destructor. using lru_link_type = boost::intrusive::list_member_hook< boost::intrusive::link_mode<boost::intrusive::auto_unlink>>; lru_link_type _lru_link; protected: // Prevent destruction via evictable pointer. LRU is not aware of allocation strategy. - ~evictable() = default; + // Prevent destruction of a linked evictable. While we could unlink the evictable here + // in the destructor, we can't perform proper accounting for that without access to the + // head of the containing list. + ~evictable() { + assert(!_lru_link.is_linked()); + } public: evictable() = default; evictable(evictable&& o) noexcept; @@ -30,10 +42,6 @@ public: return _lru_link.is_linked(); } - void unlink_from_lru() { - _lru_link.unlink(); - } - void swap(evictable& o) noexcept { _lru_link.swap_nodes(o._lru_link); }