/* * Copyright (C) 2015 ScyllaDB */ /* * This file is part of Scylla. * * Scylla is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Scylla is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Scylla. If not, see . */ #pragma once #include #include #include "core/memory.hh" #include #include "mutation_reader.hh" #include "mutation_partition.hh" #include "utils/logalloc.hh" #include "key_reader.hh" #include "utils/phased_barrier.hh" #include "utils/histogram.hh" #include "partition_version.hh" namespace scollectd { struct registrations; } namespace bi = boost::intrusive; class row_cache; // Intrusive set entry which holds partition data. // // TODO: Make memtables use this format too. class cache_entry { // We need auto_unlink<> option on the _cache_link because when entry is // evicted from cache via LRU we don't have a reference to the container // and don't want to store it with each entry. As for the _lru_link, we // have a global LRU, so technically we could not use auto_unlink<> on // _lru_link, but it's convenient to do so too. We may also want to have // multiple eviction spaces in the future and thus multiple LRUs. using lru_link_type = bi::list_member_hook>; using cache_link_type = bi::set_member_hook>; schema_ptr _schema; dht::ring_position _key; partition_entry _pe; // True when we know that there is nothing between this entry and the next one in cache bool _continuous; lru_link_type _lru_link; cache_link_type _cache_link; friend class size_calculator; public: friend class row_cache; friend class cache_tracker; cache_entry(schema_ptr s) : _schema(std::move(s)) , _key(dht::ring_position::starting_at(dht::minimum_token())) , _pe(_schema) , _continuous(false) { } cache_entry(schema_ptr s, const dht::decorated_key& key, const mutation_partition& p, bool continuous = false) : _schema(std::move(s)) , _key(key) , _pe(p) , _continuous(continuous) { } cache_entry(schema_ptr s, dht::decorated_key&& key, mutation_partition&& p, bool continuous = false) noexcept : _schema(std::move(s)) , _key(std::move(key)) , _pe(std::move(p)) , _continuous(continuous) { } cache_entry(schema_ptr s, dht::decorated_key&& key, partition_entry&& pe, bool continuous = false) noexcept : _schema(std::move(s)) , _key(std::move(key)) , _pe(std::move(pe)) , _continuous(continuous) { } cache_entry(cache_entry&&) noexcept; bool is_evictable() { return _lru_link.is_linked(); } const dht::ring_position& key() const { return _key; } const partition_entry& partition() const { return _pe; } partition_entry& partition() { return _pe; } const schema_ptr& schema() const { return _schema; } schema_ptr& schema() { return _schema; } streamed_mutation read(row_cache&, const schema_ptr&); streamed_mutation read(row_cache&, const schema_ptr&, query::clustering_key_filtering_context); bool continuous() const { return _continuous; } void set_continuous(bool value) { _continuous = value; } struct compare { dht::ring_position_less_comparator _c; compare(schema_ptr s) : _c(*s) {} bool operator()(const dht::decorated_key& k1, const cache_entry& k2) const { return _c(k1, k2._key); } bool operator()(const dht::ring_position& k1, const cache_entry& k2) const { return _c(k1, k2._key); } bool operator()(const cache_entry& k1, const cache_entry& k2) const { return _c(k1._key, k2._key); } bool operator()(const cache_entry& k1, const dht::decorated_key& k2) const { return _c(k1._key, k2); } bool operator()(const cache_entry& k1, const dht::ring_position& k2) const { return _c(k1._key, k2); } }; }; // Tracks accesses and performs eviction of cache entries. class cache_tracker final { public: using lru_type = bi::list, bi::constant_time_size>; // we need this to have bi::auto_unlink on hooks. private: uint64_t _hits = 0; uint64_t _misses = 0; uint64_t _insertions = 0; uint64_t _merges = 0; uint64_t _evictions = 0; uint64_t _removals = 0; uint64_t _partitions = 0; uint64_t _modification_count = 0; std::unique_ptr _collectd_registrations; logalloc::region _region; lru_type _lru; private: void setup_collectd(); public: cache_tracker(); ~cache_tracker(); void clear(); void touch(cache_entry&); void insert(cache_entry&); void on_erase(); void on_merge(); void on_hit(); void on_miss(); allocation_strategy& allocator(); logalloc::region& region(); const logalloc::region& region() const; uint64_t modification_count() const { return _modification_count; } uint64_t partitions() const { return _partitions; } }; // Returns a reference to shard-wide cache_tracker. cache_tracker& global_cache_tracker(); // // A data source which wraps another data source such that data obtained from the underlying data source // is cached in-memory in order to serve queries faster. // // To query the underlying data source through cache, use make_reader(). // // Cache populates itself automatically during misses. // // Cache needs to be maintained externally so that it remains consistent with the underlying data source. // Any incremental change to the underlying data source should result in update() being called on cache. // class row_cache final { public: using partitions_type = bi::set, bi::constant_time_size, // we need this to have bi::auto_unlink on hooks bi::compare>; friend class single_partition_populating_reader; friend class cache_entry; public: struct stats { utils::timed_rate_moving_average hits; utils::timed_rate_moving_average misses; }; private: cache_tracker& _tracker; stats _stats{}; schema_ptr _schema; partitions_type _partitions; // Cached partitions are complete. mutation_source _underlying; key_source _underlying_keys; // Synchronizes populating reads with updates of underlying data source to ensure that cache // remains consistent across flushes with the underlying data source. // Readers obtained from the underlying data source in earlier than // current phases must not be used to populate the cache, unless they hold // phaser::operation created in the reader's phase of origin. Readers // should hold to a phase only briefly because this inhibits progress of // updates. Phase changes occur in update()/clear(), which can be assumed to // be asynchronous wrt invoking of the underlying data source. utils::phased_barrier _populate_phaser; logalloc::allocating_section _update_section; logalloc::allocating_section _populate_section; logalloc::allocating_section _read_section; mutation_reader make_scanning_reader(schema_ptr, const query::partition_range&, const io_priority_class& pc, query::clustering_key_filtering_context ck_filtering); void on_hit(); void on_miss(); void upgrade_entry(cache_entry&); void invalidate_locked(const dht::decorated_key&); void invalidate_unwrapped(const query::partition_range&); void clear_now() noexcept; static thread_local seastar::thread_scheduling_group _update_thread_scheduling_group; public: ~row_cache(); row_cache(schema_ptr, mutation_source underlying, key_source, cache_tracker&); row_cache(row_cache&&) = default; row_cache(const row_cache&) = delete; row_cache& operator=(row_cache&&) = default; public: // Implements mutation_source for this cache, see mutation_reader.hh // User needs to ensure that the row_cache object stays alive // as long as the reader is used. // The range must not wrap around. mutation_reader make_reader(schema_ptr, const query::partition_range& = query::full_partition_range, query::clustering_key_filtering_context = query::no_clustering_key_filtering, const io_priority_class& = default_priority_class()); const stats& stats() const { return _stats; } public: // Populate cache from given mutation. The mutation must contain all // information there is for its partition in the underlying data sources. void populate(const mutation& m); // Clears the cache. // Guarantees that cache will not be populated using readers created // before this method was invoked. future<> clear(); // Synchronizes cache with the underlying data source from a memtable which // has just been flushed to the underlying data source. // The memtable can be queried during the process, but must not be written. // After the update is complete, memtable is empty. future<> update(memtable&, partition_presence_checker underlying_negative); // Moves given partition to the front of LRU if present in cache. void touch(const dht::decorated_key&); // Removes given partition from cache. // // Guarantees that cache will not be populated with given key // using readers created before this method was invoked. // // The key must be kept alive until method resolves. future<> invalidate(const dht::decorated_key& key); // Removes given range of partitions from cache. // The range can be a wrap around. // // Guarantees that cache will not be populated with partitions from that range // using readers created before this method was invoked. // // The range must be kept alive until method resolves. future<> invalidate(const query::partition_range&); auto num_entries() const { return _partitions.size(); } const cache_tracker& get_cache_tracker() const { return _tracker; } void set_schema(schema_ptr) noexcept; const schema_ptr& schema() const; friend class just_cache_scanning_reader; friend class scanning_and_populating_reader; friend class range_populating_reader; friend class cache_tracker; friend class mark_end_as_continuous; };