From e08fa7dafa7f268cd71f3de5968673b4ea6d9cc7 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 21 Jun 2016 14:41:41 -0400 Subject: [PATCH] fix potential stale data in cache update We currently have a problem in update_cache, that can be trigger by ordering issues related to memtable flush termination (not initiation) and/or update_cache() call duration. That issue is described in #1364, and in short, happens if a call to update_cache starts before and ongoing call finishes. There is now a new SSTable that should be consulted by the presence checker that is not. The partition checker operates in a stale list because we need to make sure the SSTable we just wrote is excluded from it. This patch changes the partition checker so that all SSTables currently in use are consulted, except for the one we have just flushed. That provides both the guarantee that we won't check our own SSTable and access to the most up-to-date SSTable list. Fixes #1364 Signed-off-by: Glauber Costa Message-Id: --- database.cc | 15 ++++++++------- database.hh | 4 ++-- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/database.cc b/database.cc index 9da82b05a8..0a1758d038 100644 --- a/database.cc +++ b/database.cc @@ -125,10 +125,11 @@ column_family::column_family(schema_ptr schema, config config, db::commitlog* cl } partition_presence_checker -column_family::make_partition_presence_checker(lw_shared_ptr old_sstables) { - return [this, old_sstables = std::move(old_sstables)] (partition_key_view key) { - for (auto&& s : *old_sstables) { - if (s.second->filter_has_key(*_schema, key)) { +column_family::make_partition_presence_checker(sstables::shared_sstable exclude_sstable) { + return [this, exclude_sstable = std::move(exclude_sstable)] (partition_key_view key) { + auto exclude = [e = std::move(exclude_sstable)] (auto s) { return s != e; }; + for (auto&& s : boost::make_iterator_range(*_sstables) | boost::adaptors::map_values | boost::adaptors::filtered(exclude)) { + if (s->filter_has_key(*_schema, key)) { return partition_presence_checker_result::maybe_exists; } } @@ -621,11 +622,11 @@ void column_family::add_sstable(lw_shared_ptr sstable) { } future<> -column_family::update_cache(memtable& m, lw_shared_ptr old_sstables) { +column_family::update_cache(memtable& m, sstables::shared_sstable exclude_sstable) { if (_config.enable_cache) { // be careful to use the old sstable list, since the new one will hit every // mutation in m. - return _cache.update(m, make_partition_presence_checker(std::move(old_sstables))); + return _cache.update(m, make_partition_presence_checker(std::move(exclude_sstable))); } else { return make_ready_future<>(); } @@ -798,7 +799,7 @@ column_family::try_flush_memtable_to_sstable(lw_shared_ptr old) { trigger_compaction(); - return update_cache(*old, std::move(old_sstables)).then_wrapped([this, old] (future<> f) { + return update_cache(*old, newtab).then_wrapped([this, old] (future<> f) { try { f.get(); } catch(...) { diff --git a/database.hh b/database.hh index 3b92c86492..26f35cad93 100644 --- a/database.hh +++ b/database.hh @@ -380,7 +380,7 @@ private: lw_shared_ptr new_memtable(); lw_shared_ptr new_streaming_memtable(); future try_flush_memtable_to_sstable(lw_shared_ptr memt); - future<> update_cache(memtable&, lw_shared_ptr old_sstables); + future<> update_cache(memtable&, sstables::shared_sstable exclude_sstable); struct merge_comparator; // update the sstable generation, making sure that new new sstables don't overwrite this one. @@ -414,7 +414,7 @@ private: mutation_source sstables_as_mutation_source(); key_source sstables_as_key_source() const; - partition_presence_checker make_partition_presence_checker(lw_shared_ptr old_sstables); + partition_presence_checker make_partition_presence_checker(sstables::shared_sstable exclude_sstable); std::chrono::steady_clock::time_point _sstable_writes_disabled_at; void do_trigger_compaction(); public: