From 71c5dc82df7eb7f63c1be4ef07d569bf0e97b5e5 Mon Sep 17 00:00:00 2001 From: Calle Wilund Date: Tue, 15 Dec 2020 13:54:55 +0000 Subject: [PATCH] database: Verify iff we actually are writing memtables to disk in truncate Fixes #7732 When truncating with auto_snapshot on, we try to verify the low rp mark from the CF against the sstables discarded by the truncation timestamp. However, in a scenario like: Fill memtables Flush Truncate with snapshot A Fill memtables some more Truncate Move snapshot A to upload + refresh (load old tables) Truncate The last op will assert, because while we have sstables loaded, which will be discarded now, we did not in fact generate any _new_ ones (since memtables are empty), and the RP we get back from discard is one from an earlier generation set. (Any permutation of events that create the situation "empty memtable" + "non-empty sstables with only old tables" will generate the same error). Added a check that before flushing checks if we actually have any data, and if not, does not uphold the RP relation assert. Closes #7799 --- database.cc | 14 ++++++++------ database.hh | 6 ++++++ table.cc | 4 ++++ 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/database.cc b/database.cc index 729d61e027..38187533b8 100644 --- a/database.cc +++ b/database.cc @@ -2059,26 +2059,28 @@ future<> database::truncate(const keyspace& ks, column_family& cf, timestamp_fun return cf.run_with_compaction_disabled([this, &cf, should_flush, auto_snapshot, tsf = std::move(tsf), low_mark]() mutable { future<> f = make_ready_future<>(); - if (should_flush) { + bool did_flush = false; + if (should_flush && cf.can_flush()) { // TODO: // this is not really a guarantee at all that we've actually // gotten all things to disk. Again, need queue-ish or something. f = cf.flush(); + did_flush = true; } else { f = cf.clear(); } - return f.then([this, &cf, auto_snapshot, tsf = std::move(tsf), low_mark, should_flush] { + return f.then([this, &cf, auto_snapshot, tsf = std::move(tsf), low_mark, should_flush, did_flush] { dblog.debug("Discarding sstable data for truncated CF + indexes"); // TODO: notify truncation - return tsf().then([this, &cf, auto_snapshot, low_mark, should_flush](db_clock::time_point truncated_at) { + return tsf().then([this, &cf, auto_snapshot, low_mark, should_flush, did_flush](db_clock::time_point truncated_at) { future<> f = make_ready_future<>(); if (auto_snapshot) { auto name = format("{:d}-{}", truncated_at.time_since_epoch().count(), cf.schema()->cf_name()); f = cf.snapshot(*this, name); } - return f.then([this, &cf, truncated_at, low_mark, should_flush] { - return cf.discard_sstables(truncated_at).then([this, &cf, truncated_at, low_mark, should_flush](db::replay_position rp) { + return f.then([this, &cf, truncated_at, low_mark, should_flush, did_flush] { + return cf.discard_sstables(truncated_at).then([this, &cf, truncated_at, low_mark, should_flush, did_flush](db::replay_position rp) { // TODO: indexes. // Note: since discard_sstables was changed to only count tables owned by this shard, // we can get zero rp back. Changed assert, and ensure we save at least low_mark. @@ -2086,7 +2088,7 @@ future<> database::truncate(const keyspace& ks, column_family& cf, timestamp_fun // We nowadays do not flush tables with sstables but autosnapshot=false. This means // the low_mark assertion does not hold, because we maybe/probably never got around to // creating the sstables that would create them. - assert(!should_flush || low_mark <= rp || rp == db::replay_position()); + assert(!did_flush || low_mark <= rp || rp == db::replay_position()); rp = std::max(low_mark, rp); return truncate_views(cf, truncated_at, should_flush).then([&cf, truncated_at, rp] { // save_truncation_record() may actually fail after we cached the truncation time diff --git a/database.hh b/database.hh index 3a18e7e82b..b4e003fad1 100644 --- a/database.hh +++ b/database.hh @@ -224,6 +224,10 @@ public: return bool(_seal_immediate_fn); } + bool can_flush() const { + return may_flush() && !empty(); + } + bool empty() const { for (auto& m : _memtables) { if (!m->empty()) { @@ -750,6 +754,8 @@ public: future<> clear(); // discards memtable(s) without flushing them to disk. future discard_sstables(db_clock::time_point); + bool can_flush() const; + // FIXME: this is just an example, should be changed to something more // general. compact_all_sstables() starts a compaction of all sstables. // It doesn't flush the current memtable first. It's just a ad-hoc method, diff --git a/table.cc b/table.cc index e52589661e..919f5441e9 100644 --- a/table.cc +++ b/table.cc @@ -1250,6 +1250,10 @@ future<> table::flush() { return _memtables->request_flush().then([op = std::move(op)] {}); } +bool table::can_flush() const { + return _memtables->can_flush(); +} + future<> table::clear() { if (_commitlog) { _commitlog->discard_completed_segments(_schema->id());