row_cache, mvcc: Prevent locked snapshots from being evicted

If the whole partition entry is evicted while being updated from the
memtable, a subsequent read may populate the partition using the old
version of data if it attempts to do it before cache update advances
past that partition. Partial eviction is not affected because
populating reads will notice that there is a newer snapshot
corresponding to the updater.

This can happen only in OOM situations where the whole cache gets evicted.

Affects only tables with multi-row partitions, which are the only ones
that can experience the update of partition entry being preempted.

Introduced in 70c7277.

Fixes #5134.
This commit is contained in:
Tomasz Grabiec
2019-10-02 20:44:20 +02:00
parent 57a93513bd
commit 90d6c0b9a2
3 changed files with 14 additions and 2 deletions

View File

@@ -172,6 +172,9 @@ tombstone partition_entry::partition_tombstone() const {
partition_snapshot::~partition_snapshot() {
with_allocator(region().allocator(), [this] {
if (_locked) {
touch();
}
if (_version && _version.is_unique_owner()) {
auto v = &*_version;
_version = {};
@@ -705,6 +708,7 @@ void partition_entry::evict(mutation_cleaner& cleaner) noexcept {
return;
}
if (_snapshot) {
assert(!_snapshot->is_locked());
_snapshot->_version = std::move(_version);
_snapshot->_version.mark_as_unique_owner();
_snapshot->_entry = nullptr;
@@ -733,5 +737,9 @@ void partition_snapshot::lock() noexcept {
}
void partition_snapshot::unlock() noexcept {
// Locked snapshots must always be latest, is_locked() assumes that.
// Also, touch() is only effective when this snapshot is latest.
assert(at_latest_version());
_locked = false;
touch(); // Make the entry evictable again in case it was fully unlinked by eviction attempt.
}

View File

@@ -330,6 +330,7 @@ public:
// Tells whether the snapshot is locked.
// Locking the snapshot prevents it from getting detached from the partition entry.
// It also prevents the partition entry from being evicted.
bool is_locked() const {
return _locked;
}

View File

@@ -1245,8 +1245,11 @@ void rows_entry::on_evicted(cache_tracker& tracker) noexcept {
partition_version& pv = partition_version::container_of(mutation_partition::container_of(
mutation_partition::rows_type::container_of_only_member(*it)));
if (pv.is_referenced_from_entry()) {
cache_entry& ce = cache_entry::container_of(partition_entry::container_of(pv));
ce.on_evicted(tracker);
partition_entry& pe = partition_entry::container_of(pv);
if (!pe.is_locked()) {
cache_entry& ce = cache_entry::container_of(pe);
ce.on_evicted(tracker);
}
}
}
}