From 5007ded2c19303ce1bc95cb6b313597b1b9de2d6 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 4 Dec 2022 16:28:37 +0200 Subject: [PATCH] view: row_lock: lock_ck: serialize partition and row locking The problematic scenario this patch fixes might happen due to unfortunate serialization of locks/unlocks between lock_pk and lock_ck, as follows: 1. lock_pk acquires an exclusive lock on the partition. 2.a lock_ck attempts to acquire a shared lock on the partition and any lock on the row. Both cases currently use a fiber returning a future. 2.b since the partition is locked, the lock_partition times out returning an exceptional future. lock_row has no such problem and succeeds, returning a future holding a rwlock::holder, pointing to the row lock. 3.a the lock_holder previously returned by lock_pk is destroyed, calling `row_locker::unlock`. 3.b row_locker::unlock sees that the partition is not locked and erases it, including the row locks it contains. 4.a when_all_succeeds continuation in lock_ck runs. Since the lock_partition future failed, it destroys both futures. 4.b the lock_row future is destroyed with the rwlock::holder value. 4.c ~holder attempts to return the semaphore units to the row rwlock, but the latter was already destroyed in 3.b above. Acquiring the partition lock and row lock in parallel doesn't help anything, but it complicates error handling as seen above. This patch serializes acquiring the row lock in lock_ck after locking the partition to prevent the above race. This way, erasing the unlocked partition is never expected to happen while any of its row locks is held. 
Fixes #12168 Signed-off-by: Benny Halevy Closes #12208 --- db/view/row_locking.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/db/view/row_locking.cc b/db/view/row_locking.cc index 50d6a0fdf1..d174b79bb2 100644 --- a/db/view/row_locking.cc +++ b/db/view/row_locking.cc @@ -10,8 +10,6 @@ #include "log.hh" #include "utils/latency.hh" -#include - static logging::logger mylog("row_locking"); row_locker::row_locker(schema_ptr s) @@ -102,9 +100,10 @@ row_locker::lock_ck(const dht::decorated_key& pk, const clustering_key_prefix& c single_lock_stats.operations_currently_waiting_for_lock++; utils::latency_counter waiting_latency; waiting_latency.start(); - future lock_row = exclusive ? j->second.hold_write_lock(timeout) : j->second.hold_read_lock(timeout); - return when_all_succeed(std::move(lock_partition), std::move(lock_row)) - .then_unpack([this, pk = &i->first, cpk = &j->first, exclusive, &single_lock_stats, waiting_latency = std::move(waiting_latency)] (auto lock1, auto lock2) mutable { + return lock_partition.then([this, pk = &i->first, cpk = &j->first, &row_lock = j->second, exclusive, &single_lock_stats, waiting_latency = std::move(waiting_latency), timeout] (auto lock1) mutable { + auto lock_row = exclusive ? row_lock.hold_write_lock(timeout) : row_lock.hold_read_lock(timeout); + return lock_row.then([this, pk, cpk, exclusive, &single_lock_stats, waiting_latency = std::move(waiting_latency), lock1 = std::move(lock1)] (auto lock2) mutable { + // FIXME: indentation lock1.release(); lock2.release(); waiting_latency.stop(); @@ -112,6 +111,7 @@ row_locker::lock_ck(const dht::decorated_key& pk, const clustering_key_prefix& c single_lock_stats.lock_acquisitions++; single_lock_stats.operations_currently_waiting_for_lock--; return lock_holder(this, pk, cpk, exclusive); + }); }); }