Compare commits

...

5 Commits

Author SHA1 Message Date
Zach Brown
a27c54568c Handle back to back invalidation requests
The client's incoming lock invalidation request handler triggers a
BUG_ON if it gets a request for a lock that is already processing a
previous invalidation request.  The server is supposed to only send
one request at a time.

The problem is that the batched invalidation request handling sends
responses outside of spinlock coverage, and only reacquires the lock and
finishes processing once the response send has been successful.

This gives a window for another invalidation request to arrive after the
response was sent but before the invalidation finished processing.  This
triggers the bug.

The fix is to mark the lock such that we can recognize a valid second
request arriving after we send the response but before we finish
processing.  If it arrives we'll continue invalidation processing with
the arguments from the new request.
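
As a condensed, hypothetical sketch of that pattern (made-up my_lock type
and send_response() call standing in for the scoutfs structures; the real
change is in the lock_invalidate_worker() hunks below):

struct my_lock {
	u64 inv_net_id;			/* nonzero while a request is unanswered */
	int invalidate_pending;
	wait_queue_head_t waitq;
};

static void invalidate_one(spinlock_t *info_lock, struct my_lock *lk)
{
	u64 net_id = lk->inv_net_id;

	/* ...the invalidation work itself happens here, outside the spinlock... */

	/* clear the marker so a request arriving after our response is recorded */
	lk->inv_net_id = 0;
	send_response(net_id);		/* may sleep; a new request can now arrive */

	spin_lock(info_lock);
	if (lk->inv_net_id == 0) {
		/* no second request raced in; finish and wake waiters */
		lk->invalidate_pending = 0;
		wake_up(&lk->waitq);
	}
	/* else: leave the lock queued so the worker reruns with the new arguments */
	spin_unlock(info_lock);
}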

Signed-off-by: Zach Brown <zab@versity.com>
2021-04-22 17:00:50 -07:00
Andy Grover
cbb031bb5d Merge pull request #32 from versity/zab/block_rhashtable_insert_fixes
Zab/block rhashtable insert fixes
2021-04-13 10:42:17 -07:00
Zach Brown
c3290771a0 Block cache use rht _lookup_ insert for EEXIST
The sneaky rhashtable_insert_fast() can't return -EEXIST despite the
last line of the function *REALLY* making it look like it can.  It just
inserts new objects at the head of the bucket lists without comparing
the insertion with existing objects.

The block cache was relying on insertion to resolve duplicate blocks from
racing allocations.  Because insertion couldn't return -EEXIST we could
end up with duplicate cached blocks present in the hash table.

rhashtable_lookup_insert_fast() fixes this by actually comparing the
inserted object's key with the objects found in the insertion bucket.  A
racing allocator trying to insert a duplicate cached block gets an error,
drops its allocated block, and retries its lookup.
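
As a rough illustration of that caller pattern (kernel rhashtable API with
a made-up cached_block type; refcounting and lifetime handling for the
returned block are elided), not the scoutfs code itself:

struct cached_block {
	struct rhash_head ht_head;
	u64 blkno;			/* hash key */
};

static const struct rhashtable_params cb_params = {
	.key_len = sizeof(u64),
	.key_offset = offsetof(struct cached_block, blkno),
	.head_offset = offsetof(struct cached_block, ht_head),
};

static struct cached_block *cb_get(struct rhashtable *ht, u64 blkno)
{
	struct cached_block *cb;
	int ret;

	for (;;) {
		/* rhashtable_lookup_fast() takes the RCU read lock itself */
		cb = rhashtable_lookup_fast(ht, &blkno, cb_params);
		if (cb)
			return cb;

		cb = kzalloc(sizeof(*cb), GFP_NOFS);
		if (!cb)
			return NULL;
		cb->blkno = blkno;

		/* unlike rhashtable_insert_fast(), this compares keys in the bucket */
		ret = rhashtable_lookup_insert_fast(ht, &cb->ht_head, cb_params);
		if (ret == 0)
			return cb;

		/* -EEXIST: a racing allocator won; drop ours and look up theirs */
		kfree(cb);
		if (ret != -EEXIST)
			return NULL;
	}
}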

Signed-off-by: Zach Brown <zab@versity.com>
2021-04-13 09:24:23 -07:00
Zach Brown
cf3cb3f197 Wait for rhashtable to rehash on insert EBUSY
The rhashtable can return -EBUSY if you insert quickly enough to trigger
an expansion while the next table size is still waiting to be rehashed in
an RCU callback.  If we get -EBUSY from rhashtable insertion we call
synchronize_rcu() to wait for the rehash to complete before trying again.

This was hit while testing restores of a very large namespace and took a
few hours to reproduce.
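
A minimal sketch of that retry (hypothetical insert_waiting_for_rehash()
wrapper; the actual scoutfs change is the block_insert() hunk below):

static int insert_waiting_for_rehash(struct rhashtable *ht,
				     struct rhash_head *head,
				     const struct rhashtable_params params)
{
	int ret;

	do {
		ret = rhashtable_lookup_insert_fast(ht, head, params);
		if (ret == -EBUSY) {
			/* a pending rehash is in the way; wait out its RCU grace period */
			synchronize_rcu();
		}
	} while (ret == -EBUSY);

	return ret;
}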

Signed-off-by: Zach Brown <zab@versity.com>
2021-04-13 09:24:23 -07:00
Andy Grover
cb4ed98b3c Merge pull request #31 from versity/zab/block_shrink_wait_for_rebalance
Block cache shrink restart waits for rcu callbacks
2021-04-08 09:03:12 -07:00
2 changed files with 41 additions and 15 deletions

@@ -286,10 +286,16 @@ static int block_insert(struct super_block *sb, struct block_private *bp)
 	WARN_ON_ONCE(atomic_read(&bp->refcount) & BLOCK_REF_INSERTED);
 
+retry:
 	atomic_add(BLOCK_REF_INSERTED, &bp->refcount);
-	ret = rhashtable_insert_fast(&binf->ht, &bp->ht_head, block_ht_params);
+	ret = rhashtable_lookup_insert_fast(&binf->ht, &bp->ht_head, block_ht_params);
 	if (ret < 0) {
 		atomic_sub(BLOCK_REF_INSERTED, &bp->refcount);
+		if (ret == -EBUSY) {
+			/* wait for pending rebalance to finish */
+			synchronize_rcu();
+			goto retry;
+		}
 	} else {
 		atomic_inc(&binf->total_inserted);
 		TRACE_BLOCK(insert, bp);

@@ -713,7 +713,9 @@ int scoutfs_lock_grant_response(struct super_block *sb,
 /*
  * Each lock has received a lock invalidation request from the server
  * which specifies a new mode for the lock. The server will only send
- * one invalidation request at a time for each lock.
+ * one invalidation request at a time for each lock. The server can
+ * send another invalidate request after we send the response but before
+ * we reacquire the lock and finish invalidation.
  *
  * This is an unsolicited request from the server so it can arrive at
  * any time after we make the server aware of the lock by initially
@@ -803,6 +805,9 @@ static void lock_invalidate_worker(struct work_struct *work)
 	ret = lock_invalidate(sb, lock, nl->old_mode, nl->new_mode);
 	BUG_ON(ret);
 
+	/* allow another request after we respond but before we finish */
+	lock->inv_net_id = 0;
+
 	/* respond with the key and modes from the request */
 	ret = scoutfs_client_lock_response(sb, net_id, nl);
 	BUG_ON(ret);
@@ -814,11 +819,13 @@ static void lock_invalidate_worker(struct work_struct *work)
 	spin_lock(&linfo->lock);
 	list_for_each_entry_safe(lock, tmp, &ready, inv_head) {
-		list_del_init(&lock->inv_head);
-		lock->invalidate_pending = 0;
 		trace_scoutfs_lock_invalidated(sb, lock);
-		wake_up(&lock->waitq);
+		if (lock->inv_net_id == 0) {
+			/* finish if another request didn't arrive */
+			list_del_init(&lock->inv_head);
+			lock->invalidate_pending = 0;
+			wake_up(&lock->waitq);
+		}
 		put_lock(linfo, lock);
 	}
@@ -833,34 +840,47 @@ out:
 }
 
 /*
- * Record an incoming invalidate request from the server and add its lock
- * to the list for processing.
+ * Record an incoming invalidate request from the server and add its
+ * lock to the list for processing. This request can be from a new
+ * server and racing with invalidation that frees from an old server.
+ * It's fine to not find the requested lock and send an immediate
+ * response.
  *
- * This is trusting the server and will crash if it's sent bad requests :/
+ * The invalidation process drops the linfo lock to send responses. The
+ * moment it does so we can receive another invalidation request (the
+ * server can ask us to go from write->read then read->null). We allow
+ * for one chain like this but it's a bug if we receive more concurrent
+ * invalidation requests than that. The server should be only sending
+ * one at a time.
  */
 int scoutfs_lock_invalidate_request(struct super_block *sb, u64 net_id,
 				    struct scoutfs_net_lock *nl)
 {
 	DECLARE_LOCK_INFO(sb, linfo);
 	struct scoutfs_lock *lock;
+	int ret = 0;
 
 	scoutfs_inc_counter(sb, lock_invalidate_request);
 
 	spin_lock(&linfo->lock);
 	lock = get_lock(sb, &nl->key);
-	BUG_ON(!lock);
 	if (lock) {
-		BUG_ON(lock->invalidate_pending);
-		lock->invalidate_pending = 1;
-		lock->inv_nl = *nl;
+		BUG_ON(lock->inv_net_id != 0);
 		lock->inv_net_id = net_id;
-		list_add_tail(&lock->inv_head, &linfo->inv_list);
+		lock->inv_nl = *nl;
+		if (list_empty(&lock->inv_head)) {
+			list_add_tail(&lock->inv_head, &linfo->inv_list);
+			lock->invalidate_pending = 1;
+		}
 		trace_scoutfs_lock_invalidate_request(sb, lock);
 		queue_inv_work(linfo);
 	}
 	spin_unlock(&linfo->lock);
 
-	return 0;
+	if (!lock)
+		ret = scoutfs_client_lock_response(sb, net_id, nl);
+
+	return ret;
 }
 
 /*