diff --git a/kmod/src/lock.c b/kmod/src/lock.c
index b04e18ee..9275e3a4 100644
--- a/kmod/src/lock.c
+++ b/kmod/src/lock.c
@@ -289,6 +289,7 @@ static struct scoutfs_lock *lock_alloc(struct super_block *sb,
 	lock->sb = sb;
 	init_waitqueue_head(&lock->waitq);
 	lock->mode = SCOUTFS_LOCK_NULL;
+	lock->invalidating_mode = SCOUTFS_LOCK_NULL;
 
 	atomic64_set(&lock->forest_bloom_nr, 0);
 
@@ -666,7 +667,9 @@ struct inv_req {
  *
  * Before we start invalidating the lock we set the lock to the new
  * mode, preventing further incompatible users of the old mode from
- * using the lock while we're invalidating.
+ * using the lock while we're invalidating. We record the previously
+ * granted mode so that we can send lock recover responses with the old
+ * granted mode during invalidation.
  */
 static void lock_invalidate_worker(struct work_struct *work)
 {
@@ -691,7 +694,8 @@ static void lock_invalidate_worker(struct work_struct *work)
 		if (!lock_counts_match(nl->new_mode, lock->users))
 			continue;
 
-		/* set the new mode, no incompatible users during inval */
+		/* set the new mode, no incompatible users during inval, recov needs old */
+		lock->invalidating_mode = lock->mode;
 		lock->mode = nl->new_mode;
 
 		/* move everyone that's ready to our private list */
@@ -734,6 +738,8 @@ static void lock_invalidate_worker(struct work_struct *work)
 		list_del(&ireq->head);
 		kfree(ireq);
 
+	lock->invalidating_mode = SCOUTFS_LOCK_NULL;
+
 	if (list_empty(&lock->inv_list)) {
 		/* finish if another request didn't arrive */
 		list_del_init(&lock->inv_head);
@@ -824,5 +830,6 @@ int scoutfs_lock_recover_request(struct super_block *sb, u64 net_id,
 	DECLARE_LOCK_INFO(sb, linfo);
 	struct scoutfs_net_lock_recover *nlr;
+	enum scoutfs_lock_mode mode;
 	struct scoutfs_lock *lock;
 	struct scoutfs_lock *next;
 	struct rb_node *node;
@@ -844,9 +851,14 @@ int scoutfs_lock_recover_request(struct super_block *sb, u64 net_id,
 
 	for (i = 0; lock && i < SCOUTFS_NET_LOCK_MAX_RECOVER_NR; i++) {
+		/* report the mode granted before invalidation began, if any */
+		if (lock->invalidating_mode != SCOUTFS_LOCK_NULL)
+			mode = lock->invalidating_mode;
+		else
+			mode = lock->mode;
+
 		nlr->locks[i].key = lock->start;
 		nlr->locks[i].write_seq = cpu_to_le64(lock->write_seq);
-		nlr->locks[i].old_mode = lock->mode;
-		nlr->locks[i].new_mode = lock->mode;
+		nlr->locks[i].old_mode = mode;
+		nlr->locks[i].new_mode = mode;
 
 		node = rb_next(&lock->node);
 		if (node)
diff --git a/kmod/src/lock.h b/kmod/src/lock.h
index a1d9b903..204ee888 100644
--- a/kmod/src/lock.h
+++ b/kmod/src/lock.h
@@ -39,4 +39,5 @@ struct scoutfs_lock {
 	struct list_head cov_list;
 	enum scoutfs_lock_mode mode;
+	enum scoutfs_lock_mode invalidating_mode;
 	unsigned int waiters[SCOUTFS_LOCK_NR_MODES];
 	unsigned int users[SCOUTFS_LOCK_NR_MODES];
diff --git a/tests/golden/lock-recover-invalidate b/tests/golden/lock-recover-invalidate
new file mode 100644
index 00000000..af208381
--- /dev/null
+++ b/tests/golden/lock-recover-invalidate
@@ -0,0 +1,3 @@
+== starting background invalidating read/write load
+== 60s of lock recovery during invalidating load
+== stopping background load
diff --git a/tests/sequence b/tests/sequence
index 40c82e52..37f30647 100644
--- a/tests/sequence
+++ b/tests/sequence
@@ -17,6 +17,7 @@ lock-refleak.sh
 lock-shrink-consistency.sh
 lock-pr-cw-conflict.sh
 lock-revoke-getcwd.sh
+lock-recover-invalidate.sh
 export-lookup-evict-race.sh
 createmany-parallel.sh
 createmany-large-names.sh
diff --git a/tests/tests/lock-recover-invalidate.sh b/tests/tests/lock-recover-invalidate.sh
new file mode 100644
index 00000000..8f06b58c
--- /dev/null
+++ b/tests/tests/lock-recover-invalidate.sh
@@ -0,0 +1,43 @@
+#
+# trigger server failover and lock recovery during heavy invalidating
+# load on multiple mounts
+#
+
+majority_nr=$(t_majority_count)
+quorum_nr=$T_QUORUM
+
+test "$quorum_nr" == "$majority_nr" && \
+	t_skip "need remaining majority when leader unmounted"
+
+test "$T_NR_MOUNTS" -lt "$((quorum_nr + 2))" && \
+	t_skip "need at least 2 non-quorum load mounts"
+
+echo "== starting background invalidating read/write load"
+touch "$T_D0/file"
+load_pids=""
+for i in $(t_fs_nrs); do
+	if [ "$i" -ge "$quorum_nr" ]; then
+		eval path="\$T_D${i}/file"
+
+		(while true; do touch "$path" > /dev/null 2>&1; done) &
+		load_pids="$load_pids $!"
+		(while true; do stat "$path" > /dev/null 2>&1; done) &
+		load_pids="$load_pids $!"
+	fi
+done
+
+# had it reproduce in ~40s on wimpy debug kernel guests
+LENGTH=60
+echo "== ${LENGTH}s of lock recovery during invalidating load"
+END=$((SECONDS + LENGTH))
+while [ "$SECONDS" -lt "$END" ]; do
+	sv=$(t_server_nr)
+	t_umount $sv
+	t_mount $sv
+	# new server had to process greeting for mount to finish
+done
+
+echo "== stopping background load"
+kill $load_pids
+
+t_pass