mirror of
https://github.com/versity/scoutfs.git
synced 2025-12-23 05:25:18 +00:00
Merge pull request #193 from versity/zab/hung_lock_fixes
Zab/hung lock fixes
This commit is contained in:
@@ -202,21 +202,48 @@ static u8 invalidation_mode(u8 granted, u8 requested)
|
||||
|
||||
/*
|
||||
* Return true of the client lock instances described by the entries can
|
||||
* be granted at the same time. Typically this only means they're both
|
||||
* modes that are compatible between nodes. In addition there's the
|
||||
* special case where a read lock on a client is compatible with a write
|
||||
* lock on the same client because the client's cache covered by the
|
||||
* read lock is still valid if they get a write lock.
|
||||
* be granted at the same time. There's only three cases where this is
|
||||
* true.
|
||||
*
|
||||
* First, the two locks are both of the same mode that allows full
|
||||
* sharing -- read and write only. The only point of these modes is
|
||||
* that everyone can share them.
|
||||
*
|
||||
* Second, a write lock gives the client permission to read as well.
|
||||
* This means that a client can upgrade its read lock to a write lock
|
||||
* without having to invalidate the existing read and drop caches.
|
||||
*
|
||||
* Third, null locks are always compatible between clients. It's as
|
||||
* though the client with the null lock has no lock at all. But it's
|
||||
* never compatible with all locks on the client requesting null.
|
||||
* Sending invalidations for existing locks on a client when we get a
|
||||
* null request is how we resolve races in shrinking locks -- we turn it
|
||||
* into the unsolicited remote invalidation case.
|
||||
*
|
||||
* All other mode and client combinations can not be shared, most
|
||||
* typically a write lock invalidating all other non-write holders to
|
||||
* drop caches and force a read after the write has completed.
|
||||
*/
|
||||
static bool client_entries_compatible(struct client_lock_entry *granted,
|
||||
struct client_lock_entry *requested)
|
||||
{
|
||||
return (granted->mode == requested->mode &&
|
||||
(granted->mode == SCOUTFS_LOCK_READ ||
|
||||
granted->mode == SCOUTFS_LOCK_WRITE_ONLY)) ||
|
||||
(granted->rid == requested->rid &&
|
||||
granted->mode == SCOUTFS_LOCK_READ &&
|
||||
requested->mode == SCOUTFS_LOCK_WRITE);
|
||||
/* only read and write_only can be full shared */
|
||||
if ((granted->mode == requested->mode) &&
|
||||
(granted->mode == SCOUTFS_LOCK_READ || granted->mode == SCOUTFS_LOCK_WRITE_ONLY))
|
||||
return true;
|
||||
|
||||
/* _write includes reading, so a client can upgrade its read to write */
|
||||
if (granted->rid == requested->rid &&
|
||||
granted->mode == SCOUTFS_LOCK_READ &&
|
||||
requested->mode == SCOUTFS_LOCK_WRITE)
|
||||
return true;
|
||||
|
||||
/* null is always compatible across clients, never within a client */
|
||||
if ((granted->rid != requested->rid) &&
|
||||
(granted->mode == SCOUTFS_LOCK_NULL || requested->mode == SCOUTFS_LOCK_NULL))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -317,16 +344,18 @@ static void put_server_lock(struct lock_server_info *inf,
|
||||
|
||||
BUG_ON(!mutex_is_locked(&snode->mutex));
|
||||
|
||||
spin_lock(&inf->lock);
|
||||
|
||||
if (atomic_dec_and_test(&snode->refcount) &&
|
||||
list_empty(&snode->granted) &&
|
||||
list_empty(&snode->requested) &&
|
||||
list_empty(&snode->invalidated)) {
|
||||
spin_lock(&inf->lock);
|
||||
rb_erase(&snode->node, &inf->locks_root);
|
||||
spin_unlock(&inf->lock);
|
||||
should_free = true;
|
||||
}
|
||||
|
||||
spin_unlock(&inf->lock);
|
||||
|
||||
mutex_unlock(&snode->mutex);
|
||||
|
||||
if (should_free) {
|
||||
|
||||
@@ -502,12 +502,12 @@ static void scoutfs_net_proc_worker(struct work_struct *work)
|
||||
* Free live responses up to and including the seq by marking them dead
|
||||
* and moving them to the send queue to be freed.
|
||||
*/
|
||||
static int move_acked_responses(struct scoutfs_net_connection *conn,
|
||||
struct list_head *list, u64 seq)
|
||||
static bool move_acked_responses(struct scoutfs_net_connection *conn,
|
||||
struct list_head *list, u64 seq)
|
||||
{
|
||||
struct message_send *msend;
|
||||
struct message_send *tmp;
|
||||
int ret = 0;
|
||||
bool moved = false;
|
||||
|
||||
assert_spin_locked(&conn->lock);
|
||||
|
||||
@@ -519,20 +519,20 @@ static int move_acked_responses(struct scoutfs_net_connection *conn,
|
||||
|
||||
msend->dead = 1;
|
||||
list_move(&msend->head, &conn->send_queue);
|
||||
ret = 1;
|
||||
moved = true;
|
||||
}
|
||||
|
||||
return ret;
|
||||
return moved;
|
||||
}
|
||||
|
||||
/* acks are processed inline in the recv worker */
|
||||
static void free_acked_responses(struct scoutfs_net_connection *conn, u64 seq)
|
||||
{
|
||||
int moved;
|
||||
bool moved;
|
||||
|
||||
spin_lock(&conn->lock);
|
||||
|
||||
moved = move_acked_responses(conn, &conn->send_queue, seq) +
|
||||
moved = move_acked_responses(conn, &conn->send_queue, seq) |
|
||||
move_acked_responses(conn, &conn->resend_queue, seq);
|
||||
|
||||
spin_unlock(&conn->lock);
|
||||
|
||||
@@ -1046,9 +1046,12 @@ DECLARE_EVENT_CLASS(scoutfs_lock_class,
|
||||
sk_trace_define(start)
|
||||
sk_trace_define(end)
|
||||
__field(u64, refresh_gen)
|
||||
__field(u64, write_seq)
|
||||
__field(u64, dirty_trans_seq)
|
||||
__field(unsigned char, request_pending)
|
||||
__field(unsigned char, invalidate_pending)
|
||||
__field(int, mode)
|
||||
__field(int, invalidating_mode)
|
||||
__field(unsigned int, waiters_cw)
|
||||
__field(unsigned int, waiters_pr)
|
||||
__field(unsigned int, waiters_ex)
|
||||
@@ -1061,9 +1064,12 @@ DECLARE_EVENT_CLASS(scoutfs_lock_class,
|
||||
sk_trace_assign(start, &lck->start);
|
||||
sk_trace_assign(end, &lck->end);
|
||||
__entry->refresh_gen = lck->refresh_gen;
|
||||
__entry->write_seq = lck->write_seq;
|
||||
__entry->dirty_trans_seq = lck->dirty_trans_seq;
|
||||
__entry->request_pending = lck->request_pending;
|
||||
__entry->invalidate_pending = lck->invalidate_pending;
|
||||
__entry->mode = lck->mode;
|
||||
__entry->invalidating_mode = lck->invalidating_mode;
|
||||
__entry->waiters_pr = lck->waiters[SCOUTFS_LOCK_READ];
|
||||
__entry->waiters_ex = lck->waiters[SCOUTFS_LOCK_WRITE];
|
||||
__entry->waiters_cw = lck->waiters[SCOUTFS_LOCK_WRITE_ONLY];
|
||||
@@ -1071,10 +1077,11 @@ DECLARE_EVENT_CLASS(scoutfs_lock_class,
|
||||
__entry->users_ex = lck->users[SCOUTFS_LOCK_WRITE];
|
||||
__entry->users_cw = lck->users[SCOUTFS_LOCK_WRITE_ONLY];
|
||||
),
|
||||
TP_printk(SCSBF" start "SK_FMT" end "SK_FMT" mode %u reqpnd %u invpnd %u rfrgen %llu waiters: pr %u ex %u cw %u users: pr %u ex %u cw %u",
|
||||
TP_printk(SCSBF" start "SK_FMT" end "SK_FMT" mode %u invmd %u reqp %u invp %u refg %llu wris %llu dts %llu waiters: pr %u ex %u cw %u users: pr %u ex %u cw %u",
|
||||
SCSB_TRACE_ARGS, sk_trace_args(start), sk_trace_args(end),
|
||||
__entry->mode, __entry->request_pending,
|
||||
__entry->invalidate_pending, __entry->refresh_gen,
|
||||
__entry->mode, __entry->invalidating_mode, __entry->request_pending,
|
||||
__entry->invalidate_pending, __entry->refresh_gen, __entry->write_seq,
|
||||
__entry->dirty_trans_seq,
|
||||
__entry->waiters_pr, __entry->waiters_ex, __entry->waiters_cw,
|
||||
__entry->users_pr, __entry->users_ex, __entry->users_cw)
|
||||
);
|
||||
|
||||
2
tests/golden/lock-shrink-read-race
Normal file
2
tests/golden/lock-shrink-read-race
Normal file
@@ -0,0 +1,2 @@
|
||||
=== setup
|
||||
=== spin reading and shrinking
|
||||
@@ -25,6 +25,7 @@ totl-xattr-tag.sh
|
||||
quota.sh
|
||||
lock-refleak.sh
|
||||
lock-shrink-consistency.sh
|
||||
lock-shrink-read-race.sh
|
||||
lock-pr-cw-conflict.sh
|
||||
lock-revoke-getcwd.sh
|
||||
lock-recover-invalidate.sh
|
||||
|
||||
40
tests/tests/lock-shrink-read-race.sh
Normal file
40
tests/tests/lock-shrink-read-race.sh
Normal file
@@ -0,0 +1,40 @@
|
||||
#
|
||||
# We had a lock server refcounting bug that could let one thread get a
|
||||
# reference on a lock struct that was being freed by another thread. We
|
||||
# were able to reproduce this by having all clients try and produce a
|
||||
# lot of read and null requests.
|
||||
#
|
||||
# This will manfiest as a hung lock and timed out test runs, probably
|
||||
# with hung task messages on the console. Depending on how the race
|
||||
# turns out, it can trigger KASAN warnings in
|
||||
# process_waiting_requests().
|
||||
#
|
||||
|
||||
READERS_PER=3
|
||||
SECS=30
|
||||
|
||||
echo "=== setup"
|
||||
touch "$T_D0/file"
|
||||
|
||||
echo "=== spin reading and shrinking"
|
||||
END=$((SECONDS + SECS))
|
||||
for m in $(t_fs_nrs); do
|
||||
eval file="\$T_D${m}/file"
|
||||
|
||||
# lots of tasks reading as fast as they can
|
||||
for t in $(seq 1 $READERS_PER); do
|
||||
(while [ $SECONDS -lt $END ]; do
|
||||
stat $file > /dev/null
|
||||
done) &
|
||||
done
|
||||
# one task shrinking (triggering null requests) and reading
|
||||
(while [ $SECONDS -lt $END ]; do
|
||||
stat $file > /dev/null
|
||||
t_trigger_arm_silent statfs_lock_purge $m
|
||||
stat -f "$file" > /dev/null
|
||||
done) &
|
||||
done
|
||||
|
||||
wait
|
||||
|
||||
t_pass
|
||||
Reference in New Issue
Block a user