mirror of https://github.com/versity/scoutfs.git
Merge pull request #193 from versity/zab/hung_lock_fixes
Zab/hung lock fixes
@@ -202,21 +202,48 @@ static u8 invalidation_mode(u8 granted, u8 requested)
 
 /*
  * Return true if the client lock instances described by the entries can
- * be granted at the same time. Typically this only means they're both
- * modes that are compatible between nodes. In addition there's the
- * special case where a read lock on a client is compatible with a write
- * lock on the same client because the client's cache covered by the
- * read lock is still valid if they get a write lock.
+ * be granted at the same time. There's only three cases where this is
+ * true.
+ *
+ * First, the two locks are both of the same mode that allows full
+ * sharing -- read and write only. The only point of these modes is
+ * that everyone can share them.
+ *
+ * Second, a write lock gives the client permission to read as well.
+ * This means that a client can upgrade its read lock to a write lock
+ * without having to invalidate the existing read and drop caches.
+ *
+ * Third, null locks are always compatible between clients. It's as
+ * though the client with the null lock has no lock at all. But it's
+ * never compatible with all locks on the client requesting null.
+ * Sending invalidations for existing locks on a client when we get a
+ * null request is how we resolve races in shrinking locks -- we turn it
+ * into the unsolicited remote invalidation case.
+ *
+ * All other mode and client combinations can not be shared, most
+ * typically a write lock invalidating all other non-write holders to
+ * drop caches and force a read after the write has completed.
  */
 static bool client_entries_compatible(struct client_lock_entry *granted,
 				      struct client_lock_entry *requested)
 {
-	return (granted->mode == requested->mode &&
-		(granted->mode == SCOUTFS_LOCK_READ ||
-		 granted->mode == SCOUTFS_LOCK_WRITE_ONLY)) ||
-	       (granted->rid == requested->rid &&
-		granted->mode == SCOUTFS_LOCK_READ &&
-		requested->mode == SCOUTFS_LOCK_WRITE);
+	/* only read and write_only can be full shared */
+	if ((granted->mode == requested->mode) &&
+	    (granted->mode == SCOUTFS_LOCK_READ || granted->mode == SCOUTFS_LOCK_WRITE_ONLY))
+		return true;
+
+	/* _write includes reading, so a client can upgrade its read to write */
+	if (granted->rid == requested->rid &&
+	    granted->mode == SCOUTFS_LOCK_READ &&
+	    requested->mode == SCOUTFS_LOCK_WRITE)
+		return true;
+
+	/* null is always compatible across clients, never within a client */
+	if ((granted->rid != requested->rid) &&
+	    (granted->mode == SCOUTFS_LOCK_NULL || requested->mode == SCOUTFS_LOCK_NULL))
+		return true;
+
+	return false;
 }
 
 /*
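
To make the three sharing rules in the new comment concrete, here is a minimal userspace sketch; the entry struct, the mode names, and compatible() below are illustrative stand-ins, not the scoutfs definitions:

/* toy model of the three sharing rules; names and modes are stand-ins */
#include <stdbool.h>
#include <stdio.h>

enum mode { NUL, READ, WRITE, WRITE_ONLY };

struct entry {
	int rid;	/* which client holds or requests the lock */
	enum mode mode;
};

static bool compatible(struct entry g, struct entry r)
{
	/* first: both hold the same fully-shared mode */
	if (g.mode == r.mode && (g.mode == READ || g.mode == WRITE_ONLY))
		return true;
	/* second: the same client may upgrade its read to a write */
	if (g.rid == r.rid && g.mode == READ && r.mode == WRITE)
		return true;
	/* third: null is compatible across clients, never within one */
	if (g.rid != r.rid && (g.mode == NUL || r.mode == NUL))
		return true;
	return false;
}

int main(void)
{
	struct entry rd1 = { 1, READ }, wr1 = { 1, WRITE }, nul2 = { 2, NUL };

	printf("read(1) vs write(1): %d\n", compatible(rd1, wr1));	/* 1: same-client upgrade */
	printf("read(1) vs null(2):  %d\n", compatible(rd1, nul2));	/* 1: null across clients */
	printf("write(1) vs read(2): %d\n",
	       compatible(wr1, (struct entry){ 2, READ }));		/* 0: not shareable */
	return 0;
}
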
@@ -317,16 +344,18 @@ static void put_server_lock(struct lock_server_info *inf,
 
 	BUG_ON(!mutex_is_locked(&snode->mutex));
 
+	spin_lock(&inf->lock);
+
 	if (atomic_dec_and_test(&snode->refcount) &&
 	    list_empty(&snode->granted) &&
 	    list_empty(&snode->requested) &&
 	    list_empty(&snode->invalidated)) {
-		spin_lock(&inf->lock);
 		rb_erase(&snode->node, &inf->locks_root);
-		spin_unlock(&inf->lock);
 		should_free = true;
 	}
 
+	spin_unlock(&inf->lock);
+
 	mutex_unlock(&snode->mutex);
 
 	if (should_free) {
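
The put_server_lock() change above widens inf->lock so the refcount test, the list checks, and the rb_erase() all run under the same lock that lookups hold while taking references. As a rough userspace sketch of that pattern only (the mutex, the node, and every name here are stand-ins, not scoutfs code):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* stand-in for a server lock node linked into a shared structure */
struct snode {
	int refcount;
};

static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;
static struct snode *the_node;

/* lookups take their reference while holding tree_lock */
static struct snode *get_node(void)
{
	struct snode *sn;

	pthread_mutex_lock(&tree_lock);
	sn = the_node;
	if (sn)
		sn->refcount++;
	pthread_mutex_unlock(&tree_lock);
	return sn;
}

/*
 * put also holds tree_lock across the dec-and-test and the unlink, so a
 * concurrent get_node() can never find the node and take a reference
 * after put has decided to free it.
 */
static void put_node(struct snode *sn)
{
	bool should_free;

	pthread_mutex_lock(&tree_lock);
	should_free = (--sn->refcount == 0);
	if (should_free)
		the_node = NULL;	/* unlink, like rb_erase() in the real code */
	pthread_mutex_unlock(&tree_lock);

	if (should_free)
		free(sn);
}

int main(void)
{
	struct snode *sn = calloc(1, sizeof(*sn));

	sn->refcount = 1;	/* the insertion's reference */
	the_node = sn;

	put_node(get_node());	/* a lookup comes and goes */
	put_node(sn);		/* final put unlinks and frees */
	printf("freed\n");
	return 0;
}
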
@@ -502,12 +502,12 @@ static void scoutfs_net_proc_worker(struct work_struct *work)
  * Free live responses up to and including the seq by marking them dead
  * and moving them to the send queue to be freed.
  */
-static int move_acked_responses(struct scoutfs_net_connection *conn,
-				struct list_head *list, u64 seq)
+static bool move_acked_responses(struct scoutfs_net_connection *conn,
+				 struct list_head *list, u64 seq)
 {
 	struct message_send *msend;
 	struct message_send *tmp;
-	int ret = 0;
+	bool moved = false;
 
 	assert_spin_locked(&conn->lock);
 
@@ -519,20 +519,20 @@ static int move_acked_responses(struct scoutfs_net_connection *conn,
 
 		msend->dead = 1;
 		list_move(&msend->head, &conn->send_queue);
-		ret = 1;
+		moved = true;
 	}
 
-	return ret;
+	return moved;
 }
 
 /* acks are processed inline in the recv worker */
 static void free_acked_responses(struct scoutfs_net_connection *conn, u64 seq)
 {
-	int moved;
+	bool moved;
 
 	spin_lock(&conn->lock);
 
-	moved = move_acked_responses(conn, &conn->send_queue, seq) +
+	moved = move_acked_responses(conn, &conn->send_queue, seq) |
 		move_acked_responses(conn, &conn->resend_queue, seq);
 
 	spin_unlock(&conn->lock);
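
A note on the free_acked_responses() change above: now that move_acked_responses() returns bool, the two calls are combined with bitwise '|' rather than logical '||' because both queues must always be scanned. The old integer '+' also evaluated both operands; '||' would short-circuit and skip the resend queue whenever the send queue had already moved something. A small standalone illustration of that difference (toy function, not scoutfs code):

#include <stdbool.h>
#include <stdio.h>

static int calls;

/* stand-in for a queue scan with a side effect */
static bool scan(bool hit)
{
	calls++;
	return hit;
}

int main(void)
{
	bool moved;

	calls = 0;
	moved = scan(true) | scan(true);	/* both sides evaluated */
	printf("'|'  calls=%d moved=%d\n", calls, moved);

	calls = 0;
	moved = scan(true) || scan(true);	/* second call skipped */
	printf("'||' calls=%d moved=%d\n", calls, moved);

	return 0;
}
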
@@ -1046,9 +1046,12 @@ DECLARE_EVENT_CLASS(scoutfs_lock_class,
 		sk_trace_define(start)
 		sk_trace_define(end)
 		__field(u64, refresh_gen)
+		__field(u64, write_seq)
+		__field(u64, dirty_trans_seq)
 		__field(unsigned char, request_pending)
 		__field(unsigned char, invalidate_pending)
 		__field(int, mode)
+		__field(int, invalidating_mode)
 		__field(unsigned int, waiters_cw)
 		__field(unsigned int, waiters_pr)
 		__field(unsigned int, waiters_ex)
@@ -1061,9 +1064,12 @@ DECLARE_EVENT_CLASS(scoutfs_lock_class,
 		sk_trace_assign(start, &lck->start);
 		sk_trace_assign(end, &lck->end);
 		__entry->refresh_gen = lck->refresh_gen;
+		__entry->write_seq = lck->write_seq;
+		__entry->dirty_trans_seq = lck->dirty_trans_seq;
 		__entry->request_pending = lck->request_pending;
 		__entry->invalidate_pending = lck->invalidate_pending;
 		__entry->mode = lck->mode;
+		__entry->invalidating_mode = lck->invalidating_mode;
 		__entry->waiters_pr = lck->waiters[SCOUTFS_LOCK_READ];
 		__entry->waiters_ex = lck->waiters[SCOUTFS_LOCK_WRITE];
 		__entry->waiters_cw = lck->waiters[SCOUTFS_LOCK_WRITE_ONLY];
@@ -1071,10 +1077,11 @@ DECLARE_EVENT_CLASS(scoutfs_lock_class,
 		__entry->users_ex = lck->users[SCOUTFS_LOCK_WRITE];
 		__entry->users_cw = lck->users[SCOUTFS_LOCK_WRITE_ONLY];
 	),
-	TP_printk(SCSBF" start "SK_FMT" end "SK_FMT" mode %u reqpnd %u invpnd %u rfrgen %llu waiters: pr %u ex %u cw %u users: pr %u ex %u cw %u",
+	TP_printk(SCSBF" start "SK_FMT" end "SK_FMT" mode %u invmd %u reqp %u invp %u refg %llu wris %llu dts %llu waiters: pr %u ex %u cw %u users: pr %u ex %u cw %u",
 		  SCSB_TRACE_ARGS, sk_trace_args(start), sk_trace_args(end),
-		  __entry->mode, __entry->request_pending,
-		  __entry->invalidate_pending, __entry->refresh_gen,
+		  __entry->mode, __entry->invalidating_mode, __entry->request_pending,
+		  __entry->invalidate_pending, __entry->refresh_gen, __entry->write_seq,
+		  __entry->dirty_trans_seq,
 		  __entry->waiters_pr, __entry->waiters_ex, __entry->waiters_cw,
 		  __entry->users_pr, __entry->users_ex, __entry->users_cw)
 );
tests/golden/lock-shrink-read-race (new file, 2 lines)
@@ -0,0 +1,2 @@
+=== setup
+=== spin reading and shrinking
@@ -25,6 +25,7 @@ totl-xattr-tag.sh
 quota.sh
 lock-refleak.sh
 lock-shrink-consistency.sh
+lock-shrink-read-race.sh
 lock-pr-cw-conflict.sh
 lock-revoke-getcwd.sh
 lock-recover-invalidate.sh
tests/tests/lock-shrink-read-race.sh (new file, 40 lines)
@@ -0,0 +1,40 @@
+#
+# We had a lock server refcounting bug that could let one thread get a
+# reference on a lock struct that was being freed by another thread. We
+# were able to reproduce this by having all clients try and produce a
+# lot of read and null requests.
+#
+# This will manifest as a hung lock and timed out test runs, probably
+# with hung task messages on the console. Depending on how the race
+# turns out, it can trigger KASAN warnings in
+# process_waiting_requests().
+#
+
+READERS_PER=3
+SECS=30
+
+echo "=== setup"
+touch "$T_D0/file"
+
+echo "=== spin reading and shrinking"
+END=$((SECONDS + SECS))
+for m in $(t_fs_nrs); do
+	eval file="\$T_D${m}/file"
+
+	# lots of tasks reading as fast as they can
+	for t in $(seq 1 $READERS_PER); do
+		(while [ $SECONDS -lt $END ]; do
+			stat $file > /dev/null
+		done) &
+	done
+	# one task shrinking (triggering null requests) and reading
+	(while [ $SECONDS -lt $END ]; do
+		stat $file > /dev/null
+		t_trigger_arm_silent statfs_lock_purge $m
+		stat -f "$file" > /dev/null
+	done) &
+done
+
+wait
+
+t_pass