Compare commits

...

2 Commits

Author SHA1 Message Date
Auke Kok
2d91e51218 Drop pending greetings from resend_queue in set_valid_greeting.
There's a race possible where set_valid_greeting() runs just as
another greeting reply sits on the resend_queue from a handshake:

  1. server_greeting() calls scoutfs_net_response() to queue the
     server's greeting reply on send_queue and kicks send_work.
  2. server_greeting() falls through to scoutfs_net_server_greeting()
     which calls set_valid_greeting() under conn->lock.
  3. If send_work already ran between steps 1 and 2 it moved the
     reply onto resend_queue.
  4. set_valid_greeting() splices resend_queue back into send_queue,
     re-queueing the greeting reply for a second send on the same
     socket.

The receiver treats a duplicate greeting as fatal: the recieve path
invalid_message check on the saw_greeting flag tears the connection
down silently. The client never sees a normal disconnect, just a
stalled connection, and the server only notices once the 30s
recovery timeout fires and fences the client.

Fix by walking resend_queue in set_valid_greeting() and dropping any
queued greeting before splicing them in. Dropping them should be safe
and mimics what scoutfs_net_shutdown_worker() does.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-05-12 13:47:24 -07:00
Auke Kok
8e7f3a2e56 Add recovery and lock-recover diagnostic trace events.
Add trace events to aid debugging recovery stalls:

  - scoutfs_recov_prepare(sb, rid, which):
    Server registered that a client needs to recover something.

  - scoutfs_recov_finish(sb, rid, which, remaining):
    Client finished one recovery item, and lists remaining count.

  - scoutfs_recov_timeout_fire(sb, nr_pending):
    The 30s recovery window expired, with count of clients that
    failed to recover within the timeout.

  - scoutfs_recov_fence_rid(sb, rid):
    One event per client about to be fenced.

  - scoutfs_lock_server_greeting(sb, rid, recov_pending):
    Shows which rid we expect to recover locks.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-05-12 11:51:34 -07:00
5 changed files with 150 additions and 1 deletions

View File

@@ -626,9 +626,14 @@ out:
int scoutfs_lock_server_greeting(struct super_block *sb, u64 rid)
{
struct scoutfs_key key;
bool pending;
int ret;
if (scoutfs_recov_is_pending(sb, rid, SCOUTFS_RECOV_LOCKS)) {
pending = scoutfs_recov_is_pending(sb, rid, SCOUTFS_RECOV_LOCKS);
trace_scoutfs_lock_server_greeting(sb, rid, pending);
if (pending) {
scoutfs_key_set_zeros(&key);
ret = scoutfs_server_lock_recover_request(sb, rid, &key);
} else {

View File

@@ -1730,12 +1730,30 @@ int scoutfs_net_connect(struct super_block *sb,
static void set_valid_greeting(struct scoutfs_net_connection *conn)
{
struct net_info *ninf = SCOUTFS_SB(conn->sb)->net_info;
struct message_send *msend;
struct message_send *tmp;
assert_spin_locked(&conn->lock);
/* recv should have dropped invalid duplicate greeting messages */
BUG_ON(test_conn_fl(conn, valid_greeting));
set_conn_fl(conn, valid_greeting);
/*
* Drop greetings from the resend_queue before splicing it into
* the send_queue. We might have a greeting left in the resend
* queue at the moment that we reach this point. A duplicate
* greeting is treated as fatal and causes a stall and fence.
*/
list_for_each_entry_safe(msend, tmp, &conn->resend_queue, head) {
if (msend->nh.cmd == SCOUTFS_NET_CMD_GREETING) {
msend->dead = 1;
free_msend(ninf, conn, msend);
}
}
list_splice_tail_init(&conn->resend_queue, &conn->send_queue);
queue_work(conn->workq, &conn->send_work);
}

View File

@@ -21,6 +21,7 @@
#include "super.h"
#include "recov.h"
#include "cmp.h"
#include "scoutfs_trace.h"
/*
* There are a few server messages which can't be processed until they
@@ -119,6 +120,9 @@ int scoutfs_recov_prepare(struct super_block *sb, u64 rid, int which)
spin_unlock(&recinf->lock);
kfree(alloc);
trace_scoutfs_recov_prepare(sb, rid, which);
return 0;
}
@@ -135,6 +139,15 @@ static int recov_finished(struct recov_info *recinf)
static void timer_callback(struct timer_list *timer)
{
struct recov_info *recinf = from_timer(recinf, timer, timer);
struct recov_pending *pend;
int nr = 0;
spin_lock(&recinf->lock);
list_for_each_entry(pend, &recinf->pending, head)
nr++;
spin_unlock(&recinf->lock);
trace_scoutfs_recov_timeout_fire(recinf->sb, nr);
recinf->timeout_fn(recinf->sb);
}
@@ -181,6 +194,8 @@ int scoutfs_recov_finish(struct super_block *sb, u64 rid, int which)
{
DECLARE_RECOV_INFO(sb, recinf);
struct recov_pending *pend;
struct recov_pending *iter;
int remaining = 0;
int ret = 0;
spin_lock(&recinf->lock);
@@ -196,8 +211,13 @@ int scoutfs_recov_finish(struct super_block *sb, u64 rid, int which)
}
}
list_for_each_entry(iter, &recinf->pending, head)
remaining++;
spin_unlock(&recinf->lock);
trace_scoutfs_recov_finish(sb, rid, which, remaining);
if (ret > 0)
del_timer_sync(&recinf->timer);

View File

@@ -2121,6 +2121,110 @@ DEFINE_EVENT(scoutfs_server_client_count_class, scoutfs_server_client_down,
TP_ARGS(sb, rid, nr_clients)
);
TRACE_EVENT(scoutfs_recov_prepare,
TP_PROTO(struct super_block *sb, u64 rid, int which),
TP_ARGS(sb, rid, which),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, c_rid)
__field(int, which)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->c_rid = rid;
__entry->which = which;
),
TP_printk(SCSBF" rid %016llx which 0x%x",
SCSB_TRACE_ARGS, __entry->c_rid, __entry->which)
);
TRACE_EVENT(scoutfs_recov_finish,
TP_PROTO(struct super_block *sb, u64 rid, int which, int remaining),
TP_ARGS(sb, rid, which, remaining),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, c_rid)
__field(int, which)
__field(int, remaining)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->c_rid = rid;
__entry->which = which;
__entry->remaining = remaining;
),
TP_printk(SCSBF" rid %016llx which 0x%x remaining %d",
SCSB_TRACE_ARGS, __entry->c_rid, __entry->which,
__entry->remaining)
);
TRACE_EVENT(scoutfs_recov_timeout_fire,
TP_PROTO(struct super_block *sb, int nr_pending),
TP_ARGS(sb, nr_pending),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(int, nr_pending)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->nr_pending = nr_pending;
),
TP_printk(SCSBF" nr_pending %d",
SCSB_TRACE_ARGS, __entry->nr_pending)
);
TRACE_EVENT(scoutfs_recov_fence_rid,
TP_PROTO(struct super_block *sb, u64 rid),
TP_ARGS(sb, rid),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, c_rid)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->c_rid = rid;
),
TP_printk(SCSBF" rid %016llx",
SCSB_TRACE_ARGS, __entry->c_rid)
);
TRACE_EVENT(scoutfs_lock_server_greeting,
TP_PROTO(struct super_block *sb, u64 rid, bool recov_pending),
TP_ARGS(sb, rid, recov_pending),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, c_rid)
__field(bool, recov_pending)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->c_rid = rid;
__entry->recov_pending = recov_pending;
),
TP_printk(SCSBF" rid %016llx recov_pending %d",
SCSB_TRACE_ARGS, __entry->c_rid, __entry->recov_pending)
);
DECLARE_EVENT_CLASS(scoutfs_server_commit_users_class,
TP_PROTO(struct super_block *sb, int holding, int applying,
int nr_holders, u32 budget,

View File

@@ -4386,6 +4386,8 @@ static void fence_pending_recov_worker(struct work_struct *work)
scoutfs_err(sb, "%lu ms recovery timeout expired for client rid %016llx, fencing",
SERVER_RECOV_TIMEOUT_MS, rid);
trace_scoutfs_recov_fence_rid(sb, rid);
ret = lookup_mounted_client_addr(sb, rid, &addr);
if (ret < 0) {
scoutfs_err(sb, "client rid addr lookup err %d, shutting down server", ret);