From a3dabb4c79694ff49cba76a4082abbac28d9743f Mon Sep 17 00:00:00 2001 From: Auke Kok Date: Fri, 17 Apr 2026 11:10:38 -0700 Subject: [PATCH] Fail pending client requests when reconnecting to new server Previously, client_greeting spliced pending requests back onto send_queue when reconnecting to a new server. Those requests carried state from the old server (sequence numbers, log tree references, lock modes) that was reclaimed at fence time, so resending against the new server was incorrect. Drain pending requests with -ECONNRESET at greeting time, mirroring the forcing_unmount drain in the shutdown worker. Thread the lock pointer through scoutfs_client_lock_request so the response callback can clear request_pending and wake waiters on error; otherwise a lock_key_range waiter would block forever because the new server's lock recovery only reports granted modes, not pending requests. Wrap the sync request senders in client_sync_request so userspace paths (statfs, mkdir, sysfs volopt, resize ioctl, walk-inodes ioctl) retry transparently across failover instead of surfacing a new -ECONNRESET that callers never saw before. Co-Authored-By: Claude Opus 4.7 (1M context) --- kmod/src/client.c | 125 ++++++++++++++++++++++++++++---------------- kmod/src/client.h | 3 +- kmod/src/counters.h | 1 + kmod/src/lock.c | 31 ++++++++++- kmod/src/lock.h | 2 + kmod/src/net.c | 24 +++++++++ 6 files changed, 138 insertions(+), 48 deletions(-) diff --git a/kmod/src/client.c b/kmod/src/client.c index 246616c3..de690fe1 100644 --- a/kmod/src/client.c +++ b/kmod/src/client.c @@ -59,6 +59,31 @@ struct client_info { struct completion farewell_comp; }; +/* + * Reconnection to a new server completes pending sync requests with + * -ECONNRESET because their state in the old server was reclaimed at + * fence time. Transparently retry so callers don't surface the + * reconnect as a failed RPC; preserve the pre-drain behavior where a + * sync request was silently resent across failover. Shutdown paths + * break the loop via the errors that submit and wait already return. + */ +static int client_sync_request(struct super_block *sb, + struct scoutfs_net_connection *conn, + u8 cmd, void *arg, unsigned arg_len, + void *resp, size_t resp_len) +{ + int ret; + + for (;;) { + ret = scoutfs_net_sync_request(sb, conn, cmd, arg, arg_len, + resp, resp_len); + if (ret != -ECONNRESET) + return ret; + if (scoutfs_unmounting(sb) || scoutfs_forcing_unmount(sb)) + return -ESHUTDOWN; + } +} + /* * Ask for a new run of allocated inode numbers. The server can return * fewer than @count. It will success with nr == 0 if we've run out. @@ -72,10 +97,10 @@ int scoutfs_client_alloc_inodes(struct super_block *sb, u64 count, u64 tmp; int ret; - ret = scoutfs_net_sync_request(sb, client->conn, - SCOUTFS_NET_CMD_ALLOC_INODES, - &lecount, sizeof(lecount), - &ial, sizeof(ial)); + ret = client_sync_request(sb, client->conn, + SCOUTFS_NET_CMD_ALLOC_INODES, + &lecount, sizeof(lecount), + &ial, sizeof(ial)); if (ret == 0) { *ino = le64_to_cpu(ial.ino); *nr = le64_to_cpu(ial.nr); @@ -94,9 +119,9 @@ int scoutfs_client_get_log_trees(struct super_block *sb, { struct client_info *client = SCOUTFS_SB(sb)->client_info; - return scoutfs_net_sync_request(sb, client->conn, - SCOUTFS_NET_CMD_GET_LOG_TREES, - NULL, 0, lt, sizeof(*lt)); + return client_sync_request(sb, client->conn, + SCOUTFS_NET_CMD_GET_LOG_TREES, + NULL, 0, lt, sizeof(*lt)); } int scoutfs_client_commit_log_trees(struct super_block *sb, @@ -104,9 +129,9 @@ int scoutfs_client_commit_log_trees(struct super_block *sb, { struct client_info *client = SCOUTFS_SB(sb)->client_info; - return scoutfs_net_sync_request(sb, client->conn, - SCOUTFS_NET_CMD_COMMIT_LOG_TREES, - lt, sizeof(*lt), NULL, 0); + return client_sync_request(sb, client->conn, + SCOUTFS_NET_CMD_COMMIT_LOG_TREES, + lt, sizeof(*lt), NULL, 0); } int scoutfs_client_get_roots(struct super_block *sb, @@ -114,9 +139,9 @@ int scoutfs_client_get_roots(struct super_block *sb, { struct client_info *client = SCOUTFS_SB(sb)->client_info; - return scoutfs_net_sync_request(sb, client->conn, - SCOUTFS_NET_CMD_GET_ROOTS, - NULL, 0, roots, sizeof(*roots)); + return client_sync_request(sb, client->conn, + SCOUTFS_NET_CMD_GET_ROOTS, + NULL, 0, roots, sizeof(*roots)); } int scoutfs_client_get_last_seq(struct super_block *sb, u64 *seq) @@ -125,9 +150,9 @@ int scoutfs_client_get_last_seq(struct super_block *sb, u64 *seq) __le64 last_seq; int ret; - ret = scoutfs_net_sync_request(sb, client->conn, - SCOUTFS_NET_CMD_GET_LAST_SEQ, - NULL, 0, &last_seq, sizeof(last_seq)); + ret = client_sync_request(sb, client->conn, + SCOUTFS_NET_CMD_GET_LAST_SEQ, + NULL, 0, &last_seq, sizeof(last_seq)); if (ret == 0) *seq = le64_to_cpu(last_seq); @@ -140,24 +165,34 @@ static int client_lock_response(struct super_block *sb, void *resp, unsigned int resp_len, int error, void *data) { + struct scoutfs_lock *lock = data; + + if (error) { + scoutfs_lock_request_failed(sb, lock); + return 0; + } + if (resp_len != sizeof(struct scoutfs_net_lock)) return -EINVAL; - /* XXX error? */ - return scoutfs_lock_grant_response(sb, resp); } -/* Send a lock request to the server. */ +/* + * Send a lock request to the server. The lock is anchored by + * request_pending so its address is stable until the response callback + * runs and clears request_pending on either the grant or error path. + */ int scoutfs_client_lock_request(struct super_block *sb, - struct scoutfs_net_lock *nl) + struct scoutfs_net_lock *nl, + struct scoutfs_lock *lock) { struct client_info *client = SCOUTFS_SB(sb)->client_info; return scoutfs_net_submit_request(sb, client->conn, SCOUTFS_NET_CMD_LOCK, nl, sizeof(*nl), - client_lock_response, NULL, NULL); + client_lock_response, lock, NULL); } /* Send a lock response to the server. */ @@ -189,9 +224,9 @@ int scoutfs_client_srch_get_compact(struct super_block *sb, { struct client_info *client = SCOUTFS_SB(sb)->client_info; - return scoutfs_net_sync_request(sb, client->conn, - SCOUTFS_NET_CMD_SRCH_GET_COMPACT, - NULL, 0, sc, sizeof(*sc)); + return client_sync_request(sb, client->conn, + SCOUTFS_NET_CMD_SRCH_GET_COMPACT, + NULL, 0, sc, sizeof(*sc)); } /* Commit the result of a srch file compaction. */ @@ -200,9 +235,9 @@ int scoutfs_client_srch_commit_compact(struct super_block *sb, { struct client_info *client = SCOUTFS_SB(sb)->client_info; - return scoutfs_net_sync_request(sb, client->conn, - SCOUTFS_NET_CMD_SRCH_COMMIT_COMPACT, - res, sizeof(*res), NULL, 0); + return client_sync_request(sb, client->conn, + SCOUTFS_NET_CMD_SRCH_COMMIT_COMPACT, + res, sizeof(*res), NULL, 0); } int scoutfs_client_get_log_merge(struct super_block *sb, @@ -210,9 +245,9 @@ int scoutfs_client_get_log_merge(struct super_block *sb, { struct client_info *client = SCOUTFS_SB(sb)->client_info; - return scoutfs_net_sync_request(sb, client->conn, - SCOUTFS_NET_CMD_GET_LOG_MERGE, - NULL, 0, req, sizeof(*req)); + return client_sync_request(sb, client->conn, + SCOUTFS_NET_CMD_GET_LOG_MERGE, + NULL, 0, req, sizeof(*req)); } int scoutfs_client_commit_log_merge(struct super_block *sb, @@ -220,9 +255,9 @@ int scoutfs_client_commit_log_merge(struct super_block *sb, { struct client_info *client = SCOUTFS_SB(sb)->client_info; - return scoutfs_net_sync_request(sb, client->conn, - SCOUTFS_NET_CMD_COMMIT_LOG_MERGE, - comp, sizeof(*comp), NULL, 0); + return client_sync_request(sb, client->conn, + SCOUTFS_NET_CMD_COMMIT_LOG_MERGE, + comp, sizeof(*comp), NULL, 0); } int scoutfs_client_send_omap_response(struct super_block *sb, u64 id, @@ -254,8 +289,8 @@ int scoutfs_client_open_ino_map(struct super_block *sb, u64 group_nr, .req_id = 0, }; - return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_OPEN_INO_MAP, - &args, sizeof(args), map, sizeof(*map)); + return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_OPEN_INO_MAP, + &args, sizeof(args), map, sizeof(*map)); } /* The client is asking the server for the current volume options */ @@ -263,8 +298,8 @@ int scoutfs_client_get_volopt(struct super_block *sb, struct scoutfs_volume_opti { struct client_info *client = SCOUTFS_SB(sb)->client_info; - return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_GET_VOLOPT, - NULL, 0, volopt, sizeof(*volopt)); + return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_GET_VOLOPT, + NULL, 0, volopt, sizeof(*volopt)); } /* The client is asking the server to update volume options */ @@ -272,8 +307,8 @@ int scoutfs_client_set_volopt(struct super_block *sb, struct scoutfs_volume_opti { struct client_info *client = SCOUTFS_SB(sb)->client_info; - return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_SET_VOLOPT, - volopt, sizeof(*volopt), NULL, 0); + return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_SET_VOLOPT, + volopt, sizeof(*volopt), NULL, 0); } /* The client is asking the server to clear volume options */ @@ -281,24 +316,24 @@ int scoutfs_client_clear_volopt(struct super_block *sb, struct scoutfs_volume_op { struct client_info *client = SCOUTFS_SB(sb)->client_info; - return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_CLEAR_VOLOPT, - volopt, sizeof(*volopt), NULL, 0); + return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_CLEAR_VOLOPT, + volopt, sizeof(*volopt), NULL, 0); } int scoutfs_client_resize_devices(struct super_block *sb, struct scoutfs_net_resize_devices *nrd) { struct client_info *client = SCOUTFS_SB(sb)->client_info; - return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_RESIZE_DEVICES, - nrd, sizeof(*nrd), NULL, 0); + return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_RESIZE_DEVICES, + nrd, sizeof(*nrd), NULL, 0); } int scoutfs_client_statfs(struct super_block *sb, struct scoutfs_net_statfs *nst) { struct client_info *client = SCOUTFS_SB(sb)->client_info; - return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_STATFS, - NULL, 0, nst, sizeof(*nst)); + return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_STATFS, + NULL, 0, nst, sizeof(*nst)); } /* diff --git a/kmod/src/client.h b/kmod/src/client.h index bb155fe0..27426c70 100644 --- a/kmod/src/client.h +++ b/kmod/src/client.h @@ -12,7 +12,8 @@ int scoutfs_client_get_roots(struct super_block *sb, u64 *scoutfs_client_bulk_alloc(struct super_block *sb); int scoutfs_client_get_last_seq(struct super_block *sb, u64 *seq); int scoutfs_client_lock_request(struct super_block *sb, - struct scoutfs_net_lock *nl); + struct scoutfs_net_lock *nl, + struct scoutfs_lock *lock); int scoutfs_client_lock_response(struct super_block *sb, u64 net_id, struct scoutfs_net_lock *nl); int scoutfs_client_lock_recover_response(struct super_block *sb, u64 net_id, diff --git a/kmod/src/counters.h b/kmod/src/counters.h index 9088496c..9f7df8ca 100644 --- a/kmod/src/counters.h +++ b/kmod/src/counters.h @@ -138,6 +138,7 @@ EXPAND_COUNTER(lock_lock_error) \ EXPAND_COUNTER(lock_nonblock_eagain) \ EXPAND_COUNTER(lock_recover_request) \ + EXPAND_COUNTER(lock_request_failed) \ EXPAND_COUNTER(lock_shrink_attempted) \ EXPAND_COUNTER(lock_shrink_request_failed) \ EXPAND_COUNTER(lock_unlock) \ diff --git a/kmod/src/lock.c b/kmod/src/lock.c index a4ce96ce..5699391a 100644 --- a/kmod/src/lock.c +++ b/kmod/src/lock.c @@ -654,6 +654,33 @@ int scoutfs_lock_grant_response(struct super_block *sb, return 0; } +/* + * The lock request we sent to the server was dropped before we could + * receive a grant response. This happens when the client reconnects to + * a new server and completes pending requests with an error, since the + * old server's pending-request state was reclaimed at fence time. + * + * Clear request_pending so that a waiter in lock_key_range re-evaluates + * and sends a fresh request to the new server, and symmetrically put + * the lock so shrink's lru state matches the grant_response path. + */ +void scoutfs_lock_request_failed(struct super_block *sb, + struct scoutfs_lock *lock) +{ + DECLARE_LOCK_INFO(sb, linfo); + + scoutfs_inc_counter(sb, lock_request_failed); + + spin_lock(&linfo->lock); + + BUG_ON(!lock->request_pending); + lock->request_pending = 0; + wake_up(&lock->waitq); + put_lock(linfo, lock); + + spin_unlock(&linfo->lock); +} + struct inv_req { struct list_head head; struct scoutfs_lock *lock; @@ -936,7 +963,7 @@ static bool try_shrink_lock(struct super_block *sb, struct lock_info *linfo, boo spin_unlock(&linfo->lock); if (lock) { - ret = scoutfs_client_lock_request(sb, &nl); + ret = scoutfs_client_lock_request(sb, &nl, lock); if (ret < 0) { scoutfs_inc_counter(sb, lock_shrink_request_failed); @@ -1077,7 +1104,7 @@ static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, i nl.old_mode = lock->mode; nl.new_mode = mode; - ret = scoutfs_client_lock_request(sb, &nl); + ret = scoutfs_client_lock_request(sb, &nl, lock); if (ret) { spin_lock(&linfo->lock); lock->request_pending = 0; diff --git a/kmod/src/lock.h b/kmod/src/lock.h index 07908d62..c9ebc1ad 100644 --- a/kmod/src/lock.h +++ b/kmod/src/lock.h @@ -60,6 +60,8 @@ struct scoutfs_lock_coverage { int scoutfs_lock_grant_response(struct super_block *sb, struct scoutfs_net_lock *nl); +void scoutfs_lock_request_failed(struct super_block *sb, + struct scoutfs_lock *lock); int scoutfs_lock_invalidate_request(struct super_block *sb, u64 net_id, struct scoutfs_net_lock *nl); int scoutfs_lock_recover_request(struct super_block *sb, u64 net_id, diff --git a/kmod/src/net.c b/kmod/src/net.c index 11ef1dab..e1c42a3b 100644 --- a/kmod/src/net.c +++ b/kmod/src/net.c @@ -1750,8 +1750,10 @@ void scoutfs_net_client_greeting(struct super_block *sb, bool new_server) { struct net_info *ninf = SCOUTFS_SB(sb)->net_info; + scoutfs_net_response_t resp_func; struct message_send *msend; struct message_send *tmp; + void *resp_data; /* only called on client connections :/ */ BUG_ON(conn->listening_conn); @@ -1760,10 +1762,32 @@ void scoutfs_net_client_greeting(struct super_block *sb, if (new_server) { atomic64_set(&conn->recv_seq, 0); + + /* drop stale responses; old server's state is gone */ list_for_each_entry_safe(msend, tmp, &conn->resend_queue, head){ if (nh_is_response(&msend->nh)) free_msend(ninf, conn, msend); } + + /* + * Complete pending requests with -ECONNRESET. Any state + * they depended on in the old server was reclaimed at + * fence time, so resending is wrong. Callers re-issue on + * the new server if they still care. + */ + while ((msend = list_first_entry_or_null(&conn->resend_queue, + struct message_send, head))) { + if (nh_is_response(&msend->nh)) + break; + resp_func = msend->resp_func; + resp_data = msend->resp_data; + free_msend(ninf, conn, msend); + spin_unlock(&conn->lock); + + call_resp_func(sb, conn, resp_func, resp_data, NULL, 0, -ECONNRESET); + + spin_lock(&conn->lock); + } } set_valid_greeting(conn);