Bound RPC waits in idempotent background workers

The srch compact and orphan scan workers called sync request RPCs that would block indefinitely if the server stopped answering. Both workers are idempotent and reschedule on error, so blocking forever buys nothing compared to treating a stalled RPC as a failure and trying again on the next tick. Add scoutfs_net_sync_request_timeout, a bounded-wait variant that returns -ETIMEDOUT if the response doesn't arrive in time. Response state lives on a refcounted heap allocation rather than the caller's stack so a late callback can't scribble into freed memory. On timeout we race with an arriving response for the msend under conn->lock: if find_request wins we queue_dead_free and drop the callback's ref; otherwise we wait for the in-flight callback to complete before returning. Add _timeout typed wrappers for the four RPCs these workers use and thread a 5 minute bound in from each worker. All other callers keep the unbounded client_sync_request path with its reconnect retries. Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-26 08:10:30 +00:00 · 2026-04-17 11:48:31 -07:00
parent fe43c624aa
commit 64200ed61c
7 changed files with 212 additions and 4 deletions
--- a/kmod/src/client.c
+++ b/kmod/src/client.c
@@ -144,6 +144,23 @@ int scoutfs_client_get_roots(struct super_block *sb,
 				   NULL, 0, roots, sizeof(*roots));
 }

+/*
+ * Bounded-wait get_roots for the orphan scan worker.  The worker
+ * reschedules on error, so -ETIMEDOUT is treated like any other RPC
+ * failure and retries on the next scan.
+ */
+int scoutfs_client_get_roots_timeout(struct super_block *sb,
+				     struct scoutfs_net_roots *roots,
+				     unsigned long timeout_jiffies)
+{
+	struct client_info *client = SCOUTFS_SB(sb)->client_info;
+
+	return scoutfs_net_sync_request_timeout(sb, client->conn,
+						SCOUTFS_NET_CMD_GET_ROOTS,
+						NULL, 0, roots, sizeof(*roots),
+						timeout_jiffies);
+}
+
 int scoutfs_client_get_last_seq(struct super_block *sb, u64 *seq)
 {
 	struct client_info *client = SCOUTFS_SB(sb)->client_info;
@@ -229,6 +246,23 @@ int scoutfs_client_srch_get_compact(struct super_block *sb,
 				   NULL, 0, sc, sizeof(*sc));
 }

+/*
+ * Bounded-wait get_compact for the srch compact worker.  The worker
+ * reschedules on any error and the compact work is idempotent, so
+ * -ETIMEDOUT just defers this round.
+ */
+int scoutfs_client_srch_get_compact_timeout(struct super_block *sb,
+					    struct scoutfs_srch_compact *sc,
+					    unsigned long timeout_jiffies)
+{
+	struct client_info *client = SCOUTFS_SB(sb)->client_info;
+
+	return scoutfs_net_sync_request_timeout(sb, client->conn,
+						SCOUTFS_NET_CMD_SRCH_GET_COMPACT,
+						NULL, 0, sc, sizeof(*sc),
+						timeout_jiffies);
+}
+
 /* Commit the result of a srch file compaction. */
 int scoutfs_client_srch_commit_compact(struct super_block *sb,
 				       struct scoutfs_srch_compact *res)
@@ -240,6 +274,24 @@ int scoutfs_client_srch_commit_compact(struct super_block *sb,
 				   res, sizeof(*res), NULL, 0);
 }

+/*
+ * Bounded-wait commit_compact for the srch compact worker.  The server
+ * ignores partial work flagged with ERROR and is expected to reclaim
+ * our allocators and reassign the compact.  NOTE(review): ERROR is set
+ * from the compaction result before sending, not by a commit timeout.
+ */
+int scoutfs_client_srch_commit_compact_timeout(struct super_block *sb,
+					       struct scoutfs_srch_compact *res,
+					       unsigned long timeout_jiffies)
+{
+	struct client_info *client = SCOUTFS_SB(sb)->client_info;
+
+	return scoutfs_net_sync_request_timeout(sb, client->conn,
+						SCOUTFS_NET_CMD_SRCH_COMMIT_COMPACT,
+						res, sizeof(*res), NULL, 0,
+						timeout_jiffies);
+}
+
 int scoutfs_client_get_log_merge(struct super_block *sb,
 				 struct scoutfs_log_merge_request *req)
 {
@@ -293,6 +345,28 @@ int scoutfs_client_open_ino_map(struct super_block *sb, u64 group_nr,
 				   &args, sizeof(args), map, sizeof(*map));
 }

+/*
+ * Bounded-wait open_ino_map for the orphan scan worker.  The scan
+ * reschedules on error; the delete path callers keep the unbounded
+ * retry.
+ */
+int scoutfs_client_open_ino_map_timeout(struct super_block *sb, u64 group_nr,
+					struct scoutfs_open_ino_map *map,
+					unsigned long timeout_jiffies)
+{
+	struct client_info *client = SCOUTFS_SB(sb)->client_info;
+	struct scoutfs_open_ino_map_args args = {
+		.group_nr = cpu_to_le64(group_nr),
+		.req_id = 0,
+	};
+
+	return scoutfs_net_sync_request_timeout(sb, client->conn,
+						SCOUTFS_NET_CMD_OPEN_INO_MAP,
+						&args, sizeof(args),
+						map, sizeof(*map),
+						timeout_jiffies);
+}
+
 /* The client is asking the server for the current volume options */
 int scoutfs_client_get_volopt(struct super_block *sb, struct scoutfs_volume_options *volopt)
 {
--- a/kmod/src/client.h
+++ b/kmod/src/client.h
@@ -9,6 +9,9 @@ int scoutfs_client_commit_log_trees(struct super_block *sb,
 				    struct scoutfs_log_trees *lt);
 int scoutfs_client_get_roots(struct super_block *sb,
 			     struct scoutfs_net_roots *roots);
+int scoutfs_client_get_roots_timeout(struct super_block *sb,
+				     struct scoutfs_net_roots *roots,
+				     unsigned long timeout_jiffies);
 u64 *scoutfs_client_bulk_alloc(struct super_block *sb);
 int scoutfs_client_get_last_seq(struct super_block *sb, u64 *seq);
 int scoutfs_client_lock_request(struct super_block *sb,
@@ -20,8 +23,14 @@ int scoutfs_client_lock_recover_response(struct super_block *sb, u64 net_id,
 					 struct scoutfs_net_lock_recover *nlr);
 int scoutfs_client_srch_get_compact(struct super_block *sb,
 				    struct scoutfs_srch_compact *sc);
+int scoutfs_client_srch_get_compact_timeout(struct super_block *sb,
+					    struct scoutfs_srch_compact *sc,
+					    unsigned long timeout_jiffies);
 int scoutfs_client_srch_commit_compact(struct super_block *sb,
 				       struct scoutfs_srch_compact *res);
+int scoutfs_client_srch_commit_compact_timeout(struct super_block *sb,
+					       struct scoutfs_srch_compact *res,
+					       unsigned long timeout_jiffies);
 int scoutfs_client_get_log_merge(struct super_block *sb,
 				 struct scoutfs_log_merge_request *req);
 int scoutfs_client_commit_log_merge(struct super_block *sb,
@@ -30,6 +39,9 @@ int scoutfs_client_send_omap_response(struct super_block *sb, u64 id,
 				      struct scoutfs_open_ino_map *map);
 int scoutfs_client_open_ino_map(struct super_block *sb, u64 group_nr,
 				struct scoutfs_open_ino_map *map);
+int scoutfs_client_open_ino_map_timeout(struct super_block *sb, u64 group_nr,
+					struct scoutfs_open_ino_map *map,
+					unsigned long timeout_jiffies);
 int scoutfs_client_get_volopt(struct super_block *sb, struct scoutfs_volume_options *volopt);
 int scoutfs_client_set_volopt(struct super_block *sb, struct scoutfs_volume_options *volopt);
 int scoutfs_client_clear_volopt(struct super_block *sb, struct scoutfs_volume_options *volopt);
--- a/kmod/src/counters.h
+++ b/kmod/src/counters.h
@@ -62,6 +62,7 @@
 	EXPAND_COUNTER(btree_walk)				\
 	EXPAND_COUNTER(btree_walk_restart)			\
 	EXPAND_COUNTER(client_farewell_error)			\
+	EXPAND_COUNTER(client_rpc_timeout)			\
 	EXPAND_COUNTER(corrupt_btree_block_level)		\
 	EXPAND_COUNTER(corrupt_btree_no_child_ref)		\
 	EXPAND_COUNTER(corrupt_dirent_backref_name_len)		\
--- a/kmod/src/inode.c
+++ b/kmod/src/inode.c
@@ -2074,6 +2074,14 @@ void scoutfs_inode_schedule_orphan_dwork(struct super_block *sb)
 	}
 }

+/*
+ * Generous per-RPC bound for the idempotent orphan scan worker.  A
+ * server that hasn't answered in this long is assumed to be broken;
+ * dropping the request lets the scan reschedule instead of blocking
+ * forever.
+ */
+#define ORPHAN_SCAN_RPC_TIMEOUT (5 * 60 * HZ)
+
 /*
 * Find and delete inodes whose only remaining reference is the
 * persistent orphan item that was created as they were unlinked.
@@ -2128,7 +2136,7 @@ static void inode_orphan_scan_worker(struct work_struct *work)
 	init_orphan_key(&last, U64_MAX);
 	omap.args.group_nr = cpu_to_le64(U64_MAX);

-	ret = scoutfs_client_get_roots(sb, &roots);
+	ret = scoutfs_client_get_roots_timeout(sb, &roots, ORPHAN_SCAN_RPC_TIMEOUT);
 	if (ret)
 		goto out;

@@ -2169,7 +2177,8 @@ static void inode_orphan_scan_worker(struct work_struct *work)
 		scoutfs_omap_calc_group_nrs(ino, &group_nr, &bit_nr);

 		if (le64_to_cpu(omap.args.group_nr) != group_nr) {
-			ret = scoutfs_client_open_ino_map(sb, group_nr, &omap);
+			ret = scoutfs_client_open_ino_map_timeout(sb, group_nr, &omap,
+								  ORPHAN_SCAN_RPC_TIMEOUT);
 			if (ret < 0)
 				goto out;
 		}
--- a/kmod/src/net.c
+++ b/kmod/src/net.c
@@ -2060,6 +2060,104 @@ int scoutfs_net_sync_request(struct super_block *sb,
 	return ret;
 }

+/*
+ * A bounded-wait variant of sync_request for idempotent background
+ * workers that must reschedule instead of blocking indefinitely on an
+ * unresponsive server.  Returns -ETIMEDOUT if the response doesn't
+ * arrive within timeout_jiffies; the caller then treats it like any
+ * other RPC failure and retries on its normal reschedule cadence.
+ *
+ * Response state lives in a refcounted heap allocation rather than on
+ * the caller's stack so a late callback can't scribble into freed
+ * memory.  On timeout we race an arriving response for the msend: if
+ * find_request wins we queue_dead_free and drop the callback's ref;
+ * otherwise we wait for the running callback.  Both cases still return
+ * -ETIMEDOUT, even if the late callback already copied into resp.
+ */
+struct bounded_sync {
+	struct completion comp;
+	void *resp;
+	unsigned int resp_len;
+	int error;
+	atomic_t refs;
+};
+
+static void bounded_sync_put(struct bounded_sync *bs)
+{
+	if (atomic_dec_and_test(&bs->refs))
+		kfree(bs);
+}
+
+static int bounded_sync_response(struct super_block *sb,
+				 struct scoutfs_net_connection *conn,
+				 void *resp, unsigned int resp_len,
+				 int error, void *data)
+{
+	struct bounded_sync *bs = data;
+
+	if (error == 0 && resp_len != bs->resp_len)
+		error = -EMSGSIZE;
+
+	if (error)
+		bs->error = error;
+	else if (resp_len)
+		memcpy(bs->resp, resp, resp_len);
+
+	complete(&bs->comp);
+	bounded_sync_put(bs);
+	return 0;
+}
+
+int scoutfs_net_sync_request_timeout(struct super_block *sb,
+				     struct scoutfs_net_connection *conn,
+				     u8 cmd, void *arg, unsigned arg_len,
+				     void *resp, size_t resp_len,
+				     unsigned long timeout_jiffies)
+{
+	struct message_send *msend;
+	struct bounded_sync *bs;
+	int ret;
+	u64 id;
+
+	bs = kzalloc(sizeof(*bs), GFP_NOFS);
+	if (!bs)
+		return -ENOMEM;
+	init_completion(&bs->comp);
+	bs->resp = resp;
+	bs->resp_len = resp_len;
+	bs->error = 0;
+	atomic_set(&bs->refs, 2);
+
+	ret = scoutfs_net_submit_request(sb, conn, cmd, arg, arg_len,
+					 bounded_sync_response, bs, &id);
+	if (ret) {
+		bounded_sync_put(bs);
+		bounded_sync_put(bs);
+		return ret;
+	}
+
+	if (wait_for_completion_timeout(&bs->comp, timeout_jiffies) == 0) {
+		scoutfs_inc_counter(sb, client_rpc_timeout);
+
+		spin_lock(&conn->lock);
+		msend = find_request(conn, cmd, id);
+		if (msend)
+			queue_dead_free(conn, msend);
+		spin_unlock(&conn->lock);
+
+		if (msend)
+			bounded_sync_put(bs);
+		else
+			wait_for_completion(&bs->comp);
+		ret = -ETIMEDOUT;
+	} else {
+		ret = bs->error;
+	}
+
+	bounded_sync_put(bs);
+	return ret;
+}
+
 static void net_tseq_show_conn(struct seq_file *m,
 			      struct scoutfs_tseq_entry *ent)
 {
--- a/kmod/src/net.h
+++ b/kmod/src/net.h
@@ -150,6 +150,11 @@ int scoutfs_net_sync_request(struct super_block *sb,
 			     struct scoutfs_net_connection *conn,
 			     u8 cmd, void *arg, unsigned arg_len,
 			     void *resp, size_t resp_len);
+int scoutfs_net_sync_request_timeout(struct super_block *sb,
+				     struct scoutfs_net_connection *conn,
+				     u8 cmd, void *arg, unsigned arg_len,
+				     void *resp, size_t resp_len,
+				     unsigned long timeout_jiffies);
 int scoutfs_net_response(struct super_block *sb,
 			 struct scoutfs_net_connection *conn,
 			 u8 cmd, u64 id, int error, void *resp, u16 resp_len);
--- a/kmod/src/srch.c
+++ b/kmod/src/srch.c
@@ -95,6 +95,13 @@ struct srch_info {
 */
 #define SRCH_COMPACT_DIRTY_LIMIT_BYTES (32 * 1024 * 1024)

+/*
+ * Generous per-RPC bound for the idempotent compact worker.  A server
+ * that hasn't answered in this long is assumed to be broken; dropping
+ * the request lets the worker reschedule instead of blocking forever.
+ */
+#define COMPACT_RPC_TIMEOUT (5 * 60 * HZ)
+
 static int sre_cmp(const struct scoutfs_srch_entry *a,
 		   const struct scoutfs_srch_entry *b)
 {
@@ -2256,7 +2263,8 @@ static void scoutfs_srch_compact_worker(struct work_struct *work)

 	scoutfs_block_writer_init(sb, &wri);

-	ret = scoutfs_client_srch_get_compact(sb, sc);
+	ret = scoutfs_client_srch_get_compact_timeout(sb, sc,
+						      COMPACT_RPC_TIMEOUT);
 	if (ret >= 0)
 		trace_scoutfs_srch_compact_client_recv(sb, sc);
 	if (ret < 0 || sc->nr == 0)
@@ -2287,7 +2295,8 @@ static void scoutfs_srch_compact_worker(struct work_struct *work)
 	sc->flags |= ret < 0 ? SCOUTFS_SRCH_COMPACT_FLAG_ERROR : 0;

 	trace_scoutfs_srch_compact_client_send(sb, sc);
-	err = scoutfs_client_srch_commit_compact(sb, sc);
+	err = scoutfs_client_srch_commit_compact_timeout(sb, sc,
+							 COMPACT_RPC_TIMEOUT);
 	if (err < 0 && ret == 0)
 		ret = err;
 out: