Compare commits


2 Commits

Author SHA1 Message Date
Chris Kirby
2fcd56d0e2 Fix commit budget calculation with multiple holders
The try_drain_data_freed() path was generating errors about overrunning
its commit budget:

scoutfs f.2b8928.r.02689f error: 1 holders exceeded alloc budget av: bef 8185 now 8036, fr: bef 8185 now 7602

The budget overrun check was using the current number of commit holders
(in this case one) instead of the maximum number of concurrent holders
(in this case two). So even well-behaved paths like try_drain_data_freed()
can appear to exceed their commit budget if other holders dirty some blocks
and apply their commits before the try_drain_data_freed() thread does its
final budget reconciliation.

Signed-off-by: Chris Kirby <ckirby@versity.com>
2025-06-17 11:38:07 -05:00
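For illustration, a minimal kernel-style C sketch of the kind of check this commit message describes. The names exceeded_alloc_budget, blocks_used, and max_holders are hypothetical; only COMMIT_HOLD_ALLOC_BUDGET appears in the diff below, and the real scoutfs check differs in detail.

/*
 * Illustrative sketch only -- not the scoutfs implementation.  Size the
 * budget by the maximum number of holders that were active during the
 * commit rather than by the holders still present at reconciliation
 * time, so a holder that applies its commit early doesn't make the
 * remaining holder appear to overrun.
 */
static bool exceeded_alloc_budget(u64 blocks_used, unsigned int max_holders)
{
	return blocks_used > (u64)max_holders * COMMIT_HOLD_ALLOC_BUDGET;
}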
Chris Kirby
e0d2aec2c0 Fix dirtied block calculation in extent_mod_blocks()
Free extents are stored in two btrees: one sorted by block number, one
by size. So if you insert a new extent between two existing extents, you can
be modifying two items in the by-block-number tree. And depending on the size
of those items, that can result in three items over in the by-size tree.
So that's a 5x multiplier per level.

If we're shrinking the tree and adding more freed blocks, we're conceptually
dirtying two blocks at each level to merge (currently *2 in the code).
But if they fall under the low water mark then one of them is freed, so we
can have *3 per level in this case.

Signed-off-by: Chris Kirby <ckirby@versity.com>
2025-06-17 11:38:07 -05:00
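For illustration, a kernel-style C sketch of the worst-case accounting this commit message describes. The function names and the caller-supplied btree_height are hypothetical; the actual extent_mod_blocks() calculation in the diff is more involved.

/*
 * Illustrative sketch only.  Inserting one free extent can modify two
 * neighboring items in the by-blkno btree and, depending on the
 * resulting extent sizes, three items in the by-size btree: five
 * dirtied items per btree level.
 */
static u64 free_extent_insert_dirty_items(u64 btree_height)
{
	return btree_height * (2 + 3);	/* by-blkno + by-size items per level */
}

/*
 * When shrinking the tree while still adding freed blocks, each level
 * can dirty two blocks to merge, or three if the merge drops a block
 * below the low water mark and frees it.
 */
static u64 shrink_merge_dirty_blocks(u64 btree_height, bool may_free)
{
	return btree_height * (may_free ? 3 : 2);
}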
17 changed files with 95 additions and 297 deletions

View File

@@ -712,8 +712,8 @@ retry:
ret = 0;
out:
if (!retried && !IS_ERR_OR_NULL(bp) && !block_is_dirty(bp) &&
(ret == -ESTALE || scoutfs_trigger(sb, BLOCK_REMOVE_STALE))) {
if ((ret == -ESTALE || scoutfs_trigger(sb, BLOCK_REMOVE_STALE)) &&
!retried && !block_is_dirty(bp)) {
retried = true;
scoutfs_inc_counter(sb, block_cache_remove_stale);
block_remove(sb, bp);

View File

@@ -90,7 +90,6 @@
EXPAND_COUNTER(forest_read_items) \
EXPAND_COUNTER(forest_roots_next_hint) \
EXPAND_COUNTER(forest_set_bloom_bits) \
EXPAND_COUNTER(inode_deleted) \
EXPAND_COUNTER(item_cache_count_objects) \
EXPAND_COUNTER(item_cache_scan_objects) \
EXPAND_COUNTER(item_clear_dirty) \
@@ -147,8 +146,6 @@
EXPAND_COUNTER(lock_unlock) \
EXPAND_COUNTER(lock_wait) \
EXPAND_COUNTER(log_merge_wait_timeout) \
EXPAND_COUNTER(log_merges_completed) \
EXPAND_COUNTER(log_merges_started) \
EXPAND_COUNTER(net_dropped_response) \
EXPAND_COUNTER(net_send_bytes) \
EXPAND_COUNTER(net_send_error) \

View File

@@ -470,7 +470,7 @@ struct scoutfs_srch_compact {
* @get_trans_seq, @commit_trans_seq: These pair of sequence numbers
* determine if a transaction is currently open for the mount that owns
* the log_trees struct. get_trans_seq is advanced by the server as the
* transaction is opened. The server sets commit_trans_seq equal to
* transaction is opened. The server sets comimt_trans_seq equal to
* get_ as the transaction is committed.
*/
struct scoutfs_log_trees {

View File

@@ -1854,9 +1854,6 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino)
goto out;
ret = delete_inode_items(sb, ino, &sinode, lock, orph_lock);
if (ret == 0)
scoutfs_inc_counter(sb, inode_deleted);
out:
if (clear_trying)
clear_bit(bit_nr, ldata->trying);
@@ -1965,8 +1962,6 @@ static void iput_worker(struct work_struct *work)
while (count-- > 0)
iput(inode);
cond_resched();
/* can't touch inode after final iput */
spin_lock(&inf->iput_lock);
@@ -2128,8 +2123,8 @@ static void inode_orphan_scan_worker(struct work_struct *work)
}
/* seemingly orphaned and unused, get locks and check for sure */
ret = try_delete_inode_items(sb, ino);
scoutfs_inc_counter(sb, orphan_scan_attempts);
ret = try_delete_inode_items(sb, ino);
}
ret = 0;

View File

@@ -18,19 +18,6 @@ do { \
#define scoutfs_err(sb, fmt, args...) \
scoutfs_msg_check(sb, KERN_ERR, " error", fmt, ##args)
/*
* This can be used to suppress the expected -EIO error messages that are
* generated during forced unmount.
*/
#define ignore_err(sb, error) \
((error) == -EIO && unlikely(scoutfs_forcing_unmount(sb)))
#define scoutfs_err_maybe(sb, error, fmt, args...) \
do { \
if (!ignore_err(sb, error)) \
scoutfs_err(sb, fmt, args); \
} while (0)
#define scoutfs_warn(sb, fmt, args...) \
scoutfs_msg_check(sb, KERN_WARNING, " warning", fmt, ##args)

View File

@@ -726,8 +726,6 @@ static void scoutfs_quorum_worker(struct work_struct *work)
struct quorum_status qst = {0,};
struct hb_recording hbr;
bool record_hb;
bool recv_failed;
bool initializing = true;
int ret;
int err;
@@ -760,8 +758,6 @@ static void scoutfs_quorum_worker(struct work_struct *work)
update_show_status(qinf, &qst);
recv_failed = false;
ret = recv_msg(sb, &msg, qst.timeout);
if (ret < 0) {
if (ret != -ETIMEDOUT && ret != -EAGAIN) {
@@ -769,9 +765,6 @@ static void scoutfs_quorum_worker(struct work_struct *work)
scoutfs_inc_counter(sb, quorum_recv_error);
goto out;
}
recv_failed = true;
msg.type = SCOUTFS_QUORUM_MSG_INVALID;
ret = 0;
}
@@ -829,13 +822,12 @@ static void scoutfs_quorum_worker(struct work_struct *work)
/* followers and candidates start new election on timeout */
if (qst.role != LEADER &&
(initializing || recv_failed) &&
ktime_after(ktime_get(), qst.timeout)) {
/* .. but only if their server has stopped */
if (!scoutfs_server_is_down(sb)) {
qst.timeout = election_timeout();
scoutfs_inc_counter(sb, quorum_candidate_server_stopping);
goto again;
continue;
}
qst.role = CANDIDATE;
@@ -972,9 +964,6 @@ static void scoutfs_quorum_worker(struct work_struct *work)
}
record_hb_delay(sb, qinf, &hbr, record_hb, qst.role);
again:
initializing = false;
}
update_show_status(qinf, &qst);

View File

@@ -823,14 +823,13 @@ DEFINE_EVENT(scoutfs_lock_info_class, scoutfs_lock_destroy,
);
TRACE_EVENT(scoutfs_xattr_set,
TP_PROTO(struct super_block *sb, __u64 ino, size_t name_len,
const void *value, size_t size, int flags),
TP_PROTO(struct super_block *sb, size_t name_len, const void *value,
size_t size, int flags),
TP_ARGS(sb, ino, name_len, value, size, flags),
TP_ARGS(sb, name_len, value, size, flags),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, ino)
__field(size_t, name_len)
__field(const void *, value)
__field(size_t, size)
@@ -839,16 +838,15 @@ TRACE_EVENT(scoutfs_xattr_set,
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->ino = ino;
__entry->name_len = name_len;
__entry->value = value;
__entry->size = size;
__entry->flags = flags;
),
TP_printk(SCSBF" ino %llu name_len %zu value %p size %zu flags 0x%x",
SCSB_TRACE_ARGS, __entry->ino, __entry->name_len,
__entry->value, __entry->size, __entry->flags)
TP_printk(SCSBF" name_len %zu value %p size %zu flags 0x%x",
SCSB_TRACE_ARGS, __entry->name_len, __entry->value,
__entry->size, __entry->flags)
);
TRACE_EVENT(scoutfs_advance_dirty_super,
@@ -2465,27 +2463,6 @@ TRACE_EVENT(scoutfs_block_dirty_ref,
__entry->block_blkno, __entry->block_seq)
);
TRACE_EVENT(scoutfs_get_file_block,
TP_PROTO(struct super_block *sb, u64 blkno, int flags),
TP_ARGS(sb, blkno, flags),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, blkno)
__field(int, flags)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->blkno = blkno;
__entry->flags = flags;
),
TP_printk(SCSBF" blkno %llu flags 0x%x",
SCSB_TRACE_ARGS, __entry->blkno, __entry->flags)
);
TRACE_EVENT(scoutfs_block_stale,
TP_PROTO(struct super_block *sb, struct scoutfs_block_ref *ref,
struct scoutfs_block_header *hdr, u32 magic, u32 crc),
@@ -3083,27 +3060,6 @@ DEFINE_EVENT(scoutfs_srch_compact_class, scoutfs_srch_compact_client_recv,
TP_ARGS(sb, sc)
);
TRACE_EVENT(scoutfs_ioc_search_xattrs,
TP_PROTO(struct super_block *sb, u64 ino, u64 last_ino),
TP_ARGS(sb, ino, last_ino),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(u64, ino)
__field(u64, last_ino)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->ino = ino;
__entry->last_ino = last_ino;
),
TP_printk(SCSBF" ino %llu last_ino %llu", SCSB_TRACE_ARGS,
__entry->ino, __entry->last_ino)
);
#endif /* _TRACE_SCOUTFS_H */
/* This part must be outside protection */

View File

@@ -1040,101 +1040,6 @@ static int next_log_merge_item(struct super_block *sb,
return next_log_merge_item_key(sb, root, zone, &key, val, val_len);
}
static int do_finalize_ours(struct super_block *sb,
struct scoutfs_log_trees *lt,
struct commit_hold *hold)
{
struct server_info *server = SCOUTFS_SB(sb)->server_info;
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
struct scoutfs_key key;
char *err_str = NULL;
u64 rid = le64_to_cpu(lt->rid);
bool more;
int ret;
int err;
mutex_lock(&server->srch_mutex);
ret = scoutfs_srch_rotate_log(sb, &server->alloc, &server->wri,
&super->srch_root, &lt->srch_file, true);
mutex_unlock(&server->srch_mutex);
if (ret < 0) {
scoutfs_err(sb, "error rotating srch log for rid %016llx: %d",
rid, ret);
return ret;
}
do {
more = false;
/*
* All of these can return errors, perhaps indicating successful
* partial progress, after having modified the allocator trees.
* We always have to update the roots in the log item.
*/
mutex_lock(&server->alloc_mutex);
ret = (err_str = "splice meta_freed to other_freed",
scoutfs_alloc_splice_list(sb, &server->alloc,
&server->wri, server->other_freed,
&lt->meta_freed)) ?:
(err_str = "splice meta_avail",
scoutfs_alloc_splice_list(sb, &server->alloc,
&server->wri, server->other_freed,
&lt->meta_avail)) ?:
(err_str = "empty data_avail",
alloc_move_empty(sb, &super->data_alloc,
&lt->data_avail,
COMMIT_HOLD_ALLOC_BUDGET / 2)) ?:
(err_str = "empty data_freed",
alloc_move_empty(sb, &super->data_alloc,
&lt->data_freed,
COMMIT_HOLD_ALLOC_BUDGET / 2));
mutex_unlock(&server->alloc_mutex);
/*
* only finalize, allowing merging, once the allocators are
* fully freed
*/
if (ret == 0) {
/* the transaction is no longer open */
le64_add_cpu(&lt->flags, SCOUTFS_LOG_TREES_FINALIZED);
lt->finalize_seq = cpu_to_le64(scoutfs_server_next_seq(sb));
}
scoutfs_key_init_log_trees(&key, rid, le64_to_cpu(lt->nr));
err = scoutfs_btree_update(sb, &server->alloc, &server->wri,
&super->logs_root, &key, lt,
sizeof(*lt));
BUG_ON(err != 0); /* alloc, log, srch items out of sync */
if (ret == -EINPROGRESS) {
more = true;
mutex_unlock(&server->logs_mutex);
ret = server_apply_commit(sb, hold, 0);
if (ret < 0)
WARN_ON_ONCE(ret < 0);
server_hold_commit(sb, hold);
mutex_lock(&server->logs_mutex);
} else if (ret == 0) {
memset(&lt->item_root, 0, sizeof(lt->item_root));
memset(&lt->bloom_ref, 0, sizeof(lt->bloom_ref));
lt->inode_count_delta = 0;
lt->max_item_seq = 0;
lt->finalize_seq = 0;
le64_add_cpu(&lt->nr, 1);
lt->flags = 0;
}
} while (more);
if (ret < 0) {
scoutfs_err(sb,
"error %d finalizing log trees for rid %016llx: %s",
ret, rid, err_str);
}
return ret;
}
/*
* Finalizing the log btrees for merging needs to be done carefully so
* that items don't appear to go backwards in time.
@@ -1186,6 +1091,7 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
struct scoutfs_log_merge_range rng;
struct scoutfs_mount_options opts;
struct scoutfs_log_trees each_lt;
struct scoutfs_log_trees fin;
unsigned int delay_ms;
unsigned long timeo;
bool saw_finalized;
@@ -1214,8 +1120,6 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
break;
}
scoutfs_inc_counter(sb, log_merges_started);
/* look for finalized and other active log btrees */
saw_finalized = false;
others_active = false;
@@ -1292,11 +1196,32 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
/* Finalize ours if it's visible to others */
if (ours_visible) {
ret = do_finalize_ours(sb, lt, hold);
fin = *lt;
memset(&fin.meta_avail, 0, sizeof(fin.meta_avail));
memset(&fin.meta_freed, 0, sizeof(fin.meta_freed));
memset(&fin.data_avail, 0, sizeof(fin.data_avail));
memset(&fin.data_freed, 0, sizeof(fin.data_freed));
memset(&fin.srch_file, 0, sizeof(fin.srch_file));
le64_add_cpu(&fin.flags, SCOUTFS_LOG_TREES_FINALIZED);
fin.finalize_seq = cpu_to_le64(scoutfs_server_next_seq(sb));
scoutfs_key_init_log_trees(&key, le64_to_cpu(fin.rid),
le64_to_cpu(fin.nr));
ret = scoutfs_btree_update(sb, &server->alloc, &server->wri,
&super->logs_root, &key, &fin,
sizeof(fin));
if (ret < 0) {
err_str = "finalizing ours";
err_str = "updating finalized log_trees";
break;
}
memset(&lt->item_root, 0, sizeof(lt->item_root));
memset(&lt->bloom_ref, 0, sizeof(lt->bloom_ref));
lt->inode_count_delta = 0;
lt->max_item_seq = 0;
lt->finalize_seq = 0;
le64_add_cpu(&lt->nr, 1);
lt->flags = 0;
}
/* wait a bit for mounts to arrive */
@@ -1358,7 +1283,6 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
}
/* we're done, caller can make forward progress */
scoutfs_inc_counter(sb, log_merges_completed);
break;
}
@@ -1650,11 +1574,9 @@ unlock:
ret = server_apply_commit(sb, &hold, ret);
out:
if (ret < 0) {
scoutfs_err_maybe(sb, ret,
"error %d getting log trees for rid %016llx: %s",
ret, rid, err_str);
}
if (ret < 0)
scoutfs_err(sb, "error %d getting log trees for rid %016llx: %s",
ret, rid, err_str);
/* try to drain excessive data_freed with additional commits, if needed */
if (ret == 0)
@@ -1757,14 +1679,11 @@ unlock:
mutex_unlock(&server->logs_mutex);
ret = server_apply_commit(sb, &hold, ret);
if (ret < 0) {
scoutfs_err_maybe(sb, ret,
"server error %d committing client logs for rid %016llx, nr %llu: %s",
ret, rid, le64_to_cpu(lt.nr), err_str);
}
if (ret < 0)
scoutfs_err(sb, "server error %d committing client logs for rid %016llx: %s",
ret, rid, err_str);
out:
WARN_ON_ONCE((ret < 0) && !ignore_err(sb, ret));
WARN_ON_ONCE(ret < 0);
return scoutfs_net_response(sb, conn, cmd, id, ret, NULL, 0);
}
@@ -1799,7 +1718,7 @@ static int server_get_roots(struct super_block *sb,
* all the allocator items.
*
* The caller holds the commit rwsem which means we have to do our work
* in one commit. The allocator btrees can be very large and very
* in one commit. The alocator btrees can be very large and very
* fragmented. We return -EINPROGRESS if we couldn't fully reclaim the
* allocators in one commit. The caller should apply the current
* commit and call again in a new commit.
@@ -1825,8 +1744,6 @@ static int reclaim_open_log_tree(struct super_block *sb, u64 rid)
mutex_lock(&server->logs_mutex);
scoutfs_inc_counter(sb, log_merges_started);
/* find the client's last open log_tree */
scoutfs_key_init_log_trees(&key, rid, U64_MAX);
ret = scoutfs_btree_prev(sb, &super->logs_root, &key, &iref);
@@ -1896,7 +1813,6 @@ static int reclaim_open_log_tree(struct super_block *sb, u64 rid)
&super->logs_root, &key, &lt, sizeof(lt));
BUG_ON(err != 0); /* alloc, log, srch items out of sync */
scoutfs_inc_counter(sb, log_merges_completed);
out:
mutex_unlock(&server->logs_mutex);
@@ -2101,9 +2017,7 @@ static int server_srch_get_compact(struct super_block *sb,
apply:
ret = server_apply_commit(sb, &hold, ret);
/* XXX leaked busy item */
WARN_ON_ONCE(ret < 0 && ret != -ENOENT && !ignore_err(sb, ret));
WARN_ON_ONCE(ret < 0 && ret != -ENOENT); /* XXX leaked busy item */
out:
ret = scoutfs_net_response(sb, conn, cmd, id, ret,
sc, sizeof(struct scoutfs_srch_compact));
@@ -2619,7 +2533,7 @@ static void server_log_merge_free_work(struct work_struct *work)
ret = scoutfs_btree_free_blocks(sb, &server->alloc,
&server->wri, &fr.key,
&fr.root, COMMIT_HOLD_ALLOC_BUDGET / 8);
&fr.root, COMMIT_HOLD_ALLOC_BUDGET / 2);
if (ret < 0) {
err_str = "freeing log btree";
break;
@@ -2638,7 +2552,7 @@ static void server_log_merge_free_work(struct work_struct *work)
/* freed blocks are in allocator, we *have* to update fr */
BUG_ON(ret < 0);
if (server_hold_alloc_used_since(sb, &hold) >= (COMMIT_HOLD_ALLOC_BUDGET * 3) / 4) {
if (server_hold_alloc_used_since(sb, &hold) >= COMMIT_HOLD_ALLOC_BUDGET / 2) {
mutex_unlock(&server->logs_mutex);
ret = server_apply_commit(sb, &hold, ret);
commit = false;
@@ -2947,10 +2861,9 @@ out:
mutex_unlock(&server->alloc_mutex);
BUG_ON(err); /* inconsistent */
if (ret < 0 && ret != -ENOENT) {
scoutfs_err_maybe(sb, ret, "error %d getting merge req rid %016llx: %s",
ret, rid, err_str);
}
if (ret < 0 && ret != -ENOENT)
scoutfs_err(sb, "error %d getting merge req rid %016llx: %s",
ret, rid, err_str);
}
mutex_unlock(&server->logs_mutex);

View File

@@ -62,7 +62,7 @@
* re-allocated and re-written. Search can restart by checking the
* btree for the current set of files. Compaction reads log files which
* are protected from other compactions by the persistent busy items
* created by the server. Compaction won't see its blocks reused out
* created by the server. Compaction won't see it's blocks reused out
* from under it, but it can encounter stale cached blocks that need to
* be invalidated.
*/
@@ -442,10 +442,6 @@ out:
if (ret == 0 && (flags & GFB_INSERT) && blk >= le64_to_cpu(sfl->blocks))
sfl->blocks = cpu_to_le64(blk + 1);
if (bl) {
trace_scoutfs_get_file_block(sb, bl->blkno, flags);
}
*bl_ret = bl;
return ret;
}
@@ -753,14 +749,14 @@ static int search_log_file(struct super_block *sb,
for (i = 0; i < le32_to_cpu(srb->entry_nr); i++) {
if (pos > SCOUTFS_SRCH_BLOCK_SAFE_BYTES) {
/* can only be inconsistency :/ */
ret = -EIO;
ret = EIO;
break;
}
ret = decode_entry(srb->entries + pos, &sre, &prev);
if (ret <= 0) {
/* can only be inconsistency :/ */
ret = -EIO;
ret = EIO;
break;
}
pos += ret;
@@ -863,14 +859,14 @@ static int search_sorted_file(struct super_block *sb,
if (pos > SCOUTFS_SRCH_BLOCK_SAFE_BYTES) {
/* can only be inconsistency :/ */
ret = -EIO;
ret = EIO;
break;
}
ret = decode_entry(srb->entries + pos, &sre, &prev);
if (ret <= 0) {
/* can only be inconsistency :/ */
ret = -EIO;
ret = EIO;
break;
}
pos += ret;
@@ -976,8 +972,6 @@ int scoutfs_srch_search_xattrs(struct super_block *sb,
scoutfs_inc_counter(sb, srch_search_xattrs);
trace_scoutfs_ioc_search_xattrs(sb, ino, last_ino);
*done = false;
srch_init_rb_root(sroot);
@@ -1808,7 +1802,7 @@ static void swap_page_sre(void *A, void *B, int size)
* typically, ~10x worst case).
*
* Because we read and sort all the input files we must perform the full
* compaction in one operation. The server must have given us
* compaction in one operation. The server must have given us a
* sufficiently large avail/freed lists, otherwise we'll return ENOSPC.
*/
static int compact_logs(struct super_block *sb,
@@ -1872,14 +1866,14 @@ static int compact_logs(struct super_block *sb,
if (pos > SCOUTFS_SRCH_BLOCK_SAFE_BYTES) {
/* can only be inconsistency :/ */
ret = -EIO;
ret = EIO;
break;
}
ret = decode_entry(srb->entries + pos, sre, &prev);
if (ret <= 0) {
/* can only be inconsistency :/ */
ret = -EIO;
ret = EIO;
goto out;
}
prev = *sre;

View File

@@ -742,7 +742,7 @@ int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_
int ret;
int err;
trace_scoutfs_xattr_set(sb, ino, name_len, value, size, flags);
trace_scoutfs_xattr_set(sb, name_len, value, size, flags);
if (WARN_ON_ONCE(tgs->totl && tgs->indx) ||
WARN_ON_ONCE((tgs->totl | tgs->indx) && !tag_lock))

View File

@@ -1,3 +1,4 @@
== setting longer hung task timeout
== creating fragmented extents
== unlink file with moved extents to free extents per block
== cleanup

View File

@@ -49,7 +49,7 @@ offline wating should be empty:
0
== truncating does wait
truncate should be waiting for first block:
truncate should no longer be waiting:
trunate should no longer be waiting:
0
== writing waits
should be waiting for write

View File

@@ -11,8 +11,8 @@
# format version.
#
# not supported on el8 or higher
if [ $(source /etc/os-release ; echo ${VERSION_ID:0:1}) -gt 7 ]; then
# not supported on el9!
if [ $(source /etc/os-release ; echo ${VERSION_ID:0:1}) -gt 8 ]; then
t_skip_permitted "Unsupported OS version"
fi

View File

@@ -10,6 +10,30 @@ EXTENTS_PER_BTREE_BLOCK=600
EXTENTS_PER_LIST_BLOCK=8192
FREED_EXTENTS=$((EXTENTS_PER_BTREE_BLOCK * EXTENTS_PER_LIST_BLOCK))
#
# This test specifically creates a pathologically sparse file that will
# be as expensive as possible to free. This is usually fine on
# dedicated or reasonable hardware, but trying to run this in
# virtualized debug kernels can take a very long time. This test is
# about making sure that the server doesn't fail, not that the platform
# can handle the scale of work that our btree formats happen to require
# while execution is bogged down with use-after-free memory reference
# tracking. So we give the test a lot more breathing room before
# deciding that its hung.
#
echo "== setting longer hung task timeout"
if [ -w /proc/sys/kernel/hung_task_timeout_secs ]; then
secs=$(cat /proc/sys/kernel/hung_task_timeout_secs)
test "$secs" -gt 0 || \
t_fail "confusing value '$secs' from /proc/sys/kernel/hung_task_timeout_secs"
restore_hung_task_timeout()
{
echo "$secs" > /proc/sys/kernel/hung_task_timeout_secs
}
trap restore_hung_task_timeout EXIT
echo "$((secs * 5))" > /proc/sys/kernel/hung_task_timeout_secs
fi
echo "== creating fragmented extents"
fragmented_data_extents $FREED_EXTENTS $EXTENTS_PER_BTREE_BLOCK "$T_D0/alloc" "$T_D0/move"

View File

@@ -157,7 +157,7 @@ echo "truncate should be waiting for first block:"
expect_wait "$DIR/file" "change_size" $ino 0
scoutfs stage "$DIR/golden" "$DIR/file" -V "$vers" -o 0 -l $BYTES
sleep .1
echo "truncate should no longer be waiting:"
echo "trunate should no longer be waiting:"
scoutfs data-waiting -B 0 -I 0 -p "$DIR" | wc -l
cat "$DIR/golden" > "$DIR/file"
vers=$(scoutfs stat -s data_version "$DIR/file")
@@ -168,13 +168,10 @@ scoutfs release "$DIR/file" -V "$vers" -o 0 -l $BYTES
# overwrite, not truncate+write
dd if="$DIR/other" of="$DIR/file" \
bs=$BS count=$BLOCKS conv=notrunc status=none &
pid="$!"
sleep .1
echo "should be waiting for write"
expect_wait "$DIR/file" "write" $ino 0
scoutfs stage "$DIR/golden" "$DIR/file" -V "$vers" -o 0 -l $BYTES
# wait for the background dd to complete
wait "$pid" 2> /dev/null
cmp "$DIR/file" "$DIR/other"
echo "== cleanup"

View File

@@ -59,10 +59,6 @@ for nr in $(t_fs_nrs); do
done
sync
t_silent_kill $pids
LLMS=$(t_counter log_merges_started $sv)
LLMC=$(t_counter log_merges_completed $sv)
for nr in $(t_fs_nrs); do
t_force_umount $nr
done
@@ -71,64 +67,18 @@ t_mount_all
while test -d $(echo /sys/fs/scoutfs/*/fence/* | cut -d " " -f 1); do
sleep .5
done
# wait for the orphan inode cleanup changes to be merged
S=0
C=0
sv=$(t_server_nr)
while sleep 1; do
LMS=$(t_counter log_merges_started $sv)
LMC=$(t_counter log_merges_completed $sv)
if [ $LMS != $LLMS ]; then
(( S++ ))
LLMS=$LMS
fi
if [ $LMC != $LLMC ]; then
(( C++ ))
LLMC=$LMC
fi
# If we've completed more than one merge, we're done
if [ $C -gt 1 ]; then
break
fi
# If we've started more than one merge, we're done
if [ $S -gt 1 ]; then
break
fi
done
# wait for orphan scans to run
t_set_all_sysfs_mount_options orphan_scan_delay_ms 1000
# wait until we see two consecutive orphan scan attempts without
# any inode deletion forward progress in each mount
for nr in $(t_fs_nrs); do
C=0
LOSA=$(t_counter orphan_scan_attempts $nr)
LDOP=$(t_counter inode_deleted $nr)
while [ $C -lt 2 ]; do
sleep 1
OSA=$(t_counter orphan_scan_attempts $nr)
DOP=$(t_counter inode_deleted $nr)
if [ $OSA != $LOSA ]; then
if [ $DOP == $LDOP ]; then
(( C++ ))
else
C=0
fi
fi
LOSA=$OSA
LDOP=$DOP
# also have to wait for delayed log merge work from mount
C=120
while (( C-- )); do
brk=1
for ino in $inos; do
inode_exists $ino && brk=0
done
test $brk -eq 1 && break
sleep 1
done
for ino in $inos; do
inode_exists $ino && echo "$ino still exists"
done

View File

@@ -22,11 +22,6 @@ RE="$RE|warning: memset with byte count of 4194304"
# some sparse versions don't know about some builtins
RE="$RE|error: undefined identifier '__builtin_fpclassify'"
# on el8, sparse can't handle __has_include for some reason when _GNU_SOURCE
# is defined, and we need that for O_DIRECT.
RE="$RE|note: in included file .through /usr/include/sys/stat.h.:"
RE="$RE|/usr/include/bits/statx.h:30:6: error: "
#
# don't filter out 'too many errors' here, it can signify that
# sparse doesn't understand something and is throwing a *ton*