Drain conn workers before nulling client->conn in destroy

scoutfs_client_destroy nulled client->conn before scoutfs_net_free_conn had a chance to drain the conn's workqueue. An in-flight proc_worker running client_lock_recover dispatches scoutfs_lock_recover_request synchronously, which in turn calls scoutfs_client_lock_recover_response. That helper reads client->conn and hands it to scoutfs_net_response, so a racing NULL made submit_send dereference conn->lock and trip a KASAN null-ptr-deref followed by a GPF. Only became reachable in practice once reconnect started draining pending client requests with -ECONNRESET, because the farewell can now return while the server is still sending requests on the re-established socket. Reorder so scoutfs_net_free_conn runs first; its shutdown_worker drains conn->workq before any memory is freed, then client->conn is nulled. The original intent of nulling to catch buggy late callers is preserved. Signed-off-by: Auke Kok <auke.kok@versity.com>
Bound RPC waits in idempotent background workers
2026-04-30 01:46:54 +00:00 · 2026-04-22 13:49:33 -07:00 · 2026-04-22 13:49:27 -07:00 · 2026-04-22 13:49:20 -07:00 · 2026-04-22 13:49:19 -07:00 · 2026-04-22 13:49:18 -07:00
56 changed files with 2406 additions and 303 deletions
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -1,6 +1,83 @@
 Versity ScoutFS Release Notes
 =============================

+---
+v1.30
+\
+*Apr 21, 2026*
+
+Fix a problem reading the accumulated totals of contributing .totl.
+xattrs when log merging is in progress.  The problem would have readers
+of the totals calculate the sums incorrectly.
+
+Fix a problem updating quota rules.  There was a race where updates
+could be corrupted if they happened while a transaction was being
+written.
+
+Fix a problem deleting files with .indx. xattrs.  The internal indexing
+metadata wouldn't be properly deleted so the files would still claim to
+be present and visible in the index, though the file no longer existed.
+
+---
+v1.29
+\
+*Mar 25, 2026*
+
+Add a repair mechanism for mount logs that weren't properly resolved as
+mounts left the cluster.  The presence of these logs prevents log
+merging from making forward progress and the backlog of logs over time
+can cause operations to slow to a crawl.  With the repair mechanism in
+place the orphaned logs don't stop merging and operations proceed as
+usual.
+
+Add an ioctl for turning offline unmapped file regions into sparse
+regions.
+
+---
+v1.28
+\
+*Feb 5, 2026*
+
+Fix a bug that led to incorrect negative caching of ACL entries
+starting in version 9.6 of distribution kernels in the enterprise linux
+family.  This would manifest as ACLs seemingly disappearing,
+particularly default ACLs on directories.  The persistent ACLs always
+existed but because of internal API incompatibility some readers
+couldn't see them and would cache that they didn't exist.
+
+---
+v1.27
+\
+*Jan 15, 2026*
+
+Switch away from using the general VM cache reclaim machinery to reduce
+idle cluster locks in the client.  The VM treated locks like a cache and
+let many accumulate, presuming that it would be efficient to free them
+in batches.  Lock freeing requires network communication so this could
+result in enormous backlogs in network messages (on the order of
+hundreds of thousands) and could result in significant delays of other
+network messaging.
+
+Fix inefficient network receive processing while many messages are in
+the send queue.  This consumed sufficient CPU to cause significant
+stalls, perhaps resulting in hung task warning messages due to delayed
+lock message delivery.
+
+Fix a server livelock case that could happen while committing client
+transactions that contain a large amount of freed file data extents.
+This would present as client tasks hanging and a server task spinning
+consuming cpu.
+
+Fix a rare server request processing failure that doesn't deal with
+retransmission of a request that a previous server partially processed.
+This would present as hung client tasks and repeated "error -2
+committing log merge: getting merge status item" kernel messages.
+
+Fix an unnecessary server shutdown during specific circumstances in
+client lock recovery.  The shutdown was due to server state and was
+ultimately harmless.  The next server that started up would proceed
+accordingly.
+
 ---
 v1.26
 \
--- a/kmod/src/Makefile.kernelcompat
+++ b/kmod/src/Makefile.kernelcompat
@@ -479,10 +479,20 @@ ifneq (,$(shell grep '^unsigned int stack_trace_save' include/linux/stacktrace.h
 ccflags-y += -DKC_STACK_TRACE_SAVE
 endif

-# v6.1-rc1-4-g7420332a6ff4
 #
-# .get_acl() method now has dentry arg (and mnt_idmap). The old get_acl has been renamed
-# to get_inode_acl() and is still available as well, but has an extra rcu param.
-ifneq (,$(shell grep 'struct posix_acl ...get_acl..struct mnt_idmap ., struct dentry' include/linux/fs.h))
-ccflags-y += -DKC_GET_ACL_DENTRY
+# v6.1-rc1-2-g138060ba92b3
+#
+# set_acl now passed a struct dentry instead of inode.
+#
+ifneq (,$(shell grep 'int ..set_acl.*struct dentry' include/linux/fs.h))
+ccflags-y += -DKC_SET_ACL_DENTRY
+endif
+
+#
+# v6.1-rc1-3-gcac2f8b8d8b5
+#
+# get_acl renamed to get_inode_acl.
+#
+ifneq (,$(shell grep 'struct posix_acl.*get_inode_acl' include/linux/fs.h))
+ccflags-y += -DKC_GET_INODE_ACL
 endif
--- a/kmod/src/acl.c
+++ b/kmod/src/acl.c
@@ -107,20 +107,22 @@ struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct s
 	return acl;
 }

-#ifdef KC_GET_ACL_DENTRY
-struct posix_acl *scoutfs_get_acl(KC_VFS_NS_DEF
-				  struct dentry *dentry, int type)
-{
-	struct inode *inode = dentry->d_inode;
+#ifdef KC_GET_INODE_ACL
+struct posix_acl *scoutfs_get_acl(struct inode *inode, int type, bool rcu)
 #else
 struct posix_acl *scoutfs_get_acl(struct inode *inode, int type)
-{
 #endif
+{
 	struct super_block *sb = inode->i_sb;
 	struct scoutfs_lock *lock = NULL;
 	struct posix_acl *acl;
 	int ret;

+#ifdef KC_GET_INODE_ACL
+	if (rcu)
+		return ERR_PTR(-ECHILD);
+#endif
+
 #ifndef KC___POSIX_ACL_CREATE
 	if (!IS_POSIXACL(inode))
 		return NULL;
@@ -208,7 +210,7 @@ out:
 	return ret;
 }

-#ifdef KC_GET_ACL_DENTRY
+#ifdef KC_SET_ACL_DENTRY
 int scoutfs_set_acl(KC_VFS_NS_DEF
 		    struct dentry *dentry, struct posix_acl *acl, int type)
 {
@@ -254,9 +256,8 @@ int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value,
 	if (!IS_POSIXACL(dentry->d_inode))
 		return -EOPNOTSUPP;

-#ifdef KC_GET_ACL_DENTRY
-	acl = scoutfs_get_acl(KC_VFS_INIT_NS
-			      dentry, type);
+#ifdef KC_GET_INODE_ACL
+	acl = scoutfs_get_acl(dentry->d_inode, type, false);
 #else
 	acl = scoutfs_get_acl(dentry->d_inode, type);
 #endif
@@ -305,7 +306,7 @@ int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *v
 		}
 	}

-#ifdef KC_GET_ACL_DENTRY
+#ifdef KC_SET_ACL_DENTRY
 	ret = scoutfs_set_acl(KC_VFS_INIT_NS dentry, acl, type);
 #else
 	ret = scoutfs_set_acl(dentry->d_inode, acl, type);
--- a/kmod/src/acl.h
+++ b/kmod/src/acl.h
@@ -1,12 +1,16 @@
 #ifndef _SCOUTFS_ACL_H_
 #define _SCOUTFS_ACL_H_

-#ifdef KC_GET_ACL_DENTRY
-struct posix_acl *scoutfs_get_acl(KC_VFS_NS_DEF struct dentry *dentry, int type);
-int scoutfs_set_acl(KC_VFS_NS_DEF struct dentry *dentry, struct posix_acl *acl, int type);
+#ifdef KC_SET_ACL_DENTRY
+int scoutfs_set_acl(KC_VFS_NS_DEF
+		    struct dentry *dentry, struct posix_acl *acl, int type);
+#else
+int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+#endif
+#ifdef KC_GET_INODE_ACL
+struct posix_acl *scoutfs_get_acl(struct inode *inode, int type, bool rcu);
 #else
 struct posix_acl *scoutfs_get_acl(struct inode *inode, int type);
-int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
 #endif
 struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct scoutfs_lock *lock);
 int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type,
--- a/kmod/src/alloc.c
+++ b/kmod/src/alloc.c
@@ -24,6 +24,7 @@
 #include "trans.h"
 #include "alloc.h"
 #include "counters.h"
+#include "msg.h"
 #include "scoutfs_trace.h"

 /*
@@ -496,10 +497,11 @@ static int dirty_alloc_blocks(struct super_block *sb,
 	struct scoutfs_block *fr_bl = NULL;
 	struct scoutfs_block *bl;
 	bool link_orig = false;
+	__le32 orig_first_nr;
 	u64 av_peek;
-	u64 av_old;
+	u64 av_old = 0;
 	u64 fr_peek;
-	u64 fr_old;
+	u64 fr_old = 0;
 	int ret;

 	if (alloc->dirty_avail_bl != NULL)
@@ -509,6 +511,7 @@ static int dirty_alloc_blocks(struct super_block *sb,

 	/* undo dirty freed if we get an error after */
 	orig_freed = alloc->freed.ref;
+	orig_first_nr = alloc->freed.first_nr;

 	if (alloc->dirty_avail_bl != NULL) {
 		ret = 0;
@@ -562,6 +565,17 @@ static int dirty_alloc_blocks(struct super_block *sb,
 	/* sort dirty avail to encourage contiguous sorted meta blocks */
 	list_block_sort(av_bl->data);

+	lblk = fr_bl->data;
+	if (WARN_ON_ONCE(alloc->freed.ref.blkno != lblk->hdr.blkno)) {
+		scoutfs_err(sb, "dirty_alloc freed ref %llu hdr %llu av_old %llu fr_old %llu av_peek %llu fr_peek %llu link_orig %d",
+			    le64_to_cpu(alloc->freed.ref.blkno),
+			    le64_to_cpu(lblk->hdr.blkno),
+			    av_old, fr_old, av_peek, fr_peek, link_orig);
+		ret = -EIO;
+		goto out;
+	}
+	lblk = NULL;
+
 	if (av_old)
 		list_block_add(&alloc->freed, fr_bl->data, av_old);
 	if (fr_old)
@@ -578,6 +592,7 @@ out:
 		if (fr_bl)
 			scoutfs_block_writer_forget(sb, wri, fr_bl);
 		alloc->freed.ref = orig_freed;
+		alloc->freed.first_nr = orig_first_nr;
 	}

 	mutex_unlock(&alloc->mutex);
--- a/kmod/src/block.c
+++ b/kmod/src/block.c
@@ -218,6 +218,7 @@ static void block_free_work(struct work_struct *work)

 	llist_for_each_entry_safe(bp, tmp, deleted, free_node) {
 		block_free(sb, bp);
+		cond_resched();
 	}
 }

@@ -467,9 +468,6 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
 	sector_t sector;
 	int ret = 0;

-	if (scoutfs_forcing_unmount(sb))
-		return -ENOLINK;
-
 	sector = bp->bl.blkno << (SCOUTFS_BLOCK_LG_SHIFT - 9);

 	WARN_ON_ONCE(bp->bl.blkno == U64_MAX);
@@ -480,6 +478,17 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
 	set_bit(BLOCK_BIT_IO_BUSY, &bp->bits);
 	block_get(bp);

+	/*
+	 * A second thread may already be waiting on this block's completion
+	 * after this thread won the race to submit the block.  We exit through
+	 * the block_end_io error path which sets BLOCK_BIT_ERROR and assures
+	 * that other callers in the waitq get woken up.
+	 */
+	if (scoutfs_forcing_unmount(sb)) {
+		ret = -ENOLINK;
+		goto end_io;
+	}
+
 	blk_start_plug(&plug);

 	for (off = 0; off < SCOUTFS_BLOCK_LG_SIZE; off += PAGE_SIZE) {
@@ -517,6 +526,7 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,

 	blk_finish_plug(&plug);

+end_io:
 	/* let racing end_io know we're done */
 	block_end_io(sb, opf, bp, ret);

@@ -836,6 +846,8 @@ int scoutfs_block_dirty_ref(struct super_block *sb, struct scoutfs_alloc *alloc,
 		bp = BLOCK_PRIVATE(bl);

 		if (block_is_dirty(bp)) {
+			if (ref_blkno)
+				*ref_blkno = 0;
 			ret = 0;
 			goto out;
 		}
--- a/kmod/src/btree.c
+++ b/kmod/src/btree.c
@@ -2183,6 +2183,8 @@ static int merge_read_item(struct super_block *sb, struct scoutfs_key *key, u64
 		if (ret > 0) {
 			if (ret == SCOUTFS_DELTA_COMBINED) {
 				scoutfs_inc_counter(sb, btree_merge_delta_combined);
+				if (seq > found->seq)
+					found->seq = seq;
 			} else if (ret == SCOUTFS_DELTA_COMBINED_NULL) {
 				scoutfs_inc_counter(sb, btree_merge_delta_null);
 				free_mitem(rng, found);
@@ -2486,6 +2488,14 @@ int scoutfs_btree_merge(struct super_block *sb,
 			mitem = next_mitem(mitem);
 			free_mitem(&rng, tmp);
 		}
+
+		if (mitem && walk_val_len == 0 &&
+		    !(walk_flags & (BTW_INSERT | BTW_DELETE)) &&
+		    scoutfs_trigger(sb, LOG_MERGE_FORCE_PARTIAL)) {
+			ret = -ERANGE;
+			*next_ret = mitem->key;
+			goto out;
+		}
 	}

 	ret = 0;
--- a/kmod/src/client.c
+++ b/kmod/src/client.c
@@ -59,6 +59,31 @@ struct client_info {
 	struct completion farewell_comp;
 };

+/*
+ * Reconnection to a new server completes pending sync requests with
+ * -ECONNRESET because their state in the old server was reclaimed at
+ * fence time.  Transparently retry so callers don't surface the
+ * reconnect as a failed RPC; preserve the pre-drain behavior where a
+ * sync request was silently resent across failover.  Shutdown paths
+ * break the loop via the errors that submit and wait already return.
+ */
+static int client_sync_request(struct super_block *sb,
+			       struct scoutfs_net_connection *conn,
+			       u8 cmd, void *arg, unsigned arg_len,
+			       void *resp, size_t resp_len)
+{
+	int ret;
+
+	for (;;) {
+		ret = scoutfs_net_sync_request(sb, conn, cmd, arg, arg_len,
+					       resp, resp_len);
+		if (ret != -ECONNRESET)
+			return ret;
+		if (scoutfs_unmounting(sb) || scoutfs_forcing_unmount(sb))
+			return -ESHUTDOWN;
+	}
+}
+
 /*
 * Ask for a new run of allocated inode numbers.  The server can return
 * fewer than @count.  It will success with nr == 0 if we've run out.
@@ -72,10 +97,10 @@ int scoutfs_client_alloc_inodes(struct super_block *sb, u64 count,
 	u64 tmp;
 	int ret;

-	ret = scoutfs_net_sync_request(sb, client->conn,
-				       SCOUTFS_NET_CMD_ALLOC_INODES,
-				       &lecount, sizeof(lecount),
-				       &ial, sizeof(ial));
+	ret = client_sync_request(sb, client->conn,
+				  SCOUTFS_NET_CMD_ALLOC_INODES,
+				  &lecount, sizeof(lecount),
+				  &ial, sizeof(ial));
 	if (ret == 0) {
 		*ino = le64_to_cpu(ial.ino);
 		*nr = le64_to_cpu(ial.nr);
@@ -94,9 +119,9 @@ int scoutfs_client_get_log_trees(struct super_block *sb,
 {
 	struct client_info *client = SCOUTFS_SB(sb)->client_info;

-	return scoutfs_net_sync_request(sb, client->conn,
-					SCOUTFS_NET_CMD_GET_LOG_TREES,
-					NULL, 0, lt, sizeof(*lt));
+	return client_sync_request(sb, client->conn,
+				   SCOUTFS_NET_CMD_GET_LOG_TREES,
+				   NULL, 0, lt, sizeof(*lt));
 }

 int scoutfs_client_commit_log_trees(struct super_block *sb,
@@ -104,9 +129,9 @@ int scoutfs_client_commit_log_trees(struct super_block *sb,
 {
 	struct client_info *client = SCOUTFS_SB(sb)->client_info;

-	return scoutfs_net_sync_request(sb, client->conn,
-					SCOUTFS_NET_CMD_COMMIT_LOG_TREES,
-					lt, sizeof(*lt), NULL, 0);
+	return client_sync_request(sb, client->conn,
+				   SCOUTFS_NET_CMD_COMMIT_LOG_TREES,
+				   lt, sizeof(*lt), NULL, 0);
 }

 int scoutfs_client_get_roots(struct super_block *sb,
@@ -114,9 +139,26 @@ int scoutfs_client_get_roots(struct super_block *sb,
 {
 	struct client_info *client = SCOUTFS_SB(sb)->client_info;

-	return scoutfs_net_sync_request(sb, client->conn,
-					SCOUTFS_NET_CMD_GET_ROOTS,
-					NULL, 0, roots, sizeof(*roots));
+	return client_sync_request(sb, client->conn,
+				   SCOUTFS_NET_CMD_GET_ROOTS,
+				   NULL, 0, roots, sizeof(*roots));
+}
+
+/*
+ * Bounded-wait get_roots for the orphan scan worker.  The worker
+ * reschedules on error, so -ETIMEDOUT is treated like any other RPC
+ * failure and retries on the next scan.
+ */
+int scoutfs_client_get_roots_timeout(struct super_block *sb,
+				     struct scoutfs_net_roots *roots,
+				     unsigned long timeout_jiffies)
+{
+	struct client_info *client = SCOUTFS_SB(sb)->client_info;
+
+	return scoutfs_net_sync_request_timeout(sb, client->conn,
+						SCOUTFS_NET_CMD_GET_ROOTS,
+						NULL, 0, roots, sizeof(*roots),
+						timeout_jiffies);
 }

 int scoutfs_client_get_last_seq(struct super_block *sb, u64 *seq)
@@ -125,9 +167,9 @@ int scoutfs_client_get_last_seq(struct super_block *sb, u64 *seq)
 	__le64 last_seq;
 	int ret;

-	ret = scoutfs_net_sync_request(sb, client->conn,
-				       SCOUTFS_NET_CMD_GET_LAST_SEQ,
-				       NULL, 0, &last_seq, sizeof(last_seq));
+	ret = client_sync_request(sb, client->conn,
+				  SCOUTFS_NET_CMD_GET_LAST_SEQ,
+				  NULL, 0, &last_seq, sizeof(last_seq));
 	if (ret == 0)
 		*seq = le64_to_cpu(last_seq);

@@ -140,24 +182,34 @@ static int client_lock_response(struct super_block *sb,
 				void *resp, unsigned int resp_len,
 				int error, void *data)
 {
+	struct scoutfs_lock *lock = data;
+
+	if (error) {
+		scoutfs_lock_request_failed(sb, lock);
+		return 0;
+	}
+
 	if (resp_len != sizeof(struct scoutfs_net_lock))
 		return -EINVAL;

-	/* XXX error? */
-
 	return scoutfs_lock_grant_response(sb, resp);
 }

-/* Send a lock request to the server. */
+/*
+ * Send a lock request to the server.  The lock is anchored by
+ * request_pending so its address is stable until the response callback
+ * runs and clears request_pending on either the grant or error path.
+ */
 int scoutfs_client_lock_request(struct super_block *sb,
-				struct scoutfs_net_lock *nl)
+				struct scoutfs_net_lock *nl,
+				struct scoutfs_lock *lock)
 {
 	struct client_info *client = SCOUTFS_SB(sb)->client_info;

 	return scoutfs_net_submit_request(sb, client->conn,
 					  SCOUTFS_NET_CMD_LOCK,
 					  nl, sizeof(*nl),
-					  client_lock_response, NULL, NULL);
+					  client_lock_response, lock, NULL);
 }

 /* Send a lock response to the server. */
@@ -189,9 +241,26 @@ int scoutfs_client_srch_get_compact(struct super_block *sb,
 {
 	struct client_info *client = SCOUTFS_SB(sb)->client_info;

-	return scoutfs_net_sync_request(sb, client->conn,
-					SCOUTFS_NET_CMD_SRCH_GET_COMPACT,
-					NULL, 0, sc, sizeof(*sc));
+	return client_sync_request(sb, client->conn,
+				   SCOUTFS_NET_CMD_SRCH_GET_COMPACT,
+				   NULL, 0, sc, sizeof(*sc));
+}
+
+/*
+ * Bounded-wait get_compact for the srch compact worker.  The worker
+ * reschedules on any error and the compact work is idempotent, so
+ * -ETIMEDOUT just defers this round.
+ */
+int scoutfs_client_srch_get_compact_timeout(struct super_block *sb,
+					    struct scoutfs_srch_compact *sc,
+					    unsigned long timeout_jiffies)
+{
+	struct client_info *client = SCOUTFS_SB(sb)->client_info;
+
+	return scoutfs_net_sync_request_timeout(sb, client->conn,
+						SCOUTFS_NET_CMD_SRCH_GET_COMPACT,
+						NULL, 0, sc, sizeof(*sc),
+						timeout_jiffies);
 }

 /* Commit the result of a srch file compaction. */
@@ -200,9 +269,27 @@ int scoutfs_client_srch_commit_compact(struct super_block *sb,
 {
 	struct client_info *client = SCOUTFS_SB(sb)->client_info;

-	return scoutfs_net_sync_request(sb, client->conn,
-					SCOUTFS_NET_CMD_SRCH_COMMIT_COMPACT,
-					res, sizeof(*res), NULL, 0);
+	return client_sync_request(sb, client->conn,
+				   SCOUTFS_NET_CMD_SRCH_COMMIT_COMPACT,
+				   res, sizeof(*res), NULL, 0);
+}
+
+/*
+ * Bounded-wait commit_compact for the srch compact worker.  The server
+ * ignores partial work flagged with ERROR, so a timed-out commit
+ * (marked ERROR on this side) lets the server reclaim our allocators
+ * and reassign the compact on the next scheduled attempt.
+ */
+int scoutfs_client_srch_commit_compact_timeout(struct super_block *sb,
+					       struct scoutfs_srch_compact *res,
+					       unsigned long timeout_jiffies)
+{
+	struct client_info *client = SCOUTFS_SB(sb)->client_info;
+
+	return scoutfs_net_sync_request_timeout(sb, client->conn,
+						SCOUTFS_NET_CMD_SRCH_COMMIT_COMPACT,
+						res, sizeof(*res), NULL, 0,
+						timeout_jiffies);
 }

 int scoutfs_client_get_log_merge(struct super_block *sb,
@@ -210,9 +297,9 @@ int scoutfs_client_get_log_merge(struct super_block *sb,
 {
 	struct client_info *client = SCOUTFS_SB(sb)->client_info;

-	return scoutfs_net_sync_request(sb, client->conn,
-					SCOUTFS_NET_CMD_GET_LOG_MERGE,
-					NULL, 0, req, sizeof(*req));
+	return client_sync_request(sb, client->conn,
+				   SCOUTFS_NET_CMD_GET_LOG_MERGE,
+				   NULL, 0, req, sizeof(*req));
 }

 int scoutfs_client_commit_log_merge(struct super_block *sb,
@@ -220,9 +307,9 @@ int scoutfs_client_commit_log_merge(struct super_block *sb,
 {
 	struct client_info *client = SCOUTFS_SB(sb)->client_info;

-	return scoutfs_net_sync_request(sb, client->conn,
-					SCOUTFS_NET_CMD_COMMIT_LOG_MERGE,
-					comp, sizeof(*comp), NULL, 0);
+	return client_sync_request(sb, client->conn,
+				   SCOUTFS_NET_CMD_COMMIT_LOG_MERGE,
+				   comp, sizeof(*comp), NULL, 0);
 }

 int scoutfs_client_send_omap_response(struct super_block *sb, u64 id,
@@ -254,8 +341,30 @@ int scoutfs_client_open_ino_map(struct super_block *sb, u64 group_nr,
 		.req_id = 0,
 	};

-	return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_OPEN_INO_MAP,
-					&args, sizeof(args), map, sizeof(*map));
+	return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_OPEN_INO_MAP,
+				   &args, sizeof(args), map, sizeof(*map));
+}
+
+/*
+ * Bounded-wait open_ino_map for the orphan scan worker.  The scan
+ * reschedules on error; the delete path callers keep the unbounded
+ * retry.
+ */
+int scoutfs_client_open_ino_map_timeout(struct super_block *sb, u64 group_nr,
+					struct scoutfs_open_ino_map *map,
+					unsigned long timeout_jiffies)
+{
+	struct client_info *client = SCOUTFS_SB(sb)->client_info;
+	struct scoutfs_open_ino_map_args args = {
+		.group_nr = cpu_to_le64(group_nr),
+		.req_id = 0,
+	};
+
+	return scoutfs_net_sync_request_timeout(sb, client->conn,
+						SCOUTFS_NET_CMD_OPEN_INO_MAP,
+						&args, sizeof(args),
+						map, sizeof(*map),
+						timeout_jiffies);
 }

 /* The client is asking the server for the current volume options */
@@ -263,8 +372,8 @@ int scoutfs_client_get_volopt(struct super_block *sb, struct scoutfs_volume_opti
 {
 	struct client_info *client = SCOUTFS_SB(sb)->client_info;

-	return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_GET_VOLOPT,
-					NULL, 0, volopt, sizeof(*volopt));
+	return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_GET_VOLOPT,
+				   NULL, 0, volopt, sizeof(*volopt));
 }

 /* The client is asking the server to update volume options */
@@ -272,8 +381,8 @@ int scoutfs_client_set_volopt(struct super_block *sb, struct scoutfs_volume_opti
 {
 	struct client_info *client = SCOUTFS_SB(sb)->client_info;

-	return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_SET_VOLOPT,
-					volopt, sizeof(*volopt), NULL, 0);
+	return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_SET_VOLOPT,
+				   volopt, sizeof(*volopt), NULL, 0);
 }

 /* The client is asking the server to clear volume options */
@@ -281,24 +390,24 @@ int scoutfs_client_clear_volopt(struct super_block *sb, struct scoutfs_volume_op
 {
 	struct client_info *client = SCOUTFS_SB(sb)->client_info;

-	return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_CLEAR_VOLOPT,
-					volopt, sizeof(*volopt), NULL, 0);
+	return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_CLEAR_VOLOPT,
+				   volopt, sizeof(*volopt), NULL, 0);
 }

 int scoutfs_client_resize_devices(struct super_block *sb, struct scoutfs_net_resize_devices *nrd)
 {
 	struct client_info *client = SCOUTFS_SB(sb)->client_info;

-	return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_RESIZE_DEVICES,
-					nrd, sizeof(*nrd), NULL, 0);
+	return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_RESIZE_DEVICES,
+				   nrd, sizeof(*nrd), NULL, 0);
 }

 int scoutfs_client_statfs(struct super_block *sb, struct scoutfs_net_statfs *nst)
 {
 	struct client_info *client = SCOUTFS_SB(sb)->client_info;

-	return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_STATFS,
-					NULL, 0, nst, sizeof(*nst));
+	return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_STATFS,
+				   NULL, 0, nst, sizeof(*nst));
 }

 /*
@@ -646,8 +755,12 @@ void scoutfs_client_destroy(struct super_block *sb)
 						 client_farewell_response,
 						 NULL, NULL);
 		if (ret == 0) {
-			wait_for_completion(&client->farewell_comp);
-			ret = client->farewell_error;
+			if (!wait_for_completion_timeout(&client->farewell_comp,
+							 120 * HZ)) {
+				ret = -ETIMEDOUT;
+			} else {
+				ret = client->farewell_error;
+			}
 		}
 		if (ret) {
 			scoutfs_inc_counter(sb, client_farewell_error);
@@ -661,10 +774,16 @@ void scoutfs_client_destroy(struct super_block *sb)
 	/* make sure worker isn't using the conn */
 	cancel_delayed_work_sync(&client->connect_dwork);

-	/* make racing conn use explode */
+	/*
+	 * Drain the conn's workers before nulling client->conn.  In-flight
+	 * proc_workers dispatch request handlers that call back into client
+	 * response helpers (e.g. scoutfs_client_lock_recover_response) which
+	 * read client->conn; nulling it first races with those workers and
+	 * causes submit_send to dereference a NULL conn->lock.
+	 */
 	conn = client->conn;
-	client->conn = NULL;
 	scoutfs_net_free_conn(sb, conn);
+	client->conn = NULL;

 	if (client->workq)
 		destroy_workqueue(client->workq);
--- a/kmod/src/client.h
+++ b/kmod/src/client.h
@@ -9,18 +9,28 @@ int scoutfs_client_commit_log_trees(struct super_block *sb,
 				    struct scoutfs_log_trees *lt);
 int scoutfs_client_get_roots(struct super_block *sb,
 			     struct scoutfs_net_roots *roots);
+int scoutfs_client_get_roots_timeout(struct super_block *sb,
+				     struct scoutfs_net_roots *roots,
+				     unsigned long timeout_jiffies);
 u64 *scoutfs_client_bulk_alloc(struct super_block *sb);
 int scoutfs_client_get_last_seq(struct super_block *sb, u64 *seq);
 int scoutfs_client_lock_request(struct super_block *sb,
-				struct scoutfs_net_lock *nl);
+				struct scoutfs_net_lock *nl,
+				struct scoutfs_lock *lock);
 int scoutfs_client_lock_response(struct super_block *sb, u64 net_id,
 				struct scoutfs_net_lock *nl);
 int scoutfs_client_lock_recover_response(struct super_block *sb, u64 net_id,
 					 struct scoutfs_net_lock_recover *nlr);
 int scoutfs_client_srch_get_compact(struct super_block *sb,
 				    struct scoutfs_srch_compact *sc);
+int scoutfs_client_srch_get_compact_timeout(struct super_block *sb,
+					    struct scoutfs_srch_compact *sc,
+					    unsigned long timeout_jiffies);
 int scoutfs_client_srch_commit_compact(struct super_block *sb,
 				       struct scoutfs_srch_compact *res);
+int scoutfs_client_srch_commit_compact_timeout(struct super_block *sb,
+					       struct scoutfs_srch_compact *res,
+					       unsigned long timeout_jiffies);
 int scoutfs_client_get_log_merge(struct super_block *sb,
 				 struct scoutfs_log_merge_request *req);
 int scoutfs_client_commit_log_merge(struct super_block *sb,
@@ -29,6 +39,9 @@ int scoutfs_client_send_omap_response(struct super_block *sb, u64 id,
 				      struct scoutfs_open_ino_map *map);
 int scoutfs_client_open_ino_map(struct super_block *sb, u64 group_nr,
 				struct scoutfs_open_ino_map *map);
+int scoutfs_client_open_ino_map_timeout(struct super_block *sb, u64 group_nr,
+					struct scoutfs_open_ino_map *map,
+					unsigned long timeout_jiffies);
 int scoutfs_client_get_volopt(struct super_block *sb, struct scoutfs_volume_options *volopt);
 int scoutfs_client_set_volopt(struct super_block *sb, struct scoutfs_volume_options *volopt);
 int scoutfs_client_clear_volopt(struct super_block *sb, struct scoutfs_volume_options *volopt);
--- a/kmod/src/counters.h
+++ b/kmod/src/counters.h
@@ -62,6 +62,7 @@
 	EXPAND_COUNTER(btree_walk)				\
 	EXPAND_COUNTER(btree_walk_restart)			\
 	EXPAND_COUNTER(client_farewell_error)			\
+	EXPAND_COUNTER(client_rpc_timeout)			\
 	EXPAND_COUNTER(corrupt_btree_block_level)		\
 	EXPAND_COUNTER(corrupt_btree_no_child_ref)		\
 	EXPAND_COUNTER(corrupt_dirent_backref_name_len)		\
@@ -138,6 +139,7 @@
 	EXPAND_COUNTER(lock_lock_error)				\
 	EXPAND_COUNTER(lock_nonblock_eagain)			\
 	EXPAND_COUNTER(lock_recover_request)			\
+	EXPAND_COUNTER(lock_request_failed)			\
 	EXPAND_COUNTER(lock_shrink_attempted)			\
 	EXPAND_COUNTER(lock_shrink_request_failed)		\
 	EXPAND_COUNTER(lock_unlock)				\
--- a/kmod/src/data.c
+++ b/kmod/src/data.c
@@ -79,8 +79,10 @@ static void item_from_extent(struct scoutfs_key *key,
 		.skdx_end = cpu_to_le64(start + len - 1),
 		.skdx_len = cpu_to_le64(len),
 	};
-	dv->blkno = cpu_to_le64(map);
-	dv->flags = flags;
+	*dv = (struct scoutfs_data_extent_val) {
+		.blkno = cpu_to_le64(map),
+		.flags = flags,
+	};
 }

 static void ext_from_item(struct scoutfs_extent *ext,
@@ -1515,6 +1517,101 @@ out:
 	return ret;
 }

+/*
+ * Punch holes in offline extents.  This is a very specific tool that
+ * only does one job: it converts extents from offline to sparse.  It
+ * returns an error if it encounters an extent that isn't offline or has
+ * a block mapping.  It ignores i_size completely; it does not test it,
+ * and does not update it.
+ *
+ * The caller has the inode locked in the vfs and performed basic sanity
+ * checks.  We manage transactions and the extent_sem which is ordered
+ * inside the transaction.
+ */
+int scoutfs_data_punch_offline(struct inode *inode, u64 iblock, u64 last, u64 data_version,
+			       struct scoutfs_lock *lock)
+{
+	struct scoutfs_inode_info *si = SCOUTFS_I(inode);
+	struct super_block *sb = inode->i_sb;
+	struct data_ext_args args = {
+		.ino = scoutfs_ino(inode),
+		.inode = inode,
+		.lock = lock,
+	};
+	struct scoutfs_extent ext;
+	LIST_HEAD(ind_locks);
+	int ret;
+	int i;
+
+	if (WARN_ON_ONCE(iblock > last)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* idiomatic to call start,last with 0,~0, clamp last to last possible */
+	last = min(last, SCOUTFS_BLOCK_SM_MAX);
+
+	ret = 0;
+	while (iblock <= last) {
+		ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, true, false) ?:
+		      scoutfs_dirty_inode_item(inode, lock);
+		if (ret < 0)
+			break;
+
+		down_write(&si->extent_sem);
+
+		for (i = 0; i < 32 && (iblock <= last); i++) {
+			ret = scoutfs_ext_next(sb, &data_ext_ops, &args, iblock, 1, &ext);
+			if (ret == -ENOENT) {
+				iblock = last + 1;
+				ret = 0;
+				break;
+			}
+
+			if (ret < 0)
+				break;
+
+			if (ext.start > last) {
+				iblock = last + 1;
+				break;
+			}
+
+			if (ext.map) {
+				ret = -EINVAL;
+				break;
+			}
+
+			if (ext.flags & SEF_OFFLINE) {
+				if (iblock > ext.start) {
+					ext.len -= iblock - ext.start;
+					ext.start = iblock;
+				}
+				ext.len = min(ext.len, last - ext.start + 1);
+				ext.flags &= ~SEF_OFFLINE;
+
+				ret = scoutfs_ext_set(sb, &data_ext_ops, &args,
+						      ext.start, ext.len, ext.map, ext.flags);
+				if (ret < 0)
+					break;
+			}
+
+			iblock = ext.start + ext.len;
+		}
+
+		up_write(&si->extent_sem);
+
+		scoutfs_update_inode_item(inode, lock, &ind_locks);
+		scoutfs_release_trans(sb);
+		scoutfs_inode_index_unlock(sb, &ind_locks);
+
+		if (ret < 0)
+			break;
+	}
+
+out:
+	return ret;
+}
+
 /*
 * This copies to userspace :/
 */
--- a/kmod/src/data.h
+++ b/kmod/src/data.h
@@ -57,6 +57,8 @@ int scoutfs_data_init_offline_extent(struct inode *inode, u64 size,
 int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
 			     u64 byte_len, struct inode *to, u64 to_off, bool to_stage,
 			     u64 data_version);
+int scoutfs_data_punch_offline(struct inode *inode, u64 iblock, u64 last, u64 data_version,
+			       struct scoutfs_lock *lock);

 int scoutfs_data_wait_check(struct inode *inode, loff_t pos, loff_t len,
 			    u8 sef, u8 op, struct scoutfs_data_wait *ow,
--- a/kmod/src/dir.c
+++ b/kmod/src/dir.c
@@ -587,10 +587,12 @@ static int add_entry_items(struct super_block *sb, u64 dir_ino, u64 hash,
 	}

 	/* initialize the dent */
-	dent->ino = cpu_to_le64(ino);
-	dent->hash = cpu_to_le64(hash);
-	dent->pos = cpu_to_le64(pos);
-	dent->type = mode_to_type(mode);
+	*dent = (struct scoutfs_dirent) {
+		.ino = cpu_to_le64(ino),
+		.hash = cpu_to_le64(hash),
+		.pos = cpu_to_le64(pos),
+		.type = mode_to_type(mode),
+	};
 	memcpy(dent->name, name, name_len);

 	init_dirent_key(&ent_key, SCOUTFS_DIRENT_TYPE, dir_ino, hash, pos);
@@ -2006,7 +2008,11 @@ const struct inode_operations scoutfs_symlink_iops = {
 #ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
 	.removexattr	= generic_removexattr,
 #endif
+#ifdef KC_GET_INODE_ACL
+	.get_inode_acl	= scoutfs_get_acl,
+#else
 	.get_acl	= scoutfs_get_acl,
+#endif
 #ifndef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
 	.tmpfile	= scoutfs_tmpfile,
 	.rename		= scoutfs_rename_common,
@@ -2052,8 +2058,12 @@ const struct inode_operations scoutfs_dir_iops = {
 	.removexattr	= generic_removexattr,
 #endif
 	.listxattr	= scoutfs_listxattr,
+#ifdef KC_GET_INODE_ACL
+	.get_inode_acl	= scoutfs_get_acl,
+#else
 	.get_acl	= scoutfs_get_acl,
-#ifdef KC_GET_ACL_DENTRY
+#endif
+#ifdef KC_SET_ACL_DENTRY
 	.set_acl	= scoutfs_set_acl,
 #endif
 	.symlink	= scoutfs_symlink,
--- a/kmod/src/forest.c
+++ b/kmod/src/forest.c
@@ -239,9 +239,9 @@ static int forest_read_items(struct super_block *sb, struct scoutfs_key *key, u6
 * to reset their state and retry with a newer version of the btrees.
 */
 int scoutfs_forest_read_items_roots(struct super_block *sb, struct scoutfs_net_roots *roots,
-				    struct scoutfs_key *key, struct scoutfs_key *bloom_key,
-				    struct scoutfs_key *start, struct scoutfs_key *end,
-				    scoutfs_forest_item_cb cb, void *arg)
+				    u64 merge_input_seq, struct scoutfs_key *key,
+				    struct scoutfs_key *bloom_key, struct scoutfs_key *start,
+				    struct scoutfs_key *end, scoutfs_forest_item_cb cb, void *arg)
 {
 	struct forest_read_items_data rid = {
 		.cb = cb,
@@ -317,15 +317,17 @@ int scoutfs_forest_read_items_roots(struct super_block *sb, struct scoutfs_net_r

 		scoutfs_inc_counter(sb, forest_bloom_pass);

-		if ((le64_to_cpu(lt.flags) & SCOUTFS_LOG_TREES_FINALIZED))
-			rid.fic |= FIC_FINALIZED;
+		if ((le64_to_cpu(lt.flags) & SCOUTFS_LOG_TREES_FINALIZED) &&
+		    (merge_input_seq == 0 ||
+		     le64_to_cpu(lt.finalize_seq) < merge_input_seq))
+			rid.fic |= FIC_MERGE_INPUT;

 		ret = scoutfs_btree_read_items(sb, &lt.item_root, key, start,
 					       end, forest_read_items, &rid);
 		if (ret < 0)
 			goto out;

-		rid.fic &= ~FIC_FINALIZED;
+		rid.fic &= ~FIC_MERGE_INPUT;
 	}

 	ret = 0;
@@ -345,7 +347,7 @@ int scoutfs_forest_read_items(struct super_block *sb,

 	ret = scoutfs_client_get_roots(sb, &roots);
 	if (ret == 0)
-		ret = scoutfs_forest_read_items_roots(sb, &roots, key, bloom_key, start, end,
+		ret = scoutfs_forest_read_items_roots(sb, &roots, 0, key, bloom_key, start, end,
 						      cb, arg);
 	return ret;
 }
@@ -793,7 +795,7 @@ out:
 	if (ret)
 		scoutfs_forest_destroy(sb);

-	return 0;
+	return ret;
 }

 void scoutfs_forest_start(struct super_block *sb)
--- a/kmod/src/forest.h
+++ b/kmod/src/forest.h
@@ -11,7 +11,7 @@ struct scoutfs_lock;
 /* caller gives an item to the callback */
 enum {
 	FIC_FS_ROOT = (1 << 0),
-	FIC_FINALIZED = (1 << 1),
+	FIC_MERGE_INPUT = (1 << 1),
 };
 typedef int (*scoutfs_forest_item_cb)(struct super_block *sb, struct scoutfs_key *key, u64 seq,
 				      u8 flags, void *val, int val_len, int fic, void *arg);
@@ -25,9 +25,9 @@ int scoutfs_forest_read_items(struct super_block *sb,
 			      struct scoutfs_key *end,
 			      scoutfs_forest_item_cb cb, void *arg);
 int scoutfs_forest_read_items_roots(struct super_block *sb, struct scoutfs_net_roots *roots,
-				    struct scoutfs_key *key, struct scoutfs_key *bloom_key,
-				    struct scoutfs_key *start, struct scoutfs_key *end,
-				    scoutfs_forest_item_cb cb, void *arg);
+				    u64 merge_input_seq, struct scoutfs_key *key,
+				    struct scoutfs_key *bloom_key, struct scoutfs_key *start,
+				    struct scoutfs_key *end, scoutfs_forest_item_cb cb, void *arg);
 int scoutfs_forest_set_bloom_bits(struct super_block *sb,
 				  struct scoutfs_lock *lock);
 void scoutfs_forest_set_max_seq(struct super_block *sb, u64 max_seq);
--- a/kmod/src/inode.c
+++ b/kmod/src/inode.c
@@ -149,8 +149,12 @@ static const struct inode_operations scoutfs_file_iops = {
 	.removexattr	= generic_removexattr,
 #endif
 	.listxattr	= scoutfs_listxattr,
+#ifdef KC_GET_INODE_ACL
+	.get_inode_acl	= scoutfs_get_acl,
+#else
 	.get_acl	= scoutfs_get_acl,
-#ifdef KC_GET_ACL_DENTRY
+#endif
+#ifdef KC_SET_ACL_DENTRY
 	.set_acl	= scoutfs_set_acl,
 #endif
 	.fiemap		= scoutfs_data_fiemap,
@@ -165,8 +169,12 @@ static const struct inode_operations scoutfs_special_iops = {
 	.removexattr	= generic_removexattr,
 #endif
 	.listxattr	= scoutfs_listxattr,
+#ifdef KC_GET_INODE_ACL
+	.get_inode_acl	= scoutfs_get_acl,
+#else
 	.get_acl	= scoutfs_get_acl,
-#ifdef KC_GET_ACL_DENTRY
+#endif
+#ifdef KC_SET_ACL_DENTRY
 	.set_acl	= scoutfs_set_acl,
 #endif
 };
@@ -2066,6 +2074,14 @@ void scoutfs_inode_schedule_orphan_dwork(struct super_block *sb)
 	}
 }

+/*
+ * Generous per-RPC bound for the idempotent orphan scan worker.  A
+ * server that hasn't answered in this long is assumed to be broken;
+ * dropping the request lets the scan reschedule instead of blocking
+ * forever.
+ */
+#define ORPHAN_SCAN_RPC_TIMEOUT (5 * 60 * HZ)
+
 /*
 * Find and delete inodes whose only remaining reference is the
 * persistent orphan item that was created as they were unlinked.
@@ -2120,7 +2136,7 @@ static void inode_orphan_scan_worker(struct work_struct *work)
 	init_orphan_key(&last, U64_MAX);
 	omap.args.group_nr = cpu_to_le64(U64_MAX);

-	ret = scoutfs_client_get_roots(sb, &roots);
+	ret = scoutfs_client_get_roots_timeout(sb, &roots, ORPHAN_SCAN_RPC_TIMEOUT);
 	if (ret)
 		goto out;

@@ -2161,7 +2177,8 @@ static void inode_orphan_scan_worker(struct work_struct *work)
 		scoutfs_omap_calc_group_nrs(ino, &group_nr, &bit_nr);

 		if (le64_to_cpu(omap.args.group_nr) != group_nr) {
-			ret = scoutfs_client_open_ino_map(sb, group_nr, &omap);
+			ret = scoutfs_client_open_ino_map_timeout(sb, group_nr, &omap,
+								  ORPHAN_SCAN_RPC_TIMEOUT);
 			if (ret < 0)
 				goto out;
 		}
--- a/kmod/src/ioctl.c
+++ b/kmod/src/ioctl.c
@@ -415,8 +415,6 @@ static long scoutfs_ioc_data_wait_err(struct file *file, unsigned long arg)
 		return 0;
 	if ((args.op & SCOUTFS_IOC_DWO_UNKNOWN) || !IS_ERR_VALUE(args.err))
 		return -EINVAL;
-	if ((args.op & SCOUTFS_IOC_DWO_UNKNOWN) || !IS_ERR_VALUE(args.err))
-		return -EINVAL;

 	trace_scoutfs_ioc_data_wait_err(sb, &args);

@@ -1669,6 +1667,78 @@ out:
 	return ret;
 }

+/*
+ * Convert offline extents in a block-aligned byte region of a regular
+ * file into sparse extents via scoutfs_data_punch_offline().  Needs a
+ * writable file and a data_version that matches the inode's.
+ */
+static long scoutfs_ioc_punch_offline(struct file *file, unsigned long arg)
+{
+	struct scoutfs_ioctl_punch_offline __user *upo = (void __user *)arg;
+	struct inode *inode = file_inode(file);
+	struct super_block *sb = inode->i_sb;
+	struct scoutfs_ioctl_punch_offline po;
+	struct scoutfs_lock *lock = NULL;
+	u64 last_byte;
+	int ret;
+
+	if (copy_from_user(&po, upo, sizeof(po)))
+		return -EFAULT;
+
+	if (po.len == 0)
+		return 0;
+
+	/* the byte region can't wrap and must cover whole small blocks */
+	if (check_add_overflow(po.offset, po.len - 1, &last_byte) ||
+	    ((po.offset | po.len) & SCOUTFS_BLOCK_SM_MASK))
+		return -EOVERFLOW;
+
+	if (po.flags)
+		return -EINVAL;
+
+	ret = mnt_want_write_file(file);
+	if (ret < 0)
+		return ret;
+
+	inode_lock(inode);
+
+	ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
+				 SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
+	if (ret)
+		goto out;
+
+	/* only regular files opened for writing can be punched */
+	if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = inode_permission(KC_VFS_INIT_NS inode, MAY_WRITE);
+	if (ret < 0)
+		goto out;
+
+	/* the data_version supplied by the caller must match the inode's */
+	if (scoutfs_inode_data_version(inode) != po.data_version) {
+		ret = -ESTALE;
+		goto out;
+	}
+
+	ret = scoutfs_inode_check_retention(inode);
+	if (ret)
+		goto out;
+
+	ret = scoutfs_data_punch_offline(inode, po.offset >> SCOUTFS_BLOCK_SM_SHIFT,
+					 last_byte >> SCOUTFS_BLOCK_SM_SHIFT,
+					 po.data_version, lock);
+out:
+	/* lock may still be NULL here if scoutfs_lock_inode() failed */
+	scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
+	inode_unlock(inode);
+	mnt_drop_write_file(file);
+
+	return ret;
+}
+
 long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	switch (cmd) {
@@ -1718,6 +1788,8 @@ long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		return scoutfs_ioc_mod_quota_rule(file, arg, false);
 	case SCOUTFS_IOC_READ_XATTR_INDEX:
 		return scoutfs_ioc_read_xattr_index(file, arg);
+	case SCOUTFS_IOC_PUNCH_OFFLINE:
+		return scoutfs_ioc_punch_offline(file, arg);
 	}

 	return -ENOTTY;
--- a/kmod/src/ioctl.h
+++ b/kmod/src/ioctl.h
@@ -848,4 +848,32 @@ struct scoutfs_ioctl_read_xattr_index {
 #define SCOUTFS_IOC_READ_XATTR_INDEX \
 	_IOR(SCOUTFS_IOCTL_MAGIC, 23, struct scoutfs_ioctl_read_xattr_index)

+/*
+ * This is a limited and specific version of hole punching.  It's an
+ * archive layer operation that only converts unmapped offline extents
+ * into sparse extents.  It is intended to be used when restoring sparse
+ * files after the initial creation set the entire file size offline.
+ *
+ * The offset and len fields are in units of bytes and must be aligned
+ * to the small (4KiB) block size or EOVERFLOW is returned.  All regions
+ * of offline extents covered by the region will be converted into
+ * sparse online extents, including regions that straddle the boundaries
+ * of the region.  Any existing sparse extents in the region are ignored.
+ *
+ * The data_version must match the inode or ESTALE is returned.  The
+ * data_version is not modified by this operation.
+ *
+ * EINVAL is returned if flags is set or any mapped extents are found in
+ * the region.  On error partial progress may have been made.
+ */
+struct scoutfs_ioctl_punch_offline {
+	__u64 offset;
+	__u64 len;
+	__u64 data_version;
+	__u64 flags;	/* must be zero */
+};
+
+#define SCOUTFS_IOC_PUNCH_OFFLINE \
+	_IOW(SCOUTFS_IOCTL_MAGIC, 24, struct scoutfs_ioctl_punch_offline)
+
 #endif
--- a/kmod/src/lock.c
+++ b/kmod/src/lock.c
@@ -71,6 +71,8 @@
 * relative to that lock state we resend.
 */

+#define CLIENT_LOCK_WAIT_TIMEOUT (60 * HZ) /* max sleep between lock wait re-checks */
+
 /*
 * allocated per-super, freed on unmount.
 */
@@ -157,6 +159,33 @@ static void invalidate_inode(struct super_block *sb, u64 ino)
 	}
 }

+/*
+ * Remove every coverage item from the lock so users see that their
+ * cache is stale.  This is lock-internal bookkeeping that is safe to
+ * call during shutdown and unmount.  An item whose cov_lock is contended
+ * stays first in the list and is retried after cov_list_lock is dropped
+ * and retaken; doing that unconditionally also keeps sparse quiet.
+ */
+static void lock_clear_coverage(struct super_block *sb,
+				struct scoutfs_lock *lock)
+{
+	struct scoutfs_lock_coverage *cov;
+
+	spin_lock(&lock->cov_list_lock);
+	while ((cov = list_first_entry_or_null(&lock->cov_list,
+					       struct scoutfs_lock_coverage, head))) {
+		if (spin_trylock(&cov->cov_lock)) {
+			list_del_init(&cov->head);
+			cov->lock = NULL;
+			spin_unlock(&cov->cov_lock);
+			scoutfs_inc_counter(sb, lock_invalidate_coverage);
+		}
+		spin_unlock(&lock->cov_list_lock);
+		spin_lock(&lock->cov_list_lock);
+	}
+	spin_unlock(&lock->cov_list_lock);
+}
+
 /*
 * Invalidate caches associated with this lock.  Either we're
 * invalidating a write to a read or we're invalidating to null.  We
@@ -166,7 +195,6 @@ static void invalidate_inode(struct super_block *sb, u64 ino)
 static int lock_invalidate(struct super_block *sb, struct scoutfs_lock *lock,
 			   enum scoutfs_lock_mode prev, enum scoutfs_lock_mode mode)
 {
-	struct scoutfs_lock_coverage *cov;
 	u64 ino, last;
 	int ret = 0;

@@ -190,24 +218,7 @@ static int lock_invalidate(struct super_block *sb, struct scoutfs_lock *lock,

 	/* have to invalidate if we're not in the only usable case */
 	if (!(prev == SCOUTFS_LOCK_WRITE && mode == SCOUTFS_LOCK_READ)) {
-		/*
-		 * Remove cov items to tell users that their cache is
-		 * stale.  The unlock pattern comes from avoiding bad
-		 * sparse warnings when taking else in a failed trylock.
-		 */
-		spin_lock(&lock->cov_list_lock);
-		while ((cov = list_first_entry_or_null(&lock->cov_list,
-						       struct scoutfs_lock_coverage, head))) {
-			if (spin_trylock(&cov->cov_lock)) {
-				list_del_init(&cov->head);
-				cov->lock = NULL;
-				spin_unlock(&cov->cov_lock);
-				scoutfs_inc_counter(sb, lock_invalidate_coverage);
-			}
-			spin_unlock(&lock->cov_list_lock);
-			spin_lock(&lock->cov_list_lock);
-		}
-		spin_unlock(&lock->cov_list_lock);
+		lock_clear_coverage(sb, lock);

 		/* invalidate inodes after removing coverage so drop/evict aren't covered */
 		if (lock->start.sk_zone == SCOUTFS_FS_ZONE) {
@@ -643,6 +654,33 @@ int scoutfs_lock_grant_response(struct super_block *sb,
 	return 0;
 }

+/*
+ * The lock request we sent to the server was dropped before we could
+ * receive a grant response.  This happens when the client reconnects to
+ * a new server and completes pending requests with an error, since the
+ * old server's pending-request state was reclaimed at fence time.
+ *
+ * Clear request_pending so that a waiter in lock_key_range re-evaluates
+ * and sends a fresh request to the new server, and symmetrically put
+ * the lock so shrink's lru state matches the grant_response path.
+ */
+void scoutfs_lock_request_failed(struct super_block *sb,
+				 struct scoutfs_lock *lock)
+{
+	DECLARE_LOCK_INFO(sb, linfo);
+
+	scoutfs_inc_counter(sb, lock_request_failed);
+
+	spin_lock(&linfo->lock);
+	/* a failure may only be reported for a request that is still pending */
+	BUG_ON(!lock->request_pending);
+	lock->request_pending = 0;
+	wake_up(&lock->waitq);
+	put_lock(linfo, lock);
+	/* request_pending, the wakeup and the put all happen under linfo->lock */
+	spin_unlock(&linfo->lock);
+}
+
 struct inv_req {
 	struct list_head head;
 	struct scoutfs_lock *lock;
@@ -714,10 +752,13 @@ static void lock_invalidate_worker(struct work_struct *work)
 		ireq = list_first_entry(&lock->inv_list, struct inv_req, head);
 		nl = &ireq->nl;

-		/* only lock protocol, inv can't call subsystems after shutdown */
-		if (!linfo->shutdown) {
+		/* only lock protocol, inv can't call subsystems after shutdown or unmount */
+		if (!linfo->shutdown && !scoutfs_unmounting(sb)) {
 			ret = lock_invalidate(sb, lock, nl->old_mode, nl->new_mode);
 			BUG_ON(ret < 0 && ret != -ENOLINK);
+		} else {
+			lock_clear_coverage(sb, lock);
+			scoutfs_item_invalidate(sb, &lock->start, &lock->end);
 		}

 		/* respond with the key and modes from the request, server might have died */
@@ -922,7 +963,7 @@ static bool try_shrink_lock(struct super_block *sb, struct lock_info *linfo, boo
 	spin_unlock(&linfo->lock);

 	if (lock) {
-		ret = scoutfs_client_lock_request(sb, &nl);
+		ret = scoutfs_client_lock_request(sb, &nl, lock);
 		if (ret < 0) {
 			scoutfs_inc_counter(sb, lock_shrink_request_failed);

@@ -953,6 +994,9 @@ static bool lock_wait_cond(struct super_block *sb, struct scoutfs_lock *lock,
 	       !lock->request_pending;
 	spin_unlock(&linfo->lock);

+	if (!wake)
+		wake = scoutfs_unmounting(sb);
+
 	if (!wake)
 		scoutfs_inc_counter(sb, lock_wait);

@@ -997,8 +1041,10 @@ static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, i
 		return -EINVAL;

 	/* maybe catch _setup() and _shutdown order mistakes */
-	if (WARN_ON_ONCE(!linfo || linfo->shutdown))
+	if (!linfo || linfo->shutdown) {
+		WARN_ON_ONCE(!scoutfs_unmounting(sb));
 		return -ENOLCK;
+	}

 	/* have to lock before entering transactions */
 	if (WARN_ON_ONCE(scoutfs_trans_held()))
@@ -1024,6 +1070,11 @@ static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, i
 			break;
 		}

+		if (scoutfs_unmounting(sb)) {
+			ret = -ESHUTDOWN;
+			break;
+		}
+
 		/* the fast path where we can use the granted mode */
 		if (lock_modes_match(lock->mode, mode)) {
 			lock_inc_count(lock->users, mode);
@@ -1053,7 +1104,7 @@ static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, i
 			nl.old_mode = lock->mode;
 			nl.new_mode = mode;

-			ret = scoutfs_client_lock_request(sb, &nl);
+			ret = scoutfs_client_lock_request(sb, &nl, lock);
 			if (ret) {
 				spin_lock(&linfo->lock);
 				lock->request_pending = 0;
@@ -1067,8 +1118,9 @@ static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, i
 		if (flags & SCOUTFS_LKF_INTERRUPTIBLE) {
 			ret = wait_event_interruptible(lock->waitq,
 						       lock_wait_cond(sb, lock, mode));
-		} else {
-			wait_event(lock->waitq, lock_wait_cond(sb, lock, mode));
+		} else if (!wait_event_timeout(lock->waitq,
+					       lock_wait_cond(sb, lock, mode),
+					       CLIENT_LOCK_WAIT_TIMEOUT)) {
 			ret = 0;
 		}

@@ -1650,6 +1702,7 @@ void scoutfs_lock_destroy(struct super_block *sb)
 			list_del_init(&lock->inv_head);
 			lock->invalidate_pending = 0;
 		}
+		lock_clear_coverage(sb, lock);
 		lock_remove(linfo, lock);
 		lock_free(linfo, lock);
 	}
--- a/kmod/src/lock.h
+++ b/kmod/src/lock.h
@@ -60,6 +60,8 @@ struct scoutfs_lock_coverage {

 int scoutfs_lock_grant_response(struct super_block *sb,
 				struct scoutfs_net_lock *nl);
+void scoutfs_lock_request_failed(struct super_block *sb,
+				 struct scoutfs_lock *lock);
 int scoutfs_lock_invalidate_request(struct super_block *sb, u64 net_id,
 				    struct scoutfs_net_lock *nl);
 int scoutfs_lock_recover_request(struct super_block *sb, u64 net_id,
--- a/kmod/src/net.c
+++ b/kmod/src/net.c
@@ -336,7 +336,7 @@ static inline u8 net_err_from_host(struct super_block *sb, int error)
 				     error);
 		}

-		return -EINVAL;
+		return SCOUTFS_NET_ERR_EINVAL;
 	}

 	return net_errs[ind];
@@ -1750,8 +1750,10 @@ void scoutfs_net_client_greeting(struct super_block *sb,
 				 bool new_server)
 {
 	struct net_info *ninf = SCOUTFS_SB(sb)->net_info;
+	scoutfs_net_response_t resp_func;
 	struct message_send *msend;
 	struct message_send *tmp;
+	void *resp_data;

 	/* only called on client connections :/ */
 	BUG_ON(conn->listening_conn);
@@ -1760,10 +1762,32 @@ void scoutfs_net_client_greeting(struct super_block *sb,

 	if (new_server) {
 		atomic64_set(&conn->recv_seq, 0);
+
+		/* drop stale responses; old server's state is gone */
 		list_for_each_entry_safe(msend, tmp, &conn->resend_queue, head){
 			if (nh_is_response(&msend->nh))
 				free_msend(ninf, conn, msend);
 		}
+
+		/*
+		 * Complete pending requests with -ECONNRESET.  Any state
+		 * they depended on in the old server was reclaimed at
+		 * fence time, so resending is wrong.  Callers re-issue on
+		 * the new server if they still care.
+		 */
+		while ((msend = list_first_entry_or_null(&conn->resend_queue,
+							 struct message_send, head))) {
+			if (nh_is_response(&msend->nh))
+				break;
+			resp_func = msend->resp_func;
+			resp_data = msend->resp_data;
+			free_msend(ninf, conn, msend);
+			spin_unlock(&conn->lock);
+
+			call_resp_func(sb, conn, resp_func, resp_data, NULL, 0, -ECONNRESET);
+
+			spin_lock(&conn->lock);
+		}
 	}

 	set_valid_greeting(conn);
@@ -1990,8 +2014,9 @@ static int sync_response(struct super_block *sb,
 * buffer.  Errors returned can come from the remote request processing
 * or local failure to send.
 *
- * The wait for the response is interruptible and can return
- * -ERESTARTSYS if it is interrupted.
+ * The wait for the response uses a 60 second timeout loop that
+ * checks for unmount, returning -ESHUTDOWN if the mount is
+ * being torn down.
 *
 * -EOVERFLOW is returned if the response message's data_length doesn't
 * match the caller's resp_len buffer.
@@ -2002,6 +2027,7 @@ int scoutfs_net_sync_request(struct super_block *sb,
 			     void *resp, size_t resp_len)
 {
 	struct sync_request_completion sreq;
+	struct message_send *msend;
 	int ret;
 	u64 id;

@@ -2014,13 +2040,124 @@ int scoutfs_net_sync_request(struct super_block *sb,
 					 sync_response, &sreq, &id);

 	if (ret == 0) {
-		wait_for_completion(&sreq.comp);
-		ret = sreq.error;
+		while (!wait_for_completion_timeout(&sreq.comp, 60 * HZ)) {
+			if (!scoutfs_unmounting(sb))
+				continue;
+			spin_lock(&conn->lock);
+			msend = find_request(conn, cmd, id);
+			if (msend)
+				queue_dead_free(conn, msend);
+			spin_unlock(&conn->lock);
+
+			/* a racing response callback still uses the on-stack sreq */
+			if (!msend)
+				wait_for_completion(&sreq.comp);
+			return -ESHUTDOWN;
+		}
+		ret = sreq.error;
 	}

 	return ret;
 }

+/*
+ * A bounded-wait variant of sync_request for idempotent background
+ * workers that must reschedule instead of blocking indefinitely on an
+ * unresponsive server.  Returns -ETIMEDOUT if the response doesn't
+ * arrive within timeout_jiffies; the caller then treats it like any
+ * other RPC failure and retries on its normal reschedule cadence.
+ *
+ * Response state lives in a refcounted heap allocation rather than on
+ * the caller's stack so a late callback can't scribble into freed
+ * memory if we give up waiting.  On timeout we race with an arriving
+ * response for the msend: if find_request wins we queue_dead_free and
+ * the callback won't fire (we drop its ref); otherwise the callback is
+ * already running so we wait for it to complete before returning.
+ */
+struct bounded_sync {
+	struct completion comp;		/* completed by the response callback */
+	void *resp;			/* caller's buffer for the response payload */
+	unsigned int resp_len;		/* exact payload length the caller expects */
+	int error;			/* callback's error, stays 0 on success */
+	atomic_t refs;			/* freed when this drops to zero */
+};
+
+static void bounded_sync_put(struct bounded_sync *bs)
+{
+	if (atomic_dec_and_test(&bs->refs))
+		kfree(bs);	/* that was the last reference */
+}
+
+static int bounded_sync_response(struct super_block *sb,
+				 struct scoutfs_net_connection *conn,
+				 void *resp, unsigned int resp_len,
+				 int error, void *data)
+{
+	struct bounded_sync *sync = data;
+
+	/* a successful response must be exactly the length the waiter expects */
+	if (!error && resp_len != sync->resp_len)
+		error = -EMSGSIZE;
+	if (error)
+		sync->error = error;
+	else if (resp_len)
+		memcpy(sync->resp, resp, resp_len);
+
+	complete(&sync->comp);
+	bounded_sync_put(sync);
+	return 0;
+}
+
+int scoutfs_net_sync_request_timeout(struct super_block *sb,
+				     struct scoutfs_net_connection *conn,
+				     u8 cmd, void *arg, unsigned arg_len,
+				     void *resp, size_t resp_len,
+				     unsigned long timeout_jiffies)
+{
+	struct message_send *pending;
+	struct bounded_sync *sync;
+	u64 id;
+	int ret;
+
+	sync = kzalloc(sizeof(*sync), GFP_NOFS);	/* ->error starts at 0 */
+	if (!sync)
+		return -ENOMEM;
+	init_completion(&sync->comp);
+	sync->resp = resp;
+	sync->resp_len = resp_len;
+	atomic_set(&sync->refs, 2);	/* this waiter and the callback */
+
+	ret = scoutfs_net_submit_request(sb, conn, cmd, arg, arg_len,
+					 bounded_sync_response, sync, &id);
+	if (ret) {
+		bounded_sync_put(sync);
+		bounded_sync_put(sync);
+		return ret;
+	}
+
+	if (wait_for_completion_timeout(&sync->comp, timeout_jiffies)) {
+		ret = sync->error;
+		goto out;
+	}
+
+	scoutfs_inc_counter(sb, client_rpc_timeout);
+	spin_lock(&conn->lock);
+	pending = find_request(conn, cmd, id);
+	if (pending)
+		queue_dead_free(conn, pending);
+	spin_unlock(&conn->lock);
+
+	/* drop the callback's ref if we cancelled it, else wait for it */
+	if (pending)
+		bounded_sync_put(sync);
+	else
+		wait_for_completion(&sync->comp);
+	ret = -ETIMEDOUT;
+out:
+	bounded_sync_put(sync);
+	return ret;
+}
+
 static void net_tseq_show_conn(struct seq_file *m,
 			      struct scoutfs_tseq_entry *ent)
 {
--- a/kmod/src/net.h
+++ b/kmod/src/net.h
@@ -150,6 +150,11 @@ int scoutfs_net_sync_request(struct super_block *sb,
 			     struct scoutfs_net_connection *conn,
 			     u8 cmd, void *arg, unsigned arg_len,
 			     void *resp, size_t resp_len);
+int scoutfs_net_sync_request_timeout(struct super_block *sb,
+				     struct scoutfs_net_connection *conn,
+				     u8 cmd, void *arg, unsigned arg_len,
+				     void *resp, size_t resp_len,
+				     unsigned long timeout_jiffies);
 int scoutfs_net_response(struct super_block *sb,
 			 struct scoutfs_net_connection *conn,
 			 u8 cmd, u64 id, int error, void *resp, u16 resp_len);
--- a/kmod/src/quorum.c
+++ b/kmod/src/quorum.c
@@ -1195,8 +1195,8 @@ static struct attribute *quorum_attrs[] = {

 static inline bool valid_ipv4_unicast(__be32 addr)
 {
-	return !(ipv4_is_multicast(addr) && ipv4_is_lbcast(addr) &&
-		 ipv4_is_zeronet(addr) && ipv4_is_local_multicast(addr));
+	return !(ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) ||
+		 ipv4_is_zeronet(addr) || ipv4_is_local_multicast(addr));
 }

 static inline bool valid_ipv4_port(__be16 port)
--- a/kmod/src/quota.c
+++ b/kmod/src/quota.c
@@ -34,6 +34,7 @@
 #include "totl.h"
 #include "util.h"
 #include "quota.h"
+#include "trans.h"
 #include "counters.h"
 #include "scoutfs_trace.h"

@@ -1086,6 +1087,10 @@ int scoutfs_quota_mod_rule(struct super_block *sb, bool is_add,
 	if (ret < 0)
 		goto out;

+	ret = scoutfs_hold_trans(sb, true);
+	if (ret < 0)
+		goto out;
+
 	down_write(&qtinf->rwsem);

 	if (is_add) {
@@ -1095,28 +1100,30 @@ int scoutfs_quota_mod_rule(struct super_block *sb, bool is_add,
 		else if (ret == 0)
 			ret = -EEXIST;
 		if (ret < 0)
-			goto unlock;
+			goto release;

 		rule_to_rule_val(&rv, &rule);
 		ret = scoutfs_item_create(sb, &key, &rv, sizeof(rv), lock);
 		if (ret < 0)
-			goto unlock;
+			goto release;

 	} else {
 		ret = find_rule(sb, &rule, &key, lock) ?:
 		      scoutfs_item_delete(sb, &key, lock);
 		if (ret < 0)
-			goto unlock;
+			goto release;
 	}

 	scoutfs_quota_invalidate(sb);
 	ret = 0;

-unlock:
+release:
 	up_write(&qtinf->rwsem);
-	scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
+	scoutfs_release_trans(sb);

 out:
+	scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
+
 	if (is_add)
 		trace_scoutfs_quota_add_rule(sb, &rule, ret);
 	else
--- a/kmod/src/server.c
+++ b/kmod/src/server.c
@@ -256,6 +256,14 @@ static void server_down(struct server_info *server)
 		cmpxchg(&server->status, was, SERVER_DOWN);
 }

+static void init_mounted_client_key(struct scoutfs_key *key, u64 rid)
+{
+	*key = (struct scoutfs_key) {
+		.sk_zone = SCOUTFS_MOUNTED_CLIENT_ZONE,
+		.skmc_rid = cpu_to_le64(rid),
+	};
+}
+
 /*
 * The per-holder allocation block use budget balances batching
 * efficiency and concurrency.  The larger this gets, the fewer
@@ -630,7 +638,7 @@ static void scoutfs_server_commit_func(struct work_struct *work)
 	ret = scoutfs_alloc_empty_list(sb, &server->alloc, &server->wri,
 				       server->meta_freed,
 				       server->other_freed);
-	if (ret) {
+	if (ret && ret != -ENOLINK) {
 		scoutfs_err(sb, "server error emptying freed: %d", ret);
 		goto out;
 	}
@@ -963,6 +971,28 @@ static int find_log_trees_item(struct super_block *sb,
 	return ret;
 }

+/*
+ * True if rid has a mounted_clients item; lookup errors count as mounted.
+ */
+static bool rid_is_mounted(struct super_block *sb, u64 rid)
+{
+	DECLARE_SERVER_INFO(sb, server);
+	struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
+	SCOUTFS_BTREE_ITEM_REF(iref);
+	struct scoutfs_key key;
+	int ret;
+
+	init_mounted_client_key(&key, rid);
+
+	mutex_lock(&server->mounted_clients_mutex);
+	ret = scoutfs_btree_lookup(sb, &super->mounted_clients, &key, &iref);
+	if (ret == 0)
+		scoutfs_btree_put_iref(&iref);
+	mutex_unlock(&server->mounted_clients_mutex);
+	/* only a definite miss may let callers treat the rid as gone */
+	return ret != -ENOENT;
+}
+
 /*
 * Find the log_trees item with the greatest nr for each rid.  Fills the
 * caller's log_trees and sets the key before the returned log_trees for
@@ -1221,6 +1251,60 @@ static int do_finalize_ours(struct super_block *sb,
 * happens to arrive at just the right time.  That's fine, merging will
 * ignore and tear down the empty input.
 */
+
+static int reclaim_open_log_tree(struct super_block *sb, u64 rid);
+
+/*
+ * Reclaim open log trees of rids, other than the given rid, that have
+ * no mounted_clients entry; they block merges by appearing active.
+ * reclaim_open_log_tree is retried while it returns -EINPROGRESS.
+ *
+ * The caller holds logs_mutex and a commit, both are dropped and
+ * re-acquired around each reclaim call.  Returns <0 on error and >0 if
+ * orphans were found, so the caller can re-check state that may have
+ * changed while the lock was dropped.
+ */
+static int reclaim_orphan_log_trees(struct super_block *sb, u64 rid,
+				    struct commit_hold *hold)
+{
+	struct server_info *server = SCOUTFS_SB(sb)->server_info;
+	struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
+	struct scoutfs_log_trees lt;
+	struct scoutfs_key key;
+	bool found = false;
+	u64 orphan_rid;
+	int ret;
+	int err;
+
+	scoutfs_key_init_log_trees(&key, U64_MAX, U64_MAX);
+	while ((ret = for_each_rid_last_lt(sb, &super->logs_root, &key, &lt)) > 0) {
+		/* skip finalized trees, the given rid, and rids that are mounted */
+		if ((le64_to_cpu(lt.flags) & SCOUTFS_LOG_TREES_FINALIZED) ||
+		    le64_to_cpu(lt.rid) == rid ||
+		    rid_is_mounted(sb, le64_to_cpu(lt.rid)))
+			continue;
+
+		orphan_rid = le64_to_cpu(lt.rid);
+		scoutfs_err(sb, "reclaiming orphan log trees for rid %016llx nr %llu",
+			    orphan_rid, le64_to_cpu(lt.nr));
+		found = true;
+
+		do {
+			mutex_unlock(&server->logs_mutex);
+			err = reclaim_open_log_tree(sb, orphan_rid);
+			ret = server_apply_commit(sb, hold,
+						  err == -EINPROGRESS ? 0 : err);
+			server_hold_commit(sb, hold);
+			mutex_lock(&server->logs_mutex);
+		} while (err == -EINPROGRESS && ret == 0);
+
+		if (ret < 0)
+			break;
+	}
+
+	return ret < 0 ? ret : found;
+}
+
 #define FINALIZE_POLL_MIN_DELAY_MS	5U
 #define FINALIZE_POLL_MAX_DELAY_MS	100U
 #define FINALIZE_POLL_DELAY_GROWTH_PCT	150U
@@ -1261,6 +1345,16 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
 			break;
 		}

+		ret = reclaim_orphan_log_trees(sb, rid, hold);
+		if (ret < 0) {
+			err_str = "reclaiming orphan log trees";
+			break;
+		}
+		if (ret > 0) {
+			/* lock was dropped, re-check merge status */
+			continue;
+		}
+
 		/* look for finalized and other active log btrees */
 		saw_finalized = false;
 		others_active = false;
@@ -1929,7 +2023,7 @@ static int reclaim_open_log_tree(struct super_block *sb, u64 rid)
 	mutex_unlock(&server->alloc_mutex);

 	/* only finalize, allowing merging, once the allocators are fully freed */
-	if (ret == 0) {
+	if (ret == 0 && !scoutfs_trigger(sb, RECLAIM_SKIP_FINALIZE)) {
 		/* the transaction is no longer open */
 		lt.commit_trans_seq = lt.get_trans_seq;

@@ -1981,7 +2075,8 @@ static int get_stable_trans_seq(struct super_block *sb, u64 *last_seq_ret)
 	scoutfs_key_init_log_trees(&key, U64_MAX, U64_MAX);
 	while ((ret = for_each_rid_last_lt(sb, &super->logs_root, &key, &lt)) > 0) {
 		if ((le64_to_cpu(lt.get_trans_seq) > le64_to_cpu(lt.commit_trans_seq)) &&
-		     le64_to_cpu(lt.get_trans_seq) <= last_seq) {
+		     le64_to_cpu(lt.get_trans_seq) <= last_seq &&
+		     rid_is_mounted(sb, le64_to_cpu(lt.rid))) {
 			last_seq = le64_to_cpu(lt.get_trans_seq) - 1;
 		}
 	}
@@ -3533,14 +3628,6 @@ out:
 	return scoutfs_net_response(sb, conn, cmd, id, ret, &nst, sizeof(nst));
 }

-static void init_mounted_client_key(struct scoutfs_key *key, u64 rid)
-{
-	*key = (struct scoutfs_key) {
-		.sk_zone = SCOUTFS_MOUNTED_CLIENT_ZONE,
-		.skmc_rid = cpu_to_le64(rid),
-	};
-}
-
 static bool invalid_mounted_client_item(struct scoutfs_btree_item_ref *iref)
 {
 	return (iref->val_len != sizeof(struct scoutfs_mounted_client_btree_val));
--- a/kmod/src/srch.c
+++ b/kmod/src/srch.c
@@ -95,6 +95,13 @@ struct srch_info {
 */
 #define SRCH_COMPACT_DIRTY_LIMIT_BYTES (32 * 1024 * 1024)

+/*
+ * Generous per-RPC bound for the idempotent compact worker.  A server
+ * that hasn't answered in this long is assumed to be broken; dropping
+ * the request lets the worker reschedule instead of blocking forever.
+ */
+#define COMPACT_RPC_TIMEOUT (5 * 60 * HZ)
+
 static int sre_cmp(const struct scoutfs_srch_entry *a,
 		   const struct scoutfs_srch_entry *b)
 {
@@ -2256,7 +2263,8 @@ static void scoutfs_srch_compact_worker(struct work_struct *work)

 	scoutfs_block_writer_init(sb, &wri);

-	ret = scoutfs_client_srch_get_compact(sb, sc);
+	ret = scoutfs_client_srch_get_compact_timeout(sb, sc,
+						      COMPACT_RPC_TIMEOUT);
 	if (ret >= 0)
 		trace_scoutfs_srch_compact_client_recv(sb, sc);
 	if (ret < 0 || sc->nr == 0)
@@ -2287,7 +2295,8 @@ static void scoutfs_srch_compact_worker(struct work_struct *work)
 	sc->flags |= ret < 0 ? SCOUTFS_SRCH_COMPACT_FLAG_ERROR : 0;

 	trace_scoutfs_srch_compact_client_send(sb, sc);
-	err = scoutfs_client_srch_commit_compact(sb, sc);
+	err = scoutfs_client_srch_commit_compact_timeout(sb, sc,
+							 COMPACT_RPC_TIMEOUT);
 	if (err < 0 && ret == 0)
 		ret = err;
 out:
--- a/kmod/src/totl.c
+++ b/kmod/src/totl.c
@@ -30,6 +30,11 @@ void scoutfs_totl_merge_init(struct scoutfs_totl_merging *merg)
 	memset(merg, 0, sizeof(struct scoutfs_totl_merging));
 }

+/*
+ * Bin the incoming merge inputs so that we can resolve delta items
+ * properly.  Finalized logs that are merge inputs are kept separately
+ * from those that are not.
+ */
 void scoutfs_totl_merge_contribute(struct scoutfs_totl_merging *merg,
 				   u64 seq, u8 flags, void *val, int val_len, int fic)
 {
@@ -39,10 +44,10 @@ void scoutfs_totl_merge_contribute(struct scoutfs_totl_merging *merg,
 		merg->fs_seq = seq;
 		merg->fs_total = le64_to_cpu(tval->total);
 		merg->fs_count = le64_to_cpu(tval->count);
-	} else if (fic & FIC_FINALIZED) {
-		merg->fin_seq = seq;
-		merg->fin_total += le64_to_cpu(tval->total);
-		merg->fin_count += le64_to_cpu(tval->count);
+	} else if (fic & FIC_MERGE_INPUT) {
+		merg->inp_seq = seq;
+		merg->inp_total += le64_to_cpu(tval->total);
+		merg->inp_count += le64_to_cpu(tval->count);
 	} else {
 		merg->log_seq = seq;
 		merg->log_total += le64_to_cpu(tval->total);
@@ -53,15 +58,18 @@ void scoutfs_totl_merge_contribute(struct scoutfs_totl_merging *merg,
 /*
 * .totl. item merging has to be careful because the log btree merging
 * code can write partial results to the fs_root.  This means that a
- * reader can see both cases where new finalized logs should be applied
- * to the old fs items and where old finalized logs have already been
- * applied to the partially merged fs items.  Currently active logged
- * items are always applied on top of all cases.
+ * reader can see both cases where merge input deltas should be applied
+ * to the old fs items and where they have already been applied to the
+ * partially merged fs items.
+ *
+ * Only finalized log trees that are inputs to the current merge cycle
+ * are tracked in the inp_ bucket.  Finalized trees that aren't merge
+ * inputs and active log trees are always applied unconditionally since
+ * they cannot be in fs_root.
 *
 * These cases are differentiated with a combination of sequence numbers
- * in items, the count of contributing xattrs, and a flag
- * differentiating finalized and active logged items.  This lets us
- * recognize all cases, including when finalized logs were merged and
+ * in items and the count of contributing xattrs.  This lets us
+ * recognize all cases, including when merge inputs were merged and
 * deleted the fs item.
 */
 void scoutfs_totl_merge_resolve(struct scoutfs_totl_merging *merg, __u64 *total, __u64 *count)
@@ -75,14 +83,14 @@ void scoutfs_totl_merge_resolve(struct scoutfs_totl_merging *merg, __u64 *total,
 		*count = merg->fs_count;
 	}

-	/* apply finalized logs if they're newer or creating */
-	if (((merg->fs_seq != 0) && (merg->fin_seq > merg->fs_seq)) ||
-	    ((merg->fs_seq == 0) && (merg->fin_count > 0))) {
-		*total += merg->fin_total;
-		*count += merg->fin_count;
+	/* apply merge input deltas if they're newer or creating */
+	if (((merg->fs_seq != 0) && (merg->inp_seq > merg->fs_seq)) ||
+	    ((merg->fs_seq == 0) && (merg->inp_count > 0))) {
+		*total += merg->inp_total;
+		*count += merg->inp_count;
 	}

-	/* always apply active logs which must be newer than fs and finalized */
+	/* always apply non-input finalized and active logs */
 	if (merg->log_seq > 0) {
 		*total += merg->log_total;
 		*count += merg->log_count;
--- a/kmod/src/totl.h
+++ b/kmod/src/totl.h
@@ -7,9 +7,9 @@ struct scoutfs_totl_merging {
 	u64 fs_seq;
 	u64 fs_total;
 	u64 fs_count;
-	u64 fin_seq;
-	u64 fin_total;
-	s64 fin_count;
+	u64 inp_seq;
+	u64 inp_total;
+	s64 inp_count;
 	u64 log_seq;
 	u64 log_total;
 	s64 log_count;
--- a/kmod/src/trans.c
+++ b/kmod/src/trans.c
@@ -195,7 +195,8 @@ static int retry_forever(struct super_block *sb, int (*func)(struct super_block
 				retrying = true;
 			}

-			if (scoutfs_forcing_unmount(sb)) {
+			if (scoutfs_forcing_unmount(sb) ||
+			    scoutfs_unmounting(sb)) {
 				ret = -ENOLINK;
 				break;
 			}
--- a/kmod/src/triggers.c
+++ b/kmod/src/triggers.c
@@ -45,6 +45,8 @@ static char *names[] = {
 	[SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE] = "srch_force_log_rotate",
 	[SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE] = "srch_merge_stop_safe",
 	[SCOUTFS_TRIGGER_STATFS_LOCK_PURGE] = "statfs_lock_purge",
+	[SCOUTFS_TRIGGER_RECLAIM_SKIP_FINALIZE] = "reclaim_skip_finalize",
+	[SCOUTFS_TRIGGER_LOG_MERGE_FORCE_PARTIAL] = "log_merge_force_partial",
 };

 bool scoutfs_trigger_test_and_clear(struct super_block *sb, unsigned int t)
--- a/kmod/src/triggers.h
+++ b/kmod/src/triggers.h
@@ -8,6 +8,8 @@ enum scoutfs_trigger {
 	SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE,
 	SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE,
 	SCOUTFS_TRIGGER_STATFS_LOCK_PURGE,
+	SCOUTFS_TRIGGER_RECLAIM_SKIP_FINALIZE,
+	SCOUTFS_TRIGGER_LOG_MERGE_FORCE_PARTIAL,
 	SCOUTFS_TRIGGER_NR,
 };

--- a/kmod/src/wkic.c
+++ b/kmod/src/wkic.c
@@ -95,6 +95,7 @@ struct wkic_info {
 	/* block reading slow path */
 	struct mutex roots_mutex;
 	struct scoutfs_net_roots roots;
+	u64 merge_input_seq;
 	u64 roots_read_seq;
 	ktime_t roots_expire;

@@ -805,29 +806,79 @@ static void free_page_list(struct super_block *sb, struct list_head *list)
 * read_seq number so that we can compare the age of the items in cached
 * pages.  Only one request to refresh the roots is in progress at a
 * time.  This is the slow path that's only used when the cache isn't
- * populated and the roots aren't cached.  The root request is fast
- * enough, especially compared to the resulting item reading IO, that we
- * don't mind hiding it behind a trivial mutex.
+ * populated and the roots aren't cached.
+ *
+ * We read roots directly from the on-disk superblock rather than
+ * requesting them from the server so that we can also read the
+ * log_merge btree from the same superblock.  The merge status item
+ * seq tells us which finalized log trees are inputs to the current
+ * merge, which is needed to correctly resolve totl delta items.
 */
-static int get_roots(struct super_block *sb, struct wkic_info *winf,
-		     struct scoutfs_net_roots *roots_ret, u64 *read_seq, bool force_new)
+static int refresh_roots(struct super_block *sb, struct wkic_info *winf)
+{
+	struct scoutfs_super_block *super;
+	struct scoutfs_log_merge_status *stat;
+	SCOUTFS_BTREE_ITEM_REF(iref);
+	struct scoutfs_key key;
+	u64 merge_input_seq = 0;
+	int ret;
+
+	super = kmalloc(sizeof(*super), GFP_NOFS);
+	if (!super)
+		return -ENOMEM;
+
+	ret = scoutfs_read_super(sb, super);
+	if (ret < 0)
+		goto out;
+	if (super->log_merge.ref.blkno) {
+		scoutfs_key_set_zeros(&key);
+		key.sk_zone = SCOUTFS_LOG_MERGE_STATUS_ZONE;
+		ret = scoutfs_btree_lookup(sb, &super->log_merge, &key, &iref);
+		if (ret == 0) {
+			if (iref.val_len == sizeof(*stat)) {
+				stat = iref.val;
+				merge_input_seq = le64_to_cpu(stat->seq);
+			} else {
+				ret = -EUCLEAN;
+			}
+			scoutfs_btree_put_iref(&iref);
+		} else if (ret == -ENOENT) {
+			ret = 0;
+		}
+		if (ret < 0)
+			goto out;
+	}
+
+	/* commit only on full success so errors leave the cache untouched */
+	winf->roots = (struct scoutfs_net_roots){
+		.fs_root = super->fs_root,
+		.logs_root = super->logs_root,
+		.srch_root = super->srch_root,
+	};
+	winf->merge_input_seq = merge_input_seq;
+	winf->roots_read_seq++;
+	winf->roots_expire = ktime_add_ms(ktime_get_raw(), WKIC_CACHE_LIFETIME_MS);
+out:
+	kfree(super);
+	return ret;
+}
+
+static int get_roots(struct super_block *sb, struct wkic_info *winf,
+		     struct scoutfs_net_roots *roots_ret, u64 *merge_input_seq,
+		     u64 *read_seq, bool force_new)
 {
-	struct scoutfs_net_roots roots;
 	int ret;

 	mutex_lock(&winf->roots_mutex);

 	if (force_new || ktime_before(winf->roots_expire, ktime_get_raw())) {
-		ret = scoutfs_client_get_roots(sb, &roots);
+		ret = refresh_roots(sb, winf);
 		if (ret < 0)
 			goto out;
-
-		winf->roots = roots;
-		winf->roots_read_seq++;
-		winf->roots_expire = ktime_add_ms(ktime_get_raw(), WKIC_CACHE_LIFETIME_MS);
 	}

 	*roots_ret = winf->roots;
+	*merge_input_seq = winf->merge_input_seq;
 	*read_seq = winf->roots_read_seq;
 	ret = 0;
 out:
@@ -870,24 +921,30 @@ static int insert_read_pages(struct super_block *sb, struct wkic_info *winf,
 	struct scoutfs_key end;
 	struct wkic_page *wpage;
 	LIST_HEAD(pages);
-	u64 read_seq;
+	u64 merge_input_seq;
+	u64 read_seq = 0;
 	int ret;

 	ret = 0;
 retry_stale:
-	ret = get_roots(sb, winf, &roots, &read_seq, ret == -ESTALE);
+	ret = get_roots(sb, winf, &roots, &merge_input_seq, &read_seq, ret == -ESTALE);
 	if (ret < 0)
-		goto out;
+		goto check_stale;

 	start = *range_start;
 	end = *range_end;
-	ret = scoutfs_forest_read_items_roots(sb, &roots, key, range_start, &start, &end,
-					      read_items_cb, &root);
+	ret = scoutfs_forest_read_items_roots(sb, &roots, merge_input_seq, key, range_start,
+					      &start, &end, read_items_cb, &root);
 	trace_scoutfs_wkic_read_items(sb, key, &start, &end);
+check_stale:
 	ret = scoutfs_block_check_stale(sb, ret, &saved, &roots.fs_root.ref, &roots.logs_root.ref);
 	if (ret < 0) {
-		if (ret == -ESTALE)
+		if (ret == -ESTALE) {
+			/* not safe to retry due to delta items, must restart clean */
+			free_item_tree(&root);
+			root = RB_ROOT;
 			goto retry_stale;
+		}
 		goto out;
 	}

--- a/kmod/src/xattr.c
+++ b/kmod/src/xattr.c
@@ -1265,6 +1265,7 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
 			ret = parse_indx_key(&tag_key, xat->name, xat->name_len, ino);
 			if (ret < 0)
 				goto out;
+			scoutfs_xattr_set_indx_key_xid(&tag_key, le64_to_cpu(key.skx_id));
 		}

 		if ((tgs.totl || tgs.indx) && locked_zone != tag_key.sk_zone) {
--- a/tests/funcs/filter.sh
+++ b/tests/funcs/filter.sh
@@ -20,9 +20,6 @@ t_filter_fs()
 # [ 2687.691366] BUG: KASAN: stack-out-of-bounds in get_reg+0x1bc/0x230
 # ...
 # [ 2687.706220] ==================================================================
-# [ 2687.707284] Disabling lock debugging due to kernel taint
-#
-# That final lock debugging message may not be included.
 #
 ignore_harmless_unwind_kasan_stack_oob()
 {
@@ -46,10 +43,6 @@ awk '
 		saved=""
        }
        ( in_soob == 2 && $0 ~ /==================================================================/ ) {
-                in_soob = 3
-                soob_nr = NR
-        }
-        ( in_soob == 3 && NR > soob_nr && $0 !~ /Disabling lock debugging/ ) {
                in_soob = 0
        }
        ( !in_soob ) { print $0 }
@@ -61,6 +54,58 @@ awk '
 '
 }

+#
+# in el97+, XFS can generate a spurious lockdep circular dependency
+# warning about reclaim. Fixed upstream in e.g. v5.7-rc4-129-g6dcde60efd94
+#
+ignore_harmless_xfs_lockdep_warning()
+{
+awk '
+	BEGIN {
+		in_block = 0
+		block_nr = 0
+		buf = ""
+	}
+	( !in_block && $0 ~ /======================================================/ ) {
+		in_block = 1
+		block_nr = NR
+		buf = $0 "\n"
+		next
+	}
+	( in_block == 1 && NR == (block_nr + 1) ) {
+		if (match($0, /WARNING: possible circular locking dependency detected/) != 0) {
+			in_block = 2
+			buf = buf $0 "\n"
+		} else {
+			in_block = 0
+			printf "%s", buf
+			print $0
+			buf = ""
+		}
+		next
+	}
+	( in_block == 2 ) {
+		buf = buf $0 "\n"
+		if ($0 ~ /<\/TASK>/) {
+			if (buf ~ /xfs_(nondir_|dir_)?ilock_class/ && buf ~ /fs_reclaim/) {
+				# report names both an xfs ilock class and fs_reclaim: known false positive, discard
+			} else {
+				printf "%s", buf
+			}
+			in_block = 0
+			buf = ""
+		}
+		next
+	}
+	{ print $0 }
+	END {
+		if (buf) {
+			printf "%s", buf
+		}
+	}
+'
+}
+
 #
 # Filter out expected messages.  Putting messages here implies that
 # tests aren't relying on messages to discover failures.. they're
@@ -123,6 +168,9 @@ t_filter_dmesg()
 	re="$re|hrtimer: interrupt took .*"
 	re="$re|clocksource: Long readout interval"

+	# orphan log trees reclaim is handled, not an error
+	re="$re|scoutfs .* reclaiming orphan log trees"
+
 	# fencing tests force unmounts and trigger timeouts
 	re="$re|scoutfs .* forcing unmount"
 	re="$re|scoutfs .* reconnect timed out"
@@ -173,6 +221,10 @@ t_filter_dmesg()
 	# creating block devices may trigger this
 	re="$re|block device autoloading is deprecated and will be removed."

+	# lockdep or kasan warnings can cause this
+	re="$re|Disabling lock debugging due to kernel taint"
+
 	egrep -v "($re)" | \
-		ignore_harmless_unwind_kasan_stack_oob
+		ignore_harmless_unwind_kasan_stack_oob | \
+		ignore_harmless_xfs_lockdep_warning
 }
--- a/tests/funcs/fs.sh
+++ b/tests/funcs/fs.sh
@@ -283,6 +283,30 @@ t_reinsert_remount_all()
 	t_quiet t_mount_all || t_fail "mounting all failed"
 }

+#
+# scratch helpers: mkfs, mount, and unmount the T_EX_* devices at $T_MSCR.
+# Extra arguments to mkfs and mount are passed through to the command.
+#
+t_scratch_mkfs()
+{
+	scoutfs mkfs -f -Q "0,127.0.0.1,$T_SCRATCH_PORT" "$T_EX_META_DEV" "$T_EX_DATA_DEV" "$@" > "$T_TMP.mkfs.out" 2>&1 || \
+		t_fail "scratch mkfs failed"
+}
+
+t_scratch_mount()
+{
+	mkdir -p "$T_MSCR"
+	mount -t scoutfs -o "metadev_path=$T_EX_META_DEV,quorum_slot_nr=0" "$@" "$T_EX_DATA_DEV" "$T_MSCR" || \
+		t_fail "scratch mount failed"
+}
+
+t_scratch_umount()
+{
+	umount "$T_MSCR" || \
+		t_fail "scratch umount failed"
+	rmdir "$T_MSCR"
+}
+
 t_trigger_path() {
 	local nr="$1"

--- a/tests/golden/basic-acl-consistency
+++ b/tests/golden/basic-acl-consistency
@@ -0,0 +1,6 @@
+== make scratch fs
+== create uid/gids
+== set acls and permissions
+== compare output
+== drop caches and compare again
+== cleanup scratch fs
--- a/tests/golden/basic-xattr-indx
+++ b/tests/golden/basic-xattr-indx
@@ -0,0 +1,54 @@
+== testing invalid read-xattr-index arguments
+bad index position entry argument 'bad', it must be in the form "a.b.ino" where each value can be prefixed by '0' for octal or '0x' for hex
+scoutfs: read-xattr-index failed: Invalid argument (22)
+bad index position entry argument '1.2', it must be in the form "a.b.ino" where each value can be prefixed by '0' for octal or '0x' for hex
+scoutfs: read-xattr-index failed: Invalid argument (22)
+initial major index position '256' must be between 0 and 255, inclusive.
+scoutfs: read-xattr-index failed: Invalid argument (22)
+first index position 1.2.3 must be less than last index position 0.0.0
+scoutfs: read-xattr-index failed: Invalid argument (22)
+first index position 1.2.0 must be less than last index position 1.1.2
+scoutfs: read-xattr-index failed: Invalid argument (22)
+first index position 2.2.2 must be less than last index position 2.2.1
+scoutfs: read-xattr-index failed: Invalid argument (22)
+== testing invalid names
+setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
+setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
+setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
+setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
+setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
+setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
+setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
+setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
+setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
+setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
+setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
+setfattr: /mnt/test/test/basic-xattr-indx/invalid: Numerical result out of range
+== testing boundary values
+0.0 found
+255.max found
+== indx xattr must have no value
+setfattr: /mnt/test/test/basic-xattr-indx/noval: Invalid argument
+setfattr: /mnt/test/test/basic-xattr-indx/noval: Invalid argument
+== set indx xattr and verify index entry
+found
+== setting same indx xattr again is a no-op
+found
+== removing non-existent indx xattr succeeds
+setfattr: /mnt/test/test/basic-xattr-indx/file: No such attribute
+still found
+== explicit xattr removal cleans up index entry
+== file deletion cleans up index entry
+found before delete
+== multiple indx xattrs on one file cleaned up by deletion
+entries before delete: 2
+entries after delete: 0
+== partial removal leaves other entries
+300 found
+== multiple files at same index position
+files at same position: 2
+surviving file found
+== cross-mount visibility
+found on mount 1
+== duplicate position deduplication
+entries for same position: 1
--- a/tests/golden/lock-rever-invalidate
+++ b/tests/golden/lock-rever-invalidate
--- a/tests/golden/orphan-log-trees
+++ b/tests/golden/orphan-log-trees
@@ -0,0 +1,3 @@
+== create orphan log_trees entry via trigger
+== verify orphan is reclaimed and merge completes
+== verify orphan reclaim was logged
--- a/tests/golden/punch-offline
+++ b/tests/golden/punch-offline
@@ -0,0 +1,460 @@
+== missing options should fail ==
+punch-offline: must provide offset
+Try `punch-offline --help' or `punch-offline --usage' for more information.
+punch-offline: must provide length
+Try `punch-offline --help' or `punch-offline --usage' for more information.
+punch-offline: must provide data_version
+Try `punch-offline --help' or `punch-offline --usage' for more information.
+== can't hole punch dir or special ==
+failed to open '/mnt/test.0/test/punch-offline/dir': Is a directory (21)
+scoutfs: punch-offline failed: Is a directory (21)
+== punching an empty file does nothing ==
+== punch outside of i_size does nothing ==
+== can't hole punch online extent ==
+0: offset: 0 length: 4096 flags: ..L
+extents: 1
+punch_offline ioctl failed: Invalid argument (22)
+scoutfs: punch-offline failed: Invalid argument (22)
+0: offset: 0 length: 4096 flags: ..L
+extents: 1
+== can't hole punch unwritten extent ==
+0: offset: 0 length: 12288 flags: .UL
+extents: 1
+punch_offline ioctl failed: Invalid argument (22)
+scoutfs: punch-offline failed: Invalid argument (22)
+0: offset: 0 length: 12288 flags: .UL
+extents: 1
+== hole punch offline extent ==
+0: offset: 0 length: 12288 flags: O.L
+extents: 1
+0: offset: 0 length: 4096 flags: O..
+1: offset: 8192 length: 4096 flags: O.L
+extents: 2
+== can't hole punch non-aligned bsz offset or len ==
+0: offset: 0 length: 12288 flags: O.L
+extents: 1
+punch_offline ioctl failed: Value too large for defined data type (75)
+scoutfs: punch-offline failed: Value too large for defined data type (75)
+punch_offline ioctl failed: Value too large for defined data type (75)
+scoutfs: punch-offline failed: Value too large for defined data type (75)
+punch_offline ioctl failed: Value too large for defined data type (75)
+scoutfs: punch-offline failed: Value too large for defined data type (75)
+punch_offline ioctl failed: Value too large for defined data type (75)
+scoutfs: punch-offline failed: Value too large for defined data type (75)
+punch_offline ioctl failed: Value too large for defined data type (75)
+scoutfs: punch-offline failed: Value too large for defined data type (75)
+punch_offline ioctl failed: Value too large for defined data type (75)
+scoutfs: punch-offline failed: Value too large for defined data type (75)
+0: offset: 0 length: 12288 flags: O.L
+extents: 1
+== can't hole punch mismatched data_version ==
+0: offset: 0 length: 12288 flags: O.L
+extents: 1
+punch_offline ioctl failed: Stale file handle (116)
+scoutfs: punch-offline failed: Stale file handle (116)
+punch_offline ioctl failed: Stale file handle (116)
+scoutfs: punch-offline failed: Stale file handle (116)
+punch_offline ioctl failed: Stale file handle (116)
+scoutfs: punch-offline failed: Stale file handle (116)
+0: offset: 0 length: 12288 flags: O.L
+extents: 1
+== Punch hole crossing multiple extents ==
+0: offset: 0 length: 7 flags: O.L
+extents: 1
+0: offset: 0 length: 1 flags: O..
+1: offset: 2 length: 1 flags: O..
+2: offset: 4 length: 1 flags: O..
+3: offset: 6 length: 1 flags: O.L
+extents: 4
+0: offset: 0 length: 1 flags: O..
+1: offset: 6 length: 1 flags: O.L
+extents: 2
+== punch hole starting at a hole ==
+0: offset: 0 length: 7 flags: O.L
+extents: 1
+0: offset: 0 length: 1 flags: O..
+1: offset: 2 length: 1 flags: O..
+2: offset: 4 length: 1 flags: O..
+3: offset: 6 length: 1 flags: O.L
+extents: 4
+0: offset: 0 length: 1 flags: O..
+1: offset: 6 length: 1 flags: O.L
+extents: 2
+== large punch ==
+0: offset: 0 length: 1572864 flags: O.L
+extents: 1
+0: offset: 0 length: 134123 flags: O..
+1: offset: 202466 length: 264807 flags: O..
+2: offset: 535616 length: 199007 flags: O..
+3: offset: 802966 length: 769898 flags: O.L
+extents: 4
+== overlapping punches with lots of extents ==
+0: offset: 0 length: 4194304 flags: O.L
+extents: 1
+extents: 512
+extents: 505
+extents: 378
+extents: 252
+0: offset: 0 length: 4096 flags: O..
+1: offset: 8192 length: 4096 flags: O..
+2: offset: 32768 length: 4096 flags: O..
+3: offset: 40960 length: 4096 flags: O..
+4: offset: 65536 length: 4096 flags: O..
+5: offset: 73728 length: 4096 flags: O..
+6: offset: 98304 length: 4096 flags: O..
+7: offset: 106496 length: 4096 flags: O..
+8: offset: 196608 length: 4096 flags: O..
+9: offset: 204800 length: 4096 flags: O..
+10: offset: 229376 length: 4096 flags: O..
+11: offset: 237568 length: 4096 flags: O..
+12: offset: 262144 length: 4096 flags: O..
+13: offset: 270336 length: 4096 flags: O..
+14: offset: 294912 length: 4096 flags: O..
+15: offset: 303104 length: 4096 flags: O..
+16: offset: 327680 length: 4096 flags: O..
+17: offset: 335872 length: 4096 flags: O..
+18: offset: 360448 length: 4096 flags: O..
+19: offset: 368640 length: 4096 flags: O..
+20: offset: 393216 length: 4096 flags: O..
+21: offset: 401408 length: 4096 flags: O..
+22: offset: 425984 length: 4096 flags: O..
+23: offset: 434176 length: 4096 flags: O..
+24: offset: 458752 length: 4096 flags: O..
+25: offset: 466944 length: 4096 flags: O..
+26: offset: 491520 length: 4096 flags: O..
+27: offset: 499712 length: 4096 flags: O..
+28: offset: 720896 length: 4096 flags: O..
+29: offset: 729088 length: 4096 flags: O..
+30: offset: 753664 length: 4096 flags: O..
+31: offset: 761856 length: 4096 flags: O..
+32: offset: 786432 length: 4096 flags: O..
+33: offset: 794624 length: 4096 flags: O..
+34: offset: 819200 length: 4096 flags: O..
+35: offset: 827392 length: 4096 flags: O..
+36: offset: 851968 length: 4096 flags: O..
+37: offset: 860160 length: 4096 flags: O..
+38: offset: 884736 length: 4096 flags: O..
+39: offset: 892928 length: 4096 flags: O..
+40: offset: 917504 length: 4096 flags: O..
+41: offset: 925696 length: 4096 flags: O..
+42: offset: 950272 length: 4096 flags: O..
+43: offset: 958464 length: 4096 flags: O..
+44: offset: 983040 length: 4096 flags: O..
+45: offset: 991232 length: 4096 flags: O..
+46: offset: 1015808 length: 4096 flags: O..
+47: offset: 1024000 length: 4096 flags: O..
+48: offset: 1048576 length: 4096 flags: O..
+49: offset: 1056768 length: 4096 flags: O..
+50: offset: 1081344 length: 4096 flags: O..
+51: offset: 1089536 length: 4096 flags: O..
+52: offset: 1114112 length: 4096 flags: O..
+53: offset: 1122304 length: 4096 flags: O..
+54: offset: 1146880 length: 4096 flags: O..
+55: offset: 1155072 length: 4096 flags: O..
+56: offset: 1179648 length: 4096 flags: O..
+57: offset: 1187840 length: 4096 flags: O..
+58: offset: 1212416 length: 4096 flags: O..
+59: offset: 1220608 length: 4096 flags: O..
+60: offset: 1245184 length: 4096 flags: O..
+61: offset: 1253376 length: 4096 flags: O..
+62: offset: 1277952 length: 4096 flags: O..
+63: offset: 1286144 length: 4096 flags: O..
+64: offset: 1310720 length: 4096 flags: O..
+65: offset: 1318912 length: 4096 flags: O..
+66: offset: 1343488 length: 4096 flags: O..
+67: offset: 1351680 length: 4096 flags: O..
+68: offset: 1376256 length: 4096 flags: O..
+69: offset: 1384448 length: 4096 flags: O..
+70: offset: 1409024 length: 4096 flags: O..
+71: offset: 1417216 length: 4096 flags: O..
+72: offset: 1441792 length: 4096 flags: O..
+73: offset: 1449984 length: 4096 flags: O..
+74: offset: 1474560 length: 4096 flags: O..
+75: offset: 1482752 length: 4096 flags: O..
+76: offset: 1507328 length: 4096 flags: O..
+77: offset: 1515520 length: 4096 flags: O..
+78: offset: 1540096 length: 4096 flags: O..
+79: offset: 1548288 length: 4096 flags: O..
+80: offset: 1572864 length: 4096 flags: O..
+81: offset: 1581056 length: 4096 flags: O..
+82: offset: 1605632 length: 4096 flags: O..
+83: offset: 1613824 length: 4096 flags: O..
+84: offset: 1638400 length: 4096 flags: O..
+85: offset: 1646592 length: 4096 flags: O..
+86: offset: 1671168 length: 4096 flags: O..
+87: offset: 1679360 length: 4096 flags: O..
+88: offset: 1703936 length: 4096 flags: O..
+89: offset: 1712128 length: 4096 flags: O..
+90: offset: 1736704 length: 4096 flags: O..
+91: offset: 1744896 length: 4096 flags: O..
+92: offset: 1769472 length: 4096 flags: O..
+93: offset: 1777664 length: 4096 flags: O..
+94: offset: 1802240 length: 4096 flags: O..
+95: offset: 1810432 length: 4096 flags: O..
+96: offset: 1835008 length: 4096 flags: O..
+97: offset: 1843200 length: 4096 flags: O..
+98: offset: 1867776 length: 4096 flags: O..
+99: offset: 1875968 length: 4096 flags: O..
+100: offset: 1900544 length: 4096 flags: O..
+101: offset: 1908736 length: 4096 flags: O..
+102: offset: 1933312 length: 4096 flags: O..
+103: offset: 1941504 length: 4096 flags: O..
+104: offset: 1966080 length: 4096 flags: O..
+105: offset: 1974272 length: 4096 flags: O..
+106: offset: 1998848 length: 4096 flags: O..
+107: offset: 2007040 length: 4096 flags: O..
+108: offset: 2031616 length: 4096 flags: O..
+109: offset: 2039808 length: 4096 flags: O..
+110: offset: 2064384 length: 4096 flags: O..
+111: offset: 2072576 length: 4096 flags: O..
+112: offset: 2097152 length: 4096 flags: O..
+113: offset: 2105344 length: 4096 flags: O..
+114: offset: 2129920 length: 4096 flags: O..
+115: offset: 2138112 length: 4096 flags: O..
+116: offset: 2162688 length: 4096 flags: O..
+117: offset: 2170880 length: 4096 flags: O..
+118: offset: 2195456 length: 4096 flags: O..
+119: offset: 2203648 length: 4096 flags: O..
+120: offset: 2228224 length: 4096 flags: O..
+121: offset: 2236416 length: 4096 flags: O..
+122: offset: 2260992 length: 4096 flags: O..
+123: offset: 2269184 length: 4096 flags: O..
+124: offset: 2293760 length: 4096 flags: O..
+125: offset: 2301952 length: 4096 flags: O..
+126: offset: 2326528 length: 4096 flags: O..
+127: offset: 2334720 length: 4096 flags: O..
+128: offset: 2359296 length: 4096 flags: O..
+129: offset: 2367488 length: 4096 flags: O..
+130: offset: 2392064 length: 4096 flags: O..
+131: offset: 2400256 length: 4096 flags: O..
+132: offset: 2424832 length: 4096 flags: O..
+133: offset: 2433024 length: 4096 flags: O..
+134: offset: 2457600 length: 4096 flags: O..
+135: offset: 2465792 length: 4096 flags: O..
+136: offset: 2490368 length: 4096 flags: O..
+137: offset: 2498560 length: 4096 flags: O..
+138: offset: 2523136 length: 4096 flags: O..
+139: offset: 2531328 length: 4096 flags: O..
+140: offset: 2555904 length: 4096 flags: O..
+141: offset: 2564096 length: 4096 flags: O..
+142: offset: 2588672 length: 4096 flags: O..
+143: offset: 2596864 length: 4096 flags: O..
+144: offset: 2621440 length: 4096 flags: O..
+145: offset: 2629632 length: 4096 flags: O..
+146: offset: 2654208 length: 4096 flags: O..
+147: offset: 2662400 length: 4096 flags: O..
+148: offset: 2686976 length: 4096 flags: O..
+149: offset: 2695168 length: 4096 flags: O..
+150: offset: 2719744 length: 4096 flags: O..
+151: offset: 2727936 length: 4096 flags: O..
+152: offset: 2752512 length: 4096 flags: O..
+153: offset: 2760704 length: 4096 flags: O..
+154: offset: 2785280 length: 4096 flags: O..
+155: offset: 2793472 length: 4096 flags: O..
+156: offset: 2818048 length: 4096 flags: O..
+157: offset: 2826240 length: 4096 flags: O..
+158: offset: 2850816 length: 4096 flags: O..
+159: offset: 2859008 length: 4096 flags: O..
+160: offset: 2883584 length: 4096 flags: O..
+161: offset: 2891776 length: 4096 flags: O..
+162: offset: 2916352 length: 4096 flags: O..
+163: offset: 2924544 length: 4096 flags: O..
+164: offset: 2949120 length: 4096 flags: O..
+165: offset: 2957312 length: 4096 flags: O..
+166: offset: 2981888 length: 4096 flags: O..
+167: offset: 2990080 length: 4096 flags: O..
+168: offset: 3014656 length: 4096 flags: O..
+169: offset: 3022848 length: 4096 flags: O..
+170: offset: 3047424 length: 4096 flags: O..
+171: offset: 3055616 length: 4096 flags: O..
+172: offset: 3080192 length: 4096 flags: O..
+173: offset: 3088384 length: 4096 flags: O..
+174: offset: 3112960 length: 4096 flags: O..
+175: offset: 3121152 length: 4096 flags: O..
+176: offset: 3145728 length: 4096 flags: O..
+177: offset: 3153920 length: 4096 flags: O..
+178: offset: 3178496 length: 4096 flags: O..
+179: offset: 3186688 length: 4096 flags: O..
+180: offset: 3211264 length: 4096 flags: O..
+181: offset: 3219456 length: 4096 flags: O..
+182: offset: 3244032 length: 4096 flags: O..
+183: offset: 3252224 length: 4096 flags: O..
+184: offset: 3276800 length: 4096 flags: O..
+185: offset: 3284992 length: 4096 flags: O..
+186: offset: 3309568 length: 4096 flags: O..
+187: offset: 3317760 length: 4096 flags: O..
+188: offset: 3342336 length: 4096 flags: O..
+189: offset: 3350528 length: 4096 flags: O..
+190: offset: 3375104 length: 4096 flags: O..
+191: offset: 3383296 length: 4096 flags: O..
+192: offset: 3407872 length: 4096 flags: O..
+193: offset: 3416064 length: 4096 flags: O..
+194: offset: 3440640 length: 4096 flags: O..
+195: offset: 3448832 length: 4096 flags: O..
+196: offset: 3473408 length: 4096 flags: O..
+197: offset: 3481600 length: 4096 flags: O..
+198: offset: 3506176 length: 4096 flags: O..
+199: offset: 3514368 length: 4096 flags: O..
+200: offset: 3538944 length: 4096 flags: O..
+201: offset: 3547136 length: 4096 flags: O..
+202: offset: 3571712 length: 4096 flags: O..
+203: offset: 3579904 length: 4096 flags: O..
+204: offset: 3604480 length: 4096 flags: O..
+205: offset: 3612672 length: 4096 flags: O..
+206: offset: 3637248 length: 4096 flags: O..
+207: offset: 3645440 length: 4096 flags: O..
+208: offset: 3670016 length: 4096 flags: O..
+209: offset: 3678208 length: 4096 flags: O..
+210: offset: 3702784 length: 4096 flags: O..
+211: offset: 3710976 length: 4096 flags: O..
+212: offset: 3735552 length: 4096 flags: O..
+213: offset: 3743744 length: 4096 flags: O..
+214: offset: 3768320 length: 4096 flags: O..
+215: offset: 3776512 length: 4096 flags: O..
+216: offset: 3801088 length: 4096 flags: O..
+217: offset: 3809280 length: 4096 flags: O..
+218: offset: 3833856 length: 4096 flags: O..
+219: offset: 3842048 length: 4096 flags: O..
+220: offset: 3866624 length: 4096 flags: O..
+221: offset: 3874816 length: 4096 flags: O..
+222: offset: 3899392 length: 4096 flags: O..
+223: offset: 3907584 length: 4096 flags: O..
+224: offset: 3932160 length: 4096 flags: O..
+225: offset: 3940352 length: 4096 flags: O..
+226: offset: 3964928 length: 4096 flags: O..
+227: offset: 3973120 length: 4096 flags: O..
+228: offset: 3997696 length: 4096 flags: O..
+229: offset: 4005888 length: 4096 flags: O..
+230: offset: 4030464 length: 4096 flags: O..
+231: offset: 4038656 length: 4096 flags: O..
+232: offset: 4063232 length: 4096 flags: O..
+233: offset: 4071424 length: 4096 flags: O..
+234: offset: 4096000 length: 4096 flags: O..
+235: offset: 4104192 length: 4096 flags: O..
+236: offset: 4128768 length: 4096 flags: O..
+237: offset: 4136960 length: 4096 flags: O..
+238: offset: 4161536 length: 4096 flags: O..
+239: offset: 4169728 length: 4096 flags: O.L
+extents: 240
+0: offset: 0 length: 1 flags: O..
+1: offset: 8 length: 1 flags: O..
+2: offset: 16 length: 1 flags: O..
+3: offset: 24 length: 1 flags: O..
+4: offset: 48 length: 1 flags: O..
+5: offset: 56 length: 1 flags: O..
+6: offset: 64 length: 1 flags: O..
+7: offset: 72 length: 1 flags: O..
+8: offset: 80 length: 1 flags: O..
+9: offset: 88 length: 1 flags: O..
+10: offset: 96 length: 1 flags: O..
+11: offset: 104 length: 1 flags: O..
+12: offset: 112 length: 1 flags: O..
+13: offset: 120 length: 1 flags: O..
+14: offset: 176 length: 1 flags: O..
+15: offset: 184 length: 1 flags: O..
+16: offset: 192 length: 1 flags: O..
+17: offset: 200 length: 1 flags: O..
+18: offset: 208 length: 1 flags: O..
+19: offset: 216 length: 1 flags: O..
+20: offset: 224 length: 1 flags: O..
+21: offset: 232 length: 1 flags: O..
+22: offset: 240 length: 1 flags: O..
+23: offset: 248 length: 1 flags: O..
+24: offset: 256 length: 1 flags: O..
+25: offset: 264 length: 1 flags: O..
+26: offset: 272 length: 1 flags: O..
+27: offset: 280 length: 1 flags: O..
+28: offset: 288 length: 1 flags: O..
+29: offset: 296 length: 1 flags: O..
+30: offset: 304 length: 1 flags: O..
+31: offset: 312 length: 1 flags: O..
+32: offset: 320 length: 1 flags: O..
+33: offset: 328 length: 1 flags: O..
+34: offset: 336 length: 1 flags: O..
+35: offset: 344 length: 1 flags: O..
+36: offset: 352 length: 1 flags: O..
+37: offset: 360 length: 1 flags: O..
+38: offset: 368 length: 1 flags: O..
+39: offset: 376 length: 1 flags: O..
+40: offset: 384 length: 1 flags: O..
+41: offset: 392 length: 1 flags: O..
+42: offset: 400 length: 1 flags: O..
+43: offset: 408 length: 1 flags: O..
+44: offset: 416 length: 1 flags: O..
+45: offset: 424 length: 1 flags: O..
+46: offset: 432 length: 1 flags: O..
+47: offset: 440 length: 1 flags: O..
+48: offset: 448 length: 1 flags: O..
+49: offset: 456 length: 1 flags: O..
+50: offset: 464 length: 1 flags: O..
+51: offset: 472 length: 1 flags: O..
+52: offset: 480 length: 1 flags: O..
+53: offset: 488 length: 1 flags: O..
+54: offset: 496 length: 1 flags: O..
+55: offset: 504 length: 1 flags: O..
+56: offset: 512 length: 1 flags: O..
+57: offset: 520 length: 1 flags: O..
+58: offset: 528 length: 1 flags: O..
+59: offset: 536 length: 1 flags: O..
+60: offset: 544 length: 1 flags: O..
+61: offset: 552 length: 1 flags: O..
+62: offset: 560 length: 1 flags: O..
+63: offset: 568 length: 1 flags: O..
+64: offset: 576 length: 1 flags: O..
+65: offset: 584 length: 1 flags: O..
+66: offset: 592 length: 1 flags: O..
+67: offset: 600 length: 1 flags: O..
+68: offset: 608 length: 1 flags: O..
+69: offset: 616 length: 1 flags: O..
+70: offset: 624 length: 1 flags: O..
+71: offset: 632 length: 1 flags: O..
+72: offset: 640 length: 1 flags: O..
+73: offset: 648 length: 1 flags: O..
+74: offset: 656 length: 1 flags: O..
+75: offset: 664 length: 1 flags: O..
+76: offset: 672 length: 1 flags: O..
+77: offset: 680 length: 1 flags: O..
+78: offset: 688 length: 1 flags: O..
+79: offset: 696 length: 1 flags: O..
+80: offset: 704 length: 1 flags: O..
+81: offset: 712 length: 1 flags: O..
+82: offset: 720 length: 1 flags: O..
+83: offset: 728 length: 1 flags: O..
+84: offset: 736 length: 1 flags: O..
+85: offset: 744 length: 1 flags: O..
+86: offset: 752 length: 1 flags: O..
+87: offset: 760 length: 1 flags: O..
+88: offset: 768 length: 1 flags: O..
+89: offset: 776 length: 1 flags: O..
+90: offset: 784 length: 1 flags: O..
+91: offset: 792 length: 1 flags: O..
+92: offset: 800 length: 1 flags: O..
+93: offset: 808 length: 1 flags: O..
+94: offset: 816 length: 1 flags: O..
+95: offset: 824 length: 1 flags: O..
+96: offset: 832 length: 1 flags: O..
+97: offset: 840 length: 1 flags: O..
+98: offset: 848 length: 1 flags: O..
+99: offset: 856 length: 1 flags: O..
+100: offset: 864 length: 1 flags: O..
+101: offset: 872 length: 1 flags: O..
+102: offset: 880 length: 1 flags: O..
+103: offset: 888 length: 1 flags: O..
+104: offset: 896 length: 1 flags: O..
+105: offset: 904 length: 1 flags: O..
+106: offset: 912 length: 1 flags: O..
+107: offset: 920 length: 1 flags: O..
+108: offset: 928 length: 1 flags: O..
+109: offset: 936 length: 1 flags: O..
+110: offset: 944 length: 1 flags: O..
+111: offset: 952 length: 1 flags: O..
+112: offset: 960 length: 1 flags: O..
+113: offset: 968 length: 1 flags: O..
+114: offset: 976 length: 1 flags: O..
+115: offset: 984 length: 1 flags: O..
+116: offset: 992 length: 1 flags: O..
+117: offset: 1000 length: 1 flags: O..
+118: offset: 1008 length: 1 flags: O..
+119: offset: 1016 length: 1 flags: O.L
+extents: 120
+extents: 0
--- a/tests/golden/totl-merge-read
+++ b/tests/golden/totl-merge-read
@@ -0,0 +1,3 @@
+== setup
+expected 4681
+== cleanup
--- a/tests/run-tests.sh
+++ b/tests/run-tests.sh
@@ -505,7 +505,10 @@ crash_monitor()
 		fi

 		if [ "$bad" != 0 ]; then
-			echo "run-tests monitor triggering crash"
+			echo "run-tests monitor syncing and triggering crash"
+			# hail mary, the sync could well hang
+			(echo s > /proc/sysrq-trigger) &
+			sleep 5
 			echo c > /proc/sysrq-trigger
 			exit 1
 		fi
@@ -625,6 +628,9 @@ for t in $tests; do
 		cmd rm -rf "$T_TMPDIR"
 		cmd mkdir -p "$T_TMPDIR"

+		# assign scratch mount point in temporary dir
+		T_MSCR="$T_TMPDIR/scratch"
+
 		# create a test name dir in the fs, clean up old data as needed
 		T_DS=""
 		for i in $(seq 0 $((T_NR_MOUNTS - 1))); do
@@ -688,8 +694,8 @@ for t in $tests; do
 		if [ "$sts" == "$T_PASS_STATUS" ]; then
 			dmesg | t_filter_dmesg > "$T_TMPDIR/dmesg.after"
 			diff --old-line-format="" --unchanged-line-format="" \
-				"$T_TMPDIR/dmesg.before" "$T_TMPDIR/dmesg.after" > \
-				"$T_TMPDIR/dmesg.new"
+				"$T_TMPDIR/dmesg.before" "$T_TMPDIR/dmesg.after" | \
+				grep -v '^$' > "$T_TMPDIR/dmesg.new"

 			if [ -s "$T_TMPDIR/dmesg.new" ]; then
 				message="unexpected messages in dmesg"
--- a/tests/sequence
+++ b/tests/sequence
@@ -2,6 +2,7 @@ export-get-name-parent.sh
 basic-block-counts.sh
 basic-bad-mounts.sh
 basic-posix-acl.sh
+basic-acl-consistency.sh
 inode-items-updated.sh
 simple-inode-index.sh
 simple-staging.sh
@@ -10,6 +11,7 @@ simple-readdir.sh
 get-referring-entries.sh
 fallocate.sh
 basic-truncate.sh
+punch-offline.sh
 data-prealloc.sh
 setattr_more.sh
 offline-extent-waiting.sh
@@ -24,7 +26,9 @@ srch-basic-functionality.sh
 simple-xattr-unit.sh
 retention-basic.sh
 totl-xattr-tag.sh
+basic-xattr-indx.sh
 quota.sh
+totl-merge-read.sh
 lock-refleak.sh
 lock-shrink-consistency.sh
 lock-shrink-read-race.sh
@@ -48,6 +52,7 @@ setup-error-teardown.sh
 resize-devices.sh
 change-devices.sh
 fence-and-reclaim.sh
+orphan-log-trees.sh
 quorum-heartbeat-timeout.sh
 orphan-inodes.sh
 mount-unmount-race.sh
--- a/tests/tests/basic-acl-consistency.sh
+++ b/tests/tests/basic-acl-consistency.sh
@@ -0,0 +1,117 @@
+
+#
+# Test basic clustered posix acl consistency.
+#
+
+t_require_commands getfacl setfacl
+
+GETFACL="getfacl --absolute-names"
+
+filter_scratch() {
+	sed "s@$T_MSCR@t_mscr@g"
+}
+
+acl_compare()
+{
+	diff -u - <($GETFACL $T_MSCR/data/dir_a/dir_b | filter_scratch) <<EOF1
+# file: t_mscr/data/dir_a/dir_b
+# owner: t_usr_3
+# group: t_grp_3
+# flags: -s-
+user::rwx
+group::rwx
+group:t_grp_2:r-x
+mask::rwx
+other::---
+default:user::rwx
+default:group::rwx
+default:group:t_grp_2:r-x
+default:group:t_grp_3:rwx
+default:mask::rwx
+default:other::---
+
+EOF1
+
+	test $? -eq 0 || t_fail "dir_b differs"
+
+	diff -u - <($GETFACL -p $T_MSCR/data/dir_a/dir_b/dir_c/dir_d | filter_scratch) <<EOF3
+# file: t_mscr/data/dir_a/dir_b/dir_c/dir_d
+# owner: t_usr_1
+# group: t_grp_1
+# flags: -s-
+user::rwx
+group::rwx
+group:t_grp_2:r-x
+mask::rwx
+other::---
+default:user::rwx
+default:group::rwx
+default:group:t_grp_2:r-x
+default:group:t_grp_3:rwx
+default:mask::rwx
+default:other::---
+
+EOF3
+	test $? -eq 0 || t_fail "dir_d differs"
+
+	diff -u - <($GETFACL $T_MSCR/data/dir_a/dir_b/dir_c | filter_scratch) <<EOF2
+# file: t_mscr/data/dir_a/dir_b/dir_c
+# owner: t_usr_3
+# group: t_grp_2
+# flags: -s-
+user::rwx
+group::rwx
+group:t_grp_2:r-x
+mask::rwx
+other::---
+default:user::rwx
+default:group::rwx
+default:group:t_grp_2:r-x
+default:group:t_grp_3:rwx
+default:mask::rwx
+default:other::---
+
+EOF2
+	test $? -eq 0 || t_fail "dir_c differs"
+}
+echo "== make scratch fs"
+t_scratch_mkfs
+t_scratch_mount
+
+rm -rf $T_MSCR/data
+
+echo "== create uid/gids"
+groupadd -g 7101 t_grp_1 > /dev/null 2>&1
+useradd -g 7101 -u 7101 t_usr_1 > /dev/null 2>&1
+groupadd -g 7102 t_grp_2 > /dev/null 2>&1
+groupadd -g 7103 t_grp_3 > /dev/null 2>&1
+useradd -g 7103 -u 7103 t_usr_3 > /dev/null 2>&1
+
+echo "== set acls and permissions"
+mkdir -p $T_MSCR/data/dir_a/dir_b
+chown t_usr_3:t_grp_3 $T_MSCR/data/dir_a/dir_b
+chmod 2770 $T_MSCR/data/dir_a/dir_b
+setfacl -m g:t_grp_2:rx $T_MSCR/data/dir_a/dir_b
+setfacl -m d:g:t_grp_2:rx $T_MSCR/data/dir_a/dir_b
+setfacl -m d:g:t_grp_3:rwx $T_MSCR/data/dir_a/dir_b
+
+mkdir -p $T_MSCR/data/dir_a/dir_b/dir_c
+chown t_usr_3:t_grp_2 $T_MSCR/data/dir_a/dir_b/dir_c
+setfacl -x g:t_grp_3 $T_MSCR/data/dir_a/dir_b/dir_c
+
+mkdir -p $T_MSCR/data/dir_a/dir_b/dir_c/dir_d
+chown t_usr_1:t_grp_1 $T_MSCR/data/dir_a/dir_b/dir_c/dir_d
+setfacl -x g:t_grp_3 $T_MSCR/data/dir_a/dir_b/dir_c/dir_d
+
+echo "== compare output"
+acl_compare
+
+echo "== drop caches and compare again"
+sync
+echo 3 > /proc/sys/vm/drop_caches
+acl_compare
+
+echo "== cleanup scratch fs"
+t_scratch_umount
+
+t_pass
--- a/tests/tests/basic-bad-mounts.sh
+++ b/tests/tests/basic-bad-mounts.sh
@@ -12,25 +12,22 @@ mount_fail()
 }

 echo "== prepare devices, mount point, and logs"
-SCR="$T_TMPDIR/mnt.scratch"
-mkdir -p "$SCR"
+t_scratch_mkfs
 > $T_TMP.mount.out
-scoutfs mkfs -f -Q 0,127.0.0.1,$T_SCRATCH_PORT "$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 \
-	|| t_fail "mkfs failed"

 echo "== bad devices, bad options"
-mount_fail -o _bad /dev/null /dev/null "$SCR"
+mount_fail -o _bad /dev/null /dev/null "$T_MSCR"

 echo "== swapped devices"
-mount_fail -o metadev_path=$T_EX_DATA_DEV,quorum_slot_nr=0 "$T_EX_META_DEV" "$SCR"
+mount_fail -o metadev_path=$T_EX_DATA_DEV,quorum_slot_nr=0 "$T_EX_META_DEV" "$T_MSCR"

 echo "== both meta devices"
-mount_fail -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_META_DEV" "$SCR"
+mount_fail -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_META_DEV" "$T_MSCR"

 echo "== both data devices"
-mount_fail -o metadev_path=$T_EX_DATA_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
+mount_fail -o metadev_path=$T_EX_DATA_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"

 echo "== good volume, bad option and good options"
-mount_fail -o _bad,metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR" 
+mount_fail -o _bad,metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"

 t_pass
--- a/tests/tests/basic-xattr-indx.sh
+++ b/tests/tests/basic-xattr-indx.sh
@@ -0,0 +1,143 @@
+#
+# Test basic .indx. xattr tag functionality and index entry lifecycle
+#
+
+t_require_commands touch rm setfattr scoutfs stat
+t_require_mounts 2
+
+# query index from a specific mount, default mount 0
+read_xattr_index()
+{
+	local nr="${1:-0}"
+	local mnt="$(eval echo \$T_M$nr)"
+	shift
+
+	sync
+	echo 1 > $(t_debugfs_path $nr)/drop_weak_item_cache
+	scoutfs read-xattr-index -p "$mnt" "$@"
+}
+
+MAJOR=5
+MINOR=100
+
+echo "== testing invalid read-xattr-index arguments"
+scoutfs read-xattr-index -p "$T_M0" bad 2>&1
+scoutfs read-xattr-index -p "$T_M0" 1.2 2>&1
+scoutfs read-xattr-index -p "$T_M0" 1.2.3 256.0.0 2>&1
+scoutfs read-xattr-index -p "$T_M0" 1.2.3 0.0.0 2>&1
+scoutfs read-xattr-index -p "$T_M0" 1.2.0 1.1.2 2>&1
+scoutfs read-xattr-index -p "$T_M0" 2.2.2 2.2.1 2>&1
+
+echo "== testing invalid names"
+touch "$T_D0/invalid"
+setfattr -n scoutfs.hide.indx.test.$MAJOR "$T_D0/invalid" 2>&1 | t_filter_fs
+setfattr -n scoutfs.hide.indx.test "$T_D0/invalid" 2>&1 | t_filter_fs
+setfattr -n scoutfs.hide.indx.test.. "$T_D0/invalid" 2>&1 | t_filter_fs
+setfattr -n scoutfs.hide.indx.test..$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
+setfattr -n scoutfs.hide.indx.test.$MAJOR. "$T_D0/invalid" 2>&1 | t_filter_fs
+setfattr -n scoutfs.hide.indx.test.256.$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
+setfattr -n scoutfs.hide.indx.test.abc.$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
+setfattr -n scoutfs.hide.indx.test.$MAJOR.abc "$T_D0/invalid" 2>&1 | t_filter_fs
+setfattr -n scoutfs.hide.indx.test.-1.$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
+setfattr -n scoutfs.hide.indx.test.$MAJOR.-1 "$T_D0/invalid" 2>&1 | t_filter_fs
+setfattr -n scoutfs.hide.indx.test.18446744073709551616.$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
+setfattr -n scoutfs.hide.indx.$(printf 'x%.0s' $(seq 1 240)).$MAJOR.$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
+rm -f "$T_D0/invalid"
+
+echo "== testing boundary values"
+touch "$T_D0/boundary"
+INO=$(stat -c "%i" "$T_D0/boundary")
+setfattr -n scoutfs.hide.indx.test.0.0 "$T_D0/boundary"
+read_xattr_index 0 0.0.0 0.0.-1 | awk '($3 == "'$INO'") {print "0.0 found"}'
+setfattr -x scoutfs.hide.indx.test.0.0 "$T_D0/boundary"
+setfattr -n scoutfs.hide.indx.test.255.18446744073709551615 "$T_D0/boundary"
+read_xattr_index 0 255.0.0 255.-1.-1 | awk '($3 == "'$INO'") {print "255.max found"}'
+setfattr -x scoutfs.hide.indx.test.255.18446744073709551615 "$T_D0/boundary"
+rm -f "$T_D0/boundary"
+
+echo "== indx xattr must have no value"
+touch "$T_D0/noval"
+setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR -v "" "$T_D0/noval" 2>&1 | t_filter_fs
+setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR -v 0 "$T_D0/noval" 2>&1 | t_filter_fs
+setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR -v 1 "$T_D0/noval" 2>&1 | t_filter_fs
+rm -f "$T_D0/noval"
+
+echo "== set indx xattr and verify index entry"
+touch "$T_D0/file"
+INO=$(stat -c "%i" "$T_D0/file")
+setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/file"
+read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found"}'
+
+echo "== setting same indx xattr again is a no-op"
+setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/file"
+read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found"}'
+
+echo "== removing non-existent indx xattr succeeds"
+setfattr -x scoutfs.hide.indx.nonexistent.$MAJOR.999 "$T_D0/file" 2>&1 | t_filter_fs
+read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "still found"}'
+
+echo "== explicit xattr removal cleans up index entry"
+setfattr -x scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/file"
+read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found orphan"}'
+rm -f "$T_D0/file"
+
+echo "== file deletion cleans up index entry"
+touch "$T_D0/file2"
+INO=$(stat -c "%i" "$T_D0/file2")
+setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/file2"
+read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found before delete"}'
+rm -f "$T_D0/file2"
+read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found orphan after delete"}'
+
+echo "== multiple indx xattrs on one file cleaned up by deletion"
+touch "$T_D0/file3"
+INO=$(stat -c "%i" "$T_D0/file3")
+setfattr -n scoutfs.hide.indx.a.$MAJOR.200 "$T_D0/file3"
+setfattr -n scoutfs.hide.indx.b.$MAJOR.300 "$T_D0/file3"
+BEFORE=$(read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'")' | wc -l)
+echo "entries before delete: $BEFORE"
+rm -f "$T_D0/file3"
+AFTER=$(read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'")' | wc -l)
+echo "entries after delete: $AFTER"
+
+echo "== partial removal leaves other entries"
+touch "$T_D0/partial"
+INO=$(stat -c "%i" "$T_D0/partial")
+setfattr -n scoutfs.hide.indx.a.$MAJOR.200 "$T_D0/partial"
+setfattr -n scoutfs.hide.indx.b.$MAJOR.300 "$T_D0/partial"
+setfattr -x scoutfs.hide.indx.a.$MAJOR.200 "$T_D0/partial"
+read_xattr_index 0 $MAJOR.200.0 $MAJOR.200.-1 | awk '($3 == "'$INO'") {print "200 found"}'
+read_xattr_index 0 $MAJOR.300.0 $MAJOR.300.-1 | awk '($3 == "'$INO'") {print "300 found"}'
+rm -f "$T_D0/partial"
+
+echo "== multiple files at same index position"
+touch "$T_D0/multi_a" "$T_D0/multi_b"
+INO_A=$(stat -c "%i" "$T_D0/multi_a")
+INO_B=$(stat -c "%i" "$T_D0/multi_b")
+setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/multi_a"
+setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/multi_b"
+COUNT=$(read_xattr_index 0 $MAJOR.$MINOR.0 $MAJOR.$MINOR.-1 | wc -l)
+echo "files at same position: $COUNT"
+rm -f "$T_D0/multi_a"
+read_xattr_index 0 $MAJOR.$MINOR.0 $MAJOR.$MINOR.-1 | awk '($3 == "'$INO_A'") {print "deleted file still found"}'
+read_xattr_index 0 $MAJOR.$MINOR.0 $MAJOR.$MINOR.-1 | awk '($3 == "'$INO_B'") {print "surviving file found"}'
+rm -f "$T_D0/multi_b"
+
+echo "== cross-mount visibility"
+touch "$T_D0/file4"
+INO=$(stat -c "%i" "$T_D0/file4")
+setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/file4"
+read_xattr_index 1 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found on mount 1"}'
+rm -f "$T_D0/file4"
+read_xattr_index 1 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found orphan on mount 1"}'
+
+echo "== duplicate position deduplication"
+touch "$T_D0/file5"
+INO=$(stat -c "%i" "$T_D0/file5")
+setfattr -n scoutfs.hide.indx.aa.$MAJOR.$MINOR "$T_D0/file5"
+setfattr -n scoutfs.hide.indx.bb.$MAJOR.$MINOR "$T_D0/file5"
+COUNT=$(read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'")' | wc -l)
+echo "entries for same position: $COUNT"
+rm -f "$T_D0/file5"
+
+t_pass
--- a/tests/tests/change-devices.sh
+++ b/tests/tests/change-devices.sh
@@ -11,9 +11,8 @@ truncate -s $sz "$T_TMP.equal"
 truncate -s $large_sz "$T_TMP.large"

 echo "== make scratch fs"
-t_quiet scoutfs mkfs -f -Q 0,127.0.0.1,$T_SCRATCH_PORT "$T_EX_META_DEV" "$T_EX_DATA_DEV"
-SCR="$T_TMPDIR/mnt.scratch"
-mkdir -p "$SCR"
+t_scratch_mkfs
+mkdir -p "$T_MSCR"

 echo "== small new data device fails"
 t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.small"
@@ -23,13 +22,13 @@ t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV" "$T_TMP.small"
 t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV"

 echo "== preparing while mounted fails"
-mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
+mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"
 t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal"
-umount "$SCR"
+umount "$T_MSCR"

 echo "== preparing without recovery fails"
-mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
-umount -f "$SCR"
+mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"
+umount -f "$T_MSCR"
 t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal"

 echo "== check sees metadata errors"
@@ -37,16 +36,16 @@ t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV"
 t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV" "$T_TMP.equal"

 echo "== preparing with file data fails"
-mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
-echo hi > "$SCR"/file
-umount "$SCR"
+mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"
+echo hi > "$T_MSCR"/file
+umount "$T_MSCR"
 scoutfs print "$T_EX_META_DEV" > "$T_TMP.print"
 t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal"

 echo "== preparing after emptied"
-mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
-rm -f "$SCR"/file
-umount "$SCR"
+mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"
+rm -f "$T_MSCR"/file
+umount "$T_MSCR"
 t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal"

 echo "== checks pass"
@@ -55,22 +54,22 @@ t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV" "$T_TMP.equal"

 echo "== using prepared"
 scr_loop=$(losetup --find --show "$T_TMP.equal")
-mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$scr_loop" "$SCR"
-touch "$SCR"/equal_prepared
-equal_tot=$(scoutfs statfs -s total_data_blocks -p "$SCR")
-umount "$SCR"
+mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$scr_loop" "$T_MSCR"
+touch "$T_MSCR"/equal_prepared
+equal_tot=$(scoutfs statfs -s total_data_blocks -p "$T_MSCR")
+umount "$T_MSCR"
 losetup -d "$scr_loop"

 echo "== preparing larger and resizing"
 t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.large"
 scr_loop=$(losetup --find --show "$T_TMP.large")
-mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$scr_loop" "$SCR"
-touch "$SCR"/large_prepared
-ls "$SCR"
-scoutfs resize-devices -p "$SCR" -d $large_sz
-large_tot=$(scoutfs statfs -s total_data_blocks -p "$SCR")
+mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$scr_loop" "$T_MSCR"
+touch "$T_MSCR"/large_prepared
+ls "$T_MSCR"
+scoutfs resize-devices -p "$T_MSCR" -d $large_sz
+large_tot=$(scoutfs statfs -s total_data_blocks -p "$T_MSCR")
 test "$large_tot" -gt "$equal_tot" ; echo "resized larger test rc: $?"
-umount "$SCR"
+umount "$T_MSCR"
 losetup -d "$scr_loop"

 echo "== cleanup"
--- a/tests/tests/enospc.sh
+++ b/tests/tests/enospc.sh
@@ -54,21 +54,16 @@ after=$(free_blocks Data "$T_M0")
 test "$before" == "$after" || \
 	t_fail "$after free data blocks after rm, expected $before"

-# XXX this is all pretty manual, would be nice to have helpers
 echo "== make small meta fs"
 # meta device just big enough for reserves and the metadata we'll fill
-scoutfs mkfs -A -f -Q 0,127.0.0.1,$T_SCRATCH_PORT -m 10G "$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 || \
-	t_fail "mkfs failed"
-SCR="$T_TMPDIR/mnt.scratch"
-mkdir -p "$SCR"
-mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
-	"$T_EX_DATA_DEV" "$SCR"
+t_scratch_mkfs -A -m 10G
+t_scratch_mount

 echo "== create large xattrs until we fill up metadata"
-mkdir -p "$SCR/xattrs"
+mkdir -p "$T_MSCR/xattrs"

 for f in $(seq 1 100000); do
-	file="$SCR/xattrs/file-$f"
+	file="$T_MSCR/xattrs/file-$f"
 	touch "$file"

 	LC_ALL=C create_xattr_loop -c 1000 -n user.scoutfs-enospc -p "$file" -s 65535 > $T_TMP.cxl 2>&1
@@ -84,10 +79,10 @@ for f in $(seq 1 100000); do
 done

 echo "== remove files with xattrs after enospc"
-rm -rf "$SCR/xattrs"
+rm -rf "$T_MSCR/xattrs"

 echo "== make sure we can create again"
-file="$SCR/file-after"
+file="$T_MSCR/file-after"
 C=120
 while (( C-- )); do
 	touch $file 2> /dev/null && break
@@ -99,7 +94,6 @@ sync
 rm -f "$file"

 echo "== cleanup small meta fs"
-umount "$SCR"
-rmdir "$SCR"
+t_scratch_umount

 t_pass
--- a/tests/tests/inode-deletion.sh
+++ b/tests/tests/inode-deletion.sh
@@ -53,14 +53,6 @@ exec {FD1}>&-  # close
 exec {FD2}>&-  # close
 check_ino_index "$ino" "$dseq" "$T_M0"

-echo "== remote unopened unlink deletes"
-echo "contents" > "$T_D0/file"
-ino=$(stat -c "%i" "$T_D0/file")
-dseq=$(scoutfs stat -s data_seq "$T_D0/file")
-rm -f "$T_D1/file"
-check_ino_index "$ino" "$dseq" "$T_M0"
-check_ino_index "$ino" "$dseq" "$T_M1"
-
 # Hurry along the orphan scanners. If any are currently asleep, we will
 # have to wait at least their current scan interval before they wake up,
 # run, and notice their new interval.
@@ -68,6 +60,19 @@ t_save_all_sysfs_mount_options orphan_scan_delay_ms
 t_set_all_sysfs_mount_options orphan_scan_delay_ms 500
 t_wait_for_orphan_scan_runs

+echo "== remote unopened unlink deletes"
+echo "contents" > "$T_D0/file"
+ino=$(stat -c "%i" "$T_D0/file")
+dseq=$(scoutfs stat -s data_seq "$T_D0/file")
+rm -f "$T_D1/file"
+# cross-mount deletion falls back to the orphan scanner when the
+# creating mount still has the inode cached, wait for it to complete
+t_force_log_merge
+# wait for orphan scanners to pick up the unlinked inode and become idle
+t_wait_for_no_orphans
+check_ino_index "$ino" "$dseq" "$T_M0"
+check_ino_index "$ino" "$dseq" "$T_M1"
+
 echo "== unlink wait for open on other mount"
 echo "contents" > "$T_D0/badfile"
 ino=$(stat -c "%i" "$T_D0/badfile")
@@ -81,7 +86,6 @@ exec {FD}>&-  # close
 # we know that revalidating will unhash the remote dentry
 stat "$T_D0/badfile" 2>&1 | sed 's/cannot statx/cannot stat/' | t_filter_fs
 t_force_log_merge
-# wait for orphan scanners to pick up the unlinked inode and become idle
 t_wait_for_no_orphans
 check_ino_index "$ino" "$dseq" "$T_M0"
 check_ino_index "$ino" "$dseq" "$T_M1"
--- a/tests/tests/orphan-log-trees.sh
+++ b/tests/tests/orphan-log-trees.sh
@@ -0,0 +1,52 @@
+#
+# Test that orphaned log_trees entries from unmounted rids are
+# finalized and merged.
+#
+# An orphan log_trees entry is one whose rid has no mounted_clients
+# entry.  This can happen from incomplete reclaim across server
+# failovers.  We simulate it with the reclaim_skip_finalize trigger
+# which makes reclaim_open_log_tree skip the finalization step.
+#
+
+t_require_commands touch scoutfs
+t_require_mounts 2
+
+TIMEOUT=90
+
+echo "== create orphan log_trees entry via trigger"
+sv=$(t_server_nr)
+cl=$(t_first_client_nr)
+rid=$(t_mount_rid $cl)
+
+touch "$T_D0/file" "$T_D1/file"
+sync
+
+# arm the trigger so reclaim skips finalization
+t_trigger_arm_silent reclaim_skip_finalize $sv
+
+# force unmount the client, server will fence and reclaim it
+# but the trigger makes reclaim leave log_trees unfinalized
+t_force_umount $cl
+
+# wait for fencing to run
+verify_fenced() {
+	grep -q "running rid '$rid'" "$T_FENCED_LOG" 2>/dev/null
+}
+t_wait_until_timeout $TIMEOUT verify_fenced
+
+# give the server time to complete reclaim after fence
+sleep 5
+
+# remount the client so t_force_log_merge can sync all mounts.
+# the client gets a new rid; the old rid's log_trees is the orphan.
+t_mount $cl
+
+echo "== verify orphan is reclaimed and merge completes"
+t_force_log_merge
+
+echo "== verify orphan reclaim was logged"
+if ! dmesg | grep -q "reclaiming orphan log trees for rid $rid"; then
+	t_fail "expected orphan reclaim message for rid $rid in dmesg"
+fi
+
+t_pass
--- a/tests/tests/punch-offline.sh
+++ b/tests/tests/punch-offline.sh
@@ -0,0 +1,152 @@
+
+t_require_commands scoutfs dd fallocate
+
+FILE="$T_D0/file"
+DIR="$T_D0/dir"
+
+echo "== missing options should fail =="
+rm -rf $DIR && mkdir -p $DIR
+scoutfs punch-offline $DIR -l 4096 -V 0
+scoutfs punch-offline $DIR -o 0 -V 0
+scoutfs punch-offline $DIR -o 0 -l 4096
+
+echo "== can't hole punch dir or special =="
+rm -rf $DIR && mkdir -p $DIR
+scoutfs punch-offline $DIR -o 0 -l 4096 -V 0
+
+echo "== punching an empty file does nothing =="
+rm -f $FILE && touch $FILE
+scoutfs punch-offline $FILE -o 0 -l 4096 -V 0
+
+echo "== punch outside of i_size does nothing =="
+dd if=/dev/zero of=$FILE bs=4096 count=1 status=none
+scoutfs punch-offline $FILE -o 4096 -l 4096 -V 1
+
+echo "== can't hole punch online extent =="
+scoutfs get-fiemap -Lb $FILE
+scoutfs punch-offline $FILE -o 0 -l 4096 -V 1
+scoutfs get-fiemap -Lb $FILE
+
+echo "== can't hole punch unwritten extent =="
+rm -rf $FILE && touch $FILE
+fallocate -l $((4096 * 3)) $FILE
+vers=$(scoutfs stat -s data_version "$FILE")
+scoutfs get-fiemap -Lb $FILE
+scoutfs punch-offline $FILE -o 4096 -l 4096 -V $vers
+scoutfs get-fiemap -Lb $FILE
+
+echo "== hole punch offline extent =="
+rm -rf $FILE && touch $FILE
+fallocate -l $((4096 * 3)) $FILE
+vers=$(scoutfs stat -s data_version "$FILE")
+scoutfs release $FILE --data-version $vers
+scoutfs get-fiemap -Lb $FILE
+scoutfs punch-offline $FILE -o 4096 -l 4096 -V $vers
+scoutfs get-fiemap -Lb $FILE
+
+echo "== can't hole punch non-aligned bsz offset or len =="
+rm -rf $FILE && touch $FILE
+fallocate -l $((4096 * 3)) $FILE
+vers=$(scoutfs stat -s data_version "$FILE")
+scoutfs release $FILE --data-version $vers
+scoutfs get-fiemap -Lb $FILE
+scoutfs punch-offline $FILE -o 4095 -l 4096 -V $vers
+scoutfs punch-offline $FILE -o 1 -l 4096 -V $vers
+scoutfs punch-offline $FILE -o 4096 -l 409700 -V $vers
+scoutfs punch-offline $FILE -o 4096 -l 4097 -V $vers
+scoutfs punch-offline $FILE -o 4096 -l 4095 -V $vers
+scoutfs punch-offline $FILE -o 4096 -l 1 -V $vers
+scoutfs punch-offline $FILE -o 4096 -l 0 -V $vers
+scoutfs get-fiemap -Lb $FILE
+
+echo "== can't hole punch mismatched data_version =="
+rm -rf $FILE && touch $FILE
+fallocate -l $((4096 * 3)) $FILE
+vers=$(scoutfs stat -s data_version "$FILE")
+scoutfs release $FILE --data-version $vers
+scoutfs get-fiemap -Lb $FILE
+scoutfs punch-offline $FILE -o 4096 -l 4096 -V 0
+scoutfs punch-offline $FILE -o 4096 -l 4096 -V 2
+scoutfs punch-offline $FILE -o 4096 -l 4096 -V 9999
+scoutfs get-fiemap -Lb $FILE
+
+echo "== Punch hole crossing multiple extents =="
+rm -rf $FILE && touch $FILE
+fallocate -l $((7 * 4096)) $FILE
+vers=$(scoutfs stat -s data_version "$FILE")
+scoutfs release $FILE --data-version $vers
+scoutfs get-fiemap -L $FILE
+scoutfs punch-offline $FILE -o $((1 * 4096)) -l 4096 -V $vers
+scoutfs punch-offline $FILE -o $((3 * 4096)) -l 4096 -V $vers
+scoutfs punch-offline $FILE -o $((5 * 4096)) -l 4096 -V $vers
+# 0.1.2.3
+scoutfs get-fiemap -L $FILE
+scoutfs punch-offline $FILE -o $((2 * 4096)) -l $((3 * 4096)) -V $vers
+# 0.....1
+scoutfs get-fiemap -L $FILE
+
+echo "== punch hole starting at a hole =="
+rm -rf $FILE && touch $FILE
+fallocate -l $((7 * 4096)) $FILE
+vers=$(scoutfs stat -s data_version "$FILE")
+scoutfs release $FILE --data-version $vers
+scoutfs get-fiemap -L $FILE
+scoutfs punch-offline $FILE -o $((1 * 4096)) -l 4096 -V $vers
+scoutfs punch-offline $FILE -o $((3 * 4096)) -l 4096 -V $vers
+scoutfs punch-offline $FILE -o $((5 * 4096)) -l 4096 -V $vers
+# 0.1.2.3
+scoutfs get-fiemap -L $FILE
+scoutfs punch-offline $FILE -o $((1 * 4096)) -l $((5 * 4096)) -V $vers
+# 0.....1
+scoutfs get-fiemap -L $FILE
+
+echo "== large punch =="
+rm -rf $FILE && touch $FILE
+fallocate -l $((6 * 1024 * 1024 * 1024)) $FILE
+vers=$(scoutfs stat -s data_version "$FILE")
+scoutfs release $FILE --data-version $vers
+scoutfs get-fiemap -L $FILE
+scoutfs punch-offline $FILE -o $((134123 * 4096)) -l $((68343 * 4096)) -V $vers
+scoutfs punch-offline $FILE -o $((467273 * 4096)) -l $((68343 * 4096)) -V $vers
+scoutfs punch-offline $FILE -o $((734623 * 4096)) -l $((68343 * 4096)) -V $vers
+scoutfs get-fiemap -L $FILE
+
+echo "== overlapping punches with lots of extents =="
+rm -rf $FILE && touch $FILE
+fallocate -l $((4096 * 1024)) $FILE
+vers=$(scoutfs stat -s data_version "$FILE")
+scoutfs release $FILE --data-version 1
+scoutfs get-fiemap -Lb $FILE
+# punch odd ones away
+for h in $(seq 1 2 1023); do
+	scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
+done
+scoutfs get-fiemap -Lb $FILE | tail -n 1
+# punch a large hole from 32 to 45, removing 7 extents
+scoutfs punch-offline $FILE -o $((32 * 4096)) -l $((13 * 4096)) -V $vers
+scoutfs get-fiemap -Lb $FILE | tail -n 1
+# punch every 8th @6
+for h in $(seq 6 8 1024); do
+	scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
+done
+# again @4
+scoutfs get-fiemap -Lb $FILE | tail -n 1
+for h in $(seq 4 8 1024); do
+	scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
+done
+scoutfs get-fiemap -Lb $FILE | tail -n 1
+# punching a large hole from 127 to 175, removing 12 extents
+scoutfs punch-offline $FILE -o $((127 * 4096)) -l $((48 * 4096)) -V $vers
+scoutfs get-fiemap -Lb $FILE
+# again @2
+for h in $(seq 2 8 1024); do
+	scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
+done
+scoutfs get-fiemap -L $FILE
+# and again @0, punching away every remaining extent
+for h in $(seq 0 8 1024); do
+	scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
+done
+scoutfs get-fiemap -Lb $FILE
+
+t_pass
--- a/tests/tests/resize-devices.sh
+++ b/tests/tests/resize-devices.sh
@@ -19,8 +19,8 @@ df_free() {
 }

 same_totals() {
-	cur_meta_tot=$(statfs_total meta "$SCR")
-	cur_data_tot=$(statfs_total data "$SCR")
+	cur_meta_tot=$(statfs_total meta "$T_MSCR")
+	cur_data_tot=$(statfs_total data "$T_MSCR")

 	test "$cur_meta_tot" == "$exp_meta_tot" || \
 		t_fail "cur total_meta_blocks $cur_meta_tot != expected $exp_meta_tot"
@@ -34,10 +34,10 @@ same_totals() {
 # some slop to account for reserved blocks and concurrent allocation.
 #
 devices_grew() {
-	cur_meta_tot=$(statfs_total meta "$SCR")
-	cur_data_tot=$(statfs_total data "$SCR")
-	cur_meta_df=$(df_free MetaData "$SCR")
-	cur_data_df=$(df_free Data "$SCR")
+	cur_meta_tot=$(statfs_total meta "$T_MSCR")
+	cur_data_tot=$(statfs_total data "$T_MSCR")
+	cur_meta_df=$(df_free MetaData "$T_MSCR")
+	cur_data_df=$(df_free Data "$T_MSCR")

 	local grow_meta_tot=$(echo "$exp_meta_tot * 2" | bc)
 	local grow_data_tot=$(echo "$exp_data_tot * 2" | bc)
@@ -70,19 +70,13 @@ size_data=$(blockdev --getsize64 "$T_EX_DATA_DEV")
 quarter_meta=$(echo "$size_meta / 4" | bc)
 quarter_data=$(echo "$size_data / 4" | bc)

-# XXX this is all pretty manual, would be nice to have helpers
 echo "== make initial small fs"
-scoutfs mkfs -A -f -Q 0,127.0.0.1,$T_SCRATCH_PORT -m $quarter_meta -d $quarter_data \
-	"$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 || \
-		t_fail "mkfs failed"
-SCR="$T_TMPDIR/mnt.scratch"
-mkdir -p "$SCR"
-mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
-	"$T_EX_DATA_DEV" "$SCR"
+t_scratch_mkfs -A -m $quarter_meta -d $quarter_data
+t_scratch_mount

 # then calculate sizes based on blocks that mkfs used
-quarter_meta=$(echo "$(statfs_total meta "$SCR") * 64 * 1024" | bc)
-quarter_data=$(echo "$(statfs_total data "$SCR") * 4 * 1024" | bc)
+quarter_meta=$(echo "$(statfs_total meta "$T_MSCR") * 64 * 1024" | bc)
+quarter_data=$(echo "$(statfs_total data "$T_MSCR") * 4 * 1024" | bc)
 whole_meta=$(echo "$quarter_meta * 4" | bc)
 whole_data=$(echo "$quarter_data * 4" | bc)
 outsize_meta=$(echo "$whole_meta * 2" | bc)
@@ -93,59 +87,58 @@ shrink_meta=$(echo "$quarter_meta / 2" | bc)
 shrink_data=$(echo "$quarter_data / 2" | bc)

 # and save expected values for checks
-exp_meta_tot=$(statfs_total meta "$SCR")
-exp_meta_df=$(df_free MetaData "$SCR")
-exp_data_tot=$(statfs_total data "$SCR")
-exp_data_df=$(df_free Data "$SCR")
+exp_meta_tot=$(statfs_total meta "$T_MSCR")
+exp_meta_df=$(df_free MetaData "$T_MSCR")
+exp_data_tot=$(statfs_total data "$T_MSCR")
+exp_data_df=$(df_free Data "$T_MSCR")

 echo "== 0s do nothing"
-scoutfs resize-devices -p "$SCR" 
-scoutfs resize-devices -p "$SCR" -m 0
-scoutfs resize-devices -p "$SCR" -d 0
-scoutfs resize-devices -p "$SCR" -m 0 -d 0
+scoutfs resize-devices -p "$T_MSCR"
+scoutfs resize-devices -p "$T_MSCR" -m 0
+scoutfs resize-devices -p "$T_MSCR" -d 0
+scoutfs resize-devices -p "$T_MSCR" -m 0 -d 0

 echo "== shrinking fails"
-scoutfs resize-devices -p "$SCR" -m $shrink_meta
-scoutfs resize-devices -p "$SCR" -d $shrink_data
-scoutfs resize-devices -p "$SCR" -m $shrink_meta -d $shrink_data
+scoutfs resize-devices -p "$T_MSCR" -m $shrink_meta
+scoutfs resize-devices -p "$T_MSCR" -d $shrink_data
+scoutfs resize-devices -p "$T_MSCR" -m $shrink_meta -d $shrink_data
 same_totals

 echo "== existing sizes do nothing"
-scoutfs resize-devices -p "$SCR" -m $quarter_meta
-scoutfs resize-devices -p "$SCR" -d $quarter_data
-scoutfs resize-devices -p "$SCR" -m $quarter_meta -d $quarter_data
+scoutfs resize-devices -p "$T_MSCR" -m $quarter_meta
+scoutfs resize-devices -p "$T_MSCR" -d $quarter_data
+scoutfs resize-devices -p "$T_MSCR" -m $quarter_meta -d $quarter_data
 same_totals

 echo "== growing outside device fails"
-scoutfs resize-devices -p "$SCR" -m $outsize_meta
-scoutfs resize-devices -p "$SCR" -d $outsize_data
-scoutfs resize-devices -p "$SCR" -m $outsize_meta -d $outsize_data
+scoutfs resize-devices -p "$T_MSCR" -m $outsize_meta
+scoutfs resize-devices -p "$T_MSCR" -d $outsize_data
+scoutfs resize-devices -p "$T_MSCR" -m $outsize_meta -d $outsize_data
 same_totals

 echo "== resizing meta works"
-scoutfs resize-devices -p "$SCR" -m $half_meta
+scoutfs resize-devices -p "$T_MSCR" -m $half_meta
 devices_grew meta

 echo "== resizing data works"
-scoutfs resize-devices -p "$SCR" -d $half_data
+scoutfs resize-devices -p "$T_MSCR" -d $half_data
 devices_grew data

 echo "== shrinking back fails"
-scoutfs resize-devices -p "$SCR" -m $quarter_meta
-scoutfs resize-devices -p "$SCR" -m $quarter_data
+scoutfs resize-devices -p "$T_MSCR" -m $quarter_meta
+scoutfs resize-devices -p "$T_MSCR" -m $quarter_data
 same_totals

 echo "== resizing again does nothing"
-scoutfs resize-devices -p "$SCR" -m $half_meta
-scoutfs resize-devices -p "$SCR" -m $half_data
+scoutfs resize-devices -p "$T_MSCR" -m $half_meta
+scoutfs resize-devices -p "$T_MSCR" -m $half_data
 same_totals

 echo "== resizing to full works"
-scoutfs resize-devices -p "$SCR" -m $whole_meta -d $whole_data
+scoutfs resize-devices -p "$T_MSCR" -m $whole_meta -d $whole_data
 devices_grew meta data

 echo "== cleanup extra fs"
-umount "$SCR"
-rmdir "$SCR"
+t_scratch_umount

 t_pass
--- a/tests/tests/simple-inode-index.sh
+++ b/tests/tests/simple-inode-index.sh
@@ -32,7 +32,7 @@ echo "== dirs shouldn't appear in data_seq queries"
 mkdir "$DIR"
 ino=$(stat -c "%i" "$DIR")
 t_sync_seq_index
-query_index data_seq | grep "$ino\>"
+query_index data_seq | awk '($4 == "'$ino'")'

 echo "== two created files are present and come after each other"
 touch "$DIR/first"
@@ -92,13 +92,13 @@ test "$before" -lt "$after" || \
 # didn't skip past deleted dirty items
 #
 echo "== make sure dirtying doesn't livelock walk"
-dd if=/dev/urandom of="$DIR/dirtying" bs=4K count=1 >> $seqres.full 2>&1
+dd if=/dev/urandom of="$DIR/dirtying" bs=4K count=1 >> "$T_TMPDIR/seqres.full" 2>&1
 nr=1
 while [ "$nr" -lt 100 ]; do
-	echo "dirty/walk attempt $nr" >> $seqres.full
+	echo "dirty/walk attempt $nr" >> "$T_TMPDIR/seqres.full"
 	sync
 	dd if=/dev/urandom of="$DIR/dirtying" bs=4K count=1 conv=notrunc \
-		>> $seqres.full 2>&1
+		>> "$T_TMPDIR/seqres.full" 2>&1
 	scoutfs walk-inodes data_seq 0 -1 $DIR/dirtying >& /dev/null 
 	((nr++))
 done
--- a/tests/tests/simple-staging.sh
+++ b/tests/tests/simple-staging.sh
@@ -12,12 +12,12 @@ create_file() {

 	if [ "$blocks" != 0 ]; then
 		dd if=/dev/urandom bs=4096 count=$blocks of="$file" \
-			>> $seqres.full 2>&1
+			>> "$T_TMPDIR/seqres.full" 2>&1
 	fi

 	if [ "$remainder" != 0 ]; then
 		dd if=/dev/urandom bs="$remainder" count=1 of="$file" \
-			conv=notrunc oflag=append >> $seqres.full 2>&1
+			conv=notrunc oflag=append >> "$T_TMPDIR/seqres.full" 2>&1
 	fi
 }

@@ -78,7 +78,7 @@ create_file "$FILE" $((4096 * 1024))
 cp "$FILE"  "$T_TMP"
 nr=1
 while [ "$nr" -lt 10 ]; do
-	echo "attempt $nr" >> $seqres.full 2>&1
+	echo "attempt $nr" >> "$T_TMPDIR/seqres.full" 2>&1
 	release_vers "$FILE" stat 0 4096K
 	sync
 	echo 3 > /proc/sys/vm/drop_caches
--- a/tests/tests/totl-merge-read.sh
+++ b/tests/tests/totl-merge-read.sh
@@ -0,0 +1,50 @@
+#
+# Test that merge_read_item() correctly updates the sequence number when
+# combining delta items from multiple finalized log trees.  Each mount
+# sets a totl value in its own 3-bit lane (powers of 8) so that any
+# double-counting overflows the lane and is caught by: or(v, exp) != exp.
+#
+
+t_require_commands setfattr scoutfs
+t_require_mounts 5
+
+echo "== setup"
+for nr in $(t_fs_nrs); do
+	d=$(eval echo \$T_D$nr)	# expands the variable named T_D<nr>
+	for i in $(seq 1 2500); do : > "$d/f$nr$i"; done	# 2500 empty files named f<nr><i>
+done
+sync
+t_force_log_merge
+
+vals=(1 8 64 512 4096)	# 8^0 .. 8^4: a single bit in each of five 3-bit lanes
+expected=4681	# 1 + 8 + 64 + 512 + 4096
+n=0
+for nr in $(t_fs_nrs); do
+	d=$(eval echo \$T_D$nr)
+	v=${vals[$((n++))]}	# NOTE(review): vals has five entries; assumes t_fs_nrs yields no more than five mounts -- confirm
+	for i in $(seq 1 2500); do	# the totl name depends only on $i, so every mount adds $v to the same 2500 names
+		setfattr -n "scoutfs.totl.t.$i.0.0" -v $v "$d/f$nr$i"
+	done
+done
+
+t_trigger_arm_silent log_merge_force_partial $(t_server_nr)
+
+bad="$T_TMPDIR/bad"	# collects every line printed by the readers' awk checks
+for nr in $(t_fs_nrs); do
+	( while true; do
+		echo 1 > "$(t_debugfs_path $nr)/drop_weak_item_cache"	# presumably makes the next read bypass cached items -- confirm
+		scoutfs read-xattr-totals -p "$(eval echo \$T_M$nr)" | \
+			awk -F'[ =,]+' -v e=$expected 'or($2+0,e) != e'	# print lines whose 2nd field has bits outside $expected
+	done ) >> "$bad" &	# one background reader loop per mount
+done
+
+echo "expected $expected"
+t_force_log_merge
+t_silent_kill $(jobs -p)	# jobs -p lists the pids of the background reader loops
+test -s "$bad" && echo "double-counted:" && cat "$bad"	# only prints when $bad is non-empty
+
+echo "== cleanup"
+for nr in $(t_fs_nrs); do
+	find "$(eval echo \$T_D$nr)" -name "f$nr*" -delete
+done
+t_pass
--- a/utils/src/punch_offline.c
+++ b/utils/src/punch_offline.c
@@ -0,0 +1,159 @@
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+#include <argp.h>
+
+#include "sparse.h"
+#include "parse.h"
+#include "util.h"
+#include "ioctl.h"
+#include "cmd.h"
+
+/*
+ * Arguments collected by parse_opt().  Each *_set bit records that its
+ * option was given so that ARGP_KEY_FINI can reject a command line that
+ * is missing any of the required options.
+ */
+struct po_args {
+	char *path;
+	u64 offset;
+	u64 length;
+	u64 data_version;
+
+	unsigned offset_set:1,
+	         length_set:1,
+	         data_version_set:1;
+};
+
+/*
+ * Open args->path read-write and issue SCOUTFS_IOC_PUNCH_OFFLINE on it
+ * with the parsed offset, length and data version; no flags are set.
+ *
+ * Returns the ioctl's result on success.  A negative return from
+ * get_path() is passed back unchanged, and an ioctl failure is reported
+ * on stderr and returned as -errno.
+ */
+static int do_punch_offline(struct po_args *args)
+{
+	/* zero first so unassigned bytes aren't passed as stack garbage */
+	struct scoutfs_ioctl_punch_offline ioctl_args = {0};
+	int ret;
+	int fd;
+
+	fd = get_path(args->path, O_RDWR);
+	if (fd < 0)
+		return fd;
+
+	ioctl_args.offset = args->offset;
+	ioctl_args.len = args->length;
+	ioctl_args.data_version = args->data_version;
+	ioctl_args.flags = 0;
+
+	ret = ioctl(fd, SCOUTFS_IOC_PUNCH_OFFLINE, &ioctl_args);
+	if (ret < 0) {
+		/* capture errno before the close() below can overwrite it */
+		ret = -errno;
+		fprintf(stderr, "punch_offline ioctl failed: %s (%d)\n",
+			strerror(errno), errno);
+	}
+
+	close(fd);
+	return ret;
+}
+
+/*
+ * argp parser callback.  Option values are parsed into the struct po_args
+ * given to argp_parse() as its input, the single positional argument
+ * becomes the path, and ARGP_KEY_FINI checks that the path and all three
+ * options were supplied.
+ *
+ * Returns 0, or the non-zero result of a failed parse_u64()/parse_human().
+ */
+static int parse_opt(int key, char *arg, struct argp_state *state)
+{
+	struct po_args *args = state->input;
+	int ret = 0;
+
+	switch (key) {
+	case 'V': /* data version */
+		ret = parse_u64(arg, &args->data_version);
+		if (ret)
+			return ret;
+		args->data_version_set = 1;
+		break;
+	case 'o': /* offset */
+		ret = parse_human(arg, &args->offset);
+		if (ret)
+			return ret;
+		args->offset_set = 1;
+		break;
+	case 'l': /* length */
+		ret = parse_human(arg, &args->length);
+		if (ret)
+			return ret;
+		args->length_set = 1;
+		break;
+	case ARGP_KEY_ARG:
+		if (!args->path)
+			args->path = strdup_or_error(state, arg);
+		else
+			argp_error(state, "unknown extra argument given");
+		break;
+	case ARGP_KEY_FINI:
+		if (!args->path)
+			argp_error(state, "must provide path to file");
+		if (!args->offset_set)
+			argp_error(state, "must provide offset");
+		if (!args->length_set)
+			argp_error(state, "must provide length");
+		if (!args->data_version_set)
+			argp_error(state, "must provide data_version");
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/* All three options are required; parse_opt() enforces that at ARGP_KEY_FINI. */
+static struct argp_option options[] = {
+	{ "data-version", 'V', "VERSION", 0, "Data version of the file [Required]"},
+	{ "offset", 'o', "OFFSET", 0, "Offset (bytes or KMGTP units) in file to punch [Required]"},
+	{ "length", 'l', "LENGTH", 0, "Length of range (bytes or KMGTP units) of file to punch [Required]"},
+	{ NULL }
+};
+
+/* Option table, parser callback, positional-argument usage and help text. */
+static struct argp argp = {
+	options,
+	parse_opt,
+	"PATH",
+	"Make a (sparse) hole in the file at offset and with length"
+};
+
+/*
+ * Command entry point: parse argv into a zeroed po_args, then perform the
+ * punch.  Returns argp_parse()'s error if parsing fails, otherwise the
+ * result of do_punch_offline().
+ */
+static int punch_offline_cmd(int argc, char **argv)
+{
+	struct po_args po_args = {NULL};
+	int ret;
+
+	ret = argp_parse(&argp, argc, argv, 0, NULL, &po_args);
+	if (ret)
+		return ret;
+
+	return do_punch_offline(&po_args);
+}
+
+/* Constructor: registers the "punch-offline" command before main() runs. */
+static void __attribute__((constructor)) punch_offline_ctor(void)
+{
+	cmd_register_argp("punch-offline", &argp, GROUP_AGENT, punch_offline_cmd);
+}