Compare commits


98 Commits

Auke Kok
13fe77fc64 block_write_{begin,end} take a folio as well as page_mkwrite
Adds compat handlers for block_write_begin and block_write_end to take
a folio argument instead of a page, as required since v6.11-rc1-54-g9f04609f74ec.

To avoid maintaining two near-duplicate page_mkwrite functions, there
is now a complete page/folio ifdef split that handles either type. This
is ugly, but it's the most straightforward solution and avoids more
obscure macros.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-29 17:48:01 -07:00
Auke Kok
cd868b5bda Fix compat for list_lru_walk in el10
In el10, we don't need compat for list_lru_walk, as was intended
in 8b6418fb. However, the actual redefine from kc_list_lru_walk to
list_lru_walk was omitted. It wasn't needed until el10. Add it now.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-29 17:48:01 -07:00
Auke Kok
98b2fe2510 Switch to .iterate_shared
Since v4.6-rc3-29-g6192269444eb there has been a special readdir VFS
method that can be called for the same directory multiple times in
parallel, without any additional VFS locking. The VFS has provided a
WRAP_DIR_ITER() macro to re-wrap the method with extra locking, in case
the method wasn't safe for this.

With el10, the old .readdir method is now gone, and we have no choice
but to either use the wrapper, or just hook up our readdir() method to
the .iterate_shared op.

From what I can see, our implementation is safe to do this.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-29 17:48:01 -07:00
Auke Kok
e69d4426d8 generic_file_splice_read is removed
Based on my reading of the gfs2 driver, copy_splice_read appears to be
the safer choice over filemap_splice_read, as the latter may
potentially lead to cluster deadlocks.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-29 17:47:30 -07:00
Auke Kok
f8d40497bd Obsolete scoutfs_writepage
Due to folios, the kernel will call scoutfs_writepages() and this
becomes unused. It could be ported but the helper function to call isn't
exported anymore.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-29 17:47:30 -07:00
Auke Kok
5eaea548f8 Hook up buffer_migrate_folio
This works together with the removal of block_write_full_page(),
allowing us to drop the _writepage() method as long as we implement
_writepages(). Available since v5.19-rc3-395-g67235182a41c; this used
to be the .migratepage() method.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-29 17:46:54 -07:00
Auke Kok
da6dff7336 Fix unlocked pt_excl in scoutfs_readahead
This caller of scoutfs_get_block is now actively used in el10 and
the WARN_ON_ONCE(!lock) in data.c:567 triggers. Add the
scoutfs_per_task_add_excl/del calls in scoutfs_readpage,
scoutfs_readpages, and scoutfs_readahead to register the cluster
lock for scoutfs_get_block_read.

Add the entry unconditionally rather than guarded by the add_excl
return, since these methods can be reached reentrantly from a top-level
read that already added the entry. Skipping the I/O in that case
tripped the BUG_ON(!list_empty(pages)) in scoutfs_readpages and left
the page locked in scoutfs_readpage.

Move scoutfs_per_task_del before scoutfs_unlock to match the
ordering used by file.c read/write paths.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-29 13:55:55 -07:00
Auke Kok
788e12d6c8 Add sysfs default_groups usage
Since v5.1-rc3-29-gaa30f47cf666, and in el9, there are changes to reduce
the amount of boilerplate code needed to hook up lots of attribute files
using a .default_groups member. In el10, this is the required method as
.default_attrs has been removed. This touches every sysfs part that we
have.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-29 13:55:55 -07:00
Auke Kok
8bc8312441 set_blocksize() takes struct file argument
In v6.9-rc4-8-gead083aeeed9, this now takes a struct file argument,
adding to the ifdef salad we've got going on here.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-29 13:55:55 -07:00
Auke Kok
ac006094ff generic_fillattr() now wants the request_mask arg from caller
Since ~v6.5-rc1-95-g0d72b92883c6, generic_fillattr() asks us to pass
through the request_mask from the caller. This allows it to only
request a subset.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-29 13:55:55 -07:00
Auke Kok
800bced7d6 Shrinker API v4
Yet another major shrinker API evolution in v6.6-rc4-53-gc42d50aefd17.
The struct shrinker now has to be dynamically allocated. This is
purposely a backwards incompatible break.

Collapse the previous KC_ALLOC_SHRINKER, KC_INIT_SHRINKER_FUNCS,
and KC_REGISTER_SHRINKER macros into a single KC_SETUP_SHRINKER
macro. The three operations have to happen in different orders on
different kernel APIs (the name is needed at alloc time on el10
and at register time on KC_SHRINKER_NAME kernels), so coupling
them keeps the ordering correct per kernel.

Add KC_SHRINKER_IS_NULL so callers can detect shrinker_alloc()
failure on el10 and return -ENOMEM. The macro compiles to a
constant 0 on older kernels where the shrinker is an embedded
struct that cannot fail allocation.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-29 13:55:55 -07:00
Auke Kok
cc9b3ae3a9 bio_add_page is now __must_check
The return type has always been int, so we just need to add return
value checking and do something with the result. We could return
-ENOMEM here as well; either way it falls all the way through.

This is since v6.4-rc2-100-g83f2caaaf9cb.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-29 13:55:55 -07:00
Auke Kok
414b63004c Adjust for __assign_str() losing second argument
In v6.8-9146-gc759e609030c, the second argument for __assign_str() was
removed, as the second parameter is already derived from the __string()
definition and no longer needed. We have to do a little digging in
headers here to find the definition.

Note the missing `;` in a few places... it has to be added now.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-29 13:55:55 -07:00
Auke Kok
85997d0b80 RIP bd_inode
v6.9-rc4-29-g203c1ce0bb06 removes bd_inode. The canonical replacement is
bd_mapping->host, where applicable. We also have one use where we need
the mapping directly rather than the inode.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-29 13:55:55 -07:00
Auke Kok
a41f3050ae Fix compiler warnings for flex array definitions
The compiler now balks at structs that end in a zero-length array
member like `val[0]`, since technically the spec never sanctioned
them, so these become true flexible array members (`val[]`). As a
result, the compiler objects to some of our memcpy calls targeting
`struct->val` (u8's in our case), so in those spots we do the
opposite and memcpy to &struct->val[0].

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-29 13:55:55 -07:00
Auke Kok
82b2684e9b unaligned.h moved from asm/ to linux/
In v6.12-rc1-3-g5f60d5f6bbc1, asm/unaligned.h only included
asm-generic/unaligned.h and that was cleaned up from architecture
specific things. Everyone should now include linux/unaligned.h and the
former include was removed.

A quick peek at server.c shows that while it includes this header, it
no longer uses any function from it at all, so the include can just be
dropped.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-29 13:55:55 -07:00
Auke Kok
313945cbcf Use a/m/c_time accessor functions
In v6.6-rc5-1-g077c212f0344, one can no longer directly access the
inode's mtime, atime, etc. We have to go through these static inline
accessor functions to get to them. The compat is matched closely to
mimic the new functions.

Further back, ctime accessors were added in v6.5-rc1-7-g9b6304c1d537,
and need to be applied as well.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-29 13:55:55 -07:00
Auke Kok
2b221c1ba7 prandom_bytes and family removed, switch to get_random_bytes variants
In v6.1-rc5-2-ge9a688bcb193, get_random_u32_below() becomes available and
can start replacing prandom_bytes_max(). Switch to it where we can.

get_random_bytes() has been available since el7, so also replace
prandom_bytes() where we're using it.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-29 13:55:55 -07:00
Auke Kok
93442500a1 Avoid \Z negative pattern in test exclude list
In RHEL10, the grep version is bumped from 3.6 to 3.11, and grep
no longer recognizes the \Z escape.

We have two options: we can either use `grep -P` to keep it, or
choose a different `null` match to get an effectively empty exclude
list.

The latter is easy enough: by default, we can just exclude empty
lines ("^$"), obtaining exactly the same behavior as before.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-29 13:55:55 -07:00
Auke Kok
3616783836 mv overwrite error format changes in el10
This is somewhat cumbersome: we want to see the error message, but the
format changes enough to make this messy. We opt to change the golden
output to the new format, which only shows one of the arguments in its
error output: the thing that cannot be overwritten. We then add a
filter that uses sed patterns to rewrite the old output format to
exactly match the new format, so this works everywhere again, without
changing or adding filters that obscure error messages.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-29 13:55:55 -07:00
Auke Kok
4184d7557c Account for difference in stat output format for device nodes
The new format in el10 has non-hex output, separated by a comma. Add the
additional filter string so this works as expected.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-29 13:55:55 -07:00
Auke Kok
a5780b7d00 Fix el10 not skipping the format-version-forward-back test
The logic only accounted for single-digit versions. With el10, that
breaks.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-29 13:55:55 -07:00
Auke Kok
ea9e4e9013 Stop using egrep
egrep is no longer in el10, so replace it with `grep -E` everywhere.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-29 13:55:55 -07:00
Zach Brown
af31b9f1e8 Merge pull request #306 from versity/zab/v1.30
v1.30 Release
2026-04-22 10:43:17 -07:00
Zach Brown
ad65116d8f v1.30 Release
Finish the release notes for the 1.30 release.

Signed-off-by: Zach Brown <zab@versity.com>
2026-04-21 16:43:12 -07:00
Zach Brown
e20765a9c7 Merge pull request #300 from versity/auke/more_false_positive_failures
Auke/more false positive failures: xfs lockdep miss, newline
2026-04-17 09:17:50 -07:00
Zach Brown
066da5c2a2 Merge pull request #297 from versity/auke/quota_mod_trans_hold
Hold transaction in scoutfs_quota_mod_rule to prevent alloc corruption.
2026-04-17 09:16:41 -07:00
Auke Kok
7eacc7139c Hold transaction in scoutfs_quota_mod_rule to prevent alloc corruption.
scoutfs_quota_mod_rule calls scoutfs_item_create/delete which use
the transaction allocator but it never held it. Without the hold,
a concurrent transaction commit can call scoutfs_alloc_init to
reinitialize the allocator while dirty_alloc_blocks is in the middle
of setting up the freed list block. This overwrites alloc->freed with
the server's fresh (empty) state, causing a blkno mismatch BUG_ON
in list_block_add.

Reproduced by stressing concurrent quota add/del operations across
mounts. Crashdump analysis confirms dirty_list_block COW'd a freed
block (fr_old=9842, new blkno=9852) but by the time list_block_add
ran, freed.ref.blkno was 0 with first_nr=0 and total_nr=0: the freed
list head had been zeroed by a concurrent alloc_init.

Fix by adding scoutfs_hold_trans/scoutfs_release_trans around the
item modification in scoutfs_quota_mod_rule, preventing transaction
commit from racing with the allocator use.

Rename the 'unlock' label to 'release' since 'out' now directly
does the unlock. The unlock safely handles a NULL lock.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-16 16:20:47 -07:00
Auke Kok
9e3b01b3b4 Filter newlines out of dmesg.new
Filter empty lines out of dmesg so dmesg.new doesn't trigger a test
failure, without filtering too broadly. I don't want to overly process
dmesg, so do this as late as possible.

The xfs lockdep removal patterns can leave behind a leading/trailing
empty line, causing a failure despite the explicit removal of the
lockdep false positive.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-15 10:36:28 -07:00
Auke Kok
876c233f06 Ignore another xfs lockdep class
This already caught xfs_nondir_ilock_class, but recent CI runs
have been hitting xfs_dir_ilock_class, too.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-15 10:36:28 -07:00
Zach Brown
6aa5876c71 Merge pull request #301 from versity/auke/el7_uninit_read_seq
Squelch gcc uninitialized warning on el7
2026-04-15 09:58:23 -07:00
Auke Kok
7a9f9ec698 Squelch gcc uninitialized warning on el7
The gcc version in el7 can't determine that scoutfs_block_check_stale
won't return ret = 0 when the input ret value is < 0, and it errors
out because we might call alloc_wpage with an uninitialized read_seq.
Initialize it to 0 to avoid this.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-14 15:09:20 -04:00
Zach Brown
fc0fc1427f Merge pull request #296 from versity/auke/indx_key_delete
Fix indx delete using wrong xid, leaving orphans. && Add basic-xattr-indx tests.
2026-04-13 14:34:37 -07:00
Zach Brown
ec68845201 Merge pull request #289 from versity/auke/merge_read_item_stale_seq
Update seq when merging deltas from partial log merge.
2026-04-13 14:10:37 -07:00
Auke Kok
5e2009f939 Avoid double counting deltas from non-input finalized log trees.
Readers currently accumulate all finalized log tree deltas into
a single bucket for deciding whether they are already in fs_root
or not. But finalized trees that aren't inputs to the current merge
will have higher seqs, and thus we may double-apply deltas
already merged into fs_root.

To distinguish, scoutfs_totl_merge_contribute() needs to know the
merge status item seq.  We change wkic's get_roots() from using the
SCOUTFS_NET_CMD_GET_ROOTS RPC to reading the superblock directly.
This is needed because totl merge resolution has to use the same data
as the btree roots it is operating on, thus we can't grab it from a
SCOUTFS_NET_CMD_GET_ROOTS packet - it likely is different.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-10 13:50:21 -07:00
Auke Kok
8bdc20af21 Rename/reword FINALIZED to MERGE_INPUT.
These mislabeled members and enums were clearly not describing the
actual data being handled, obfuscating the intent of avoiding mixing
merge-input items with non-merge-input items.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-10 13:50:21 -07:00
Auke Kok
857a39579e Clear roots when retrying due to stale btree blocks.
Before deltas were added this code path was correct, but with
deltas we can't just retry this without clearing &root, since
it would potentially double count.

The condition where this could happen is when there are deltas in
several finalized log trees, and we've made progress towards reading
some of them, and then encounter a stale btree block. The retry
would not clear the collected trees, apply the same delta as was
already applied before the retry, and thus double count.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-10 13:50:21 -07:00
Auke Kok
38d36c9f5c Update seq when merging deltas from partial log merge.
Two different clients can write deltas for totl indexes at the same
time, recording their changes. When merged, a reader should apply both
in order, and only once. To do so, the seq determines whether the delta
has been applied already.

The code fails to update the seq while walking the trees for deltas to
apply. As a result, when processing subsequent trees, it could
re-process deltas already applied. In the case of a large negative
delta (e.g. removal of large numbers of files), the totl value could
become negative, resulting in quota lockout.

The fix is simple: advance the seq when reading partial delta merges
to avoid double counting.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-10 13:50:21 -07:00
Auke Kok
b724567b2a Add log_merge_force_partial trigger for testing partial merges.
Add a trigger that forces btree_merge() to return -ERANGE after
modifying a leaf's worth of items, causing many small partial merges
per merge cycle. This is used by tests to reliably reproduce races
that depend on partial merges splicing items into fs_root while
finalized logs still exist.

The trigger check lives inside btree_merge() where it can observe
actual item modification progress, rather than overriding the
caller's dirty byte limit argument which applies to the whole
writer context.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-10 12:25:30 -07:00
Auke Kok
add1da10dc Add test for stale seq in merge delta combining.
merge_read_item() fails to update found->seq when combining delta items
from multiple finalized log trees. Add a test case to replicate the
conditions of this issue.

Each of 5 mounts sets totl value 1 on 2500 shared keys, giving an
expected total of 5 per key.  Any total > 5 proves double-counting
from a stale seq.

The log_merge_force_partial trigger forces many partial merges per
cycle, creating the conditions where stale-seq items get spliced into
fs_root while finalized logs still exist.  Parallel readers on all
mounts race against this window to detect double-counted values.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-10 12:25:30 -07:00
Auke Kok
b9c49629a2 Add basic-xattr-indx tests.
We had no basic testing for `scoutfs read-xattr-index` whatsoever. This
adds the basic negative-argument tests, lifecycle tests, the
deduplicated reads, and partial removal.

This exposes a bug in deletion where the indx entry isn't cleaned up
on inode delete.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-08 13:45:56 -07:00
Auke Kok
9737009437 Fix indx delete using wrong xid, leaving orphans.
During inode deletion, scoutfs_xattr_drop forgot to set the xid
of the xattr after calling parse_indx_key, which hardcodes xid=0 and
leaves setting it as the caller's responsibility. delete_force then
deletes the wrong key, and returns no error on nonexistent keys.

So now there is a pending deletion for a nonexistent indx and an
orphan indx entry in the tree. Subsequent calls to `scoutfs
read-xattr-index` will thus return entries for deleted inodes.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-08 11:48:47 -07:00
Zach Brown
3d54ae03e6 Merge pull request #295 from versity/auke/xfs_lockdep_ignore
Avoid xfs lockdep false positive dmesg errors.
2026-04-03 09:46:44 -07:00
Auke Kok
e27ec0add6 Avoid xfs lockdep false positive dmesg errors.
This xfs lockdep stack trace has at least 2 variants around
fs_reclaim, so try and capture it not too precisely here.

We can remove "lockdep disabled" in the $re grep -v, because it
can affect both this and the kasan one.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-01 14:25:48 -07:00
Zach Brown
5457741672 Merge pull request #292 from versity/zab/v1.29
v1.29 Release
2026-03-25 22:36:28 -07:00
Zach Brown
4bd7a38b05 v1.29 Release
Finish the release notes for the 1.29 release.

Signed-off-by: Zach Brown <zab@versity.com>
2026-03-25 16:33:31 -07:00
Zach Brown
087b2e85ab Merge pull request #291 from versity/auke/orphan-log-merge
Auke/orphan log merge
2026-03-25 16:26:24 -07:00
Auke Kok
8a730464ab Add orphan-log-trees test and reclaim_skip_finalize trigger
Add a reclaim_skip_finalize trigger that prevents reclaim from
setting FINALIZED on log_trees entries.  The test arms this trigger,
force-unmounts a client to create an orphan, and verifies the log
merge succeeds without timeout and the orphan reclaim message
appears in dmesg.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-03-25 10:39:40 -07:00
Auke Kok
daea8d5bc1 Reclaim orphaned log_trees entries from unmounted clients
An unfinalized log_trees entry whose rid is not in mounted_clients
is an orphan left behind by incomplete reclaim.  Previously this
permanently blocked log merges because the finalize loop treated it
as an active client that would never commit.

Call reclaim_open_log_tree for orphaned rids before starting a log
merge.  Once reclaimed, the existing merge and freeing paths include
them normally.

Also skip orphans in get_stable_trans_seq so their open transaction
doesn't artificially lower the stable sequence.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-03-25 06:47:22 -07:00
Zach Brown
1d60f684d2 Merge pull request #237 from versity/auke/hole_punch_ioctl_test
Punch Offline Ioctl, tests, scoutfs subcmd.
2026-03-19 13:51:44 -07:00
Zach Brown
a62708ac19 Merge pull request #286 from versity/auke/more-inode-deletion
Also use orphan scan wait code for remote unlink parts.
2026-03-16 14:33:20 -07:00
Auke Kok
16b1710541 Punch-offline tests.
Basic testing for the punch-offline ioctl code. The tests consist of a
bunch of negative tests to make sure things that are expressly not
allowed fail, followed by a bunch of known-expected-outcome tests that
punch holes in several patterns and verify them.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-03-13 15:45:52 -07:00
Auke Kok
440c3dc769 Add punch-offline scoutfs subcommand.
A minimal punch_offline ioctl wrapper. The argument style is adopted
from stage/release.

Following stage/release's option syntax, this calls the punch-offline
ioctl, punching any offline extent within the designated range given
by offset and length.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-03-13 15:45:52 -07:00
Zach Brown
0fd172c5d9 Add punch_offline ioctl
Add an archive layer ioctl for converting offline extents into sparse
extents without relying on or modifying data_version.  This is helpful
when working with files with very large sparse regions.

Signed-off-by: Zach Brown <zab@versity.com>
Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-03-13 15:44:18 -07:00
Zach Brown
48c1f221b3 Merge pull request #285 from versity/auke/s-i-i-grep-awk-fix
Use awk matching for ino.
2026-03-13 13:51:39 -07:00
Zach Brown
34713f3559 Merge pull request #290 from versity/auke/dirent_zero_pad
Auke/dirent zero pad
2026-03-06 10:41:04 -08:00
Auke Kok
137abc1fe2 Zero scoutfs_data_extent_val padding.
The initialization here doesn't clear __pad[], so its contents leak
to disk. Use a struct initializer to ensure it is zeroed.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-03-05 16:20:06 -08:00
Auke Kok
64fcbdc15e Zero out dirent padding to avoid leaking to disk.
This allocation currently leaks through __pad[7], which is written
to disk. Use an initializer to enforce zeroing of the pad. The name
member is written right after.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-03-05 16:20:06 -08:00
Zach Brown
d9c951ff48 Merge pull request #287 from versity/auke/misc_fixes
Unsorted misc. fixes for minor/cosmetic issues.
2026-03-02 10:12:26 -08:00
Auke Kok
eaae92d983 Don't send -EINVAL as u8 over the network.
The caller sends the return value of this inline function as a u8. If
we return -EINVAL, it maps to 234, which is outside our enum range.
Assume this was meant to return SCOUTFS_NET_ERR_EINVAL, which is a
defined constant.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-02-26 14:02:42 -05:00
Auke Kok
43f3dd7259 Invalid address check logic.
These boolean checks are all mutually exclusive, meaning the combined
check will always succeed due to the negation. Instead of && it
needs to use ||.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-02-26 14:02:42 -05:00
Auke Kok
7d96cf9b96 Remove copy/paste duplicate op flag check.
The exact same 2 lines are repeated here. This suggests there may have
been intent to add another check, but from what I can see there isn't
anything left that needs checking.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-02-26 14:02:41 -05:00
Auke Kok
03e22164db Return error on scoutfs_forest_setup().
This setup function always returned 0, even on error, causing
initialization to continue despite the error.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-02-26 14:02:41 -05:00
Zach Brown
e0948ec6de Merge pull request #281 from versity/auke/dotfull-file-seqres
Put `.full` file in $T_TMPDIR.
2026-02-26 09:15:22 -08:00
Auke Kok
d0c1c28438 Use awk matching for ino.
This test regularly fails here because the grep is greedy and can
match inodes ending in the same digits as the one we're looking for.
Make it use the same awk pattern used below.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-02-25 13:43:20 -05:00
Auke Kok
65808c2cb2 Also use orphan scan wait code for remote unlink parts.
The fix added in v1.26-17-gef0f6f8a does a good job of avoiding the
intermittent test failures for the section it was added to. The remote
unlink section could use it as well, as it suffers from the same
intermittent failures.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-02-24 14:12:03 -08:00
Zach Brown
73573d2c2b Merge pull request #283 from versity/auke/rever
Delete stray file from golden directory.
2026-02-20 10:12:21 -08:00
Auke Kok
f5db935afc Delete stray file from golden directory.
Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-02-11 14:05:32 -05:00
Zach Brown
831faff7d2 Merge pull request #282 from versity/zab/v1.28
v1.28 Release
2026-02-06 09:28:52 -08:00
Zach Brown
8dad826f88 v1.28 Release
Finish the release notes for the 1.28 release.

Signed-off-by: Zach Brown <zab@versity.com>
2026-02-05 09:47:05 -08:00
Auke Kok
e2f3f2e060 Put .full file in $T_TMPDIR.
This file was put into $CWD by the test scripts for no real good
reason. I suppose somewhere $seqres was supposed to be set before
these writes happened. Just write them to the test temp folder for
good measure for now.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-02-02 14:37:40 -08:00
Zach Brown
3a05c69643 Merge pull request #279 from versity/auke/basic-acl-consistency
Auke/basic acl consistency (test/reproduction)
2026-02-02 10:32:30 -08:00
Auke Kok
533f309aec Switch to .get_inode_acl() to avoid rcu corruption.
In el9.6, the kernel VFS no longer goes through xattr handlers to
retrieve ACLs, but instead calls the FS driver's .get_{inode_}acl
method.  In the initial compat version we hooked up to .get_acl, given
the identical name that was used in the past.

However, this results in caching issues, as was encountered by customers
and exposed in the added test case `basic-acl-consistency`. The result
is that some group ACL entries may appear randomly missing. Dropping
caches may temporarily fix the issue.

The root cause of the issue is that the VFS now has 2 separate paths to
retrieve ACLs from the FS driver, and they have conflicting
implications for caching. `.get_acl` is purely meant for filesystems
like overlay/ecryptfs where no caching should ever go on, as they are
fully passthrough only. Filesystems with dentries (i.e. all normal
filesystems) should not expose this interface, and instead expose the
.get_inode_acl method. And indeed, in introducing the new interface,
the upstream kernel converted all but a few filesystems to use
.get_inode_acl().

The functional change in the driver is to detach KC_GET_ACL_DENTRY and
introduce KC_GET_INODE_ACL to handle the new (and required) interface.
KC_SET_ACL_DENTRY is detached because it is a different changeset in
the kernel, and we should separate these for good measure now.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-01-30 11:31:43 -08:00
Auke Kok
0ef22b3c44 Add basic ACL consistency test case.
This test case is used to detect and reproduce a customer issue we're
seeing where the new .get_acl() method API and underlying changes in
el9_6+ are causing ACL cache fetching to return inconsistent results,
which shows as missing ACLs on directories.

This particular sequence is consistent enough that it warrants making
it into a specific test.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-01-22 12:23:38 -08:00
Auke Kok
85ffba5329 Update existing tests to use scratch helpers.
Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-01-20 12:35:43 -08:00
Auke Kok
553e6e909e Scratch mount test helpers.
Adds mkfs/mount/umount helpers that handle all the basics of making,
mounting, and unmounting scratch devices. The mount/unmount helpers
create "$T_MSCR", which lives in "$T_TMPDIR".

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-01-20 12:35:09 -08:00
Zach Brown
9b569415f2 Merge pull request #276 from versity/zab/v1.27
v1.27 Release
2026-01-15 19:36:38 -08:00
Zach Brown
6a1e136085 v1.27 Release
Finish the release notes for the 1.27 release.

Signed-off-by: Zach Brown <zab@versity.com>
2026-01-15 14:21:53 -08:00
Zach Brown
7ca789c837 Merge pull request #278 from versity/zab/test_sync_before_crash
Have run-tests monitor sync before crashing
2026-01-15 14:03:26 -08:00
Zach Brown
4d55fe6251 Have run-tests monitor sync before crashing
There have been a few failures where output is generated just before we
crash but it didn't have a chance to be written.  Add a best-effort
background sync before crashing.  There's a good chance it'll hang if
the system is stuck so we don't wait for it directly, just for .. some
time to pass.

Signed-off-by: Zach Brown <zab@versity.com>
2026-01-15 10:41:44 -08:00
Zach Brown
8f896d9783 Merge pull request #277 from versity/zab/avoid_lock_shrink_storm_hangs
Zab/avoid lock shrink storm hangs
2026-01-14 11:13:09 -08:00
Zach Brown
e54f8d3ec0 Don't shutdown server from sending to fencing client
Errors from lock server calls typically shut the server down.

During normal unmount a client's locks are reclaimed before the
connection is disconnected.  The lock server won't try to send to
unmounting clients.

Clients whose connections time out can cause ENOTCONN errors.  Their
connection is freed before they're fenced and their locks are reclaimed.
The server can try to send to the client for a lock that's disconnected
and get a send error.

These errors shouldn't shut down the server.  The client is either going
to be fenced and have the locks reclaimed, ensuring forward progress, or
the server is going to shutdown if it can't fence.

This was seen in testing as multiple clients were timed out.

Signed-off-by: Zach Brown <zab@versity.com>
2026-01-13 15:34:55 -08:00
Zach Brown
d89e16214d Simplify fence-and-reclaim fence execution check
The fence-and-reclaim test runs a bunch of scenarios and makes sure that
the fence agent was run on the appropriate mount's rids.

Unfortunately the checks were racy.  The check itself only looked at
the log once to see if the rid had been fenced.  Each check had steps
before that would wait until the rid should have been fenced and could
be checked.

Those steps were racy.  They'd do things like make sure a fence request
wasn't pending, but never waited for it to be created in the first
place.  They'd falsely indicate that the log should be checked and when
the rid wasn't found in the log the test would fail.  In logs of
failures we'd see that the rids were fenced after this test failed and
moved on to the next.

This simplifies the checks.  It gets rid of all the intermediate steps
and just waits around for the rid to be fenced, with a timeout.  This
avoids the flaky tests.

Signed-off-by: Zach Brown <zab@versity.com>
2026-01-13 15:34:55 -08:00
Zach Brown
b468352254 Add t_wait_until_timeout
Add a test helper for waiting for a command to return success which will
fail the test after a timeout.

Signed-off-by: Zach Brown <zab@versity.com>
2026-01-13 15:34:55 -08:00
Zach Brown
0eb9dfebdc Allow forced unmount errors in lock invalidation
Lock invalidation has assertions for critical errors, but it doesn't
allow the synthetic errors that come from forced unmount severing the
client's connection to the world.

Signed-off-by: Zach Brown <zab@versity.com>
2026-01-13 15:34:55 -08:00
Zach Brown
f5750de244 Search messages in rbtree instead of lists
The net layer was initially built around send queue lists with the
presumption that there wouldn't be many messages in flight and that
responses would be sent roughly in order.

In the modern era, we can have tens of thousands of lock request messages
in flight.  This led to O(n^2) processing in quite a few places as recv
processing searched for either requests to complete or responses to
free.

This adds messages to two rbtrees, indexing either requests by their id
or responses by their send sequence.  Recv processing can find messages
in O(log n).  This patch intends to be minimally disruptive.  It's only
replacing the search of the send and resend queues in the recv path with
rbtrees.  Other uses of the two queue lists are untouched.

On a single node, with ~40k lock shrink attempts in flight, we go from
processing ~800 total request/grant request/response pairs per second to
~60,000 per second.

Signed-off-by: Zach Brown <zab@versity.com>
2026-01-13 15:32:55 -08:00
Zach Brown
f0c7996612 Limit client locks with option instead of shrinker
The use of the VM shrinker was a bad fit for locks.  Shrinking a lock
requires a round trip with the server to request a null mode.  The VM
treats the locks like a cache, as expected, which leads to huge amounts
of locks accumulating and then being shrunk in bulk.  This creates a
huge backlog of locks making their way through the network conversation
with the server that implements invalidating to a null mode and freeing.
It starves other network and lock processing, possibly for minutes.

This removes the VM shrinker and instead introduces an option that sets
a limit on the number of idle locks.  Once the number of locks exceeds
the limit, we try to free only the oldest idle lock at each lock call.  This
results in a lock freeing pace that is proportional to the allocation of
new locks by callers and so is throttled by the work done while callers
hold locks.  It avoids the bulk shrinking of tens of thousands of locks
that we see in the field.
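A toy shell model of the pacing idea (not the kernel code; all names here are made up): each allocation over the limit frees the single oldest lock first, so freeing keeps pace with allocation instead of happening in bulk.

```shell
# Keep at most $limit "locks"; when allocating over the limit, free
# the single oldest one first. Toy model of the throttling described
# above, not the kernel implementation.
limit=3
locks=""
nlocks=0

alloc_lock() {
    if [ "$nlocks" -ge "$limit" ]; then
        locks="${locks#* }"          # drop the oldest (first) entry
        nlocks=$(( nlocks - 1 ))
    fi
    locks="${locks:+$locks }$1"
    nlocks=$(( nlocks + 1 ))
}

for i in 1 2 3 4 5; do alloc_lock "l$i"; done
echo "$locks"                        # never more than $limit entries
```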

Signed-off-by: Zach Brown <zab@versity.com>
2026-01-08 10:58:50 -08:00
Zach Brown
5143927e07 Merge pull request #275 from versity/auke/qht_slow_umount_pr
Unmounts can be slow and break quorum-heartbeat-timeout
2026-01-08 09:35:23 -08:00
Auke Kok
f495f52ec9 Unmounts can be slow and break quorum-heartbeat-timeout
We observe that unmount in this test can consume up to 10sec of time
before the test proceeds to record heartbeat timeout elections by
followers.

When this happens, elections and new leaders happen before unmount even
completes. This indicates that heartbeat packets from the unmounting
node cease immediately, but the unmount takes longer doing other things.
The timeouts then trigger, possibly during the unmount.

The result is that with timeouts of 3 seconds, we're not actually
waiting for an election at all. It already happened 7 seconds ago. The
code here just "sees" that it happens a few hundred ms after it started
looking for it.

There are a few ways to approach this fix. We could record the actual timestamp
of the election, and compare it with the actual timestamp of the last
heartbeat packet. This would be conclusive, and could disregard any
complication from umount taking too long. But it also means adding
timestamping in various places, or having to rely on tcpdump with packet
processing.

We can't just record $start before unmount; we would still violate the
part of the test that checks that elections didn't happen too late,
especially in the 3sec test case if unmount takes 10sec.

The simplest solution is to unmount in a background job and circle
around later to `wait` for it, ensuring we can re-mount without ill effect.
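In sketch form, with a short sleep standing in for the slow unmount (all names and timings here are illustrative, not the actual test's):

```shell
# Run the slow unmount in the background so the election-timing checks
# start immediately, then `wait` for it before re-mounting.
slow_unmount() { sleep 2; }          # stand-in for the real umount

start=$(date +%s)
slow_unmount &
bg_pid=$!

# ... election-timing checks run here, concurrently with the unmount ...
elapsed=$(( $(date +%s) - start ))   # small: checks weren't blocked

wait "$bg_pid"                       # unmount done; safe to re-mount
total=$(( $(date +%s) - start ))
```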

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-01-08 09:05:40 -08:00
Zach Brown
3dafeaac5b Merge pull request #273 from versity/clk/inode_deletion
Clk/inode deletion
2026-01-07 12:20:12 -08:00
Chris Kirby
ef0f6f8ac2 Fix race in inode-deletion test
Due to an iput race, the "unlink wait for open on other mount"
subtest can fail. If the unlink happens inline, then the test
passes. But if the orphan scanner has to complete the unlink
work, it's possible that there won't be enough log merge work
for the scanner to do the cleanup before we look at the seq index.

Add SCOUTFS_TRIGGER_LOG_MERGE_FORCE_FINALIZE_OURS, to allow
forcing a log merge. Add new counters, log_merge_start and
log_merge_complete, so that tests can see that a merge has happened.

Then we have to wait for the orphan scanner to do its work.
Add a new counter, orphan_scan_empty, that increments each time
the scanner walks the entire inode space without finding any
orphans. Once the test sees that counter increment, it should be
safe to check the seq index and see that the unlinked inode is gone.
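The resulting wait could be sketched like this, with a plain file standing in for the sysfs counter and a background job simulating the scanner (the real test would poll the actual counter path):

```shell
# Snapshot orphan_scan_empty, then poll until it increments; only then
# is it safe to check the seq index. A plain file stands in for the
# real counter here.
counter="./orphan_scan_empty"
echo 0 > "$counter"
before=$(cat "$counter")

( sleep 1; echo 1 > "$counter" ) &   # simulate a full empty scan

until [ "$(cat "$counter")" -gt "$before" ]; do
    sleep 1
done
# safe to check the seq index now
```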

Signed-off-by: Chris Kirby <ckirby@versity.com>
2026-01-07 08:29:38 -06:00
Chris Kirby
c0cd29aa1b Fix run-test.sh buffer multiplier breakage
The /sys/kernel/debug/tracing/buffer_size_kb file always reads as
"7 (expanded: 1408)". So the -T option to run-test.sh won't work,
because it tries to multiply that string by the given factor.

It always defaults to 1408 on every platform we currently support.
Just use that value so we can specify -T in CI runs.
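The failure mode and the fix can be sketched directly (values taken from the commit message):

```shell
# buffer_size_kb reads back as a string like "7 (expanded: 1408)", so
# multiplying the raw read fails. The fix uses the known expanded
# default directly.
raw="7 (expanded: 1408)"

# Broken: $(( raw * 4 )) would error out on the parenthesized suffix.
default_kb=1408
factor=4
want_kb=$(( default_kb * factor ))
echo "$want_kb"                      # prints 5632
```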

Signed-off-by: Chris Kirby <ckirby@versity.com>
2025-12-18 15:05:48 -06:00
Zach Brown
50bff13f21 Merge pull request #266 from versity/zab/increase_move_empty_budget
Increase server commit block budget for alloc move
2025-12-18 12:44:20 -08:00
Zach Brown
de70ca2372 Increase server commit block budget for alloc move
A few callers of alloc_move_empty in the server were providing a budget
that was too small.  Recent changes to extent_mod_blocks increased the
max budget that is necessary to move extents between btrees.  The
existing WAG of 100 was too small for trees of height 2 and 3.  This
caused looping in production.

We can increase the move budget to half the overall commit budget, which
leaves room for a height of around 7 each.  This is much greater than we
see in practice because the size of the per-mount btrees is effectively
limited by both watermarks and thresholds to commit and drain.

Signed-off-by: Zach Brown <zab@versity.com>
2025-12-17 14:22:04 -06:00
Zach Brown
5af1412d5f Merge pull request #270 from versity/auke/bdev_autoloading
Avoid block device autoloading warning.
2025-12-17 11:06:32 -08:00
Zach Brown
0a2b2ad409 Merge pull request #269 from versity/auke/tap_status_msg
Include t_fail status in tap output.
2025-12-17 11:04:00 -08:00
Auke Kok
6c4590a8a0 Avoid block device autoloading warning.
It's possible to trigger the block device autoloading mechanism
with a mknod()/stat() pair. This mechanism has long been declared
obsolete, so it triggers a dmesg warning since el9_7, which then
fails the test. You may need to `rmmod loop` to reproduce.

Avoid this by not triggering a loop autoload at all - we just make a
different blockdev. Choosing major `42` here should avoid any autoload
mechanism, as this number is explicitly reserved for demo drivers and
should never trigger an autoload.

We also just ignore the warning line in dmesg. Other tests might
still trigger it, as might background activity running during the
test.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2025-12-08 13:04:58 -08:00
Auke Kok
e1a6689a9b Include t_fail status in tap output.
The tap output file was not yet complete as it failed to include
the contents of `status.msg`. In a few cases, that would mean it
lacked important context.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2025-12-04 14:09:39 -05:00
78 changed files with 3319 additions and 663 deletions


@@ -1,6 +1,83 @@
Versity ScoutFS Release Notes
=============================
---
v1.30
\
*Apr 21, 2026*
Fix a problem reading the accumulated totals of contributing .totl.
xattrs when log merging is in progress. The problem caused readers
of the totals to calculate the sums incorrectly.
Fix a problem updating quota rules. There was a race where updates
could be corrupted if they happened while a transaction was being
written.
Fix a problem deleting files with .indx. xattrs. The internal indexing
metadata wouldn't be properly deleted, so the files would still appear
present and visible in the index even though they no longer existed.
---
v1.29
\
*Mar 25, 2026*
Add a repair mechanism for mount logs that weren't properly resolved as
mounts left the cluster. The presence of these logs prevents log
merging from making forward progress and the backlog of logs over time
can cause operations to slow to a crawl. With the repair mechanism in
place the orphaned logs don't stop merging and operations proceed as
usual.
Add an ioctl for turning offline unmapped file regions into sparse
regions.
---
v1.28
\
*Feb 5, 2026*
Fix a bug that led to incorrect negative caching of ACL entries
starting in version 9.6 of distribution kernels in the enterprise linux
family. This would manifest as ACLs seemingly disappearing,
particularly default ACLs on directories. The persistent ACLs always
existed but because of internal API incompatibility some readers
couldn't see them and would cache that they didn't exist.
---
v1.27
\
*Jan 15, 2026*
Switch away from using the general VM cache reclaim machinery to reduce
idle cluster locks in the client. The VM treated locks like a cache and
let many accumulate, presuming that it would be efficient to free them
in batches. Lock freeing requires network communication, so this could
result in enormous backlogs of network messages (on the order of
hundreds of thousands) and significant delays of other network
messaging.
Fix inefficient network receive processing while many messages are in
the send queue. This consumed sufficient CPU to cause significant
stalls, perhaps resulting in hung task warning messages due to delayed
lock message delivery.
Fix a server livelock case that could happen while committing client
transactions that contain a large amount of freed file data extents.
This would present as client tasks hanging and a server task spinning
consuming cpu.
Fix a rare server request processing failure that didn't handle
retransmission of a request that a previous server had partially processed.
This would present as hung client tasks and repeated "error -2
committing log merge: getting merge status item" kernel messages.
Fix an unnecessary server shutdown during specific circumstances in
client lock recovery. The shutdown was due to server state and was
ultimately harmless. The next server that started up would proceed
accordingly.
---
v1.26
\


@@ -60,11 +60,11 @@ scoutfs-y += \
#
.PHONY: $(src)/check_exported_types
$(src)/check_exported_types:
@if egrep '\<[us](8|16|32|64\>)' $(src)/format.h $(src)/ioctl.h; then \
@if grep -E '\<[us](8|16|32|64\>)' $(src)/format.h $(src)/ioctl.h; then \
echo "no raw types in exported headers, preface with __"; \
exit 1; \
fi
@if egrep '\<__packed\>' $(src)/format.h $(src)/ioctl.h; then \
@if grep -E '\<__packed\>' $(src)/format.h $(src)/ioctl.h; then \
echo "no __packed allowed in exported headers"; \
exit 1; \
fi


@@ -479,10 +479,148 @@ ifneq (,$(shell grep '^unsigned int stack_trace_save' include/linux/stacktrace.h
ccflags-y += -DKC_STACK_TRACE_SAVE
endif
# v6.1-rc1-4-g7420332a6ff4
#
# .get_acl() method now has dentry arg (and mnt_idmap). The old get_acl has been renamed
# to get_inode_acl() and is still available as well, but has an extra rcu param.
ifneq (,$(shell grep 'struct posix_acl ...get_acl..struct mnt_idmap ., struct dentry' include/linux/fs.h))
ccflags-y += -DKC_GET_ACL_DENTRY
# v6.1-rc1-2-g138060ba92b3
#
# set_acl now passed a struct dentry instead of inode.
#
ifneq (,$(shell grep 'int ..set_acl.*struct dentry' include/linux/fs.h))
ccflags-y += -DKC_SET_ACL_DENTRY
endif
#
# v6.1-rc1-3-gcac2f8b8d8b5
#
# get_acl renamed to get_inode_acl.
#
ifneq (,$(shell grep 'struct posix_acl.*get_inode_acl' include/linux/fs.h))
ccflags-y += -DKC_GET_INODE_ACL
endif
#
# v6.1-rc5-2-ge9a688bcb193
#
# get_random_u32_below() implementation
ifneq (,$(shell grep 'u32 get_random_u32_below' include/linux/random.h))
ccflags-y += -DKC_HAVE_GET_RANDOM_U32_BELOW
endif
# v6.5-rc1-7-g9b6304c1d537
#
# ctime accessor methods
ifneq (,$(shell grep 'timespec64 inode_set_ctime_current' include/linux/fs.h))
ccflags-y += -DKC_FS_INODE_C_TIME_ACCESSOR
endif
#
# v6.6-rc5-1-g077c212f0344
#
# Must use access methods from fs.h to get to inode ctime/mtime/atime
ifneq (,$(shell grep 'inline time64_t inode_get_atime_sec' include/linux/fs.h))
ccflags-y += -DKC_FS_INODE_AM_TIME_ACCESSOR
endif
#
# v6.12-rc1-3-g5f60d5f6bbc1
#
# asm/unaligned.h replaced with linux/unaligned.h
ifneq (,$(shell grep -s 'define __LINUX_UNALIGNED_H' include/linux/unaligned.h))
ccflags-y += -DKC_HAVE__LINUX_UNALIGNED_H
endif
#
# v6.9-rc4-29-g203c1ce0bb06
#
# RIP bd_inode. (note, struct moved between headers!)
ifneq (,$(shell grep -s 'struct inode.*bd_inode' include/linux/blk_types.h include/linux/fs.h))
ccflags-y += -DKC_HAVE_BD_INODE
endif
#
# v6.8-9146-gc759e609030c
#
# Removes __assign_str_len() and removes the 2nd param of __assign_str().
ifneq (,$(shell grep -s 'define __assign_str.dst, src' \
include/trace/trace_events.h \
include/trace/ftrace.h \
include/trace/stages/stage6_event_callback.h))
ccflags-y += -DKC_HAVE_ASSIGN_STR_PARMS
endif
#
# v6.5-113-g615e95831ec3
#
ifneq (,$(shell grep 'generic_fillattr..*,.u32,' include/linux/fs.h))
ccflags-y += -DKC_GENERIC_FILLATTR_REQUEST_MASK
endif
#
# v6.6-rc4-53-gc42d50aefd17
#
# el10 yet again modifies the shrinker API significantly, breaking our current
# implementation.
ifneq (,$(shell grep 'struct shrinker .shrinker_alloc' include/linux/shrinker.h))
ccflags-y += -DKC_SHRINKER_ALLOC
endif
#
# v6.9-rc4-8-gead083aeeed9
#
# set_blocksize() now has a struct file arg.
ifneq (,$(shell grep -s 'int set_blocksize.struct file' include/linux/blkdev.h))
ccflags-y += -DKC_BLKDEV_SET_BLOCKSIZE_FILE
endif
#
# v5.1-rc3-29-gaa30f47cf666
#
# struct kobj_type now has member `default_groups`
ifneq (,$(shell grep 'const struct attribute_group ..default_groups;' include/linux/kobject.h))
ccflags-y += -DKC_KOBJECT_DEFAULT_GROUPS
endif
#
# v5.19-rc3-395-g67235182a41c
#
# Adds buffer_migrate_folio(), similar to other fss. Quote willy: "If the filesystem
# implements migrate_folio and writepages, there is no need for a writepage implementation."
ifneq (,$(shell grep 'int buffer_migrate_folio.struct address_space' include/linux/buffer_head.h))
ccflags-y += -DKC_HAVE_BUFFER_MIGRATE_FOLIO
endif
#
# v6.7-rc4-307-g17bf23a981be
#
# block_write_full_page() is replaced with block_write_full_folio(),
# but that isn't exported as it used to be (and the only users now
# are builtin). However, the kernel will fall back to using the
# .writepages method instead, so we can drop this method.
ifneq (,$(shell grep 'int block_write_full_page.struct page' include/linux/buffer_head.h))
ccflags-y += -DKC_HAVE_BLOCK_WRITE_FULL_PAGE
endif
#
# v6.4-rc2-29-gc6585011bc1d
#
# generic_file_splice_read is removed. It can be replaced with filemap_splice_read
# or copy_splice_read.
ifneq (,$(shell grep 'ssize_t generic_file_splice_read.struct file' include/linux/fs.h))
ccflags-y += -DKC_HAVE_GENERIC_FILE_SPLICE_READ
endif
#
# v4.6-rc3-29-g6192269444eb
#
# Adds .iterate_shared readdir() iterator vfs method.
ifneq (,$(shell grep 'iterate_shared...struct file.., struct dir_context' include/linux/fs.h))
ccflags-y += -DKC_HAVE_ITERATE_SHARED
endif
#
# v6.11-rc1-54-g9f04609f74ec
#
# Last of a series of changes that make block_write_begin/end take a folio instead of
# a struct page.
ifneq (,$(shell grep 'int __block_write_begin.struct.folio' include/linux/buffer_head.h))
ccflags-y += -DKC_BLOCK_WRITE_BEGIN_FOLIO
endif


@@ -107,20 +107,22 @@ struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct s
return acl;
}
#ifdef KC_GET_ACL_DENTRY
struct posix_acl *scoutfs_get_acl(KC_VFS_NS_DEF
struct dentry *dentry, int type)
{
struct inode *inode = dentry->d_inode;
#ifdef KC_GET_INODE_ACL
struct posix_acl *scoutfs_get_acl(struct inode *inode, int type, bool rcu)
#else
struct posix_acl *scoutfs_get_acl(struct inode *inode, int type)
{
#endif
{
struct super_block *sb = inode->i_sb;
struct scoutfs_lock *lock = NULL;
struct posix_acl *acl;
int ret;
#ifdef KC_GET_INODE_ACL
if (rcu)
return ERR_PTR(-ECHILD);
#endif
#ifndef KC___POSIX_ACL_CREATE
if (!IS_POSIXACL(inode))
return NULL;
@@ -195,7 +197,7 @@ int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type,
if (!value) {
/* can be setting an acl that only affects mode, didn't need xattr */
inode_inc_iversion(inode);
inode->i_ctime = current_time(inode);
inode_set_ctime_current(inode);
}
}
@@ -208,7 +210,7 @@ out:
return ret;
}
#ifdef KC_GET_ACL_DENTRY
#ifdef KC_SET_ACL_DENTRY
int scoutfs_set_acl(KC_VFS_NS_DEF
struct dentry *dentry, struct posix_acl *acl, int type)
{
@@ -254,9 +256,8 @@ int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value,
if (!IS_POSIXACL(dentry->d_inode))
return -EOPNOTSUPP;
#ifdef KC_GET_ACL_DENTRY
acl = scoutfs_get_acl(KC_VFS_INIT_NS
dentry, type);
#ifdef KC_GET_INODE_ACL
acl = scoutfs_get_acl(dentry->d_inode, type, false);
#else
acl = scoutfs_get_acl(dentry->d_inode, type);
#endif
@@ -305,7 +306,7 @@ int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *v
}
}
#ifdef KC_GET_ACL_DENTRY
#ifdef KC_SET_ACL_DENTRY
ret = scoutfs_set_acl(KC_VFS_INIT_NS dentry, acl, type);
#else
ret = scoutfs_set_acl(dentry->d_inode, acl, type);


@@ -1,12 +1,16 @@
#ifndef _SCOUTFS_ACL_H_
#define _SCOUTFS_ACL_H_
#ifdef KC_GET_ACL_DENTRY
struct posix_acl *scoutfs_get_acl(KC_VFS_NS_DEF struct dentry *dentry, int type);
int scoutfs_set_acl(KC_VFS_NS_DEF struct dentry *dentry, struct posix_acl *acl, int type);
#ifdef KC_SET_ACL_DENTRY
int scoutfs_set_acl(KC_VFS_NS_DEF
struct dentry *dentry, struct posix_acl *acl, int type);
#else
int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
#endif
#ifdef KC_GET_INODE_ACL
struct posix_acl *scoutfs_get_acl(struct inode *inode, int type, bool rcu);
#else
struct posix_acl *scoutfs_get_acl(struct inode *inode, int type);
int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
#endif
struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct scoutfs_lock *lock);
int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type,


@@ -308,14 +308,14 @@ static bool invalid_extent(u64 start, u64 end, u64 first, u64 last)
static bool invalid_meta_blkno(struct super_block *sb, u64 blkno)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
u64 last_meta = (i_size_read(sbi->meta_bdev->bd_inode) >> SCOUTFS_BLOCK_LG_SHIFT) - 1;
u64 last_meta = (i_size_read(KC_BDEV_INODE(sbi->meta_bdev)) >> SCOUTFS_BLOCK_LG_SHIFT) - 1;
return invalid_extent(blkno, blkno, SCOUTFS_META_DEV_START_BLKNO, last_meta);
}
static bool invalid_data_extent(struct super_block *sb, u64 start, u64 len)
{
u64 last_data = (i_size_read(sb->s_bdev->bd_inode) >> SCOUTFS_BLOCK_SM_SHIFT) - 1;
u64 last_data = (i_size_read(KC_BDEV_INODE(sb->s_bdev)) >> SCOUTFS_BLOCK_SM_SHIFT) - 1;
return invalid_extent(start, start + len - 1, SCOUTFS_DATA_DEV_START_BLKNO, last_data);
}


@@ -103,8 +103,8 @@ int scoutfs_get_attr_x(struct inode *inode, struct scoutfs_ioctl_inode_attr_x *i
size = fill_attr(size, iax, SCOUTFS_IOC_IAX_OFFLINE_BLOCKS,
offline_blocks, offline);
}
size = fill_attr(size, iax, SCOUTFS_IOC_IAX_CTIME, ctime_sec, inode->i_ctime.tv_sec);
size = fill_attr(size, iax, SCOUTFS_IOC_IAX_CTIME, ctime_nsec, inode->i_ctime.tv_nsec);
size = fill_attr(size, iax, SCOUTFS_IOC_IAX_CTIME, ctime_sec, inode_get_ctime_sec(inode));
size = fill_attr(size, iax, SCOUTFS_IOC_IAX_CTIME, ctime_nsec, inode_get_ctime_nsec(inode));
size = fill_attr(size, iax, SCOUTFS_IOC_IAX_CRTIME, crtime_sec, si->crtime.tv_sec);
size = fill_attr(size, iax, SCOUTFS_IOC_IAX_CRTIME, crtime_nsec, si->crtime.tv_nsec);
size = fill_attr(size, iax, SCOUTFS_IOC_IAX_SIZE, size, i_size_read(inode));
@@ -223,10 +223,8 @@ int scoutfs_set_attr_x(struct inode *inode, struct scoutfs_ioctl_inode_attr_x *i
scoutfs_inode_set_data_version(inode, iax->data_version);
if (iax->x_mask & SCOUTFS_IOC_IAX_SIZE)
i_size_write(inode, iax->size);
if (iax->x_mask & SCOUTFS_IOC_IAX_CTIME) {
inode->i_ctime.tv_sec = iax->ctime_sec;
inode->i_ctime.tv_nsec = iax->ctime_nsec;
}
if (iax->x_mask & SCOUTFS_IOC_IAX_CTIME)
inode_set_ctime(inode, iax->ctime_sec, iax->ctime_nsec);
if (iax->x_mask & SCOUTFS_IOC_IAX_CRTIME) {
si->crtime.tv_sec = iax->crtime_sec;
si->crtime.tv_nsec = iax->crtime_nsec;


@@ -884,7 +884,7 @@ int scoutfs_block_dirty_ref(struct super_block *sb, struct scoutfs_alloc *alloc,
hdr->magic = cpu_to_le32(magic);
hdr->fsid = cpu_to_le64(sbi->fsid);
hdr->blkno = cpu_to_le64(bl->blkno);
prandom_bytes(&hdr->seq, sizeof(hdr->seq));
get_random_bytes(&hdr->seq, sizeof(hdr->seq));
trace_scoutfs_block_dirty_ref(sb, le64_to_cpu(ref->blkno), le64_to_cpu(ref->seq),
le64_to_cpu(hdr->blkno), le64_to_cpu(hdr->seq));
@@ -1229,7 +1229,12 @@ static int sm_block_io(struct super_block *sb, struct block_device *bdev, blk_op
kc_bio_set_sector(bio, blkno << (SCOUTFS_BLOCK_SM_SHIFT - 9));
bio->bi_end_io = sm_block_bio_end_io;
bio->bi_private = &sbc;
bio_add_page(bio, page, SCOUTFS_BLOCK_SM_SIZE, 0);
ret = bio_add_page(bio, page, SCOUTFS_BLOCK_SM_SIZE, 0);
if (ret != SCOUTFS_BLOCK_SM_SIZE) {
bio_put(bio);
ret = -EFAULT;
goto out;
}
init_completion(&sbc.comp);
sbc.err = 0;
@@ -1285,9 +1290,12 @@ int scoutfs_block_setup(struct super_block *sb)
binf->sb = sb;
init_waitqueue_head(&binf->waitq);
KC_INIT_SHRINKER_FUNCS(&binf->shrinker, block_count_objects,
block_scan_objects);
KC_REGISTER_SHRINKER(&binf->shrinker, "scoutfs-block:" SCSBF, SCSB_ARGS(sb));
KC_SETUP_SHRINKER(binf->shrinker, binf, 0, block_count_objects,
block_scan_objects, "scoutfs-block:" SCSBF, SCSB_ARGS(sb));
if (KC_SHRINKER_IS_NULL(binf->shrinker)) {
ret = -ENOMEM;
goto out;
}
INIT_WORK(&binf->free_work, block_free_work);
init_llist_head(&binf->free_llist);
@@ -1309,7 +1317,7 @@ void scoutfs_block_destroy(struct super_block *sb)
struct block_info *binf = SCOUTFS_SB(sb)->block_info;
if (binf) {
KC_UNREGISTER_SHRINKER(&binf->shrinker);
KC_UNREGISTER_SHRINKER(binf->shrinker);
block_shrink_all(sb);
flush_work(&binf->free_work);
rhashtable_destroy(&binf->ht);


@@ -2042,7 +2042,7 @@ struct merged_item {
u64 seq;
u8 flags;
unsigned int val_len;
u8 val[0];
u8 val[];
};
static inline struct merged_item *mitem_container(struct rb_node *node)
@@ -2183,6 +2183,8 @@ static int merge_read_item(struct super_block *sb, struct scoutfs_key *key, u64
if (ret > 0) {
if (ret == SCOUTFS_DELTA_COMBINED) {
scoutfs_inc_counter(sb, btree_merge_delta_combined);
if (seq > found->seq)
found->seq = seq;
} else if (ret == SCOUTFS_DELTA_COMBINED_NULL) {
scoutfs_inc_counter(sb, btree_merge_delta_null);
free_mitem(rng, found);
@@ -2208,7 +2210,7 @@ static int merge_read_item(struct super_block *sb, struct scoutfs_key *key, u64
mitem->flags = flags;
mitem->val_len = val_len;
if (val_len)
memcpy(mitem->val, val, val_len);
memcpy(&mitem->val[0], val, val_len);
if (found) {
replace_mitem(rng, found, mitem);
@@ -2486,6 +2488,14 @@ int scoutfs_btree_merge(struct super_block *sb,
mitem = next_mitem(mitem);
free_mitem(&rng, tmp);
}
if (mitem && walk_val_len == 0 &&
!(walk_flags & (BTW_INSERT | BTW_DELETE)) &&
scoutfs_trigger(sb, LOG_MERGE_FORCE_PARTIAL)) {
ret = -ERANGE;
*next_ret = mitem->key;
goto out;
}
}
ret = 0;


@@ -30,7 +30,7 @@ struct scoutfs_btree_item_list {
u64 seq;
u8 flags;
int val_len;
u8 val[0];
u8 val[];
};
int scoutfs_btree_lookup(struct super_block *sb,


@@ -34,6 +34,17 @@ static struct attribute scoutfs_counter_attrs[] = {
#define NR_ATTRS ARRAY_SIZE(scoutfs_counter_attrs)
static struct attribute *scoutfs_counter_attr_ptrs[NR_ATTRS + 1];
#ifdef KC_KOBJECT_DEFAULT_GROUPS
static struct attribute_group scoutfs_counter_attr_group = {
.attrs = scoutfs_counter_attr_ptrs,
};
static const struct attribute_group *scoutfs_counter_attr_groups[] = {
&scoutfs_counter_attr_group,
NULL,
};
#endif
static ssize_t scoutfs_counter_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
@@ -45,7 +56,6 @@ static ssize_t scoutfs_counter_attr_show(struct kobject *kobj,
counters = container_of(kobj, struct scoutfs_counters, kobj);
index = attr - scoutfs_counter_attrs;
pcpu = &counters->FIRST_COUNTER + index;
return snprintf(buf, PAGE_SIZE, "%lld\n", percpu_counter_sum(pcpu));
}
@@ -63,7 +73,7 @@ static const struct sysfs_ops scoutfs_counter_attr_ops = {
};
static struct kobj_type scoutfs_counters_ktype = {
.default_attrs = scoutfs_counter_attr_ptrs,
.KC_KOBJ_DEFAULT_OP = KC_KOBJ_DEFAULT_PICK(scoutfs_counter_attr_groups, scoutfs_counter_attr_ptrs),
.sysfs_ops = &scoutfs_counter_attr_ops,
.release = scoutfs_counters_kobj_release,
};


@@ -125,7 +125,6 @@
EXPAND_COUNTER(item_update) \
EXPAND_COUNTER(item_write_dirty) \
EXPAND_COUNTER(lock_alloc) \
EXPAND_COUNTER(lock_count_objects) \
EXPAND_COUNTER(lock_free) \
EXPAND_COUNTER(lock_grant_request) \
EXPAND_COUNTER(lock_grant_response) \
@@ -139,13 +138,13 @@
EXPAND_COUNTER(lock_lock_error) \
EXPAND_COUNTER(lock_nonblock_eagain) \
EXPAND_COUNTER(lock_recover_request) \
EXPAND_COUNTER(lock_scan_objects) \
EXPAND_COUNTER(lock_shrink_attempted) \
EXPAND_COUNTER(lock_shrink_aborted) \
EXPAND_COUNTER(lock_shrink_work) \
EXPAND_COUNTER(lock_shrink_request_failed) \
EXPAND_COUNTER(lock_unlock) \
EXPAND_COUNTER(lock_wait) \
EXPAND_COUNTER(log_merge_complete) \
EXPAND_COUNTER(log_merge_no_finalized) \
EXPAND_COUNTER(log_merge_start) \
EXPAND_COUNTER(log_merge_wait_timeout) \
EXPAND_COUNTER(net_dropped_response) \
EXPAND_COUNTER(net_send_bytes) \
@@ -160,6 +159,7 @@
EXPAND_COUNTER(orphan_scan) \
EXPAND_COUNTER(orphan_scan_attempts) \
EXPAND_COUNTER(orphan_scan_cached) \
EXPAND_COUNTER(orphan_scan_empty) \
EXPAND_COUNTER(orphan_scan_error) \
EXPAND_COUNTER(orphan_scan_item) \
EXPAND_COUNTER(orphan_scan_omap_set) \


@@ -79,8 +79,10 @@ static void item_from_extent(struct scoutfs_key *key,
.skdx_end = cpu_to_le64(start + len - 1),
.skdx_len = cpu_to_le64(len),
};
dv->blkno = cpu_to_le64(map);
dv->flags = flags;
*dv = (struct scoutfs_data_extent_val) {
.blkno = cpu_to_le64(map),
.flags = flags,
};
}
static void ext_from_item(struct scoutfs_extent *ext,
@@ -716,24 +718,24 @@ static int scoutfs_readpage(struct file *file, struct page *page)
return ret;
}
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock)) {
ret = scoutfs_data_wait_check(inode, page_offset(page),
PAGE_SIZE, SEF_OFFLINE,
SCOUTFS_IOC_DWO_READ, &dw,
inode_lock);
if (ret != 0) {
unlock_page(page);
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
}
if (ret > 0) {
ret = scoutfs_data_wait(inode, &dw);
if (ret == 0)
ret = AOP_TRUNCATED_PAGE;
}
if (ret != 0)
return ret;
scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock);
ret = scoutfs_data_wait_check(inode, page_offset(page),
PAGE_SIZE, SEF_OFFLINE,
SCOUTFS_IOC_DWO_READ, &dw,
inode_lock);
if (ret != 0) {
unlock_page(page);
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
}
if (ret > 0) {
ret = scoutfs_data_wait(inode, &dw);
if (ret == 0)
ret = AOP_TRUNCATED_PAGE;
}
if (ret != 0)
return ret;
#ifdef KC_MPAGE_READ_FOLIO
ret = mpage_read_folio(folio, scoutfs_get_block_read);
@@ -741,8 +743,8 @@ static int scoutfs_readpage(struct file *file, struct page *page)
ret = mpage_readpage(page, scoutfs_get_block_read);
#endif
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
return ret;
}
@@ -760,8 +762,10 @@ static int scoutfs_readpages(struct file *file, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
struct inode *inode = file->f_inode;
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
struct super_block *sb = inode->i_sb;
struct scoutfs_lock *inode_lock = NULL;
SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
struct page *page;
struct page *tmp;
int ret;
@@ -771,6 +775,8 @@ static int scoutfs_readpages(struct file *file, struct address_space *mapping,
if (ret)
goto out;
scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock);
list_for_each_entry_safe(page, tmp, pages, lru) {
ret = scoutfs_data_wait_check(inode, page_offset(page),
PAGE_SIZE, SEF_OFFLINE,
@@ -790,6 +796,7 @@ static int scoutfs_readpages(struct file *file, struct address_space *mapping,
ret = mpage_readpages(mapping, pages, nr_pages, scoutfs_get_block_read);
out:
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
BUG_ON(!list_empty(pages));
return ret;
@@ -798,8 +805,10 @@ out:
static void scoutfs_readahead(struct readahead_control *rac)
{
struct inode *inode = rac->file->f_inode;
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
struct super_block *sb = inode->i_sb;
struct scoutfs_lock *inode_lock = NULL;
SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
int ret;
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
@@ -807,6 +816,8 @@ static void scoutfs_readahead(struct readahead_control *rac)
if (ret)
return;
scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock);
ret = scoutfs_data_wait_check(inode, readahead_pos(rac),
readahead_length(rac), SEF_OFFLINE,
SCOUTFS_IOC_DWO_READ, NULL,
@@ -814,14 +825,17 @@ static void scoutfs_readahead(struct readahead_control *rac)
if (ret == 0)
mpage_readahead(rac, scoutfs_get_block_read);
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
}
#endif
#ifdef KC_HAVE_BLOCK_WRITE_FULL_PAGE
static int scoutfs_writepage(struct page *page, struct writeback_control *wbc)
{
return block_write_full_page(page, scoutfs_get_block_write, wbc);
}
#endif
static int scoutfs_writepages(struct address_space *mapping,
struct writeback_control *wbc)
@@ -841,7 +855,7 @@ static int scoutfs_write_begin(struct file *file,
#ifdef KC_BLOCK_WRITE_BEGIN_AOP_FLAGS
unsigned flags,
#endif
struct page **pagep, void **fsdata)
KC_PAGE_OR_FOLIO(struct page **pagep, struct folio **folio), void **fsdata)
{
struct inode *inode = mapping->host;
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
@@ -886,7 +900,7 @@ retry:
#ifdef KC_BLOCK_WRITE_BEGIN_AOP_FLAGS
flags,
#endif
pagep, scoutfs_get_block_write);
KC_PAGE_OR_FOLIO(pagep, folio), scoutfs_get_block_write);
if (ret < 0) {
scoutfs_release_trans(sb);
scoutfs_inode_index_unlock(sb, &wbd->ind_locks);
@@ -919,7 +933,8 @@ static int writepages_sync_none(struct address_space *mapping, loff_t start,
static int scoutfs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
KC_PAGE_OR_FOLIO(struct page *pagep, struct folio *folio),
void *fsdata)
{
struct inode *inode = mapping->host;
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
@@ -927,10 +942,11 @@ static int scoutfs_write_end(struct file *file, struct address_space *mapping,
struct write_begin_data *wbd = fsdata;
int ret;
trace_scoutfs_write_end(sb, scoutfs_ino(inode), page->index, (u64)pos,
len, copied);
trace_scoutfs_write_end(sb, scoutfs_ino(inode),
KC_PAGE_OR_FOLIO(pagep->index, folio_index(folio)),
(u64)pos, len, copied);
ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
ret = generic_write_end(file, mapping, pos, len, copied, KC_PAGE_OR_FOLIO(pagep, folio), fsdata);
if (ret > 0) {
if (!si->staging) {
scoutfs_inode_set_data_seq(inode);
@@ -1483,12 +1499,14 @@ int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
cur_time = current_time(from);
if (!is_stage) {
to->i_ctime = to->i_mtime = cur_time;
inode_set_ctime_to_ts(to, cur_time);
inode_set_mtime_to_ts(to, cur_time);
inode_inc_iversion(to);
scoutfs_inode_inc_data_version(to);
scoutfs_inode_set_data_seq(to);
}
from->i_ctime = from->i_mtime = cur_time;
inode_set_ctime_to_ts(from, cur_time);
inode_set_mtime_to_ts(from, cur_time);
inode_inc_iversion(from);
scoutfs_inode_inc_data_version(from);
scoutfs_inode_set_data_seq(from);
@@ -1515,6 +1533,101 @@ out:
return ret;
}
/*
* Punch holes in offline extents. This is a very specific tool that
* only does one job: it converts extents from offline to sparse. It
* returns an error if it encounters an extent that isn't offline or has
* a block mapping. It ignores i_size completely; it does not test it,
* and does not update it.
*
 * The caller holds the VFS inode lock and has performed basic sanity
 * checks. We manage transactions and the extent_sem, which is ordered
 * inside the transaction.
*/
int scoutfs_data_punch_offline(struct inode *inode, u64 iblock, u64 last, u64 data_version,
struct scoutfs_lock *lock)
{
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
struct super_block *sb = inode->i_sb;
struct data_ext_args args = {
.ino = scoutfs_ino(inode),
.inode = inode,
.lock = lock,
};
struct scoutfs_extent ext;
LIST_HEAD(ind_locks);
int ret;
int i;
if (WARN_ON_ONCE(iblock > last)) {
ret = -EINVAL;
goto out;
}
/* callers idiomatically pass 0,~0 for start,last; clamp last to the last possible block */
last = min(last, SCOUTFS_BLOCK_SM_MAX);
ret = 0;
while (iblock <= last) {
ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, true, false) ?:
scoutfs_dirty_inode_item(inode, lock);
if (ret < 0)
break;
down_write(&si->extent_sem);
for (i = 0; i < 32 && (iblock <= last); i++) {
ret = scoutfs_ext_next(sb, &data_ext_ops, &args, iblock, 1, &ext);
if (ret == -ENOENT) {
iblock = last + 1;
ret = 0;
break;
}
if (ret < 0)
break;
if (ext.start > last) {
iblock = last + 1;
break;
}
if (ext.map) {
ret = -EINVAL;
break;
}
if (ext.flags & SEF_OFFLINE) {
if (iblock > ext.start) {
ext.len -= iblock - ext.start;
ext.start = iblock;
}
ext.len = min(ext.len, last - ext.start + 1);
ext.flags &= ~SEF_OFFLINE;
ret = scoutfs_ext_set(sb, &data_ext_ops, &args,
ext.start, ext.len, ext.map, ext.flags);
if (ret < 0)
break;
}
iblock = ext.start + ext.len;
}
up_write(&si->extent_sem);
scoutfs_update_inode_item(inode, lock, &ind_locks);
scoutfs_release_trans(sb);
scoutfs_inode_index_unlock(sb, &ind_locks);
if (ret < 0)
break;
}
out:
return ret;
}
/*
* This copies to userspace :/
*/
@@ -1967,7 +2080,11 @@ static int scoutfs_data_page_mkwrite(struct vm_area_struct *vma,
struct vm_fault *vmf)
{
#endif
#ifdef KC_MPAGE_READ_FOLIO
struct folio *folio = page_folio(vmf->page);
#else
struct page *page = vmf->page;
#endif
struct file *file = vma->vm_file;
struct inode *inode = file_inode(file);
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
@@ -2035,27 +2152,50 @@ static int scoutfs_data_page_mkwrite(struct vm_area_struct *vma,
down_write(&si->extent_sem);
#ifdef KC_MPAGE_READ_FOLIO
if (!folio_trylock(folio)) {
#else
if (!trylock_page(page)) {
#endif
ret = VM_FAULT_NOPAGE;
goto out_sem;
}
ret = VM_FAULT_LOCKED;
#ifdef KC_MPAGE_READ_FOLIO
if ((folio->mapping != inode->i_mapping) ||
(!folio_test_uptodate(folio)) ||
(folio_pos(folio) > size)) {
folio_unlock(folio);
#else
if ((page->mapping != inode->i_mapping) ||
(!PageUptodate(page)) ||
(page_offset(page) > size)) {
(page_offset(page) > size)) {
unlock_page(page);
#endif
ret = VM_FAULT_NOPAGE;
goto out_sem;
}
#ifdef KC_MPAGE_READ_FOLIO
if (folio_index(folio) == (size - 1) >> PAGE_SHIFT)
#else
if (page->index == (size - 1) >> PAGE_SHIFT)
#endif
len = ((size - 1) & ~PAGE_MASK) + 1;
#ifdef KC_MPAGE_READ_FOLIO
err = __block_write_begin(KC_PAGE_OR_FOLIO(folio_page(folio, 0), folio), pos, PAGE_SIZE, scoutfs_get_block);
#else
err = __block_write_begin(page, pos, PAGE_SIZE, scoutfs_get_block);
#endif
if (err) {
ret = vmf_error(err);
#ifdef KC_MPAGE_READ_FOLIO
folio_unlock(folio);
#else
unlock_page(page);
#endif
goto out_sem;
}
/* end scoutfs_write_begin */
@@ -2065,8 +2205,13 @@ static int scoutfs_data_page_mkwrite(struct vm_area_struct *vma,
* progress, we are guaranteed that writeback during freezing will
* see the dirty page and writeprotect it again.
*/
#ifdef KC_MPAGE_READ_FOLIO
folio_mark_dirty(folio);
folio_wait_stable(folio);
#else
set_page_dirty(page);
wait_for_stable_page(page);
#endif
/* scoutfs_write_end */
scoutfs_inode_set_data_seq(inode);
@@ -2193,6 +2338,9 @@ const struct address_space_operations scoutfs_file_aops = {
.dirty_folio = block_dirty_folio,
.invalidate_folio = block_invalidate_folio,
.read_folio = scoutfs_read_folio,
#ifdef KC_HAVE_BUFFER_MIGRATE_FOLIO
.migrate_folio = buffer_migrate_folio,
#endif
#else
.readpage = scoutfs_readpage,
#endif
@@ -2201,7 +2349,9 @@ const struct address_space_operations scoutfs_file_aops = {
#else
.readahead = scoutfs_readahead,
#endif
#ifdef KC_HAVE_BLOCK_WRITE_FULL_PAGE
.writepage = scoutfs_writepage,
#endif
.writepages = scoutfs_writepages,
.write_begin = scoutfs_write_begin,
.write_end = scoutfs_write_end,
@@ -2216,8 +2366,12 @@ const struct file_operations scoutfs_file_fops = {
#else
.read_iter = scoutfs_file_read_iter,
.write_iter = scoutfs_file_write_iter,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
#endif
#ifdef KC_HAVE_GENERIC_FILE_SPLICE_READ
.splice_read = generic_file_splice_read,
#else
.splice_read = copy_splice_read,
#endif
.mmap = scoutfs_file_mmap,
.unlocked_ioctl = scoutfs_ioctl,


@@ -57,6 +57,8 @@ int scoutfs_data_init_offline_extent(struct inode *inode, u64 size,
int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
u64 byte_len, struct inode *to, u64 to_off, bool to_stage,
u64 data_version);
int scoutfs_data_punch_offline(struct inode *inode, u64 iblock, u64 last, u64 data_version,
struct scoutfs_lock *lock);
int scoutfs_data_wait_check(struct inode *inode, loff_t pos, loff_t len,
u8 sef, u8 op, struct scoutfs_data_wait *ow,


@@ -587,10 +587,12 @@ static int add_entry_items(struct super_block *sb, u64 dir_ino, u64 hash,
}
/* initialize the dent */
dent->ino = cpu_to_le64(ino);
dent->hash = cpu_to_le64(hash);
dent->pos = cpu_to_le64(pos);
dent->type = mode_to_type(mode);
*dent = (struct scoutfs_dirent) {
.ino = cpu_to_le64(ino),
.hash = cpu_to_le64(hash),
.pos = cpu_to_le64(pos),
.type = mode_to_type(mode),
};
memcpy(dent->name, name, name_len);
init_dirent_key(&ent_key, SCOUTFS_DIRENT_TYPE, dir_ino, hash, pos);
@@ -759,6 +761,7 @@ static int scoutfs_mknod(KC_VFS_NS_DEF
struct scoutfs_lock *dir_lock = NULL;
struct scoutfs_lock *inode_lock = NULL;
struct scoutfs_inode_info *si;
struct kc_timespec cur_time;
LIST_HEAD(ind_locks);
u64 hash;
u64 pos;
@@ -790,9 +793,13 @@ static int scoutfs_mknod(KC_VFS_NS_DEF
set_dentry_fsdata(dentry, dir_lock);
i_size_write(dir, i_size_read(dir) + dentry->d_name.len);
dir->i_mtime = dir->i_ctime = current_time(inode);
inode->i_mtime = inode->i_atime = inode->i_ctime = dir->i_mtime;
si->crtime = inode->i_mtime;
cur_time = current_time(inode);
inode_set_mtime_to_ts(dir, cur_time);
inode_set_ctime_to_ts(dir, cur_time);
inode_set_mtime_to_ts(inode, cur_time);
inode_set_atime_to_ts(inode, cur_time);
inode_set_ctime_to_ts(inode, cur_time);
si->crtime = inode_get_mtime(inode);
inode_inc_iversion(dir);
inode_inc_iversion(inode);
scoutfs_forest_inc_inode_count(sb);
@@ -845,6 +852,7 @@ static int scoutfs_link(struct dentry *old_dentry,
struct scoutfs_lock *dir_lock;
struct scoutfs_lock *inode_lock = NULL;
struct scoutfs_lock *orph_lock = NULL;
struct kc_timespec cur_time;
LIST_HEAD(ind_locks);
bool del_orphan = false;
u64 dir_size;
@@ -919,8 +927,10 @@ retry:
set_dentry_fsdata(dentry, dir_lock);
i_size_write(dir, dir_size);
dir->i_mtime = dir->i_ctime = current_time(inode);
inode->i_ctime = dir->i_mtime;
cur_time = current_time(inode);
inode_set_mtime_to_ts(dir, cur_time);
inode_set_ctime_to_ts(dir, cur_time);
inode_set_ctime_to_ts(inode, inode_get_mtime(dir));
inc_nlink(inode);
inode_inc_iversion(dir);
inode_inc_iversion(inode);
@@ -1030,13 +1040,13 @@ retry:
set_dentry_fsdata(dentry, dir_lock);
dir->i_ctime = ts;
dir->i_mtime = ts;
inode_set_ctime_to_ts(dir, ts);
inode_set_mtime_to_ts(dir, ts);
i_size_write(dir, i_size_read(dir) - dentry->d_name.len);
inode_inc_iversion(dir);
inode_inc_iversion(inode);
inode->i_ctime = ts;
inode_set_ctime_to_ts(inode, ts);
drop_nlink(inode);
if (S_ISDIR(inode->i_mode)) {
drop_nlink(dir);
@@ -1239,6 +1249,7 @@ static int scoutfs_symlink(KC_VFS_NS_DEF
struct scoutfs_lock *dir_lock = NULL;
struct scoutfs_lock *inode_lock = NULL;
struct scoutfs_inode_info *si;
struct kc_timespec cur_time;
LIST_HEAD(ind_locks);
u64 hash;
u64 pos;
@@ -1278,11 +1289,13 @@ static int scoutfs_symlink(KC_VFS_NS_DEF
set_dentry_fsdata(dentry, dir_lock);
i_size_write(dir, i_size_read(dir) + dentry->d_name.len);
dir->i_mtime = dir->i_ctime = current_time(inode);
cur_time = current_time(inode);
inode_set_mtime_to_ts(dir, cur_time);
inode_set_ctime_to_ts(dir, cur_time);
inode_inc_iversion(dir);
inode->i_ctime = dir->i_mtime;
si->crtime = inode->i_ctime;
inode_set_ctime_to_ts(inode, inode_get_mtime(dir));
si->crtime = inode_get_ctime(inode);
i_size_write(inode, name_len);
inode_inc_iversion(inode);
scoutfs_forest_inc_inode_count(sb);
@@ -1804,15 +1817,15 @@ retry:
}
now = current_time(old_inode);
old_dir->i_ctime = now;
old_dir->i_mtime = now;
inode_set_ctime_to_ts(old_dir, now);
inode_set_mtime_to_ts(old_dir, now);
if (new_dir != old_dir) {
new_dir->i_ctime = now;
new_dir->i_mtime = now;
inode_set_ctime_to_ts(new_dir, now);
inode_set_mtime_to_ts(new_dir, now);
}
old_inode->i_ctime = now;
inode_set_ctime_to_ts(old_inode, now);
if (new_inode)
new_inode->i_ctime = now;
inode_set_ctime_to_ts(new_inode, now);
inode_inc_iversion(old_dir);
inode_inc_iversion(old_inode);
@@ -1939,6 +1952,7 @@ static int scoutfs_tmpfile(KC_VFS_NS_DEF
struct scoutfs_lock *inode_lock = NULL;
struct scoutfs_lock *orph_lock = NULL;
struct scoutfs_inode_info *si;
struct kc_timespec cur_time;
LIST_HEAD(ind_locks);
int ret;
@@ -1955,8 +1969,11 @@ static int scoutfs_tmpfile(KC_VFS_NS_DEF
if (ret < 0)
goto out; /* XXX returning error but items created */
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
si->crtime = inode->i_mtime;
cur_time = current_time(inode);
inode_set_mtime_to_ts(inode, cur_time);
inode_set_ctime_to_ts(inode, cur_time);
inode_set_atime_to_ts(inode, cur_time);
si->crtime = inode_get_mtime(inode);
insert_inode_hash(inode);
ihold(inode); /* need to update inode modifications in d_tmpfile */
#ifdef KC_D_TMPFILE_DENTRY
@@ -2006,7 +2023,11 @@ const struct inode_operations scoutfs_symlink_iops = {
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.removexattr = generic_removexattr,
#endif
#ifdef KC_GET_INODE_ACL
.get_inode_acl = scoutfs_get_acl,
#else
.get_acl = scoutfs_get_acl,
#endif
#ifndef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.tmpfile = scoutfs_tmpfile,
.rename = scoutfs_rename_common,
@@ -2020,7 +2041,11 @@ const struct inode_operations scoutfs_symlink_iops = {
};
const struct file_operations scoutfs_dir_fops = {
#ifdef KC_HAVE_ITERATE_SHARED
.iterate_shared = scoutfs_readdir,
#else
.iterate = scoutfs_readdir,
#endif
#ifdef KC_FMODE_KABI_ITERATE
.open = scoutfs_dir_open,
#endif
@@ -2052,8 +2077,12 @@ const struct inode_operations scoutfs_dir_iops = {
.removexattr = generic_removexattr,
#endif
.listxattr = scoutfs_listxattr,
#ifdef KC_GET_INODE_ACL
.get_inode_acl = scoutfs_get_acl,
#else
.get_acl = scoutfs_get_acl,
#ifdef KC_GET_ACL_DENTRY
#endif
#ifdef KC_SET_ACL_DENTRY
.set_acl = scoutfs_set_acl,
#endif
.symlink = scoutfs_symlink,


@@ -217,6 +217,9 @@ static struct attribute *fence_attrs[] = {
SCOUTFS_ATTR_PTR(rid),
NULL,
};
#ifdef KC_KOBJECT_DEFAULT_GROUPS
ATTRIBUTE_GROUPS(fence);
#endif
#define FENCE_TIMEOUT_MS (MSEC_PER_SEC * 30)
@@ -255,7 +258,8 @@ int scoutfs_fence_start(struct super_block *sb, u64 rid, __be32 ipv4_addr, int r
fence->rid = rid;
ret = scoutfs_sysfs_create_attrs_parent(sb, &fi->kset->kobj,
&fence->ssa, fence_attrs,
&fence->ssa,
KC_KOBJ_DEFAULT(fence),
"%016llx", rid);
if (ret < 0) {
kfree(fence);


@@ -239,9 +239,9 @@ static int forest_read_items(struct super_block *sb, struct scoutfs_key *key, u6
* to reset their state and retry with a newer version of the btrees.
*/
int scoutfs_forest_read_items_roots(struct super_block *sb, struct scoutfs_net_roots *roots,
struct scoutfs_key *key, struct scoutfs_key *bloom_key,
struct scoutfs_key *start, struct scoutfs_key *end,
scoutfs_forest_item_cb cb, void *arg)
u64 merge_input_seq, struct scoutfs_key *key,
struct scoutfs_key *bloom_key, struct scoutfs_key *start,
struct scoutfs_key *end, scoutfs_forest_item_cb cb, void *arg)
{
struct forest_read_items_data rid = {
.cb = cb,
@@ -317,15 +317,17 @@ int scoutfs_forest_read_items_roots(struct super_block *sb, struct scoutfs_net_r
scoutfs_inc_counter(sb, forest_bloom_pass);
if ((le64_to_cpu(lt.flags) & SCOUTFS_LOG_TREES_FINALIZED))
rid.fic |= FIC_FINALIZED;
if ((le64_to_cpu(lt.flags) & SCOUTFS_LOG_TREES_FINALIZED) &&
(merge_input_seq == 0 ||
le64_to_cpu(lt.finalize_seq) < merge_input_seq))
rid.fic |= FIC_MERGE_INPUT;
ret = scoutfs_btree_read_items(sb, &lt.item_root, key, start,
end, forest_read_items, &rid);
if (ret < 0)
goto out;
rid.fic &= ~FIC_FINALIZED;
rid.fic &= ~FIC_MERGE_INPUT;
}
ret = 0;
@@ -345,7 +347,7 @@ int scoutfs_forest_read_items(struct super_block *sb,
ret = scoutfs_client_get_roots(sb, &roots);
if (ret == 0)
ret = scoutfs_forest_read_items_roots(sb, &roots, key, bloom_key, start, end,
ret = scoutfs_forest_read_items_roots(sb, &roots, 0, key, bloom_key, start, end,
cb, arg);
return ret;
}
@@ -793,7 +795,7 @@ out:
if (ret)
scoutfs_forest_destroy(sb);
return 0;
return ret;
}
void scoutfs_forest_start(struct super_block *sb)


@@ -11,7 +11,7 @@ struct scoutfs_lock;
/* caller gives an item to the callback */
enum {
FIC_FS_ROOT = (1 << 0),
FIC_FINALIZED = (1 << 1),
FIC_MERGE_INPUT = (1 << 1),
};
typedef int (*scoutfs_forest_item_cb)(struct super_block *sb, struct scoutfs_key *key, u64 seq,
u8 flags, void *val, int val_len, int fic, void *arg);
@@ -25,9 +25,9 @@ int scoutfs_forest_read_items(struct super_block *sb,
struct scoutfs_key *end,
scoutfs_forest_item_cb cb, void *arg);
int scoutfs_forest_read_items_roots(struct super_block *sb, struct scoutfs_net_roots *roots,
struct scoutfs_key *key, struct scoutfs_key *bloom_key,
struct scoutfs_key *start, struct scoutfs_key *end,
scoutfs_forest_item_cb cb, void *arg);
u64 merge_input_seq, struct scoutfs_key *key,
struct scoutfs_key *bloom_key, struct scoutfs_key *start,
struct scoutfs_key *end, scoutfs_forest_item_cb cb, void *arg);
int scoutfs_forest_set_bloom_bits(struct super_block *sb,
struct scoutfs_lock *lock);
void scoutfs_forest_set_max_seq(struct super_block *sb, u64 max_seq);


@@ -149,8 +149,12 @@ static const struct inode_operations scoutfs_file_iops = {
.removexattr = generic_removexattr,
#endif
.listxattr = scoutfs_listxattr,
#ifdef KC_GET_INODE_ACL
.get_inode_acl = scoutfs_get_acl,
#else
.get_acl = scoutfs_get_acl,
#ifdef KC_GET_ACL_DENTRY
#endif
#ifdef KC_SET_ACL_DENTRY
.set_acl = scoutfs_set_acl,
#endif
.fiemap = scoutfs_data_fiemap,
@@ -165,8 +169,12 @@ static const struct inode_operations scoutfs_special_iops = {
.removexattr = generic_removexattr,
#endif
.listxattr = scoutfs_listxattr,
#ifdef KC_GET_INODE_ACL
.get_inode_acl = scoutfs_get_acl,
#else
.get_acl = scoutfs_get_acl,
#ifdef KC_GET_ACL_DENTRY
#endif
#ifdef KC_SET_ACL_DENTRY
.set_acl = scoutfs_set_acl,
#endif
};
@@ -267,12 +275,9 @@ static void load_inode(struct inode *inode, struct scoutfs_inode *cinode, int in
i_gid_write(inode, le32_to_cpu(cinode->gid));
inode->i_mode = le32_to_cpu(cinode->mode);
inode->i_rdev = le32_to_cpu(cinode->rdev);
inode->i_atime.tv_sec = le64_to_cpu(cinode->atime.sec);
inode->i_atime.tv_nsec = le32_to_cpu(cinode->atime.nsec);
inode->i_mtime.tv_sec = le64_to_cpu(cinode->mtime.sec);
inode->i_mtime.tv_nsec = le32_to_cpu(cinode->mtime.nsec);
inode->i_ctime.tv_sec = le64_to_cpu(cinode->ctime.sec);
inode->i_ctime.tv_nsec = le32_to_cpu(cinode->ctime.nsec);
inode_set_atime(inode, le64_to_cpu(cinode->atime.sec), le32_to_cpu(cinode->atime.nsec));
inode_set_mtime(inode, le64_to_cpu(cinode->mtime.sec), le32_to_cpu(cinode->mtime.nsec));
inode_set_ctime(inode, le64_to_cpu(cinode->ctime.sec), le32_to_cpu(cinode->ctime.nsec));
si->meta_seq = le64_to_cpu(cinode->meta_seq);
si->data_seq = le64_to_cpu(cinode->data_seq);
@@ -393,6 +398,7 @@ int scoutfs_getattr(KC_VFS_NS_DEF
SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
if (ret == 0) {
generic_fillattr(KC_VFS_INIT_NS
KC_FILLATTR_REQUEST_MASK
inode, stat);
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ);
}
@@ -404,6 +410,7 @@ static int set_inode_size(struct inode *inode, struct scoutfs_lock *lock,
{
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
struct super_block *sb = inode->i_sb;
struct kc_timespec cur_time;
SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
LIST_HEAD(ind_locks);
int ret;
@@ -426,7 +433,9 @@ static int set_inode_size(struct inode *inode, struct scoutfs_lock *lock,
scoutfs_inode_inc_data_version(inode);
truncate_setsize(inode, new_size);
inode->i_ctime = inode->i_mtime = current_time(inode);
cur_time = current_time(inode);
inode_set_ctime_to_ts(inode, cur_time);
inode_set_mtime_to_ts(inode, cur_time);
if (truncate)
si->flags |= SCOUTFS_INO_FLAG_TRUNCATE;
scoutfs_inode_set_data_seq(inode);
@@ -893,14 +902,14 @@ static void store_inode(struct scoutfs_inode *cinode, struct inode *inode, int i
cinode->gid = cpu_to_le32(i_gid_read(inode));
cinode->mode = cpu_to_le32(inode->i_mode);
cinode->rdev = cpu_to_le32(inode->i_rdev);
cinode->atime.sec = cpu_to_le64(inode->i_atime.tv_sec);
cinode->atime.nsec = cpu_to_le32(inode->i_atime.tv_nsec);
cinode->atime.sec = cpu_to_le64(inode_get_atime_sec(inode));
cinode->atime.nsec = cpu_to_le32(inode_get_atime_nsec(inode));
memset(cinode->atime.__pad, 0, sizeof(cinode->atime.__pad));
cinode->ctime.sec = cpu_to_le64(inode->i_ctime.tv_sec);
cinode->ctime.nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
cinode->ctime.sec = cpu_to_le64(inode_get_ctime_sec(inode));
cinode->ctime.nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
memset(cinode->ctime.__pad, 0, sizeof(cinode->ctime.__pad));
cinode->mtime.sec = cpu_to_le64(inode->i_mtime.tv_sec);
cinode->mtime.nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
cinode->mtime.sec = cpu_to_le64(inode_get_mtime_sec(inode));
cinode->mtime.nsec = cpu_to_le32(inode_get_mtime_nsec(inode));
memset(cinode->mtime.__pad, 0, sizeof(cinode->mtime.__pad));
cinode->meta_seq = cpu_to_le64(scoutfs_inode_meta_seq(inode));
@@ -1558,6 +1567,7 @@ int scoutfs_new_inode(struct super_block *sb, struct inode *dir, umode_t mode, d
struct scoutfs_inode sinode;
struct scoutfs_key key;
struct inode *inode;
struct kc_timespec cur_time;
int inode_bytes;
int ret;
@@ -1587,7 +1597,10 @@ int scoutfs_new_inode(struct super_block *sb, struct inode *dir, umode_t mode, d
inode_init_owner(KC_VFS_INIT_NS
inode, dir, mode);
inode_set_bytes(inode, 0);
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
cur_time = current_time(inode);
inode_set_mtime_to_ts(inode, cur_time);
inode_set_atime_to_ts(inode, cur_time);
inode_set_ctime_to_ts(inode, cur_time);
inode->i_rdev = rdev;
set_inode_ops(inode);
@@ -1637,10 +1650,14 @@ int scoutfs_inode_orphan_delete(struct super_block *sb, u64 ino, struct scoutfs_
struct scoutfs_lock *primary)
{
struct scoutfs_key key;
int ret;
init_orphan_key(&key, ino);
return scoutfs_item_delete_force(sb, &key, lock, primary);
ret = scoutfs_item_delete_force(sb, &key, lock, primary);
trace_scoutfs_inode_orphan_delete(sb, ino, ret);
return ret;
}
/*
@@ -1722,6 +1739,8 @@ out:
scoutfs_release_trans(sb);
scoutfs_inode_index_unlock(sb, &ind_locks);
trace_scoutfs_delete_inode_end(sb, ino, mode, size, ret);
return ret;
}
@@ -1817,6 +1836,9 @@ out:
* they've checked that the inode could really be deleted. We serialize
* on a bit in the lock data so that we only have one deletion attempt
* per inode under this mount's cluster lock.
*
* Returns -EAGAIN if we either did some cleanup work or are unable to finish
* cleaning up this inode right now.
*/
static int try_delete_inode_items(struct super_block *sb, u64 ino)
{
@@ -1830,6 +1852,8 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino)
int bit_nr;
int ret;
trace_scoutfs_try_delete(sb, ino);
ret = scoutfs_lock_ino(sb, SCOUTFS_LOCK_WRITE, 0, ino, &lock);
if (ret < 0)
goto out;
@@ -1842,27 +1866,32 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino)
/* only one local attempt per inode at a time */
if (test_and_set_bit(bit_nr, ldata->trying)) {
ret = 0;
trace_scoutfs_try_delete_local_busy(sb, ino);
ret = -EAGAIN;
goto out;
}
clear_trying = true;
/* can't delete if it's cached in local or remote mounts */
if (scoutfs_omap_test(sb, ino) || test_bit_le(bit_nr, ldata->map.bits)) {
ret = 0;
trace_scoutfs_try_delete_cached(sb, ino);
ret = -EAGAIN;
goto out;
}
scoutfs_inode_init_key(&key, ino);
ret = lookup_inode_item(sb, &key, &sinode, lock);
if (ret < 0) {
if (ret == -ENOENT)
if (ret == -ENOENT) {
trace_scoutfs_try_delete_no_item(sb, ino);
ret = 0;
}
goto out;
}
if (le32_to_cpu(sinode.nlink) > 0) {
ret = 0;
trace_scoutfs_try_delete_has_links(sb, ino, le32_to_cpu(sinode.nlink));
ret = -EAGAIN;
goto out;
}
@@ -1871,8 +1900,10 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino)
goto out;
ret = delete_inode_items(sb, ino, &sinode, lock, orph_lock);
if (ret == 0)
if (ret == 0) {
ret = -EAGAIN;
scoutfs_inc_counter(sb, inode_deleted);
}
out:
if (clear_trying)
@@ -2042,7 +2073,7 @@ void scoutfs_inode_schedule_orphan_dwork(struct super_block *sb)
low = (opts.orphan_scan_delay_ms * 80) / 100;
high = (opts.orphan_scan_delay_ms * 120) / 100;
delay = msecs_to_jiffies(low + prandom_u32_max(high - low)) ?: 1;
delay = msecs_to_jiffies(low + get_random_u32_below(high - low)) ?: 1;
mod_delayed_work(system_wq, &inf->orphan_scan_dwork, delay);
}
@@ -2074,6 +2105,10 @@ void scoutfs_inode_schedule_orphan_dwork(struct super_block *sb)
* a locally cached inode. Then we ask the server for the open map
* containing the inode. Only if we don't see any cached users do we do
* the expensive work of acquiring locks to try and delete the items.
*
* We need to track whether there is any orphan cleanup work remaining so
* that tests such as inode-deletion can watch the orphan_scan_empty counter
* to determine when inode cleanup from open-unlink scenarios is complete.
*/
static void inode_orphan_scan_worker(struct work_struct *work)
{
@@ -2085,11 +2120,14 @@ static void inode_orphan_scan_worker(struct work_struct *work)
SCOUTFS_BTREE_ITEM_REF(iref);
struct scoutfs_key last;
struct scoutfs_key key;
bool work_todo = false;
u64 group_nr;
int bit_nr;
u64 ino;
int ret;
trace_scoutfs_orphan_scan_start(sb);
scoutfs_inc_counter(sb, orphan_scan);
init_orphan_key(&last, U64_MAX);
@@ -2109,8 +2147,10 @@ static void inode_orphan_scan_worker(struct work_struct *work)
init_orphan_key(&key, ino);
ret = scoutfs_btree_next(sb, &roots.fs_root, &key, &iref);
if (ret < 0) {
if (ret == -ENOENT)
if (ret == -ENOENT) {
trace_scoutfs_orphan_scan_work(sb, 0);
break;
}
goto out;
}
@@ -2125,6 +2165,7 @@ static void inode_orphan_scan_worker(struct work_struct *work)
/* locally cached inodes will try to delete as they evict */
if (scoutfs_omap_test(sb, ino)) {
work_todo = true;
scoutfs_inc_counter(sb, orphan_scan_cached);
continue;
}
@@ -2140,13 +2181,22 @@ static void inode_orphan_scan_worker(struct work_struct *work)
/* remote cached inodes will also try to delete */
if (test_bit_le(bit_nr, omap.bits)) {
work_todo = true;
scoutfs_inc_counter(sb, orphan_scan_omap_set);
continue;
}
/* seemingly orphaned and unused, get locks and check for sure */
scoutfs_inc_counter(sb, orphan_scan_attempts);
trace_scoutfs_orphan_scan_work(sb, ino);
ret = try_delete_inode_items(sb, ino);
if (ret == -EAGAIN) {
work_todo = true;
ret = 0;
}
trace_scoutfs_orphan_scan_end(sb, ino, ret);
}
ret = 0;
@@ -2155,6 +2205,11 @@ out:
if (ret < 0)
scoutfs_inc_counter(sb, orphan_scan_error);
if (!work_todo)
scoutfs_inc_counter(sb, orphan_scan_empty);
trace_scoutfs_orphan_scan_stop(sb, work_todo);
scoutfs_inode_schedule_orphan_dwork(sb);
}


@@ -415,8 +415,6 @@ static long scoutfs_ioc_data_wait_err(struct file *file, unsigned long arg)
return 0;
if ((args.op & SCOUTFS_IOC_DWO_UNKNOWN) || !IS_ERR_VALUE(args.err))
return -EINVAL;
if ((args.op & SCOUTFS_IOC_DWO_UNKNOWN) || !IS_ERR_VALUE(args.err))
return -EINVAL;
trace_scoutfs_ioc_data_wait_err(sb, &args);
@@ -1669,6 +1667,78 @@ out:
return ret;
}
static long scoutfs_ioc_punch_offline(struct file *file, unsigned long arg)
{
struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
struct scoutfs_ioctl_punch_offline __user *upo = (void __user *)arg;
struct scoutfs_ioctl_punch_offline po;
struct scoutfs_lock *lock = NULL;
u64 iblock;
u64 last;
u64 tmp;
int ret;
if (copy_from_user(&po, upo, sizeof(po)))
return -EFAULT;
if (po.len == 0)
return 0;
if (check_add_overflow(po.offset, po.len - 1, &tmp) ||
(po.offset & SCOUTFS_BLOCK_SM_MASK) ||
(po.len & SCOUTFS_BLOCK_SM_MASK))
return -EOVERFLOW;
if (po.flags)
return -EINVAL;
ret = mnt_want_write_file(file);
if (ret < 0)
return ret;
inode_lock(inode);
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
if (ret)
goto out;
if (!S_ISREG(inode->i_mode)) {
ret = -EINVAL;
goto out;
}
if (!(file->f_mode & FMODE_WRITE)) {
ret = -EINVAL;
goto out;
}
ret = inode_permission(KC_VFS_INIT_NS inode, MAY_WRITE);
if (ret < 0)
goto out;
if (scoutfs_inode_data_version(inode) != po.data_version) {
ret = -ESTALE;
goto out;
}
if ((ret = scoutfs_inode_check_retention(inode)))
goto out;
iblock = po.offset >> SCOUTFS_BLOCK_SM_SHIFT;
last = (po.offset + po.len - 1) >> SCOUTFS_BLOCK_SM_SHIFT;
ret = scoutfs_data_punch_offline(inode, iblock, last, po.data_version, lock);
out:
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
inode_unlock(inode);
mnt_drop_write_file(file);
return ret;
}
long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
switch (cmd) {
@@ -1718,6 +1788,8 @@ long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return scoutfs_ioc_mod_quota_rule(file, arg, false);
case SCOUTFS_IOC_READ_XATTR_INDEX:
return scoutfs_ioc_read_xattr_index(file, arg);
case SCOUTFS_IOC_PUNCH_OFFLINE:
return scoutfs_ioc_punch_offline(file, arg);
}
return -ENOTTY;


@@ -848,4 +848,32 @@ struct scoutfs_ioctl_read_xattr_index {
#define SCOUTFS_IOC_READ_XATTR_INDEX \
_IOR(SCOUTFS_IOCTL_MAGIC, 23, struct scoutfs_ioctl_read_xattr_index)
/*
* This is a limited and specific version of hole punching. It's an
* archive layer operation that only converts unmapped offline extents
* into sparse extents. It is intended to be used when restoring sparse
* files after the initial creation set the entire file size offline.
*
 * The offset and len fields are in units of bytes and must be aligned
 * to the small (4KiB) block size.  All offline extents covered by the
 * range are converted to sparse extents; offline extents that straddle
 * the range's boundaries are converted only within it.  Any existing
 * sparse extents in the range are ignored.
*
 * The data_version must match the inode's current data_version or
 * ESTALE is returned.  The data_version is not modified by this
 * operation.
*
* EINVAL is returned if any mapped extents are found in the region. If
* an error is returned then partial progress may have been made.
*/
struct scoutfs_ioctl_punch_offline {
__u64 offset;
__u64 len;
__u64 data_version;
__u64 flags;
};
#define SCOUTFS_IOC_PUNCH_OFFLINE \
_IOW(SCOUTFS_IOCTL_MAGIC, 24, struct scoutfs_ioctl_punch_offline)
#endif


@@ -146,7 +146,7 @@ struct cached_item {
unsigned int val_len;
struct scoutfs_key key;
u64 seq;
char val[0];
char val[];
};
#define CACHED_ITEM_ALIGN 8
@@ -424,7 +424,7 @@ static struct cached_item *alloc_item(struct cached_page *pg,
item->seq = seq;
if (val_len)
memcpy(item->val, val, val_len);
memcpy(&item->val[0], val, val_len);
update_pg_max_seq(pg, item);
@@ -1999,7 +1999,7 @@ int scoutfs_item_update(struct super_block *sb, struct scoutfs_key *key,
if (val_len <= found->val_len) {
if (val_len)
memcpy(found->val, val, val_len);
memcpy(&found->val[0], val, val_len);
if (val_len < found->val_len)
pg->erased_bytes += item_val_bytes(found->val_len) -
item_val_bytes(val_len);
@@ -2316,7 +2316,7 @@ int scoutfs_item_write_dirty(struct super_block *sb)
lst->seq = item->seq;
lst->flags = item->deletion ? SCOUTFS_ITEM_FLAG_DELETION : 0;
lst->val_len = item->val_len;
memcpy(lst->val, item->val, item->val_len);
memcpy(&lst->val[0], item->val, item->val_len);
}
spin_lock(&cinf->dirty_lock);
@@ -2626,10 +2626,10 @@ int scoutfs_item_setup(struct super_block *sb)
for_each_possible_cpu(cpu)
init_pcpu_pages(cinf, cpu);
KC_INIT_SHRINKER_FUNCS(&cinf->shrinker, item_cache_count_objects,
item_cache_scan_objects);
KC_REGISTER_SHRINKER(&cinf->shrinker, "scoutfs-item:" SCSBF, SCSB_ARGS(sb));
KC_SETUP_SHRINKER(cinf->shrinker, cinf, 0, item_cache_count_objects,
item_cache_scan_objects, "scoutfs-item:" SCSBF, SCSB_ARGS(sb));
if (KC_SHRINKER_IS_NULL(cinf->shrinker))
return -ENOMEM;
#ifdef KC_CPU_NOTIFIER
cinf->notifier.notifier_call = item_cpu_callback;
register_hotcpu_notifier(&cinf->notifier);
@@ -2654,7 +2654,7 @@ void scoutfs_item_destroy(struct super_block *sb)
#ifdef KC_CPU_NOTIFIER
unregister_hotcpu_notifier(&cinf->notifier);
#endif
KC_UNREGISTER_SHRINKER(&cinf->shrinker);
KC_UNREGISTER_SHRINKER(cinf->shrinker);
for_each_possible_cpu(cpu)
drop_pcpu_pages(sb, cinf, cpu);


@@ -147,3 +147,13 @@ unsigned long kc_list_lru_shrink_walk(struct list_lru *lru, struct shrink_contro
}
#endif
#ifndef KC_FS_INODE_C_TIME_ACCESSOR
struct timespec64 inode_set_ctime_current(struct inode *inode)
{
struct timespec64 now = current_time(inode);
inode_set_ctime(inode, now.tv_sec, now.tv_nsec);
return now;
}
#endif


@@ -142,25 +142,54 @@ struct timespec64 kc_current_time(struct inode *inode);
#define kc_timespec timespec64
#endif
#ifndef KC_SHRINKER_SHRINK
#ifdef KC_SHRINKER_ALLOC
// el10+
#define KC_DEFINE_SHRINKER(name) struct shrinker name
#define KC_INIT_SHRINKER_FUNCS(name, countfn, scanfn) do { \
__typeof__(name) _shrink = (name); \
_shrink->count_objects = (countfn); \
_shrink->scan_objects = (scanfn); \
_shrink->seeks = DEFAULT_SEEKS; \
#define KC_DEFINE_SHRINKER(name) struct shrinker *(name)
#define KC_SHRINKER_CONTAINER_OF(ptr, type) ptr->private_data
#define KC_SETUP_SHRINKER(ptr, priv, flags, countfn, scanfn, fmt, args) \
do { \
ptr = shrinker_alloc(flags, fmt, args); \
if (ptr) { \
ptr->private_data = (priv); \
ptr->seeks = DEFAULT_SEEKS; \
ptr->count_objects = countfn; \
ptr->scan_objects = scanfn; \
shrinker_register(ptr); \
} \
} while (0)
#define KC_UNREGISTER_SHRINKER(ptr) shrinker_free(ptr)
#define KC_SHRINKER_FN(ptr) (ptr)
#define KC_SHRINKER_IS_NULL(ptr) (!(ptr))
#else /* KC_SHRINKER_ALLOC */
#ifndef KC_SHRINKER_SHRINK
// el9, el8
#define KC_DEFINE_SHRINKER(name) struct shrinker (name)
#define KC_SHRINKER_CONTAINER_OF(ptr, type) container_of(ptr, type, shrinker)
#ifdef KC_SHRINKER_NAME
#define KC_REGISTER_SHRINKER register_shrinker
#define KC_SETUP_SHRINKER(ptr, priv, flags, countfn, scanfn, fmt, args) \
do { \
(ptr).count_objects = (countfn); \
(ptr).scan_objects = (scanfn); \
(ptr).seeks = DEFAULT_SEEKS; \
register_shrinker(&(ptr), fmt, args); \
} while (0)
#else
#define KC_REGISTER_SHRINKER(ptr, fmt, ...) (register_shrinker(ptr))
#define KC_SETUP_SHRINKER(ptr, priv, flags, countfn, scanfn, fmt, args) \
do { \
(ptr).count_objects = (countfn); \
(ptr).scan_objects = (scanfn); \
(ptr).seeks = DEFAULT_SEEKS; \
register_shrinker(&(ptr)); \
} while (0)
#endif /* KC_SHRINKER_NAME */
#define KC_UNREGISTER_SHRINKER(ptr) (unregister_shrinker(ptr))
#define KC_SHRINKER_FN(ptr) (ptr)
#else
#define KC_UNREGISTER_SHRINKER(ptr) (unregister_shrinker(&(ptr)))
#define KC_SHRINKER_FN(ptr) (&ptr)
#else /* KC_SHRINKER_SHRINK */
// el7
#include <linux/shrinker.h>
#ifndef SHRINK_STOP
@@ -176,19 +205,21 @@ struct kc_shrinker_wrapper {
};
#define KC_DEFINE_SHRINKER(name) struct kc_shrinker_wrapper name;
#define KC_SHRINKER_CONTAINER_OF(ptr, type) container_of(container_of(ptr, struct kc_shrinker_wrapper, shrink), type, shrinker)
#define KC_SETUP_SHRINKER(ptr, priv, flags, countfn, scanfn, fmt, args) \
do { \
(ptr).count_objects = (countfn); \
(ptr).scan_objects = (scanfn); \
(ptr).shrink.shrink = kc_shrink_wrapper_fn; \
(ptr).shrink.seeks = DEFAULT_SEEKS; \
register_shrinker(&(ptr).shrink); \
} while (0)
#define KC_UNREGISTER_SHRINKER(ptr) (unregister_shrinker(&(ptr).shrink))
#define KC_SHRINKER_FN(ptr) (&(ptr).shrink)
#endif /* KC_SHRINKER_SHRINK */
#define KC_SHRINKER_IS_NULL(ptr) (0)
#endif /* KC_SHRINKER_ALLOC */
#ifdef KC_KERNEL_GETSOCKNAME_ADDRLEN
#include <linux/net.h>
@@ -279,6 +310,12 @@ typedef unsigned int blk_opf_t;
#endif
#endif /* KC_VFS_METHOD_MNT_IDMAP_ARG */
#ifdef KC_GENERIC_FILLATTR_REQUEST_MASK
#define KC_FILLATTR_REQUEST_MASK request_mask,
#else
#define KC_FILLATTR_REQUEST_MASK
#endif
#ifdef KC_BIO_ALLOC_DEV_OPF_ARGS
#define kc_bio_alloc bio_alloc
#else
@@ -452,6 +489,7 @@ unsigned long kc_list_lru_shrink_walk(struct list_lru *lru, struct shrink_contro
kc_list_lru_walk_cb_t isolate, void *cb_arg);
#else
#define kc_list_lru_shrink_walk list_lru_shrink_walk
#define kc_list_lru_walk list_lru_walk
#endif
#if defined(KC_LIST_LRU_WALK_CB_ITEM_LOCK)
@@ -489,4 +527,133 @@ static inline void stack_trace_print(unsigned long *entries, unsigned int nr_ent
}
#endif
#ifndef KC_HAVE_GET_RANDOM_U32_BELOW
#define get_random_u32_below prandom_u32_max
#endif
#ifndef KC_FS_INODE_C_TIME_ACCESSOR
struct timespec64 inode_set_ctime_current(struct inode *inode);
static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
struct timespec64 ts)
{
inode->i_ctime.tv_sec = ts.tv_sec;
inode->i_ctime.tv_nsec = ts.tv_nsec;
return ts;
}
static inline struct timespec64 inode_set_ctime(struct inode *inode,
time64_t sec, long nsec)
{
struct timespec64 ts = { .tv_sec = sec,
.tv_nsec = nsec };
return inode_set_ctime_to_ts(inode, ts);
}
static inline struct timespec64 inode_get_ctime(const struct inode *inode)
{
struct timespec64 ts = { .tv_sec = inode->i_ctime.tv_sec,
.tv_nsec = inode->i_ctime.tv_nsec };
return ts;
}
#endif
#ifndef KC_FS_INODE_AM_TIME_ACCESSOR
static inline struct timespec64 inode_get_mtime(const struct inode *inode)
{
struct timespec64 ts = { .tv_sec = inode->i_mtime.tv_sec,
.tv_nsec = inode->i_mtime.tv_nsec };
return ts;
}
static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode,
struct timespec64 ts)
{
inode->i_mtime.tv_sec = ts.tv_sec;
inode->i_mtime.tv_nsec = ts.tv_nsec;
return ts;
}
static inline struct timespec64 inode_set_mtime(struct inode *inode,
time64_t sec, long nsec)
{
struct timespec64 ts = { .tv_sec = sec,
.tv_nsec = nsec };
return inode_set_mtime_to_ts(inode, ts);
}
static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode,
struct timespec64 ts)
{
inode->i_atime.tv_sec = ts.tv_sec;
inode->i_atime.tv_nsec = ts.tv_nsec;
return ts;
}
static inline struct timespec64 inode_set_atime(struct inode *inode,
time64_t sec, long nsec)
{
struct timespec64 ts = { .tv_sec = sec,
.tv_nsec = nsec };
return inode_set_atime_to_ts(inode, ts);
}
static inline time64_t inode_get_ctime_sec(const struct inode *inode)
{
return inode->i_ctime.tv_sec;
}
static inline long inode_get_ctime_nsec(const struct inode *inode)
{
return inode->i_ctime.tv_nsec;
}
static inline time64_t inode_get_mtime_sec(const struct inode *inode)
{
return inode->i_mtime.tv_sec;
}
static inline long inode_get_mtime_nsec(const struct inode *inode)
{
return inode->i_mtime.tv_nsec;
}
static inline time64_t inode_get_atime_sec(const struct inode *inode)
{
return inode->i_atime.tv_sec;
}
static inline long inode_get_atime_nsec(const struct inode *inode)
{
return inode->i_atime.tv_nsec;
}
#endif
#ifdef KC_HAVE_BD_INODE
#define KC_BDEV_INODE(b) (b)->bd_inode
#define KC_BDEV_MAPPING(b) (b)->bd_inode->i_mapping
#else
#define KC_BDEV_INODE(b) (b)->bd_mapping->host
#define KC_BDEV_MAPPING(b) (b)->bd_mapping
#endif
#ifdef KC_HAVE_ASSIGN_STR_PARMS
#define kc__assign_str(a, b) __assign_str(a, b)
#else
#define kc__assign_str(a, b) __assign_str(a)
#endif
#ifdef KC_KOBJECT_DEFAULT_GROUPS
#define KC_KOBJ_DEFAULT_OP default_groups
#define KC_KOBJ_DEFAULT(name) (name##_groups)
#define KC_KOBJ_DEFAULT_PICK(group, attrs) (group)
#else
#define KC_KOBJ_DEFAULT_OP default_attrs
#define KC_KOBJ_DEFAULT(name) (name##_attrs)
#define KC_KOBJ_DEFAULT_PICK(group, attrs) (attrs)
#endif
#ifdef KC_BLOCK_WRITE_BEGIN_FOLIO
#define KC_PAGE_OR_FOLIO(p, f) f
#else
#define KC_PAGE_OR_FOLIO(p, f) p
#endif
#endif

View File

@@ -53,8 +53,10 @@
* all access to the lock (by revoking it down to a null mode) then the
* lock is freed.
*
* Each client has a configurable number of locks that are allowed to
* remain idle after being granted, for use by future tasks. Past that
* limit, locks are freed by requesting a null mode from the server,
* governed by an LRU.
*
* So far we've only needed a minimal trylock. We return -EAGAIN if a
* lock attempt can't immediately match an existing granted lock. This
@@ -79,14 +81,11 @@ struct lock_info {
bool unmounting;
struct rb_root lock_tree;
struct rb_root lock_range_tree;
u64 nr_locks;
struct list_head lru_list;
struct workqueue_struct *workq;
struct work_struct inv_work;
struct list_head inv_list;
atomic64_t next_refresh_gen;
struct dentry *tseq_dentry;
@@ -249,7 +248,6 @@ static void lock_free(struct lock_info *linfo, struct scoutfs_lock *lock)
BUG_ON(!RB_EMPTY_NODE(&lock->range_node));
BUG_ON(!list_empty(&lock->lru_head));
BUG_ON(!list_empty(&lock->inv_head));
BUG_ON(!list_empty(&lock->cov_list));
kfree(lock->inode_deletion_data);
@@ -277,7 +275,6 @@ static struct scoutfs_lock *lock_alloc(struct super_block *sb,
INIT_LIST_HEAD(&lock->lru_head);
INIT_LIST_HEAD(&lock->inv_head);
INIT_LIST_HEAD(&lock->inv_list);
spin_lock_init(&lock->cov_list_lock);
INIT_LIST_HEAD(&lock->cov_list);
@@ -410,6 +407,7 @@ static bool lock_insert(struct super_block *sb, struct scoutfs_lock *ins)
rb_link_node(&ins->node, parent, node);
rb_insert_color(&ins->node, &linfo->lock_tree);
linfo->nr_locks++;
scoutfs_tseq_add(&linfo->tseq_tree, &ins->tseq_entry);
return true;
@@ -424,6 +422,7 @@ static void lock_remove(struct lock_info *linfo, struct scoutfs_lock *lock)
rb_erase(&lock->range_node, &linfo->lock_range_tree);
RB_CLEAR_NODE(&lock->range_node);
linfo->nr_locks--;
scoutfs_tseq_del(&linfo->tseq_tree, &lock->tseq_entry);
}
@@ -463,10 +462,8 @@ static void __lock_del_lru(struct lock_info *linfo, struct scoutfs_lock *lock)
{
assert_spin_locked(&linfo->lock);
if (!list_empty(&lock->lru_head))
list_del_init(&lock->lru_head);
}
/*
@@ -525,14 +522,16 @@ static struct scoutfs_lock *create_lock(struct super_block *sb,
* indicate that the lock wasn't idle. If it really is idle then we
* either free it if it's null or put it back on the lru.
*/
static void __put_lock(struct lock_info *linfo, struct scoutfs_lock *lock, bool tail)
{
assert_spin_locked(&linfo->lock);
if (lock_idle(lock)) {
if (lock->mode != SCOUTFS_LOCK_NULL) {
if (tail)
list_add_tail(&lock->lru_head, &linfo->lru_list);
else
list_add(&lock->lru_head, &linfo->lru_list);
} else {
lock_remove(linfo, lock);
lock_free(linfo, lock);
@@ -540,6 +539,11 @@ static void put_lock(struct lock_info *linfo,struct scoutfs_lock *lock)
}
}
static inline void put_lock(struct lock_info *linfo, struct scoutfs_lock *lock)
{
__put_lock(linfo, lock, true);
}
/*
* The caller has made a change (set a lock mode) which can let one of the
* invalidating locks make forward progress.
@@ -713,14 +717,14 @@ static void lock_invalidate_worker(struct work_struct *work)
/* only lock protocol, inv can't call subsystems after shutdown */
if (!linfo->shutdown) {
ret = lock_invalidate(sb, lock, nl->old_mode, nl->new_mode);
BUG_ON(ret < 0 && ret != -ENOLINK);
}
/* respond with the key and modes from the request, server might have died */
ret = scoutfs_client_lock_response(sb, ireq->net_id, nl);
if (ret == -ENOTCONN)
ret = 0;
BUG_ON(ret < 0 && ret != -ENOLINK);
scoutfs_inc_counter(sb, lock_invalidate_response);
}
@@ -875,6 +879,69 @@ int scoutfs_lock_recover_request(struct super_block *sb, u64 net_id,
return ret;
}
/*
* This is called on every _lock call to try and keep the number of
* locks under the idle count. We're intentionally trying to throttle
* shrinking bursts by tying its frequency to lock use. It will only
* send requests to free unused locks, though, so it's always possible
* to exceed the high water mark under heavy load.
*
* We send a null request and the lock will be freed by the response
* once all users drain. If this races with invalidation then the
* server will only send the grant response once the invalidation is
* finished.
*/
static bool try_shrink_lock(struct super_block *sb, struct lock_info *linfo, bool force)
{
struct scoutfs_mount_options opts;
struct scoutfs_lock *lock = NULL;
struct scoutfs_net_lock nl;
int ret = 0;
scoutfs_options_read(sb, &opts);
/* avoiding lock contention with unsynchronized test, don't mind temp false results */
if (!force && (list_empty(&linfo->lru_list) ||
READ_ONCE(linfo->nr_locks) <= opts.lock_idle_count))
return false;
spin_lock(&linfo->lock);
lock = list_first_entry_or_null(&linfo->lru_list, struct scoutfs_lock, lru_head);
if (lock && (force || (linfo->nr_locks > opts.lock_idle_count))) {
__lock_del_lru(linfo, lock);
lock->request_pending = 1;
nl.key = lock->start;
nl.old_mode = lock->mode;
nl.new_mode = SCOUTFS_LOCK_NULL;
} else {
lock = NULL;
}
spin_unlock(&linfo->lock);
if (lock) {
ret = scoutfs_client_lock_request(sb, &nl);
if (ret < 0) {
scoutfs_inc_counter(sb, lock_shrink_request_failed);
spin_lock(&linfo->lock);
lock->request_pending = 0;
wake_up(&lock->waitq);
__put_lock(linfo, lock, false);
spin_unlock(&linfo->lock);
} else {
scoutfs_inc_counter(sb, lock_shrink_attempted);
trace_scoutfs_lock_shrink(sb, lock);
}
}
return lock && ret == 0;
}
static bool lock_wait_cond(struct super_block *sb, struct scoutfs_lock *lock,
enum scoutfs_lock_mode mode)
{
@@ -937,6 +1004,8 @@ static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, i
if (WARN_ON_ONCE(scoutfs_trans_held()))
return -EDEADLK;
try_shrink_lock(sb, linfo, false);
spin_lock(&linfo->lock);
/* drops and re-acquires lock if it allocates */
@@ -1380,134 +1449,12 @@ bool scoutfs_lock_protected(struct scoutfs_lock *lock, struct scoutfs_key *key,
&lock->start, &lock->end) == 0;
}
void scoutfs_free_unused_locks(struct super_block *sb)
{
DECLARE_LOCK_INFO(sb, linfo);
while (try_shrink_lock(sb, linfo, true))
cond_resched();
}
static void lock_tseq_show(struct seq_file *m, struct scoutfs_tseq_entry *ent)
@@ -1590,10 +1537,10 @@ u64 scoutfs_lock_ino_refresh_gen(struct super_block *sb, u64 ino)
* transitions and sending requests. We set the shutdown flag to catch
* anyone who breaks this rule.
*
* With no more lock callers, we'll no longer try to shrink the pool of
* granted locks. We'll free all of them as _destroy() is called after
* the farewell response indicates that the server tore down all our
* lock state.
*
* We will still respond to invalidation requests that have to be
* processed to let unmount in other mounts acquire locks and make
@@ -1613,10 +1560,6 @@ void scoutfs_lock_shutdown(struct super_block *sb)
trace_scoutfs_lock_shutdown(sb, linfo);
/* cause current and future lock calls to return errors */
spin_lock(&linfo->lock);
linfo->shutdown = true;
@@ -1707,8 +1650,6 @@ void scoutfs_lock_destroy(struct super_block *sb)
list_del_init(&lock->inv_head);
lock->invalidate_pending = 0;
}
lock_remove(linfo, lock);
lock_free(linfo, lock);
}
@@ -1733,14 +1674,9 @@ int scoutfs_lock_setup(struct super_block *sb)
spin_lock_init(&linfo->lock);
linfo->lock_tree = RB_ROOT;
linfo->lock_range_tree = RB_ROOT;
INIT_LIST_HEAD(&linfo->lru_list);
INIT_WORK(&linfo->inv_work, lock_invalidate_worker);
INIT_LIST_HEAD(&linfo->inv_list);
atomic64_set(&linfo->next_refresh_gen, 0);
scoutfs_tseq_tree_init(&linfo->tseq_tree, lock_tseq_show);

View File

@@ -506,6 +506,19 @@ out:
* because we don't know which locks they'll hold. Once recover
* finishes the server calls us to kick all the locks that were waiting
* during recovery.
*
* The calling server shuts down if we return errors indicating that we
* weren't able to ensure forward progress in the lock state machine.
*
* Failure to send to a disconnected client is not a fatal error.
* During normal disconnection the client's state is removed before
* their connection is destroyed. We can't use state to try and send to
* a non-existing connection. But a client that fails to reconnect is
* disconnected before being fenced. If we have multiple disconnected
* clients we can try to send to one while cleaning up another. If
* they've uncleanly disconnected, their locks are going to be removed
* and the lock can make forward progress again. Or we'll shut down for
* failure to fence.
*/
static int process_waiting_requests(struct super_block *sb,
struct server_lock_node *snode)
@@ -597,6 +610,10 @@ static int process_waiting_requests(struct super_block *sb,
out:
put_server_lock(inf, snode);
/* disconnected clients will be fenced, trying to send to them isn't fatal */
if (ret == -ENOTCONN)
ret = 0;
return ret;
}

View File

@@ -21,6 +21,7 @@
#include <net/tcp.h>
#include <linux/log2.h>
#include <linux/jhash.h>
#include <linux/rbtree.h>
#include "format.h"
#include "counters.h"
@@ -125,6 +126,7 @@ struct message_send {
unsigned long dead:1;
struct list_head head;
scoutfs_net_response_t resp_func;
struct rb_node node;
void *resp_data;
struct scoutfs_net_header nh;
};
@@ -161,49 +163,118 @@ static bool nh_is_request(struct scoutfs_net_header *nh)
return !nh_is_response(nh);
}
static int cmp_sorted_msend(u64 pos, struct message_send *msend)
{
if (nh_is_request(&msend->nh))
return pos < le64_to_cpu(msend->nh.id) ? -1 :
pos > le64_to_cpu(msend->nh.id) ? 1 : 0;
else
return pos < le64_to_cpu(msend->nh.seq) ? -1 :
pos > le64_to_cpu(msend->nh.seq) ? 1 : 0;
}
static struct message_send *search_sorted_msends(struct rb_root *root, u64 pos, struct rb_node *ins)
{
struct rb_node **node = &root->rb_node;
struct rb_node *parent = NULL;
struct message_send *msend = NULL;
struct message_send *next = NULL;
int cmp = -1;
while (*node) {
parent = *node;
msend = container_of(*node, struct message_send, node);
cmp = cmp_sorted_msend(pos, msend);
if (cmp < 0) {
next = msend;
node = &(*node)->rb_left;
} else if (cmp > 0) {
node = &(*node)->rb_right;
} else {
next = msend;
break;
}
}
BUG_ON(cmp == 0 && ins);
if (ins) {
rb_link_node(ins, parent, node);
rb_insert_color(ins, root);
}
return next;
}
static struct message_send *next_sorted_msend(struct message_send *msend)
{
struct rb_node *node = rb_next(&msend->node);
return node ? rb_entry(node, struct message_send, node) : NULL;
}
#define for_each_sorted_msend(MSEND_, TMP_, ROOT_, POS_) \
for (MSEND_ = search_sorted_msends(ROOT_, POS_, NULL); \
MSEND_ != NULL && ({ TMP_ = next_sorted_msend(MSEND_); true; }); \
MSEND_ = TMP_)
static void insert_sorted_msend(struct scoutfs_net_connection *conn, struct message_send *msend)
{
BUG_ON(!RB_EMPTY_NODE(&msend->node));
if (nh_is_request(&msend->nh))
search_sorted_msends(&conn->req_root, le64_to_cpu(msend->nh.id), &msend->node);
else
search_sorted_msends(&conn->resp_root, le64_to_cpu(msend->nh.seq), &msend->node);
}
static void erase_sorted_msend(struct scoutfs_net_connection *conn, struct message_send *msend)
{
if (!RB_EMPTY_NODE(&msend->node)) {
if (nh_is_request(&msend->nh))
rb_erase(&msend->node, &conn->req_root);
else
rb_erase(&msend->node, &conn->resp_root);
RB_CLEAR_NODE(&msend->node);
}
}
static void move_sorted_msends(struct scoutfs_net_connection *dst_conn, struct rb_root *dst_root,
struct scoutfs_net_connection *src_conn, struct rb_root *src_root)
{
struct message_send *msend;
struct message_send *tmp;
for_each_sorted_msend(msend, tmp, src_root, 0) {
erase_sorted_msend(src_conn, msend);
insert_sorted_msend(dst_conn, msend);
}
}
/*
* Pending requests are uniquely identified by the id they were assigned
* as they were first put on the send queue.
*/
static struct message_send *find_request(struct scoutfs_net_connection *conn, u8 cmd, u64 id)
{
struct message_send *msend;
assert_spin_locked(&conn->lock);
msend = search_sorted_msends(&conn->req_root, id, NULL);
if (msend && !(msend->nh.cmd == cmd && le64_to_cpu(msend->nh.id) == id))
msend = NULL;
return msend;
}
/*
* Free a send message by moving it to the send queue and marking it
* dead. It is removed from the sorted rb roots so it won't be visible
* as a request for response processing.
*/
static void queue_dead_free(struct scoutfs_net_connection *conn, struct message_send *msend)
{
assert_spin_locked(&conn->lock);
@@ -213,6 +284,7 @@ static void complete_send(struct scoutfs_net_connection *conn,
msend->dead = 1;
list_move(&msend->head, &conn->send_queue);
erase_sorted_msend(conn, msend);
queue_work(conn->workq, &conn->send_work);
}
@@ -264,7 +336,7 @@ static inline u8 net_err_from_host(struct super_block *sb, int error)
error);
}
return SCOUTFS_NET_ERR_EINVAL;
}
return net_errs[ind];
@@ -370,6 +442,7 @@ static int submit_send(struct super_block *sb,
msend->resp_func = resp_func;
msend->resp_data = resp_data;
msend->dead = 0;
RB_CLEAR_NODE(&msend->node);
msend->nh.seq = cpu_to_le64(seq);
msend->nh.recv_seq = 0; /* set when sent, not when queued */
@@ -390,6 +463,7 @@ static int submit_send(struct super_block *sb,
} else {
list_add_tail(&msend->head, &conn->resend_queue);
}
insert_sorted_msend(conn, msend);
if (id_ret)
*id_ret = le64_to_cpu(msend->nh.id);
@@ -459,7 +533,7 @@ static int process_response(struct scoutfs_net_connection *conn,
if (msend) {
resp_func = msend->resp_func;
resp_data = msend->resp_data;
queue_dead_free(conn, msend);
} else {
scoutfs_inc_counter(sb, net_dropped_response);
}
@@ -550,43 +624,21 @@ static void queue_ordered_proc(struct scoutfs_net_connection *conn, struct messa
* Free live responses up to and including the seq by marking them dead
* and moving them to the send queue to be freed.
*/
static void free_acked_responses(struct scoutfs_net_connection *conn, u64 seq)
{
struct message_send *msend;
struct message_send *tmp;
/* acks are processed inline in the recv worker */
spin_lock(&conn->lock);
for_each_sorted_msend(msend, tmp, &conn->resp_root, 0) {
if (le64_to_cpu(msend->nh.seq) > seq)
break;
queue_dead_free(conn, msend);
}
spin_unlock(&conn->lock);
}
static int k_recvmsg(struct socket *sock, void *buf, unsigned len)
@@ -824,9 +876,11 @@ static int k_sendmsg_full(struct socket *sock, struct kvec *kv, unsigned long nr
return ret;
}
static void free_msend(struct net_info *ninf, struct scoutfs_net_connection *conn,
struct message_send *msend)
{
list_del_init(&msend->head);
erase_sorted_msend(conn, msend);
scoutfs_tseq_del(&ninf->msg_tseq_tree, &msend->tseq_entry);
kfree(msend);
}
@@ -866,9 +920,10 @@ static void scoutfs_net_send_worker(struct work_struct *work)
count = 0;
spin_lock(&conn->lock);
list_for_each_entry_safe(msend, _msend_, &conn->send_queue, head) {
if (msend->dead) {
free_msend(ninf, conn, msend);
continue;
}
@@ -957,7 +1012,7 @@ static void scoutfs_net_destroy_worker(struct work_struct *work)
list_splice_init(&conn->resend_queue, &conn->send_queue);
list_for_each_entry_safe(msend, tmp, &conn->send_queue, head)
free_msend(ninf, conn, msend);
/* accepted sockets are removed from their listener's list */
if (conn->listening_conn) {
@@ -1303,7 +1358,7 @@ static void scoutfs_net_shutdown_worker(struct work_struct *work)
struct message_send, head))) {
resp_func = msend->resp_func;
resp_data = msend->resp_data;
free_msend(ninf, conn, msend);
spin_unlock(&conn->lock);
call_resp_func(sb, conn, resp_func, resp_data, NULL, 0, -ECONNABORTED);
@@ -1319,7 +1374,7 @@ static void scoutfs_net_shutdown_worker(struct work_struct *work)
list_splice_tail_init(&conn->send_queue, &conn->resend_queue);
list_for_each_entry_safe(msend, tmp, &conn->resend_queue, head) {
if (msend->nh.cmd == SCOUTFS_NET_CMD_GREETING)
free_msend(ninf, conn, msend);
}
clear_conn_fl(conn, saw_greeting);
@@ -1493,6 +1548,8 @@ scoutfs_net_alloc_conn(struct super_block *sb,
atomic64_set(&conn->recv_seq, 0);
INIT_LIST_HEAD(&conn->send_queue);
INIT_LIST_HEAD(&conn->resend_queue);
conn->req_root = RB_ROOT;
conn->resp_root = RB_ROOT;
INIT_WORK(&conn->listen_work, scoutfs_net_listen_worker);
INIT_WORK(&conn->connect_work, scoutfs_net_connect_worker);
INIT_WORK(&conn->send_work, scoutfs_net_send_worker);
@@ -1705,7 +1762,7 @@ void scoutfs_net_client_greeting(struct super_block *sb,
atomic64_set(&conn->recv_seq, 0);
list_for_each_entry_safe(msend, tmp, &conn->resend_queue, head){
if (nh_is_response(&msend->nh))
free_msend(ninf, conn, msend);
}
}
@@ -1808,6 +1865,8 @@ restart:
BUG_ON(!list_empty(&reconn->send_queue));
/* queued greeting response is racing, can be in send or resend queue */
list_splice_tail_init(&reconn->resend_queue, &conn->resend_queue);
move_sorted_msends(conn, &conn->req_root, reconn, &reconn->req_root);
move_sorted_msends(conn, &conn->resp_root, reconn, &reconn->resp_root);
/* new conn info is unused, swap, old won't call down */
swap(conn->info, reconn->info);

View File

@@ -67,6 +67,8 @@ struct scoutfs_net_connection {
u64 next_send_id;
struct list_head send_queue;
struct list_head resend_queue;
struct rb_root req_root;
struct rb_root resp_root;
atomic64_t recv_seq;
unsigned int ordered_proc_nr;

View File

@@ -34,6 +34,7 @@ enum {
Opt_data_prealloc_blocks,
Opt_data_prealloc_contig_only,
Opt_ino_alloc_per_lock,
Opt_lock_idle_count,
Opt_log_merge_wait_timeout_ms,
Opt_metadev_path,
Opt_noacl,
@@ -49,6 +50,7 @@ static const match_table_t tokens = {
{Opt_data_prealloc_blocks, "data_prealloc_blocks=%s"},
{Opt_data_prealloc_contig_only, "data_prealloc_contig_only=%s"},
{Opt_ino_alloc_per_lock, "ino_alloc_per_lock=%s"},
{Opt_lock_idle_count, "lock_idle_count=%s"},
{Opt_log_merge_wait_timeout_ms, "log_merge_wait_timeout_ms=%s"},
{Opt_metadev_path, "metadev_path=%s"},
{Opt_noacl, "noacl"},
@@ -119,6 +121,10 @@ static void free_options(struct scoutfs_mount_options *opts)
kfree(opts->metadev_path);
}
#define MIN_LOCK_IDLE_COUNT 32
#define DEFAULT_LOCK_IDLE_COUNT (10 * 1000)
#define MAX_LOCK_IDLE_COUNT (100 * 1000)
#define MIN_LOG_MERGE_WAIT_TIMEOUT_MS 100UL
#define DEFAULT_LOG_MERGE_WAIT_TIMEOUT_MS 500
#define MAX_LOG_MERGE_WAIT_TIMEOUT_MS (60 * MSEC_PER_SEC)
@@ -139,6 +145,7 @@ static void init_default_options(struct scoutfs_mount_options *opts)
opts->data_prealloc_blocks = SCOUTFS_DATA_PREALLOC_DEFAULT_BLOCKS;
opts->data_prealloc_contig_only = 1;
opts->ino_alloc_per_lock = SCOUTFS_LOCK_INODE_GROUP_NR;
opts->lock_idle_count = DEFAULT_LOCK_IDLE_COUNT;
opts->log_merge_wait_timeout_ms = DEFAULT_LOG_MERGE_WAIT_TIMEOUT_MS;
opts->orphan_scan_delay_ms = -1;
opts->quorum_heartbeat_timeout_ms = SCOUTFS_QUORUM_DEF_HB_TIMEO_MS;
@@ -146,6 +153,21 @@ static void init_default_options(struct scoutfs_mount_options *opts)
opts->tcp_keepalive_timeout_ms = DEFAULT_TCP_KEEPALIVE_TIMEOUT_MS;
}
static int verify_lock_idle_count(struct super_block *sb, int ret, int val)
{
if (ret < 0) {
scoutfs_err(sb, "failed to parse lock_idle_count value");
return -EINVAL;
}
if (val < MIN_LOCK_IDLE_COUNT || val > MAX_LOCK_IDLE_COUNT) {
scoutfs_err(sb, "invalid lock_idle_count value %d, must be between %u and %u",
val, MIN_LOCK_IDLE_COUNT, MAX_LOCK_IDLE_COUNT);
return -EINVAL;
}
return 0;
}
static int verify_log_merge_wait_timeout_ms(struct super_block *sb, int ret, int val)
{
if (ret < 0) {
@@ -261,6 +283,14 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m
opts->tcp_keepalive_timeout_ms = nr;
break;
case Opt_lock_idle_count:
ret = match_int(args, &nr);
ret = verify_lock_idle_count(sb, ret, nr);
if (ret < 0)
return ret;
opts->lock_idle_count = nr;
break;
case Opt_log_merge_wait_timeout_ms:
ret = match_int(args, &nr);
ret = verify_log_merge_wait_timeout_ms(sb, ret, nr);
@@ -536,6 +566,43 @@ static ssize_t ino_alloc_per_lock_store(struct kobject *kobj, struct kobj_attrib
}
SCOUTFS_ATTR_RW(ino_alloc_per_lock);
static ssize_t lock_idle_count_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
struct scoutfs_mount_options opts;
scoutfs_options_read(sb, &opts);
return snprintf(buf, PAGE_SIZE, "%d", opts.lock_idle_count);
}
static ssize_t lock_idle_count_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
DECLARE_OPTIONS_INFO(sb, optinf);
char nullterm[30]; /* more than enough for octal -U64_MAX */
int val;
int len;
int ret;
len = min(count, sizeof(nullterm) - 1);
memcpy(nullterm, buf, len);
nullterm[len] = '\0';
ret = kstrtoint(nullterm, 0, &val);
ret = verify_lock_idle_count(sb, ret, val);
if (ret == 0) {
write_seqlock(&optinf->seqlock);
optinf->opts.lock_idle_count = val;
write_sequnlock(&optinf->seqlock);
ret = count;
}
return ret;
}
SCOUTFS_ATTR_RW(lock_idle_count);
static ssize_t log_merge_wait_timeout_ms_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
@@ -677,6 +744,7 @@ static struct attribute *options_attrs[] = {
SCOUTFS_ATTR_PTR(data_prealloc_blocks),
SCOUTFS_ATTR_PTR(data_prealloc_contig_only),
SCOUTFS_ATTR_PTR(ino_alloc_per_lock),
SCOUTFS_ATTR_PTR(lock_idle_count),
SCOUTFS_ATTR_PTR(log_merge_wait_timeout_ms),
SCOUTFS_ATTR_PTR(metadev_path),
SCOUTFS_ATTR_PTR(orphan_scan_delay_ms),
@@ -684,13 +752,18 @@ static struct attribute *options_attrs[] = {
SCOUTFS_ATTR_PTR(quorum_slot_nr),
NULL,
};
#ifdef KC_KOBJECT_DEFAULT_GROUPS
ATTRIBUTE_GROUPS(options);
#endif
int scoutfs_options_setup(struct super_block *sb)
{
DECLARE_OPTIONS_INFO(sb, optinf);
int ret;
ret = scoutfs_sysfs_create_attrs(sb, &optinf->sysfs_attrs, options_attrs, "mount_options");
ret = scoutfs_sysfs_create_attrs(sb, &optinf->sysfs_attrs,
KC_KOBJ_DEFAULT_PICK(options_groups, options_attrs),
"mount_options");
if (ret < 0)
scoutfs_options_destroy(sb);
return ret;
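The lock_idle_count_store handler above copies at most sizeof(nullterm) - 1 bytes into a local buffer and NUL-terminates it before calling kstrtoint, since sysfs write buffers aren't guaranteed to be NUL-terminated. A userspace sketch of the same pattern, with strtol standing in for kstrtoint (function and buffer names here are illustrative):

```c
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>

/* Bounded copy + NUL-terminate before parsing, mirroring the
 * lock_idle_count_store pattern; strtol stands in for kstrtoint. */
static int parse_bounded_int(const char *buf, size_t count, int *val)
{
	char tmp[30];	/* more than enough for any int in octal/hex/decimal */
	size_t len = count < sizeof(tmp) - 1 ? count : sizeof(tmp) - 1;
	char *end;
	long v;

	memcpy(tmp, buf, len);
	tmp[len] = '\0';

	errno = 0;
	v = strtol(tmp, &end, 0);	/* base 0: accepts 0x.., 0.., decimal */
	if (end == tmp || errno != 0 || v < INT_MIN || v > INT_MAX)
		return -EINVAL;
	*val = (int)v;
	return 0;
}
```

Unlike kstrtoint, this sketch tolerates trailing characters such as the newline echo appends; the bounds-then-terminate step is the part the kernel code depends on.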

View File

@@ -9,6 +9,7 @@ struct scoutfs_mount_options {
u64 data_prealloc_blocks;
bool data_prealloc_contig_only;
unsigned int ino_alloc_per_lock;
int lock_idle_count;
unsigned int log_merge_wait_timeout_ms;
char *metadev_path;
unsigned int orphan_scan_delay_ms;

View File

@@ -162,7 +162,7 @@ static void quorum_slot_sin(struct scoutfs_quorum_config *qconf, int i, struct s
static ktime_t election_timeout(void)
{
return ktime_add_ms(ktime_get(), SCOUTFS_QUORUM_ELECT_MIN_MS +
prandom_u32_max(SCOUTFS_QUORUM_ELECT_VAR_MS));
get_random_u32_below(SCOUTFS_QUORUM_ELECT_VAR_MS));
}
static ktime_t heartbeat_interval(void)
@@ -1192,11 +1192,14 @@ static struct attribute *quorum_attrs[] = {
SCOUTFS_ATTR_PTR(is_leader),
NULL,
};
#ifdef KC_KOBJECT_DEFAULT_GROUPS
ATTRIBUTE_GROUPS(quorum);
#endif
static inline bool valid_ipv4_unicast(__be32 addr)
{
return !(ipv4_is_multicast(addr) && ipv4_is_lbcast(addr) &&
ipv4_is_zeronet(addr) && ipv4_is_local_multicast(addr));
return !(ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) ||
ipv4_is_zeronet(addr) || ipv4_is_local_multicast(addr));
}
static inline bool valid_ipv4_port(__be16 port)
@@ -1352,7 +1355,7 @@ int scoutfs_quorum_setup(struct super_block *sb)
if (ret < 0)
goto out;
ret = scoutfs_sysfs_create_attrs(sb, &qinf->ssa, quorum_attrs,
ret = scoutfs_sysfs_create_attrs(sb, &qinf->ssa, KC_KOBJ_DEFAULT(quorum),
"quorum");
if (ret < 0)
goto out;
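The quorum hunk above fixes a De Morgan slip: with &&, the negated conjunction only rejected an address that matched every predicate at once, so broadcast and multicast addresses were reported valid. A userspace sketch with simplified stand-ins for the kernel's ipv4_is_* helpers (host byte order for brevity; the real helpers take __be32):

```c
#include <stdbool.h>
#include <stdint.h>

/* Simplified stand-ins for the kernel ipv4_is_* predicates. */
static bool is_multicast(uint32_t a) { return (a & 0xf0000000u) == 0xe0000000u; }
static bool is_lbcast(uint32_t a)    { return a == 0xffffffffu; }
static bool is_zeronet(uint32_t a)   { return (a & 0xff000000u) == 0; }

/* Broken pre-fix variant: the conjunction is false unless every
 * predicate matches, so nearly every address passes as "valid". */
static bool valid_unicast_broken(uint32_t a)
{
	return !(is_multicast(a) && is_lbcast(a) && is_zeronet(a));
}

/* Fixed variant: reject the address if ANY predicate matches. */
static bool valid_unicast_fixed(uint32_t a)
{
	return !(is_multicast(a) || is_lbcast(a) || is_zeronet(a));
}
```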

View File

@@ -34,6 +34,7 @@
#include "totl.h"
#include "util.h"
#include "quota.h"
#include "trans.h"
#include "counters.h"
#include "scoutfs_trace.h"
@@ -204,7 +205,7 @@ static struct squota_check *lookup_random_check(struct rhashtable *rht)
tbl = rht_dereference_rcu(rht->tbl, rht);
do {
for (s = 0, i = prandom_u32_max(tbl->size);
for (s = 0, i = get_random_u32_below(tbl->size);
s < tbl->size;
s++, i = (i + 1) % tbl->size) {
rht_for_each_entry_rcu(chk, pos, tbl, i, head) {
@@ -269,7 +270,7 @@ static void shrink_all_cached_checks(struct squota_info *qtinf)
{
struct shrink_control sc = { .nr_to_scan = LONG_MAX, };
scan_cached_checks(KC_SHRINKER_FN(&qtinf->shrinker), &sc);
scan_cached_checks(KC_SHRINKER_FN(qtinf->shrinker), &sc);
}
static u8 ns_is_attr(u8 ns)
@@ -1086,6 +1087,10 @@ int scoutfs_quota_mod_rule(struct super_block *sb, bool is_add,
if (ret < 0)
goto out;
ret = scoutfs_hold_trans(sb, true);
if (ret < 0)
goto out;
down_write(&qtinf->rwsem);
if (is_add) {
@@ -1095,28 +1100,30 @@ int scoutfs_quota_mod_rule(struct super_block *sb, bool is_add,
else if (ret == 0)
ret = -EEXIST;
if (ret < 0)
goto unlock;
goto release;
rule_to_rule_val(&rv, &rule);
ret = scoutfs_item_create(sb, &key, &rv, sizeof(rv), lock);
if (ret < 0)
goto unlock;
goto release;
} else {
ret = find_rule(sb, &rule, &key, lock) ?:
scoutfs_item_delete(sb, &key, lock);
if (ret < 0)
goto unlock;
goto release;
}
scoutfs_quota_invalidate(sb);
ret = 0;
unlock:
release:
up_write(&qtinf->rwsem);
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
scoutfs_release_trans(sb);
out:
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
if (is_add)
trace_scoutfs_quota_add_rule(sb, &rule, ret);
else
@@ -1225,8 +1232,12 @@ int scoutfs_quota_setup(struct super_block *sb)
spin_lock_init(&qtinf->lock);
init_waitqueue_head(&qtinf->waitq);
KC_INIT_SHRINKER_FUNCS(&qtinf->shrinker, count_cached_checks, scan_cached_checks);
KC_REGISTER_SHRINKER(&qtinf->shrinker, "scoutfs-quota:" SCSBF, SCSB_ARGS(sb));
KC_SETUP_SHRINKER(qtinf->shrinker, qtinf, 0, count_cached_checks,
scan_cached_checks, "scoutfs-quota:" SCSBF, SCSB_ARGS(sb));
if (KC_SHRINKER_IS_NULL(qtinf->shrinker)) {
ret = -ENOMEM;
goto out;
}
sbi->squota_info = qtinf;
@@ -1250,7 +1261,7 @@ void scoutfs_quota_destroy(struct super_block *sb)
if (qtinf) {
debugfs_remove(qtinf->drop_dentry);
KC_UNREGISTER_SHRINKER(&qtinf->shrinker);
KC_UNREGISTER_SHRINKER(qtinf->shrinker);
spin_lock(&qtinf->lock);
rs = rcu_dereference_protected(qtinf->ruleset, lockdep_is_held(&qtinf->lock));

View File

@@ -102,7 +102,7 @@ TRACE_EVENT(scoutfs_setattr,
SCSB_TRACE_ASSIGN(dentry->d_inode->i_sb);
__entry->ino = scoutfs_ino(dentry->d_inode);
__entry->d_len = dentry->d_name.len;
__assign_str(d_name, dentry->d_name.name);
kc__assign_str(d_name, dentry->d_name.name);
__entry->ia_valid = attr->ia_valid;
__entry->size_change = !!(attr->ia_valid & ATTR_SIZE);
__entry->ia_size = attr->ia_size;
@@ -789,6 +789,80 @@ TRACE_EVENT(scoutfs_inode_walk_writeback,
__entry->ino, __entry->write, __entry->ret)
);
TRACE_EVENT(scoutfs_orphan_scan_start,
TP_PROTO(struct super_block *sb),
TP_ARGS(sb),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
),
TP_printk(SCSBF, SCSB_TRACE_ARGS)
);
TRACE_EVENT(scoutfs_orphan_scan_stop,
TP_PROTO(struct super_block *sb, bool work_todo),
TP_ARGS(sb, work_todo),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(bool, work_todo)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->work_todo = work_todo;
),
TP_printk(SCSBF" work_todo %d", SCSB_TRACE_ARGS, __entry->work_todo)
);
TRACE_EVENT(scoutfs_orphan_scan_work,
TP_PROTO(struct super_block *sb, __u64 ino),
TP_ARGS(sb, ino),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, ino)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->ino = ino;
),
TP_printk(SCSBF" ino %llu", SCSB_TRACE_ARGS,
__entry->ino)
);
TRACE_EVENT(scoutfs_orphan_scan_end,
TP_PROTO(struct super_block *sb, __u64 ino, int ret),
TP_ARGS(sb, ino, ret),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, ino)
__field(int, ret)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->ino = ino;
__entry->ret = ret;
),
TP_printk(SCSBF" ino %llu ret %d", SCSB_TRACE_ARGS,
__entry->ino, __entry->ret)
);
DECLARE_EVENT_CLASS(scoutfs_lock_info_class,
TP_PROTO(struct super_block *sb, struct lock_info *linfo),
@@ -1036,6 +1110,82 @@ TRACE_EVENT(scoutfs_orphan_inode,
MINOR(__entry->dev), __entry->ino)
);
DECLARE_EVENT_CLASS(scoutfs_try_delete_class,
TP_PROTO(struct super_block *sb, u64 ino),
TP_ARGS(sb, ino),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, ino)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->ino = ino;
),
TP_printk(SCSBF" ino %llu", SCSB_TRACE_ARGS, __entry->ino)
);
DEFINE_EVENT(scoutfs_try_delete_class, scoutfs_try_delete,
TP_PROTO(struct super_block *sb, u64 ino),
TP_ARGS(sb, ino)
);
DEFINE_EVENT(scoutfs_try_delete_class, scoutfs_try_delete_local_busy,
TP_PROTO(struct super_block *sb, u64 ino),
TP_ARGS(sb, ino)
);
DEFINE_EVENT(scoutfs_try_delete_class, scoutfs_try_delete_cached,
TP_PROTO(struct super_block *sb, u64 ino),
TP_ARGS(sb, ino)
);
DEFINE_EVENT(scoutfs_try_delete_class, scoutfs_try_delete_no_item,
TP_PROTO(struct super_block *sb, u64 ino),
TP_ARGS(sb, ino)
);
TRACE_EVENT(scoutfs_try_delete_has_links,
TP_PROTO(struct super_block *sb, u64 ino, unsigned int nlink),
TP_ARGS(sb, ino, nlink),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, ino)
__field(unsigned int, nlink)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->ino = ino;
__entry->nlink = nlink;
),
TP_printk(SCSBF" ino %llu nlink %u", SCSB_TRACE_ARGS, __entry->ino,
__entry->nlink)
);
TRACE_EVENT(scoutfs_inode_orphan_delete,
TP_PROTO(struct super_block *sb, u64 ino, int ret),
TP_ARGS(sb, ino, ret),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, ino)
__field(int, ret)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->ino = ino;
__entry->ret = ret;
),
TP_printk(SCSBF" ino %llu ret %d", SCSB_TRACE_ARGS, __entry->ino,
__entry->ret)
);
TRACE_EVENT(scoutfs_delete_inode,
TP_PROTO(struct super_block *sb, u64 ino, umode_t mode, u64 size),
@@ -1060,6 +1210,32 @@ TRACE_EVENT(scoutfs_delete_inode,
__entry->mode, __entry->size)
);
TRACE_EVENT(scoutfs_delete_inode_end,
TP_PROTO(struct super_block *sb, u64 ino, umode_t mode, u64 size, int ret),
TP_ARGS(sb, ino, mode, size, ret),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(__u64, ino)
__field(umode_t, mode)
__field(__u64, size)
__field(int, ret)
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->ino = ino;
__entry->mode = mode;
__entry->size = size;
__entry->ret = ret;
),
TP_printk("dev %d,%d ino %llu, mode 0x%x size %llu, ret %d",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino,
__entry->mode, __entry->size, __entry->ret)
);
DECLARE_EVENT_CLASS(scoutfs_key_class,
TP_PROTO(struct super_block *sb, struct scoutfs_key *key),
TP_ARGS(sb, key),
@@ -1443,28 +1619,6 @@ DEFINE_EVENT(scoutfs_work_class, scoutfs_data_return_server_extents_exit,
TP_ARGS(sb, data, ret)
);
DECLARE_EVENT_CLASS(scoutfs_shrink_exit_class,
TP_PROTO(struct super_block *sb, unsigned long nr_to_scan, int ret),
TP_ARGS(sb, nr_to_scan, ret),
TP_STRUCT__entry(
__field(void *, sb)
__field(unsigned long, nr_to_scan)
__field(int, ret)
),
TP_fast_assign(
__entry->sb = sb;
__entry->nr_to_scan = nr_to_scan;
__entry->ret = ret;
),
TP_printk("sb %p nr_to_scan %lu ret %d",
__entry->sb, __entry->nr_to_scan, __entry->ret)
);
DEFINE_EVENT(scoutfs_shrink_exit_class, scoutfs_lock_shrink_exit,
TP_PROTO(struct super_block *sb, unsigned long nr_to_scan, int ret),
TP_ARGS(sb, nr_to_scan, ret)
);
TRACE_EVENT(scoutfs_rename,
TP_PROTO(struct super_block *sb, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
@@ -1484,9 +1638,9 @@ TRACE_EVENT(scoutfs_rename,
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->old_dir_ino = scoutfs_ino(old_dir);
__assign_str(old_name, old_dentry->d_name.name)
kc__assign_str(old_name, old_dentry->d_name.name);
__entry->new_dir_ino = scoutfs_ino(new_dir);
__assign_str(new_name, new_dentry->d_name.name)
kc__assign_str(new_name, new_dentry->d_name.name);
__entry->new_inode_ino = new_dentry->d_inode ?
scoutfs_ino(new_dentry->d_inode) : 0;
),
@@ -1516,7 +1670,7 @@ TRACE_EVENT(scoutfs_d_revalidate,
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->dentry = dentry;
__assign_str(name, dentry->d_name.name)
kc__assign_str(name, dentry->d_name.name);
__entry->ino = dentry->d_inode ? scoutfs_ino(dentry->d_inode) : 0;
__entry->dir_ino = dir_ino;
__entry->flags = flags;
@@ -1551,7 +1705,7 @@ TRACE_EVENT(scoutfs_validate_dentry,
SCSB_TRACE_ASSIGN(sb);
__entry->dentry = dentry;
__entry->dir_ino = dir_ino;
__assign_str(name, dentry->d_name.name)
kc__assign_str(name, dentry->d_name.name);
__entry->dentry_ino = dentry_ino;
__entry->dent_ino = dent_ino;
__entry->fsdata_gen = (unsigned long long)dentry->d_fsdata;
@@ -1673,7 +1827,7 @@ TRACE_EVENT(scoutfs_get_name,
SCSB_TRACE_ASSIGN(sb);
__entry->parent_ino = scoutfs_ino(parent);
__entry->child_ino = scoutfs_ino(child);
__assign_str(name, name);
kc__assign_str(name, name);
),
TP_printk(SCSBF" parent %llu child %llu name: %s",
@@ -3097,6 +3251,24 @@ TRACE_EVENT(scoutfs_ioc_search_xattrs,
__entry->ino, __entry->last_ino)
);
TRACE_EVENT(scoutfs_trigger_fired,
TP_PROTO(struct super_block *sb, const char *name),
TP_ARGS(sb, name),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(const char *, name)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->name = name;
),
TP_printk(SCSBF" %s", SCSB_TRACE_ARGS, __entry->name)
);
#endif /* _TRACE_SCOUTFS_H */
/* This part must be outside protection */

View File

@@ -20,7 +20,6 @@
#include <net/sock.h>
#include <net/tcp.h>
#include <linux/log2.h>
#include <asm/unaligned.h>
#include "format.h"
#include "counters.h"
@@ -41,6 +40,7 @@
#include "recov.h"
#include "omap.h"
#include "fence.h"
#include "triggers.h"
/*
* Every active mount can act as the server that listens on a net
@@ -255,6 +255,14 @@ static void server_down(struct server_info *server)
cmpxchg(&server->status, was, SERVER_DOWN);
}
static void init_mounted_client_key(struct scoutfs_key *key, u64 rid)
{
*key = (struct scoutfs_key) {
.sk_zone = SCOUTFS_MOUNTED_CLIENT_ZONE,
.skmc_rid = cpu_to_le64(rid),
};
}
/*
* The per-holder allocation block use budget balances batching
* efficiency and concurrency. The larger this gets, the fewer
@@ -962,6 +970,28 @@ static int find_log_trees_item(struct super_block *sb,
return ret;
}
/*
* Return true if the given rid has a mounted_clients entry.
*/
static bool rid_is_mounted(struct super_block *sb, u64 rid)
{
DECLARE_SERVER_INFO(sb, server);
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
SCOUTFS_BTREE_ITEM_REF(iref);
struct scoutfs_key key;
int ret;
init_mounted_client_key(&key, rid);
mutex_lock(&server->mounted_clients_mutex);
ret = scoutfs_btree_lookup(sb, &super->mounted_clients, &key, &iref);
if (ret == 0)
scoutfs_btree_put_iref(&iref);
mutex_unlock(&server->mounted_clients_mutex);
return ret == 0;
}
/*
* Find the log_trees item with the greatest nr for each rid. Fills the
* caller's log_trees and sets the key before the returned log_trees for
@@ -1220,6 +1250,60 @@ static int do_finalize_ours(struct super_block *sb,
* happens to arrive at just the right time. That's fine, merging will
* ignore and tear down the empty input.
*/
static int reclaim_open_log_tree(struct super_block *sb, u64 rid);
/*
* Reclaim log trees for rids that have no mounted_clients entry.
* They block merges by appearing active. reclaim_open_log_tree
* may need multiple commits to drain allocators (-EINPROGRESS).
*
* The caller holds logs_mutex and a commit, both are dropped and
* re-acquired around each reclaim call. Returns >0 if any orphans
* were reclaimed so the caller can re-check state that may have
* changed while the lock was dropped.
*/
static int reclaim_orphan_log_trees(struct super_block *sb, u64 rid,
struct commit_hold *hold)
{
struct server_info *server = SCOUTFS_SB(sb)->server_info;
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
struct scoutfs_log_trees lt;
struct scoutfs_key key;
bool found = false;
u64 orphan_rid;
int ret;
int err;
scoutfs_key_init_log_trees(&key, U64_MAX, U64_MAX);
while ((ret = for_each_rid_last_lt(sb, &super->logs_root, &key, &lt)) > 0) {
if ((le64_to_cpu(lt.flags) & SCOUTFS_LOG_TREES_FINALIZED) ||
le64_to_cpu(lt.rid) == rid ||
rid_is_mounted(sb, le64_to_cpu(lt.rid)))
continue;
orphan_rid = le64_to_cpu(lt.rid);
scoutfs_err(sb, "reclaiming orphan log trees for rid %016llx nr %llu",
orphan_rid, le64_to_cpu(lt.nr));
found = true;
do {
mutex_unlock(&server->logs_mutex);
err = reclaim_open_log_tree(sb, orphan_rid);
ret = server_apply_commit(sb, hold,
err == -EINPROGRESS ? 0 : err);
server_hold_commit(sb, hold);
mutex_lock(&server->logs_mutex);
} while (err == -EINPROGRESS && ret == 0);
if (ret < 0)
break;
}
return ret < 0 ? ret : found;
}
#define FINALIZE_POLL_MIN_DELAY_MS 5U
#define FINALIZE_POLL_MAX_DELAY_MS 100U
#define FINALIZE_POLL_DELAY_GROWTH_PCT 150U
@@ -1260,6 +1344,16 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
break;
}
ret = reclaim_orphan_log_trees(sb, rid, hold);
if (ret < 0) {
err_str = "reclaiming orphan log trees";
break;
}
if (ret > 0) {
/* lock was dropped, re-check merge status */
continue;
}
/* look for finalized and other active log btrees */
saw_finalized = false;
others_active = false;
@@ -1291,9 +1385,13 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
* meta was low so that deleted items are merged
* promptly and freed blocks can bring the client out of
* enospc.
*
* The trigger can be used to force a log merge in cases where
* a test only generates small amounts of change.
*/
finalize_ours = (lt->item_root.height > 2) ||
(le32_to_cpu(lt->meta_avail.flags) & SCOUTFS_ALLOC_FLAG_LOW);
(le32_to_cpu(lt->meta_avail.flags) & SCOUTFS_ALLOC_FLAG_LOW) ||
scoutfs_trigger(sb, LOG_MERGE_FORCE_FINALIZE_OURS);
trace_scoutfs_server_finalize_decision(sb, rid, saw_finalized, others_active,
ours_visible, finalize_ours, delay_ms,
@@ -1402,6 +1500,8 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
BUG_ON(err); /* inconsistent */
}
scoutfs_inc_counter(sb, log_merge_start);
/* we're done, caller can make forward progress */
break;
}
@@ -1618,7 +1718,8 @@ static int server_get_log_trees(struct super_block *sb,
goto update;
}
ret = alloc_move_empty(sb, &super->data_alloc, &lt.data_freed, 100);
ret = alloc_move_empty(sb, &super->data_alloc, &lt.data_freed,
COMMIT_HOLD_ALLOC_BUDGET / 2);
if (ret == -EINPROGRESS)
ret = 0;
if (ret < 0) {
@@ -1913,13 +2014,15 @@ static int reclaim_open_log_tree(struct super_block *sb, u64 rid)
scoutfs_alloc_splice_list(sb, &server->alloc, &server->wri, server->other_freed,
&lt.meta_avail)) ?:
(err_str = "empty data_avail",
alloc_move_empty(sb, &super->data_alloc, &lt.data_avail, 100)) ?:
alloc_move_empty(sb, &super->data_alloc, &lt.data_avail,
COMMIT_HOLD_ALLOC_BUDGET / 2)) ?:
(err_str = "empty data_freed",
alloc_move_empty(sb, &super->data_alloc, &lt.data_freed, 100));
alloc_move_empty(sb, &super->data_alloc, &lt.data_freed,
COMMIT_HOLD_ALLOC_BUDGET / 2));
mutex_unlock(&server->alloc_mutex);
/* only finalize, allowing merging, once the allocators are fully freed */
if (ret == 0) {
if (ret == 0 && !scoutfs_trigger(sb, RECLAIM_SKIP_FINALIZE)) {
/* the transaction is no longer open */
lt.commit_trans_seq = lt.get_trans_seq;
@@ -1971,7 +2074,8 @@ static int get_stable_trans_seq(struct super_block *sb, u64 *last_seq_ret)
scoutfs_key_init_log_trees(&key, U64_MAX, U64_MAX);
while ((ret = for_each_rid_last_lt(sb, &super->logs_root, &key, &lt)) > 0) {
if ((le64_to_cpu(lt.get_trans_seq) > le64_to_cpu(lt.commit_trans_seq)) &&
le64_to_cpu(lt.get_trans_seq) <= last_seq) {
le64_to_cpu(lt.get_trans_seq) <= last_seq &&
rid_is_mounted(sb, le64_to_cpu(lt.rid))) {
last_seq = le64_to_cpu(lt.get_trans_seq) - 1;
}
}
@@ -2506,6 +2610,8 @@ static int splice_log_merge_completions(struct super_block *sb,
queue_work(server->wq, &server->log_merge_free_work);
else
err_str = "deleting merge status item";
scoutfs_inc_counter(sb, log_merge_complete);
out:
if (upd_stat) {
init_log_merge_key(&key, SCOUTFS_LOG_MERGE_STATUS_ZONE, 0, 0);
@@ -3360,7 +3466,7 @@ out:
static u64 device_blocks(struct block_device *bdev, int shift)
{
return i_size_read(bdev->bd_inode) >> shift;
return i_size_read(KC_BDEV_INODE(bdev)) >> shift;
}
static int server_resize_devices(struct super_block *sb, struct scoutfs_net_connection *conn,
@@ -3521,14 +3627,6 @@ out:
return scoutfs_net_response(sb, conn, cmd, id, ret, &nst, sizeof(nst));
}
static void init_mounted_client_key(struct scoutfs_key *key, u64 rid)
{
*key = (struct scoutfs_key) {
.sk_zone = SCOUTFS_MOUNTED_CLIENT_ZONE,
.skmc_rid = cpu_to_le64(rid),
};
}
static bool invalid_mounted_client_item(struct scoutfs_btree_item_ref *iref)
{
return (iref->val_len != sizeof(struct scoutfs_mounted_client_btree_val));
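reclaim_orphan_log_trees above drops logs_mutex around each reclaim_open_log_tree call and loops while the step reports -EINPROGRESS, retaking the lock between steps. The shape of that retry can be sketched in userspace with a pthread mutex and a toy multi-step drain (all names and the errno stand-in are hypothetical):

```c
#include <pthread.h>

#define TOY_EINPROGRESS (-115)	/* stand-in for the kernel's -EINPROGRESS */

/* Toy operation that needs several calls before it completes. */
static int drain_step(int *remaining)
{
	if (*remaining > 0)
		(*remaining)--;
	return *remaining ? TOY_EINPROGRESS : 0;
}

/* Drop the lock around each partial step and retake it before
 * re-checking, looping while the step reports partial progress.
 * The caller enters and leaves with the lock held. */
static int drain_under_lock(pthread_mutex_t *lock, int *remaining)
{
	int err;

	do {
		pthread_mutex_unlock(lock);
		err = drain_step(remaining);
		pthread_mutex_lock(lock);
	} while (err == TOY_EINPROGRESS);

	return err;
}
```

Because the lock is dropped mid-loop, any state guarded by it may have changed, which is why the server code returns >0 so the caller re-checks merge state rather than trusting what it saw before the drop.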

View File

@@ -18,7 +18,11 @@
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/sort.h>
#ifdef KC_HAVE__LINUX_UNALIGNED_H
#include <linux/unaligned.h>
#else
#include <asm/unaligned.h>
#endif
#include "super.h"
#include "format.h"
@@ -2346,6 +2350,9 @@ static struct attribute *srch_attrs[] = {
SCOUTFS_ATTR_PTR(compact_delay_ms),
NULL,
};
#ifdef KC_KOBJECT_DEFAULT_GROUPS
ATTRIBUTE_GROUPS(srch);
#endif
void scoutfs_srch_destroy(struct super_block *sb)
{
@@ -2387,7 +2394,8 @@ int scoutfs_srch_setup(struct super_block *sb)
sbi->srch_info = srinf;
ret = scoutfs_sysfs_create_attrs(sb, &srinf->ssa, srch_attrs, "srch");
ret = scoutfs_sysfs_create_attrs(sb, &srinf->ssa, KC_KOBJ_DEFAULT(srch),
"srch");
if (ret < 0)
goto out;

View File

@@ -283,7 +283,7 @@ int scoutfs_write_super(struct super_block *sb,
static bool small_bdev(struct super_block *sb, char *which, u64 blocks,
struct block_device *bdev, int shift)
{
u64 size = (u64)i_size_read(bdev->bd_inode);
u64 size = (u64)i_size_read(KC_BDEV_INODE(bdev));
u64 count = size >> shift;
if (blocks > count) {
@@ -508,7 +508,7 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_time_gran = 1;
/* btree blocks use long lived bh->b_data refs */
mapping_set_gfp_mask(sb->s_bdev->bd_inode->i_mapping, GFP_NOFS);
mapping_set_gfp_mask(KC_BDEV_MAPPING(sb->s_bdev), GFP_NOFS);
sbi = kzalloc(sizeof(struct scoutfs_sb_info), GFP_KERNEL);
sb->s_fs_info = sbi;
@@ -552,6 +552,7 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
}
sbi->meta_bdev_file = meta_bdev_file;
sbi->meta_bdev = file_bdev(meta_bdev_file);
#else
#ifdef KC_BLKDEV_PUT_HOLDER_ARG
meta_bdev = blkdev_get_by_path(opts.metadev_path, SCOUTFS_META_BDEV_MODE, sb, NULL);
@@ -567,7 +568,11 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
sbi->meta_bdev = meta_bdev;
#endif
#ifdef KC_BLKDEV_SET_BLOCKSIZE_FILE
ret = set_blocksize(sbi->meta_bdev_file, SCOUTFS_BLOCK_SM_SIZE);
#else
ret = set_blocksize(sbi->meta_bdev, SCOUTFS_BLOCK_SM_SIZE);
#endif
if (ret != 0) {
scoutfs_err(sb, "failed to set metadev blocksize, returned %d",
ret);

View File

@@ -103,12 +103,11 @@ static ssize_t attr_funcs_show(struct kobject *kobj, struct attribute *attr,
}; \
\
static struct kobj_type _name##_ktype = { \
.default_attrs = _name##_attrs, \
.KC_KOBJ_DEFAULT_OP = KC_KOBJ_DEFAULT(_name), \
.sysfs_ops = &_name##_sysfs_ops, \
.release = _name##_release, \
};
static struct attribute *sb_id_attrs[] = {
&data_device_maj_min_attr_funcs.attr,
&format_version_attr_funcs.attr,
@@ -116,6 +115,9 @@ static struct attribute *sb_id_attrs[] = {
&rid_attr_funcs.attr,
NULL,
};
#ifdef KC_KOBJECT_DEFAULT_GROUPS
ATTRIBUTE_GROUPS(sb_id);
#endif
KTYPE(sb_id);
struct kobject *scoutfs_sysfs_sb_dir(struct super_block *sb)
@@ -155,7 +157,12 @@ void scoutfs_sysfs_init_attrs(struct super_block *sb,
int scoutfs_sysfs_create_attrs_parent(struct super_block *sb,
struct kobject *parent,
struct scoutfs_sysfs_attrs *ssa,
struct attribute **attrs, char *fmt, ...)
#ifdef KC_KOBJECT_DEFAULT_GROUPS
const struct attribute_group **groups,
#else
struct attribute **attrs,
#endif
char *fmt, ...)
{
va_list args;
size_t name_len;
@@ -168,7 +175,11 @@ int scoutfs_sysfs_create_attrs_parent(struct super_block *sb,
ssa->sb = sb;
init_completion(&ssa->comp);
#ifdef KC_KOBJECT_DEFAULT_GROUPS
ssa->ktype.default_groups = groups;
#else
ssa->ktype.default_attrs = attrs;
#endif
ssa->ktype.sysfs_ops = &kobj_sysfs_ops;
ssa->ktype.release = scoutfs_sysfs_release;
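The ifdef dance above keys off KC_KOBJECT_DEFAULT_GROUPS: newer kernels dropped kobj_type.default_attrs in favor of .default_groups, so the compat layer has to hand the ktype whichever member exists. A plausible definition of the selector macros used by the callers (assumed; the real kc compat header may differ):

```c
/* Assumed shape of the compat selectors: when the running kernel's
 * struct kobj_type only has .default_groups, pick the group array
 * built with ATTRIBUTE_GROUPS(); otherwise pass the bare attribute
 * array to the old .default_attrs member. */
#ifdef KC_KOBJECT_DEFAULT_GROUPS
#define KC_KOBJ_DEFAULT_OP			default_groups
#define KC_KOBJ_DEFAULT(_name)			(_name##_groups)
#define KC_KOBJ_DEFAULT_PICK(_groups, _attrs)	(_groups)
#else
#define KC_KOBJ_DEFAULT_OP			default_attrs
#define KC_KOBJ_DEFAULT(_name)			(_name##_attrs)
#define KC_KOBJ_DEFAULT_PICK(_groups, _attrs)	(_attrs)
#endif
```

Keeping both the token-pasting form (KC_KOBJ_DEFAULT) and the explicit-argument form (KC_KOBJ_DEFAULT_PICK) lets callers whose group array isn't named `<name>_groups`, like volopt above, still participate.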

View File

@@ -39,10 +39,15 @@ void scoutfs_sysfs_init_attrs(struct super_block *sb,
int scoutfs_sysfs_create_attrs_parent(struct super_block *sb,
struct kobject *parent,
struct scoutfs_sysfs_attrs *ssa,
struct attribute **attrs, char *fmt, ...);
#define scoutfs_sysfs_create_attrs(sb, ssa, attrs, fmt, args...) \
#ifdef KC_KOBJECT_DEFAULT_GROUPS
const struct attribute_group **groups,
#else
struct attribute **attrs,
#endif
char *fmt, ...);
#define scoutfs_sysfs_create_attrs(sb, ssa, group_or_attrs, fmt, args...) \
scoutfs_sysfs_create_attrs_parent(sb, scoutfs_sysfs_sb_dir(sb), \
ssa, attrs, fmt, ##args)
ssa, group_or_attrs, fmt, ##args)
void scoutfs_sysfs_destroy_attrs(struct super_block *sb,
struct scoutfs_sysfs_attrs *ssa);

View File

@@ -30,6 +30,11 @@ void scoutfs_totl_merge_init(struct scoutfs_totl_merging *merg)
memset(merg, 0, sizeof(struct scoutfs_totl_merging));
}
/*
* bin the incoming merge inputs so that we can resolve delta items
* properly. Finalized logs that are merge inputs are kept separately
* from those that are not.
*/
void scoutfs_totl_merge_contribute(struct scoutfs_totl_merging *merg,
u64 seq, u8 flags, void *val, int val_len, int fic)
{
@@ -39,10 +44,10 @@ void scoutfs_totl_merge_contribute(struct scoutfs_totl_merging *merg,
merg->fs_seq = seq;
merg->fs_total = le64_to_cpu(tval->total);
merg->fs_count = le64_to_cpu(tval->count);
} else if (fic & FIC_FINALIZED) {
merg->fin_seq = seq;
merg->fin_total += le64_to_cpu(tval->total);
merg->fin_count += le64_to_cpu(tval->count);
} else if (fic & FIC_MERGE_INPUT) {
merg->inp_seq = seq;
merg->inp_total += le64_to_cpu(tval->total);
merg->inp_count += le64_to_cpu(tval->count);
} else {
merg->log_seq = seq;
merg->log_total += le64_to_cpu(tval->total);
@@ -53,15 +58,18 @@ void scoutfs_totl_merge_contribute(struct scoutfs_totl_merging *merg,
/*
* .totl. item merging has to be careful because the log btree merging
* code can write partial results to the fs_root. This means that a
* reader can see both cases where new finalized logs should be applied
* to the old fs items and where old finalized logs have already been
* applied to the partially merged fs items. Currently active logged
* items are always applied on top of all cases.
* reader can see both cases where merge input deltas should be applied
* to the old fs items and where they have already been applied to the
* partially merged fs items.
*
* Only finalized log trees that are inputs to the current merge cycle
* are tracked in the inp_ bucket. Finalized trees that aren't merge
* inputs and active log trees are always applied unconditionally since
* they cannot be in fs_root.
*
* These cases are differentiated with a combination of sequence numbers
* in items, the count of contributing xattrs, and a flag
* differentiating finalized and active logged items. This lets us
* recognize all cases, including when finalized logs were merged and
* in items and the count of contributing xattrs. This lets us
* recognize all cases, including when merge inputs were merged and
* deleted the fs item.
*/
void scoutfs_totl_merge_resolve(struct scoutfs_totl_merging *merg, __u64 *total, __u64 *count)
@@ -75,14 +83,14 @@ void scoutfs_totl_merge_resolve(struct scoutfs_totl_merging *merg, __u64 *total,
*count = merg->fs_count;
}
/* apply finalized logs if they're newer or creating */
if (((merg->fs_seq != 0) && (merg->fin_seq > merg->fs_seq)) ||
((merg->fs_seq == 0) && (merg->fin_count > 0))) {
*total += merg->fin_total;
*count += merg->fin_count;
/* apply merge input deltas if they're newer or creating */
if (((merg->fs_seq != 0) && (merg->inp_seq > merg->fs_seq)) ||
((merg->fs_seq == 0) && (merg->inp_count > 0))) {
*total += merg->inp_total;
*count += merg->inp_count;
}
/* always apply active logs which must be newer than fs and finalized */
/* always apply non-input finalized and active logs */
if (merg->log_seq > 0) {
*total += merg->log_total;
*count += merg->log_count;
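The resolve rule described above can be condensed into a small standalone sketch (simplified struct and hypothetical field names mirroring the fs_/inp_/log_ buckets): merge-input deltas are applied only when they are newer than the fs item or created it, while non-input contributions are always applied.

```c
#include <stdint.h>

/* Simplified mirror of scoutfs_totl_merging's three buckets. */
struct totl_merge {
	uint64_t fs_seq, fs_total, fs_count;
	uint64_t inp_seq, inp_total, inp_count;
	uint64_t log_seq, log_total, log_count;
};

static void resolve(const struct totl_merge *m, uint64_t *total, uint64_t *count)
{
	*total = 0;
	*count = 0;

	if (m->fs_seq != 0) {
		*total = m->fs_total;
		*count = m->fs_count;
	}

	/* apply merge input deltas only if they're newer or creating,
	 * so partially merged fs items don't absorb them twice */
	if ((m->fs_seq != 0 && m->inp_seq > m->fs_seq) ||
	    (m->fs_seq == 0 && m->inp_count > 0)) {
		*total += m->inp_total;
		*count += m->inp_count;
	}

	/* always apply non-input finalized and active logs */
	if (m->log_seq > 0) {
		*total += m->log_total;
		*count += m->log_count;
	}
}
```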

View File

@@ -7,9 +7,9 @@ struct scoutfs_totl_merging {
u64 fs_seq;
u64 fs_total;
u64 fs_count;
u64 fin_seq;
u64 fin_total;
s64 fin_count;
u64 inp_seq;
u64 inp_total;
s64 inp_count;
u64 log_seq;
u64 log_total;
s64 log_count;

View File

@@ -18,6 +18,7 @@
#include "super.h"
#include "triggers.h"
#include "scoutfs_trace.h"
/*
* We have debugfs files we can write to which arm triggers which
@@ -39,10 +40,13 @@ struct scoutfs_triggers {
static char *names[] = {
[SCOUTFS_TRIGGER_BLOCK_REMOVE_STALE] = "block_remove_stale",
[SCOUTFS_TRIGGER_LOG_MERGE_FORCE_FINALIZE_OURS] = "log_merge_force_finalize_ours",
[SCOUTFS_TRIGGER_SRCH_COMPACT_LOGS_PAD_SAFE] = "srch_compact_logs_pad_safe",
[SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE] = "srch_force_log_rotate",
[SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE] = "srch_merge_stop_safe",
[SCOUTFS_TRIGGER_STATFS_LOCK_PURGE] = "statfs_lock_purge",
[SCOUTFS_TRIGGER_RECLAIM_SKIP_FINALIZE] = "reclaim_skip_finalize",
[SCOUTFS_TRIGGER_LOG_MERGE_FORCE_PARTIAL] = "log_merge_force_partial",
};
bool scoutfs_trigger_test_and_clear(struct super_block *sb, unsigned int t)
@@ -51,6 +55,7 @@ bool scoutfs_trigger_test_and_clear(struct super_block *sb, unsigned int t)
atomic_t *atom;
int old;
int mem;
bool fired;
BUG_ON(t >= SCOUTFS_TRIGGER_NR);
atom = &triggers->atomics[t];
@@ -64,7 +69,12 @@ bool scoutfs_trigger_test_and_clear(struct super_block *sb, unsigned int t)
mem = atomic_cmpxchg(atom, old, 0);
} while (mem && mem != old);
return !!mem;
fired = !!mem;
if (fired)
trace_scoutfs_trigger_fired(sb, names[t]);
return fired;
}
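scoutfs_trigger_test_and_clear above loops on atomic_cmpxchg to atomically take the armed count and clear it. In C11 userspace the same "take and clear" collapses to a single atomic_exchange, a sketch:

```c
#include <stdatomic.h>
#include <stdbool.h>

/* Atomically swap the trigger count to zero and report whether it
 * was armed; same effect as the kernel cmpxchg retry loop. */
static bool trigger_test_and_clear(atomic_int *atom)
{
	return atomic_exchange(atom, 0) != 0;
}
```

Note the kernel version first reads and bails early when the count is already zero, avoiding a write to a shared cacheline on the common unarmed path; atomic_exchange trades that optimization for brevity.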
int scoutfs_setup_triggers(struct super_block *sb)

View File

@@ -3,10 +3,13 @@
enum scoutfs_trigger {
SCOUTFS_TRIGGER_BLOCK_REMOVE_STALE,
SCOUTFS_TRIGGER_LOG_MERGE_FORCE_FINALIZE_OURS,
SCOUTFS_TRIGGER_SRCH_COMPACT_LOGS_PAD_SAFE,
SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE,
SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE,
SCOUTFS_TRIGGER_STATFS_LOCK_PURGE,
SCOUTFS_TRIGGER_RECLAIM_SKIP_FINALIZE,
SCOUTFS_TRIGGER_LOG_MERGE_FORCE_PARTIAL,
SCOUTFS_TRIGGER_NR,
};

View File

@@ -52,6 +52,15 @@ static struct volopt_nr_name {
/* initialized by setup, pointer array is null terminated */
static struct kobj_attribute volopt_attrs[ARRAY_SIZE(volopt_table)];
static struct attribute *volopt_attr_ptrs[ARRAY_SIZE(volopt_table) + 1];
#ifdef KC_KOBJECT_DEFAULT_GROUPS
static const struct attribute_group volopt_group = {
.attrs = volopt_attr_ptrs,
};
static const struct attribute_group *volopt_groups[] = {
&volopt_group,
NULL,
};
#endif
static void get_opt_data(struct kobj_attribute *attr, struct scoutfs_volume_options *volopt,
u64 *bit, __le64 **opt)
@@ -164,7 +173,9 @@ int scoutfs_volopt_setup(struct super_block *sb)
BUILD_BUG_ON(ARRAY_SIZE(volopt_table) != ARRAY_SIZE(volopt_attr_ptrs) - 1);
volopt_attr_ptrs[i] = NULL;
ret = scoutfs_sysfs_create_attrs(sb, &vinf->ssa, volopt_attr_ptrs, "volume_options");
ret = scoutfs_sysfs_create_attrs(sb, &vinf->ssa,
KC_KOBJ_DEFAULT_PICK(volopt_groups, volopt_attr_ptrs),
"volume_options");
if (ret < 0)
goto out;

View File

@@ -95,6 +95,7 @@ struct wkic_info {
/* block reading slow path */
struct mutex roots_mutex;
struct scoutfs_net_roots roots;
u64 merge_input_seq;
u64 roots_read_seq;
ktime_t roots_expire;
@@ -171,7 +172,7 @@ struct wkic_item {
u64 seq;
unsigned int val_len;
u8 flags;
u8 val[0] __aligned(ARCH_KMALLOC_MINALIGN); /* totls have native structs */
u8 val[] __aligned(ARCH_KMALLOC_MINALIGN); /* totls have native structs */
};
static struct wkic_item *witem_container(struct rb_node *node)
@@ -763,7 +764,7 @@ static void fill_page_items(struct super_block *sb, struct wkic_page *wpage, str
pg_item->val_len = witem->val_len;
pg_item->flags = witem->flags;
if (witem->val_len)
memcpy(pg_item->val, witem->val, witem->val_len);
memcpy(&pg_item->val[0], witem->val, witem->val_len);
/* always inserting greatest item into page */
rb_link_node(&pg_item->node, parent, node);
@@ -805,29 +806,79 @@ static void free_page_list(struct super_block *sb, struct list_head *list)
* read_seq number so that we can compare the age of the items in cached
* pages. Only one request to refresh the roots is in progress at a
* time. This is the slow path that's only used when the cache isn't
* populated and the roots aren't cached. The root request is fast
* enough, especially compared to the resulting item reading IO, that we
* don't mind hiding it behind a trivial mutex.
* populated and the roots aren't cached.
*
* We read roots directly from the on-disk superblock rather than
* requesting them from the server so that we can also read the
* log_merge btree from the same superblock. The merge status item
* seq tells us which finalized log trees are inputs to the current
* merge, which is needed to correctly resolve totl delta items.
*/
static int get_roots(struct super_block *sb, struct wkic_info *winf,
struct scoutfs_net_roots *roots_ret, u64 *read_seq, bool force_new)
static int refresh_roots(struct super_block *sb, struct wkic_info *winf)
{
struct scoutfs_super_block *super;
struct scoutfs_log_merge_status *stat;
SCOUTFS_BTREE_ITEM_REF(iref);
struct scoutfs_key key;
int ret;
super = kmalloc(sizeof(*super), GFP_NOFS);
if (!super)
return -ENOMEM;
ret = scoutfs_read_super(sb, super);
if (ret < 0)
goto out;
winf->roots = (struct scoutfs_net_roots){
.fs_root = super->fs_root,
.logs_root = super->logs_root,
.srch_root = super->srch_root,
};
winf->merge_input_seq = 0;
if (super->log_merge.ref.blkno) {
scoutfs_key_set_zeros(&key);
key.sk_zone = SCOUTFS_LOG_MERGE_STATUS_ZONE;
ret = scoutfs_btree_lookup(sb, &super->log_merge, &key, &iref);
if (ret == 0) {
if (iref.val_len == sizeof(*stat)) {
stat = iref.val;
winf->merge_input_seq = le64_to_cpu(stat->seq);
} else {
ret = -EUCLEAN;
}
scoutfs_btree_put_iref(&iref);
} else if (ret == -ENOENT) {
ret = 0;
}
if (ret < 0)
goto out;
}
winf->roots_read_seq++;
winf->roots_expire = ktime_add_ms(ktime_get_raw(), WKIC_CACHE_LIFETIME_MS);
out:
kfree(super);
return ret;
}
static int get_roots(struct super_block *sb, struct wkic_info *winf,
struct scoutfs_net_roots *roots_ret, u64 *merge_input_seq,
u64 *read_seq, bool force_new)
{
struct scoutfs_net_roots roots;
int ret;
mutex_lock(&winf->roots_mutex);
if (force_new || ktime_before(winf->roots_expire, ktime_get_raw())) {
ret = scoutfs_client_get_roots(sb, &roots);
ret = refresh_roots(sb, winf);
if (ret < 0)
goto out;
winf->roots = roots;
winf->roots_read_seq++;
winf->roots_expire = ktime_add_ms(ktime_get_raw(), WKIC_CACHE_LIFETIME_MS);
}
*roots_ret = winf->roots;
*merge_input_seq = winf->merge_input_seq;
*read_seq = winf->roots_read_seq;
ret = 0;
out:
@@ -870,24 +921,30 @@ static int insert_read_pages(struct super_block *sb, struct wkic_info *winf,
struct scoutfs_key end;
struct wkic_page *wpage;
LIST_HEAD(pages);
u64 read_seq;
u64 merge_input_seq;
u64 read_seq = 0;
int ret;
ret = 0;
retry_stale:
ret = get_roots(sb, winf, &roots, &read_seq, ret == -ESTALE);
ret = get_roots(sb, winf, &roots, &merge_input_seq, &read_seq, ret == -ESTALE);
if (ret < 0)
goto out;
goto check_stale;
start = *range_start;
end = *range_end;
ret = scoutfs_forest_read_items_roots(sb, &roots, key, range_start, &start, &end,
read_items_cb, &root);
ret = scoutfs_forest_read_items_roots(sb, &roots, merge_input_seq, key, range_start,
&start, &end, read_items_cb, &root);
trace_scoutfs_wkic_read_items(sb, key, &start, &end);
check_stale:
ret = scoutfs_block_check_stale(sb, ret, &saved, &roots.fs_root.ref, &roots.logs_root.ref);
if (ret < 0) {
if (ret == -ESTALE)
if (ret == -ESTALE) {
/* not safe to retry due to delta items, must restart clean */
free_item_tree(&root);
root = RB_ROOT;
goto retry_stale;
}
goto out;
}
@@ -1112,8 +1169,13 @@ int scoutfs_wkic_setup(struct super_block *sb)
}
winf->sb = sb;
KC_INIT_SHRINKER_FUNCS(&winf->shrinker, wkic_shrink_count, wkic_shrink_scan);
KC_REGISTER_SHRINKER(&winf->shrinker, "scoutfs-weak_item:" SCSBF, SCSB_ARGS(sb));
KC_SETUP_SHRINKER(winf->shrinker, winf, 0, wkic_shrink_count,
wkic_shrink_scan, "scoutfs-weak_item:" SCSBF, SCSB_ARGS(sb));
if (KC_SHRINKER_IS_NULL(winf->shrinker)) {
debugfs_remove(winf->drop_dentry);
kfree(winf);
return -ENOMEM;
}
sbi->wkic_info = winf;
return 0;
@@ -1141,7 +1203,7 @@ void scoutfs_wkic_destroy(struct super_block *sb)
if (winf) {
debugfs_remove(winf->drop_dentry);
KC_UNREGISTER_SHRINKER(&winf->shrinker);
KC_UNREGISTER_SHRINKER(winf->shrinker);
/* trees are in sync so tearing down one frees all pages */
rbtree_postorder_for_each_entry_safe(wpage, tmp, &winf->wpage_roots[0], nodes[0]) {

@@ -907,7 +907,7 @@ int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_
/* XXX do these want i_mutex or anything? */
inode_inc_iversion(inode);
inode->i_ctime = current_time(inode);
inode_set_ctime_to_ts(inode, current_time(inode));
ret = 0;
out:
@@ -1265,6 +1265,7 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
ret = parse_indx_key(&tag_key, xat->name, xat->name_len, ino);
if (ret < 0)
goto out;
scoutfs_xattr_set_indx_key_xid(&tag_key, le64_to_cpu(key.skx_id));
}
if ((tgs.totl || tgs.indx) && locked_zone != tag_key.sk_zone) {

@@ -3,7 +3,8 @@
t_filter_fs()
{
sed -e 's@mnt/test\.[0-9]*@mnt/test@g' \
-e 's@Device: [a-fA-F0-9]*h/[0-9]*d@Device: 0h/0d@g'
-e 's@Device: [a-fA-F0-9]*h/[0-9]*d@Device: 0h/0d@g' \
-e 's@Device: [0-9]*,[0-9]*@Device: 0h/0d@g'
}
#
@@ -20,9 +21,6 @@ t_filter_fs()
# [ 2687.691366] BUG: KASAN: stack-out-of-bounds in get_reg+0x1bc/0x230
# ...
# [ 2687.706220] ==================================================================
# [ 2687.707284] Disabling lock debugging due to kernel taint
#
# That final lock debugging message may not be included.
#
ignore_harmless_unwind_kasan_stack_oob()
{
@@ -46,10 +44,6 @@ awk '
saved=""
}
( in_soob == 2 && $0 ~ /==================================================================/ ) {
in_soob = 3
soob_nr = NR
}
( in_soob == 3 && NR > soob_nr && $0 !~ /Disabling lock debugging/ ) {
in_soob = 0
}
( !in_soob ) { print $0 }
@@ -61,6 +55,58 @@ awk '
'
}
#
# in el97+, XFS can generate a spurious lockdep circular dependency
# warning about reclaim. Fixed upstream in e.g. v5.7-rc4-129-g6dcde60efd94
#
ignore_harmless_xfs_lockdep_warning()
{
awk '
BEGIN {
in_block = 0
block_nr = 0
buf = ""
}
( !in_block && $0 ~ /======================================================/ ) {
in_block = 1
block_nr = NR
buf = $0 "\n"
next
}
( in_block == 1 && NR == (block_nr + 1) ) {
if (match($0, /WARNING: possible circular locking dependency detected/) != 0) {
in_block = 2
buf = buf $0 "\n"
} else {
in_block = 0
printf "%s", buf
print $0
buf = ""
}
next
}
( in_block == 2 ) {
buf = buf $0 "\n"
if ($0 ~ /<\/TASK>/) {
if (buf ~ /xfs_(nondir_|dir_)?ilock_class/ && buf ~ /fs_reclaim/) {
# known xfs lockdep false positive, discard
} else {
printf "%s", buf
}
in_block = 0
buf = ""
}
next
}
{ print $0 }
END {
if (buf) {
printf "%s", buf
}
}
'
}
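The dmesg filters above share one shape: buffer a suspect block from its opening delimiter, then decide at the closing marker whether to discard the whole thing or replay it verbatim. A minimal standalone sketch of that buffer-and-decide awk pattern (the BEGIN-BLOCK/END-BLOCK markers and the "harmless" keyword are hypothetical stand-ins for the real dmesg delimiters):

```shell
filter_blocks() {
	awk '
	( !in_block && $0 ~ /^BEGIN-BLOCK/ ) {
		in_block = 1
		buf = $0 "\n"
		next
	}
	( in_block ) {
		buf = buf $0 "\n"
		if ($0 ~ /^END-BLOCK/) {
			# replay the block only if it is not the known noise
			if (buf !~ /harmless/)
				printf "%s", buf
			in_block = 0
			buf = ""
		}
		next
	}
	{ print $0 }
	'
}

printf 'one\nBEGIN-BLOCK\nharmless noise\nEND-BLOCK\ntwo\n' | filter_blocks
```

A block containing "harmless" is dropped in its entirety; any other block, and all text outside blocks, passes through untouched.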
#
# Filter out expected messages. Putting messages here implies that
# tests aren't relying on messages to discover failures... they're
@@ -123,6 +169,9 @@ t_filter_dmesg()
re="$re|hrtimer: interrupt took .*"
re="$re|clocksource: Long readout interval"
# orphan log trees reclaim is handled, not an error
re="$re|scoutfs .* reclaiming orphan log trees"
# fencing tests force unmounts and trigger timeouts
re="$re|scoutfs .* forcing unmount"
re="$re|scoutfs .* reconnect timed out"
@@ -170,6 +219,13 @@ t_filter_dmesg()
# some ci test guests are unresponsive
re="$re|longest quorum heartbeat .* delay"
egrep -v "($re)" | \
ignore_harmless_unwind_kasan_stack_oob
# creating block devices may trigger this
re="$re|block device autoloading is deprecated and will be removed."
# lockdep or kasan warnings can cause this
re="$re|Disabling lock debugging due to kernel taint"
grep -v -E "($re)" | \
ignore_harmless_unwind_kasan_stack_oob | \
ignore_harmless_xfs_lockdep_warning
}

@@ -283,6 +283,30 @@ t_reinsert_remount_all()
t_quiet t_mount_all || t_fail "mounting all failed"
}
#
# scratch helpers
#
t_scratch_mkfs()
{
scoutfs mkfs -f -Q 0,127.0.0.1,$T_SCRATCH_PORT "$T_EX_META_DEV" "$T_EX_DATA_DEV" "$@" > $T_TMP.mkfs.out 2>&1 || \
t_fail "scratch mkfs failed"
}
t_scratch_mount()
{
mkdir -p "$T_MSCR"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$@" "$T_EX_DATA_DEV" "$T_MSCR" || \
t_fail "scratch mount failed"
}
t_scratch_umount()
{
umount "$T_MSCR" || \
t_fail "scratch umount failed"
rmdir "$T_MSCR"
}
t_trigger_path() {
local nr="$1"
@@ -498,3 +522,121 @@ t_restore_all_sysfs_mount_options() {
t_set_sysfs_mount_option $i $name "${_saved_opts[$ind]}"
done
}
#
# Force a log merge: arm the server's finalize trigger, wait for it to
# fire, and retry until a merge is observed to start and then complete.
#
t_force_log_merge() {
local sv=$(t_server_nr)
local merges_started
local last_merges_started
local merges_completed
local last_merges_completed
while true; do
last_merges_started=$(t_counter log_merge_start $sv)
last_merges_completed=$(t_counter log_merge_complete $sv)
t_trigger_arm_silent log_merge_force_finalize_ours $sv
t_sync_seq_index
while test "$(t_trigger_get log_merge_force_finalize_ours $sv)" == "1"; do
sleep .5
done
merges_started=$(t_counter log_merge_start $sv)
if (( merges_started > last_merges_started )); then
merges_completed=$(t_counter log_merge_complete $sv)
while (( merges_completed == last_merges_completed )); do
sleep .5
merges_completed=$(t_counter log_merge_complete $sv)
done
break
fi
done
}
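t_force_log_merge follows a shape common to these helpers: snapshot a counter, kick an event, then poll until the counter advances past the snapshot. A self-contained sketch of that snapshot-kick-poll shape, using a hypothetical temp-file counter in place of t_counter and the trigger machinery:

```shell
# snapshot-kick-poll: record a counter, fire the event, then poll
# until the counter moves past the recorded value
counter_file=$(mktemp)
echo 0 > "$counter_file"

get_counter() { cat "$counter_file"; }
fire_event()  { echo "$(( $(get_counter) + 1 ))" > "$counter_file"; }

last=$(get_counter)
fire_event
while (( $(get_counter) == last )); do
	sleep .5
done
final=$(get_counter)
rm -f "$counter_file"
echo "counter advanced: $last -> $final"
```

The real helpers add a retry loop around the kick, since arming a trigger does not guarantee the event is the one that advances the counter.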
declare -A _last_scan
t_get_orphan_scan_runs() {
local i
for i in $(t_fs_nrs); do
_last_scan[$i]=$(t_counter orphan_scan $i)
done
}
t_wait_for_orphan_scan_runs() {
local i
local scan
t_get_orphan_scan_runs
for i in $(t_fs_nrs); do
while true; do
scan=$(t_counter orphan_scan $i)
if (( scan != _last_scan[$i] )); then
break
fi
sleep .5
done
done
}
declare -A _last_empty
t_get_orphan_scan_empty() {
local i
for i in $(t_fs_nrs); do
_last_empty[$i]=$(t_counter orphan_scan_empty $i)
done
}
#
# Wait until a full round of orphan scans finds no orphans on any
# mount, i.e. every mount's orphan_scan_empty counter advances.
#
t_wait_for_no_orphans() {
local i
local working
local empty
t_get_orphan_scan_empty
while true; do
working=0
t_wait_for_orphan_scan_runs
for i in $(t_fs_nrs); do
empty=$(t_counter orphan_scan_empty $i)
if (( empty == _last_empty[$i] )); then
(( working++ ))
else
(( _last_empty[$i] = empty ))
fi
done
if (( working == 0 )); then
break
fi
sleep 1
done
}
#
# Repeatedly run the arguments as a command, sleeping in between, until
# it returns success. The first argument is a relative timeout in
# seconds. The remaining arguments are the command and its arguments.
#
# If the timeout expires without the command returning 0 then the test
# fails.
#
t_wait_until_timeout() {
local relative="$1"
local expire="$((SECONDS + relative))"
shift
while (( SECONDS < expire )); do
"$@" && return
sleep 1
done
t_fail "command failed for $relative sec: $@"
}
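The same poll-until-deadline loop can be sketched standalone; this variant returns nonzero on timeout instead of calling t_fail, and the marker file in the usage example is a hypothetical stand-in for a real test condition:

```shell
# standalone variant of the poll-until-deadline loop; returns nonzero
# on timeout instead of failing the whole test run
wait_until_timeout() {
	local relative="$1"
	local expire="$((SECONDS + relative))"
	shift

	while (( SECONDS < expire )); do
		"$@" && return 0
		sleep 1
	done
	return 1
}

# hypothetical usage: wait up to 5 seconds for a marker file to appear
marker=$(mktemp -u)
( sleep 1; touch "$marker" ) &
wait_until_timeout 5 test -e "$marker" && echo "marker appeared"
rm -f "$marker"
```

Because the loop compares against bash's SECONDS variable, the timeout is relative to when the helper is called, not to script start.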

@@ -43,9 +43,14 @@ t_tap_progress()
local testname=$1
local result=$2
local stmsg=""
local diff=""
local dmsg=""
if [[ -s $T_RESULTS/tmp/${testname}/status.msg ]]; then
stmsg="1"
fi
if [[ -s "$T_RESULTS/tmp/${testname}/dmesg.new" ]]; then
dmsg="1"
fi
@@ -61,6 +66,7 @@ t_tap_progress()
echo "# ${testname} ** skipped - permitted **"
else
echo "not ok ${i} - ${testname}"
case ${result} in
101)
echo "# ${testname} ** skipped **"
@@ -70,6 +76,13 @@ t_tap_progress()
;;
esac
if [[ -n "${stmsg}" ]]; then
echo "#"
echo "# status:"
echo "#"
cat $T_RESULTS/tmp/${testname}/status.msg | sed 's/^/# - /'
fi
if [[ -n "${diff}" ]]; then
echo "#"
echo "# diff:"

@@ -0,0 +1,6 @@
== make scratch fs
== create uid/gids
== set acls and permissions
== compare output
== drop caches and compare again
== cleanup scratch fs

@@ -47,7 +47,7 @@ four
--- dir within dir
--- overwrite file
--- can't overwrite non-empty dir
mv: cannot move '/mnt/test/test/basic-posix-consistency/dir/c/clobber' to '/mnt/test/test/basic-posix-consistency/dir/a/dir': Directory not empty
mv: cannot overwrite '/mnt/test/test/basic-posix-consistency/dir/a/dir': Directory not empty
--- can overwrite empty dir
--- can rename into root
== path resoluion

@@ -0,0 +1,54 @@
== testing invalid read-xattr-index arguments
bad index position entry argument 'bad', it must be in the form "a.b.ino" where each value can be prefixed by '0' for octal or '0x' for hex
scoutfs: read-xattr-index failed: Invalid argument (22)
bad index position entry argument '1.2', it must be in the form "a.b.ino" where each value can be prefixed by '0' for octal or '0x' for hex
scoutfs: read-xattr-index failed: Invalid argument (22)
initial major index position '256' must be between 0 and 255, inclusive.
scoutfs: read-xattr-index failed: Invalid argument (22)
first index position 1.2.3 must be less than last index position 0.0.0
scoutfs: read-xattr-index failed: Invalid argument (22)
first index position 1.2.0 must be less than last index position 1.1.2
scoutfs: read-xattr-index failed: Invalid argument (22)
first index position 2.2.2 must be less than last index position 2.2.1
scoutfs: read-xattr-index failed: Invalid argument (22)
== testing invalid names
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Numerical result out of range
== testing boundary values
0.0 found
255.max found
== indx xattr must have no value
setfattr: /mnt/test/test/basic-xattr-indx/noval: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/noval: Invalid argument
== set indx xattr and verify index entry
found
== setting same indx xattr again is a no-op
found
== removing non-existent indx xattr succeeds
setfattr: /mnt/test/test/basic-xattr-indx/file: No such attribute
still found
== explicit xattr removal cleans up index entry
== file deletion cleans up index entry
found before delete
== multiple indx xattrs on one file cleaned up by deletion
entries before delete: 2
entries after delete: 0
== partial removal leaves other entries
300 found
== multiple files at same index position
files at same position: 2
surviving file found
== cross-mount visibility
found on mount 1
== duplicate position deduplication
entries for same position: 1

@@ -17,7 +17,7 @@ ino not found in dseq index
mount 0 contents after mount 1 rm: contents
ino found in dseq index
ino found in dseq index
stat: cannot stat '/mnt/test/test/inode-deletion/file': No such file or directory
stat: cannot stat '/mnt/test/test/inode-deletion/badfile': No such file or directory
ino not found in dseq index
ino not found in dseq index
== lots of deletions use one open map

@@ -0,0 +1,3 @@
== create orphan log_trees entry via trigger
== verify orphan is reclaimed and merge completes
== verify orphan reclaim was logged

tests/golden/punch-offline (new file)

@@ -0,0 +1,460 @@
== missing options should fail ==
punch-offline: must provide offset
Try `punch-offline --help' or `punch-offline --usage' for more information.
punch-offline: must provide length
Try `punch-offline --help' or `punch-offline --usage' for more information.
punch-offline: must provide data_version
Try `punch-offline --help' or `punch-offline --usage' for more information.
== can't hole punch dir or special ==
failed to open '/mnt/test.0/test/punch-offline/dir': Is a directory (21)
scoutfs: punch-offline failed: Is a directory (21)
== punching an empty file does nothing ==
== punch outside of i_size does nothing ==
== can't hole punch online extent ==
0: offset: 0 length: 4096 flags: ..L
extents: 1
punch_offline ioctl failed: Invalid argument (22)
scoutfs: punch-offline failed: Invalid argument (22)
0: offset: 0 length: 4096 flags: ..L
extents: 1
== can't hole punch unwritten extent ==
0: offset: 0 length: 12288 flags: .UL
extents: 1
punch_offline ioctl failed: Invalid argument (22)
scoutfs: punch-offline failed: Invalid argument (22)
0: offset: 0 length: 12288 flags: .UL
extents: 1
== hole punch offline extent ==
0: offset: 0 length: 12288 flags: O.L
extents: 1
0: offset: 0 length: 4096 flags: O..
1: offset: 8192 length: 4096 flags: O.L
extents: 2
== can't hole punch non-aligned bsz offset or len ==
0: offset: 0 length: 12288 flags: O.L
extents: 1
punch_offline ioctl failed: Value too large for defined data type (75)
scoutfs: punch-offline failed: Value too large for defined data type (75)
punch_offline ioctl failed: Value too large for defined data type (75)
scoutfs: punch-offline failed: Value too large for defined data type (75)
punch_offline ioctl failed: Value too large for defined data type (75)
scoutfs: punch-offline failed: Value too large for defined data type (75)
punch_offline ioctl failed: Value too large for defined data type (75)
scoutfs: punch-offline failed: Value too large for defined data type (75)
punch_offline ioctl failed: Value too large for defined data type (75)
scoutfs: punch-offline failed: Value too large for defined data type (75)
punch_offline ioctl failed: Value too large for defined data type (75)
scoutfs: punch-offline failed: Value too large for defined data type (75)
0: offset: 0 length: 12288 flags: O.L
extents: 1
== can't hole punch mismatched data_version ==
0: offset: 0 length: 12288 flags: O.L
extents: 1
punch_offline ioctl failed: Stale file handle (116)
scoutfs: punch-offline failed: Stale file handle (116)
punch_offline ioctl failed: Stale file handle (116)
scoutfs: punch-offline failed: Stale file handle (116)
punch_offline ioctl failed: Stale file handle (116)
scoutfs: punch-offline failed: Stale file handle (116)
0: offset: 0 length: 12288 flags: O.L
extents: 1
== Punch hole crossing multiple extents ==
0: offset: 0 length: 7 flags: O.L
extents: 1
0: offset: 0 length: 1 flags: O..
1: offset: 2 length: 1 flags: O..
2: offset: 4 length: 1 flags: O..
3: offset: 6 length: 1 flags: O.L
extents: 4
0: offset: 0 length: 1 flags: O..
1: offset: 6 length: 1 flags: O.L
extents: 2
== punch hole starting at a hole ==
0: offset: 0 length: 7 flags: O.L
extents: 1
0: offset: 0 length: 1 flags: O..
1: offset: 2 length: 1 flags: O..
2: offset: 4 length: 1 flags: O..
3: offset: 6 length: 1 flags: O.L
extents: 4
0: offset: 0 length: 1 flags: O..
1: offset: 6 length: 1 flags: O.L
extents: 2
== large punch ==
0: offset: 0 length: 1572864 flags: O.L
extents: 1
0: offset: 0 length: 134123 flags: O..
1: offset: 202466 length: 264807 flags: O..
2: offset: 535616 length: 199007 flags: O..
3: offset: 802966 length: 769898 flags: O.L
extents: 4
== overlapping punches with lots of extents ==
0: offset: 0 length: 4194304 flags: O.L
extents: 1
extents: 512
extents: 505
extents: 378
extents: 252
0: offset: 0 length: 4096 flags: O..
1: offset: 8192 length: 4096 flags: O..
2: offset: 32768 length: 4096 flags: O..
3: offset: 40960 length: 4096 flags: O..
4: offset: 65536 length: 4096 flags: O..
5: offset: 73728 length: 4096 flags: O..
6: offset: 98304 length: 4096 flags: O..
7: offset: 106496 length: 4096 flags: O..
8: offset: 196608 length: 4096 flags: O..
9: offset: 204800 length: 4096 flags: O..
10: offset: 229376 length: 4096 flags: O..
11: offset: 237568 length: 4096 flags: O..
12: offset: 262144 length: 4096 flags: O..
13: offset: 270336 length: 4096 flags: O..
14: offset: 294912 length: 4096 flags: O..
15: offset: 303104 length: 4096 flags: O..
16: offset: 327680 length: 4096 flags: O..
17: offset: 335872 length: 4096 flags: O..
18: offset: 360448 length: 4096 flags: O..
19: offset: 368640 length: 4096 flags: O..
20: offset: 393216 length: 4096 flags: O..
21: offset: 401408 length: 4096 flags: O..
22: offset: 425984 length: 4096 flags: O..
23: offset: 434176 length: 4096 flags: O..
24: offset: 458752 length: 4096 flags: O..
25: offset: 466944 length: 4096 flags: O..
26: offset: 491520 length: 4096 flags: O..
27: offset: 499712 length: 4096 flags: O..
28: offset: 720896 length: 4096 flags: O..
29: offset: 729088 length: 4096 flags: O..
30: offset: 753664 length: 4096 flags: O..
31: offset: 761856 length: 4096 flags: O..
32: offset: 786432 length: 4096 flags: O..
33: offset: 794624 length: 4096 flags: O..
34: offset: 819200 length: 4096 flags: O..
35: offset: 827392 length: 4096 flags: O..
36: offset: 851968 length: 4096 flags: O..
37: offset: 860160 length: 4096 flags: O..
38: offset: 884736 length: 4096 flags: O..
39: offset: 892928 length: 4096 flags: O..
40: offset: 917504 length: 4096 flags: O..
41: offset: 925696 length: 4096 flags: O..
42: offset: 950272 length: 4096 flags: O..
43: offset: 958464 length: 4096 flags: O..
44: offset: 983040 length: 4096 flags: O..
45: offset: 991232 length: 4096 flags: O..
46: offset: 1015808 length: 4096 flags: O..
47: offset: 1024000 length: 4096 flags: O..
48: offset: 1048576 length: 4096 flags: O..
49: offset: 1056768 length: 4096 flags: O..
50: offset: 1081344 length: 4096 flags: O..
51: offset: 1089536 length: 4096 flags: O..
52: offset: 1114112 length: 4096 flags: O..
53: offset: 1122304 length: 4096 flags: O..
54: offset: 1146880 length: 4096 flags: O..
55: offset: 1155072 length: 4096 flags: O..
56: offset: 1179648 length: 4096 flags: O..
57: offset: 1187840 length: 4096 flags: O..
58: offset: 1212416 length: 4096 flags: O..
59: offset: 1220608 length: 4096 flags: O..
60: offset: 1245184 length: 4096 flags: O..
61: offset: 1253376 length: 4096 flags: O..
62: offset: 1277952 length: 4096 flags: O..
63: offset: 1286144 length: 4096 flags: O..
64: offset: 1310720 length: 4096 flags: O..
65: offset: 1318912 length: 4096 flags: O..
66: offset: 1343488 length: 4096 flags: O..
67: offset: 1351680 length: 4096 flags: O..
68: offset: 1376256 length: 4096 flags: O..
69: offset: 1384448 length: 4096 flags: O..
70: offset: 1409024 length: 4096 flags: O..
71: offset: 1417216 length: 4096 flags: O..
72: offset: 1441792 length: 4096 flags: O..
73: offset: 1449984 length: 4096 flags: O..
74: offset: 1474560 length: 4096 flags: O..
75: offset: 1482752 length: 4096 flags: O..
76: offset: 1507328 length: 4096 flags: O..
77: offset: 1515520 length: 4096 flags: O..
78: offset: 1540096 length: 4096 flags: O..
79: offset: 1548288 length: 4096 flags: O..
80: offset: 1572864 length: 4096 flags: O..
81: offset: 1581056 length: 4096 flags: O..
82: offset: 1605632 length: 4096 flags: O..
83: offset: 1613824 length: 4096 flags: O..
84: offset: 1638400 length: 4096 flags: O..
85: offset: 1646592 length: 4096 flags: O..
86: offset: 1671168 length: 4096 flags: O..
87: offset: 1679360 length: 4096 flags: O..
88: offset: 1703936 length: 4096 flags: O..
89: offset: 1712128 length: 4096 flags: O..
90: offset: 1736704 length: 4096 flags: O..
91: offset: 1744896 length: 4096 flags: O..
92: offset: 1769472 length: 4096 flags: O..
93: offset: 1777664 length: 4096 flags: O..
94: offset: 1802240 length: 4096 flags: O..
95: offset: 1810432 length: 4096 flags: O..
96: offset: 1835008 length: 4096 flags: O..
97: offset: 1843200 length: 4096 flags: O..
98: offset: 1867776 length: 4096 flags: O..
99: offset: 1875968 length: 4096 flags: O..
100: offset: 1900544 length: 4096 flags: O..
101: offset: 1908736 length: 4096 flags: O..
102: offset: 1933312 length: 4096 flags: O..
103: offset: 1941504 length: 4096 flags: O..
104: offset: 1966080 length: 4096 flags: O..
105: offset: 1974272 length: 4096 flags: O..
106: offset: 1998848 length: 4096 flags: O..
107: offset: 2007040 length: 4096 flags: O..
108: offset: 2031616 length: 4096 flags: O..
109: offset: 2039808 length: 4096 flags: O..
110: offset: 2064384 length: 4096 flags: O..
111: offset: 2072576 length: 4096 flags: O..
112: offset: 2097152 length: 4096 flags: O..
113: offset: 2105344 length: 4096 flags: O..
114: offset: 2129920 length: 4096 flags: O..
115: offset: 2138112 length: 4096 flags: O..
116: offset: 2162688 length: 4096 flags: O..
117: offset: 2170880 length: 4096 flags: O..
118: offset: 2195456 length: 4096 flags: O..
119: offset: 2203648 length: 4096 flags: O..
120: offset: 2228224 length: 4096 flags: O..
121: offset: 2236416 length: 4096 flags: O..
122: offset: 2260992 length: 4096 flags: O..
123: offset: 2269184 length: 4096 flags: O..
124: offset: 2293760 length: 4096 flags: O..
125: offset: 2301952 length: 4096 flags: O..
126: offset: 2326528 length: 4096 flags: O..
127: offset: 2334720 length: 4096 flags: O..
128: offset: 2359296 length: 4096 flags: O..
129: offset: 2367488 length: 4096 flags: O..
130: offset: 2392064 length: 4096 flags: O..
131: offset: 2400256 length: 4096 flags: O..
132: offset: 2424832 length: 4096 flags: O..
133: offset: 2433024 length: 4096 flags: O..
134: offset: 2457600 length: 4096 flags: O..
135: offset: 2465792 length: 4096 flags: O..
136: offset: 2490368 length: 4096 flags: O..
137: offset: 2498560 length: 4096 flags: O..
138: offset: 2523136 length: 4096 flags: O..
139: offset: 2531328 length: 4096 flags: O..
140: offset: 2555904 length: 4096 flags: O..
141: offset: 2564096 length: 4096 flags: O..
142: offset: 2588672 length: 4096 flags: O..
143: offset: 2596864 length: 4096 flags: O..
144: offset: 2621440 length: 4096 flags: O..
145: offset: 2629632 length: 4096 flags: O..
146: offset: 2654208 length: 4096 flags: O..
147: offset: 2662400 length: 4096 flags: O..
148: offset: 2686976 length: 4096 flags: O..
149: offset: 2695168 length: 4096 flags: O..
150: offset: 2719744 length: 4096 flags: O..
151: offset: 2727936 length: 4096 flags: O..
152: offset: 2752512 length: 4096 flags: O..
153: offset: 2760704 length: 4096 flags: O..
154: offset: 2785280 length: 4096 flags: O..
155: offset: 2793472 length: 4096 flags: O..
156: offset: 2818048 length: 4096 flags: O..
157: offset: 2826240 length: 4096 flags: O..
158: offset: 2850816 length: 4096 flags: O..
159: offset: 2859008 length: 4096 flags: O..
160: offset: 2883584 length: 4096 flags: O..
161: offset: 2891776 length: 4096 flags: O..
162: offset: 2916352 length: 4096 flags: O..
163: offset: 2924544 length: 4096 flags: O..
164: offset: 2949120 length: 4096 flags: O..
165: offset: 2957312 length: 4096 flags: O..
166: offset: 2981888 length: 4096 flags: O..
167: offset: 2990080 length: 4096 flags: O..
168: offset: 3014656 length: 4096 flags: O..
169: offset: 3022848 length: 4096 flags: O..
170: offset: 3047424 length: 4096 flags: O..
171: offset: 3055616 length: 4096 flags: O..
172: offset: 3080192 length: 4096 flags: O..
173: offset: 3088384 length: 4096 flags: O..
174: offset: 3112960 length: 4096 flags: O..
175: offset: 3121152 length: 4096 flags: O..
176: offset: 3145728 length: 4096 flags: O..
177: offset: 3153920 length: 4096 flags: O..
178: offset: 3178496 length: 4096 flags: O..
179: offset: 3186688 length: 4096 flags: O..
180: offset: 3211264 length: 4096 flags: O..
181: offset: 3219456 length: 4096 flags: O..
182: offset: 3244032 length: 4096 flags: O..
183: offset: 3252224 length: 4096 flags: O..
184: offset: 3276800 length: 4096 flags: O..
185: offset: 3284992 length: 4096 flags: O..
186: offset: 3309568 length: 4096 flags: O..
187: offset: 3317760 length: 4096 flags: O..
188: offset: 3342336 length: 4096 flags: O..
189: offset: 3350528 length: 4096 flags: O..
190: offset: 3375104 length: 4096 flags: O..
191: offset: 3383296 length: 4096 flags: O..
192: offset: 3407872 length: 4096 flags: O..
193: offset: 3416064 length: 4096 flags: O..
194: offset: 3440640 length: 4096 flags: O..
195: offset: 3448832 length: 4096 flags: O..
196: offset: 3473408 length: 4096 flags: O..
197: offset: 3481600 length: 4096 flags: O..
198: offset: 3506176 length: 4096 flags: O..
199: offset: 3514368 length: 4096 flags: O..
200: offset: 3538944 length: 4096 flags: O..
201: offset: 3547136 length: 4096 flags: O..
202: offset: 3571712 length: 4096 flags: O..
203: offset: 3579904 length: 4096 flags: O..
204: offset: 3604480 length: 4096 flags: O..
205: offset: 3612672 length: 4096 flags: O..
206: offset: 3637248 length: 4096 flags: O..
207: offset: 3645440 length: 4096 flags: O..
208: offset: 3670016 length: 4096 flags: O..
209: offset: 3678208 length: 4096 flags: O..
210: offset: 3702784 length: 4096 flags: O..
211: offset: 3710976 length: 4096 flags: O..
212: offset: 3735552 length: 4096 flags: O..
213: offset: 3743744 length: 4096 flags: O..
214: offset: 3768320 length: 4096 flags: O..
215: offset: 3776512 length: 4096 flags: O..
216: offset: 3801088 length: 4096 flags: O..
217: offset: 3809280 length: 4096 flags: O..
218: offset: 3833856 length: 4096 flags: O..
219: offset: 3842048 length: 4096 flags: O..
220: offset: 3866624 length: 4096 flags: O..
221: offset: 3874816 length: 4096 flags: O..
222: offset: 3899392 length: 4096 flags: O..
223: offset: 3907584 length: 4096 flags: O..
224: offset: 3932160 length: 4096 flags: O..
225: offset: 3940352 length: 4096 flags: O..
226: offset: 3964928 length: 4096 flags: O..
227: offset: 3973120 length: 4096 flags: O..
228: offset: 3997696 length: 4096 flags: O..
229: offset: 4005888 length: 4096 flags: O..
230: offset: 4030464 length: 4096 flags: O..
231: offset: 4038656 length: 4096 flags: O..
232: offset: 4063232 length: 4096 flags: O..
233: offset: 4071424 length: 4096 flags: O..
234: offset: 4096000 length: 4096 flags: O..
235: offset: 4104192 length: 4096 flags: O..
236: offset: 4128768 length: 4096 flags: O..
237: offset: 4136960 length: 4096 flags: O..
238: offset: 4161536 length: 4096 flags: O..
239: offset: 4169728 length: 4096 flags: O.L
extents: 240
0: offset: 0 length: 1 flags: O..
1: offset: 8 length: 1 flags: O..
2: offset: 16 length: 1 flags: O..
3: offset: 24 length: 1 flags: O..
4: offset: 48 length: 1 flags: O..
5: offset: 56 length: 1 flags: O..
6: offset: 64 length: 1 flags: O..
7: offset: 72 length: 1 flags: O..
8: offset: 80 length: 1 flags: O..
9: offset: 88 length: 1 flags: O..
10: offset: 96 length: 1 flags: O..
11: offset: 104 length: 1 flags: O..
12: offset: 112 length: 1 flags: O..
13: offset: 120 length: 1 flags: O..
14: offset: 176 length: 1 flags: O..
15: offset: 184 length: 1 flags: O..
16: offset: 192 length: 1 flags: O..
17: offset: 200 length: 1 flags: O..
18: offset: 208 length: 1 flags: O..
19: offset: 216 length: 1 flags: O..
20: offset: 224 length: 1 flags: O..
21: offset: 232 length: 1 flags: O..
22: offset: 240 length: 1 flags: O..
23: offset: 248 length: 1 flags: O..
24: offset: 256 length: 1 flags: O..
25: offset: 264 length: 1 flags: O..
26: offset: 272 length: 1 flags: O..
27: offset: 280 length: 1 flags: O..
28: offset: 288 length: 1 flags: O..
29: offset: 296 length: 1 flags: O..
30: offset: 304 length: 1 flags: O..
31: offset: 312 length: 1 flags: O..
32: offset: 320 length: 1 flags: O..
33: offset: 328 length: 1 flags: O..
34: offset: 336 length: 1 flags: O..
35: offset: 344 length: 1 flags: O..
36: offset: 352 length: 1 flags: O..
37: offset: 360 length: 1 flags: O..
38: offset: 368 length: 1 flags: O..
39: offset: 376 length: 1 flags: O..
40: offset: 384 length: 1 flags: O..
41: offset: 392 length: 1 flags: O..
42: offset: 400 length: 1 flags: O..
43: offset: 408 length: 1 flags: O..
44: offset: 416 length: 1 flags: O..
45: offset: 424 length: 1 flags: O..
46: offset: 432 length: 1 flags: O..
47: offset: 440 length: 1 flags: O..
48: offset: 448 length: 1 flags: O..
49: offset: 456 length: 1 flags: O..
50: offset: 464 length: 1 flags: O..
51: offset: 472 length: 1 flags: O..
52: offset: 480 length: 1 flags: O..
53: offset: 488 length: 1 flags: O..
54: offset: 496 length: 1 flags: O..
55: offset: 504 length: 1 flags: O..
56: offset: 512 length: 1 flags: O..
57: offset: 520 length: 1 flags: O..
58: offset: 528 length: 1 flags: O..
59: offset: 536 length: 1 flags: O..
60: offset: 544 length: 1 flags: O..
61: offset: 552 length: 1 flags: O..
62: offset: 560 length: 1 flags: O..
63: offset: 568 length: 1 flags: O..
64: offset: 576 length: 1 flags: O..
65: offset: 584 length: 1 flags: O..
66: offset: 592 length: 1 flags: O..
67: offset: 600 length: 1 flags: O..
68: offset: 608 length: 1 flags: O..
69: offset: 616 length: 1 flags: O..
70: offset: 624 length: 1 flags: O..
71: offset: 632 length: 1 flags: O..
72: offset: 640 length: 1 flags: O..
73: offset: 648 length: 1 flags: O..
74: offset: 656 length: 1 flags: O..
75: offset: 664 length: 1 flags: O..
76: offset: 672 length: 1 flags: O..
77: offset: 680 length: 1 flags: O..
78: offset: 688 length: 1 flags: O..
79: offset: 696 length: 1 flags: O..
80: offset: 704 length: 1 flags: O..
81: offset: 712 length: 1 flags: O..
82: offset: 720 length: 1 flags: O..
83: offset: 728 length: 1 flags: O..
84: offset: 736 length: 1 flags: O..
85: offset: 744 length: 1 flags: O..
86: offset: 752 length: 1 flags: O..
87: offset: 760 length: 1 flags: O..
88: offset: 768 length: 1 flags: O..
89: offset: 776 length: 1 flags: O..
90: offset: 784 length: 1 flags: O..
91: offset: 792 length: 1 flags: O..
92: offset: 800 length: 1 flags: O..
93: offset: 808 length: 1 flags: O..
94: offset: 816 length: 1 flags: O..
95: offset: 824 length: 1 flags: O..
96: offset: 832 length: 1 flags: O..
97: offset: 840 length: 1 flags: O..
98: offset: 848 length: 1 flags: O..
99: offset: 856 length: 1 flags: O..
100: offset: 864 length: 1 flags: O..
101: offset: 872 length: 1 flags: O..
102: offset: 880 length: 1 flags: O..
103: offset: 888 length: 1 flags: O..
104: offset: 896 length: 1 flags: O..
105: offset: 904 length: 1 flags: O..
106: offset: 912 length: 1 flags: O..
107: offset: 920 length: 1 flags: O..
108: offset: 928 length: 1 flags: O..
109: offset: 936 length: 1 flags: O..
110: offset: 944 length: 1 flags: O..
111: offset: 952 length: 1 flags: O..
112: offset: 960 length: 1 flags: O..
113: offset: 968 length: 1 flags: O..
114: offset: 976 length: 1 flags: O..
115: offset: 984 length: 1 flags: O..
116: offset: 992 length: 1 flags: O..
117: offset: 1000 length: 1 flags: O..
118: offset: 1008 length: 1 flags: O..
119: offset: 1016 length: 1 flags: O.L
extents: 120
extents: 0

@@ -0,0 +1,3 @@
== setup
expected 4681
== cleanup

@@ -301,7 +301,7 @@ fi
# include everything by default
test -z "$T_INCLUDE" && T_INCLUDE="-e '.*'"
# (quickly) exclude nothing by default
test -z "$T_EXCLUDE" && T_EXCLUDE="-e '\Zx'"
test -z "$T_EXCLUDE" && T_EXCLUDE="-e '^$'"
# eval to strip re ticks but not expand
tests=$(grep -v "^#" $T_SEQUENCE |
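The new default exclude pattern can be sanity-checked in isolation. Assuming the harness applies `$T_EXCLUDE` via `grep -v` (as the surrounding pipeline suggests), `'^$'` matches only empty lines and therefore excludes nothing, while the old `'\Zx'` leaned on the Perl-regex `\Z` anchor, which plain grep does not support:

```shell
# '^$' matches only empty lines, so grep -v passes every test name through
printf 'foo.sh\nbar.sh\n' | grep -c -v -e '^$'   # prints 2
```

The old pattern risked grep interpreting `\Z` literally, which could accidentally exclude a test whose name happens to contain that text.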
@@ -400,7 +400,8 @@ if [ -n "$T_INSMOD" ]; then
fi
if [ -n "$T_TRACE_MULT" ]; then
orig_trace_size=$(cat /sys/kernel/debug/tracing/buffer_size_kb)
# orig_trace_size=$(cat /sys/kernel/debug/tracing/buffer_size_kb)
orig_trace_size=1408
mult_trace_size=$((orig_trace_size * T_TRACE_MULT))
msg "increasing trace buffer size from $orig_trace_size KiB to $mult_trace_size KiB"
echo $mult_trace_size > /sys/kernel/debug/tracing/buffer_size_kb
@@ -504,7 +505,10 @@ crash_monitor()
fi
if [ "$bad" != 0 ]; then
echo "run-tests monitor triggering crash"
echo "run-tests monitor syncing and triggering crash"
# hail mary, the sync could well hang
(echo s > /proc/sysrq-trigger) &
sleep 5
echo c > /proc/sysrq-trigger
exit 1
fi
@@ -624,6 +628,9 @@ for t in $tests; do
cmd rm -rf "$T_TMPDIR"
cmd mkdir -p "$T_TMPDIR"
# assign scratch mount point in temporary dir
T_MSCR="$T_TMPDIR/scratch"
# create a test name dir in the fs, clean up old data as needed
T_DS=""
for i in $(seq 0 $((T_NR_MOUNTS - 1))); do
@@ -687,8 +694,8 @@ for t in $tests; do
if [ "$sts" == "$T_PASS_STATUS" ]; then
dmesg | t_filter_dmesg > "$T_TMPDIR/dmesg.after"
diff --old-line-format="" --unchanged-line-format="" \
"$T_TMPDIR/dmesg.before" "$T_TMPDIR/dmesg.after" > \
"$T_TMPDIR/dmesg.new"
"$T_TMPDIR/dmesg.before" "$T_TMPDIR/dmesg.after" | \
grep -v '^$' > "$T_TMPDIR/dmesg.new"
if [ -s "$T_TMPDIR/dmesg.new" ]; then
message="unexpected messages in dmesg"

@@ -2,6 +2,7 @@ export-get-name-parent.sh
basic-block-counts.sh
basic-bad-mounts.sh
basic-posix-acl.sh
basic-acl-consistency.sh
inode-items-updated.sh
simple-inode-index.sh
simple-staging.sh
@@ -10,6 +11,7 @@ simple-readdir.sh
get-referring-entries.sh
fallocate.sh
basic-truncate.sh
punch-offline.sh
data-prealloc.sh
setattr_more.sh
offline-extent-waiting.sh
@@ -24,7 +26,9 @@ srch-basic-functionality.sh
simple-xattr-unit.sh
retention-basic.sh
totl-xattr-tag.sh
basic-xattr-indx.sh
quota.sh
totl-merge-read.sh
lock-refleak.sh
lock-shrink-consistency.sh
lock-shrink-read-race.sh
@@ -48,6 +52,7 @@ setup-error-teardown.sh
resize-devices.sh
change-devices.sh
fence-and-reclaim.sh
orphan-log-trees.sh
quorum-heartbeat-timeout.sh
orphan-inodes.sh
mount-unmount-race.sh

@@ -0,0 +1,117 @@
#
# Test basic clustered posix acl consistency.
#
t_require_commands getfacl setfacl
GETFACL="getfacl --absolute-names"
filter_scratch() {
sed "s@$T_MSCR@t_mscr@g"
}
acl_compare()
{
diff -u - <($GETFACL $T_MSCR/data/dir_a/dir_b | filter_scratch) <<EOF1
# file: t_mscr/data/dir_a/dir_b
# owner: t_usr_3
# group: t_grp_3
# flags: -s-
user::rwx
group::rwx
group:t_grp_2:r-x
mask::rwx
other::---
default:user::rwx
default:group::rwx
default:group:t_grp_2:r-x
default:group:t_grp_3:rwx
default:mask::rwx
default:other::---
EOF1
test $? -eq 0 || t_fail "dir_b differs"
diff -u - <($GETFACL -p $T_MSCR/data/dir_a/dir_b/dir_c/dir_d | filter_scratch) <<EOF3
# file: t_mscr/data/dir_a/dir_b/dir_c/dir_d
# owner: t_usr_1
# group: t_grp_1
# flags: -s-
user::rwx
group::rwx
group:t_grp_2:r-x
mask::rwx
other::---
default:user::rwx
default:group::rwx
default:group:t_grp_2:r-x
default:group:t_grp_3:rwx
default:mask::rwx
default:other::---
EOF3
test $? -eq 0 || t_fail "dir_d differs"
diff -u - <($GETFACL $T_MSCR/data/dir_a/dir_b/dir_c | filter_scratch) <<EOF2
# file: t_mscr/data/dir_a/dir_b/dir_c
# owner: t_usr_3
# group: t_grp_2
# flags: -s-
user::rwx
group::rwx
group:t_grp_2:r-x
mask::rwx
other::---
default:user::rwx
default:group::rwx
default:group:t_grp_2:r-x
default:group:t_grp_3:rwx
default:mask::rwx
default:other::---
EOF2
test $? -eq 0 || t_fail "dir_c differs"
}
echo "== make scratch fs"
t_scratch_mkfs
t_scratch_mount
rm -rf $T_MSCR/data
echo "== create uid/gids"
groupadd -g 7101 t_grp_1 > /dev/null 2>&1
useradd -g 7101 -u 7101 t_usr_1 > /dev/null 2>&1
groupadd -g 7102 t_grp_2 > /dev/null 2>&1
groupadd -g 7103 t_grp_3 > /dev/null 2>&1
useradd -g 7103 -u 7103 t_usr_3 > /dev/null 2>&1
echo "== set acls and permissions"
mkdir -p $T_MSCR/data/dir_a/dir_b
chown t_usr_3:t_grp_3 $T_MSCR/data/dir_a/dir_b
chmod 2770 $T_MSCR/data/dir_a/dir_b
setfacl -m g:t_grp_2:rx $T_MSCR/data/dir_a/dir_b
setfacl -m d:g:t_grp_2:rx $T_MSCR/data/dir_a/dir_b
setfacl -m d:g:t_grp_3:rwx $T_MSCR/data/dir_a/dir_b
mkdir -p $T_MSCR/data/dir_a/dir_b/dir_c
chown t_usr_3:t_grp_2 $T_MSCR/data/dir_a/dir_b/dir_c
setfacl -x g:t_grp_3 $T_MSCR/data/dir_a/dir_b/dir_c
mkdir -p $T_MSCR/data/dir_a/dir_b/dir_c/dir_d
chown t_usr_1:t_grp_1 $T_MSCR/data/dir_a/dir_b/dir_c/dir_d
setfacl -x g:t_grp_3 $T_MSCR/data/dir_a/dir_b/dir_c/dir_d
echo "== compare output"
acl_compare
echo "== drop caches and compare again"
sync
echo 3 > /proc/sys/vm/drop_caches
acl_compare
echo "== cleanup scratch fs"
t_scratch_umount
t_pass

@@ -12,25 +12,22 @@ mount_fail()
}
echo "== prepare devices, mount point, and logs"
SCR="$T_TMPDIR/mnt.scratch"
mkdir -p "$SCR"
t_scratch_mkfs
> $T_TMP.mount.out
scoutfs mkfs -f -Q 0,127.0.0.1,$T_SCRATCH_PORT "$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 \
|| t_fail "mkfs failed"
echo "== bad devices, bad options"
mount_fail -o _bad /dev/null /dev/null "$SCR"
mount_fail -o _bad /dev/null /dev/null "$T_MSCR"
echo "== swapped devices"
mount_fail -o metadev_path=$T_EX_DATA_DEV,quorum_slot_nr=0 "$T_EX_META_DEV" "$SCR"
mount_fail -o metadev_path=$T_EX_DATA_DEV,quorum_slot_nr=0 "$T_EX_META_DEV" "$T_MSCR"
echo "== both meta devices"
mount_fail -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_META_DEV" "$SCR"
mount_fail -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_META_DEV" "$T_MSCR"
echo "== both data devices"
mount_fail -o metadev_path=$T_EX_DATA_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
mount_fail -o metadev_path=$T_EX_DATA_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"
echo "== good volume, bad option and good options"
mount_fail -o _bad,metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
mount_fail -o _bad,metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"
t_pass

@@ -138,7 +138,9 @@ echo "--- can't overwrite non-empty dir"
mkdir "$T_D0/dir/a/dir"
touch "$T_D0/dir/a/dir/nope"
mkdir "$T_D1/dir/c/clobber"
mv -T "$T_D1/dir/c/clobber" "$T_D1/dir/a/dir" 2>&1 | t_filter_fs
mv -T "$T_D1/dir/c/clobber" "$T_D1/dir/a/dir" 2>&1 | \
sed "s@mv: cannot move '.*' to '\(.*\)': Directory not empty@mv: cannot overwrite '\1': Directory not empty@g" | \
t_filter_fs
find "$T_D0/dir" -ls 2>&1 | t_filter_fs > "$T_TMP.0"
find "$T_D1/dir" -ls 2>&1 | t_filter_fs > "$T_TMP.1"
diff -u "$T_TMP.0" "$T_TMP.1"

@@ -0,0 +1,143 @@
#
# Test basic .indx. xattr tag functionality and index entry lifecycle
#
t_require_commands touch rm setfattr scoutfs stat
t_require_mounts 2
# query index from a specific mount, default mount 0
read_xattr_index()
{
local nr="${1:-0}"
local mnt="$(eval echo \$T_M$nr)"
shift
sync
echo 1 > $(t_debugfs_path $nr)/drop_weak_item_cache
scoutfs read-xattr-index -p "$mnt" "$@"
}
MAJOR=5
MINOR=100
echo "== testing invalid read-xattr-index arguments"
scoutfs read-xattr-index -p "$T_M0" bad 2>&1
scoutfs read-xattr-index -p "$T_M0" 1.2 2>&1
scoutfs read-xattr-index -p "$T_M0" 1.2.3 256.0.0 2>&1
scoutfs read-xattr-index -p "$T_M0" 1.2.3 0.0.0 2>&1
scoutfs read-xattr-index -p "$T_M0" 1.2.0 1.1.2 2>&1
scoutfs read-xattr-index -p "$T_M0" 2.2.2 2.2.1 2>&1
echo "== testing invalid names"
touch "$T_D0/invalid"
setfattr -n scoutfs.hide.indx.test.$MAJOR "$T_D0/invalid" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test "$T_D0/invalid" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test.. "$T_D0/invalid" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test..$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test.$MAJOR. "$T_D0/invalid" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test.256.$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test.abc.$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test.$MAJOR.abc "$T_D0/invalid" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test.-1.$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test.$MAJOR.-1 "$T_D0/invalid" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test.18446744073709551616.$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.$(printf 'x%.0s' $(seq 1 240)).$MAJOR.$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
rm -f "$T_D0/invalid"
echo "== testing boundary values"
touch "$T_D0/boundary"
INO=$(stat -c "%i" "$T_D0/boundary")
setfattr -n scoutfs.hide.indx.test.0.0 "$T_D0/boundary"
read_xattr_index 0 0.0.0 0.0.-1 | awk '($3 == "'$INO'") {print "0.0 found"}'
setfattr -x scoutfs.hide.indx.test.0.0 "$T_D0/boundary"
setfattr -n scoutfs.hide.indx.test.255.18446744073709551615 "$T_D0/boundary"
read_xattr_index 0 255.0.0 255.-1.-1 | awk '($3 == "'$INO'") {print "255.max found"}'
setfattr -x scoutfs.hide.indx.test.255.18446744073709551615 "$T_D0/boundary"
rm -f "$T_D0/boundary"
echo "== indx xattr must have no value"
touch "$T_D0/noval"
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR -v "" "$T_D0/noval" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR -v 0 "$T_D0/noval" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR -v 1 "$T_D0/noval" 2>&1 | t_filter_fs
rm -f "$T_D0/noval"
echo "== set indx xattr and verify index entry"
touch "$T_D0/file"
INO=$(stat -c "%i" "$T_D0/file")
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/file"
read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found"}'
echo "== setting same indx xattr again is a no-op"
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/file"
read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found"}'
echo "== removing non-existent indx xattr succeeds"
setfattr -x scoutfs.hide.indx.nonexistent.$MAJOR.999 "$T_D0/file" 2>&1 | t_filter_fs
read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "still found"}'
echo "== explicit xattr removal cleans up index entry"
setfattr -x scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/file"
read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found orphan"}'
rm -f "$T_D0/file"
echo "== file deletion cleans up index entry"
touch "$T_D0/file2"
INO=$(stat -c "%i" "$T_D0/file2")
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/file2"
read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found before delete"}'
rm -f "$T_D0/file2"
read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found orphan after delete"}'
echo "== multiple indx xattrs on one file cleaned up by deletion"
touch "$T_D0/file3"
INO=$(stat -c "%i" "$T_D0/file3")
setfattr -n scoutfs.hide.indx.a.$MAJOR.200 "$T_D0/file3"
setfattr -n scoutfs.hide.indx.b.$MAJOR.300 "$T_D0/file3"
BEFORE=$(read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'")' | wc -l)
echo "entries before delete: $BEFORE"
rm -f "$T_D0/file3"
AFTER=$(read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'")' | wc -l)
echo "entries after delete: $AFTER"
echo "== partial removal leaves other entries"
touch "$T_D0/partial"
INO=$(stat -c "%i" "$T_D0/partial")
setfattr -n scoutfs.hide.indx.a.$MAJOR.200 "$T_D0/partial"
setfattr -n scoutfs.hide.indx.b.$MAJOR.300 "$T_D0/partial"
setfattr -x scoutfs.hide.indx.a.$MAJOR.200 "$T_D0/partial"
read_xattr_index 0 $MAJOR.200.0 $MAJOR.200.-1 | awk '($3 == "'$INO'") {print "200 found"}'
read_xattr_index 0 $MAJOR.300.0 $MAJOR.300.-1 | awk '($3 == "'$INO'") {print "300 found"}'
rm -f "$T_D0/partial"
echo "== multiple files at same index position"
touch "$T_D0/multi_a" "$T_D0/multi_b"
INO_A=$(stat -c "%i" "$T_D0/multi_a")
INO_B=$(stat -c "%i" "$T_D0/multi_b")
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/multi_a"
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/multi_b"
COUNT=$(read_xattr_index 0 $MAJOR.$MINOR.0 $MAJOR.$MINOR.-1 | wc -l)
echo "files at same position: $COUNT"
rm -f "$T_D0/multi_a"
read_xattr_index 0 $MAJOR.$MINOR.0 $MAJOR.$MINOR.-1 | awk '($3 == "'$INO_A'") {print "deleted file still found"}'
read_xattr_index 0 $MAJOR.$MINOR.0 $MAJOR.$MINOR.-1 | awk '($3 == "'$INO_B'") {print "surviving file found"}'
rm -f "$T_D0/multi_b"
echo "== cross-mount visibility"
touch "$T_D0/file4"
INO=$(stat -c "%i" "$T_D0/file4")
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/file4"
read_xattr_index 1 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found on mount 1"}'
rm -f "$T_D0/file4"
read_xattr_index 1 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found orphan on mount 1"}'
echo "== duplicate position deduplication"
touch "$T_D0/file5"
INO=$(stat -c "%i" "$T_D0/file5")
setfattr -n scoutfs.hide.indx.aa.$MAJOR.$MINOR "$T_D0/file5"
setfattr -n scoutfs.hide.indx.bb.$MAJOR.$MINOR "$T_D0/file5"
COUNT=$(read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'")' | wc -l)
echo "entries for same position: $COUNT"
rm -f "$T_D0/file5"
t_pass
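The index queries in this test all filter `scoutfs read-xattr-index` output by inode number with an awk third-column match. The idiom in isolation, using illustrative values rather than real index output:

```shell
# select lines whose third column equals a given inode number, the
# same awk idiom the test applies to read-xattr-index output
INO=1234
printf '5 100 1234\n5 100 9999\n' | awk '($3 == "'$INO'") {print "found"}'
# prints "found" once: only the first line's third column matches
```

Single-quoting the awk program around an unquoted `'$INO'` splices the shell variable into the awk source before awk runs, which is why the comparison works without `-v`.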

@@ -11,9 +11,8 @@ truncate -s $sz "$T_TMP.equal"
truncate -s $large_sz "$T_TMP.large"
echo "== make scratch fs"
t_quiet scoutfs mkfs -f -Q 0,127.0.0.1,$T_SCRATCH_PORT "$T_EX_META_DEV" "$T_EX_DATA_DEV"
SCR="$T_TMPDIR/mnt.scratch"
mkdir -p "$SCR"
t_scratch_mkfs
mkdir -p "$T_MSCR"
echo "== small new data device fails"
t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.small"
@@ -23,13 +22,13 @@ t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV" "$T_TMP.small"
t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV"
echo "== preparing while mounted fails"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"
t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal"
umount "$SCR"
umount "$T_MSCR"
echo "== preparing without recovery fails"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
umount -f "$SCR"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"
umount -f "$T_MSCR"
t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal"
echo "== check sees metadata errors"
@@ -37,16 +36,16 @@ t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV"
t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV" "$T_TMP.equal"
echo "== preparing with file data fails"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
echo hi > "$SCR"/file
umount "$SCR"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"
echo hi > "$T_MSCR"/file
umount "$T_MSCR"
scoutfs print "$T_EX_META_DEV" > "$T_TMP.print"
t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal"
echo "== preparing after emptied"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
rm -f "$SCR"/file
umount "$SCR"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"
rm -f "$T_MSCR"/file
umount "$T_MSCR"
t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal"
echo "== checks pass"
@@ -55,22 +54,22 @@ t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV" "$T_TMP.equal"
echo "== using prepared"
scr_loop=$(losetup --find --show "$T_TMP.equal")
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$scr_loop" "$SCR"
touch "$SCR"/equal_prepared
equal_tot=$(scoutfs statfs -s total_data_blocks -p "$SCR")
umount "$SCR"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$scr_loop" "$T_MSCR"
touch "$T_MSCR"/equal_prepared
equal_tot=$(scoutfs statfs -s total_data_blocks -p "$T_MSCR")
umount "$T_MSCR"
losetup -d "$scr_loop"
echo "== preparing larger and resizing"
t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.large"
scr_loop=$(losetup --find --show "$T_TMP.large")
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$scr_loop" "$SCR"
touch "$SCR"/large_prepared
ls "$SCR"
scoutfs resize-devices -p "$SCR" -d $large_sz
large_tot=$(scoutfs statfs -s total_data_blocks -p "$SCR")
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$scr_loop" "$T_MSCR"
touch "$T_MSCR"/large_prepared
ls "$T_MSCR"
scoutfs resize-devices -p "$T_MSCR" -d $large_sz
large_tot=$(scoutfs statfs -s total_data_blocks -p "$T_MSCR")
test "$large_tot" -gt "$equal_tot" ; echo "resized larger test rc: $?"
umount "$SCR"
umount "$T_MSCR"
losetup -d "$scr_loop"
echo "== cleanup"

@@ -54,21 +54,16 @@ after=$(free_blocks Data "$T_M0")
test "$before" == "$after" || \
t_fail "$after free data blocks after rm, expected $before"
# XXX this is all pretty manual, would be nice to have helpers
echo "== make small meta fs"
# meta device just big enough for reserves and the metadata we'll fill
scoutfs mkfs -A -f -Q 0,127.0.0.1,$T_SCRATCH_PORT -m 10G "$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 || \
t_fail "mkfs failed"
SCR="$T_TMPDIR/mnt.scratch"
mkdir -p "$SCR"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
"$T_EX_DATA_DEV" "$SCR"
t_scratch_mkfs -A -m 10G
t_scratch_mount
echo "== create large xattrs until we fill up metadata"
mkdir -p "$SCR/xattrs"
mkdir -p "$T_MSCR/xattrs"
for f in $(seq 1 100000); do
file="$SCR/xattrs/file-$f"
file="$T_MSCR/xattrs/file-$f"
touch "$file"
LC_ALL=C create_xattr_loop -c 1000 -n user.scoutfs-enospc -p "$file" -s 65535 > $T_TMP.cxl 2>&1
@@ -84,10 +79,10 @@ for f in $(seq 1 100000); do
done
echo "== remove files with xattrs after enospc"
rm -rf "$SCR/xattrs"
rm -rf "$T_MSCR/xattrs"
echo "== make sure we can create again"
file="$SCR/file-after"
file="$T_MSCR/file-after"
C=120
while (( C-- )); do
touch $file 2> /dev/null && break
@@ -99,7 +94,6 @@ sync
rm -f "$file"
echo "== cleanup small meta fs"
umount "$SCR"
rmdir "$SCR"
t_scratch_umount
t_pass

@@ -5,6 +5,9 @@
t_require_commands sleep touch grep sync scoutfs
t_require_mounts 2
# regularly see ~20-30s
VERIFY_TIMEOUT_SECS=90
#
# Make sure that all mounts can read the results of a write from each
# mount.
@@ -40,8 +43,10 @@ verify_fenced_run()
for rid in $rids; do
grep -q ".* running rid '$rid'.* args 'ignored run args'" "$T_FENCED_LOG" || \
t_fail "fenced didn't execute RUN script for rid $rid"
return 1
done
return 0
}
echo "== make sure all mounts can see each other"
@@ -54,14 +59,7 @@ rid=$(t_mount_rid $cl)
echo "cl $cl sv $sv rid $rid" >> "$T_TMP.log"
sync
t_force_umount $cl
# wait for client reconnection to timeout
while grep -q $rid $(t_debugfs_path $sv)/connections; do
sleep .5
done
while t_rid_is_fencing $rid; do
sleep .5
done
verify_fenced_run $rid
t_wait_until_timeout $VERIFY_TIMEOUT_SECS verify_fenced_run $rid
t_mount $cl
check_read_write
@@ -83,15 +81,7 @@ for cl in $(t_fs_nrs); do
t_force_umount $cl
done
# wait for all client reconnections to timeout
while egrep -q "($pattern)" $(t_debugfs_path $sv)/connections; do
sleep .5
done
# wait for all fence requests to complete
while test -d $(echo /sys/fs/scoutfs/*/fence/* | cut -d " " -f 1); do
sleep .5
done
verify_fenced_run $rids
t_wait_until_timeout $VERIFY_TIMEOUT_SECS verify_fenced_run $rids
# remount all the clients
for cl in $(t_fs_nrs); do
if [ $cl == $sv ]; then
@@ -107,12 +97,7 @@ rid=$(t_mount_rid $sv)
echo "sv $sv rid $rid" >> "$T_TMP.log"
sync
t_force_umount $sv
t_wait_for_leader
# wait until new server is done fencing unmounted leader rid
while t_rid_is_fencing $rid; do
sleep .5
done
verify_fenced_run $rid
t_wait_until_timeout $VERIFY_TIMEOUT_SECS verify_fenced_run $rid
t_mount $sv
check_read_write
@@ -127,11 +112,7 @@ for nr in $(t_fs_nrs); do
t_force_umount $nr
done
t_mount_all
# wait for all fence requests to complete
while test -d $(echo /sys/fs/scoutfs/*/fence/* | cut -d " " -f 1); do
sleep .5
done
verify_fenced_run $rids
t_wait_until_timeout $VERIFY_TIMEOUT_SECS verify_fenced_run $rids
check_read_write
t_pass
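The diff above converts the open-coded `sleep .5` polling loops into calls to `t_wait_until_timeout`, with `verify_fenced_run` returning a status instead of calling `t_fail` directly so it can be polled. A minimal sketch of such a polling helper (the real `t_wait_until_timeout` in the harness may differ) could look like:

```shell
# hypothetical sketch of a poll-until-timeout helper: retry a command
# every half second until it succeeds or the deadline passes
wait_until_timeout() {
    local secs="$1"; shift
    local deadline=$((SECONDS + secs))
    while ! "$@"; do
        [ "$SECONDS" -ge "$deadline" ] && return 1
        sleep .5
    done
    return 0
}
```

Usage mirrors the test: `wait_until_timeout 90 verify_fenced_run $rid` replaces an unbounded loop with one that fails after the timeout.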

@@ -11,8 +11,8 @@
# format version.
#
# not supported on el8 or higher
if [ $(source /etc/os-release ; echo ${VERSION_ID:0:1}) -gt 7 ]; then
# not supported on el8 or higher versions
if [ $(source /etc/os-release ; echo ${VERSION_ID} | cut -d. -f1) -gt 7 ]; then
t_skip_permitted "Unsupported OS version"
fi
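The change from `${VERSION_ID:0:1}` to `cut -d. -f1` matters once two-digit major versions exist: the substring expansion takes only the first character, so el10 would report "1" and wrongly pass the `-gt 7` check, while splitting on the dot yields the full major version. A quick demonstration (VERSION_ID hardcoded here for illustration):

```shell
VERSION_ID="10"                       # as /etc/os-release reports on el10
echo "${VERSION_ID:0:1}"              # prints "1": 1 -gt 7 is false, wrong
echo "${VERSION_ID}" | cut -d. -f1    # prints "10": 10 -gt 7 is true
```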

@@ -72,7 +72,7 @@ touch $T_D0/dir/file
mkdir $T_D0/dir/dir
ln -s $T_D0/dir/file $T_D0/dir/symlink
mknod $T_D0/dir/char c 1 3 # null
mknod $T_D0/dir/block b 7 0 # loop0
mknod $T_D0/dir/block b 42 0 # sample block dev - nonexistent, demo-use-only number
for name in $(ls -UA $T_D0/dir | sort); do
ino=$(stat -c '%i' $T_D0/dir/$name)
$GRE $ino | filter_types

@@ -53,26 +53,40 @@ exec {FD1}>&- # close
exec {FD2}>&- # close
check_ino_index "$ino" "$dseq" "$T_M0"
# Hurry along the orphan scanners. If any are currently asleep, we will
# have to wait at least their current scan interval before they wake up,
# run, and notice their new interval.
t_save_all_sysfs_mount_options orphan_scan_delay_ms
t_set_all_sysfs_mount_options orphan_scan_delay_ms 500
t_wait_for_orphan_scan_runs
echo "== remote unopened unlink deletes"
echo "contents" > "$T_D0/file"
ino=$(stat -c "%i" "$T_D0/file")
dseq=$(scoutfs stat -s data_seq "$T_D0/file")
rm -f "$T_D1/file"
# cross-mount deletion falls back to the orphan scanner when the
# creating mount still has the inode cached, wait for it to complete
t_force_log_merge
# wait for orphan scanners to pick up the unlinked inode and become idle
t_wait_for_no_orphans
check_ino_index "$ino" "$dseq" "$T_M0"
check_ino_index "$ino" "$dseq" "$T_M1"
echo "== unlink wait for open on other mount"
echo "contents" > "$T_D0/file"
ino=$(stat -c "%i" "$T_D0/file")
dseq=$(scoutfs stat -s data_seq "$T_D0/file")
exec {FD}<"$T_D0/file"
rm -f "$T_D1/file"
echo "contents" > "$T_D0/badfile"
ino=$(stat -c "%i" "$T_D0/badfile")
dseq=$(scoutfs stat -s data_seq "$T_D0/badfile")
exec {FD}<"$T_D0/badfile"
rm -f "$T_D1/badfile"
echo "mount 0 contents after mount 1 rm: $(cat <&$FD)"
check_ino_index "$ino" "$dseq" "$T_M0"
check_ino_index "$ino" "$dseq" "$T_M1"
exec {FD}>&- # close
# we know that revalidating will unhash the remote dentry
stat "$T_D0/file" 2>&1 | sed 's/cannot statx/cannot stat/' | t_filter_fs
stat "$T_D0/badfile" 2>&1 | sed 's/cannot statx/cannot stat/' | t_filter_fs
t_force_log_merge
t_wait_for_no_orphans
check_ino_index "$ino" "$dseq" "$T_M0"
check_ino_index "$ino" "$dseq" "$T_M1"
@@ -83,16 +97,20 @@ rm -f "$T_D0/dir"/files-*
rmdir "$T_D0/dir"
echo "== open files survive remote scanning orphans"
echo "contents" > "$T_D0/file"
ino=$(stat -c "%i" "$T_D0/file")
dseq=$(scoutfs stat -s data_seq "$T_D0/file")
exec {FD}<"$T_D0/file"
rm -f "$T_D0/file"
echo "contents" > "$T_D0/lastfile"
ino=$(stat -c "%i" "$T_D0/lastfile")
dseq=$(scoutfs stat -s data_seq "$T_D0/lastfile")
exec {FD}<"$T_D0/lastfile"
rm -f "$T_D0/lastfile"
t_umount 1
t_mount 1
echo "mount 0 contents after mount 1 remounted: $(cat <&$FD)"
exec {FD}>&- # close
t_force_log_merge
t_wait_for_no_orphans
check_ino_index "$ino" "$dseq" "$T_M0"
check_ino_index "$ino" "$dseq" "$T_M1"
t_restore_all_sysfs_mount_options orphan_scan_delay_ms
t_pass

@@ -0,0 +1,52 @@
#
# Test that orphaned log_trees entries from unmounted rids are
# finalized and merged.
#
# An orphan log_trees entry is one whose rid has no mounted_clients
# entry. This can happen from incomplete reclaim across server
# failovers. We simulate it with the reclaim_skip_finalize trigger
# which makes reclaim_open_log_tree skip the finalization step.
#
t_require_commands touch scoutfs
t_require_mounts 2
TIMEOUT=90
echo "== create orphan log_trees entry via trigger"
sv=$(t_server_nr)
cl=$(t_first_client_nr)
rid=$(t_mount_rid $cl)
touch "$T_D0/file" "$T_D1/file"
sync
# arm the trigger so reclaim skips finalization
t_trigger_arm_silent reclaim_skip_finalize $sv
# force unmount the client, server will fence and reclaim it
# but the trigger makes reclaim leave log_trees unfinalized
t_force_umount $cl
# wait for fencing to run
verify_fenced() {
grep -q "running rid '$rid'" "$T_FENCED_LOG" 2>/dev/null
}
t_wait_until_timeout $TIMEOUT verify_fenced
# give the server time to complete reclaim after fence
sleep 5
# remount the client so t_force_log_merge can sync all mounts.
# the client gets a new rid; the old rid's log_trees is the orphan.
t_mount $cl
echo "== verify orphan is reclaimed and merge completes"
t_force_log_merge
echo "== verify orphan reclaim was logged"
if ! dmesg | grep -q "reclaiming orphan log trees for rid $rid"; then
t_fail "expected orphan reclaim message for rid $rid in dmesg"
fi
t_pass

@@ -0,0 +1,152 @@
t_require_commands scoutfs dd fallocate
FILE="$T_D0/file"
DIR="$T_D0/dir"
echo "== missing options should fail =="
rm -rf $DIR && mkdir -p $DIR
scoutfs punch-offline $DIR -l 4096 -V 0
scoutfs punch-offline $DIR -o 0 -V 0
scoutfs punch-offline $DIR -o 0 -l 4096
echo "== can't hole punch dir or special =="
rm -rf $DIR && mkdir -p $DIR
scoutfs punch-offline $DIR -o 0 -l 4096 -V 0
echo "== punching an empty file does nothing =="
rm -f $FILE && touch $FILE
scoutfs punch-offline $FILE -o 0 -l 4096 -V 0
echo "== punch outside of i_size does nothing =="
dd if=/dev/zero of=$FILE bs=4096 count=1 status=none
scoutfs punch-offline $FILE -o 4096 -l 4096 -V 1
echo "== can't hole punch online extent =="
scoutfs get-fiemap -Lb $FILE
scoutfs punch-offline $FILE -o 0 -l 4096 -V 1
scoutfs get-fiemap -Lb $FILE
echo "== can't hole punch unwritten extent =="
rm -rf $FILE && touch $FILE
fallocate -l $((4096 * 3)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
scoutfs get-fiemap -Lb $FILE
scoutfs punch-offline $FILE -o 4096 -l 4096 -V $vers
scoutfs get-fiemap -Lb $FILE
echo "== hole punch offline extent =="
rm -rf $FILE && touch $FILE
fallocate -l $((4096 * 3)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
scoutfs release $FILE --data-version $vers
scoutfs get-fiemap -Lb $FILE
scoutfs punch-offline $FILE -o 4096 -l 4096 -V $vers
scoutfs get-fiemap -Lb $FILE
echo "== can't hole punch non-aligned bsz offset or len =="
rm -rf $FILE && touch $FILE
fallocate -l $((4096 * 3)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
scoutfs release $FILE --data-version $vers
scoutfs get-fiemap -Lb $FILE
scoutfs punch-offline $FILE -o 4095 -l 4096 -V $vers
scoutfs punch-offline $FILE -o 1 -l 4096 -V $vers
scoutfs punch-offline $FILE -o 4096 -l 409700 -V $vers
scoutfs punch-offline $FILE -o 4096 -l 4097 -V $vers
scoutfs punch-offline $FILE -o 4096 -l 4095 -V $vers
scoutfs punch-offline $FILE -o 4096 -l 1 -V $vers
scoutfs punch-offline $FILE -o 4096 -l 0 -V $vers
scoutfs get-fiemap -Lb $FILE
echo "== can't hole punch mismatched data_version =="
rm -rf $FILE && touch $FILE
fallocate -l $((4096 * 3)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
scoutfs release $FILE --data-version $vers
scoutfs get-fiemap -Lb $FILE
scoutfs punch-offline $FILE -o 4096 -l 4096 -V 0
scoutfs punch-offline $FILE -o 4096 -l 4096 -V 2
scoutfs punch-offline $FILE -o 4096 -l 4096 -V 9999
scoutfs get-fiemap -Lb $FILE
echo "== Punch hole crossing multiple extents =="
rm -rf $FILE && touch $FILE
fallocate -l $((7 * 4096)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
scoutfs release $FILE --data-version $vers
scoutfs get-fiemap -L $FILE
scoutfs punch-offline $FILE -o $((1 * 4096)) -l 4096 -V $vers
scoutfs punch-offline $FILE -o $((3 * 4096)) -l 4096 -V $vers
scoutfs punch-offline $FILE -o $((5 * 4096)) -l 4096 -V $vers
# 0.1.2.3
scoutfs get-fiemap -L $FILE
scoutfs punch-offline $FILE -o $((2 * 4096)) -l $((3 * 4096)) -V $vers
# 0.....1
scoutfs get-fiemap -L $FILE
echo "== punch hole starting at a hole =="
rm -rf $FILE && touch $FILE
fallocate -l $((7 * 4096)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
scoutfs release $FILE --data-version $vers
scoutfs get-fiemap -L $FILE
scoutfs punch-offline $FILE -o $((1 * 4096)) -l 4096 -V $vers
scoutfs punch-offline $FILE -o $((3 * 4096)) -l 4096 -V $vers
scoutfs punch-offline $FILE -o $((5 * 4096)) -l 4096 -V $vers
# 0.1.2.3
scoutfs get-fiemap -L $FILE
scoutfs punch-offline $FILE -o $((1 * 4096)) -l $((5 * 4096)) -V $vers
# 0.....1
scoutfs get-fiemap -L $FILE
echo "== large punch =="
rm -rf $FILE && touch $FILE
fallocate -l $((6 * 1024 * 1024 * 1024)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
scoutfs release $FILE --data-version $vers
scoutfs get-fiemap -L $FILE
scoutfs punch-offline $FILE -o $((134123 * 4096)) -l $((68343 * 4096)) -V $vers
scoutfs punch-offline $FILE -o $((467273 * 4096)) -l $((68343 * 4096)) -V $vers
scoutfs punch-offline $FILE -o $((734623 * 4096)) -l $((68343 * 4096)) -V $vers
scoutfs get-fiemap -L $FILE
echo "== overlapping punches with lots of extents =="
rm -rf $FILE && touch $FILE
fallocate -l $((4096 * 1024)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
scoutfs release $FILE --data-version $vers
scoutfs get-fiemap -Lb $FILE
# punch odd ones away
for h in $(seq 1 2 1023); do
scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
done
scoutfs get-fiemap -Lb $FILE | tail -n 1
# punch a large hole from 32 to 45, removing 7 extents
scoutfs punch-offline $FILE -o $((32 * 4096)) -l $((13 * 4096)) -V $vers
scoutfs get-fiemap -Lb $FILE | tail -n 1
# punch every 8th @6
for h in $(seq 6 8 1024); do
scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
done
# again @4
scoutfs get-fiemap -Lb $FILE | tail -n 1
for h in $(seq 4 8 1024); do
scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
done
scoutfs get-fiemap -Lb $FILE | tail -n 1
# punch a large hole from 127 to 175, removing 12 extents
scoutfs punch-offline $FILE -o $((127 * 4096)) -l $((48 * 4096)) -V $vers
scoutfs get-fiemap -Lb $FILE
# again @2
for h in $(seq 2 8 1024); do
scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
done
scoutfs get-fiemap -L $FILE
# and again @0, punching away every remaining extent
for h in $(seq 0 8 1024); do
scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
done
scoutfs get-fiemap -Lb $FILE
t_pass
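Every punch in this test uses offsets and lengths that are multiples of the 4096-byte block size, and the non-aligned cases are expected to fail. For a caller that starts from an arbitrary byte range, the alignment arithmetic can be sketched as follows (values are illustrative, not taken from the test):

```shell
# align an arbitrary byte range outward to 4 KiB block boundaries, the
# granularity the punch-offline calls above require
BS=4096
start=5000; end=20000
aligned_off=$(( (start / BS) * BS ))            # round start down -> 4096
aligned_end=$(( ((end + BS - 1) / BS) * BS ))   # round end up -> 20480
aligned_len=$(( aligned_end - aligned_off ))    # -> 16384
echo "$aligned_off $aligned_len"                # prints "4096 16384"
```

Rounding the start down and the end up guarantees the punched region fully covers the requested range while staying block-aligned.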

@@ -62,7 +62,7 @@ test_timeout()
sleep 1
# tear down the current server/leader
t_force_umount $sv
t_force_umount $sv &
# see how long it takes for the next leader to start
start=$(time_ms)
@@ -73,6 +73,7 @@ test_timeout()
echo "to $to delay $delay" >> $T_TMP.delay
# restore the mount that we tore down
wait
t_mount $sv
# make sure the new leader delay was reasonable, allowing for some slack

@@ -19,8 +19,8 @@ df_free() {
}
same_totals() {
cur_meta_tot=$(statfs_total meta "$SCR")
cur_data_tot=$(statfs_total data "$SCR")
cur_meta_tot=$(statfs_total meta "$T_MSCR")
cur_data_tot=$(statfs_total data "$T_MSCR")
test "$cur_meta_tot" == "$exp_meta_tot" || \
t_fail "cur total_meta_blocks $cur_meta_tot != expected $exp_meta_tot"
@@ -34,10 +34,10 @@ same_totals() {
# some slop to account for reserved blocks and concurrent allocation.
#
devices_grew() {
cur_meta_tot=$(statfs_total meta "$SCR")
cur_data_tot=$(statfs_total data "$SCR")
cur_meta_df=$(df_free MetaData "$SCR")
cur_data_df=$(df_free Data "$SCR")
cur_meta_tot=$(statfs_total meta "$T_MSCR")
cur_data_tot=$(statfs_total data "$T_MSCR")
cur_meta_df=$(df_free MetaData "$T_MSCR")
cur_data_df=$(df_free Data "$T_MSCR")
local grow_meta_tot=$(echo "$exp_meta_tot * 2" | bc)
local grow_data_tot=$(echo "$exp_data_tot * 2" | bc)
@@ -70,19 +70,13 @@ size_data=$(blockdev --getsize64 "$T_EX_DATA_DEV")
quarter_meta=$(echo "$size_meta / 4" | bc)
quarter_data=$(echo "$size_data / 4" | bc)
# XXX this is all pretty manual, would be nice to have helpers
echo "== make initial small fs"
scoutfs mkfs -A -f -Q 0,127.0.0.1,$T_SCRATCH_PORT -m $quarter_meta -d $quarter_data \
"$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 || \
t_fail "mkfs failed"
SCR="$T_TMPDIR/mnt.scratch"
mkdir -p "$SCR"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
"$T_EX_DATA_DEV" "$SCR"
t_scratch_mkfs -A -m $quarter_meta -d $quarter_data
t_scratch_mount
# then calculate sizes based on blocks that mkfs used
quarter_meta=$(echo "$(statfs_total meta "$SCR") * 64 * 1024" | bc)
quarter_data=$(echo "$(statfs_total data "$SCR") * 4 * 1024" | bc)
quarter_meta=$(echo "$(statfs_total meta "$T_MSCR") * 64 * 1024" | bc)
quarter_data=$(echo "$(statfs_total data "$T_MSCR") * 4 * 1024" | bc)
whole_meta=$(echo "$quarter_meta * 4" | bc)
whole_data=$(echo "$quarter_data * 4" | bc)
outsize_meta=$(echo "$whole_meta * 2" | bc)
@@ -93,59 +87,58 @@ shrink_meta=$(echo "$quarter_meta / 2" | bc)
shrink_data=$(echo "$quarter_data / 2" | bc)
# and save expected values for checks
exp_meta_tot=$(statfs_total meta "$SCR")
exp_meta_df=$(df_free MetaData "$SCR")
exp_data_tot=$(statfs_total data "$SCR")
exp_data_df=$(df_free Data "$SCR")
exp_meta_tot=$(statfs_total meta "$T_MSCR")
exp_meta_df=$(df_free MetaData "$T_MSCR")
exp_data_tot=$(statfs_total data "$T_MSCR")
exp_data_df=$(df_free Data "$T_MSCR")
echo "== 0s do nothing"
scoutfs resize-devices -p "$SCR"
scoutfs resize-devices -p "$SCR" -m 0
scoutfs resize-devices -p "$SCR" -d 0
scoutfs resize-devices -p "$SCR" -m 0 -d 0
scoutfs resize-devices -p "$T_MSCR"
scoutfs resize-devices -p "$T_MSCR" -m 0
scoutfs resize-devices -p "$T_MSCR" -d 0
scoutfs resize-devices -p "$T_MSCR" -m 0 -d 0
echo "== shrinking fails"
scoutfs resize-devices -p "$SCR" -m $shrink_meta
scoutfs resize-devices -p "$SCR" -d $shrink_data
scoutfs resize-devices -p "$SCR" -m $shrink_meta -d $shrink_data
scoutfs resize-devices -p "$T_MSCR" -m $shrink_meta
scoutfs resize-devices -p "$T_MSCR" -d $shrink_data
scoutfs resize-devices -p "$T_MSCR" -m $shrink_meta -d $shrink_data
same_totals
echo "== existing sizes do nothing"
scoutfs resize-devices -p "$SCR" -m $quarter_meta
scoutfs resize-devices -p "$SCR" -d $quarter_data
scoutfs resize-devices -p "$SCR" -m $quarter_meta -d $quarter_data
scoutfs resize-devices -p "$T_MSCR" -m $quarter_meta
scoutfs resize-devices -p "$T_MSCR" -d $quarter_data
scoutfs resize-devices -p "$T_MSCR" -m $quarter_meta -d $quarter_data
same_totals
echo "== growing outside device fails"
scoutfs resize-devices -p "$SCR" -m $outsize_meta
scoutfs resize-devices -p "$SCR" -d $outsize_data
scoutfs resize-devices -p "$SCR" -m $outsize_meta -d $outsize_data
scoutfs resize-devices -p "$T_MSCR" -m $outsize_meta
scoutfs resize-devices -p "$T_MSCR" -d $outsize_data
scoutfs resize-devices -p "$T_MSCR" -m $outsize_meta -d $outsize_data
same_totals
echo "== resizing meta works"
scoutfs resize-devices -p "$SCR" -m $half_meta
scoutfs resize-devices -p "$T_MSCR" -m $half_meta
devices_grew meta
echo "== resizing data works"
scoutfs resize-devices -p "$SCR" -d $half_data
scoutfs resize-devices -p "$T_MSCR" -d $half_data
devices_grew data
echo "== shrinking back fails"
scoutfs resize-devices -p "$SCR" -m $quarter_meta
scoutfs resize-devices -p "$SCR" -m $quarter_data
scoutfs resize-devices -p "$T_MSCR" -m $quarter_meta
scoutfs resize-devices -p "$T_MSCR" -m $quarter_data
same_totals
echo "== resizing again does nothing"
scoutfs resize-devices -p "$SCR" -m $half_meta
scoutfs resize-devices -p "$SCR" -m $half_data
scoutfs resize-devices -p "$T_MSCR" -m $half_meta
scoutfs resize-devices -p "$T_MSCR" -m $half_data
same_totals
echo "== resizing to full works"
scoutfs resize-devices -p "$SCR" -m $whole_meta -d $whole_data
scoutfs resize-devices -p "$T_MSCR" -m $whole_meta -d $whole_data
devices_grew meta data
echo "== cleanup extra fs"
umount "$SCR"
rmdir "$SCR"
t_scratch_umount
t_pass
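The size calculations above rely on scoutfs block sizes: metadata blocks are 64KiB and data blocks are 4KiB, so byte sizes derive from the statfs block counts. A minimal sketch of that arithmetic, using a hypothetical block count of 1024:

```shell
# Derive the resize byte sizes from a statfs metadata block count.
# meta_blocks is an illustrative value, not real statfs output.
meta_blocks=1024
quarter_meta=$((meta_blocks * 64 * 1024))   # 64KiB metadata blocks
whole_meta=$((quarter_meta * 4))            # grow target: 4x the quarter
echo "quarter=$quarter_meta whole=$whole_meta"
```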


@@ -32,7 +32,7 @@ echo "== dirs shouldn't appear in data_seq queries"
mkdir "$DIR"
ino=$(stat -c "%i" "$DIR")
t_sync_seq_index
query_index data_seq | grep "$ino\>"
query_index data_seq | awk '($4 == "'$ino'")'
echo "== two created files are present and come after each other"
touch "$DIR/first"
@@ -92,13 +92,13 @@ test "$before" -lt "$after" || \
# didn't skip past deleted dirty items
#
echo "== make sure dirtying doesn't livelock walk"
dd if=/dev/urandom of="$DIR/dirtying" bs=4K count=1 >> $seqres.full 2>&1
dd if=/dev/urandom of="$DIR/dirtying" bs=4K count=1 >> "$T_TMPDIR/seqres.full" 2>&1
nr=1
while [ "$nr" -lt 100 ]; do
echo "dirty/walk attempt $nr" >> $seqres.full
echo "dirty/walk attempt $nr" >> "$T_TMPDIR/seqres.full"
sync
dd if=/dev/urandom of="$DIR/dirtying" bs=4K count=1 conv=notrunc \
>> $seqres.full 2>&1
>> "$T_TMPDIR/seqres.full" 2>&1
scoutfs walk-inodes data_seq 0 -1 $DIR/dirtying >& /dev/null
((nr++))
done


@@ -12,12 +12,12 @@ create_file() {
if [ "$blocks" != 0 ]; then
dd if=/dev/urandom bs=4096 count=$blocks of="$file" \
>> $seqres.full 2>&1
>> "$T_TMPDIR/seqres.full" 2>&1
fi
if [ "$remainder" != 0 ]; then
dd if=/dev/urandom bs="$remainder" count=1 of="$file" \
conv=notrunc oflag=append >> $seqres.full 2>&1
conv=notrunc oflag=append >> "$T_TMPDIR/seqres.full" 2>&1
fi
}
@@ -78,7 +78,7 @@ create_file "$FILE" $((4096 * 1024))
cp "$FILE" "$T_TMP"
nr=1
while [ "$nr" -lt 10 ]; do
echo "attempt $nr" >> $seqres.full 2>&1
echo "attempt $nr" >> "$T_TMPDIR/seqres.full" 2>&1
release_vers "$FILE" stat 0 4096K
sync
echo 3 > /proc/sys/vm/drop_caches


@@ -0,0 +1,50 @@
#
# Test that merge_read_item() correctly updates the sequence number when
# combining delta items from multiple finalized log trees. Each mount
# sets a totl value in its own 3-bit lane (powers of 8) so that any
# double-counting overflows the lane and is caught by: or(v, exp) != exp.
#
t_require_commands setfattr scoutfs
t_require_mounts 5
echo "== setup"
for nr in $(t_fs_nrs); do
d=$(eval echo \$T_D$nr)
for i in $(seq 1 2500); do : > "$d/f$nr$i"; done
done
sync
t_force_log_merge
vals=(1 8 64 512 4096)
expected=4681
n=0
for nr in $(t_fs_nrs); do
d=$(eval echo \$T_D$nr)
v=${vals[$((n++))]}
for i in $(seq 1 2500); do
setfattr -n "scoutfs.totl.t.$i.0.0" -v $v "$d/f$nr$i"
done
done
t_trigger_arm_silent log_merge_force_partial $(t_server_nr)
bad="$T_TMPDIR/bad"
for nr in $(t_fs_nrs); do
( while true; do
echo 1 > "$(t_debugfs_path $nr)/drop_weak_item_cache"
scoutfs read-xattr-totals -p "$(eval echo \$T_M$nr)" | \
awk -F'[ =,]+' -v e=$expected 'or($2+0,e) != e'
done ) >> "$bad" &
done
echo "expected $expected"
t_force_log_merge
t_silent_kill $(jobs -p)
test -s "$bad" && echo "double-counted:" && cat "$bad"
echo "== cleanup"
for nr in $(t_fs_nrs); do
find "$(eval echo \$T_D$nr)" -name "f$nr*" -delete
done
t_pass
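The 3-bit-lane trick in the header comment can be sketched in isolation: each of the five mounts writes its value into its own octal digit, so the expected combined total is 1+8+64+512+4096 = 4681, and double-counting any one lane sets a bit outside the expected mask, which `or(v, expected) != expected` catches:

```shell
# Each lane holds 001; counting a lane twice turns its digit into 010,
# a bit that falls outside the expected mask.
expected=$((1 + 8 + 64 + 512 + 4096))
caught=0
for lane in 1 8 64 512 4096; do
	bad=$((expected + lane))        # that lane counted twice
	if [ $((bad | expected)) -ne "$expected" ]; then
		caught=$((caught + 1))
		echo "lane $lane caught"
	fi
done
echo "expected $expected caught $caught"
```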


@@ -63,6 +63,22 @@ mounts because there are more locks that cover the same number of
created files. This can be helpful when working with smaller numbers of
large files.
.TP
.B lock_idle_count=<number>
This option sets the number of locks that the client will allow to
remain idle after being granted. If the number of locks exceeds this
count then the client will try to free the oldest locks. This setting
is per-mount and only changes the behavior of that mount.
.sp
Idle locks are not reclaimed by memory pressure, so this option bounds
the amount of memory that is likely to be pinned by allocated idle
locks. Setting it too low can increase operation latency because
repeated use of a working set of locks must request them over the
network rather than reusing granted idle locks.
.sp
The count is not strictly enforced. Operations are allowed to use locks
while over the limit to avoid deadlocks under heavy concurrent load;
exceeding the count only triggers attempts to free idle locks.
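A hedged example of setting this per-mount option (the device paths and the 10000 value are illustrative placeholders, not defaults):

```shell
# Hypothetical scoutfs mount with an explicit idle lock cap.
mount -t scoutfs \
	-o metadev_path=/dev/meta,quorum_slot_nr=0,lock_idle_count=10000 \
	/dev/data /mnt/scoutfs
```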
.TP
.B log_merge_wait_timeout_ms=<number>
This option sets the amount of time, in milliseconds, that log merge
creation can wait before timing out. This setting is per-mount, only


@@ -71,7 +71,7 @@ else
m64=""
fi
sparse $m64 $include $search/include "$@" 2>&1 | egrep -v "($RE)" | tee .sparse.output
sparse $m64 $include $search/include "$@" 2>&1 | grep -v -E "($RE)" | tee .sparse.output
rm -f $defines

utils/src/punch_offline.c

@@ -0,0 +1,127 @@
#include <sys/ioctl.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
#include <argp.h>
#include "sparse.h"
#include "parse.h"
#include "util.h"
#include "ioctl.h"
#include "cmd.h"
struct po_args {
char *path;
u64 offset;
u64 length;
u64 data_version;
unsigned offset_set:1,
length_set:1,
data_version_set:1;
};
static int do_punch_offline(struct po_args *args)
{
struct scoutfs_ioctl_punch_offline ioctl_args;
int ret;
int fd;
fd = get_path(args->path, O_RDWR);
if (fd < 0)
return fd;
ioctl_args.offset = args->offset;
ioctl_args.len = args->length;
ioctl_args.data_version = args->data_version;
ioctl_args.flags = 0;
ret = ioctl(fd, SCOUTFS_IOC_PUNCH_OFFLINE, &ioctl_args);
if (ret < 0) {
ret = -errno;
fprintf(stderr, "punch_offline ioctl failed: %s (%d)\n",
strerror(errno), errno);
}
close(fd);
return ret;
}
static int parse_opt(int key, char *arg, struct argp_state *state)
{
struct po_args *args = state->input;
int ret = 0;
switch (key) {
case 'V':
ret = parse_u64(arg, &args->data_version);
if (ret)
return ret;
args->data_version_set = 1;
break;
case 'o': /* offset */
ret = parse_human(arg, &args->offset);
if (ret)
return ret;
args->offset_set = 1;
break;
case 'l': /* length */
ret = parse_human(arg, &args->length);
if (ret)
return ret;
args->length_set = 1;
break;
case ARGP_KEY_ARG:
if (!args->path)
args->path = strdup_or_error(state, arg);
else
argp_error(state, "unknown extra argument given");
break;
case ARGP_KEY_FINI:
if (!args->path)
argp_error(state, "must provide path to file");
if (!args->offset_set)
argp_error(state, "must provide offset");
if (!args->length_set)
argp_error(state, "must provide length");
if (!args->data_version_set)
argp_error(state, "must provide data_version");
break;
default:
break;
}
return 0;
}
static struct argp_option options[] = {
{ "data-version", 'V', "VERSION", 0, "Data version of the file [Required]"},
{ "offset", 'o', "OFFSET", 0, "Offset (bytes or KMGTP units) in file to punch [Required]"},
{ "length", 'l', "LENGTH", 0, "Length of range (bytes or KMGTP units) of file to punch [Required]"},
{ NULL }
};
static struct argp argp = {
options,
parse_opt,
"PATH",
	"Punch an offline (sparse) hole in the file at the given offset and length"
};
static int punch_offline_cmd(int argc, char **argv)
{
struct po_args po_args = {NULL};
int ret;
ret = argp_parse(&argp, argc, argv, 0, NULL, &po_args);
if (ret)
return ret;
return do_punch_offline(&po_args);
}
static void __attribute__((constructor)) punch_offline_ctor(void)
{
cmd_register_argp("punch-offline", &argp, GROUP_AGENT, punch_offline_cmd);
}
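A hypothetical invocation of the new command, mirroring the tests above; the `-o` and `-l` arguments convert block numbers to byte offsets (FILE and `$vers` are placeholders):

```shell
# Punch 48 offline 4K blocks starting at block 127; the command only
# prints the invocation here since it needs a mounted scoutfs to run.
offset=$((127 * 4096))
length=$((48 * 4096))
echo "scoutfs punch-offline FILE -o $offset -l $length -V \$vers"
```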