block_write_{begin,end} take a folio as well as page_mkwrite.

Adds compat handlers for block_write_begin and block_write_end to take a folio argument instead of page, as this is needed since v6.11-rc1-54-g9f04609f74ec. To avoid having two duplicate page_mkwrite functions there's now a complete page/folio ifdef split here to make it handle either. This is ugly but it's the most straightforward solution here and avoids more obscure macros. Signed-off-by: Auke Kok <auke.kok@versity.com>
Fix compat for list_lru_walk in el10
2026-04-30 09:56:55 +00:00 · 2026-02-13 14:28:40 -08:00 · 2026-02-13 14:28:40 -08:00 · 2026-02-13 14:28:40 -08:00 · 2026-02-13 14:28:40 -08:00 · 2026-02-13 14:28:40 -08:00
62 changed files with 1731 additions and 583 deletions
--- a/ReleaseNotes.md
+++ b/ReleaseNotes.md
@@ -1,6 +1,51 @@
 Versity ScoutFS Release Notes
 =============================

+---
+v1.28
+\
+*Feb 5, 2026*
+
+Fix a bug that led to incorrect negative caching of ACL entries
+starting in version 9.6 of distribution kernels in the enterprise linux
+family.  This would manifest as ACLs seemingly disappearing,
+particularly default ACLs on directories.  The persistent ACLs always
+existed but because of internal API incompatibility some readers
+couldn't see them and would cache that they didn't exist.
+
+---
+v1.27
+\
+*Jan 15, 2026*
+
+Switch away from using the general VM cache reclaim machinery to reduce
+idle cluster locks in the client.  The VM treated locks like a cache and
+let many accumulate, presuming that it would be efficient to free them
+in batches.  Lock freeing requires network communication so this could
+result in enormous backlogs in network messages (on the order of
+hundreds of thousands) and could result in significant delays of other
+network messaging.
+
+Fix inefficient network receive processing while many messages are in
+the send queue.  This consumed sufficient CPU to cause significant
+stalls, perhaps resulting in hung task warning messages due to delayed
+lock message delivery.
+
+Fix a server livelock case that could happen while committing client
+transactions that contain a large amount of freed file data extents.
+This would present as client tasks hanging and a server task spinning
+consuming CPU.
+
+Fix a rare server request processing failure that doesn't deal with
+retransmission of a request that a previous server partially processed.
+This would present as hung client tasks and repeated "error -2
+committing log merge: getting merge status item" kernel messages.
+
+Fix an unnecessary server shutdown during specific circumstances in
+client lock recovery.  The shutdown was due to server state and was
+ultimately harmless.  The next server that started up would proceed
+accordingly.
+
 ---
 v1.26
 \
--- a/kmod/src/Makefile
+++ b/kmod/src/Makefile
@@ -60,11 +60,11 @@ scoutfs-y +=			\
 #
 .PHONY: $(src)/check_exported_types
 $(src)/check_exported_types:
-	@if egrep '\<[us](8|16|32|64\>)' $(src)/format.h $(src)/ioctl.h; then \
+	@if grep -E '\<[us](8|16|32|64\>)' $(src)/format.h $(src)/ioctl.h; then \
 		echo "no raw types in exported headers, preface with __";     \
 		exit 1;							      \
 	fi
-	@if egrep '\<__packed\>' $(src)/format.h $(src)/ioctl.h; then \
+	@if grep -E '\<__packed\>' $(src)/format.h $(src)/ioctl.h; then \
 		echo "no __packed allowed in exported headers";     \
 		exit 1;							      \
 	fi
--- a/kmod/src/Makefile.kernelcompat
+++ b/kmod/src/Makefile.kernelcompat
@@ -479,10 +479,148 @@ ifneq (,$(shell grep '^unsigned int stack_trace_save' include/linux/stacktrace.h
 ccflags-y += -DKC_STACK_TRACE_SAVE
 endif

-# v6.1-rc1-4-g7420332a6ff4
 #
-# .get_acl() method now has dentry arg (and mnt_idmap). The old get_acl has been renamed
-# to get_inode_acl() and is still available as well, but has an extra rcu param.
-ifneq (,$(shell grep 'struct posix_acl ...get_acl..struct mnt_idmap ., struct dentry' include/linux/fs.h))
-ccflags-y += -DKC_GET_ACL_DENTRY
+# v6.1-rc1-2-g138060ba92b3
+#
+# set_acl now passed a struct dentry instead of inode.
+#
+ifneq (,$(shell grep 'int ..set_acl.*struct dentry' include/linux/fs.h))
+ccflags-y += -DKC_SET_ACL_DENTRY
+endif
+
+#
+# v6.1-rc1-3-gcac2f8b8d8b5
+#
+# get_acl renamed to get_inode_acl.
+#
+ifneq (,$(shell grep 'struct posix_acl.*get_inode_acl' include/linux/fs.h))
+ccflags-y += -DKC_GET_INODE_ACL
+endif
+
+#
+# v6.1-rc5-2-ge9a688bcb193
+#
+# get_random_u32_below() implementation
+ifneq (,$(shell grep 'u32 get_random_u32_below' include/linux/random.h))
+ccflags-y += -DKC_HAVE_GET_RANDOM_U32_BELOW
+endif
+
+# v6.5-rc1-7-g9b6304c1d537
+#
+# ctime accessor methods
+ifneq (,$(shell grep 'timespec64 inode_set_ctime_current' include/linux/fs.h))
+ccflags-y += -DKC_FS_INODE_C_TIME_ACCESSOR
+endif
+
+#
+# v6.6-rc5-1-g077c212f0344
+#
+# Must use access methods from fs.h to get to inode ctime/mtime/atime
+ifneq (,$(shell grep 'inline time64_t inode_get_atime_sec' include/linux/fs.h))
+ccflags-y += -DKC_FS_INODE_AM_TIME_ACCESSOR
+endif
+
+#
+# v6.12-rc1-3-g5f60d5f6bbc1
+#
+# asm/unaligned.h replaced with linux/unaligned.h
+ifneq (,$(shell grep -s 'define __LINUX_UNALIGNED_H' include/linux/unaligned.h))
+ccflags-y += -DKC_HAVE__LINUX_UNALIGNED_H
+endif
+
+#
+# v6.9-rc4-29-g203c1ce0bb06
+#
+# RIP bd_inode. (note, struct moved between headers!)
+ifneq (,$(shell grep -s 'struct inode.*bd_inode' include/linux/blk_types.h include/linux/fs.h))
+ccflags-y += -DKC_HAVE_BD_INODE
+endif
+
+#
+# v6.8-9146-gc759e609030c
+#
+# Removes __assign_str_len() and removes the 2nd param of __assign_str().
+ifneq (,$(shell grep -s 'define __assign_str.dst, src' \
+				include/trace/trace_events.h \
+				include/trace/ftrace.h \
+				include/trace/stages/stage6_event_callback.h))
+ccflags-y += -DKC_HAVE_ASSIGN_STR_PARMS
+endif
+
+#
+# v6.5-113-g615e95831ec3
+#
+ifneq (,$(shell grep 'generic_fillattr..*,.u32,' include/linux/fs.h))
+ccflags-y += -DKC_GENERIC_FILLATTR_REQUEST_MASK
+endif
+
+#
+# v6.6-rc4-53-gc42d50aefd17
+#
+# el10 yet again modifies the shrinker API significantly, breaking our current
+# implementation.
+ifneq (,$(shell grep 'struct shrinker .shrinker_alloc' include/linux/shrinker.h))
+ccflags-y += -DKC_SHRINKER_ALLOC
+endif
+
+#
+# v6.9-rc4-8-gead083aeeed9
+#
+# set_blocksize() now has a struct file arg.
+ifneq (,$(shell grep -s 'int set_blocksize.struct file' include/linux/blkdev.h))
+ccflags-y += -DKC_BLKDEV_SET_BLOCKSIZE_FILE
+endif
+
+#
+# v5.1-rc3-29-gaa30f47cf666
+#
+# struct kobj_type now has member `default_groups`
+ifneq (,$(shell grep 'const struct attribute_group ..default_groups;' include/linux/kobject.h))
+ccflags-y += -DKC_KOBJECT_DEFAULT_GROUPS
+endif
+
+#
+# v6.7-rc4-307-g17bf23a981be
+#
+# block_write_full_page() is replaced with block_write_full_folio(),
+# but that isn't exported as it used to be (and the only users now
+# are builtin). However, the kernel will fall back to using the
+# .writepages method instead, so we can drop this method.
+ifneq (,$(shell grep 'int block_write_full_page.struct page' include/linux/buffer_head.h))
+ccflags-y += -DKC_HAVE_BLOCK_WRITE_FULL_PAGE
+endif
+
+#
+# v6.4-rc2-29-gc6585011bc1d
+#
+# generic_file_splice_read is removed. It can be replaced with filemap_splice_read
+# or copy_splice_read.
+ifneq (,$(shell grep 'ssize_t generic_file_splice_read.struct file' include/linux/fs.h))
+ccflags-y += -DKC_HAVE_GENERIC_FILE_SPLICE_READ
+endif
+
+#
+# v5.19-rc3-395-g67235182a41c
+#
+# Adds buffer_migrate_folio(), similar to other fss. Quote willy: "If the filesystem
+# implements migrate_folio and writepages, there is no need for a writepage implementation."
+ifneq (,$(shell grep 'int buffer_migrate_folio.struct address_space' include/linux/buffer_head.h))
+ccflags-y += -DKC_HAVE_BUFFER_MIGRATE_FOLIO
+endif
+
+#
+# v4.6-rc3-29-g6192269444eb
+#
+# Adds .iterate_shared readdir() iterator vfs method.
+ifneq (,$(shell grep 'iterate_shared...struct file.., struct dir_context' include/linux/fs.h))
+ccflags-y += -DKC_HAVE_ITERATE_SHARED
+endif
+
+#
+# v6.11-rc1-54-g9f04609f74ec
+#
+# Last of a series of changes that make block_write_begin/end take a folio instead of
+# a struct page.
+ifneq (,$(shell grep 'int __block_write_begin.struct.folio' include/linux/buffer_head.h))
+ccflags-y += -DKC_BLOCK_WRITE_BEGIN_FOLIO
 endif
--- a/kmod/src/acl.c
+++ b/kmod/src/acl.c
@@ -107,20 +107,22 @@ struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct s
 	return acl;
 }

-#ifdef KC_GET_ACL_DENTRY
-struct posix_acl *scoutfs_get_acl(KC_VFS_NS_DEF
-				  struct dentry *dentry, int type)
-{
-	struct inode *inode = dentry->d_inode;
+#ifdef KC_GET_INODE_ACL
+struct posix_acl *scoutfs_get_acl(struct inode *inode, int type, bool rcu)
 #else
 struct posix_acl *scoutfs_get_acl(struct inode *inode, int type)
-{
 #endif
+{
 	struct super_block *sb = inode->i_sb;
 	struct scoutfs_lock *lock = NULL;
 	struct posix_acl *acl;
 	int ret;

+#ifdef KC_GET_INODE_ACL
+	if (rcu)
+		return ERR_PTR(-ECHILD);
+#endif
+
 #ifndef KC___POSIX_ACL_CREATE
 	if (!IS_POSIXACL(inode))
 		return NULL;
@@ -195,7 +197,7 @@ int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type,
 		if (!value) {
 			/* can be setting an acl that only affects mode, didn't need xattr */
 			inode_inc_iversion(inode);
-			inode->i_ctime = current_time(inode);
+			inode_set_ctime_current(inode);
 		}
 	}

@@ -208,7 +210,7 @@ out:
 	return ret;
 }

-#ifdef KC_GET_ACL_DENTRY
+#ifdef KC_SET_ACL_DENTRY
 int scoutfs_set_acl(KC_VFS_NS_DEF
 		    struct dentry *dentry, struct posix_acl *acl, int type)
 {
@@ -254,9 +256,8 @@ int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value,
 	if (!IS_POSIXACL(dentry->d_inode))
 		return -EOPNOTSUPP;

-#ifdef KC_GET_ACL_DENTRY
-	acl = scoutfs_get_acl(KC_VFS_INIT_NS
-			      dentry, type);
+#ifdef KC_GET_INODE_ACL
+	acl = scoutfs_get_acl(dentry->d_inode, type, false);
 #else
 	acl = scoutfs_get_acl(dentry->d_inode, type);
 #endif
@@ -305,7 +306,7 @@ int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *v
 		}
 	}

-#ifdef KC_GET_ACL_DENTRY
+#ifdef KC_SET_ACL_DENTRY
 	ret = scoutfs_set_acl(KC_VFS_INIT_NS dentry, acl, type);
 #else
 	ret = scoutfs_set_acl(dentry->d_inode, acl, type);
--- a/kmod/src/acl.h
+++ b/kmod/src/acl.h
@@ -1,12 +1,16 @@
 #ifndef _SCOUTFS_ACL_H_
 #define _SCOUTFS_ACL_H_

-#ifdef KC_GET_ACL_DENTRY
-struct posix_acl *scoutfs_get_acl(KC_VFS_NS_DEF struct dentry *dentry, int type);
-int scoutfs_set_acl(KC_VFS_NS_DEF struct dentry *dentry, struct posix_acl *acl, int type);
+#ifdef KC_SET_ACL_DENTRY
+int scoutfs_set_acl(KC_VFS_NS_DEF
+		    struct dentry *dentry, struct posix_acl *acl, int type);
+#else
+int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+#endif
+#ifdef KC_GET_INODE_ACL
+struct posix_acl *scoutfs_get_acl(struct inode *inode, int type, bool rcu);
 #else
 struct posix_acl *scoutfs_get_acl(struct inode *inode, int type);
-int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
 #endif
 struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct scoutfs_lock *lock);
 int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type,
--- a/kmod/src/alloc.c
+++ b/kmod/src/alloc.c
@@ -308,14 +308,14 @@ static bool invalid_extent(u64 start, u64 end, u64 first, u64 last)
 static bool invalid_meta_blkno(struct super_block *sb, u64 blkno)
 {
 	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
-	u64 last_meta = (i_size_read(sbi->meta_bdev->bd_inode) >> SCOUTFS_BLOCK_LG_SHIFT) - 1;
+	u64 last_meta = (i_size_read(KC_BDEV_INODE(sbi->meta_bdev)) >> SCOUTFS_BLOCK_LG_SHIFT) - 1;

 	return invalid_extent(blkno, blkno, SCOUTFS_META_DEV_START_BLKNO, last_meta);
 }

 static bool invalid_data_extent(struct super_block *sb, u64 start, u64 len)
 {
-	u64 last_data = (i_size_read(sb->s_bdev->bd_inode) >> SCOUTFS_BLOCK_SM_SHIFT) - 1;
+	u64 last_data = (i_size_read(KC_BDEV_INODE(sb->s_bdev)) >> SCOUTFS_BLOCK_SM_SHIFT) - 1;

 	return invalid_extent(start, start + len - 1, SCOUTFS_DATA_DEV_START_BLKNO, last_data);
 }
--- a/kmod/src/attr_x.c
+++ b/kmod/src/attr_x.c
@@ -103,8 +103,8 @@ int scoutfs_get_attr_x(struct inode *inode, struct scoutfs_ioctl_inode_attr_x *i
 		size = fill_attr(size, iax, SCOUTFS_IOC_IAX_OFFLINE_BLOCKS,
 				 offline_blocks, offline);
 	}
-	size = fill_attr(size, iax, SCOUTFS_IOC_IAX_CTIME, ctime_sec, inode->i_ctime.tv_sec);
-	size = fill_attr(size, iax, SCOUTFS_IOC_IAX_CTIME, ctime_nsec, inode->i_ctime.tv_nsec);
+	size = fill_attr(size, iax, SCOUTFS_IOC_IAX_CTIME, ctime_sec, inode_get_ctime_sec(inode));
+	size = fill_attr(size, iax, SCOUTFS_IOC_IAX_CTIME, ctime_nsec, inode_get_ctime_nsec(inode));
 	size = fill_attr(size, iax, SCOUTFS_IOC_IAX_CRTIME, crtime_sec, si->crtime.tv_sec);
 	size = fill_attr(size, iax, SCOUTFS_IOC_IAX_CRTIME, crtime_nsec, si->crtime.tv_nsec);
 	size = fill_attr(size, iax, SCOUTFS_IOC_IAX_SIZE, size, i_size_read(inode));
@@ -223,10 +223,8 @@ int scoutfs_set_attr_x(struct inode *inode, struct scoutfs_ioctl_inode_attr_x *i
 		scoutfs_inode_set_data_version(inode, iax->data_version);
 	if (iax->x_mask & SCOUTFS_IOC_IAX_SIZE)
 		i_size_write(inode, iax->size);
-	if (iax->x_mask & SCOUTFS_IOC_IAX_CTIME) {
-		inode->i_ctime.tv_sec = iax->ctime_sec;
-		inode->i_ctime.tv_nsec = iax->ctime_nsec;
-	}
+	if (iax->x_mask & SCOUTFS_IOC_IAX_CTIME)
+		inode_set_ctime(inode, iax->ctime_sec, iax->ctime_nsec);
 	if (iax->x_mask & SCOUTFS_IOC_IAX_CRTIME) {
 		si->crtime.tv_sec = iax->crtime_sec;
 		si->crtime.tv_nsec = iax->crtime_nsec;
--- a/kmod/src/block.c
+++ b/kmod/src/block.c
@@ -884,7 +884,7 @@ int scoutfs_block_dirty_ref(struct super_block *sb, struct scoutfs_alloc *alloc,
 	hdr->magic = cpu_to_le32(magic);
 	hdr->fsid = cpu_to_le64(sbi->fsid);
 	hdr->blkno = cpu_to_le64(bl->blkno);
-	prandom_bytes(&hdr->seq, sizeof(hdr->seq));
+	get_random_bytes(&hdr->seq, sizeof(hdr->seq));

 	trace_scoutfs_block_dirty_ref(sb, le64_to_cpu(ref->blkno), le64_to_cpu(ref->seq),
 				      le64_to_cpu(hdr->blkno), le64_to_cpu(hdr->seq));
@@ -1229,7 +1229,12 @@ static int sm_block_io(struct super_block *sb, struct block_device *bdev, blk_op
 	kc_bio_set_sector(bio, blkno << (SCOUTFS_BLOCK_SM_SHIFT - 9));
 	bio->bi_end_io = sm_block_bio_end_io;
 	bio->bi_private = &sbc;
-	bio_add_page(bio, page, SCOUTFS_BLOCK_SM_SIZE, 0);
+	ret = bio_add_page(bio, page, SCOUTFS_BLOCK_SM_SIZE, 0);
+	if (ret != SCOUTFS_BLOCK_SM_SIZE) {
+		bio_put(bio);
+		ret = -EFAULT;
+		goto out;
+	}

 	init_completion(&sbc.comp);
 	sbc.err = 0;
@@ -1285,9 +1290,12 @@ int scoutfs_block_setup(struct super_block *sb)

 	binf->sb = sb;
 	init_waitqueue_head(&binf->waitq);
-	KC_INIT_SHRINKER_FUNCS(&binf->shrinker, block_count_objects,
-			       block_scan_objects);
-	KC_REGISTER_SHRINKER(&binf->shrinker, "scoutfs-block:" SCSBF, SCSB_ARGS(sb));
+	KC_SETUP_SHRINKER(binf->shrinker, binf, 0, block_count_objects,
+			  block_scan_objects, "scoutfs-block:" SCSBF, SCSB_ARGS(sb));
+	if (KC_SHRINKER_IS_NULL(binf->shrinker)) {
+		ret = -ENOMEM;
+		goto out;
+	}
 	INIT_WORK(&binf->free_work, block_free_work);
 	init_llist_head(&binf->free_llist);

@@ -1309,7 +1317,7 @@ void scoutfs_block_destroy(struct super_block *sb)
 	struct block_info *binf = SCOUTFS_SB(sb)->block_info;

 	if (binf) {
-		KC_UNREGISTER_SHRINKER(&binf->shrinker);
+		KC_UNREGISTER_SHRINKER(binf->shrinker);
 		block_shrink_all(sb);
 		flush_work(&binf->free_work);
 		rhashtable_destroy(&binf->ht);
--- a/kmod/src/btree.c
+++ b/kmod/src/btree.c
@@ -2042,7 +2042,7 @@ struct merged_item {
 	u64 seq;
 	u8 flags;
 	unsigned int val_len;
-	u8 val[0];
+	u8 val[];
 };

 static inline struct merged_item *mitem_container(struct rb_node *node)
@@ -2208,7 +2208,7 @@ static int merge_read_item(struct super_block *sb, struct scoutfs_key *key, u64
 	mitem->flags = flags;
 	mitem->val_len = val_len;
 	if (val_len)
-		memcpy(mitem->val, val, val_len);
+		memcpy(&mitem->val[0], val, val_len);

 	if (found) {
 		replace_mitem(rng, found, mitem);
--- a/kmod/src/btree.h
+++ b/kmod/src/btree.h
@@ -30,7 +30,7 @@ struct scoutfs_btree_item_list {
 	u64 seq;
 	u8 flags;
 	int val_len;
-	u8 val[0];
+	u8 val[];
 };

 int scoutfs_btree_lookup(struct super_block *sb,
--- a/kmod/src/counters.c
+++ b/kmod/src/counters.c
@@ -34,6 +34,17 @@ static struct attribute scoutfs_counter_attrs[] = {
 #define NR_ATTRS ARRAY_SIZE(scoutfs_counter_attrs)
 static struct attribute *scoutfs_counter_attr_ptrs[NR_ATTRS + 1];

+#ifdef KC_KOBJECT_DEFAULT_GROUPS
+static struct attribute_group scoutfs_counter_attr_group = {
+	.attrs = scoutfs_counter_attr_ptrs,
+};
+
+static const struct attribute_group *scoutfs_counter_attr_groups[] = {
+	&scoutfs_counter_attr_group,
+	NULL,
+};
+#endif
+
 static ssize_t scoutfs_counter_attr_show(struct kobject *kobj,
 				         struct attribute *attr, char *buf)
 {
@@ -45,7 +56,6 @@ static ssize_t scoutfs_counter_attr_show(struct kobject *kobj,
 	counters = container_of(kobj, struct scoutfs_counters, kobj);
 	index = attr - scoutfs_counter_attrs;
 	pcpu = &counters->FIRST_COUNTER + index;
-
 	return snprintf(buf, PAGE_SIZE, "%lld\n", percpu_counter_sum(pcpu));
 }

@@ -63,7 +73,7 @@ static const struct sysfs_ops scoutfs_counter_attr_ops = {
 };

 static struct kobj_type scoutfs_counters_ktype = {
-	.default_attrs  = scoutfs_counter_attr_ptrs,
+	.KC_KOBJ_DEFAULT_OP = KC_KOBJ_DEFAULT_PICK(scoutfs_counter_attr_groups, scoutfs_counter_attr_ptrs),
 	.sysfs_ops      = &scoutfs_counter_attr_ops,
 	.release        = scoutfs_counters_kobj_release,
 };
--- a/kmod/src/counters.h
+++ b/kmod/src/counters.h
@@ -125,7 +125,6 @@
 	EXPAND_COUNTER(item_update)				\
 	EXPAND_COUNTER(item_write_dirty)			\
 	EXPAND_COUNTER(lock_alloc)				\
-	EXPAND_COUNTER(lock_count_objects)			\
 	EXPAND_COUNTER(lock_free)				\
 	EXPAND_COUNTER(lock_grant_request)			\
 	EXPAND_COUNTER(lock_grant_response)			\
@@ -139,13 +138,13 @@
 	EXPAND_COUNTER(lock_lock_error)				\
 	EXPAND_COUNTER(lock_nonblock_eagain)			\
 	EXPAND_COUNTER(lock_recover_request)			\
-	EXPAND_COUNTER(lock_scan_objects)			\
 	EXPAND_COUNTER(lock_shrink_attempted)			\
-	EXPAND_COUNTER(lock_shrink_aborted)			\
-	EXPAND_COUNTER(lock_shrink_work)			\
+	EXPAND_COUNTER(lock_shrink_request_failed)		\
 	EXPAND_COUNTER(lock_unlock)				\
 	EXPAND_COUNTER(lock_wait)				\
+	EXPAND_COUNTER(log_merge_complete)			\
 	EXPAND_COUNTER(log_merge_no_finalized)			\
+	EXPAND_COUNTER(log_merge_start)				\
 	EXPAND_COUNTER(log_merge_wait_timeout)			\
 	EXPAND_COUNTER(net_dropped_response)			\
 	EXPAND_COUNTER(net_send_bytes)				\
@@ -160,6 +159,7 @@
 	EXPAND_COUNTER(orphan_scan)				\
 	EXPAND_COUNTER(orphan_scan_attempts)			\
 	EXPAND_COUNTER(orphan_scan_cached)			\
+	EXPAND_COUNTER(orphan_scan_empty)			\
 	EXPAND_COUNTER(orphan_scan_error)			\
 	EXPAND_COUNTER(orphan_scan_item)			\
 	EXPAND_COUNTER(orphan_scan_omap_set)			\
--- a/kmod/src/data.c
+++ b/kmod/src/data.c
@@ -716,24 +716,24 @@ static int scoutfs_readpage(struct file *file, struct page *page)
 		return ret;
 	}

-	if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock)) {
-		ret = scoutfs_data_wait_check(inode, page_offset(page),
-					      PAGE_SIZE, SEF_OFFLINE,
-					      SCOUTFS_IOC_DWO_READ, &dw,
-					      inode_lock);
-		if (ret != 0) {
-			unlock_page(page);
-			scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
-			scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
-		}
-		if (ret > 0) {
-			ret = scoutfs_data_wait(inode, &dw);
-			if (ret == 0)
-				ret = AOP_TRUNCATED_PAGE;
-		}
-		if (ret != 0)
-			return ret;
+	scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock);
+
+	ret = scoutfs_data_wait_check(inode, page_offset(page),
+				      PAGE_SIZE, SEF_OFFLINE,
+				      SCOUTFS_IOC_DWO_READ, &dw,
+				      inode_lock);
+	if (ret != 0) {
+		unlock_page(page);
+		scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
+		scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
 	}
+	if (ret > 0) {
+		ret = scoutfs_data_wait(inode, &dw);
+		if (ret == 0)
+			ret = AOP_TRUNCATED_PAGE;
+	}
+	if (ret != 0)
+		return ret;

 #ifdef KC_MPAGE_READ_FOLIO
 	ret = mpage_read_folio(folio, scoutfs_get_block_read);
@@ -741,8 +741,8 @@ static int scoutfs_readpage(struct file *file, struct page *page)
 	ret = mpage_readpage(page, scoutfs_get_block_read);
 #endif

-	scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
 	scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
+	scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);

 	return ret;
 }
@@ -760,8 +760,10 @@ static int scoutfs_readpages(struct file *file, struct address_space *mapping,
 			     struct list_head *pages, unsigned nr_pages)
 {
 	struct inode *inode = file->f_inode;
+	struct scoutfs_inode_info *si = SCOUTFS_I(inode);
 	struct super_block *sb = inode->i_sb;
 	struct scoutfs_lock *inode_lock = NULL;
+	SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
 	struct page *page;
 	struct page *tmp;
 	int ret;
@@ -771,6 +773,8 @@ static int scoutfs_readpages(struct file *file, struct address_space *mapping,
 	if (ret)
 		goto out;

+	scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock);
+
 	list_for_each_entry_safe(page, tmp, pages, lru) {
 		ret = scoutfs_data_wait_check(inode, page_offset(page),
 					      PAGE_SIZE, SEF_OFFLINE,
@@ -790,6 +794,7 @@ static int scoutfs_readpages(struct file *file, struct address_space *mapping,

 	ret = mpage_readpages(mapping, pages, nr_pages, scoutfs_get_block_read);
 out:
+	scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
 	scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
 	BUG_ON(!list_empty(pages));
 	return ret;
@@ -798,8 +803,10 @@ out:
 static void scoutfs_readahead(struct readahead_control *rac)
 {
 	struct inode *inode = rac->file->f_inode;
+	struct scoutfs_inode_info *si = SCOUTFS_I(inode);
 	struct super_block *sb = inode->i_sb;
 	struct scoutfs_lock *inode_lock = NULL;
+	SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
 	int ret;

 	ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
@@ -807,6 +814,8 @@ static void scoutfs_readahead(struct readahead_control *rac)
 	if (ret)
 		return;

+	scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock);
+
 	ret = scoutfs_data_wait_check(inode, readahead_pos(rac),
 				      readahead_length(rac), SEF_OFFLINE,
 				      SCOUTFS_IOC_DWO_READ, NULL,
@@ -814,14 +823,17 @@ static void scoutfs_readahead(struct readahead_control *rac)
 	if (ret == 0)
 		mpage_readahead(rac, scoutfs_get_block_read);

+	scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
 	scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
 }
 #endif

+#ifdef KC_HAVE_BLOCK_WRITE_FULL_PAGE
 static int scoutfs_writepage(struct page *page, struct writeback_control *wbc)
 {
 	return block_write_full_page(page, scoutfs_get_block_write, wbc);
 }
+#endif

 static int scoutfs_writepages(struct address_space *mapping,
 			      struct writeback_control *wbc)
@@ -841,7 +853,7 @@ static int scoutfs_write_begin(struct file *file,
 #ifdef KC_BLOCK_WRITE_BEGIN_AOP_FLAGS
 			       unsigned flags,
 #endif
-			       struct page **pagep, void **fsdata)
+			       KC_PAGE_OR_FOLIO(struct page **pagep, struct folio **folio), void **fsdata)
 {
 	struct inode *inode = mapping->host;
 	struct scoutfs_inode_info *si = SCOUTFS_I(inode);
@@ -886,7 +898,7 @@ retry:
 #ifdef KC_BLOCK_WRITE_BEGIN_AOP_FLAGS
 				flags,
 #endif
-				pagep, scoutfs_get_block_write);
+				KC_PAGE_OR_FOLIO(pagep, folio), scoutfs_get_block_write);
 	if (ret < 0) {
 		scoutfs_release_trans(sb);
 		scoutfs_inode_index_unlock(sb, &wbd->ind_locks);
@@ -919,7 +931,8 @@ static int writepages_sync_none(struct address_space *mapping, loff_t start,

 static int scoutfs_write_end(struct file *file, struct address_space *mapping,
 			     loff_t pos, unsigned len, unsigned copied,
-			     struct page *page, void *fsdata)
+			     KC_PAGE_OR_FOLIO(struct page *pagep, struct folio *folio),
+			     void *fsdata)
 {
 	struct inode *inode = mapping->host;
 	struct scoutfs_inode_info *si = SCOUTFS_I(inode);
@@ -927,10 +940,11 @@ static int scoutfs_write_end(struct file *file, struct address_space *mapping,
 	struct write_begin_data *wbd = fsdata;
 	int ret;

-	trace_scoutfs_write_end(sb, scoutfs_ino(inode), page->index, (u64)pos,
-				len, copied);
+	trace_scoutfs_write_end(sb, scoutfs_ino(inode),
+				KC_PAGE_OR_FOLIO(pagep->index, folio_index(folio)),
+				(u64)pos, len, copied);

-	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+	ret = generic_write_end(file, mapping, pos, len, copied, KC_PAGE_OR_FOLIO(pagep, folio), fsdata);
 	if (ret > 0) {
 		if (!si->staging) {
 			scoutfs_inode_set_data_seq(inode);
@@ -1483,12 +1497,14 @@ int scoutfs_data_move_blocks(struct inode *from, u64 from_off,

 		cur_time = current_time(from);
 		if (!is_stage) {
-			to->i_ctime = to->i_mtime = cur_time;
+			inode_set_ctime_to_ts(to, cur_time);
+			inode_set_mtime_to_ts(to, cur_time);
 			inode_inc_iversion(to);
 			scoutfs_inode_inc_data_version(to);
 			scoutfs_inode_set_data_seq(to);
 		}
-		from->i_ctime = from->i_mtime = cur_time;
+		inode_set_ctime_to_ts(from, cur_time);
+		inode_set_mtime_to_ts(from, cur_time);
 		inode_inc_iversion(from);
 		scoutfs_inode_inc_data_version(from);
 		scoutfs_inode_set_data_seq(from);
@@ -1967,7 +1983,11 @@ static int scoutfs_data_page_mkwrite(struct vm_area_struct *vma,
 				     struct vm_fault *vmf)
 {
 #endif
+#ifdef KC_MPAGE_READ_FOLIO
+	struct folio *folio = page_folio(vmf->page);
+#else
 	struct page *page = vmf->page;
+#endif
 	struct file *file = vma->vm_file;
 	struct inode *inode = file_inode(file);
 	struct scoutfs_inode_info *si = SCOUTFS_I(inode);
@@ -2035,27 +2055,50 @@ static int scoutfs_data_page_mkwrite(struct vm_area_struct *vma,

 	down_write(&si->extent_sem);

+#ifdef KC_MPAGE_READ_FOLIO
+	if (!folio_trylock(folio)) {
+#else
 	if (!trylock_page(page)) {
+#endif
 		ret = VM_FAULT_NOPAGE;
 		goto out_sem;
 	}
 	ret = VM_FAULT_LOCKED;

+#ifdef KC_MPAGE_READ_FOLIO
+	if ((folio->mapping != inode->i_mapping) ||
+	    (!folio_test_uptodate(folio)) ||
+	    (folio_pos(folio) > size)) {
+		folio_unlock(folio);
+#else
 	if ((page->mapping != inode->i_mapping) ||
 	    (!PageUptodate(page)) ||
-	    (page_offset(page) > size))	 {
+	    (page_offset(page) > size)) {
 		unlock_page(page);
+#endif
 		ret = VM_FAULT_NOPAGE;
 		goto out_sem;
 	}

+#ifdef KC_MPAGE_READ_FOLIO
+	if (folio_index(folio) == (size - 1) >> PAGE_SHIFT)
+#else
 	if (page->index == (size - 1) >> PAGE_SHIFT)
+#endif
 		len = ((size - 1) & ~PAGE_MASK) + 1;

+#ifdef KC_MPAGE_READ_FOLIO
+	err = __block_write_begin(KC_PAGE_OR_FOLIO(folio_page(folio, 0), folio), pos, PAGE_SIZE, scoutfs_get_block);
+#else
 	err = __block_write_begin(page, pos, PAGE_SIZE, scoutfs_get_block);
+#endif
 	if (err) {
 		ret = vmf_error(err);
+#ifdef KC_MPAGE_READ_FOLIO
+		folio_unlock(folio);
+#else
 		unlock_page(page);
+#endif
 		goto out_sem;
 	}
 	/* end scoutfs_write_begin */
@@ -2065,8 +2108,13 @@ static int scoutfs_data_page_mkwrite(struct vm_area_struct *vma,
 	 * progress, we are guaranteed that writeback during freezing will
 	 * see the dirty page and writeprotect it again.
 	 */
+#ifdef KC_MPAGE_READ_FOLIO
+	folio_mark_dirty(folio);
+	folio_wait_stable(folio);
+#else
 	set_page_dirty(page);
 	wait_for_stable_page(page);
+#endif

 	/* scoutfs_write_end */
 	scoutfs_inode_set_data_seq(inode);
@@ -2193,6 +2241,9 @@ const struct address_space_operations scoutfs_file_aops = {
 	.dirty_folio		= block_dirty_folio,
 	.invalidate_folio	= block_invalidate_folio,
 	.read_folio		= scoutfs_read_folio,
+#ifdef KC_HAVE_BUFFER_MIGRATE_FOLIO
+	.migrate_folio		= buffer_migrate_folio,
+#endif
 #else
 	.readpage		= scoutfs_readpage,
 #endif
@@ -2201,7 +2252,9 @@ const struct address_space_operations scoutfs_file_aops = {
 #else
 	.readahead		= scoutfs_readahead,
 #endif
+#ifdef KC_HAVE_BLOCK_WRITE_FULL_PAGE
 	.writepage		= scoutfs_writepage,
+#endif
 	.writepages		= scoutfs_writepages,
 	.write_begin		= scoutfs_write_begin,
 	.write_end		= scoutfs_write_end,
@@ -2216,8 +2269,12 @@ const struct file_operations scoutfs_file_fops = {
 #else
 	.read_iter	= scoutfs_file_read_iter,
 	.write_iter	= scoutfs_file_write_iter,
-	.splice_read	= generic_file_splice_read,
 	.splice_write	= iter_file_splice_write,
+#endif
+#ifdef KC_HAVE_GENERIC_FILE_SPLICE_READ
+	.splice_read	= generic_file_splice_read,
+#else
+	.splice_read	= copy_splice_read,
 #endif
 	.mmap		= scoutfs_file_mmap,
 	.unlocked_ioctl	= scoutfs_ioctl,
--- a/kmod/src/dir.c
+++ b/kmod/src/dir.c
@@ -759,6 +759,7 @@ static int scoutfs_mknod(KC_VFS_NS_DEF
 	struct scoutfs_lock *dir_lock = NULL;
 	struct scoutfs_lock *inode_lock = NULL;
 	struct scoutfs_inode_info *si;
+	struct kc_timespec cur_time;
 	LIST_HEAD(ind_locks);
 	u64 hash;
 	u64 pos;
@@ -790,9 +791,13 @@ static int scoutfs_mknod(KC_VFS_NS_DEF
 	set_dentry_fsdata(dentry, dir_lock);

 	i_size_write(dir, i_size_read(dir) + dentry->d_name.len);
-	dir->i_mtime = dir->i_ctime = current_time(inode);
-	inode->i_mtime = inode->i_atime = inode->i_ctime = dir->i_mtime;
-	si->crtime = inode->i_mtime;
+	cur_time = current_time(inode);
+	inode_set_mtime_to_ts(dir, cur_time);
+	inode_set_ctime_to_ts(dir, cur_time);
+	inode_set_mtime_to_ts(inode, cur_time);
+	inode_set_atime_to_ts(inode, cur_time);
+	inode_set_ctime_to_ts(inode, cur_time);
+	si->crtime = inode_get_mtime(inode);
 	inode_inc_iversion(dir);
 	inode_inc_iversion(inode);
 	scoutfs_forest_inc_inode_count(sb);
@@ -845,6 +850,7 @@ static int scoutfs_link(struct dentry *old_dentry,
 	struct scoutfs_lock *dir_lock;
 	struct scoutfs_lock *inode_lock = NULL;
 	struct scoutfs_lock *orph_lock = NULL;
+	struct kc_timespec cur_time;
 	LIST_HEAD(ind_locks);
 	bool del_orphan = false;
 	u64 dir_size;
@@ -919,8 +925,10 @@ retry:
 	set_dentry_fsdata(dentry, dir_lock);

 	i_size_write(dir, dir_size);
-	dir->i_mtime = dir->i_ctime = current_time(inode);
-	inode->i_ctime = dir->i_mtime;
+	cur_time = current_time(inode);
+	inode_set_mtime_to_ts(dir, cur_time);
+	inode_set_ctime_to_ts(dir, cur_time);
+	inode_set_ctime_to_ts(inode, inode_get_mtime(dir));
 	inc_nlink(inode);
 	inode_inc_iversion(dir);
 	inode_inc_iversion(inode);
@@ -1030,13 +1038,13 @@ retry:

 	set_dentry_fsdata(dentry, dir_lock);

-	dir->i_ctime = ts;
-	dir->i_mtime = ts;
+	inode_set_ctime_to_ts(dir, ts);
+	inode_set_mtime_to_ts(dir, ts);
 	i_size_write(dir, i_size_read(dir) - dentry->d_name.len);
 	inode_inc_iversion(dir);
 	inode_inc_iversion(inode);

-	inode->i_ctime = ts;
+	inode_set_ctime_to_ts(inode, ts);
 	drop_nlink(inode);
 	if (S_ISDIR(inode->i_mode)) {
 		drop_nlink(dir);
@@ -1239,6 +1247,7 @@ static int scoutfs_symlink(KC_VFS_NS_DEF
 	struct scoutfs_lock *dir_lock = NULL;
 	struct scoutfs_lock *inode_lock = NULL;
 	struct scoutfs_inode_info *si;
+	struct kc_timespec cur_time;
 	LIST_HEAD(ind_locks);
 	u64 hash;
 	u64 pos;
@@ -1278,11 +1287,13 @@ static int scoutfs_symlink(KC_VFS_NS_DEF
 	set_dentry_fsdata(dentry, dir_lock);

 	i_size_write(dir, i_size_read(dir) + dentry->d_name.len);
-	dir->i_mtime = dir->i_ctime = current_time(inode);
+	cur_time = current_time(inode);
+	inode_set_mtime_to_ts(dir, cur_time);
+	inode_set_ctime_to_ts(dir, cur_time);
 	inode_inc_iversion(dir);

-	inode->i_ctime = dir->i_mtime;
-	si->crtime = inode->i_ctime;
+	inode_set_ctime_to_ts(inode, inode_get_mtime(dir));
+	si->crtime = inode_get_ctime(inode);
 	i_size_write(inode, name_len);
 	inode_inc_iversion(inode);
 	scoutfs_forest_inc_inode_count(sb);
@@ -1804,15 +1815,15 @@ retry:
 	}

 	now = current_time(old_inode);
-	old_dir->i_ctime = now;
-	old_dir->i_mtime = now;
+	inode_set_ctime_to_ts(old_dir, now);
+	inode_set_mtime_to_ts(old_dir, now);
 	if (new_dir != old_dir) {
-		new_dir->i_ctime = now;
-		new_dir->i_mtime = now;
+		inode_set_ctime_to_ts(new_dir, now);
+		inode_set_mtime_to_ts(new_dir, now);
 	}
-	old_inode->i_ctime = now;
+	inode_set_ctime_to_ts(old_inode, now);
 	if (new_inode)
-		new_inode->i_ctime = now;
+		inode_set_ctime_to_ts(new_inode, now);

 	inode_inc_iversion(old_dir);
 	inode_inc_iversion(old_inode);
@@ -1939,6 +1950,7 @@ static int scoutfs_tmpfile(KC_VFS_NS_DEF
 	struct scoutfs_lock *inode_lock = NULL;
 	struct scoutfs_lock *orph_lock = NULL;
 	struct scoutfs_inode_info *si;
+	struct kc_timespec cur_time;
 	LIST_HEAD(ind_locks);
 	int ret;

@@ -1955,8 +1967,11 @@ static int scoutfs_tmpfile(KC_VFS_NS_DEF
 	if (ret < 0)
 		goto out; /* XXX returning error but items created */

-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
-	si->crtime = inode->i_mtime;
+	cur_time = current_time(inode);
+	inode_set_mtime_to_ts(inode, cur_time);
+	inode_set_ctime_to_ts(inode, cur_time);
+	inode_set_atime_to_ts(inode, cur_time);
+	si->crtime = inode_get_mtime(inode);
 	insert_inode_hash(inode);
 	ihold(inode); /* need to update inode modifications in d_tmpfile */
 #ifdef KC_D_TMPFILE_DENTRY
@@ -2006,7 +2021,11 @@ const struct inode_operations scoutfs_symlink_iops = {
 #ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
 	.removexattr	= generic_removexattr,
 #endif
+#ifdef KC_GET_INODE_ACL
+	.get_inode_acl	= scoutfs_get_acl,
+#else
 	.get_acl	= scoutfs_get_acl,
+#endif
 #ifndef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
 	.tmpfile	= scoutfs_tmpfile,
 	.rename		= scoutfs_rename_common,
@@ -2020,7 +2039,11 @@ const struct inode_operations scoutfs_symlink_iops = {
 };

 const struct file_operations scoutfs_dir_fops = {
+#ifdef KC_HAVE_ITERATE_SHARED
+	.iterate_shared	= scoutfs_readdir,
+#else
 	.iterate	= scoutfs_readdir,
+#endif
 #ifdef KC_FMODE_KABI_ITERATE
 	.open		= scoutfs_dir_open,
 #endif
@@ -2052,8 +2075,12 @@ const struct inode_operations scoutfs_dir_iops = {
 	.removexattr	= generic_removexattr,
 #endif
 	.listxattr	= scoutfs_listxattr,
+#ifdef KC_GET_INODE_ACL
+	.get_inode_acl	= scoutfs_get_acl,
+#else
 	.get_acl	= scoutfs_get_acl,
-#ifdef KC_GET_ACL_DENTRY
+#endif
+#ifdef KC_SET_ACL_DENTRY
 	.set_acl	= scoutfs_set_acl,
 #endif
 	.symlink	= scoutfs_symlink,
--- a/kmod/src/fence.c
+++ b/kmod/src/fence.c
@@ -217,6 +217,9 @@ static struct attribute *fence_attrs[] = {
 	SCOUTFS_ATTR_PTR(rid),
 	NULL,
 };
+#ifdef KC_KOBJECT_DEFAULT_GROUPS
+ATTRIBUTE_GROUPS(fence);
+#endif

 #define FENCE_TIMEOUT_MS (MSEC_PER_SEC * 30)

@@ -255,7 +258,8 @@ int scoutfs_fence_start(struct super_block *sb, u64 rid, __be32 ipv4_addr, int r
 	fence->rid = rid;

 	ret = scoutfs_sysfs_create_attrs_parent(sb, &fi->kset->kobj,
-						&fence->ssa, fence_attrs,
+						&fence->ssa,
+						KC_KOBJ_DEFAULT(fence),
 						"%016llx", rid);
 	if (ret < 0) {
 		kfree(fence);
--- a/kmod/src/inode.c
+++ b/kmod/src/inode.c
@@ -149,8 +149,12 @@ static const struct inode_operations scoutfs_file_iops = {
 	.removexattr	= generic_removexattr,
 #endif
 	.listxattr	= scoutfs_listxattr,
+#ifdef KC_GET_INODE_ACL
+	.get_inode_acl	= scoutfs_get_acl,
+#else
 	.get_acl	= scoutfs_get_acl,
-#ifdef KC_GET_ACL_DENTRY
+#endif
+#ifdef KC_SET_ACL_DENTRY
 	.set_acl	= scoutfs_set_acl,
 #endif
 	.fiemap		= scoutfs_data_fiemap,
@@ -165,8 +169,12 @@ static const struct inode_operations scoutfs_special_iops = {
 	.removexattr	= generic_removexattr,
 #endif
 	.listxattr	= scoutfs_listxattr,
+#ifdef KC_GET_INODE_ACL
+	.get_inode_acl	= scoutfs_get_acl,
+#else
 	.get_acl	= scoutfs_get_acl,
-#ifdef KC_GET_ACL_DENTRY
+#endif
+#ifdef KC_SET_ACL_DENTRY
 	.set_acl	= scoutfs_set_acl,
 #endif
 };
@@ -267,12 +275,9 @@ static void load_inode(struct inode *inode, struct scoutfs_inode *cinode, int in
 	i_gid_write(inode, le32_to_cpu(cinode->gid));
 	inode->i_mode = le32_to_cpu(cinode->mode);
 	inode->i_rdev = le32_to_cpu(cinode->rdev);
-	inode->i_atime.tv_sec = le64_to_cpu(cinode->atime.sec);
-	inode->i_atime.tv_nsec = le32_to_cpu(cinode->atime.nsec);
-	inode->i_mtime.tv_sec = le64_to_cpu(cinode->mtime.sec);
-	inode->i_mtime.tv_nsec = le32_to_cpu(cinode->mtime.nsec);
-	inode->i_ctime.tv_sec = le64_to_cpu(cinode->ctime.sec);
-	inode->i_ctime.tv_nsec = le32_to_cpu(cinode->ctime.nsec);
+	inode_set_atime(inode, le64_to_cpu(cinode->atime.sec), le32_to_cpu(cinode->atime.nsec));
+	inode_set_mtime(inode, le64_to_cpu(cinode->mtime.sec), le32_to_cpu(cinode->mtime.nsec));
+	inode_set_ctime(inode, le64_to_cpu(cinode->ctime.sec), le32_to_cpu(cinode->ctime.nsec));

 	si->meta_seq = le64_to_cpu(cinode->meta_seq);
 	si->data_seq = le64_to_cpu(cinode->data_seq);
@@ -393,6 +398,7 @@ int scoutfs_getattr(KC_VFS_NS_DEF
 				 SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
 	if (ret == 0) {
 		generic_fillattr(KC_VFS_INIT_NS
+				 KC_FILLATTR_REQUEST_MASK
 				 inode, stat);
 		scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ);
 	}
@@ -404,6 +410,7 @@ static int set_inode_size(struct inode *inode, struct scoutfs_lock *lock,
 {
 	struct scoutfs_inode_info *si = SCOUTFS_I(inode);
 	struct super_block *sb = inode->i_sb;
+	struct kc_timespec cur_time;
 	SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
 	LIST_HEAD(ind_locks);
 	int ret;
@@ -426,7 +433,9 @@ static int set_inode_size(struct inode *inode, struct scoutfs_lock *lock,
 		scoutfs_inode_inc_data_version(inode);

 	truncate_setsize(inode, new_size);
-	inode->i_ctime = inode->i_mtime = current_time(inode);
+	cur_time = current_time(inode);
+	inode_set_ctime_to_ts(inode, cur_time);
+	inode_set_mtime_to_ts(inode, cur_time);
 	if (truncate)
 		si->flags |= SCOUTFS_INO_FLAG_TRUNCATE;
 	scoutfs_inode_set_data_seq(inode);
@@ -893,14 +902,14 @@ static void store_inode(struct scoutfs_inode *cinode, struct inode *inode, int i
 	cinode->gid = cpu_to_le32(i_gid_read(inode));
 	cinode->mode = cpu_to_le32(inode->i_mode);
 	cinode->rdev = cpu_to_le32(inode->i_rdev);
-	cinode->atime.sec = cpu_to_le64(inode->i_atime.tv_sec);
-	cinode->atime.nsec = cpu_to_le32(inode->i_atime.tv_nsec);
+	cinode->atime.sec = cpu_to_le64(inode_get_atime_sec(inode));
+	cinode->atime.nsec = cpu_to_le32(inode_get_atime_nsec(inode));
 	memset(cinode->atime.__pad, 0, sizeof(cinode->atime.__pad));
-	cinode->ctime.sec = cpu_to_le64(inode->i_ctime.tv_sec);
-	cinode->ctime.nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+	cinode->ctime.sec = cpu_to_le64(inode_get_ctime_sec(inode));
+	cinode->ctime.nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
 	memset(cinode->ctime.__pad, 0, sizeof(cinode->ctime.__pad));
-	cinode->mtime.sec = cpu_to_le64(inode->i_mtime.tv_sec);
-	cinode->mtime.nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+	cinode->mtime.sec = cpu_to_le64(inode_get_mtime_sec(inode));
+	cinode->mtime.nsec = cpu_to_le32(inode_get_mtime_nsec(inode));
 	memset(cinode->mtime.__pad, 0, sizeof(cinode->mtime.__pad));

 	cinode->meta_seq = cpu_to_le64(scoutfs_inode_meta_seq(inode));
@@ -1558,6 +1567,7 @@ int scoutfs_new_inode(struct super_block *sb, struct inode *dir, umode_t mode, d
 	struct scoutfs_inode sinode;
 	struct scoutfs_key key;
 	struct inode *inode;
+	struct kc_timespec cur_time;
 	int inode_bytes;
 	int ret;

@@ -1587,7 +1597,10 @@ int scoutfs_new_inode(struct super_block *sb, struct inode *dir, umode_t mode, d
 	inode_init_owner(KC_VFS_INIT_NS
 			 inode, dir, mode);
 	inode_set_bytes(inode, 0);
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+	cur_time = current_time(inode);
+	inode_set_mtime_to_ts(inode, cur_time);
+	inode_set_atime_to_ts(inode, cur_time);
+	inode_set_ctime_to_ts(inode, cur_time);
 	inode->i_rdev = rdev;
 	set_inode_ops(inode);

@@ -1637,10 +1650,14 @@ int scoutfs_inode_orphan_delete(struct super_block *sb, u64 ino, struct scoutfs_
 				struct scoutfs_lock *primary)
 {
 	struct scoutfs_key key;
+	int ret;

 	init_orphan_key(&key, ino);

-	return scoutfs_item_delete_force(sb, &key, lock, primary);
+	ret = scoutfs_item_delete_force(sb, &key, lock, primary);
+	trace_scoutfs_inode_orphan_delete(sb, ino, ret);
+
+	return ret;
 }

 /*
@@ -1722,6 +1739,8 @@ out:
 		scoutfs_release_trans(sb);
 	scoutfs_inode_index_unlock(sb, &ind_locks);

+	trace_scoutfs_delete_inode_end(sb, ino, mode, size, ret);
+
 	return ret;
 }

@@ -1817,6 +1836,9 @@ out:
 * they've checked that the inode could really be deleted.  We serialize
 * on a bit in the lock data so that we only have one deletion attempt
 * per inode under this mount's cluster lock.
+ *
+ * Returns -EAGAIN if we either did some cleanup work or are unable to finish
+ * cleaning up this inode right now.
 */
 static int try_delete_inode_items(struct super_block *sb, u64 ino)
 {
@@ -1830,6 +1852,8 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino)
 	int bit_nr;
 	int ret;

+	trace_scoutfs_try_delete(sb, ino);
+
 	ret = scoutfs_lock_ino(sb, SCOUTFS_LOCK_WRITE, 0, ino, &lock);
 	if (ret < 0)
 		goto out;
@@ -1842,27 +1866,32 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino)

 	/* only one local attempt per inode at a time */
 	if (test_and_set_bit(bit_nr, ldata->trying)) {
-		ret = 0;
+		trace_scoutfs_try_delete_local_busy(sb, ino);
+		ret = -EAGAIN;
 		goto out;
 	}
 	clear_trying = true;

 	/* can't delete if it's cached in local or remote mounts */
 	if (scoutfs_omap_test(sb, ino) || test_bit_le(bit_nr, ldata->map.bits)) {
-		ret = 0;
+		trace_scoutfs_try_delete_cached(sb, ino);
+		ret = -EAGAIN;
 		goto out;
 	}

 	scoutfs_inode_init_key(&key, ino);
 	ret = lookup_inode_item(sb, &key, &sinode, lock);
 	if (ret < 0) {
-		if (ret == -ENOENT)
+		if (ret == -ENOENT) {
+			trace_scoutfs_try_delete_no_item(sb, ino);
 			ret = 0;
+		}
 		goto out;
 	}

 	if (le32_to_cpu(sinode.nlink) > 0) {
-		ret = 0;
+		trace_scoutfs_try_delete_has_links(sb, ino, le32_to_cpu(sinode.nlink));
+		ret = -EAGAIN;
 		goto out;
 	}

@@ -1871,8 +1900,10 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino)
 		goto out;

 	ret = delete_inode_items(sb, ino, &sinode, lock, orph_lock);
-	if (ret == 0)
+	if (ret == 0) {
+		ret = -EAGAIN;
 		scoutfs_inc_counter(sb, inode_deleted);
+	}

 out:
 	if (clear_trying)
@@ -2042,7 +2073,7 @@ void scoutfs_inode_schedule_orphan_dwork(struct super_block *sb)

 		low = (opts.orphan_scan_delay_ms * 80) / 100;
 		high = (opts.orphan_scan_delay_ms * 120) / 100;
-		delay = msecs_to_jiffies(low + prandom_u32_max(high - low)) ?: 1;
+		delay = msecs_to_jiffies(low + get_random_u32_below(high - low)) ?: 1;

 		mod_delayed_work(system_wq, &inf->orphan_scan_dwork, delay);
 	}
@@ -2074,6 +2105,10 @@ void scoutfs_inode_schedule_orphan_dwork(struct super_block *sb)
 * a locally cached inode.  Then we ask the server for the open map
 * containing the inode.  Only if we don't see any cached users do we do
 * the expensive work of acquiring locks to try and delete the items.
+ *
+ * We need to track whether there is any orphan cleanup work remaining so
+ * that tests such as inode-deletion can watch the orphan_scan_empty counter
+ * to determine when inode cleanup from open-unlink scenarios is complete.
 */
 static void inode_orphan_scan_worker(struct work_struct *work)
 {
@@ -2085,11 +2120,14 @@ static void inode_orphan_scan_worker(struct work_struct *work)
 	SCOUTFS_BTREE_ITEM_REF(iref);
 	struct scoutfs_key last;
 	struct scoutfs_key key;
+	bool work_todo = false;
 	u64 group_nr;
 	int bit_nr;
 	u64 ino;
 	int ret;

+	trace_scoutfs_orphan_scan_start(sb);
+
 	scoutfs_inc_counter(sb, orphan_scan);

 	init_orphan_key(&last, U64_MAX);
@@ -2109,8 +2147,10 @@ static void inode_orphan_scan_worker(struct work_struct *work)
 		init_orphan_key(&key, ino);
 		ret = scoutfs_btree_next(sb, &roots.fs_root, &key, &iref);
 		if (ret < 0) {
-			if (ret == -ENOENT)
+			if (ret == -ENOENT) {
+				trace_scoutfs_orphan_scan_work(sb, 0);
 				break;
+			}
 			goto out;
 		}

@@ -2125,6 +2165,7 @@ static void inode_orphan_scan_worker(struct work_struct *work)

 		/* locally cached inodes will try to delete as they evict */
 		if (scoutfs_omap_test(sb, ino)) {
+			work_todo = true;
 			scoutfs_inc_counter(sb, orphan_scan_cached);
 			continue;
 		}
@@ -2140,13 +2181,22 @@ static void inode_orphan_scan_worker(struct work_struct *work)

 		/* remote cached inodes will also try to delete */
 		if (test_bit_le(bit_nr, omap.bits)) {
+			work_todo = true;
 			scoutfs_inc_counter(sb, orphan_scan_omap_set);
 			continue;
 		}

 		/* seemingly orphaned and unused, get locks and check for sure */
 		scoutfs_inc_counter(sb, orphan_scan_attempts);
+		trace_scoutfs_orphan_scan_work(sb, ino);
+
 		ret = try_delete_inode_items(sb, ino);
+		if (ret == -EAGAIN) {
+			work_todo = true;
+			ret = 0;
+		}
+
+		trace_scoutfs_orphan_scan_end(sb, ino, ret);
 	}

 	ret = 0;
@@ -2155,6 +2205,11 @@ out:
 	if (ret < 0)
 		scoutfs_inc_counter(sb, orphan_scan_error);

+	if (!work_todo)
+		scoutfs_inc_counter(sb, orphan_scan_empty);
+
+	trace_scoutfs_orphan_scan_stop(sb, work_todo);
+
 	scoutfs_inode_schedule_orphan_dwork(sb);
 }

--- a/kmod/src/item.c
+++ b/kmod/src/item.c
@@ -146,7 +146,7 @@ struct cached_item {
 	unsigned int val_len;
 	struct scoutfs_key key;
 	u64 seq;
-	char val[0];
+	char val[];
 };

 #define CACHED_ITEM_ALIGN 8
@@ -424,7 +424,7 @@ static struct cached_item *alloc_item(struct cached_page *pg,
 	item->seq = seq;

 	if (val_len)
-		memcpy(item->val, val, val_len);
+		memcpy(&item->val[0], val, val_len);

 	update_pg_max_seq(pg, item);

@@ -1999,7 +1999,7 @@ int scoutfs_item_update(struct super_block *sb, struct scoutfs_key *key,

 	if (val_len <= found->val_len) {
 		if (val_len)
-			memcpy(found->val, val, val_len);
+			memcpy(&found->val[0], val, val_len);
 		if (val_len < found->val_len)
 			pg->erased_bytes += item_val_bytes(found->val_len) -
 					    item_val_bytes(val_len);
@@ -2316,7 +2316,7 @@ int scoutfs_item_write_dirty(struct super_block *sb)
 			lst->seq = item->seq;
 			lst->flags = item->deletion ? SCOUTFS_ITEM_FLAG_DELETION : 0;
 			lst->val_len = item->val_len;
-			memcpy(lst->val, item->val, item->val_len);
+			memcpy(&lst->val[0], item->val, item->val_len);
 		}

 		spin_lock(&cinf->dirty_lock);
@@ -2626,10 +2626,10 @@ int scoutfs_item_setup(struct super_block *sb)

 	for_each_possible_cpu(cpu)
 		init_pcpu_pages(cinf, cpu);
-
-	KC_INIT_SHRINKER_FUNCS(&cinf->shrinker, item_cache_count_objects,
-			       item_cache_scan_objects);
-	KC_REGISTER_SHRINKER(&cinf->shrinker, "scoutfs-item:" SCSBF, SCSB_ARGS(sb));
+	KC_SETUP_SHRINKER(cinf->shrinker, cinf, 0, item_cache_count_objects,
+			  item_cache_scan_objects, "scoutfs-item:" SCSBF, SCSB_ARGS(sb));
+	if (KC_SHRINKER_IS_NULL(cinf->shrinker))
+		return -ENOMEM;
 #ifdef KC_CPU_NOTIFIER
        cinf->notifier.notifier_call = item_cpu_callback;
        register_hotcpu_notifier(&cinf->notifier);
@@ -2654,7 +2654,7 @@ void scoutfs_item_destroy(struct super_block *sb)
 #ifdef KC_CPU_NOTIFIER
 		unregister_hotcpu_notifier(&cinf->notifier);
 #endif
-		KC_UNREGISTER_SHRINKER(&cinf->shrinker);
+		KC_UNREGISTER_SHRINKER(cinf->shrinker);

 		for_each_possible_cpu(cpu)
 			drop_pcpu_pages(sb, cinf, cpu);
--- a/kmod/src/kernelcompat.c
+++ b/kmod/src/kernelcompat.c
@@ -147,3 +147,13 @@ unsigned long kc_list_lru_shrink_walk(struct list_lru *lru, struct shrink_contro
 }

 #endif
+
+#ifndef KC_FS_INODE_C_TIME_ACCESSOR
+struct timespec64 inode_set_ctime_current(struct inode *inode)
+{
+	struct timespec64 now = current_time(inode);
+
+	inode_set_ctime(inode, now.tv_sec, now.tv_nsec);
+	return now;
+}
+#endif
--- a/kmod/src/kernelcompat.h
+++ b/kmod/src/kernelcompat.h
@@ -142,25 +142,54 @@ struct timespec64 kc_current_time(struct inode *inode);
 #define kc_timespec timespec64
 #endif

-#ifndef KC_SHRINKER_SHRINK
+#ifdef KC_SHRINKER_ALLOC
+// el10+

-#define KC_DEFINE_SHRINKER(name) struct shrinker name
-#define KC_INIT_SHRINKER_FUNCS(name, countfn, scanfn) do {	\
-	__typeof__(name) _shrink = (name);			\
-	_shrink->count_objects = (countfn);			\
-	_shrink->scan_objects = (scanfn);			\
-	_shrink->seeks = DEFAULT_SEEKS;			\
+#define KC_DEFINE_SHRINKER(name) struct shrinker *(name)	/* pointer, filled in by KC_SETUP_SHRINKER */
+#define KC_SHRINKER_CONTAINER_OF(ptr, type) ((ptr)->private_data)	/* type unused, priv is stored directly */
+#define KC_SETUP_SHRINKER(ptr, priv, flags, countfn, scanfn, fmt, args)	\
+do {								\
+	(ptr) = shrinker_alloc(flags, fmt, args);		\
+	if (ptr) {						\
+		(ptr)->private_data = (priv);			\
+		(ptr)->seeks = DEFAULT_SEEKS;			\
+		(ptr)->count_objects = (countfn);		\
+		(ptr)->scan_objects = (scanfn);			\
+		shrinker_register(ptr);				\
+	}							\
 } while (0)
+#define KC_UNREGISTER_SHRINKER(ptr) shrinker_free(ptr)
+#define KC_SHRINKER_FN(ptr) (ptr)
+#define KC_SHRINKER_IS_NULL(ptr) (!(ptr))	/* allocation in KC_SETUP_SHRINKER failed */

+#else /* KC_SHRINKER_ALLOC */
+#ifndef KC_SHRINKER_SHRINK
+// el9, el8
+
+#define KC_DEFINE_SHRINKER(name) struct shrinker (name)
 #define KC_SHRINKER_CONTAINER_OF(ptr, type) container_of(ptr, type, shrinker)
 #ifdef KC_SHRINKER_NAME
-#define KC_REGISTER_SHRINKER register_shrinker
+#define KC_SETUP_SHRINKER(ptr, priv, flags, countfn, scanfn, fmt, args)	\
+do {								\
+	(ptr).count_objects = (countfn);			\
+	(ptr).scan_objects = (scanfn);				\
+	(ptr).seeks = DEFAULT_SEEKS;				\
+	register_shrinker(&(ptr), fmt, args);			\
+} while (0)
 #else
-#define KC_REGISTER_SHRINKER(ptr, fmt, ...) (register_shrinker(ptr))
+#define KC_SETUP_SHRINKER(ptr, priv, flags, countfn, scanfn, fmt, args)	\
+do {								\
+	(ptr).count_objects = (countfn);			\
+	(ptr).scan_objects = (scanfn);				\
+	(ptr).seeks = DEFAULT_SEEKS;				\
+	register_shrinker(&(ptr));				\
+} while (0)
 #endif /* KC_SHRINKER_NAME */
-#define KC_UNREGISTER_SHRINKER(ptr) (unregister_shrinker(ptr))
-#define KC_SHRINKER_FN(ptr) (ptr)
-#else
+#define KC_UNREGISTER_SHRINKER(ptr) (unregister_shrinker(&(ptr)))
+#define KC_SHRINKER_FN(ptr) (&(ptr))	/* shrinker is embedded by value in this variant */
+
+#else /* KC_SHRINKER_SHRINK */
+// el7

 #include <linux/shrinker.h>
 #ifndef SHRINK_STOP
@@ -176,19 +205,21 @@ struct kc_shrinker_wrapper {
 };

 #define KC_DEFINE_SHRINKER(name) struct kc_shrinker_wrapper name;
-#define KC_INIT_SHRINKER_FUNCS(name, countfn, scanfn) do {	\
-	struct kc_shrinker_wrapper *_wrap = (name);		\
-	_wrap->count_objects = (countfn);			\
-	_wrap->scan_objects = (scanfn);				\
-	_wrap->shrink.shrink = kc_shrink_wrapper_fn;		\
-	_wrap->shrink.seeks = DEFAULT_SEEKS;			\
-} while (0)
 #define KC_SHRINKER_CONTAINER_OF(ptr, type) container_of(container_of(ptr, struct kc_shrinker_wrapper, shrink), type, shrinker)
-#define KC_REGISTER_SHRINKER(ptr, fmt, ...) (register_shrinker(ptr.shrink))
-#define KC_UNREGISTER_SHRINKER(ptr) (unregister_shrinker(ptr.shrink))
-#define KC_SHRINKER_FN(ptr) (ptr.shrink)
+#define KC_SETUP_SHRINKER(ptr, priv, flags, countfn, scanfn, fmt, args)	\
+do {								\
+	(ptr).count_objects = (countfn);			\
+	(ptr).scan_objects = (scanfn);				\
+	(ptr).shrink.shrink = kc_shrink_wrapper_fn;		\
+	(ptr).shrink.seeks = DEFAULT_SEEKS;			\
+	register_shrinker(&(ptr).shrink);			\
+} while (0)
+#define KC_UNREGISTER_SHRINKER(ptr) (unregister_shrinker(&(ptr).shrink))
+#define KC_SHRINKER_FN(ptr) (&(ptr).shrink)

 #endif /* KC_SHRINKER_SHRINK */
+#define KC_SHRINKER_IS_NULL(ptr) (0)
+#endif /* KC_SHRINKER_ALLOC */

 #ifdef KC_KERNEL_GETSOCKNAME_ADDRLEN
 #include <linux/net.h>
@@ -279,6 +310,12 @@ typedef unsigned int blk_opf_t;
 #endif
 #endif /* KC_VFS_METHOD_MNT_IDMAP_ARG */

+#ifdef KC_GENERIC_FILLATTR_REQUEST_MASK
+#define KC_FILLATTR_REQUEST_MASK request_mask,
+#else
+#define KC_FILLATTR_REQUEST_MASK
+#endif
+
 #ifdef KC_BIO_ALLOC_DEV_OPF_ARGS
 #define kc_bio_alloc bio_alloc
 #else
@@ -452,6 +489,7 @@ unsigned long kc_list_lru_shrink_walk(struct list_lru *lru, struct shrink_contro
 				      kc_list_lru_walk_cb_t isolate, void *cb_arg);
 #else
 #define kc_list_lru_shrink_walk list_lru_shrink_walk
+#define kc_list_lru_walk list_lru_walk
 #endif

 #if defined(KC_LIST_LRU_WALK_CB_ITEM_LOCK)
@@ -489,4 +527,133 @@ static inline void stack_trace_print(unsigned long *entries, unsigned int nr_ent
 }
 #endif

+#ifndef KC_HAVE_GET_RANDOM_U32_BELOW
+#define get_random_u32_below prandom_u32_max
+#endif
+
+#ifndef KC_FS_INODE_C_TIME_ACCESSOR
+struct timespec64 inode_set_ctime_current(struct inode *inode);
+static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
+						      struct timespec64 ts)
+{
+	inode->i_ctime.tv_sec = ts.tv_sec;
+	inode->i_ctime.tv_nsec = ts.tv_nsec;
+	return ts;
+}
+
+static inline struct timespec64 inode_set_ctime(struct inode *inode,
+						time64_t sec, long nsec)
+{
+	struct timespec64 ts = { .tv_sec  = sec,
+				 .tv_nsec = nsec };
+
+	return inode_set_ctime_to_ts(inode, ts);
+}
+
+static inline struct timespec64 inode_get_ctime(const struct inode *inode)
+{
+	struct timespec64 ts = { .tv_sec  = inode->i_ctime.tv_sec,
+				 .tv_nsec = inode->i_ctime.tv_nsec };
+	return ts;
+}
+#endif
+
+#ifndef KC_FS_INODE_AM_TIME_ACCESSOR
+static inline struct timespec64 inode_get_mtime(const struct inode *inode)
+{
+	struct timespec64 ts = { .tv_sec  = inode->i_mtime.tv_sec,
+				 .tv_nsec = inode->i_mtime.tv_nsec };
+	return ts;
+}
+
+static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode,
+						      struct timespec64 ts)
+{
+	inode->i_mtime.tv_sec = ts.tv_sec;
+	inode->i_mtime.tv_nsec = ts.tv_nsec;
+	return ts;
+}
+
+static inline struct timespec64 inode_set_mtime(struct inode *inode,
+						time64_t sec, long nsec)
+{
+	struct timespec64 ts = { .tv_sec  = sec,
+				 .tv_nsec = nsec };
+
+	return inode_set_mtime_to_ts(inode, ts);
+}
+
+static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode,
+						      struct timespec64 ts)
+{
+	inode->i_atime.tv_sec = ts.tv_sec;
+	inode->i_atime.tv_nsec = ts.tv_nsec;
+	return ts;
+}
+
+static inline struct timespec64 inode_set_atime(struct inode *inode,
+						time64_t sec, long nsec)
+{
+	struct timespec64 ts = { .tv_sec  = sec,
+				 .tv_nsec = nsec };
+
+	return inode_set_atime_to_ts(inode, ts);
+}
+
+static inline time64_t inode_get_ctime_sec(const struct inode *inode)
+{
+	return inode->i_ctime.tv_sec;
+}
+static inline long inode_get_ctime_nsec(const struct inode *inode)
+{
+	return inode->i_ctime.tv_nsec;
+}
+static inline time64_t inode_get_mtime_sec(const struct inode *inode)
+{
+	return inode->i_mtime.tv_sec;
+}
+static inline long inode_get_mtime_nsec(const struct inode *inode)
+{
+	return inode->i_mtime.tv_nsec;
+}
+static inline time64_t inode_get_atime_sec(const struct inode *inode)
+{
+	return inode->i_atime.tv_sec;
+}
+static inline long inode_get_atime_nsec(const struct inode *inode)
+{
+	return inode->i_atime.tv_nsec;
+}
+#endif
+
+#ifdef KC_HAVE_BD_INODE
+#define KC_BDEV_INODE(b) (b)->bd_inode
+#define KC_BDEV_MAPPING(b) (b)->bd_inode->i_mapping
+#else
+#define KC_BDEV_INODE(b) (b)->bd_mapping->host
+#define KC_BDEV_MAPPING(b) (b)->bd_mapping
+#endif
+
+#ifdef KC_HAVE_ASSIGN_STR_PARMS
+#define kc__assign_str(a, b) __assign_str(a, b)
+#else
+#define kc__assign_str(a, b) __assign_str(a)
+#endif
+
+#ifdef KC_KOBJECT_DEFAULT_GROUPS
+#define KC_KOBJ_DEFAULT_OP default_groups
+#define KC_KOBJ_DEFAULT(name) (name##_groups)
+#define KC_KOBJ_DEFAULT_PICK(group, attrs) (group)
+#else
+#define KC_KOBJ_DEFAULT_OP default_attrs
+#define KC_KOBJ_DEFAULT(name) (name##_attrs)
+#define KC_KOBJ_DEFAULT_PICK(group, attrs) (attrs)
+#endif
+
+#ifdef KC_BLOCK_WRITE_BEGIN_FOLIO
+#define KC_PAGE_OR_FOLIO(p, f) f
+#else
+#define KC_PAGE_OR_FOLIO(p, f) p
+#endif
+
 #endif
--- a/kmod/src/lock.c
+++ b/kmod/src/lock.c
@@ -53,8 +53,10 @@
 * all access to the lock (by revoking it down to a null mode) then the
 * lock is freed.
 *
- * Memory pressure on the client can cause the client to request a null
- * mode from the server so that once its granted the lock can be freed.
+ * Each client has a configurable number of locks that are allowed to
+ * remain idle after being granted, for use by future tasks.  Past the
+ * limit locks are freed by requesting a null mode from the server,
+ * governed by an LRU.
 *
 * So far we've only needed a minimal trylock.  We return -EAGAIN if a
 * lock attempt can't immediately match an existing granted lock.  This
@@ -79,14 +81,11 @@ struct lock_info {
 	bool unmounting;
 	struct rb_root lock_tree;
 	struct rb_root lock_range_tree;
-	KC_DEFINE_SHRINKER(shrinker);
+	u64 nr_locks;
 	struct list_head lru_list;
-	unsigned long long lru_nr;
 	struct workqueue_struct *workq;
 	struct work_struct inv_work;
 	struct list_head inv_list;
-	struct work_struct shrink_work;
-	struct list_head shrink_list;
 	atomic64_t next_refresh_gen;

 	struct dentry *tseq_dentry;
@@ -249,7 +248,6 @@ static void lock_free(struct lock_info *linfo, struct scoutfs_lock *lock)
 	BUG_ON(!RB_EMPTY_NODE(&lock->range_node));
 	BUG_ON(!list_empty(&lock->lru_head));
 	BUG_ON(!list_empty(&lock->inv_head));
-	BUG_ON(!list_empty(&lock->shrink_head));
 	BUG_ON(!list_empty(&lock->cov_list));

 	kfree(lock->inode_deletion_data);
@@ -277,7 +275,6 @@ static struct scoutfs_lock *lock_alloc(struct super_block *sb,
 	INIT_LIST_HEAD(&lock->lru_head);
 	INIT_LIST_HEAD(&lock->inv_head);
 	INIT_LIST_HEAD(&lock->inv_list);
-	INIT_LIST_HEAD(&lock->shrink_head);
 	spin_lock_init(&lock->cov_list_lock);
 	INIT_LIST_HEAD(&lock->cov_list);

@@ -410,6 +407,7 @@ static bool lock_insert(struct super_block *sb, struct scoutfs_lock *ins)
 	rb_link_node(&ins->node, parent, node);
 	rb_insert_color(&ins->node, &linfo->lock_tree);

+	linfo->nr_locks++;
 	scoutfs_tseq_add(&linfo->tseq_tree, &ins->tseq_entry);

 	return true;
@@ -424,6 +422,7 @@ static void lock_remove(struct lock_info *linfo, struct scoutfs_lock *lock)
 	rb_erase(&lock->range_node, &linfo->lock_range_tree);
 	RB_CLEAR_NODE(&lock->range_node);

+	linfo->nr_locks--;
 	scoutfs_tseq_del(&linfo->tseq_tree, &lock->tseq_entry);
 }

@@ -463,10 +462,8 @@ static void __lock_del_lru(struct lock_info *linfo, struct scoutfs_lock *lock)
 {
 	assert_spin_locked(&linfo->lock);

-	if (!list_empty(&lock->lru_head)) {
+	if (!list_empty(&lock->lru_head))
 		list_del_init(&lock->lru_head);
-		linfo->lru_nr--;
-	}
 }

 /*
@@ -525,14 +522,16 @@ static struct scoutfs_lock *create_lock(struct super_block *sb,
 * indicate that the lock wasn't idle.  If it really is idle then we
 * either free it if it's null or put it back on the lru.
 */
-static void put_lock(struct lock_info *linfo,struct scoutfs_lock *lock)
+static void __put_lock(struct lock_info *linfo, struct scoutfs_lock *lock, bool tail)
 {
 	assert_spin_locked(&linfo->lock);

 	if (lock_idle(lock)) {
 		if (lock->mode != SCOUTFS_LOCK_NULL) {
-			list_add_tail(&lock->lru_head, &linfo->lru_list);
-			linfo->lru_nr++;
+			if (tail)
+				list_add_tail(&lock->lru_head, &linfo->lru_list);
+			else
+				list_add(&lock->lru_head, &linfo->lru_list);
 		} else {
 			lock_remove(linfo, lock);
 			lock_free(linfo, lock);
@@ -540,6 +539,11 @@ static void put_lock(struct lock_info *linfo,struct scoutfs_lock *lock)
 	}
 }

+static inline void put_lock(struct lock_info *linfo, struct scoutfs_lock *lock)
+{
+	__put_lock(linfo, lock, true);
+}
+
 /*
 * The caller has made a change (set a lock mode) which can let one of the
 * invalidating locks make forward progress.
@@ -713,14 +717,14 @@ static void lock_invalidate_worker(struct work_struct *work)
 		/* only lock protocol, inv can't call subsystems after shutdown */
 		if (!linfo->shutdown) {
 			ret = lock_invalidate(sb, lock, nl->old_mode, nl->new_mode);
-			BUG_ON(ret);
+			BUG_ON(ret < 0 && ret != -ENOLINK);
 		}

 		/* respond with the key and modes from the request, server might have died */
 		ret = scoutfs_client_lock_response(sb, ireq->net_id, nl);
 		if (ret == -ENOTCONN)
 			ret = 0;
-		BUG_ON(ret);
+		BUG_ON(ret < 0 && ret != -ENOLINK);

 		scoutfs_inc_counter(sb, lock_invalidate_response);
 	}
@@ -875,6 +879,69 @@ int scoutfs_lock_recover_request(struct super_block *sb, u64 net_id,
 	return ret;
 }

+/*
+ * This is called on every _lock call to try and keep the number of
+ * locks under the idle count.  We're intentionally trying to throttle
+ * shrinking bursts by tying its frequency to lock use.  It will only
+ * send requests to free unused locks, though, so it's always possible
+ * to exceed the high water mark under heavy load.
+ *
+ * We send a null request and the lock will be freed by the response
+ * once all users drain, so we trace it under the spinlock and never
+ * touch it after a successful send.  If this races with invalidation
+ * the server only sends the grant response once that has finished.
+ */
+static bool try_shrink_lock(struct super_block *sb, struct lock_info *linfo, bool force)
+{
+	struct scoutfs_mount_options opts;
+	struct scoutfs_lock *lock = NULL;
+	struct scoutfs_net_lock nl;
+	int ret = 0;
+
+	scoutfs_options_read(sb, &opts);
+
+	/* avoiding lock contention with unsynchronized test, don't mind temp false results */
+	if (!force && (list_empty(&linfo->lru_list) ||
+	               READ_ONCE(linfo->nr_locks) <= opts.lock_idle_count))
+		return false;
+
+	spin_lock(&linfo->lock);
+
+	lock = list_first_entry_or_null(&linfo->lru_list, struct scoutfs_lock, lru_head);
+	if (lock && (force || (linfo->nr_locks > opts.lock_idle_count))) {
+		__lock_del_lru(linfo, lock);
+		lock->request_pending = 1;
+		trace_scoutfs_lock_shrink(sb, lock);
+
+		nl.key = lock->start;
+		nl.old_mode = lock->mode;
+		nl.new_mode = SCOUTFS_LOCK_NULL;
+	} else {
+		lock = NULL;
+	}
+
+	spin_unlock(&linfo->lock);
+
+	if (lock) {
+		ret = scoutfs_client_lock_request(sb, &nl);
+		if (ret < 0) {
+			scoutfs_inc_counter(sb, lock_shrink_request_failed);
+
+			spin_lock(&linfo->lock);
+
+			lock->request_pending = 0;
+			wake_up(&lock->waitq);
+			__put_lock(linfo, lock, false);
+
+			spin_unlock(&linfo->lock);
+		} else {
+			scoutfs_inc_counter(sb, lock_shrink_attempted);
+		}
+	}
+
+	return lock && ret == 0;	/* true only when a null request was sent */
+}
+
 static bool lock_wait_cond(struct super_block *sb, struct scoutfs_lock *lock,
 			   enum scoutfs_lock_mode mode)
 {
@@ -937,6 +1004,8 @@ static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, i
 	if (WARN_ON_ONCE(scoutfs_trans_held()))
 		return -EDEADLK;

+	try_shrink_lock(sb, linfo, false);
+
 	spin_lock(&linfo->lock);

 	/* drops and re-acquires lock if it allocates */
@@ -1380,134 +1449,12 @@ bool scoutfs_lock_protected(struct scoutfs_lock *lock, struct scoutfs_key *key,
 					  &lock->start, &lock->end) == 0;
 }

-/*
- * The shrink callback got the lock, marked it request_pending, and put
- * it on the shrink list.  We send a null request and the lock will be
- * freed by the response once all users drain.  If this races with
- * invalidation then the server will only send the grant response once
- * the invalidation is finished.
- */
-static void lock_shrink_worker(struct work_struct *work)
-{
-	struct lock_info *linfo = container_of(work, struct lock_info,
-					       shrink_work);
-	struct super_block *sb = linfo->sb;
-	struct scoutfs_net_lock nl;
-	struct scoutfs_lock *lock;
-	struct scoutfs_lock *tmp;
-	LIST_HEAD(list);
-	int ret;
-
-	scoutfs_inc_counter(sb, lock_shrink_work);
-
-	spin_lock(&linfo->lock);
-	list_splice_init(&linfo->shrink_list, &list);
-	spin_unlock(&linfo->lock);
-
-	list_for_each_entry_safe(lock, tmp, &list, shrink_head) {
-		list_del_init(&lock->shrink_head);
-
-		/* unlocked lock access, but should be stable since we queued */
-		nl.key = lock->start;
-		nl.old_mode = lock->mode;
-		nl.new_mode = SCOUTFS_LOCK_NULL;
-
-		ret = scoutfs_client_lock_request(sb, &nl);
-		if (ret) {
-			/* oh well, not freeing */
-			scoutfs_inc_counter(sb, lock_shrink_aborted);
-
-			spin_lock(&linfo->lock);
-
-			lock->request_pending = 0;
-			wake_up(&lock->waitq);
-			put_lock(linfo, lock);
-
-			spin_unlock(&linfo->lock);
-		}
-	}
-}
-
-static unsigned long lock_count_objects(struct shrinker *shrink,
-					struct shrink_control *sc)
-{
-	struct lock_info *linfo = KC_SHRINKER_CONTAINER_OF(shrink, struct lock_info);
-	struct super_block *sb = linfo->sb;
-
-	scoutfs_inc_counter(sb, lock_count_objects);
-
-	return shrinker_min_long(linfo->lru_nr);
-}
-
-/*
- * Start the shrinking process for locks on the lru.  If a lock is on
- * the lru then it can't have any active users.  We don't want to block
- * or allocate here so all we do is get the lock, mark it request
- * pending, and kick off the work.  The work sends a null request and
- * eventually the lock is freed by its response.
- *
- * Only a racing lock attempt that isn't matched can prevent the lock
- * from being freed.  It'll block waiting to send its request for its
- * mode which will prevent the lock from being freed when the null
- * response arrives.
- */
-static unsigned long lock_scan_objects(struct shrinker *shrink,
-				       struct shrink_control *sc)
-{
-	struct lock_info *linfo = KC_SHRINKER_CONTAINER_OF(shrink, struct lock_info);
-	struct super_block *sb = linfo->sb;
-	struct scoutfs_lock *lock;
-	struct scoutfs_lock *tmp;
-	unsigned long freed = 0;
-	unsigned long nr = sc->nr_to_scan;
-	bool added = false;
-
-	scoutfs_inc_counter(sb, lock_scan_objects);
-
-	spin_lock(&linfo->lock);
-
-restart:
-	list_for_each_entry_safe(lock, tmp, &linfo->lru_list, lru_head) {
-
-		BUG_ON(!lock_idle(lock));
-		BUG_ON(lock->mode == SCOUTFS_LOCK_NULL);
-		BUG_ON(!list_empty(&lock->shrink_head));
-
-		if (nr-- == 0)
-			break;
-
-		__lock_del_lru(linfo, lock);
-		lock->request_pending = 1;
-		list_add_tail(&lock->shrink_head, &linfo->shrink_list);
-		added = true;
-		freed++;
-
-		scoutfs_inc_counter(sb, lock_shrink_attempted);
-		trace_scoutfs_lock_shrink(sb, lock);
-
-		/* could have bazillions of idle locks */
-		if (cond_resched_lock(&linfo->lock))
-			goto restart;
-	}
-
-	spin_unlock(&linfo->lock);
-
-	if (added)
-		queue_work(linfo->workq, &linfo->shrink_work);
-
-	trace_scoutfs_lock_shrink_exit(sb, sc->nr_to_scan, freed);
-	return freed;
-}
-
 void scoutfs_free_unused_locks(struct super_block *sb)
 {
-	struct lock_info *linfo = SCOUTFS_SB(sb)->lock_info;
-	struct shrink_control sc = {
-		.gfp_mask = GFP_NOFS,
-		.nr_to_scan = INT_MAX,
-	};
+	DECLARE_LOCK_INFO(sb, linfo);

-	lock_scan_objects(KC_SHRINKER_FN(&linfo->shrinker), &sc);
+	while (try_shrink_lock(sb, linfo, true))
+		cond_resched();
 }

 static void lock_tseq_show(struct seq_file *m, struct scoutfs_tseq_entry *ent)
@@ -1590,10 +1537,10 @@ u64 scoutfs_lock_ino_refresh_gen(struct super_block *sb, u64 ino)
 * transitions and sending requests.   We set the shutdown flag to catch
 * anyone who breaks this rule.
 *
- * We unregister the shrinker so that we won't try and send null
- * requests in response to memory pressure.  The locks will all be
- * unceremoniously dropped once we get a farewell response from the
- * server which indicates that they destroyed our locking state.
+ * With no more lock callers, we'll no longer try to shrink the pool of
+ * granted locks.  We'll free all of them as _destroy() is called after
+ * the farewell response indicates that the server tore down all our
+ * lock state.
 *
 * We will still respond to invalidation requests that have to be
 * processed to let unmount in other mounts acquire locks and make
@@ -1613,10 +1560,6 @@ void scoutfs_lock_shutdown(struct super_block *sb)

 	trace_scoutfs_lock_shutdown(sb, linfo);

-	/* stop the shrinker from queueing work */
-	KC_UNREGISTER_SHRINKER(&linfo->shrinker);
-	flush_work(&linfo->shrink_work);
-
 	/* cause current and future lock calls to return errors */
 	spin_lock(&linfo->lock);
 	linfo->shutdown = true;
@@ -1707,8 +1650,6 @@ void scoutfs_lock_destroy(struct super_block *sb)
 			list_del_init(&lock->inv_head);
 			lock->invalidate_pending = 0;
 		}
-		if (!list_empty(&lock->shrink_head))
-			list_del_init(&lock->shrink_head);
 		lock_remove(linfo, lock);
 		lock_free(linfo, lock);
 	}
@@ -1733,14 +1674,9 @@ int scoutfs_lock_setup(struct super_block *sb)
 	spin_lock_init(&linfo->lock);
 	linfo->lock_tree = RB_ROOT;
 	linfo->lock_range_tree = RB_ROOT;
-	KC_INIT_SHRINKER_FUNCS(&linfo->shrinker, lock_count_objects,
-			       lock_scan_objects);
-	KC_REGISTER_SHRINKER(&linfo->shrinker, "scoutfs-lock:" SCSBF, SCSB_ARGS(sb));
 	INIT_LIST_HEAD(&linfo->lru_list);
 	INIT_WORK(&linfo->inv_work, lock_invalidate_worker);
 	INIT_LIST_HEAD(&linfo->inv_list);
-	INIT_WORK(&linfo->shrink_work, lock_shrink_worker);
-	INIT_LIST_HEAD(&linfo->shrink_list);
 	atomic64_set(&linfo->next_refresh_gen, 0);
 	scoutfs_tseq_tree_init(&linfo->tseq_tree, lock_tseq_show);

--- a/kmod/src/lock_server.c
+++ b/kmod/src/lock_server.c
@@ -506,6 +506,19 @@ out:
 * because we don't know which locks they'll hold.  Once recover
 * finishes the server calls us to kick all the locks that were waiting
 * during recovery.
+ *
+ * The calling server shuts down if we return errors indicating that we
+ * weren't able to ensure forward progress in the lock state machine.
+ *
+ * Failure to send to a disconnected client is not a fatal error.
+ * During normal disconnection the client's state is removed before
+ * their connection is destroyed.  We can't use state to try and send to
+ * a non-existing connection.  But a client that fails to reconnect is
+ * disconnected before being fenced.  If we have multiple disconnected
+ * clients we can try to send to one while cleaning up another.  If
+ * they've uncleanly disconnected their locks are going to be removed
+ * and the lock can make forward progress again.  Or we'll shut down for
+ * failure to fence.
 */
 static int process_waiting_requests(struct super_block *sb,
 				    struct server_lock_node *snode)
@@ -597,6 +610,10 @@ static int process_waiting_requests(struct super_block *sb,
 out:
 	put_server_lock(inf, snode);

+	/* disconnected clients will be fenced, trying to send to them isn't fatal */
+	if (ret == -ENOTCONN)
+		ret = 0;
+
 	return ret;
 }

--- a/kmod/src/net.c
+++ b/kmod/src/net.c
@@ -21,6 +21,7 @@
 #include <net/tcp.h>
 #include <linux/log2.h>
 #include <linux/jhash.h>
+#include <linux/rbtree.h>

 #include "format.h"
 #include "counters.h"
@@ -125,6 +126,7 @@ struct message_send {
 	unsigned long dead:1;
 	struct list_head head;
 	scoutfs_net_response_t resp_func;
+	struct rb_node node;
 	void *resp_data;
 	struct scoutfs_net_header nh;
 };
@@ -161,49 +163,118 @@ static bool nh_is_request(struct scoutfs_net_header *nh)
 	return !nh_is_response(nh);
 }

+/*
+ * Compare a search position with a message's sort key.  Requests are
+ * keyed by their header id and responses by their header seq.  Returns
+ * -1, 0, or 1 as pos is less than, equal to, or greater than the key.
+ */
+static int cmp_sorted_msend(u64 pos, struct message_send *msend)
+{
+	u64 key;
+
+	if (nh_is_request(&msend->nh))
+		key = le64_to_cpu(msend->nh.id);
+	else
+		key = le64_to_cpu(msend->nh.seq);
+
+	if (pos < key)
+		return -1;
+	if (pos > key)
+		return 1;
+	return 0;
+}
+
+/*
+ * Search the rbtree at root for pos, comparing with cmp_sorted_msend().
+ * Returns the message whose key equals pos or, failing that, the
+ * message with the smallest key greater than pos.  Returns NULL if the
+ * tree is empty or every key is less than pos.
+ *
+ * If ins is non-NULL it is linked into the tree at the point where the
+ * descent ended.  Inserting a key that is already present is a bug.
+ */
+static struct message_send *search_sorted_msends(struct rb_root *root, u64 pos, struct rb_node *ins)
+{
+	struct rb_node **node = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct message_send *msend = NULL;
+	struct message_send *next = NULL;
+	int cmp = -1; /* stays non-zero if the tree is empty */
+
+	while (*node) {
+		parent = *node;
+		msend = container_of(*node, struct message_send, node);
+
+		cmp = cmp_sorted_msend(pos, msend);
+		if (cmp < 0) {
+			/* pos sorts before this message, remember it as a candidate */
+			next = msend;
+			node = &(*node)->rb_left;
+		} else if (cmp > 0) {
+			node = &(*node)->rb_right;
+		} else {
+			next = msend;
+			break;
+		}
+	}
+
+	/* an exact match while inserting means a duplicate key */
+	BUG_ON(cmp == 0 && ins);
+
+	if (ins) {
+		rb_link_node(ins, parent, node);
+		rb_insert_color(ins, root);
+	}
+
+	return next;
+}
+
+/*
+ * Return the message that follows msend in its sorted tree, or NULL if
+ * msend is the last one.
+ */
+static struct message_send *next_sorted_msend(struct message_send *msend)
+{
+	struct rb_node *next = rb_next(&msend->node);
+
+	if (!next)
+		return NULL;
+
+	return rb_entry(next, struct message_send, node);
+}
+
+/*
+ * Iterate over the messages in ROOT_ in sorted order, starting with the
+ * first message whose key is at or after POS_.  TMP_ is loaded with the
+ * successor before each pass through the body, so the body may erase
+ * MSEND_ from the tree.
+ */
+#define for_each_sorted_msend(MSEND_, TMP_, ROOT_, POS_) \
+	for (MSEND_ = search_sorted_msends(ROOT_, POS_, NULL); \
+	     MSEND_ != NULL && ({ TMP_ = next_sorted_msend(MSEND_); true; }); \
+	     MSEND_ = TMP_)
+
+/*
+ * Add a message to the connection's sorted tree for its type: requests
+ * go in req_root keyed by id, responses in resp_root keyed by seq.  The
+ * message must not already be in a tree.
+ */
+static void insert_sorted_msend(struct scoutfs_net_connection *conn, struct message_send *msend)
+{
+	struct rb_root *root;
+	u64 key;
+
+	BUG_ON(!RB_EMPTY_NODE(&msend->node));
+
+	if (nh_is_request(&msend->nh)) {
+		root = &conn->req_root;
+		key = le64_to_cpu(msend->nh.id);
+	} else {
+		root = &conn->resp_root;
+		key = le64_to_cpu(msend->nh.seq);
+	}
+
+	search_sorted_msends(root, key, &msend->node);
+}
+
+/*
+ * Remove a message from the connection's sorted tree for its type and
+ * mark its node empty.  Does nothing if the message isn't in a tree.
+ */
+static void erase_sorted_msend(struct scoutfs_net_connection *conn, struct message_send *msend)
+{
+	struct rb_root *root;
+
+	if (RB_EMPTY_NODE(&msend->node))
+		return;
+
+	root = nh_is_request(&msend->nh) ? &conn->req_root : &conn->resp_root;
+	rb_erase(&msend->node, root);
+	RB_CLEAR_NODE(&msend->node);
+}
+
+/*
+ * Move every message in src_root out of src_conn's sorted tree and into
+ * dst_conn's.  Note that dst_root is not referenced here:
+ * insert_sorted_msend() chooses dst_conn's request or response root
+ * from each message's header.
+ */
+static void move_sorted_msends(struct scoutfs_net_connection *dst_conn, struct rb_root *dst_root,
+			       struct scoutfs_net_connection *src_conn, struct rb_root *src_root)
+{
+	struct message_send *msend;
+	struct message_send *tmp;
+
+	/* starting at position 0 visits every message in the source tree */
+	for_each_sorted_msend(msend, tmp, src_root, 0) {
+		erase_sorted_msend(src_conn, msend);
+		insert_sorted_msend(dst_conn, msend);
+	}
+}
+
 /*
- * We return dead requests so that the caller can stop searching other
- * lists for the dead request that we found.
+ * Pending requests are uniquely identified by the id they were assigned
+ * as they were first put on the send queue.
 */
-static struct message_send *search_list(struct scoutfs_net_connection *conn,
-					struct list_head *list,
-					u8 cmd, u64 id)
+static struct message_send *find_request(struct scoutfs_net_connection *conn, u8 cmd, u64 id)
 {
 	struct message_send *msend;

 	assert_spin_locked(&conn->lock);

-	list_for_each_entry(msend, list, head) {
-		if (nh_is_request(&msend->nh) && msend->nh.cmd == cmd &&
-		    le64_to_cpu(msend->nh.id) == id)
-			return msend;
-	}
-
-	return NULL;
-}
-
-/*
- * Find an active send request on the lists.  It's almost certainly
- * waiting on the resend queue but it could be actively being sent.
- */
-static struct message_send *find_request(struct scoutfs_net_connection *conn,
-					 u8 cmd, u64 id)
-{
-	struct message_send *msend;
-
-	msend = search_list(conn, &conn->resend_queue, cmd, id) ?:
-		search_list(conn, &conn->send_queue, cmd, id);
-	if (msend && msend->dead)
+	msend = search_sorted_msends(&conn->req_root, id, NULL);
+	if (msend && !(msend->nh.cmd == cmd && le64_to_cpu(msend->nh.id) == id))
 		msend = NULL;
+
 	return msend;
 }

 /*
- * Complete a send message by moving it to the send queue and marking it
- * to be freed.  It won't be visible to callers trying to find sends.
+ * Free a send message by moving it to the send queue and marking it
+ * dead.  It is removed from the sorted rb roots so it won't be visible
+ * as a request for response processing.
 */
-static void complete_send(struct scoutfs_net_connection *conn,
-			  struct message_send *msend)
+static void queue_dead_free(struct scoutfs_net_connection *conn, struct message_send *msend)
 {
 	assert_spin_locked(&conn->lock);

@@ -213,6 +284,7 @@ static void complete_send(struct scoutfs_net_connection *conn,

 	msend->dead = 1;
 	list_move(&msend->head, &conn->send_queue);
+	erase_sorted_msend(conn, msend);
 	queue_work(conn->workq, &conn->send_work);
 }

@@ -370,6 +442,7 @@ static int submit_send(struct super_block *sb,
 	msend->resp_func = resp_func;
 	msend->resp_data = resp_data;
 	msend->dead = 0;
+	RB_CLEAR_NODE(&msend->node);

 	msend->nh.seq = cpu_to_le64(seq);
 	msend->nh.recv_seq = 0;  /* set when sent, not when queued */
@@ -390,6 +463,7 @@ static int submit_send(struct super_block *sb,
 	} else {
 		list_add_tail(&msend->head, &conn->resend_queue);
 	}
+	insert_sorted_msend(conn, msend);

 	if (id_ret)
 		*id_ret = le64_to_cpu(msend->nh.id);
@@ -459,7 +533,7 @@ static int process_response(struct scoutfs_net_connection *conn,
 	if (msend) {
 		resp_func = msend->resp_func;
 		resp_data = msend->resp_data;
-		complete_send(conn, msend);
+		queue_dead_free(conn, msend);
 	} else {
 		scoutfs_inc_counter(sb, net_dropped_response);
 	}
@@ -550,43 +624,21 @@ static void queue_ordered_proc(struct scoutfs_net_connection *conn, struct messa
 * Free live responses up to and including the seq by marking them dead
 * and moving them to the send queue to be freed.
 */
-static bool move_acked_responses(struct scoutfs_net_connection *conn,
-				 struct list_head *list, u64 seq)
+static void free_acked_responses(struct scoutfs_net_connection *conn, u64 seq)
 {
 	struct message_send *msend;
 	struct message_send *tmp;
-	bool moved = false;
-
-	assert_spin_locked(&conn->lock);
-
-	list_for_each_entry_safe(msend, tmp, list, head) {
-		if (le64_to_cpu(msend->nh.seq) > seq)
-			break;
-		if (!nh_is_response(&msend->nh) || msend->dead)
-			continue;
-
-		msend->dead = 1;
-		list_move(&msend->head, &conn->send_queue);
-		moved = true;
-	}
-
-	return moved;
-}
-
-/* acks are processed inline in the recv worker */
-static void free_acked_responses(struct scoutfs_net_connection *conn, u64 seq)
-{
-	bool moved;

 	spin_lock(&conn->lock);

-	moved = move_acked_responses(conn, &conn->send_queue, seq) |
-		move_acked_responses(conn, &conn->resend_queue, seq);
+	for_each_sorted_msend(msend, tmp, &conn->resp_root, 0) {
+		if (le64_to_cpu(msend->nh.seq) > seq)
+			break;
+
+		queue_dead_free(conn, msend);
+	}

 	spin_unlock(&conn->lock);
-
-	if (moved)
-		queue_work(conn->workq, &conn->send_work);
 }

 static int k_recvmsg(struct socket *sock, void *buf, unsigned len)
@@ -824,9 +876,11 @@ static int k_sendmsg_full(struct socket *sock, struct kvec *kv, unsigned long nr
 	return ret;
 }

-static void free_msend(struct net_info *ninf, struct message_send *msend)
+static void free_msend(struct net_info *ninf, struct scoutfs_net_connection *conn,
+		       struct message_send *msend)
 {
 	list_del_init(&msend->head);
+	erase_sorted_msend(conn, msend);
 	scoutfs_tseq_del(&ninf->msg_tseq_tree, &msend->tseq_entry);
 	kfree(msend);
 }
@@ -866,9 +920,10 @@ static void scoutfs_net_send_worker(struct work_struct *work)
 		count = 0;

 		spin_lock(&conn->lock);
+
 		list_for_each_entry_safe(msend, _msend_, &conn->send_queue, head) {
 			if (msend->dead) {
-				free_msend(ninf, msend);
+				free_msend(ninf, conn, msend);
 				continue;
 			}

@@ -957,7 +1012,7 @@ static void scoutfs_net_destroy_worker(struct work_struct *work)

 	list_splice_init(&conn->resend_queue, &conn->send_queue);
 	list_for_each_entry_safe(msend, tmp, &conn->send_queue, head)
-		free_msend(ninf, msend);
+		free_msend(ninf, conn, msend);

 	/* accepted sockets are removed from their listener's list */
 	if (conn->listening_conn) {
@@ -1303,7 +1358,7 @@ static void scoutfs_net_shutdown_worker(struct work_struct *work)
 							struct message_send, head))) {
 			resp_func = msend->resp_func;
 			resp_data = msend->resp_data;
-			free_msend(ninf, msend);
+			free_msend(ninf, conn, msend);
 			spin_unlock(&conn->lock);

 			call_resp_func(sb, conn, resp_func, resp_data, NULL, 0, -ECONNABORTED);
@@ -1319,7 +1374,7 @@ static void scoutfs_net_shutdown_worker(struct work_struct *work)
 	list_splice_tail_init(&conn->send_queue, &conn->resend_queue);
 	list_for_each_entry_safe(msend, tmp, &conn->resend_queue, head) {
 		if (msend->nh.cmd == SCOUTFS_NET_CMD_GREETING)
-			free_msend(ninf, msend);
+			free_msend(ninf, conn, msend);
 	}

 	clear_conn_fl(conn, saw_greeting);
@@ -1493,6 +1548,8 @@ scoutfs_net_alloc_conn(struct super_block *sb,
 	atomic64_set(&conn->recv_seq, 0);
 	INIT_LIST_HEAD(&conn->send_queue);
 	INIT_LIST_HEAD(&conn->resend_queue);
+	conn->req_root = RB_ROOT;
+	conn->resp_root = RB_ROOT;
 	INIT_WORK(&conn->listen_work, scoutfs_net_listen_worker);
 	INIT_WORK(&conn->connect_work, scoutfs_net_connect_worker);
 	INIT_WORK(&conn->send_work, scoutfs_net_send_worker);
@@ -1705,7 +1762,7 @@ void scoutfs_net_client_greeting(struct super_block *sb,
 		atomic64_set(&conn->recv_seq, 0);
 		list_for_each_entry_safe(msend, tmp, &conn->resend_queue, head){
 			if (nh_is_response(&msend->nh))
-				free_msend(ninf, msend);
+				free_msend(ninf, conn, msend);
 		}
 	}

@@ -1808,6 +1865,8 @@ restart:
 		BUG_ON(!list_empty(&reconn->send_queue));
 		/* queued greeting response is racing, can be in send or resend queue */
 		list_splice_tail_init(&reconn->resend_queue, &conn->resend_queue);
+		move_sorted_msends(conn, &conn->req_root, reconn, &reconn->req_root);
+		move_sorted_msends(conn, &conn->resp_root, reconn, &reconn->resp_root);

 		/* new conn info is unused, swap, old won't call down */
 		swap(conn->info, reconn->info);
--- a/kmod/src/net.h
+++ b/kmod/src/net.h
@@ -67,6 +67,8 @@ struct scoutfs_net_connection {
 	u64 next_send_id;
 	struct list_head send_queue;
 	struct list_head resend_queue;
+	struct rb_root req_root;
+	struct rb_root resp_root;

 	atomic64_t recv_seq;
 	unsigned int ordered_proc_nr;
--- a/kmod/src/options.c
+++ b/kmod/src/options.c
@@ -34,6 +34,7 @@ enum {
 	Opt_data_prealloc_blocks,
 	Opt_data_prealloc_contig_only,
 	Opt_ino_alloc_per_lock,
+	Opt_lock_idle_count,
 	Opt_log_merge_wait_timeout_ms,
 	Opt_metadev_path,
 	Opt_noacl,
@@ -49,6 +50,7 @@ static const match_table_t tokens = {
 	{Opt_data_prealloc_blocks, "data_prealloc_blocks=%s"},
 	{Opt_data_prealloc_contig_only, "data_prealloc_contig_only=%s"},
 	{Opt_ino_alloc_per_lock, "ino_alloc_per_lock=%s"},
+	{Opt_lock_idle_count, "lock_idle_count=%s"},
 	{Opt_log_merge_wait_timeout_ms, "log_merge_wait_timeout_ms=%s"},
 	{Opt_metadev_path, "metadev_path=%s"},
 	{Opt_noacl, "noacl"},
@@ -119,6 +121,10 @@ static void free_options(struct scoutfs_mount_options *opts)
 	kfree(opts->metadev_path);
 }

+/*
+ * Inclusive bounds and the default for the lock_idle_count option.  The
+ * bounds are enforced by verify_lock_idle_count() and the default is
+ * applied by init_default_options().
+ */
+#define MIN_LOCK_IDLE_COUNT	32
+#define DEFAULT_LOCK_IDLE_COUNT	(10 * 1000)
+#define MAX_LOCK_IDLE_COUNT	(100 * 1000)
+
 #define MIN_LOG_MERGE_WAIT_TIMEOUT_MS		100UL
 #define DEFAULT_LOG_MERGE_WAIT_TIMEOUT_MS	500
 #define MAX_LOG_MERGE_WAIT_TIMEOUT_MS		(60 * MSEC_PER_SEC)
@@ -139,6 +145,7 @@ static void init_default_options(struct scoutfs_mount_options *opts)
 	opts->data_prealloc_blocks = SCOUTFS_DATA_PREALLOC_DEFAULT_BLOCKS;
 	opts->data_prealloc_contig_only = 1;
 	opts->ino_alloc_per_lock = SCOUTFS_LOCK_INODE_GROUP_NR;
+	opts->lock_idle_count = DEFAULT_LOCK_IDLE_COUNT;
 	opts->log_merge_wait_timeout_ms = DEFAULT_LOG_MERGE_WAIT_TIMEOUT_MS;
 	opts->orphan_scan_delay_ms = -1;
 	opts->quorum_heartbeat_timeout_ms = SCOUTFS_QUORUM_DEF_HB_TIMEO_MS;
@@ -146,6 +153,21 @@ static void init_default_options(struct scoutfs_mount_options *opts)
 	opts->tcp_keepalive_timeout_ms = DEFAULT_TCP_KEEPALIVE_TIMEOUT_MS;
 }

+/*
+ * Validate a parsed lock_idle_count value.  ret is the result of the
+ * caller's integer parse and val is the value it produced.  Logs an
+ * error and returns -EINVAL if the parse failed or if val is outside
+ * MIN_LOCK_IDLE_COUNT..MAX_LOCK_IDLE_COUNT, otherwise returns 0.
+ */
+static int verify_lock_idle_count(struct super_block *sb, int ret, int val)
+{
+	if (ret < 0) {
+		scoutfs_err(sb, "failed to parse lock_idle_count value");
+		return -EINVAL;
+	}
+	if (val < MIN_LOCK_IDLE_COUNT || val > MAX_LOCK_IDLE_COUNT) {
+		/* the bounds are plain int constants, so they're printed with %d */
+		scoutfs_err(sb, "invalid lock_idle_count value %d, must be between %d and %d",
+			    val, MIN_LOCK_IDLE_COUNT, MAX_LOCK_IDLE_COUNT);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int verify_log_merge_wait_timeout_ms(struct super_block *sb, int ret, int val)
 {
 	if (ret < 0) {
@@ -261,6 +283,14 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m
 			opts->tcp_keepalive_timeout_ms = nr;
 			break;

+		case Opt_lock_idle_count:
+			ret = match_int(args, &nr);
+			ret = verify_lock_idle_count(sb, ret, nr);
+			if (ret < 0)
+				return ret;
+			opts->lock_idle_count = nr;
+			break;
+
 		case Opt_log_merge_wait_timeout_ms:
 			ret = match_int(args, &nr);
 			ret = verify_log_merge_wait_timeout_ms(sb, ret, nr);
@@ -536,6 +566,43 @@ static ssize_t ino_alloc_per_lock_store(struct kobject *kobj, struct kobj_attrib
 }
 SCOUTFS_ATTR_RW(ino_alloc_per_lock);

+/*
+ * sysfs show method: format the current lock_idle_count option into
+ * buf and return the snprintf() result.  The option is stored as an
+ * int so it is printed with %d.
+ */
+static ssize_t lock_idle_count_show(struct kobject *kobj, struct kobj_attribute *attr,
+						char *buf)
+{
+	struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
+	struct scoutfs_mount_options opts;
+
+	scoutfs_options_read(sb, &opts);
+
+	return snprintf(buf, PAGE_SIZE, "%d", opts.lock_idle_count);
+}
+/*
+ * sysfs store method: parse the written text as an int, validate it
+ * with verify_lock_idle_count(), and store it in the mount options
+ * under the options seqlock.  Returns count on success or the negative
+ * error from verification.
+ */
+static ssize_t lock_idle_count_store(struct kobject *kobj, struct kobj_attribute *attr,
+						 const char *buf, size_t count)
+{
+	struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
+	DECLARE_OPTIONS_INFO(sb, optinf);
+	char nullterm[30]; /* more than enough for octal -U64_MAX */
+	int val;
+	int len;
+	int ret;
+
+	/* copy into a local null terminated buffer, longer input is truncated */
+	len = min(count, sizeof(nullterm) - 1);
+	memcpy(nullterm, buf, len);
+	nullterm[len] = '\0';
+
+	/* the parse result is handed to verify, which turns any failure into -EINVAL */
+	ret = kstrtoint(nullterm, 0, &val);
+	ret = verify_lock_idle_count(sb, ret, val);
+	if (ret == 0) {
+		write_seqlock(&optinf->seqlock);
+		optinf->opts.lock_idle_count = val;
+		write_sequnlock(&optinf->seqlock);
+		ret = count;
+	}
+
+	return ret;
+}
+SCOUTFS_ATTR_RW(lock_idle_count);
+
 static ssize_t log_merge_wait_timeout_ms_show(struct kobject *kobj, struct kobj_attribute *attr,
 						char *buf)
 {
@@ -677,6 +744,7 @@ static struct attribute *options_attrs[] = {
 	SCOUTFS_ATTR_PTR(data_prealloc_blocks),
 	SCOUTFS_ATTR_PTR(data_prealloc_contig_only),
 	SCOUTFS_ATTR_PTR(ino_alloc_per_lock),
+	SCOUTFS_ATTR_PTR(lock_idle_count),
 	SCOUTFS_ATTR_PTR(log_merge_wait_timeout_ms),
 	SCOUTFS_ATTR_PTR(metadev_path),
 	SCOUTFS_ATTR_PTR(orphan_scan_delay_ms),
@@ -684,13 +752,18 @@ static struct attribute *options_attrs[] = {
 	SCOUTFS_ATTR_PTR(quorum_slot_nr),
 	NULL,
 };
+#ifdef KC_KOBJECT_DEFAULT_GROUPS
+ATTRIBUTE_GROUPS(options);
+#endif

 int scoutfs_options_setup(struct super_block *sb)
 {
 	DECLARE_OPTIONS_INFO(sb, optinf);
 	int ret;

-	ret = scoutfs_sysfs_create_attrs(sb, &optinf->sysfs_attrs, options_attrs, "mount_options");
+	ret = scoutfs_sysfs_create_attrs(sb, &optinf->sysfs_attrs,
+					 KC_KOBJ_DEFAULT_PICK(options_groups, options_attrs),
+					 "mount_options");
 	if (ret < 0)
 		scoutfs_options_destroy(sb);
 	return ret;
--- a/kmod/src/options.h
+++ b/kmod/src/options.h
@@ -9,6 +9,7 @@ struct scoutfs_mount_options {
 	u64 data_prealloc_blocks;
 	bool data_prealloc_contig_only;
 	unsigned int ino_alloc_per_lock;
+	int lock_idle_count;
 	unsigned int log_merge_wait_timeout_ms;
 	char *metadev_path;
 	unsigned int orphan_scan_delay_ms;
--- a/kmod/src/quorum.c
+++ b/kmod/src/quorum.c
@@ -162,7 +162,7 @@ static void quorum_slot_sin(struct scoutfs_quorum_config *qconf, int i, struct s
 static ktime_t election_timeout(void)
 {
 	return ktime_add_ms(ktime_get(), SCOUTFS_QUORUM_ELECT_MIN_MS +
-				 prandom_u32_max(SCOUTFS_QUORUM_ELECT_VAR_MS));
+				 get_random_u32_below(SCOUTFS_QUORUM_ELECT_VAR_MS));
 }

 static ktime_t heartbeat_interval(void)
@@ -1192,6 +1192,9 @@ static struct attribute *quorum_attrs[] = {
 	SCOUTFS_ATTR_PTR(is_leader),
 	NULL,
 };
+#ifdef KC_KOBJECT_DEFAULT_GROUPS
+ATTRIBUTE_GROUPS(quorum);
+#endif

 static inline bool valid_ipv4_unicast(__be32 addr)
 {
@@ -1352,7 +1355,7 @@ int scoutfs_quorum_setup(struct super_block *sb)
 	if (ret < 0)
 		goto out;

-	ret = scoutfs_sysfs_create_attrs(sb, &qinf->ssa, quorum_attrs,
+	ret = scoutfs_sysfs_create_attrs(sb, &qinf->ssa, KC_KOBJ_DEFAULT(quorum),
 					 "quorum");
 	if (ret < 0)
 		goto out;
--- a/kmod/src/quota.c
+++ b/kmod/src/quota.c
@@ -204,7 +204,7 @@ static struct squota_check *lookup_random_check(struct rhashtable *rht)

 	tbl = rht_dereference_rcu(rht->tbl, rht);
 	do {
-		for (s = 0, i = prandom_u32_max(tbl->size);
+		for (s = 0, i = get_random_u32_below(tbl->size);
 		     s < tbl->size;
 		     s++, i = (i + 1) % tbl->size) {
 			rht_for_each_entry_rcu(chk, pos, tbl, i, head) {
@@ -269,7 +269,7 @@ static void shrink_all_cached_checks(struct squota_info *qtinf)
 {
 	struct shrink_control sc = { .nr_to_scan = LONG_MAX, };

-	scan_cached_checks(KC_SHRINKER_FN(&qtinf->shrinker), &sc);
+	scan_cached_checks(KC_SHRINKER_FN(qtinf->shrinker), &sc);
 }

 static u8 ns_is_attr(u8 ns)
@@ -1225,8 +1225,12 @@ int scoutfs_quota_setup(struct super_block *sb)
 	spin_lock_init(&qtinf->lock);
 	init_waitqueue_head(&qtinf->waitq);

-	KC_INIT_SHRINKER_FUNCS(&qtinf->shrinker, count_cached_checks, scan_cached_checks);
-	KC_REGISTER_SHRINKER(&qtinf->shrinker, "scoutfs-quota:" SCSBF, SCSB_ARGS(sb));
+	KC_SETUP_SHRINKER(qtinf->shrinker, qtinf, 0, count_cached_checks,
+			  scan_cached_checks, "scoutfs-quota:" SCSBF, SCSB_ARGS(sb));
+	if (KC_SHRINKER_IS_NULL(qtinf->shrinker)) {
+		ret = -ENOMEM;
+		goto out;
+	}

 	sbi->squota_info = qtinf;

@@ -1250,7 +1254,7 @@ void scoutfs_quota_destroy(struct super_block *sb)

 	if (qtinf) {
 		debugfs_remove(qtinf->drop_dentry);
-		KC_UNREGISTER_SHRINKER(&qtinf->shrinker);
+		KC_UNREGISTER_SHRINKER(qtinf->shrinker);

 		spin_lock(&qtinf->lock);
 		rs = rcu_dereference_protected(qtinf->ruleset, lockdep_is_held(&qtinf->lock));
--- a/kmod/src/scoutfs_trace.h
+++ b/kmod/src/scoutfs_trace.h
@@ -102,7 +102,7 @@ TRACE_EVENT(scoutfs_setattr,
 		SCSB_TRACE_ASSIGN(dentry->d_inode->i_sb);
 		__entry->ino = scoutfs_ino(dentry->d_inode);
 		__entry->d_len = dentry->d_name.len;
-		__assign_str(d_name, dentry->d_name.name);
+		kc__assign_str(d_name, dentry->d_name.name);
 		__entry->ia_valid = attr->ia_valid;
 		__entry->size_change = !!(attr->ia_valid & ATTR_SIZE);
 		__entry->ia_size = attr->ia_size;
@@ -789,6 +789,80 @@ TRACE_EVENT(scoutfs_inode_walk_writeback,
 		  __entry->ino, __entry->write, __entry->ret)
 );

+/* Orphan scan start event (per its name); carries only the super block fields. */
+TRACE_EVENT(scoutfs_orphan_scan_start,
+	TP_PROTO(struct super_block *sb),
+
+	TP_ARGS(sb),
+
+	TP_STRUCT__entry(
+		SCSB_TRACE_FIELDS
+	),
+
+	TP_fast_assign(
+		SCSB_TRACE_ASSIGN(sb);
+	),
+
+	TP_printk(SCSBF, SCSB_TRACE_ARGS)
+);
+
+/* Orphan scan stop event (per its name); records the work_todo flag passed by the caller. */
+TRACE_EVENT(scoutfs_orphan_scan_stop,
+	TP_PROTO(struct super_block *sb, bool work_todo),
+
+	TP_ARGS(sb, work_todo),
+
+	TP_STRUCT__entry(
+		SCSB_TRACE_FIELDS
+		__field(bool, work_todo)
+	),
+
+	TP_fast_assign(
+		SCSB_TRACE_ASSIGN(sb);
+		__entry->work_todo = work_todo;
+	),
+
+	TP_printk(SCSBF" work_todo %d", SCSB_TRACE_ARGS, __entry->work_todo)
+);
+
+/* Orphan scan work event (per its name); records the inode number passed by the caller. */
+TRACE_EVENT(scoutfs_orphan_scan_work,
+	TP_PROTO(struct super_block *sb, __u64 ino),
+
+	TP_ARGS(sb, ino),
+
+	TP_STRUCT__entry(
+		SCSB_TRACE_FIELDS
+		__field(__u64, ino)
+	),
+
+	TP_fast_assign(
+		SCSB_TRACE_ASSIGN(sb);
+		__entry->ino = ino;
+	),
+
+	TP_printk(SCSBF" ino %llu", SCSB_TRACE_ARGS,
+		  __entry->ino)
+);
+
+/* Orphan scan end event (per its name); records the inode number and return code. */
+TRACE_EVENT(scoutfs_orphan_scan_end,
+	TP_PROTO(struct super_block *sb, __u64 ino, int ret),
+
+	TP_ARGS(sb, ino, ret),
+
+	TP_STRUCT__entry(
+		SCSB_TRACE_FIELDS
+		__field(__u64, ino)
+		__field(int, ret)
+	),
+
+	TP_fast_assign(
+		SCSB_TRACE_ASSIGN(sb);
+		__entry->ino = ino;
+		__entry->ret = ret;
+	),
+
+	TP_printk(SCSBF" ino %llu ret %d", SCSB_TRACE_ARGS,
+		  __entry->ino, __entry->ret)
+);
+
 DECLARE_EVENT_CLASS(scoutfs_lock_info_class,
 	TP_PROTO(struct super_block *sb, struct lock_info *linfo),

@@ -1036,6 +1110,82 @@ TRACE_EVENT(scoutfs_orphan_inode,
 		  MINOR(__entry->dev), __entry->ino)
 );

+/* Event class shared by the try_delete events: super block fields plus an inode number. */
+DECLARE_EVENT_CLASS(scoutfs_try_delete_class,
+        TP_PROTO(struct super_block *sb, u64 ino),
+        TP_ARGS(sb, ino),
+        TP_STRUCT__entry(
+		SCSB_TRACE_FIELDS
+		__field(__u64, ino)
+        ),
+        TP_fast_assign(
+		SCSB_TRACE_ASSIGN(sb);
+		__entry->ino = ino;
+        ),
+	TP_printk(SCSBF" ino %llu", SCSB_TRACE_ARGS, __entry->ino)
+);
+
+/* The events below are instances of scoutfs_try_delete_class and differ only by name. */
+DEFINE_EVENT(scoutfs_try_delete_class, scoutfs_try_delete,
+        TP_PROTO(struct super_block *sb, u64 ino),
+        TP_ARGS(sb, ino)
+);
+
+DEFINE_EVENT(scoutfs_try_delete_class, scoutfs_try_delete_local_busy,
+        TP_PROTO(struct super_block *sb, u64 ino),
+        TP_ARGS(sb, ino)
+);
+
+DEFINE_EVENT(scoutfs_try_delete_class, scoutfs_try_delete_cached,
+        TP_PROTO(struct super_block *sb, u64 ino),
+        TP_ARGS(sb, ino)
+);
+
+DEFINE_EVENT(scoutfs_try_delete_class, scoutfs_try_delete_no_item,
+        TP_PROTO(struct super_block *sb, u64 ino),
+        TP_ARGS(sb, ino)
+);
+
+/* Like the try_delete class events, but also records the link count passed by the caller. */
+TRACE_EVENT(scoutfs_try_delete_has_links,
+	TP_PROTO(struct super_block *sb, u64 ino, unsigned int nlink),
+
+	TP_ARGS(sb, ino, nlink),
+
+	TP_STRUCT__entry(
+		SCSB_TRACE_FIELDS
+		__field(__u64, ino)
+		__field(unsigned int, nlink)
+	),
+
+	TP_fast_assign(
+		SCSB_TRACE_ASSIGN(sb);
+		__entry->ino = ino;
+		__entry->nlink = nlink;
+	),
+
+	TP_printk(SCSBF" ino %llu nlink %u", SCSB_TRACE_ARGS, __entry->ino,
+		  __entry->nlink)
+);
+
+/* Records the inode number and return code passed by the caller. */
+TRACE_EVENT(scoutfs_inode_orphan_delete,
+	TP_PROTO(struct super_block *sb, u64 ino, int ret),
+
+	TP_ARGS(sb, ino, ret),
+
+	TP_STRUCT__entry(
+		SCSB_TRACE_FIELDS
+		__field(__u64, ino)
+		__field(int, ret)
+	),
+
+	TP_fast_assign(
+		SCSB_TRACE_ASSIGN(sb);
+		__entry->ino = ino;
+		__entry->ret = ret;
+	),
+
+	TP_printk(SCSBF" ino %llu ret %d", SCSB_TRACE_ARGS, __entry->ino,
+		__entry->ret)
+);
+
 TRACE_EVENT(scoutfs_delete_inode,
 	TP_PROTO(struct super_block *sb, u64 ino, umode_t mode, u64 size),

@@ -1060,6 +1210,32 @@ TRACE_EVENT(scoutfs_delete_inode,
 		  __entry->mode, __entry->size)
 );

+/*
+ * Records the inode number, mode, size and return code passed by the
+ * caller.  The mount is identified by sb->s_dev, printed as major,minor,
+ * rather than by the SCSB fields; mode is printed in hex.
+ */
+TRACE_EVENT(scoutfs_delete_inode_end,
+	TP_PROTO(struct super_block *sb, u64 ino, umode_t mode, u64 size, int ret),
+
+	TP_ARGS(sb, ino, mode, size, ret),
+
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(__u64, ino)
+		__field(umode_t, mode)
+		__field(__u64, size)
+		__field(int, ret)
+	),
+
+	TP_fast_assign(
+		__entry->dev = sb->s_dev;
+		__entry->ino = ino;
+		__entry->mode = mode;
+		__entry->size = size;
+		__entry->ret = ret;
+	),
+
+	TP_printk("dev %d,%d ino %llu, mode 0x%x size %llu, ret %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino,
+		  __entry->mode, __entry->size, __entry->ret)
+);
+
 DECLARE_EVENT_CLASS(scoutfs_key_class,
        TP_PROTO(struct super_block *sb, struct scoutfs_key *key),
        TP_ARGS(sb, key),
@@ -1443,28 +1619,6 @@ DEFINE_EVENT(scoutfs_work_class, scoutfs_data_return_server_extents_exit,
        TP_ARGS(sb, data, ret)
 );

-DECLARE_EVENT_CLASS(scoutfs_shrink_exit_class,
-        TP_PROTO(struct super_block *sb, unsigned long nr_to_scan, int ret),
-        TP_ARGS(sb, nr_to_scan, ret),
-        TP_STRUCT__entry(
-		__field(void *, sb)
-		__field(unsigned long, nr_to_scan)
-		__field(int, ret)
-        ),
-        TP_fast_assign(
-		__entry->sb = sb;
-		__entry->nr_to_scan = nr_to_scan;
-		__entry->ret = ret;
-        ),
-        TP_printk("sb %p nr_to_scan %lu ret %d",
-		  __entry->sb, __entry->nr_to_scan, __entry->ret)
-);
-
-DEFINE_EVENT(scoutfs_shrink_exit_class, scoutfs_lock_shrink_exit,
-        TP_PROTO(struct super_block *sb, unsigned long nr_to_scan, int ret),
-        TP_ARGS(sb, nr_to_scan, ret)
-);
-
 TRACE_EVENT(scoutfs_rename,
 	TP_PROTO(struct super_block *sb, struct inode *old_dir,
 		 struct dentry *old_dentry, struct inode *new_dir,
@@ -1484,9 +1638,9 @@ TRACE_EVENT(scoutfs_rename,
 	TP_fast_assign(
 		SCSB_TRACE_ASSIGN(sb);
 		__entry->old_dir_ino = scoutfs_ino(old_dir);
-		__assign_str(old_name, old_dentry->d_name.name)
+		kc__assign_str(old_name, old_dentry->d_name.name);
 		__entry->new_dir_ino = scoutfs_ino(new_dir);
-		__assign_str(new_name, new_dentry->d_name.name)
+		kc__assign_str(new_name, new_dentry->d_name.name);
 		__entry->new_inode_ino = new_dentry->d_inode ?
 					 scoutfs_ino(new_dentry->d_inode) : 0;
 	),
@@ -1516,7 +1670,7 @@ TRACE_EVENT(scoutfs_d_revalidate,
 	TP_fast_assign(
 		SCSB_TRACE_ASSIGN(sb);
 		__entry->dentry = dentry;
-		__assign_str(name, dentry->d_name.name)
+		kc__assign_str(name, dentry->d_name.name);
 		__entry->ino = dentry->d_inode ? scoutfs_ino(dentry->d_inode) : 0;
 		__entry->dir_ino = dir_ino;
 		__entry->flags = flags;
@@ -1551,7 +1705,7 @@ TRACE_EVENT(scoutfs_validate_dentry,
 		SCSB_TRACE_ASSIGN(sb);
 		__entry->dentry = dentry;
 		__entry->dir_ino = dir_ino;
-		__assign_str(name, dentry->d_name.name)
+		kc__assign_str(name, dentry->d_name.name);
 		__entry->dentry_ino = dentry_ino;
 		__entry->dent_ino = dent_ino;
 		__entry->fsdata_gen = (unsigned long long)dentry->d_fsdata;
@@ -1673,7 +1827,7 @@ TRACE_EVENT(scoutfs_get_name,
 		SCSB_TRACE_ASSIGN(sb);
 		__entry->parent_ino = scoutfs_ino(parent);
 		__entry->child_ino = scoutfs_ino(child);
-		__assign_str(name, name);
+		kc__assign_str(name, name);
 	),

 	TP_printk(SCSBF" parent %llu child %llu name: %s",
@@ -3097,6 +3251,24 @@ TRACE_EVENT(scoutfs_ioc_search_xattrs,
 		  __entry->ino, __entry->last_ino)
 );

+/*
+ * Records the name of a trigger along with the super block fields.
+ *
+ * NOTE(review): name is stored as a pointer, not copied, and is
+ * dereferenced by %s when the event is printed.  It presumably has to
+ * point at storage that stays valid for as long as the event can be
+ * read, e.g. a static string -- confirm against the callers.
+ */
+TRACE_EVENT(scoutfs_trigger_fired,
+	TP_PROTO(struct super_block *sb, const char *name),
+
+	TP_ARGS(sb, name),
+
+	TP_STRUCT__entry(
+		SCSB_TRACE_FIELDS
+		__field(const char *, name)
+	),
+
+	TP_fast_assign(
+		SCSB_TRACE_ASSIGN(sb);
+		__entry->name = name;
+	),
+
+	TP_printk(SCSBF" %s", SCSB_TRACE_ARGS, __entry->name)
+);
+
 #endif /* _TRACE_SCOUTFS_H */

 /* This part must be outside protection */
--- a/kmod/src/server.c
+++ b/kmod/src/server.c
@@ -20,7 +20,6 @@
 #include <net/sock.h>
 #include <net/tcp.h>
 #include <linux/log2.h>
-#include <asm/unaligned.h>

 #include "format.h"
 #include "counters.h"
@@ -41,6 +40,7 @@
 #include "recov.h"
 #include "omap.h"
 #include "fence.h"
+#include "triggers.h"

 /*
 * Every active mount can act as the server that listens on a net
@@ -1291,9 +1291,13 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
 		 * meta was low so that deleted items are merged
 		 * promptly and freed blocks can bring the client out of
 		 * enospc.
+		 *
+		 * The trigger can be used to force a log merge in cases where
+		 * a test only generates small amounts of change.
 		 */
 		finalize_ours = (lt->item_root.height > 2) ||
-				(le32_to_cpu(lt->meta_avail.flags) & SCOUTFS_ALLOC_FLAG_LOW);
+				(le32_to_cpu(lt->meta_avail.flags) & SCOUTFS_ALLOC_FLAG_LOW) ||
+				scoutfs_trigger(sb, LOG_MERGE_FORCE_FINALIZE_OURS);

 		trace_scoutfs_server_finalize_decision(sb, rid, saw_finalized, others_active,
 						       ours_visible, finalize_ours, delay_ms,
@@ -1402,6 +1406,8 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
 			BUG_ON(err); /* inconsistent */
 		}

+		scoutfs_inc_counter(sb, log_merge_start);
+
 		/* we're done, caller can make forward progress */
 		break;
 	}
@@ -1618,7 +1624,8 @@ static int server_get_log_trees(struct super_block *sb,
 		goto update;
 	}

-	ret = alloc_move_empty(sb, &super->data_alloc, &lt.data_freed, 100);
+	ret = alloc_move_empty(sb, &super->data_alloc, &lt.data_freed,
+			       COMMIT_HOLD_ALLOC_BUDGET / 2);
 	if (ret == -EINPROGRESS)
 		ret = 0;
 	if (ret < 0) {
@@ -1913,9 +1920,11 @@ static int reclaim_open_log_tree(struct super_block *sb, u64 rid)
 	       scoutfs_alloc_splice_list(sb, &server->alloc, &server->wri, server->other_freed,
 					 &lt.meta_avail)) ?:
 	      (err_str = "empty data_avail",
-	       alloc_move_empty(sb, &super->data_alloc, &lt.data_avail, 100)) ?:
+	       alloc_move_empty(sb, &super->data_alloc, &lt.data_avail,
+				COMMIT_HOLD_ALLOC_BUDGET / 2)) ?:
 	      (err_str = "empty data_freed",
-	       alloc_move_empty(sb, &super->data_alloc, &lt.data_freed, 100));
+	       alloc_move_empty(sb, &super->data_alloc, &lt.data_freed,
+				COMMIT_HOLD_ALLOC_BUDGET / 2));
 	mutex_unlock(&server->alloc_mutex);

 	/* only finalize, allowing merging, once the allocators are fully freed */
@@ -2506,6 +2515,8 @@ static int splice_log_merge_completions(struct super_block *sb,
 		queue_work(server->wq, &server->log_merge_free_work);
 	else
 		err_str = "deleting merge status item";
+
+	scoutfs_inc_counter(sb, log_merge_complete);
 out:
 	if (upd_stat) {
 		init_log_merge_key(&key, SCOUTFS_LOG_MERGE_STATUS_ZONE, 0, 0);
@@ -3036,7 +3047,13 @@ static int server_commit_log_merge(struct super_block *sb,
 				  SCOUTFS_LOG_MERGE_STATUS_ZONE, 0, 0,
 				  &stat, sizeof(stat));
 	if (ret < 0) {
-		err_str = "getting merge status item";
+		/*
+		 * During a retransmission, it's possible that the server
+		 * already committed and resolved this log merge. ENOENT
+		 * is expected in that case.
+		 */
+		if (ret != -ENOENT)
+			err_str = "getting merge status item";
 		goto out;
 	}

@@ -3354,7 +3371,7 @@ out:

 static u64 device_blocks(struct block_device *bdev, int shift)
 {
-	return i_size_read(bdev->bd_inode) >> shift;
+	return i_size_read(KC_BDEV_INODE(bdev)) >> shift;
 }

 static int server_resize_devices(struct super_block *sb, struct scoutfs_net_connection *conn,
--- a/kmod/src/srch.c
+++ b/kmod/src/srch.c
@@ -18,7 +18,11 @@
 #include <linux/pagemap.h>
 #include <linux/vmalloc.h>
 #include <linux/sort.h>
+#ifdef KC_HAVE__LINUX_UNALIGNED_H
+#include <linux/unaligned.h>
+#else
 #include <asm/unaligned.h>
+#endif

 #include "super.h"
 #include "format.h"
@@ -2346,6 +2350,9 @@ static struct attribute *srch_attrs[] = {
 	SCOUTFS_ATTR_PTR(compact_delay_ms),
 	NULL,
 };
+#ifdef KC_KOBJECT_DEFAULT_GROUPS
+ATTRIBUTE_GROUPS(srch);
+#endif

 void scoutfs_srch_destroy(struct super_block *sb)
 {
@@ -2387,7 +2394,8 @@ int scoutfs_srch_setup(struct super_block *sb)

 	sbi->srch_info = srinf;

-	ret = scoutfs_sysfs_create_attrs(sb, &srinf->ssa, srch_attrs, "srch");
+	ret = scoutfs_sysfs_create_attrs(sb, &srinf->ssa, KC_KOBJ_DEFAULT(srch),
+					 "srch");
 	if (ret < 0)
 		goto out;

--- a/kmod/src/super.c
+++ b/kmod/src/super.c
@@ -283,7 +283,7 @@ int scoutfs_write_super(struct super_block *sb,
 static bool small_bdev(struct super_block *sb, char *which, u64 blocks,
 		       struct block_device *bdev, int shift)
 {
-	u64 size = (u64)i_size_read(bdev->bd_inode);
+	u64 size = (u64)i_size_read(KC_BDEV_INODE(bdev));
 	u64 count = size >> shift;

 	if (blocks > count) {
@@ -508,7 +508,7 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_time_gran = 1;

 	/* btree blocks use long lived bh->b_data refs */
-	mapping_set_gfp_mask(sb->s_bdev->bd_inode->i_mapping, GFP_NOFS);
+	mapping_set_gfp_mask(KC_BDEV_MAPPING(sb->s_bdev), GFP_NOFS);

 	sbi = kzalloc(sizeof(struct scoutfs_sb_info), GFP_KERNEL);
 	sb->s_fs_info = sbi;
@@ -552,6 +552,7 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
 	}
 	sbi->meta_bdev_file = meta_bdev_file;
 	sbi->meta_bdev = file_bdev(meta_bdev_file);
+
 #else
 #ifdef KC_BLKDEV_PUT_HOLDER_ARG
 	meta_bdev = blkdev_get_by_path(opts.metadev_path, SCOUTFS_META_BDEV_MODE, sb, NULL);
@@ -567,7 +568,11 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->meta_bdev = meta_bdev;
 #endif

+#ifdef KC_BLKDEV_SET_BLOCKSIZE_FILE
+	ret = set_blocksize(sbi->meta_bdev_file, SCOUTFS_BLOCK_SM_SIZE);
+#else
 	ret = set_blocksize(sbi->meta_bdev, SCOUTFS_BLOCK_SM_SIZE);
+#endif
 	if (ret != 0) {
 		scoutfs_err(sb, "failed to set metadev blocksize, returned %d",
 			    ret);
--- a/kmod/src/sysfs.c
+++ b/kmod/src/sysfs.c
@@ -103,12 +103,11 @@ static ssize_t attr_funcs_show(struct kobject *kobj, struct attribute *attr,
 	};								\
 									\
 	static struct kobj_type _name##_ktype = {			\
-		.default_attrs  = _name##_attrs,			\
+		.KC_KOBJ_DEFAULT_OP = KC_KOBJ_DEFAULT(_name),		\
 		.sysfs_ops      = &_name##_sysfs_ops,			\
 		.release        = _name##_release,			\
 	};

-
 static struct attribute *sb_id_attrs[] = {
 	&data_device_maj_min_attr_funcs.attr,
 	&format_version_attr_funcs.attr,
@@ -116,6 +115,9 @@ static struct attribute *sb_id_attrs[] = {
 	&rid_attr_funcs.attr,
 	NULL,
 };
+#ifdef KC_KOBJECT_DEFAULT_GROUPS
+ATTRIBUTE_GROUPS(sb_id);
+#endif
 KTYPE(sb_id);

 struct kobject *scoutfs_sysfs_sb_dir(struct super_block *sb)
@@ -155,7 +157,12 @@ void scoutfs_sysfs_init_attrs(struct super_block *sb,
 int scoutfs_sysfs_create_attrs_parent(struct super_block *sb,
 				      struct kobject *parent,
 				      struct scoutfs_sysfs_attrs *ssa,
-				      struct attribute **attrs, char *fmt, ...)
+#ifdef KC_KOBJECT_DEFAULT_GROUPS
+				      const struct attribute_group **groups,
+#else
+				      struct attribute **attrs,
+#endif
+				      char *fmt, ...)
 {
 	va_list args;
 	size_t name_len;
@@ -168,7 +175,11 @@ int scoutfs_sysfs_create_attrs_parent(struct super_block *sb,

 	ssa->sb = sb;
 	init_completion(&ssa->comp);
+#ifdef KC_KOBJECT_DEFAULT_GROUPS
+	ssa->ktype.default_groups = groups;
+#else
 	ssa->ktype.default_attrs = attrs;
+#endif
 	ssa->ktype.sysfs_ops = &kobj_sysfs_ops;
 	ssa->ktype.release = scoutfs_sysfs_release;

--- a/kmod/src/sysfs.h
+++ b/kmod/src/sysfs.h
@@ -39,10 +39,15 @@ void scoutfs_sysfs_init_attrs(struct super_block *sb,
 int scoutfs_sysfs_create_attrs_parent(struct super_block *sb,
 				      struct kobject *parent,
 				      struct scoutfs_sysfs_attrs *ssa,
-				      struct attribute **attrs, char *fmt, ...);
-#define scoutfs_sysfs_create_attrs(sb, ssa, attrs, fmt, args...)	\
+#ifdef KC_KOBJECT_DEFAULT_GROUPS
+				      const struct attribute_group **groups,
+#else
+				      struct attribute **attrs,
+#endif
+				      char *fmt, ...);
+#define scoutfs_sysfs_create_attrs(sb, ssa, group_or_attrs, fmt, args...)	\
 	scoutfs_sysfs_create_attrs_parent(sb, scoutfs_sysfs_sb_dir(sb),	\
-					  ssa, attrs, fmt, ##args)
+					  ssa, group_or_attrs, fmt, ##args)

 void scoutfs_sysfs_destroy_attrs(struct super_block *sb,
 				 struct scoutfs_sysfs_attrs *ssa);
--- a/kmod/src/triggers.c
+++ b/kmod/src/triggers.c
@@ -18,6 +18,7 @@

 #include "super.h"
 #include "triggers.h"
+#include "scoutfs_trace.h"

 /*
 * We have debugfs files we can write to which arm triggers which
@@ -39,6 +40,7 @@ struct scoutfs_triggers {

 static char *names[] = {
 	[SCOUTFS_TRIGGER_BLOCK_REMOVE_STALE] = "block_remove_stale",
+	[SCOUTFS_TRIGGER_LOG_MERGE_FORCE_FINALIZE_OURS] = "log_merge_force_finalize_ours",
 	[SCOUTFS_TRIGGER_SRCH_COMPACT_LOGS_PAD_SAFE] = "srch_compact_logs_pad_safe",
 	[SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE] = "srch_force_log_rotate",
 	[SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE] = "srch_merge_stop_safe",
@@ -51,6 +53,7 @@ bool scoutfs_trigger_test_and_clear(struct super_block *sb, unsigned int t)
 	atomic_t *atom;
 	int old;
 	int mem;
+	bool fired;

 	BUG_ON(t >= SCOUTFS_TRIGGER_NR);
 	atom = &triggers->atomics[t];
@@ -64,7 +67,12 @@ bool scoutfs_trigger_test_and_clear(struct super_block *sb, unsigned int t)
 		mem = atomic_cmpxchg(atom, old, 0);
 	} while (mem && mem != old);

-	return !!mem;
+	fired = !!mem;
+
+	if (fired)
+		trace_scoutfs_trigger_fired(sb, names[t]);
+
+	return fired;
 }

 int scoutfs_setup_triggers(struct super_block *sb)
--- a/kmod/src/triggers.h
+++ b/kmod/src/triggers.h
@@ -3,6 +3,7 @@

 enum scoutfs_trigger {
 	SCOUTFS_TRIGGER_BLOCK_REMOVE_STALE,
+	SCOUTFS_TRIGGER_LOG_MERGE_FORCE_FINALIZE_OURS,
 	SCOUTFS_TRIGGER_SRCH_COMPACT_LOGS_PAD_SAFE,
 	SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE,
 	SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE,
--- a/kmod/src/volopt.c
+++ b/kmod/src/volopt.c
@@ -52,6 +52,15 @@ static struct volopt_nr_name {
 /* initialized by setup, pointer array is null terminated */
 static struct kobj_attribute volopt_attrs[ARRAY_SIZE(volopt_table)];
 static struct attribute *volopt_attr_ptrs[ARRAY_SIZE(volopt_table) + 1];
+#ifdef KC_KOBJECT_DEFAULT_GROUPS
+static const struct attribute_group volopt_group = {
+	.attrs = volopt_attr_ptrs,
+};
+static const struct attribute_group *volopt_groups[] = {
+	&volopt_group,
+	NULL,
+};
+#endif

 static void get_opt_data(struct kobj_attribute *attr, struct scoutfs_volume_options *volopt,
 			 u64 *bit, __le64 **opt)
@@ -164,7 +173,9 @@ int scoutfs_volopt_setup(struct super_block *sb)
 	BUILD_BUG_ON(ARRAY_SIZE(volopt_table) != ARRAY_SIZE(volopt_attr_ptrs) - 1);
 	volopt_attr_ptrs[i] = NULL;

-	ret = scoutfs_sysfs_create_attrs(sb, &vinf->ssa, volopt_attr_ptrs, "volume_options");
+	ret = scoutfs_sysfs_create_attrs(sb, &vinf->ssa,
+					 KC_KOBJ_DEFAULT_PICK(volopt_groups, volopt_attr_ptrs),
+					 "volume_options");
 	if (ret < 0)
 		goto out;

--- a/kmod/src/wkic.c
+++ b/kmod/src/wkic.c
@@ -171,7 +171,7 @@ struct wkic_item {
 	u64 seq;
 	unsigned int val_len;
 	u8 flags;
-	u8 val[0] __aligned(ARCH_KMALLOC_MINALIGN); /* totls have native structs */
+	u8 val[] __aligned(ARCH_KMALLOC_MINALIGN); /* totls have native structs */
 };

 static struct wkic_item *witem_container(struct rb_node *node)
@@ -763,7 +763,7 @@ static void fill_page_items(struct super_block *sb, struct wkic_page *wpage, str
 		pg_item->val_len = witem->val_len;
 		pg_item->flags = witem->flags;
 		if (witem->val_len)
-			memcpy(pg_item->val, witem->val, witem->val_len);
+			memcpy(&pg_item->val[0], witem->val, witem->val_len);

 		/* always inserting greatest item into page */
 		rb_link_node(&pg_item->node, parent, node);
@@ -1112,8 +1112,13 @@ int scoutfs_wkic_setup(struct super_block *sb)
 	}

 	winf->sb = sb;
-	KC_INIT_SHRINKER_FUNCS(&winf->shrinker, wkic_shrink_count, wkic_shrink_scan);
-	KC_REGISTER_SHRINKER(&winf->shrinker, "scoutfs-weak_item:" SCSBF, SCSB_ARGS(sb));
+	KC_SETUP_SHRINKER(winf->shrinker, winf, 0, wkic_shrink_count,
+			  wkic_shrink_scan, "scoutfs-weak_item:" SCSBF, SCSB_ARGS(sb));
+	if (KC_SHRINKER_IS_NULL(winf->shrinker)) {
+		debugfs_remove(winf->drop_dentry);
+		kfree(winf);
+		return -ENOMEM;
+	}

 	sbi->wkic_info = winf;
 	return 0;
@@ -1141,7 +1146,7 @@ void scoutfs_wkic_destroy(struct super_block *sb)

 	if (winf) {
 		debugfs_remove(winf->drop_dentry);
-		KC_UNREGISTER_SHRINKER(&winf->shrinker);
+		KC_UNREGISTER_SHRINKER(winf->shrinker);

 		/* trees are in sync so tearing down one frees all pages */
 		rbtree_postorder_for_each_entry_safe(wpage, tmp, &winf->wpage_roots[0], nodes[0]) {
--- a/kmod/src/xattr.c
+++ b/kmod/src/xattr.c
@@ -907,7 +907,7 @@ int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_

 	/* XXX do these want i_mutex or anything? */
 	inode_inc_iversion(inode);
-	inode->i_ctime = current_time(inode);
+	inode_set_ctime_to_ts(inode, current_time(inode));
 	ret = 0;

 out:
--- a/tests/fenced-local-force-unmount.sh
+++ b/tests/fenced-local-force-unmount.sh
@@ -9,7 +9,7 @@
 echo "$0 running rid '$SCOUTFS_FENCED_REQ_RID' ip '$SCOUTFS_FENCED_REQ_IP' args '$@'"

 echo_fail() {
-	echo "$@" >> /dev/stderr
+	echo "$@" >&2
 	exit 1
 }

@@ -27,8 +27,7 @@ for fs in /sys/fs/scoutfs/*; do
 	nr="$(quiet_cat $fs/data_device_maj_min)"
 	[ ! -d "$fs" -o "$fs_rid" != "$rid" ] && continue

-	mnt=$(findmnt -l -n -t scoutfs -o TARGET -S $nr) || \
-		echo_fail "findmnt -t scoutfs -S $nr failed"
+	mnt=$(findmnt -l -n -t scoutfs -o TARGET -S $nr)
 	[ -z "$mnt" ] && continue

 	if ! umount -qf "$mnt"; then
--- a/tests/funcs/filter.sh
+++ b/tests/funcs/filter.sh
@@ -3,7 +3,8 @@
 t_filter_fs()
 {
 	sed -e 's@mnt/test\.[0-9]*@mnt/test@g' \
-	    -e 's@Device: [a-fA-F0-9]*h/[0-9]*d@Device: 0h/0d@g'
+	    -e 's@Device: [a-fA-F0-9]*h/[0-9]*d@Device: 0h/0d@g' \
+	    -e 's@Device: [0-9]*,[0-9]*@Device: 0h/0d@g'
 }

 #
@@ -170,6 +171,9 @@ t_filter_dmesg()
 	# some ci test guests are unresponsive
 	re="$re|longest quorum heartbeat .* delay"

-	egrep -v "($re)" | \
+	# creating block devices may trigger this
+	re="$re|block device autoloading is deprecated and will be removed."
+
+	grep -v -E "($re)" | \
 		ignore_harmless_unwind_kasan_stack_oob
 }
--- a/tests/funcs/fs.sh
+++ b/tests/funcs/fs.sh
@@ -283,6 +283,30 @@ t_reinsert_remount_all()
 	t_quiet t_mount_all || t_fail "mounting all failed"
 }

+#
+# scratch helpers
+#
+t_scratch_mkfs()
+{
+	scoutfs mkfs -f -Q 0,127.0.0.1,$T_SCRATCH_PORT "$T_EX_META_DEV" "$T_EX_DATA_DEV" "$@" > $T_TMP.mkfs.out 2>&1 || \
+		t_fail "scratch mkfs failed"
+}
+
+t_scratch_mount()
+{
+	mkdir -p "$T_MSCR"
+	mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$@" "$T_EX_DATA_DEV" "$T_MSCR" || \
+		t_fail "scratch mount failed"
+}
+
+t_scratch_umount()
+{
+	umount "$T_MSCR" || \
+		t_fail "scratch umount failed"
+	rmdir "$T_MSCR"
+}
+
+
 t_trigger_path() {
 	local nr="$1"

@@ -498,3 +522,121 @@ t_restore_all_sysfs_mount_options() {
 		t_set_sysfs_mount_option $i $name "${_saved_opts[$ind]}"
 	done
 }
+
+t_force_log_merge() {
+	local sv=$(t_server_nr)
+	local merges_started
+	local last_merges_started
+	local merges_completed
+	local last_merges_completed
+	# Loop until arming the trigger is followed by a merge that starts and completes.
+	while true; do
+		last_merges_started=$(t_counter log_merge_start $sv)
+		last_merges_completed=$(t_counter log_merge_complete $sv)
+		# arm the log_merge_force_finalize_ours trigger on the server's mount
+		t_trigger_arm_silent log_merge_force_finalize_ours $sv
+		# NOTE(review): presumably drives a commit so the server tests the trigger -- confirm
+		t_sync_seq_index
+		# poll until the trigger no longer reads back as "1"
+		while test "$(t_trigger_get log_merge_force_finalize_ours $sv)" == "1"; do
+			sleep .5
+		done
+
+		merges_started=$(t_counter log_merge_start $sv)
+		# if the start counter did not advance no merge began, so go around again
+		if (( merges_started > last_merges_started )); then
+			merges_completed=$(t_counter log_merge_complete $sv)
+			# poll until the completion counter differs from the earlier sample
+			while (( merges_completed == last_merges_completed )); do
+				sleep .5
+				merges_completed=$(t_counter log_merge_complete $sv)
+			done
+			break
+		fi
+	done
+}
+
+declare -A _last_scan
+t_get_orphan_scan_runs() {
+	local i
+
+	for i in $(t_fs_nrs); do
+		_last_scan[$i]=$(t_counter orphan_scan $i)
+	done
+}
+
+t_wait_for_orphan_scan_runs() {
+	local i
+	local scan
+
+	t_get_orphan_scan_runs
+
+	for i in $(t_fs_nrs); do
+		while true; do
+			scan=$(t_counter orphan_scan $i)
+			if (( scan != _last_scan[$i] )); then
+				break
+			fi
+			sleep .5
+		done
+	done
+}
+
+declare -A _last_empty
+t_get_orphan_scan_empty() {
+	local i
+
+	for i in $(t_fs_nrs); do
+		_last_empty[$i]=$(t_counter orphan_scan_empty $i)
+	done
+}
+
+t_wait_for_no_orphans() {
+	local i;
+	local working;
+	local empty;
+
+	t_get_orphan_scan_empty
+
+	while true; do
+		working=0
+
+		t_wait_for_orphan_scan_runs
+
+		for i in $(t_fs_nrs); do
+			empty=$(t_counter orphan_scan_empty $i)
+			if (( empty == _last_empty[$i] )); then
+				(( working++ ))
+			else
+				(( _last_empty[$i] = empty ))
+			fi
+		done
+
+		if (( working == 0 )); then
+			break
+		fi
+
+		sleep 1
+	done
+}
+
+#
+# Repeatedly run the arguments as a command, sleeping one second between
+# attempts, until it returns success.  The first argument is a relative
+# timeout in seconds; the remaining arguments are the command to run.
+#
+# If the timeout expires without the command returning 0 then the test
+# fails via t_fail with the whole command joined into one message string.
+#
+t_wait_until_timeout() {
+	local relative="$1"
+	local expire="$((SECONDS + relative))"
+	shift
+
+	while (( SECONDS < expire )); do
+		"$@" && return
+		sleep 1
+	done
+
+	t_fail "command failed for $relative sec: $*"
+}
--- a/tests/funcs/tap.sh
+++ b/tests/funcs/tap.sh
@@ -43,9 +43,14 @@ t_tap_progress()
 	local testname=$1
 	local result=$2

+	local stmsg=""
 	local diff=""
 	local dmsg=""

+	if [[ -s $T_RESULTS/tmp/${testname}/status.msg ]]; then
+		stmsg="1"
+	fi
+
 	if [[ -s "$T_RESULTS/tmp/${testname}/dmesg.new" ]]; then
 		dmsg="1"
 	fi
@@ -61,6 +66,7 @@ t_tap_progress()
 		echo "# ${testname} ** skipped - permitted **"
 	else
 		echo "not ok ${i} - ${testname}"
+
 		case ${result} in
 		101)
 			echo "# ${testname} ** skipped **"
@@ -70,6 +76,13 @@ t_tap_progress()
 			;;
 		esac

+		if [[ -n "${stmsg}" ]]; then
+			echo "#"
+			echo "# status:"
+			echo "#"
+			sed 's/^/# - /' "$T_RESULTS/tmp/${testname}/status.msg"	# emit each status line behind a "# - " prefix
+		fi
+
 		if [[ -n "${diff}" ]]; then
 			echo "#"
 			echo "# diff:"
--- a/tests/golden/basic-acl-consistency
+++ b/tests/golden/basic-acl-consistency
@@ -0,0 +1,6 @@
+== make scratch fs
+== create uid/gids
+== set acls and permissions
+== compare output
+== drop caches and compare again
+== cleanup scratch fs
--- a/tests/golden/basic-posix-consistency
+++ b/tests/golden/basic-posix-consistency
@@ -47,7 +47,7 @@ four
 --- dir within dir
 --- overwrite file
 --- can't overwrite non-empty dir
-mv: cannot move '/mnt/test/test/basic-posix-consistency/dir/c/clobber' to '/mnt/test/test/basic-posix-consistency/dir/a/dir': Directory not empty
+mv: cannot overwrite '/mnt/test/test/basic-posix-consistency/dir/a/dir': Directory not empty
 --- can overwrite empty dir
 --- can rename into root
 == path resoluion
--- a/tests/golden/inode-deletion
+++ b/tests/golden/inode-deletion
@@ -17,7 +17,7 @@ ino not found in dseq index
 mount 0 contents after mount 1 rm: contents
 ino found in dseq index
 ino found in dseq index
-stat: cannot stat '/mnt/test/test/inode-deletion/file': No such file or directory
+stat: cannot stat '/mnt/test/test/inode-deletion/badfile': No such file or directory
 ino not found in dseq index
 ino not found in dseq index
 == lots of deletions use one open map
--- a/tests/run-tests.sh
+++ b/tests/run-tests.sh
@@ -301,7 +301,7 @@ fi
 # include everything by default
 test -z "$T_INCLUDE" && T_INCLUDE="-e '.*'"
 # (quickly) exclude nothing by default
-test -z "$T_EXCLUDE" && T_EXCLUDE="-e '\Zx'"
+test -z "$T_EXCLUDE" && T_EXCLUDE="-e '^$'"

 # eval to strip re ticks but not expand
 tests=$(grep -v "^#" $T_SEQUENCE |
@@ -400,7 +400,8 @@ if [ -n "$T_INSMOD" ]; then
 fi

 if [ -n "$T_TRACE_MULT" ]; then
-	orig_trace_size=$(cat /sys/kernel/debug/tracing/buffer_size_kb)
+	# NOTE(review): reads like "7 (expanded: 1408)" presumably broke the plain cat -- confirm; take the last number
+	orig_trace_size=$(grep -oE '[0-9]+' /sys/kernel/debug/tracing/buffer_size_kb | tail -n 1)
 	mult_trace_size=$((orig_trace_size * T_TRACE_MULT))
 	msg "increasing trace buffer size from $orig_trace_size KiB to $mult_trace_size KiB"
 	echo $mult_trace_size > /sys/kernel/debug/tracing/buffer_size_kb
@@ -504,7 +505,10 @@ crash_monitor()
 		fi

 		if [ "$bad" != 0 ]; then
-			echo "run-tests monitor triggering crash"
+			echo "run-tests monitor syncing and triggering crash"
+			# hail mary, the sync could well hang
+			(echo s > /proc/sysrq-trigger) &
+			sleep 5
 			echo c > /proc/sysrq-trigger
 			exit 1
 		fi
@@ -624,6 +628,9 @@ for t in $tests; do
 		cmd rm -rf "$T_TMPDIR"
 		cmd mkdir -p "$T_TMPDIR"

+		# assign scratch mount point in temporary dir
+		T_MSCR="$T_TMPDIR/scratch"
+
 		# create a test name dir in the fs, clean up old data as needed
 		T_DS=""
 		for i in $(seq 0 $((T_NR_MOUNTS - 1))); do
--- a/tests/sequence
+++ b/tests/sequence
@@ -2,6 +2,7 @@ export-get-name-parent.sh
 basic-block-counts.sh
 basic-bad-mounts.sh
 basic-posix-acl.sh
+basic-acl-consistency.sh
 inode-items-updated.sh
 simple-inode-index.sh
 simple-staging.sh
--- a/tests/tests/basic-acl-consistency.sh
+++ b/tests/tests/basic-acl-consistency.sh
@@ -0,0 +1,117 @@
+
+#
+# Test basic clustered posix acl consistency.
+#
+
+t_require_commands getfacl setfacl
+
+GETFACL="getfacl --absolute-names"
+
+filter_scratch() {
+	sed "s@$T_MSCR@t_mscr@g"
+}
+
+acl_compare()
+{
+	diff -u - <($GETFACL $T_MSCR/data/dir_a/dir_b | filter_scratch) <<EOF1
+# file: t_mscr/data/dir_a/dir_b
+# owner: t_usr_3
+# group: t_grp_3
+# flags: -s-
+user::rwx
+group::rwx
+group:t_grp_2:r-x
+mask::rwx
+other::---
+default:user::rwx
+default:group::rwx
+default:group:t_grp_2:r-x
+default:group:t_grp_3:rwx
+default:mask::rwx
+default:other::---
+
+EOF1
+
+	test $? -eq 0 || t_fail "dir_b differs"
+
+	diff -u - <($GETFACL -p $T_MSCR/data/dir_a/dir_b/dir_c/dir_d | filter_scratch) <<EOF3
+# file: t_mscr/data/dir_a/dir_b/dir_c/dir_d
+# owner: t_usr_1
+# group: t_grp_1
+# flags: -s-
+user::rwx
+group::rwx
+group:t_grp_2:r-x
+mask::rwx
+other::---
+default:user::rwx
+default:group::rwx
+default:group:t_grp_2:r-x
+default:group:t_grp_3:rwx
+default:mask::rwx
+default:other::---
+
+EOF3
+	test $? -eq 0 || t_fail "dir_d differs"
+
+	diff -u - <($GETFACL $T_MSCR/data/dir_a/dir_b/dir_c | filter_scratch) <<EOF2
+# file: t_mscr/data/dir_a/dir_b/dir_c
+# owner: t_usr_3
+# group: t_grp_2
+# flags: -s-
+user::rwx
+group::rwx
+group:t_grp_2:r-x
+mask::rwx
+other::---
+default:user::rwx
+default:group::rwx
+default:group:t_grp_2:r-x
+default:group:t_grp_3:rwx
+default:mask::rwx
+default:other::---
+
+EOF2
+	test $? -eq 0 || t_fail "dir_c differs"
+}
+echo "== make scratch fs"
+t_scratch_mkfs
+t_scratch_mount
+
+rm -rf $T_MSCR/data
+
+echo "== create uid/gids"
+groupadd -g 7101 t_grp_1 > /dev/null 2>&1
+useradd -g 7101 -u 7101 t_usr_1 > /dev/null 2>&1
+groupadd -g 7102 t_grp_2 > /dev/null 2>&1
+groupadd -g 7103 t_grp_3 > /dev/null 2>&1
+useradd -g 7103 -u 7103 t_usr_3 > /dev/null 2>&1
+
+echo "== set acls and permissions"
+mkdir -p $T_MSCR/data/dir_a/dir_b
+chown t_usr_3:t_grp_3 $T_MSCR/data/dir_a/dir_b
+chmod 2770 $T_MSCR/data/dir_a/dir_b
+setfacl -m g:t_grp_2:rx $T_MSCR/data/dir_a/dir_b
+setfacl -m d:g:t_grp_2:rx $T_MSCR/data/dir_a/dir_b
+setfacl -m d:g:t_grp_3:rwx $T_MSCR/data/dir_a/dir_b
+
+mkdir -p $T_MSCR/data/dir_a/dir_b/dir_c
+chown t_usr_3:t_grp_2 $T_MSCR/data/dir_a/dir_b/dir_c
+setfacl -x g:t_grp_3 $T_MSCR/data/dir_a/dir_b/dir_c
+
+mkdir -p $T_MSCR/data/dir_a/dir_b/dir_c/dir_d
+chown t_usr_1:t_grp_1 $T_MSCR/data/dir_a/dir_b/dir_c/dir_d
+setfacl -x g:t_grp_3 $T_MSCR/data/dir_a/dir_b/dir_c/dir_d
+
+echo "== compare output"
+acl_compare
+
+echo "== drop caches and compare again"
+sync
+echo 3 > /proc/sys/vm/drop_caches
+acl_compare
+
+echo "== cleanup scratch fs"
+t_scratch_umount
+
+t_pass
--- a/tests/tests/basic-bad-mounts.sh
+++ b/tests/tests/basic-bad-mounts.sh
@@ -12,25 +12,22 @@ mount_fail()
 }

 echo "== prepare devices, mount point, and logs"
-SCR="$T_TMPDIR/mnt.scratch"
-mkdir -p "$SCR"
+t_scratch_mkfs
 > $T_TMP.mount.out
-scoutfs mkfs -f -Q 0,127.0.0.1,$T_SCRATCH_PORT "$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 \
-	|| t_fail "mkfs failed"

 echo "== bad devices, bad options"
-mount_fail -o _bad /dev/null /dev/null "$SCR"
+mount_fail -o _bad /dev/null /dev/null "$T_MSCR"

 echo "== swapped devices"
-mount_fail -o metadev_path=$T_EX_DATA_DEV,quorum_slot_nr=0 "$T_EX_META_DEV" "$SCR"
+mount_fail -o metadev_path=$T_EX_DATA_DEV,quorum_slot_nr=0 "$T_EX_META_DEV" "$T_MSCR"

 echo "== both meta devices"
-mount_fail -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_META_DEV" "$SCR"
+mount_fail -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_META_DEV" "$T_MSCR"

 echo "== both data devices"
-mount_fail -o metadev_path=$T_EX_DATA_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
+mount_fail -o metadev_path=$T_EX_DATA_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"

 echo "== good volume, bad option and good options"
-mount_fail -o _bad,metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR" 
+mount_fail -o _bad,metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"

 t_pass
--- a/tests/tests/basic-posix-consistency.sh
+++ b/tests/tests/basic-posix-consistency.sh
@@ -138,7 +138,9 @@ echo "--- can't overwrite non-empty dir"
 mkdir "$T_D0/dir/a/dir"
 touch "$T_D0/dir/a/dir/nope"
 mkdir "$T_D1/dir/c/clobber"
-mv -T "$T_D1/dir/c/clobber" "$T_D1/dir/a/dir" 2>&1 | t_filter_fs
+mv -T "$T_D1/dir/c/clobber" "$T_D1/dir/a/dir" 2>&1 | \
+		sed "s@mv: cannot move '.*' to '\(.*\)': Directory not empty@mv: cannot overwrite '\1': Directory not empty@g" | \
+		t_filter_fs
 find "$T_D0/dir" -ls 2>&1 | t_filter_fs > "$T_TMP.0"
 find "$T_D1/dir" -ls 2>&1 | t_filter_fs > "$T_TMP.1"
 diff -u "$T_TMP.0" "$T_TMP.1"
--- a/tests/tests/change-devices.sh
+++ b/tests/tests/change-devices.sh
@@ -11,9 +11,8 @@ truncate -s $sz "$T_TMP.equal"
 truncate -s $large_sz "$T_TMP.large"

 echo "== make scratch fs"
-t_quiet scoutfs mkfs -f -Q 0,127.0.0.1,$T_SCRATCH_PORT "$T_EX_META_DEV" "$T_EX_DATA_DEV"
-SCR="$T_TMPDIR/mnt.scratch"
-mkdir -p "$SCR"
+t_scratch_mkfs
+mkdir -p "$T_MSCR"

 echo "== small new data device fails"
 t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.small"
@@ -23,13 +22,13 @@ t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV" "$T_TMP.small"
 t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV"

 echo "== preparing while mounted fails"
-mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
+mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"
 t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal"
-umount "$SCR"
+umount "$T_MSCR"

 echo "== preparing without recovery fails"
-mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
-umount -f "$SCR"
+mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"
+umount -f "$T_MSCR"
 t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal"

 echo "== check sees metadata errors"
@@ -37,16 +36,16 @@ t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV"
 t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV" "$T_TMP.equal"

 echo "== preparing with file data fails"
-mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
-echo hi > "$SCR"/file
-umount "$SCR"
+mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"
+echo hi > "$T_MSCR"/file
+umount "$T_MSCR"
 scoutfs print "$T_EX_META_DEV" > "$T_TMP.print"
 t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal"

 echo "== preparing after emptied"
-mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
-rm -f "$SCR"/file
-umount "$SCR"
+mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"
+rm -f "$T_MSCR"/file
+umount "$T_MSCR"
 t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal"

 echo "== checks pass"
@@ -55,22 +54,22 @@ t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV" "$T_TMP.equal"

 echo "== using prepared"
 scr_loop=$(losetup --find --show "$T_TMP.equal")
-mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$scr_loop" "$SCR"
-touch "$SCR"/equal_prepared
-equal_tot=$(scoutfs statfs -s total_data_blocks -p "$SCR")
-umount "$SCR"
+mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$scr_loop" "$T_MSCR"
+touch "$T_MSCR"/equal_prepared
+equal_tot=$(scoutfs statfs -s total_data_blocks -p "$T_MSCR")
+umount "$T_MSCR"
 losetup -d "$scr_loop"

 echo "== preparing larger and resizing"
 t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.large"
 scr_loop=$(losetup --find --show "$T_TMP.large")
-mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$scr_loop" "$SCR"
-touch "$SCR"/large_prepared
-ls "$SCR"
-scoutfs resize-devices -p "$SCR" -d $large_sz
-large_tot=$(scoutfs statfs -s total_data_blocks -p "$SCR")
+mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$scr_loop" "$T_MSCR"
+touch "$T_MSCR"/large_prepared
+ls "$T_MSCR"
+scoutfs resize-devices -p "$T_MSCR" -d $large_sz
+large_tot=$(scoutfs statfs -s total_data_blocks -p "$T_MSCR")
 test "$large_tot" -gt "$equal_tot" ; echo "resized larger test rc: $?"
-umount "$SCR"
+umount "$T_MSCR"
 losetup -d "$scr_loop"

 echo "== cleanup"
--- a/tests/tests/enospc.sh
+++ b/tests/tests/enospc.sh
@@ -54,21 +54,16 @@ after=$(free_blocks Data "$T_M0")
 test "$before" == "$after" || \
 	t_fail "$after free data blocks after rm, expected $before"

-# XXX this is all pretty manual, would be nice to have helpers
 echo "== make small meta fs"
 # meta device just big enough for reserves and the metadata we'll fill
-scoutfs mkfs -A -f -Q 0,127.0.0.1,$T_SCRATCH_PORT -m 10G "$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 || \
-	t_fail "mkfs failed"
-SCR="$T_TMPDIR/mnt.scratch"
-mkdir -p "$SCR"
-mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
-	"$T_EX_DATA_DEV" "$SCR"
+t_scratch_mkfs -A -m 10G
+t_scratch_mount

 echo "== create large xattrs until we fill up metadata"
-mkdir -p "$SCR/xattrs"
+mkdir -p "$T_MSCR/xattrs"

 for f in $(seq 1 100000); do
-	file="$SCR/xattrs/file-$f"
+	file="$T_MSCR/xattrs/file-$f"
 	touch "$file"

 	LC_ALL=C create_xattr_loop -c 1000 -n user.scoutfs-enospc -p "$file" -s 65535 > $T_TMP.cxl 2>&1
@@ -84,10 +79,10 @@ for f in $(seq 1 100000); do
 done

 echo "== remove files with xattrs after enospc"
-rm -rf "$SCR/xattrs"
+rm -rf "$T_MSCR/xattrs"

 echo "== make sure we can create again"
-file="$SCR/file-after"
+file="$T_MSCR/file-after"
 C=120
 while (( C-- )); do
 	touch $file 2> /dev/null && break
@@ -99,7 +94,6 @@ sync
 rm -f "$file"

 echo "== cleanup small meta fs"
-umount "$SCR"
-rmdir "$SCR"
+t_scratch_umount

 t_pass
--- a/tests/tests/fence-and-reclaim.sh
+++ b/tests/tests/fence-and-reclaim.sh
@@ -5,6 +5,9 @@
 t_require_commands sleep touch grep sync scoutfs
 t_require_mounts 2

+# regularly see ~20/~30s
+VERIFY_TIMEOUT_SECS=90
+
 #
 # Make sure that all mounts can read the results of a write from each
 # mount.
@@ -40,8 +43,10 @@ verify_fenced_run()

 	for rid in $rids; do
 		grep -q ".* running rid '$rid'.* args 'ignored run args'" "$T_FENCED_LOG" || \
-			t_fail "fenced didn't execute RUN script for rid $rid"
+			return 1
 	done
+
+	return 0
 }

 echo "== make sure all mounts can see each other"
@@ -54,14 +59,7 @@ rid=$(t_mount_rid $cl)
 echo "cl $cl sv $sv rid $rid" >> "$T_TMP.log"
 sync
 t_force_umount $cl
-# wait for client reconnection to timeout
-while grep -q $rid $(t_debugfs_path $sv)/connections; do
-	sleep .5
-done
-while t_rid_is_fencing $rid; do
-	sleep .5
-done
-verify_fenced_run $rid
+t_wait_until_timeout $VERIFY_TIMEOUT_SECS verify_fenced_run $rid
 t_mount $cl
 check_read_write

@@ -83,15 +81,7 @@ for cl in $(t_fs_nrs); do
 	t_force_umount $cl
 done

-# wait for all client reconnections to timeout
-while egrep -q "($pattern)" $(t_debugfs_path $sv)/connections; do
-	sleep .5
-done
-# wait for all fence requests to complete
-while test -d $(echo /sys/fs/scoutfs/*/fence/* | cut -d " " -f 1); do
-	sleep .5
-done
-verify_fenced_run $rids
+t_wait_until_timeout $VERIFY_TIMEOUT_SECS verify_fenced_run $rids
 # remount all the clients
 for cl in $(t_fs_nrs); do
 	if [ $cl == $sv ]; then
@@ -107,12 +97,7 @@ rid=$(t_mount_rid $sv)
 echo "sv $sv rid $rid" >> "$T_TMP.log"
 sync
 t_force_umount $sv
-t_wait_for_leader
-# wait until new server is done fencing unmounted leader rid
-while t_rid_is_fencing $rid; do
-	sleep .5
-done
-verify_fenced_run $rid
+t_wait_until_timeout $VERIFY_TIMEOUT_SECS verify_fenced_run $rid
 t_mount $sv
 check_read_write

@@ -127,11 +112,7 @@ for nr in $(t_fs_nrs); do
 	t_force_umount $nr
 done
 t_mount_all
-# wait for all fence requests to complete
-while test -d $(echo /sys/fs/scoutfs/*/fence/* | cut -d " " -f 1); do
-	sleep .5
-done
-verify_fenced_run $rids
+t_wait_until_timeout $VERIFY_TIMEOUT_SECS verify_fenced_run $rids
 check_read_write

 t_pass
--- a/tests/tests/format-version-forward-back.sh
+++ b/tests/tests/format-version-forward-back.sh
@@ -11,8 +11,8 @@
 # format version.
 #

-# not supported on el8 or higher
-if [ $(source /etc/os-release ; echo ${VERSION_ID:0:1}) -gt 7 ]; then
+# not supported on el8, or higher versions.
+if [ $(source /etc/os-release ; echo ${VERSION_ID} | cut -d. -f1) -gt 7 ]; then
 	t_skip_permitted "Unsupported OS version"
 fi

--- a/tests/tests/get-referring-entries.sh
+++ b/tests/tests/get-referring-entries.sh
@@ -72,7 +72,7 @@ touch $T_D0/dir/file
 mkdir $T_D0/dir/dir
 ln -s $T_D0/dir/file $T_D0/dir/symlink
 mknod $T_D0/dir/char c 1 3 # null
-mknod $T_D0/dir/block b 7 0 # loop0
+mknod $T_D0/dir/block b 42 0 # sample block dev - nonexistent, number is for demo use only
 for name in $(ls -UA $T_D0/dir | sort); do
 	ino=$(stat -c '%i' $T_D0/dir/$name)
 	$GRE $ino | filter_types
--- a/tests/tests/inode-deletion.sh
+++ b/tests/tests/inode-deletion.sh
@@ -61,18 +61,28 @@ rm -f "$T_D1/file"
 check_ino_index "$ino" "$dseq" "$T_M0"
 check_ino_index "$ino" "$dseq" "$T_M1"

+# Hurry along the orphan scanners. If any are currently asleep, we will
+# have to wait at least their current scan interval before they wake up,
+# run, and notice their new interval.
+t_save_all_sysfs_mount_options orphan_scan_delay_ms
+t_set_all_sysfs_mount_options orphan_scan_delay_ms 500
+t_wait_for_orphan_scan_runs
+
 echo "== unlink wait for open on other mount"
-echo "contents" > "$T_D0/file"
-ino=$(stat -c "%i" "$T_D0/file")
-dseq=$(scoutfs stat -s data_seq "$T_D0/file")
-exec {FD}<"$T_D0/file"
-rm -f "$T_D1/file"
+echo "contents" > "$T_D0/badfile"
+ino=$(stat -c "%i" "$T_D0/badfile")
+dseq=$(scoutfs stat -s data_seq "$T_D0/badfile")
+exec {FD}<"$T_D0/badfile"
+rm -f "$T_D1/badfile"
 echo "mount 0 contents after mount 1 rm: $(cat <&$FD)"
 check_ino_index "$ino" "$dseq" "$T_M0"
 check_ino_index "$ino" "$dseq" "$T_M1"
 exec {FD}>&-  # close
 # we know that revalidating will unhash the remote dentry
-stat "$T_D0/file" 2>&1 | sed 's/cannot statx/cannot stat/' | t_filter_fs
+stat "$T_D0/badfile" 2>&1 | sed 's/cannot statx/cannot stat/' | t_filter_fs
+t_force_log_merge
+# wait for orphan scanners to pick up the unlinked inode and become idle
+t_wait_for_no_orphans
 check_ino_index "$ino" "$dseq" "$T_M0"
 check_ino_index "$ino" "$dseq" "$T_M1"

@@ -83,16 +93,20 @@ rm -f "$T_D0/dir"/files-*
 rmdir "$T_D0/dir"

 echo "== open files survive remote scanning orphans"
-echo "contents" > "$T_D0/file"
-ino=$(stat -c "%i" "$T_D0/file")
-dseq=$(scoutfs stat -s data_seq "$T_D0/file")
-exec {FD}<"$T_D0/file"
-rm -f "$T_D0/file"
+echo "contents" > "$T_D0/lastfile"
+ino=$(stat -c "%i" "$T_D0/lastfile")
+dseq=$(scoutfs stat -s data_seq "$T_D0/lastfile")
+exec {FD}<"$T_D0/lastfile"
+rm -f "$T_D0/lastfile"
 t_umount 1
 t_mount 1
 echo "mount 0 contents after mount 1 remounted: $(cat <&$FD)"
 exec {FD}>&-  # close
+t_force_log_merge
+t_wait_for_no_orphans
 check_ino_index "$ino" "$dseq" "$T_M0"
 check_ino_index "$ino" "$dseq" "$T_M1"

+t_restore_all_sysfs_mount_options orphan_scan_delay_ms
+
 t_pass
--- a/tests/tests/quorum-heartbeat-timeout.sh
+++ b/tests/tests/quorum-heartbeat-timeout.sh
@@ -62,7 +62,7 @@ test_timeout()
 	sleep 1

 	# tear down the current server/leader
-	t_force_umount $sv
+	t_force_umount $sv &

 	# see how long it takes for the next leader to start
 	start=$(time_ms)
@@ -73,6 +73,7 @@ test_timeout()
 	echo "to $to delay $delay" >> $T_TMP.delay

 	# restore the mount that we tore down
+	wait
 	t_mount $sv

 	# make sure the new leader delay was reasonable, allowing for some slack
--- a/tests/tests/renameat2-noreplace.sh
+++ b/tests/tests/renameat2-noreplace.sh
@@ -8,19 +8,19 @@ t_require_mounts 2
 echo "=== renameat2 noreplace flag test"

 # give each mount their own dir (lock group) to minimize create contention
-mkdir $T_M0/dir0
-mkdir $T_M1/dir1
+mkdir $T_D0/dir0
+mkdir $T_D1/dir1

 echo "=== run two asynchronous calls to renameat2 NOREPLACE"
 for i in $(seq 0 100); do
        # prepare inputs in isolation
-        touch "$T_M0/dir0/old0"
-        touch "$T_M1/dir1/old1"
+        touch "$T_D0/dir0/old0"
+        touch "$T_D1/dir1/old1"

        # race doing noreplace renames, both can't succeed
-        dumb_renameat2 -n "$T_M0/dir0/old0" "$T_M0/dir0/sharednew" 2> /dev/null &
+        dumb_renameat2 -n "$T_D0/dir0/old0" "$T_D0/dir0/sharednew" 2> /dev/null &
        pid0=$!
-        dumb_renameat2 -n "$T_M1/dir1/old1" "$T_M1/dir0/sharednew" 2> /dev/null &
+        dumb_renameat2 -n "$T_D1/dir1/old1" "$T_D1/dir0/sharednew" 2> /dev/null &
        pid1=$!

        wait $pid0
@@ -31,7 +31,7 @@ for i in $(seq 0 100); do
        test "$rc0" == 0 -a "$rc1" == 0 && t_fail "both renames succeeded"

        # blow away possible files for either race outcome
-        rm -f "$T_M0/dir0/old0" "$T_M1/dir1/old1" "$T_M0/dir0/sharednew" "$T_M1/dir1/sharednew"
+        rm -f "$T_D0/dir0/old0" "$T_D1/dir1/old1" "$T_D0/dir0/sharednew" "$T_D1/dir1/sharednew"
 done

 t_pass
--- a/tests/tests/resize-devices.sh
+++ b/tests/tests/resize-devices.sh
@@ -19,8 +19,8 @@ df_free() {
 }

 same_totals() {
-	cur_meta_tot=$(statfs_total meta "$SCR")
-	cur_data_tot=$(statfs_total data "$SCR")
+	cur_meta_tot=$(statfs_total meta "$T_MSCR")
+	cur_data_tot=$(statfs_total data "$T_MSCR")

 	test "$cur_meta_tot" == "$exp_meta_tot" || \
 		t_fail "cur total_meta_blocks $cur_meta_tot != expected $exp_meta_tot"
@@ -34,10 +34,10 @@ same_totals() {
 # some slop to account for reserved blocks and concurrent allocation.
 #
 devices_grew() {
-	cur_meta_tot=$(statfs_total meta "$SCR")
-	cur_data_tot=$(statfs_total data "$SCR")
-	cur_meta_df=$(df_free MetaData "$SCR")
-	cur_data_df=$(df_free Data "$SCR")
+	cur_meta_tot=$(statfs_total meta "$T_MSCR")
+	cur_data_tot=$(statfs_total data "$T_MSCR")
+	cur_meta_df=$(df_free MetaData "$T_MSCR")
+	cur_data_df=$(df_free Data "$T_MSCR")

 	local grow_meta_tot=$(echo "$exp_meta_tot * 2" | bc)
 	local grow_data_tot=$(echo "$exp_data_tot * 2" | bc)
@@ -70,19 +70,13 @@ size_data=$(blockdev --getsize64 "$T_EX_DATA_DEV")
 quarter_meta=$(echo "$size_meta / 4" | bc)
 quarter_data=$(echo "$size_data / 4" | bc)

-# XXX this is all pretty manual, would be nice to have helpers
 echo "== make initial small fs"
-scoutfs mkfs -A -f -Q 0,127.0.0.1,$T_SCRATCH_PORT -m $quarter_meta -d $quarter_data \
-	"$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 || \
-		t_fail "mkfs failed"
-SCR="$T_TMPDIR/mnt.scratch"
-mkdir -p "$SCR"
-mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
-	"$T_EX_DATA_DEV" "$SCR"
+t_scratch_mkfs -A -m $quarter_meta -d $quarter_data
+t_scratch_mount

 # then calculate sizes based on blocks that mkfs used
-quarter_meta=$(echo "$(statfs_total meta "$SCR") * 64 * 1024" | bc)
-quarter_data=$(echo "$(statfs_total data "$SCR") * 4 * 1024" | bc)
+quarter_meta=$(echo "$(statfs_total meta "$T_MSCR") * 64 * 1024" | bc)
+quarter_data=$(echo "$(statfs_total data "$T_MSCR") * 4 * 1024" | bc)
 whole_meta=$(echo "$quarter_meta * 4" | bc)
 whole_data=$(echo "$quarter_data * 4" | bc)
 outsize_meta=$(echo "$whole_meta * 2" | bc)
@@ -93,59 +87,58 @@ shrink_meta=$(echo "$quarter_meta / 2" | bc)
 shrink_data=$(echo "$quarter_data / 2" | bc)

 # and save expected values for checks
-exp_meta_tot=$(statfs_total meta "$SCR")
-exp_meta_df=$(df_free MetaData "$SCR")
-exp_data_tot=$(statfs_total data "$SCR")
-exp_data_df=$(df_free Data "$SCR")
+exp_meta_tot=$(statfs_total meta "$T_MSCR")
+exp_meta_df=$(df_free MetaData "$T_MSCR")
+exp_data_tot=$(statfs_total data "$T_MSCR")
+exp_data_df=$(df_free Data "$T_MSCR")

 echo "== 0s do nothing"
-scoutfs resize-devices -p "$SCR" 
-scoutfs resize-devices -p "$SCR" -m 0
-scoutfs resize-devices -p "$SCR" -d 0
-scoutfs resize-devices -p "$SCR" -m 0 -d 0
+scoutfs resize-devices -p "$T_MSCR"
+scoutfs resize-devices -p "$T_MSCR" -m 0
+scoutfs resize-devices -p "$T_MSCR" -d 0
+scoutfs resize-devices -p "$T_MSCR" -m 0 -d 0

 echo "== shrinking fails"
-scoutfs resize-devices -p "$SCR" -m $shrink_meta
-scoutfs resize-devices -p "$SCR" -d $shrink_data
-scoutfs resize-devices -p "$SCR" -m $shrink_meta -d $shrink_data
+scoutfs resize-devices -p "$T_MSCR" -m $shrink_meta
+scoutfs resize-devices -p "$T_MSCR" -d $shrink_data
+scoutfs resize-devices -p "$T_MSCR" -m $shrink_meta -d $shrink_data
 same_totals

 echo "== existing sizes do nothing"
-scoutfs resize-devices -p "$SCR" -m $quarter_meta
-scoutfs resize-devices -p "$SCR" -d $quarter_data
-scoutfs resize-devices -p "$SCR" -m $quarter_meta -d $quarter_data
+scoutfs resize-devices -p "$T_MSCR" -m $quarter_meta
+scoutfs resize-devices -p "$T_MSCR" -d $quarter_data
+scoutfs resize-devices -p "$T_MSCR" -m $quarter_meta -d $quarter_data
 same_totals

 echo "== growing outside device fails"
-scoutfs resize-devices -p "$SCR" -m $outsize_meta
-scoutfs resize-devices -p "$SCR" -d $outsize_data
-scoutfs resize-devices -p "$SCR" -m $outsize_meta -d $outsize_data
+scoutfs resize-devices -p "$T_MSCR" -m $outsize_meta
+scoutfs resize-devices -p "$T_MSCR" -d $outsize_data
+scoutfs resize-devices -p "$T_MSCR" -m $outsize_meta -d $outsize_data
 same_totals

 echo "== resizing meta works"
-scoutfs resize-devices -p "$SCR" -m $half_meta
+scoutfs resize-devices -p "$T_MSCR" -m $half_meta
 devices_grew meta

 echo "== resizing data works"
-scoutfs resize-devices -p "$SCR" -d $half_data
+scoutfs resize-devices -p "$T_MSCR" -d $half_data
 devices_grew data

 echo "== shrinking back fails"
-scoutfs resize-devices -p "$SCR" -m $quarter_meta
-scoutfs resize-devices -p "$SCR" -m $quarter_data
+scoutfs resize-devices -p "$T_MSCR" -m $quarter_meta
+scoutfs resize-devices -p "$T_MSCR" -m $quarter_data
 same_totals

 echo "== resizing again does nothing"
-scoutfs resize-devices -p "$SCR" -m $half_meta
-scoutfs resize-devices -p "$SCR" -m $half_data
+scoutfs resize-devices -p "$T_MSCR" -m $half_meta
+scoutfs resize-devices -p "$T_MSCR" -m $half_data
 same_totals

 echo "== resizing to full works"
-scoutfs resize-devices -p "$SCR" -m $whole_meta -d $whole_data
+scoutfs resize-devices -p "$T_MSCR" -m $whole_meta -d $whole_data
 devices_grew meta data

 echo "== cleanup extra fs"
-umount "$SCR"
-rmdir "$SCR"
+t_scratch_umount

 t_pass
--- a/utils/fenced/scoutfs-fenced
+++ b/utils/fenced/scoutfs-fenced
@@ -7,7 +7,7 @@ message_output()

 error_message()
 {
-	message_output "$@" >> /dev/stderr
+	message_output "$@" >&2
 }

 error_exit()
--- a/utils/man/scoutfs.5
+++ b/utils/man/scoutfs.5
@@ -63,6 +63,22 @@ mounts because there are more locks that cover the same number of
 created files.  This can be helpful when working with smaller numbers of
 large files.
 .TP
+.B lock_idle_count=<number>
+This option sets the number of locks that the client will allow to
+remain idle after being granted.  If the number of locks exceeds this
+count then the client will try to free the oldest locks.  This setting
+is per-mount and only changes the behavior of that mount.
+.sp
+Idle locks are not reclaimed by memory pressure so this option
+determines the limit of how much memory is likely to be pinned by
+allocated idle locks.  Setting this too low can increase latency of
+operations as repeated use of a working set of locks has to request the
+locks from the network rather than using granted idle locks.
+.sp
+The count is not strictly enforced.  Operations are allowed to use locks
+while over the limit to avoid deadlocks under heavy concurrent load.
+Exceeding the count only triggers an attempt to free idle locks.
+.TP
 .B log_merge_wait_timeout_ms=<number>
 This option sets the amount of time, in milliseconds, that log merge
 creation can wait before timing out.  This setting is per-mount, only
--- a/utils/sparse.sh
+++ b/utils/sparse.sh
@@ -71,7 +71,7 @@ else
 	m64=""
 fi

-sparse $m64 $include $search/include "$@" 2>&1 | egrep -v "($RE)" | tee .sparse.output
+sparse $m64 $include $search/include "$@" 2>&1 | grep -v -E "($RE)" | tee .sparse.output

 rm -f $defines