Compare commits


86 Commits
v1.13 ... v1.17

Author SHA1 Message Date
Zach Brown
a9281b75fa v1.17 Release
Finish the release notes for the 1.17 release.

Signed-off-by: Zach Brown <zab@versity.com>
2023-10-23 14:20:13 -07:00
Zach Brown
d71583bcf5 Merge pull request #134 from versity/auke/tests-add-bc
Add `bc` to test requirement.
2023-10-16 15:12:22 -07:00
Zach Brown
bb835b948d Merge pull request #138 from versity/auke/ignore-journald-rotate
Filter out journald rotate messages.
2023-10-16 14:54:56 -07:00
Zach Brown
bcdc4f5423 Merge pull request #143 from versity/zab/t_quiet_appends
t_quiet appends command output
2023-10-12 11:58:50 -07:00
Auke Kok
7ceb215c91 Filter out journald rotate messages.
On el9 distros systemd-journald will log rotation events into kmsg.
Since the default logs on VM images are transient only, they are
rotated several times during a single test cycle, causing test failures.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-12 12:27:41 -04:00
Auke Kok
d4d2b0850b Add bc to test requirement.
Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-12 12:21:29 -04:00
Zach Brown
cf05aefe50 t_quiet appends command output
The t_quiet test command execution helper was constantly truncating the
quiet.log with the output of each command. It was meant to show each
command and its output as they're run.

Signed-off-by: Zach Brown <zab@versity.com>
2023-10-11 14:50:04 -07:00
Zach Brown
9f06065ce7 Merge pull request #123 from versity/auke/el8
el8 support
2023-10-10 10:37:43 -07:00
Ben McClelland
d2c2fece2a Add rpm spec file support for el8 builds
The rpmbuild support files no longer define the previously used kernel
module macros. This carves out the differences between el7 and el8 with
conditionals based on the distro we are building for.

Signed-off-by: Ben McClelland <ben.mcclelland@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
0e1e55d25b Ignore last flag output by filefrag.
New versions of filefrag will output the presence of the `last`
flag as well, but we don't care.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
293cee9554 Don't use static struct initializer.
In rhel7 this is a nested struct with ktime_t. However, in rhel8
ktime_t is a simple s64, and not a union, and thus we can't do
this as easily. Just memset it.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
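For illustration, a minimal sketch of the portable pattern this commit describes; the struct and field names here are hypothetical, not the actual scoutfs types:

	#include <linux/ktime.h>
	#include <linux/string.h>
	#include <linux/types.h>

	struct example_state {
		ktime_t deadline;	/* a union on rhel7, a plain s64 on rhel8+ */
		u64 count;
	};

	static void example_reset(struct example_state *st)
	{
		/* works regardless of how ktime_t is defined */
		memset(st, 0, sizeof(*st));
	}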
Auke Kok
a7704e0b56 Allow the kernel to return -ESTALE from orphan-inode test
In newer kernels, we always get -ESTALE because the inode has been
marked immediately as deleting. Since this is expected behavior we
should not fail the test here on this error value.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
819df4be60 Skip userns based testing for RHEL8.
In RHEL7, this was skipped automatically. In RHEL8, we don't support
the needed passing through of the actual user namespace into our
ACL set/get handlers. Once we get around v5.11 or so, the handlers
are automatically passed the namespace. Until then, skip this test.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
592e3d471f Use .prefix for POSIX acl instead of .name.
New kernels expect to do a partial match when a .prefix is used here,
and provide a .name member in case matching should look at the whole
string. This is what we want.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
29160b0bc6 Don't cache ACLs in newer kernels.
The caller takes care of caching for us. Doing our own caching
interferes with the memory management of cached ACLs and breaks things.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
11c041d2ea New versions of getfattr will quote empty attr values.
Instead of messing with quotes and using grep for the correct
xattr name, directly query the value of the xattr being tested
only, and compare that to the input.

Side effect is that this is significantly simpler and faster.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
46e8dfe884 Account for coreutils using statx() call instead of stat()
`stat` internally switched to the new `statx` syscall, which changes
the subsequent perror() output. The underlying error is the same as
before (and expected).

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
a9beeaf5da Account for e2fsprogs output format changes.
The filefrag program in e2fsprogs-v1.42.10-10-g29758d2f now includes
an extra flag, and changes how the `unknown` flag is output.

We essentially adjust for this "new" golden value on the fly if we
encounter it. We don't expect future changes to the output.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
205d8ebd4a Account for quoting style changes in coreutils.
In older versions of coreutils, quoted strings are occasionally
output using utf-8 open/close single quotes.

New versions of coreutils will exclusively use the ASCII single quote
character "'" when the output is not a TTY - as is the case with
all test scripts.

However, we can avoid most of these problems by always setting LC_ALL=C
in testing.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
e580f33f82 Ignore loop device resizing messages.
These occasionally trigger during tests.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
d480243c11 Support .read/write_iter callbacks in lieu of .aio_read/write
The aio_read and aio_write callbacks are no longer used by newer
kernels, which now use iter-based readers and writers.

We can avoid implementing plain .read and .write since the VFS builds
an iov_iter and falls back to the _iter methods automatically when
needed.

We add a new data_wait_check_iter() function accordingly.

With these methods removed from the kernel, the el8 kernel no longer
uses the extended ops wrapper struct and is now much closer to
upstream. Consequently, a lot of methods move between
inode_dir_operations and inode_file_operations etc, and things should
look a bit more structured as a result.

We also need a slightly different data_wait_check() that accounts for
the iter and offset properly.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
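As a rough sketch of the iter-based shape (illustrative names only, not the scoutfs implementation, which wraps its own data_wait_check_iter() logic around the generic helpers):

	#include <linux/fs.h>
	#include <linux/uio.h>

	static ssize_t example_read_iter(struct kiocb *iocb, struct iov_iter *to)
	{
		/* ... wait for offline extents as needed, then ... */
		return generic_file_read_iter(iocb, to);
	}

	static ssize_t example_write_iter(struct kiocb *iocb, struct iov_iter *from)
	{
		return generic_file_write_iter(iocb, from);
	}

	static const struct file_operations example_file_fops = {
		.read_iter	= example_read_iter,
		.write_iter	= example_write_iter,
		/* no .read/.write: the VFS falls back to the _iter methods */
	};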
Auke Kok
bafecbc604 Implement .readahead for address_space_operations (aops).
.readpages is obsolete in el8 kernels. We implement the .readahead
method instead which is passed a struct readahead_control. We use
the readahead_page(rac) accessor to retrieve page by page from the
struct.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
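A minimal sketch of the new entry point, mirroring the compat path in the data.c diff further below; example_get_block is a hypothetical get_block_t, and the real code also checks for offline extents before issuing reads:

	#include <linux/fs.h>
	#include <linux/buffer_head.h>
	#include <linux/mpage.h>
	#include <linux/pagemap.h>

	static int example_get_block(struct inode *inode, sector_t iblock,
				     struct buffer_head *bh, int create);

	static void example_readahead(struct readahead_control *rac)
	{
		/* rac describes the file, starting offset, and number of pages */
		mpage_readahead(rac, example_get_block);
	}

	static const struct address_space_operations example_aops = {
		.readahead	= example_readahead,	/* replaces the obsolete .readpages */
	};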
Auke Kok
65be4682e3 implement generic_file_buffered_write()
This function was removed in el8, so we need to implement it ourselves
now. Copy it.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
e88845d185 (un)register_hotcpu_notifier is obsolete
v4.9-12228-g530e9b76ae8f drops all the (un)register_(hot)cpu_notifier()
API functions. From here on we need to use the new cpuhp_* API.

We avoid this entirely for now, at the cost of leaking pages until
the filesystem is unmounted.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
ec50e66fff Timespec64 changes for yr2038.
Provide a fallback `current_time(inode)` implementation for older
kernels.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
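One plausible shape for such a fallback, keyed off the KC_CURRENT_TIME_INODE check added to Makefile.kernelcompat below; the exact kernelcompat.h definition may differ:

	#include <linux/fs.h>

	#ifndef KC_CURRENT_TIME_INODE
	/* older kernels: approximate current_time() with the sb-granularity helper */
	#define current_time(inode)	current_fs_time((inode)->i_sb)
	#endif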
Auke Kok
0e91f9a277 Adjust scoutfs_quorum_loop trace point.
Convert the timeout struct into a u64 nsecs value before passing it to
the tracepoint event, so as not to overflow the 64-bit limitation on args.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
69068ae2c0 Initialize msg.msg_iter from iovec.
Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
016dac39bf Handle net arg being added to sock_create_kern()
Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
e69cf3dec8 kernel_getsockname and kernel_getpeername dropped addrlen arg.
v4.16-rc1-1-g9b2c45d479d0

This interface now returns (sizeof (addr)) on success, instead of 0.
Therefore, we have to change the error condition detection.

The compat for older kernels handles the addrlen check internally.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
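For reference, a sketch of the calling-convention change being adapted to, assuming the KC_KERNEL_GETSOCKNAME_ADDRLEN flag from the Makefile below marks the older addrlen-taking prototype; names are illustrative:

	#include <linux/net.h>
	#include <linux/in.h>
	#include <linux/socket.h>

	/* returns 0 on success with the bound address in *sin, negative errno on error */
	static int example_local_name(struct socket *sock, struct sockaddr_in *sin)
	{
	#ifdef KC_KERNEL_GETSOCKNAME_ADDRLEN
		int addrlen = sizeof(*sin);

		/* old API: fills *addrlen and returns 0 on success */
		return kernel_getsockname(sock, (struct sockaddr *)sin, &addrlen);
	#else
		int ret;

		/* new API: returns the address length on success */
		ret = kernel_getsockname(sock, (struct sockaddr *)sin);
		return ret < 0 ? ret : 0;
	#endif
	}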
Auke Kok
d6c143a639 xattr functions are now passed flags through struct xattr_handler
Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
09ae100254 Remove the use of backing_dev_info ptr from address_space.
Instead, use the new inline inode_to_bdi from <backing-dev.h> to fill
in the task's backing_dev_info.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
50f5077863 Do not use MS_* flags anymore in kernel space.
MS_* flags from <linux/mount.h> should not be used in the kernel
anymore from 4.x onwards. Instead, we need to use the SB_* versions.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
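The change itself is mechanical; a sketch of the before/after for a typical check (the helper names are illustrative):

	#include <linux/fs.h>

	/* old: userspace-facing MS_* flags */
	static bool example_is_readonly_old(struct super_block *sb)
	{
		return sb->s_flags & MS_RDONLY;
	}

	/* new: in-kernel SB_* flags */
	static bool example_is_readonly_new(struct super_block *sb)
	{
		return sb->s_flags & SB_RDONLY;
	}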
Zach Brown
cca4fcb788 Use count/scan objects shrinking interface
Move to the more recent interfaces for counting and scanning cached
objects to shrink.

Signed-off-by: Zach Brown <zab@versity.com>
Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
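A minimal sketch of the count/scan shape; the names are illustrative, and the actual scoutfs shrinkers are wrapped in KC_* compat macros as the block.c diff below shows:

	#include <linux/shrinker.h>

	static unsigned long example_cached_count(void);
	static unsigned long example_reclaim(unsigned long nr);

	static unsigned long example_count_objects(struct shrinker *shrink,
						   struct shrink_control *sc)
	{
		/* cheap estimate of how many cached objects could be freed */
		return example_cached_count();
	}

	static unsigned long example_scan_objects(struct shrinker *shrink,
						  struct shrink_control *sc)
	{
		/* free up to sc->nr_to_scan objects and return how many were freed,
		 * or SHRINK_STOP if no progress can be made right now */
		return example_reclaim(sc->nr_to_scan);
	}

	static struct shrinker example_shrinker = {
		.count_objects	= example_count_objects,
		.scan_objects	= example_scan_objects,
		.seeks		= DEFAULT_SEEKS,
	};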
Auke Kok
1d150da3f0 Use page->lru instead of page->list
With v3.14-rc1-10-g34bf6ef94a83, page->list is removed. Instead,
use the union member ->lru.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Zach Brown
28f03d3558 Use more modern bio interfaces
Move towards modern bio interfaces, while unfortunately carrying along a
bunch of compat functions that let us still work with the old
incompatible interfaces.

Signed-off-by: Zach Brown <zab@versity.com>
Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Zach Brown
4275f6e6e5 Use memalloc_nofs_save
memalloc_nofs_save() was introduced as preferential to trying to use GFP
flags to indicate that a task should not recurse during reclaim.  We use
it instead of the _noio_ we were using before.

Signed-off-by: Zach Brown <zab@versity.com>
2023-10-09 15:35:40 -04:00
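The pattern is a simple bracket around allocations that must not recurse into the filesystem; a small sketch with hypothetical names (the block.c diff below shows the same change around __vmalloc):

	#include <linux/sched/mm.h>
	#include <linux/slab.h>

	static void *example_alloc_nofs(size_t size)
	{
		unsigned int nofs_flags;
		void *buf;

		nofs_flags = memalloc_nofs_save();	/* reclaim in this task won't re-enter the fs */
		buf = kmalloc(size, GFP_KERNEL);	/* behaves like GFP_NOFS while saved */
		memalloc_nofs_restore(nofs_flags);

		return buf;
	}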
Zach Brown
70a5b6ffe2 Use percpu_counter_add_batch
__percpu_counter_add was renamed to percpu_counter_add_batch to make it
clear that the __ doesn't mean it's less safe, as it does in other calls
in the API, but just that it takes an additional parameter.

Signed-off-by: Zach Brown <zab@versity.com>
Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
b89ecd47b4 Use __posix_acl_create/_chmod and add backwards compatibility
There are new interfaces available, but the old ones have been retained
for us to use. On older kernels we need to fall back to the previous
names of these functions.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
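One plausible compat shape for older kernels, keyed off the KC___POSIX_ACL_CREATE check added to Makefile.kernelcompat below; the actual kernelcompat.h may differ:

	#ifndef KC___POSIX_ACL_CREATE
	/* pre-v3.14 kernels only have the unprefixed names with the same semantics */
	#define __posix_acl_create(acl, gfp, mode)	posix_acl_create(acl, gfp, mode)
	#define __posix_acl_chmod(acl, gfp, mode)	posix_acl_chmod(acl, gfp, mode)
	#endif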
Auke Kok
4293816764 Fix argument test for __posix_acl_valid.
The argument is fixed to be user_namespace, instead of user_ns.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
f0de59a9a3 Use setattr_prepare() as inode_change_ok() was removed in v4.8-rc1
Instead, we can call setattr_prepare() directly. We provide a fallback
for older kernels.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
1f0a08eacb Use the new inode->i_version manipulation methods.
Provide fallback in degraded mode for kernels pre-v4.15-rc3 by directly
manipulating the member as needed.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
dac3f056a5 inode->i_mutex has been replaced with inode->i_rwsem.
Since v4.6-rc3-27-g9902af79c01a, inode->i_mutex has been replaced
with ->i_rwsem. However, inode_lock() and related functions have long
worked as intended either way and provide fully exclusive locking on
the inode.

To avoid a name clash on pre-rhel8 kernels, we have to rename a
stack variable in `src/file.c`.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
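The accessors hide the member change, so call sites look the same on both kernel generations; a tiny sketch (older kernels presumably get these helpers from the compat header):

	#include <linux/fs.h>

	static void example_update(struct inode *inode)
	{
		inode_lock(inode);	/* i_mutex on old kernels, exclusive i_rwsem on new ones */
		/* ... modify the inode ... */
		inode_unlock(inode);
	}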
Auke Kok
af868aad9b New inode->i_version API requires <iversion.h>
Since v4.15-rc3-4-gae5e165d855d, <linux/iversion.h> contains a new
inode->i_version API and it is not included by default.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
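This maps to a guarded include in the compat header, keyed off the KC_NEED_LINUX_IVERSION_H check added to Makefile.kernelcompat below; a sketch, as the actual kernelcompat.h may differ:

	#ifdef KC_NEED_LINUX_IVERSION_H
	#include <linux/iversion.h>	/* inode_inc_iversion() and friends live here since v4.15 */
	#endif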
Auke Kok
cf4df0ef9f use $(MAKE) to allow passing jobserver flags.
With this, we can `make -jX` to speed up compiles a bit from
the kmod folder.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
81aa58253e module_init/_exit should have a semicolon at eol.
In the past this was not needed, but from el7 onwards these macros
require the semicolon.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
c683ded0e6 Adjust for new augmented rbtree compute callback function signature
The new variant of the code that recomputes the augmented value is
designed to handle non-scalar types, and to facilitate that it has new
semantics for the _compute callback. The callback is now passed a
boolean flag `exit` indicating that, if the value is unchanged, it
should exit and halt propagation.

The callback now returns whether propagation should stop, rather than
the newly computed value, and it updates the computed value directly
in the node.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
f27431b3ae Add include <blkdev.h>.
Fixes: Error: implicit declaration of function ‘blkdev_put’

Previously blkdev_put() was declared as an `extern` in <fs.h> and picked
up implicitly, hence the need to include <blkdev.h> explicitly now.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
28c3cee995 preempt_mask.h is removed entirely.
v4.1-rc4-22-g92cf211874e9 merges this into preempt.h, and on
rhel7 kernels we don't need this include anymore either.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
430960ef3c page_cache_release() is removed. put_page() instead.
Even in 3.x, the two were already equivalent.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
7006a84d96 flush_work_sync is equivalent to flush_work.
v3.15-rc1-6-g1a56f2aa4752 removes flush_work_sync entirely, but
ever since v3.6-rc1-25-g606a5020b9bd which made all workqueues
non-reentrant, it has been equivalent to flush_work.

This is safe because in all cases only one server->work can be
in flight at a time.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
eafb8621da d_materialise_unique replaced with d_splice_alias.
Note argument order reversal.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
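A sketch of the before/after at a call site, with the reversed argument order the commit calls out (dentry and inode are placeholders):

	/* old: */
	struct dentry *found = d_materialise_unique(dentry, inode);

	/* new (note the swapped argument order): */
	struct dentry *found = d_splice_alias(inode, dentry);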
Auke Kok
006555d42a READ_ONCE() replaces ACCESS_ONCE()
v3.18-rc3-2-g230fa253df63 forces us to replace ACCESS_ONCE() with
READ_ONCE(), but it is probably the better interface anyway and works
with non-scalar types.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
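The replacement is one-for-one at call sites; a sketch with a placeholder field:

	/* old: */
	val = ACCESS_ONCE(shared->counter);

	/* new: also safe for non-scalar types */
	val = READ_ONCE(shared->counter);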
Auke Kok
8e458f9230 PAGE_CACHE_SIZE was removed, replace with PAGE_SIZE.
PAGE_CACHE_SIZE was previously defined to be equivalent to PAGE_SIZE.

This symbol was removed in v4.6-rc1-32-g1fa64f198b9f.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Auke Kok
32c0dbce09 Include kernel.h and fs.h at the top of kernelcompat.h
Because we `-include src/kernelcompat.h` from the command line,
this header gets included before any of the kernel includes in
most .c and .h files. We should at least make sure we pull in
<linux/fs.h> and <linux/kernel.h> since they're required.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2023-10-09 15:35:40 -04:00
Zach Brown
9c9ba651bd Merge pull request #141 from versity/zab/fence-reclaim-racey-seq-test
Remove seq test from fence-and-reclaim
2023-10-09 12:21:48 -07:00
Zach Brown
14eddb6420 Remove seq test from fence-and-reclaim
The fence-and-reclaim test has a little function that runs after fencing
and recovery to make sure that all the mounts are operational again.
The main thing it does is re-use the same locks across a lot of files to
ensure that lock recovery didn't lose any locks that stop forward
progress.

But I also threw in a test of the committed_seq machinery, as a bit of
belt and suspenders. The problem is that the test is racy. It samples
the seq after the write, so the greatest seq it remembers can be after
the write and will not be committed by the other nodes' reads. It being
less than the committed_seq is a totally reasonable race.

Which explains why this test has been failing, rarely, ever since it
was written. There's no particular reason to test the committed_seq
machinery here, so we can just remove that racy test.

Signed-off-by: Zach Brown <zab@versity.com>
2023-10-09 10:56:15 -07:00
Zach Brown
597208324d Merge pull request #140 from versity/zab/v1.16
v1.16 Release
2023-10-04 11:51:45 -07:00
Zach Brown
8596c9ad45 v1.16 Release
Finish the release notes for the 1.16 release.

Signed-off-by: Zach Brown <zab@versity.com>
2023-10-04 10:32:55 -07:00
Zach Brown
8a705ea380 Merge pull request #139 from versity/zab/hold_commit_stuck
Start server commits when holds wait for alloc
2023-10-04 10:27:12 -07:00
Zach Brown
4784ccdfd5 Start server commits when holds wait for alloc
Server code that wants to dirty blocks by holding a commit won't be
allowed to until the current allocators for the server transaction have
enough space for the holder.  As an active holder applies the commit the
allocators are refilled and the waiting holders will proceed.

But the current allocators can have no resources as the server starts
up.  There will never be active holders to apply the commit and refill
the allocators.  In this case all the holders will block indefinitely.

The fix is to trigger a server commit when a holder doesn't have room.
It used to be that commits were only triggered when apply callers were
waiting.  We transfer some of that logic into a new 'committing' field
so that we can have commits in flight without apply callers waiting.  We
add it to the server commit tracing.

While we're at it we clean up the logic that tests if a hold can
proceed.  It used to be confusingly split across two functions that both
could sample the current allocator space remaining.  This could lead to
weird cases where the first holder could use the second alloc remaining
call, not the one whose values were tested to see if the holder could
fit.  Now each hold check only samples the allocators once.

And finally we fix a subtle case where the budget exceeded message can
spuriously trigger in the case where dirtying the freed list created a
new empty block after the holder recorded the amount of space in the
freed block.

Signed-off-by: Zach Brown <zab@versity.com>
2023-10-03 13:32:09 -07:00
Zach Brown
778c2769df Merge pull request #132 from versity/zab/v1.15
v1.15 Release
2023-07-17 13:02:10 -07:00
Zach Brown
9e3529060e v1.15 Release
Finish the release notes for the 1.15 release.

Signed-off-by: Zach Brown <zab@versity.com>
2023-07-17 12:07:13 -07:00
Zach Brown
1672b3ecec Merge pull request #130 from versity/zab/noncontig_alloc_einval
Fix partial preallocation when _contig_only = 0
2023-07-17 10:21:18 -07:00
Zach Brown
55f9435fad Fix partial preallocation when _contig_only = 0
Data preallocation attempts to allocate large aligned regions of
extents.  It tried to fill the hole around a write offset that
didn't contain an extent.  It missed the case where there can be
multiple extents between the start of the region and the hole.
It could try to overwrite these additional existing extents and writes
could return EINVAL.

We fix this by trimming the preallocation to start at the write offset
if there are any extents in the region before the write offset.  The
data preallocation test output has to be updated now that allocation
extents won't grow towards the start of the region when there are
existing extents.

Signed-off-by: Zach Brown <zab@versity.com>
2023-07-17 09:36:09 -07:00
Zach Brown
072f6868d3 Merge pull request #131 from versity/zab/server_merge_splice_failure
Process log merge splicing in many commits
2023-07-15 21:03:32 -07:00
Zach Brown
8a64b46a2f Process log merge splicing in many commits
Log merge completions were spliced in one server commit.  It's possible
to get enough completion work pending that it all can't be completed in
one server commit.  Operations fail with ENOSPC and because these
changes can't be unwound cleanly the server asserts.

This allows the completion splicing to break the work up into multiple
commits.

Processing completions in multiple commits means that request creation
can observe the merge status in states that weren't possible before.
Splicing is careful to maintain an elevated nr_complete count while the
client can't get requests because the tree is rebalancing.

Signed-off-by: Zach Brown <zab@versity.com>
2023-07-14 13:28:29 -07:00
Zach Brown
14901c39aa Merge pull request #129 from versity/zab/v1.14
v1.14 Release
2023-06-29 11:30:01 -07:00
Zach Brown
e095127ae9 v1.14 Release
Finish the release notes for the 1.14 release.

Signed-off-by: Zach Brown <zab@versity.com>
2023-06-29 10:03:53 -07:00
Zach Brown
a9da27444f Merge pull request #128 from versity/zab/prealloc_fragmentation
Zab/prealloc fragmentation
2023-06-29 09:57:32 -07:00
Zach Brown
49fe89741d Merge pull request #125 from versity/zab/get_referring_entries
Zab/get referring entries
2023-06-29 09:57:06 -07:00
Zach Brown
847916860d Advance move_blocks extent search offset
The move_blocks ioctl finds extents to move in the source file by
searching from the starting block offset of the region to move.
Logically, this is fine.  After each extent item is deleted the next
search will find the next extent.

The problem is that deleted items still exist in the item cache.  The
next iteration has to skip over all the deleted extents from the start
of the region.  This is fine with large extents, but with heavily
fragmented extents this creates a huge amplification of the number of
items to traverse when moving the fragmented extents in a large file.
(It's not quite O(n^2)/2 for the total extents, since deleted items are
purged as we write out the dirty items in each transaction... but it's
still immense.)

The fix is to simply start searching for the next extent after the one
we just moved.

Signed-off-by: Zach Brown <zab@versity.com>
2023-06-28 16:54:28 -07:00
Zach Brown
564b942ead Write test for hole filling noncontig prealloc
Add a test which exercises filling holes in prealloc regions when the
_contig_only prealloc option is not set.

Signed-off-by: Zach Brown <zab@versity.com>
2023-06-28 16:16:04 -07:00
Zach Brown
3d99fda0f6 Preallocate data around iblock when noncontig
If the _contig_only option isn't set then we try to preallocate aligned
regions of files.  The initial implementation naively only allowed one
preallocation attempt in each aligned region.  If it got a small
allocation that didn't fill the region then every future allocation
in the region would be a single block.

This changes every preallocation in the region to attempt to fill the
hole in the region that iblock fell in.  It uses an extra extent search
(item cache search) to try and avoid thousands of single block
allocations.

Signed-off-by: Zach Brown <zab@versity.com>
2023-06-28 12:21:25 -07:00
Zach Brown
6c0ab75477 Merge pull request #126 from versity/zab/rht_block_shrink_deadlock
Avoid deadlock from block reclaim in rht resize
2023-06-16 10:30:16 -07:00
Zach Brown
89b238a5c4 Add more acceptable quorum delay during testing
Loaded VMs can see a few more seconds of delay.

Signed-off-by: Zach Brown <zab@versity.com>
2023-06-16 09:38:58 -07:00
Zach Brown
05371b83f0 Update expected console messages during testing
Signed-off-by: Zach Brown <zab@versity.com>
2023-06-16 09:37:37 -07:00
Zach Brown
acafb869e7 Avoid deadlock from block reclaim in rht resize
The RCU hash table uses deferred work to resize the hash table.  There's
a time during resize when hash table iteration will return EAGAIN until
resize makes more progress.  During this time resize can perform
GFP_KERNEL allocations.

Our shrinker tries to iterate over its RCU hash table to find blocks to
reclaim.  It tries to restart iteration if it gets EAGAIN on the
assumption that it will be usable again soon.

Combine the two and our shrinker can get stuck retrying iteration
indefinitely because it's shrinking on behalf of the hash table resizing
that is trying to allocate the next table before making iteration work
again.  We have to stop shrinking in this case so that the resizing
caller can proceed.

Signed-off-by: Zach Brown <zab@versity.com>
2023-06-15 14:45:26 -07:00
Zach Brown
74c5fe1115 Add get-referring-entries test
Signed-off-by: Zach Brown <zab@versity.com>
2023-06-14 14:12:10 -07:00
Zach Brown
2279e9657f Add get_referring_entries scoutfs command
Add a cli command for the get_referring_entries ioctl.

Signed-off-by: Zach Brown <zab@versity.com>
2023-06-14 14:12:10 -07:00
Zach Brown
707752a7bf Add get_referring_entries ioctl
Add an ioctl that gives the callers all entries that refer to an inode.
It's like a backwards readdir.  It's a light bit of translation between
the internal _add_next_linkrefs() list of entries and the ioctl
interface of a buffer of entry structs.

Signed-off-by: Zach Brown <zab@versity.com>
2023-06-14 14:12:10 -07:00
Zach Brown
0316c22026 Extend scoutfs_dir_add_next_linkrefs
Extend scoutfs_dir_add_next_linkref() to be able to return multiple
backrefs under the lock for each call and have it take an argument to
limit the number of backrefs that can be added and returned.

Its return code changes a bit in that it returns 1 on success instead of
0 so we have to be a little careful with callers who were expecting 0.
It still returns -ENOENT when no entries are found.

We break up its tracepoint into one that records each entry added and
one that records the result of each call.

This will be used by an ioctl to give callers just the entries that
point to an inode instead of assembling full paths from the root.

Signed-off-by: Zach Brown <zab@versity.com>
2023-06-14 14:12:10 -07:00
Zach Brown
5a1e5639c2 Merge pull request #124 from versity/zab/fix_quo_hb_mount_option
Zab/fix quo hb mount option
2023-06-07 10:50:32 -07:00
Zach Brown
950963375b Update quorum heartbeat test for mount option
Update the quorum_heartbeat_timeout_ms test to also test the mount
option, not just updating the timeout via sysfs.  This takes some
reworking as we have to avoid the active leader/server when setting the
timeout via the mount option.  We also allow for a bit more slack around
comparing kernel sleeps and userspace wall clocks.

Signed-off-by: Zach Brown <zab@versity.com>
2023-05-23 09:57:13 -07:00
Zach Brown
e52435b993 Add t_mount_opt
Add a test helper that mounts with a mount option.

Signed-off-by: Zach Brown <zab@versity.com>
2023-05-22 16:30:01 -07:00
Zach Brown
2b72c57cb0 Fix crash in quorum_heartbeat_timeout_ms parsing
Mount option parsing runs early enough that the rest of the option
read/write serialization infrastructure isn't set up yet.  The
quorum_heartbeat_timeout_ms mount option tried to use a helper that
updated the stored option but it wasn't initialized yet so it crashed.

The helper was really only to have the option validity test in one
place.  It's reworked to only verify the option and the actual setting
is left to the callers.

Signed-off-by: Zach Brown <zab@versity.com>
2023-05-22 16:29:56 -07:00
Zach Brown
9c67b2a42d Merge pull request #122 from versity/zab/v1.13
v1.13 Release
2023-05-19 11:38:48 -07:00
59 changed files with 2594 additions and 471 deletions


@@ -1,6 +1,55 @@
Versity ScoutFS Release Notes
=============================
---
v1.17
\
*Oct 23, 2023*
Add support for EL8 generation kernels.
---
v1.16
\
*Oct 4, 2023*
Fix an issue where the server could hang on startup if its persistent
allocator structures were left in a specific degraded state by the
previously active server.
---
v1.15
\
*Jul 17, 2023*
Process log btree merge splicing in multiple commits. This prevents a
rare case where pending log merge completions contain more work than can
be done in a single server commit, causing the server to trigger an
assert shortly after starting.
Fix spurious EINVAL from data writes when data\_prealloc\_contig\_only was
set to 0.
---
v1.14
\
*Jun 29, 2023*
Add get\_referring\_entries ioctl for getting directory entries that
refer to an inode.
Fix excessive CPU use in the move\_blocks interface when moving a large
number of extents.
Reduce fragmented data allocation when contig\_only prealloc is not in
use by more consistently allocating multi-block extents within each
aligned prealloc region.
Avoid rare deadlock in metadata block cache reclaim under both heavy
load and memory pressure.
Fix crash when using quorum\_heartbeat\_timeout\_ms mount option.
---
v1.13
\


@@ -31,12 +31,12 @@ TARFILE = scoutfs-kmod-$(RPM_VERSION).tar
all: module
module:
make $(SCOUTFS_ARGS)
$(SP) make C=2 CF="-D__CHECK_ENDIAN__" $(SCOUTFS_ARGS)
$(MAKE) $(SCOUTFS_ARGS)
$(SP) $(MAKE) C=2 CF="-D__CHECK_ENDIAN__" $(SCOUTFS_ARGS)
modules_install:
make $(SCOUTFS_ARGS) modules_install
$(MAKE) $(SCOUTFS_ARGS) modules_install
%.spec: %.spec.in .FORCE
@@ -50,4 +50,4 @@ dist: scoutfs-kmod.spec
@ tar rf $(TARFILE) --transform="s@\(.*\)@scoutfs-kmod-$(RPM_VERSION)/\1@" scoutfs-kmod.spec
clean:
make $(SCOUTFS_ARGS) clean
$(MAKE) $(SCOUTFS_ARGS) clean


@@ -3,16 +3,28 @@
%define kmod_git_hash @@GITHASH@@
%define pkg_date %(date +%%Y%%m%%d)
# Disable the building of the debug package(s).
%define debug_package %{nil}
# take kernel version or default to uname -r
%{!?kversion: %global kversion %(uname -r)}
%global kernel_version %{kversion}
%if 0%{?el7}
%global kernel_source() /usr/src/kernels/%{kernel_version}.$(arch)
%global kernel_release() %{kversion}
%endif
%if 0%{?el8}
%global kernel_source() /usr/src/kernels/%{kernel_version}
%endif
%{!?_release: %global _release 0.%{pkg_date}git%{kmod_git_hash}}
%if 0%{?el7}
Name: %{kmod_name}
%endif
%if 0%{?el8}
Name: kmod-%{kmod_name}
%endif
Summary: %{kmod_name} kernel module
Version: %{kmod_version}
Release: %{_release}%{?dist}
@@ -20,24 +32,30 @@ License: GPLv2
Group: System/Kernel
URL: http://scoutfs.org/
%if 0%{?el7}
BuildRequires: %{kernel_module_package_buildreqs}
BuildRequires: git
%endif
%if 0%{?el8}
BuildRequires: elfutils-libelf-devel
%endif
BuildRequires: kernel-devel-uname-r = %{kernel_version}
BuildRequires: git
BuildRequires: module-init-tools
ExclusiveArch: x86_64
Source: %{kmod_name}-kmod-%{kmod_version}.tar
%if 0%{?el7}
# Build only for standard kernel variant(s); for debug packages, append "debug"
# after "default" (separated by space)
%kernel_module_package default
%endif
# Disable the building of the debug package(s).
%define debug_package %{nil}
%global install_mod_dir extra/%{name}
%global install_mod_dir extra/%{kmod_name}
%if 0%{?el8}
%global flavors_to_build x86_64
%endif
%description
%{kmod_name} - kernel module
@@ -66,7 +84,7 @@ export INSTALL_MOD_DIR=%{install_mod_dir}
mkdir -p %{install_mod_dir}
for flavor in %{flavors_to_build}; do
export KSRC=%{kernel_source $flavor}
export KVERSION=%{kernel_release $KSRC}
export KVERSION=%{kversion}
install -d $INSTALL_MOD_PATH/lib/modules/$KVERSION/%{install_mod_dir}
cp $PWD/obj/$flavor/src/scoutfs.ko $INSTALL_MOD_PATH/lib/modules/$KVERSION/%{install_mod_dir}/
done
@@ -74,6 +92,14 @@ done
# mark modules executable so that strip-to-file can strip them
find %{buildroot} -type f -name \*.ko -exec %{__chmod} u+x \{\} \;
%if 0%{?el8}
%files
/lib/modules
%post
weak-modules --add-kernel --no-initramfs
depmod -a
%endif
%clean
rm -rf %{buildroot}


@@ -25,6 +25,7 @@ scoutfs-y += \
inode.o \
ioctl.o \
item.o \
kernelcompat.o \
lock.o \
lock_server.o \
msg.o \


@@ -26,6 +26,16 @@ ifneq (,$(shell grep 'dir_emit_dots' include/linux/fs.h))
ccflags-y += -DKC_DIR_EMIT_DOTS
endif
#
# v3.18-rc2-19-gb5ae6b15bd73
#
# Folds d_materialise_unique into d_splice_alias. Note reversal
# of arguments (Also note Documentation/filesystems/porting.rst)
#
ifneq (,$(shell grep 'd_materialise_unique' include/linux/dcache.h))
ccflags-y += -DKC_D_MATERIALISE_UNIQUE=1
endif
#
# RHEL extended the fop struct so to use it we have to set
# a flag to indicate that the struct is large enough and
@@ -40,6 +50,211 @@ endif
#
# Added user_ns argument to posix_acl_valid
#
ifneq (,$(shell grep 'posix_acl_valid.*user_ns,' include/linux/posix_acl.h))
ifneq (,$(shell grep 'posix_acl_valid.*user_namespace' include/linux/posix_acl.h))
ccflags-y += -DKC_POSIX_ACL_VALID_USER_NS
endif
#
# v5.3-12296-g6d2052d188d9
#
# The RBCOMPUTE function is now passed an extra flag, and should return a bool
# to indicate whether the propagated callback should stop or not.
#
ifneq (,$(shell grep 'static inline bool RBNAME.*_compute_max' include/linux/rbtree_augmented.h))
ccflags-y += -DKC_RB_TREE_AUGMENTED_COMPUTE_MAX
endif
#
# v3.13-25-g37bc15392a23
#
# Renames posix_acl_create to __posix_acl_create and provides some
# new interfaces for creating ACLs
#
ifneq (,$(shell grep '__posix_acl_create' include/linux/posix_acl.h))
ccflags-y += -DKC___POSIX_ACL_CREATE
endif
#
# v4.8-rc1-29-g31051c85b5e2
#
# inode_change_ok() removed - replace with setattr_prepare()
#
ifneq (,$(shell grep 'extern int setattr_prepare' include/linux/fs.h))
ccflags-y += -DKC_SETATTR_PREPARE
endif
#
# v4.15-rc3-4-gae5e165d855d
#
# linux/iversion.h needs to manually be included for code that
# manipulates this field.
#
ifneq (,$(shell grep -s 'define _LINUX_IVERSION_H' include/linux/iversion.h))
ccflags-y += -DKC_NEED_LINUX_IVERSION_H=1
endif
# v4.11-12447-g104b4e5139fe
#
# Renamed __percpu_counter_add to percpu_counter_add_batch to clarify
# that the __ wasn't less safe, just took an extra parameter.
#
ifneq (,$(shell grep 'percpu_counter_add_batch' include/linux/percpu_counter.h))
ccflags-y += -DKC_PERCPU_COUNTER_ADD_BATCH
endif
#
# v4.11-4550-g7dea19f9ee63
#
# Introduced memalloc_nofs_{save,restore} preferred instead of _noio_.
#
ifneq (,$(shell grep 'memalloc_nofs_save' include/linux/sched/mm.h))
ccflags-y += -DKC_MEMALLOC_NOFS_SAVE
endif
#
# v4.7-12414-g1eff9d322a44
#
# Renamed bi_rw to bi_opf to force old code to catch up. We use it as a
# single switch between old and new bio structures.
#
ifneq (,$(shell grep 'bi_opf' include/linux/blk_types.h))
ccflags-y += -DKC_BIO_BI_OPF
endif
#
# v4.12-rc2-201-g4e4cbee93d56
#
# Moves to bi_status BLK_STS_ API instead of having a mix of error
# end_io args or bi_error.
#
ifneq (,$(shell grep 'bi_status' include/linux/blk_types.h))
ccflags-y += -DKC_BIO_BI_STATUS
endif
#
# v3.11-8765-ga0b02131c5fc
#
# Remove the old ->shrink() API, ->{scan,count}_objects is preferred.
#
ifneq (,$(shell grep '(*shrink)' include/linux/shrinker.h))
ccflags-y += -DKC_SHRINKER_SHRINK
endif
#
# v3.19-4777-g6bec00352861
#
# backing_dev_info is removed from address_space. Instead we need to use
# inode_to_bdi() inline from <backing-dev.h>.
#
ifneq (,$(shell grep 'struct backing_dev_info.*backing_dev_info' include/linux/fs.h))
ccflags-y += -DKC_LINUX_BACKING_DEV_INFO=1
endif
#
# v4.3-9290-ge409de992e3e
#
# xattr handlers are now passed a struct that contains `flags`
#
ifneq (,$(shell grep 'int...get..const struct xattr_handler.*struct dentry.*dentry,' include/linux/xattr.h))
ccflags-y += -DKC_XATTR_STRUCT_XATTR_HANDLER=1
endif
#
# v4.16-rc1-1-g9b2c45d479d0
#
# kernel_getsockname() and kernel_getpeername dropped addrlen arg
#
ifneq (,$(shell grep 'kernel_getsockname.*,$$' include/linux/net.h))
ccflags-y += -DKC_KERNEL_GETSOCKNAME_ADDRLEN=1
endif
#
# v4.1-rc1-410-geeb1bd5c40ed
#
# Adds a struct net parameter to sock_create_kern
#
ifneq (,$(shell grep 'sock_create_kern.*struct net' include/linux/net.h))
ccflags-y += -DKC_SOCK_CREATE_KERN_NET=1
endif
#
# v3.18-rc6-1619-gc0371da6047a
#
# iov_iter is now part of struct msghdr
#
ifneq (,$(shell grep 'struct iov_iter.*msg_iter' include/linux/socket.h))
ccflags-y += -DKC_MSGHDR_STRUCT_IOV_ITER=1
endif
#
# v4.17-rc6-7-g95582b008388
#
# Kernel has current_time(inode) to uniformly retrieve timespec in the right unit
#
ifneq (,$(shell grep 'extern struct timespec64 current_time' include/linux/fs.h))
ccflags-y += -DKC_CURRENT_TIME_INODE=1
endif
#
# v4.9-12228-g530e9b76ae8f
#
# register_cpu_notifier and family were all removed and to be
# replaced with cpuhp_* API calls.
#
ifneq (,$(shell grep 'define register_hotcpu_notifier' include/linux/cpu.h))
ccflags-y += -DKC_CPU_NOTIFIER
endif
#
# v3.14-rc8-130-gccad2365668f
#
# generic_file_buffered_write is removed, backport it
#
ifneq (,$(shell grep 'extern ssize_t generic_file_buffered_write' include/linux/fs.h))
ccflags-y += -DKC_GENERIC_FILE_BUFFERED_WRITE=1
endif
#
# v5.7-438-g8151b4c8bee4
#
# struct address_space_operations switches away from .readpages to .readahead
#
# RHEL has backported this feature all the way to RHEL8, as part of RHEL_KABI,
# which means we need to detect this very precisely
#
ifneq (,$(shell grep 'readahead.*struct readahead_control' include/linux/fs.h))
ccflags-y += -DKC_FILE_AOPS_READAHEAD
endif
#
# v4.0-rc7-1743-g8436318205b9
#
# .aio_read and .aio_write no longer exist. All reads and writes now use the
# .read_iter and .write_iter methods, or must implement .read and .write (which
# we don't).
#
ifneq (,$(shell grep 'ssize_t.*aio_read' include/linux/fs.h))
ccflags-y += -DKC_LINUX_HAVE_FOP_AIO_READ=1
endif
#
# rhel7 has a custom inode_operations_wrapper struct that is discarded
# entirely in favor of upstream structure since rhel8.
#
ifneq (,$(shell grep 'void.*follow_link.*struct dentry' include/linux/fs.h))
ccflags-y += -DKC_LINUX_HAVE_RHEL_IOPS_WRAPPER=1
endif
ifneq (,$(shell grep 'size_t.*ki_left;' include/linux/aio.h))
ccflags-y += -DKC_LINUX_AIO_KI_LEFT=1
endif
#
# v4.4-rc4-4-g98e9cb5711c6
#
# Introduces a new xattr_handler .name member that can be used to match the
# entire field, instead of just a prefix. For these kernels, we must use
# the new .name field instead.
ifneq (,$(shell grep 'static inline const char .xattr_prefix' include/linux/xattr.h))
ccflags-y += -DKC_XATTR_HANDLER_NAME=1
endif


@@ -69,12 +69,14 @@ struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct s
char *name;
int ret;
#ifndef KC___POSIX_ACL_CREATE
if (!IS_POSIXACL(inode))
return NULL;
acl = get_cached_acl(inode, type);
if (acl != ACL_NOT_CACHED)
return acl;
#endif
ret = acl_xattr_name_len(type, &name, NULL);
if (ret < 0)
@@ -96,9 +98,11 @@ struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct s
acl = ERR_PTR(ret);
}
#ifndef KC___POSIX_ACL_CREATE
/* can set null negative cache */
if (!IS_ERR(acl))
set_cached_acl(inode, type, acl);
#endif
kfree(value);
@@ -112,8 +116,10 @@ struct posix_acl *scoutfs_get_acl(struct inode *inode, int type)
struct posix_acl *acl;
int ret;
#ifndef KC___POSIX_ACL_CREATE
if (!IS_POSIXACL(inode))
return NULL;
#endif
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, inode, &lock);
if (ret < 0) {
@@ -183,13 +189,15 @@ int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type,
if (!value) {
/* can be setting an acl that only affects mode, didn't need xattr */
inode_inc_iversion(inode);
inode->i_ctime = CURRENT_TIME;
inode->i_ctime = current_time(inode);
}
}
out:
#ifndef KC___POSIX_ACL_CREATE
if (!ret)
set_cached_acl(inode, type, acl);
#endif
kfree(value);
@@ -218,10 +226,17 @@ int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
return ret;
}
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
int scoutfs_acl_get_xattr(const struct xattr_handler *handler, struct dentry *dentry,
struct inode *inode, const char *name, void *value,
size_t size)
{
int type = handler->flags;
#else
int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value, size_t size,
int type)
{
#endif
struct posix_acl *acl;
int ret = 0;
@@ -240,9 +255,17 @@ int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value,
return ret;
}
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
int scoutfs_acl_set_xattr(const struct xattr_handler *handler, struct dentry *dentry,
struct inode *inode, const char *name, const void *value,
size_t size, int flags)
{
int type = handler->flags;
#else
int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *value, size_t size,
int flags, int type)
{
#endif
struct posix_acl *acl = NULL;
int ret;
@@ -301,7 +324,7 @@ int scoutfs_init_acl_locked(struct inode *inode, struct inode *dir,
if (ret)
goto out;
}
ret = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
ret = __posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
if (ret < 0)
return ret;
if (ret > 0)
@@ -345,7 +368,7 @@ int scoutfs_acl_chmod_locked(struct inode *inode, struct iattr *attr,
if (IS_ERR_OR_NULL(acl))
return PTR_ERR(acl);
ret = posix_acl_chmod(&acl, GFP_KERNEL, attr->ia_mode);
ret = __posix_acl_chmod(&acl, GFP_KERNEL, attr->ia_mode);
if (ret)
return ret;


@@ -6,10 +6,19 @@ struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct s
int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type,
struct scoutfs_lock *lock, struct list_head *ind_locks);
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
int scoutfs_acl_get_xattr(const struct xattr_handler *, struct dentry *dentry,
struct inode *inode, const char *name, void *value,
size_t size);
int scoutfs_acl_set_xattr(const struct xattr_handler *, struct dentry *dentry,
struct inode *inode, const char *name, const void *value,
size_t size, int flags);
#else
int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value, size_t size,
int type);
int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *value, size_t size,
int flags, int type);
#endif
int scoutfs_acl_chmod_locked(struct inode *inode, struct iattr *attr,
struct scoutfs_lock *lock, struct list_head *ind_locks);
int scoutfs_init_acl_locked(struct inode *inode, struct inode *dir,


@@ -21,6 +21,7 @@
#include <linux/blkdev.h>
#include <linux/rhashtable.h>
#include <linux/random.h>
#include <linux/sched/mm.h>
#include "format.h"
#include "super.h"
@@ -30,6 +31,7 @@
#include "scoutfs_trace.h"
#include "alloc.h"
#include "triggers.h"
#include "util.h"
/*
* The scoutfs block cache manages metadata blocks that can be larger
@@ -57,7 +59,7 @@ struct block_info {
atomic64_t access_counter;
struct rhashtable ht;
wait_queue_head_t waitq;
struct shrinker shrinker;
KC_DEFINE_SHRINKER(shrinker);
struct work_struct free_work;
struct llist_head free_llist;
};
@@ -128,7 +130,7 @@ static __le32 block_calc_crc(struct scoutfs_block_header *hdr, u32 size)
static struct block_private *block_alloc(struct super_block *sb, u64 blkno)
{
struct block_private *bp;
unsigned int noio_flags;
unsigned int nofs_flags;
/*
* If we had multiple blocks per page we'd need to be a little
@@ -156,9 +158,9 @@ static struct block_private *block_alloc(struct super_block *sb, u64 blkno)
* spurious reclaim-on dependencies and warnings.
*/
lockdep_off();
noio_flags = memalloc_noio_save();
nofs_flags = memalloc_nofs_save();
bp->virt = __vmalloc(SCOUTFS_BLOCK_LG_SIZE, GFP_NOFS | __GFP_HIGHMEM, PAGE_KERNEL);
memalloc_noio_restore(noio_flags);
memalloc_nofs_restore(nofs_flags);
lockdep_on();
if (!bp->virt) {
@@ -436,11 +438,10 @@ static void block_remove_all(struct super_block *sb)
* possible. Final freeing, verifying checksums, and unlinking errored
* blocks are all done by future users of the blocks.
*/
static void block_end_io(struct super_block *sb, int rw,
static void block_end_io(struct super_block *sb, unsigned int opf,
struct block_private *bp, int err)
{
DECLARE_BLOCK_INFO(sb, binf);
bool is_read = !(rw & WRITE);
if (err) {
scoutfs_inc_counter(sb, block_cache_end_io_error);
@@ -450,7 +451,7 @@ static void block_end_io(struct super_block *sb, int rw,
if (!atomic_dec_and_test(&bp->io_count))
return;
if (is_read && !test_bit(BLOCK_BIT_ERROR, &bp->bits))
if (!op_is_write(opf) && !test_bit(BLOCK_BIT_ERROR, &bp->bits))
set_bit(BLOCK_BIT_UPTODATE, &bp->bits);
clear_bit(BLOCK_BIT_IO_BUSY, &bp->bits);
@@ -463,13 +464,13 @@ static void block_end_io(struct super_block *sb, int rw,
wake_up(&binf->waitq);
}
static void block_bio_end_io(struct bio *bio, int err)
static void KC_DECLARE_BIO_END_IO(block_bio_end_io, struct bio *bio)
{
struct block_private *bp = bio->bi_private;
struct super_block *sb = bp->sb;
TRACE_BLOCK(end_io, bp);
block_end_io(sb, bio->bi_rw, bp, err);
block_end_io(sb, kc_bio_get_opf(bio), bp, kc_bio_get_errno(bio));
bio_put(bio);
}
@@ -477,7 +478,7 @@ static void block_bio_end_io(struct bio *bio, int err)
* Kick off IO for a single block.
*/
static int block_submit_bio(struct super_block *sb, struct block_private *bp,
int rw)
unsigned int opf)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct bio *bio = NULL;
@@ -510,8 +511,9 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
break;
}
bio->bi_sector = sector + (off >> 9);
bio->bi_bdev = sbi->meta_bdev;
kc_bio_set_opf(bio, opf);
kc_bio_set_sector(bio, sector + (off >> 9));
bio_set_dev(bio, sbi->meta_bdev);
bio->bi_end_io = block_bio_end_io;
bio->bi_private = bp;
@@ -528,18 +530,18 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
BUG();
if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
submit_bio(rw, bio);
kc_submit_bio(bio);
bio = NULL;
}
}
if (bio)
submit_bio(rw, bio);
kc_submit_bio(bio);
blk_finish_plug(&plug);
/* let racing end_io know we're done */
block_end_io(sb, rw, bp, ret);
block_end_io(sb, opf, bp, ret);
return ret;
}
@@ -640,7 +642,7 @@ static struct block_private *block_read(struct super_block *sb, u64 blkno)
if (!test_bit(BLOCK_BIT_UPTODATE, &bp->bits) &&
test_and_clear_bit(BLOCK_BIT_NEW, &bp->bits)) {
ret = block_submit_bio(sb, bp, READ);
ret = block_submit_bio(sb, bp, REQ_OP_READ);
if (ret < 0)
goto out;
}
@@ -969,7 +971,7 @@ int scoutfs_block_writer_write(struct super_block *sb,
/* retry previous write errors */
clear_bit(BLOCK_BIT_ERROR, &bp->bits);
ret = block_submit_bio(sb, bp, WRITE);
ret = block_submit_bio(sb, bp, REQ_OP_WRITE);
if (ret < 0)
break;
}
@@ -1069,6 +1071,16 @@ u64 scoutfs_block_writer_dirty_bytes(struct super_block *sb,
return wri->nr_dirty_blocks * SCOUTFS_BLOCK_LG_SIZE;
}
static unsigned long block_count_objects(struct shrinker *shrink, struct shrink_control *sc)
{
struct block_info *binf = KC_SHRINKER_CONTAINER_OF(shrink, struct block_info);
struct super_block *sb = binf->sb;
scoutfs_inc_counter(sb, block_cache_count_objects);
return shrinker_min_long(atomic_read(&binf->total_inserted));
}
/*
* Remove a number of cached blocks that haven't been used recently.
*
@@ -1089,25 +1101,19 @@ u64 scoutfs_block_writer_dirty_bytes(struct super_block *sb,
* atomically remove blocks when the only references are ours and the
* hash table.
*/
static int block_shrink(struct shrinker *shrink, struct shrink_control *sc)
static unsigned long block_scan_objects(struct shrinker *shrink, struct shrink_control *sc)
{
struct block_info *binf = container_of(shrink, struct block_info,
shrinker);
struct block_info *binf = KC_SHRINKER_CONTAINER_OF(shrink, struct block_info);
struct super_block *sb = binf->sb;
struct rhashtable_iter iter;
struct block_private *bp;
unsigned long nr;
bool stop = false;
unsigned long freed = 0;
unsigned long nr = sc->nr_to_scan;
u64 recently;
nr = sc->nr_to_scan;
if (nr == 0)
goto out;
scoutfs_inc_counter(sb, block_cache_scan_objects);
scoutfs_inc_counter(sb, block_cache_shrink);
nr = DIV_ROUND_UP(nr, SCOUTFS_BLOCK_LG_PAGES_PER);
restart:
recently = accessed_recently(binf);
rhashtable_walk_enter(&binf->ht, &iter);
rhashtable_walk_start(&iter);
@@ -1129,12 +1135,15 @@ restart:
if (bp == NULL)
break;
if (bp == ERR_PTR(-EAGAIN)) {
/* hard exit to wait for rcu rebalance to finish */
rhashtable_walk_stop(&iter);
rhashtable_walk_exit(&iter);
scoutfs_inc_counter(sb, block_cache_shrink_restart);
synchronize_rcu();
goto restart;
/*
* We can be called from reclaim in the allocation
* to resize the hash table itself. We have to
* return so that the caller can proceed and
* enable hash table iteration again.
*/
scoutfs_inc_counter(sb, block_cache_shrink_stop);
stop = true;
break;
}
scoutfs_inc_counter(sb, block_cache_shrink_next);
@@ -1148,6 +1157,7 @@ restart:
if (block_remove_solo(sb, bp)) {
scoutfs_inc_counter(sb, block_cache_shrink_remove);
TRACE_BLOCK(shrink, bp);
freed++;
nr--;
}
block_put(sb, bp);
@@ -1156,9 +1166,11 @@ restart:
rhashtable_walk_stop(&iter);
rhashtable_walk_exit(&iter);
out:
return min_t(u64, (u64)atomic_read(&binf->total_inserted) * SCOUTFS_BLOCK_LG_PAGES_PER,
INT_MAX);
if (stop)
return SHRINK_STOP;
else
return freed;
}
struct sm_block_completion {
@@ -1166,11 +1178,11 @@ struct sm_block_completion {
int err;
};
static void sm_block_bio_end_io(struct bio *bio, int err)
static void KC_DECLARE_BIO_END_IO(sm_block_bio_end_io, struct bio *bio)
{
struct sm_block_completion *sbc = bio->bi_private;
sbc->err = err;
sbc->err = kc_bio_get_errno(bio);
complete(&sbc->comp);
bio_put(bio);
}
@@ -1185,9 +1197,8 @@ static void sm_block_bio_end_io(struct bio *bio, int err)
* only layer that sees the full block buffer so we pass the calculated
* crc to the caller for them to check in their context.
*/
static int sm_block_io(struct super_block *sb, struct block_device *bdev, int rw, u64 blkno,
struct scoutfs_block_header *hdr, size_t len,
__le32 *blk_crc)
static int sm_block_io(struct super_block *sb, struct block_device *bdev, unsigned int opf,
u64 blkno, struct scoutfs_block_header *hdr, size_t len, __le32 *blk_crc)
{
struct scoutfs_block_header *pg_hdr;
struct sm_block_completion sbc;
@@ -1201,7 +1212,7 @@ static int sm_block_io(struct super_block *sb, struct block_device *bdev, int rw
return -EIO;
if (WARN_ON_ONCE(len > SCOUTFS_BLOCK_SM_SIZE) ||
WARN_ON_ONCE(!(rw & WRITE) && !blk_crc))
WARN_ON_ONCE(!op_is_write(opf) && !blk_crc))
return -EINVAL;
page = alloc_page(GFP_NOFS);
@@ -1210,7 +1221,7 @@ static int sm_block_io(struct super_block *sb, struct block_device *bdev, int rw
pg_hdr = page_address(page);
if (rw & WRITE) {
if (op_is_write(opf)) {
memcpy(pg_hdr, hdr, len);
if (len < SCOUTFS_BLOCK_SM_SIZE)
memset((char *)pg_hdr + len, 0,
@@ -1224,8 +1235,9 @@ static int sm_block_io(struct super_block *sb, struct block_device *bdev, int rw
goto out;
}
bio->bi_sector = blkno << (SCOUTFS_BLOCK_SM_SHIFT - 9);
bio->bi_bdev = bdev;
kc_bio_set_opf(bio, opf | REQ_SYNC);
kc_bio_set_sector(bio, blkno << (SCOUTFS_BLOCK_SM_SHIFT - 9));
bio_set_dev(bio, bdev);
bio->bi_end_io = sm_block_bio_end_io;
bio->bi_private = &sbc;
bio_add_page(bio, page, SCOUTFS_BLOCK_SM_SIZE, 0);
@@ -1233,12 +1245,12 @@ static int sm_block_io(struct super_block *sb, struct block_device *bdev, int rw
init_completion(&sbc.comp);
sbc.err = 0;
submit_bio((rw & WRITE) ? WRITE_SYNC : READ_SYNC, bio);
kc_submit_bio(bio);
wait_for_completion(&sbc.comp);
ret = sbc.err;
if (ret == 0 && !(rw & WRITE)) {
if (ret == 0 && !op_is_write(opf)) {
memcpy(hdr, pg_hdr, len);
*blk_crc = block_calc_crc(pg_hdr, SCOUTFS_BLOCK_SM_SIZE);
}
@@ -1252,14 +1264,14 @@ int scoutfs_block_read_sm(struct super_block *sb,
struct scoutfs_block_header *hdr, size_t len,
__le32 *blk_crc)
{
return sm_block_io(sb, bdev, READ, blkno, hdr, len, blk_crc);
return sm_block_io(sb, bdev, REQ_OP_READ, blkno, hdr, len, blk_crc);
}
int scoutfs_block_write_sm(struct super_block *sb,
struct block_device *bdev, u64 blkno,
struct scoutfs_block_header *hdr, size_t len)
{
return sm_block_io(sb, bdev, WRITE, blkno, hdr, len, NULL);
return sm_block_io(sb, bdev, REQ_OP_WRITE, blkno, hdr, len, NULL);
}
int scoutfs_block_setup(struct super_block *sb)
@@ -1284,9 +1296,9 @@ int scoutfs_block_setup(struct super_block *sb)
atomic_set(&binf->total_inserted, 0);
atomic64_set(&binf->access_counter, 0);
init_waitqueue_head(&binf->waitq);
binf->shrinker.shrink = block_shrink;
binf->shrinker.seeks = DEFAULT_SEEKS;
register_shrinker(&binf->shrinker);
KC_INIT_SHRINKER_FUNCS(&binf->shrinker, block_count_objects,
block_scan_objects);
KC_REGISTER_SHRINKER(&binf->shrinker);
INIT_WORK(&binf->free_work, block_free_work);
init_llist_head(&binf->free_llist);
@@ -1306,7 +1318,7 @@ void scoutfs_block_destroy(struct super_block *sb)
struct block_info *binf = SCOUTFS_SB(sb)->block_info;
if (binf) {
unregister_shrinker(&binf->shrinker);
KC_UNREGISTER_SHRINKER(&binf->shrinker);
block_remove_all(sb);
flush_work(&binf->free_work);
rhashtable_destroy(&binf->ht);


@@ -30,11 +30,13 @@
EXPAND_COUNTER(block_cache_free) \
EXPAND_COUNTER(block_cache_free_work) \
EXPAND_COUNTER(block_cache_remove_stale) \
EXPAND_COUNTER(block_cache_count_objects) \
EXPAND_COUNTER(block_cache_scan_objects) \
EXPAND_COUNTER(block_cache_shrink) \
EXPAND_COUNTER(block_cache_shrink_next) \
EXPAND_COUNTER(block_cache_shrink_recent) \
EXPAND_COUNTER(block_cache_shrink_remove) \
EXPAND_COUNTER(block_cache_shrink_restart) \
EXPAND_COUNTER(block_cache_shrink_stop) \
EXPAND_COUNTER(btree_compact_values) \
EXPAND_COUNTER(btree_compact_values_enomem) \
EXPAND_COUNTER(btree_delete) \
@@ -88,6 +90,8 @@
EXPAND_COUNTER(forest_read_items) \
EXPAND_COUNTER(forest_roots_next_hint) \
EXPAND_COUNTER(forest_set_bloom_bits) \
EXPAND_COUNTER(item_cache_count_objects) \
EXPAND_COUNTER(item_cache_scan_objects) \
EXPAND_COUNTER(item_clear_dirty) \
EXPAND_COUNTER(item_create) \
EXPAND_COUNTER(item_delete) \
@@ -121,6 +125,7 @@
EXPAND_COUNTER(item_update) \
EXPAND_COUNTER(item_write_dirty) \
EXPAND_COUNTER(lock_alloc) \
EXPAND_COUNTER(lock_count_objects) \
EXPAND_COUNTER(lock_free) \
EXPAND_COUNTER(lock_grant_request) \
EXPAND_COUNTER(lock_grant_response) \
@@ -134,6 +139,7 @@
EXPAND_COUNTER(lock_lock_error) \
EXPAND_COUNTER(lock_nonblock_eagain) \
EXPAND_COUNTER(lock_recover_request) \
EXPAND_COUNTER(lock_scan_objects) \
EXPAND_COUNTER(lock_shrink_attempted) \
EXPAND_COUNTER(lock_shrink_aborted) \
EXPAND_COUNTER(lock_shrink_work) \
@@ -232,12 +238,12 @@ struct scoutfs_counters {
#define SCOUTFS_PCPU_COUNTER_BATCH (1 << 30)
#define scoutfs_inc_counter(sb, which) \
__percpu_counter_add(&SCOUTFS_SB(sb)->counters->which, 1, \
SCOUTFS_PCPU_COUNTER_BATCH)
percpu_counter_add_batch(&SCOUTFS_SB(sb)->counters->which, 1, \
SCOUTFS_PCPU_COUNTER_BATCH)
#define scoutfs_add_counter(sb, which, cnt) \
__percpu_counter_add(&SCOUTFS_SB(sb)->counters->which, cnt, \
SCOUTFS_PCPU_COUNTER_BATCH)
percpu_counter_add_batch(&SCOUTFS_SB(sb)->counters->which, cnt, \
SCOUTFS_PCPU_COUNTER_BATCH)
void __init scoutfs_init_counters(void);
int scoutfs_setup_counters(struct super_block *sb);


@@ -307,7 +307,7 @@ int scoutfs_data_truncate_items(struct super_block *sb, struct inode *inode,
LIST_HEAD(ind_locks);
s64 ret = 0;
WARN_ON_ONCE(inode && !mutex_is_locked(&inode->i_mutex));
WARN_ON_ONCE(inode && !inode_is_locked(inode));
/* clamp last to the last possible block? */
if (last > SCOUTFS_BLOCK_SM_MAX)
@@ -456,11 +456,11 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
} else {
/*
* Preallocation of aligned regions only preallocates if
* the aligned region contains no extents at all. This
* could be fooled by offline sparse extents but we
* don't want to iterate over all offline extents in the
* aligned region.
* Preallocation within aligned regions tries to
* allocate an extent to fill the hole in the region
* that contains iblock. We'd have to add a bit of plumbing
* to find previous extents so we only search for a next
* extent from the front of the region and from iblock.
*/
div64_u64_rem(iblock, opts.data_prealloc_blocks, &rem);
start = iblock - rem;
@@ -468,8 +468,20 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
ret = scoutfs_ext_next(sb, &data_ext_ops, &args, start, 1, &found);
if (ret < 0 && ret != -ENOENT)
goto out;
if (found.len && found.start < start + count)
count = 1;
/* trim count if there's an extent in the region before iblock */
if (found.len && found.start < iblock) {
count -= iblock - start;
start = iblock;
/* see if there's also an extent after iblock */
ret = scoutfs_ext_next(sb, &data_ext_ops, &args, iblock, 1, &found);
if (ret < 0 && ret != -ENOENT)
goto out;
}
/* trim count by next extent after iblock */
if (found.len && found.start > start && found.start < start + count)
count = (found.start - start);
}
/* overall prealloc limit */
@@ -546,7 +558,7 @@ static int scoutfs_get_block(struct inode *inode, sector_t iblock,
u64 offset;
int ret;
WARN_ON_ONCE(create && !mutex_is_locked(&inode->i_mutex));
WARN_ON_ONCE(create && !inode_is_locked(inode));
/* make sure caller holds a cluster lock */
lock = scoutfs_per_task_get(&si->pt_data_lock);
@@ -692,7 +704,7 @@ static int scoutfs_readpage(struct file *file, struct page *page)
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock)) {
ret = scoutfs_data_wait_check(inode, page_offset(page),
PAGE_CACHE_SIZE, SEF_OFFLINE,
PAGE_SIZE, SEF_OFFLINE,
SCOUTFS_IOC_DWO_READ, &dw,
inode_lock);
if (ret != 0) {
@@ -717,6 +729,7 @@ static int scoutfs_readpage(struct file *file, struct page *page)
return ret;
}
#ifndef KC_FILE_AOPS_READAHEAD
/*
* This is used for opportunistic read-ahead which can throw the pages
* away if it needs to. If the caller didn't deal with offline extents
@@ -742,14 +755,14 @@ static int scoutfs_readpages(struct file *file, struct address_space *mapping,
list_for_each_entry_safe(page, tmp, pages, lru) {
ret = scoutfs_data_wait_check(inode, page_offset(page),
PAGE_CACHE_SIZE, SEF_OFFLINE,
PAGE_SIZE, SEF_OFFLINE,
SCOUTFS_IOC_DWO_READ, NULL,
inode_lock);
if (ret < 0)
goto out;
if (ret > 0) {
list_del(&page->lru);
page_cache_release(page);
put_page(page);
if (--nr_pages == 0) {
ret = 0;
goto out;
@@ -763,6 +776,29 @@ out:
BUG_ON(!list_empty(pages));
return ret;
}
#else
static void scoutfs_readahead(struct readahead_control *rac)
{
struct inode *inode = rac->file->f_inode;
struct super_block *sb = inode->i_sb;
struct scoutfs_lock *inode_lock = NULL;
int ret;
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
SCOUTFS_LKF_REFRESH_INODE, inode, &inode_lock);
if (ret)
return;
ret = scoutfs_data_wait_check(inode, readahead_pos(rac),
readahead_length(rac), SEF_OFFLINE,
SCOUTFS_IOC_DWO_READ, NULL,
inode_lock);
if (ret == 0)
mpage_readahead(rac, scoutfs_get_block_read);
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
}
#endif
static int scoutfs_writepage(struct page *page, struct writeback_control *wbc)
{
@@ -1045,7 +1081,7 @@ long scoutfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
goto out;
}
mutex_lock(&inode->i_mutex);
inode_lock(inode);
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
@@ -1106,7 +1142,7 @@ out_extent:
up_write(&si->extent_sem);
out_mutex:
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
mutex_unlock(&inode->i_mutex);
inode_unlock(inode);
out:
trace_scoutfs_data_fallocate(sb, ino, mode, offset, len, ret);
@@ -1209,7 +1245,7 @@ int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
struct data_ext_args from_args;
struct data_ext_args to_args;
struct scoutfs_extent ext;
struct timespec cur_time;
struct kc_timespec cur_time;
LIST_HEAD(locks);
bool done = false;
loff_t from_size;
@@ -1253,6 +1289,7 @@ int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
from_iblock = from_off >> SCOUTFS_BLOCK_SM_SHIFT;
count = (byte_len + SCOUTFS_BLOCK_SM_MASK) >> SCOUTFS_BLOCK_SM_SHIFT;
to_iblock = to_off >> SCOUTFS_BLOCK_SM_SHIFT;
from_start = from_iblock;
/* only move extent blocks inside i_size, careful not to wrap */
from_size = i_size_read(from);
@@ -1329,7 +1366,7 @@ int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
/* find the next extent to move */
ret = scoutfs_ext_next(sb, &data_ext_ops, &from_args,
from_iblock, 1, &ext);
from_start, 1, &ext);
if (ret < 0) {
if (ret == -ENOENT) {
done = true;
@@ -1417,13 +1454,19 @@ int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
i_size_read(from);
i_size_write(to, to_size);
}
/* find next after moved extent, avoiding wrapping */
if (from_start + len < from_start)
from_start = from_iblock + count + 1;
else
from_start += len;
}
up_write(&from_si->extent_sem);
up_write(&to_si->extent_sem);
cur_time = CURRENT_TIME;
cur_time = current_time(from);
if (!is_stage) {
to->i_ctime = to->i_mtime = cur_time;
inode_inc_iversion(to);
@@ -1510,7 +1553,7 @@ int scoutfs_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
if (ret)
goto out;
mutex_lock(&inode->i_mutex);
inode_lock(inode);
down_read(&si->extent_sem);
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, inode, &lock);
@@ -1564,7 +1607,7 @@ int scoutfs_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
unlock:
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ);
up_read(&si->extent_sem);
mutex_unlock(&inode->i_mutex);
inode_unlock(inode);
out:
if (ret == 1)
@@ -1764,6 +1807,37 @@ int scoutfs_data_wait_check_iov(struct inode *inode, const struct iovec *iov,
return ret;
}
int scoutfs_data_wait_check_iter(struct inode *inode, loff_t pos, struct iov_iter *iter,
u8 sef, u8 op, struct scoutfs_data_wait *dw,
struct scoutfs_lock *lock)
{
size_t count = iov_iter_count(iter);
size_t off = iter->iov_offset;
const struct iovec *iov;
size_t len;
int ret = 0;
for (iov = iter->iov; count > 0; iov++) {
len = iov->iov_len - off;
if (len == 0)
continue;
/* aren't we waiting on too much data here ? */
ret = scoutfs_data_wait_check(inode, pos, len,
sef, op, dw, lock);
if (ret != 0)
break;
pos += len;
count -= len;
off = 0;
}
return ret;
}
int scoutfs_data_wait(struct inode *inode, struct scoutfs_data_wait *dw)
{
DECLARE_DATA_WAIT_ROOT(inode->i_sb, rt);
@@ -1854,7 +1928,11 @@ int scoutfs_data_waiting(struct super_block *sb, u64 ino, u64 iblock,
const struct address_space_operations scoutfs_file_aops = {
.readpage = scoutfs_readpage,
#ifndef KC_FILE_AOPS_READAHEAD
.readpages = scoutfs_readpages,
#else
.readahead = scoutfs_readahead,
#endif
.writepage = scoutfs_writepage,
.writepages = scoutfs_writepages,
.write_begin = scoutfs_write_begin,
@@ -1862,10 +1940,15 @@ const struct address_space_operations scoutfs_file_aops = {
};
const struct file_operations scoutfs_file_fops = {
#ifdef KC_LINUX_HAVE_FOP_AIO_READ
.read = do_sync_read,
.write = do_sync_write,
.aio_read = scoutfs_file_aio_read,
.aio_write = scoutfs_file_aio_write,
#else
.read_iter = scoutfs_file_read_iter,
.write_iter = scoutfs_file_write_iter,
#endif
.unlocked_ioctl = scoutfs_ioctl,
.fsync = scoutfs_file_fsync,
.llseek = scoutfs_file_llseek,

View File

@@ -65,6 +65,9 @@ int scoutfs_data_wait_check_iov(struct inode *inode, const struct iovec *iov,
unsigned long nr_segs, loff_t pos, u8 sef,
u8 op, struct scoutfs_data_wait *ow,
struct scoutfs_lock *lock);
int scoutfs_data_wait_check_iter(struct inode *inode, loff_t pos, struct iov_iter *iter,
u8 sef, u8 op, struct scoutfs_data_wait *ow,
struct scoutfs_lock *lock);
bool scoutfs_data_wait_found(struct scoutfs_data_wait *ow);
int scoutfs_data_wait(struct inode *inode,
struct scoutfs_data_wait *ow);

View File

@@ -272,7 +272,7 @@ static void set_dentry_fsdata(struct dentry *dentry, struct scoutfs_lock *lock)
static bool test_dentry_fsdata(struct dentry *dentry, u64 refresh)
{
u64 fsd = (unsigned long)ACCESS_ONCE(dentry->d_fsdata);
u64 fsd = (unsigned long)READ_ONCE(dentry->d_fsdata);
return fsd == refresh;
}
@@ -735,7 +735,7 @@ static int scoutfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
set_dentry_fsdata(dentry, dir_lock);
i_size_write(dir, i_size_read(dir) + dentry->d_name.len);
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
dir->i_mtime = dir->i_ctime = current_time(inode);
inode->i_mtime = inode->i_atime = inode->i_ctime = dir->i_mtime;
si->crtime = inode->i_mtime;
inode_inc_iversion(dir);
@@ -859,7 +859,7 @@ retry:
set_dentry_fsdata(dentry, dir_lock);
i_size_write(dir, dir_size);
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
dir->i_mtime = dir->i_ctime = current_time(inode);
inode->i_ctime = dir->i_mtime;
inc_nlink(inode);
inode_inc_iversion(dir);
@@ -900,7 +900,7 @@ static int scoutfs_unlink(struct inode *dir, struct dentry *dentry)
{
struct super_block *sb = dir->i_sb;
struct inode *inode = dentry->d_inode;
struct timespec ts = current_kernel_time();
struct kc_timespec ts = current_time(inode);
struct scoutfs_lock *inode_lock = NULL;
struct scoutfs_lock *orph_lock = NULL;
struct scoutfs_lock *dir_lock = NULL;
@@ -1059,14 +1059,14 @@ static int symlink_item_ops(struct super_block *sb, enum symlink_ops op, u64 ino
}
/*
* Full a buffer with the null terminated symlink, point nd at it, and
* return it so put_link can free it once the vfs is done.
* Fill a buffer with the null terminated symlink, and return it
* so callers can free it once the vfs is done.
*
* We chose to pay the runtime cost of per-call allocation and copy
* overhead instead of wiring up symlinks to the page cache, storing
* each small link in a full page, and later having to reclaim them.
*/
static void *scoutfs_follow_link(struct dentry *dentry, struct nameidata *nd)
static void *scoutfs_get_link_target(struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
struct super_block *sb = inode->i_sb;
@@ -1125,32 +1125,41 @@ out:
if (ret < 0) {
kfree(path);
path = ERR_PTR(ret);
} else {
nd_set_link(nd, path);
}
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
return path;
}
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
static void *scoutfs_follow_link(struct dentry *dentry, struct nameidata *nd)
{
char *path;
path = scoutfs_get_link_target(dentry);
if (!IS_ERR_OR_NULL(path))
nd_set_link(nd, path);
return path;
}
static void scoutfs_put_link(struct dentry *dentry, struct nameidata *nd,
void *cookie)
{
if (!IS_ERR_OR_NULL(cookie))
kfree(cookie);
}
#else
static const char *scoutfs_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done)
{
char *path;
const struct inode_operations scoutfs_symlink_iops = {
.readlink = generic_readlink,
.follow_link = scoutfs_follow_link,
.put_link = scoutfs_put_link,
.getattr = scoutfs_getattr,
.setattr = scoutfs_setattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = scoutfs_listxattr,
.removexattr = generic_removexattr,
.get_acl = scoutfs_get_acl,
};
path = scoutfs_get_link_target(dentry);
if (!IS_ERR_OR_NULL(path))
set_delayed_call(done, kfree_link, path);
return path;
}
#endif
/*
* Symlink target paths can be annoyingly large. We store relatively
@@ -1204,7 +1213,7 @@ static int scoutfs_symlink(struct inode *dir, struct dentry *dentry,
set_dentry_fsdata(dentry, dir_lock);
i_size_write(dir, i_size_read(dir) + dentry->d_name.len);
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
dir->i_mtime = dir->i_ctime = current_time(inode);
inode_inc_iversion(dir);
inode->i_ctime = dir->i_mtime;
@@ -1253,75 +1262,93 @@ int scoutfs_symlink_drop(struct super_block *sb, u64 ino,
}
/*
* Find the next link backref key for the given ino starting from the
* given dir inode and final entry position. If we find a backref item
* we add an allocated copy of it to the head of the caller's list.
* Find the next link backref items for the given ino starting from the
* given dir inode and final entry position. For each backref item we
* add an allocated copy of it to the head of the caller's list.
*
* Returns 0 if we added an entry, -ENOENT if we didn't, and -errno for
* search errors.
* Callers who are building a path can add one entry for each parent.
* They're left with a list of entries from the root down in list order.
*
* Callers who are gathering multiple entries for one inode get the
* entries in the opposite order that their items are found.
*
* Returns +ve for number of entries added, -ENOENT if no entries were
* found, or -errno on error. It weirdly won't return 0, but early
* callers preferred -ENOENT so we use that for the case of no entries.
*
* Callers are comfortable with the race inherent to incrementally
* building up a path with individual locked backref item lookups.
* gathering backrefs across multiple lock acquisitions.
*/
int scoutfs_dir_add_next_linkref(struct super_block *sb, u64 ino,
u64 dir_ino, u64 dir_pos,
struct list_head *list)
int scoutfs_dir_add_next_linkrefs(struct super_block *sb, u64 ino, u64 dir_ino, u64 dir_pos,
int count, struct list_head *list)
{
struct scoutfs_link_backref_entry *prev_ent = NULL;
struct scoutfs_link_backref_entry *ent = NULL;
struct scoutfs_lock *lock = NULL;
struct scoutfs_key last_key;
struct scoutfs_key key;
int nr = 0;
int len;
int ret;
ent = kmalloc(offsetof(struct scoutfs_link_backref_entry,
dent.name[SCOUTFS_NAME_LEN]), GFP_KERNEL);
if (!ent) {
ret = -ENOMEM;
goto out;
}
INIT_LIST_HEAD(&ent->head);
init_dirent_key(&key, SCOUTFS_LINK_BACKREF_TYPE, ino, dir_ino, dir_pos);
init_dirent_key(&last_key, SCOUTFS_LINK_BACKREF_TYPE, ino, U64_MAX,
U64_MAX);
init_dirent_key(&last_key, SCOUTFS_LINK_BACKREF_TYPE, ino, U64_MAX, U64_MAX);
ret = scoutfs_lock_ino(sb, SCOUTFS_LOCK_READ, 0, ino, &lock);
if (ret)
goto out;
ret = scoutfs_item_next(sb, &key, &last_key, &ent->dent,
dirent_bytes(SCOUTFS_NAME_LEN), lock);
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ);
lock = NULL;
if (ret < 0)
goto out;
while (nr < count) {
ent = kmalloc(offsetof(struct scoutfs_link_backref_entry,
dent.name[SCOUTFS_NAME_LEN]), GFP_NOFS);
if (!ent) {
ret = -ENOMEM;
goto out;
}
len = ret - sizeof(struct scoutfs_dirent);
if (len < 1 || len > SCOUTFS_NAME_LEN) {
scoutfs_corruption(sb, SC_DIRENT_BACKREF_NAME_LEN,
corrupt_dirent_backref_name_len,
"ino %llu dir_ino %llu pos %llu key "SK_FMT" len %d",
ino, dir_ino, dir_pos, SK_ARG(&key), len);
ret = -EIO;
goto out;
INIT_LIST_HEAD(&ent->head);
ret = scoutfs_item_next(sb, &key, &last_key, &ent->dent,
dirent_bytes(SCOUTFS_NAME_LEN), lock);
if (ret < 0) {
if (ret == -ENOENT && prev_ent)
prev_ent->last = true;
goto out;
}
len = ret - sizeof(struct scoutfs_dirent);
if (len < 1 || len > SCOUTFS_NAME_LEN) {
scoutfs_corruption(sb, SC_DIRENT_BACKREF_NAME_LEN,
corrupt_dirent_backref_name_len,
"ino %llu dir_ino %llu pos %llu key "SK_FMT" len %d",
ino, dir_ino, dir_pos, SK_ARG(&key), len);
ret = -EIO;
goto out;
}
ent->dir_ino = le64_to_cpu(key.skd_major);
ent->dir_pos = le64_to_cpu(key.skd_minor);
ent->name_len = len;
ent->d_type = dentry_type(ent->dent.type);
ent->last = false;
trace_scoutfs_dir_add_next_linkref_found(sb, ino, ent->dir_ino, ent->dir_pos,
ent->name_len);
list_add(&ent->head, list);
prev_ent = ent;
ent = NULL;
nr++;
scoutfs_key_inc(&key);
}
list_add(&ent->head, list);
ent->dir_ino = le64_to_cpu(key.skd_major);
ent->dir_pos = le64_to_cpu(key.skd_minor);
ent->name_len = len;
ret = 0;
out:
trace_scoutfs_dir_add_next_linkref(sb, ino, dir_ino, dir_pos, ret,
ent ? ent->dir_ino : 0,
ent ? ent->dir_pos : 0,
ent ? ent->name_len : 0);
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ);
trace_scoutfs_dir_add_next_linkrefs(sb, ino, dir_ino, dir_pos, count, nr, ret);
if (ent && list_empty(&ent->head))
kfree(ent);
return ret;
kfree(ent);
return nr ?: ret;
}
static u64 first_backref_dir_ino(struct list_head *list)
@@ -1396,7 +1423,7 @@ retry:
}
/* get the next link name to the given inode */
ret = scoutfs_dir_add_next_linkref(sb, ino, dir_ino, dir_pos, list);
ret = scoutfs_dir_add_next_linkrefs(sb, ino, dir_ino, dir_pos, 1, list);
if (ret < 0)
goto out;
@@ -1404,7 +1431,7 @@ retry:
par_ino = first_backref_dir_ino(list);
while (par_ino != SCOUTFS_ROOT_INO) {
ret = scoutfs_dir_add_next_linkref(sb, par_ino, 0, 0, list);
ret = scoutfs_dir_add_next_linkrefs(sb, par_ino, 0, 0, 1, list);
if (ret < 0) {
if (ret == -ENOENT) {
/* restart if there was no parent component */
@@ -1416,6 +1443,8 @@ retry:
par_ino = first_backref_dir_ino(list);
}
ret = 0;
out:
if (ret < 0)
scoutfs_dir_free_backref_path(sb, list);
@@ -1538,7 +1567,7 @@ static int scoutfs_rename_common(struct inode *old_dir,
struct scoutfs_lock *orph_lock = NULL;
struct scoutfs_dirent new_dent;
struct scoutfs_dirent old_dent;
struct timespec now;
struct kc_timespec now;
bool ins_new = false;
bool del_new = false;
bool ins_old = false;
@@ -1704,7 +1733,7 @@ retry:
inc_nlink(new_dir);
}
now = CURRENT_TIME;
now = current_time(old_inode);
old_dir->i_ctime = now;
old_dir->i_mtime = now;
if (new_dir != old_dir) {
@@ -1791,12 +1820,14 @@ out_unlock:
return ret;
}
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
static int scoutfs_rename(struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry)
{
return scoutfs_rename_common(old_dir, old_dentry, new_dir, new_dentry, 0);
}
#endif
static int scoutfs_rename2(struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
@@ -1841,7 +1872,7 @@ static int scoutfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mod
if (ret < 0)
goto out; /* XXX returning error but items created */
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
si->crtime = inode->i_mtime;
insert_inode_hash(inode);
ihold(inode); /* need to update inode modifications in d_tmpfile */
@@ -1866,6 +1897,37 @@ out:
return ret;
}
const struct inode_operations scoutfs_symlink_iops = {
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.readlink = generic_readlink,
.follow_link = scoutfs_follow_link,
.put_link = scoutfs_put_link,
#else
.get_link = scoutfs_get_link,
#endif
.getattr = scoutfs_getattr,
.setattr = scoutfs_setattr,
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
#endif
.listxattr = scoutfs_listxattr,
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.removexattr = generic_removexattr,
#endif
.get_acl = scoutfs_get_acl,
#ifndef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.tmpfile = scoutfs_tmpfile,
.rename = scoutfs_rename_common,
.symlink = scoutfs_symlink,
.unlink = scoutfs_unlink,
.link = scoutfs_link,
.mkdir = scoutfs_mkdir,
.create = scoutfs_create,
.lookup = scoutfs_lookup,
#endif
};
const struct file_operations scoutfs_dir_fops = {
.KC_FOP_READDIR = scoutfs_readdir,
#ifdef KC_FMODE_KABI_ITERATE
@@ -1877,9 +1939,12 @@ const struct file_operations scoutfs_dir_fops = {
};
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
const struct inode_operations_wrapper scoutfs_dir_iops = {
.ops = {
#else
const struct inode_operations scoutfs_dir_iops = {
#endif
.lookup = scoutfs_lookup,
.mknod = scoutfs_mknod,
.create = scoutfs_create,
@@ -1887,17 +1952,25 @@ const struct inode_operations_wrapper scoutfs_dir_iops = {
.link = scoutfs_link,
.unlink = scoutfs_unlink,
.rmdir = scoutfs_unlink,
.rename = scoutfs_rename,
.getattr = scoutfs_getattr,
.setattr = scoutfs_setattr,
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.rename = scoutfs_rename,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = scoutfs_listxattr,
.removexattr = generic_removexattr,
#endif
.listxattr = scoutfs_listxattr,
.get_acl = scoutfs_get_acl,
.symlink = scoutfs_symlink,
.permission = scoutfs_permission,
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
},
#endif
.tmpfile = scoutfs_tmpfile,
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.rename2 = scoutfs_rename2,
#else
.rename = scoutfs_rename2,
#endif
};

View File

@@ -5,7 +5,11 @@
#include "lock.h"
extern const struct file_operations scoutfs_dir_fops;
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
extern const struct inode_operations_wrapper scoutfs_dir_iops;
#else
extern const struct inode_operations scoutfs_dir_iops;
#endif
extern const struct inode_operations scoutfs_symlink_iops;
extern const struct dentry_operations scoutfs_dentry_ops;
@@ -15,6 +19,8 @@ struct scoutfs_link_backref_entry {
u64 dir_ino;
u64 dir_pos;
u16 name_len;
u8 d_type;
bool last;
struct scoutfs_dirent dent;
/* the full name is allocated and stored in dent.name[] */
};
@@ -24,9 +30,8 @@ int scoutfs_dir_get_backref_path(struct super_block *sb, u64 ino, u64 dir_ino,
void scoutfs_dir_free_backref_path(struct super_block *sb,
struct list_head *list);
int scoutfs_dir_add_next_linkref(struct super_block *sb, u64 ino,
u64 dir_ino, u64 dir_pos,
struct list_head *list);
int scoutfs_dir_add_next_linkrefs(struct super_block *sb, u64 ino, u64 dir_ino, u64 dir_pos,
int count, struct list_head *list);
int scoutfs_symlink_drop(struct super_block *sb, u64 ino,
struct scoutfs_lock *lock, u64 i_size);

View File

@@ -114,8 +114,8 @@ static struct dentry *scoutfs_get_parent(struct dentry *child)
int ret;
u64 ino;
ret = scoutfs_dir_add_next_linkref(sb, scoutfs_ino(inode), 0, 0, &list);
if (ret)
ret = scoutfs_dir_add_next_linkrefs(sb, scoutfs_ino(inode), 0, 0, 1, &list);
if (ret < 0)
return ERR_PTR(ret);
ent = list_first_entry(&list, struct scoutfs_link_backref_entry, head);
@@ -138,9 +138,9 @@ static int scoutfs_get_name(struct dentry *parent, char *name,
LIST_HEAD(list);
int ret;
ret = scoutfs_dir_add_next_linkref(sb, scoutfs_ino(inode), dir_ino,
0, &list);
if (ret)
ret = scoutfs_dir_add_next_linkrefs(sb, scoutfs_ino(inode), dir_ino,
0, 1, &list);
if (ret < 0)
return ret;
ret = -ENOENT;

View File

@@ -29,6 +29,7 @@
#include "per_task.h"
#include "omap.h"
#ifdef KC_LINUX_HAVE_FOP_AIO_READ
/*
* Start a high level file read. We check for offline extents in the
* read region here so that we only check the extents once. We use the
@@ -42,27 +43,27 @@ ssize_t scoutfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
struct inode *inode = file_inode(file);
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
struct super_block *sb = inode->i_sb;
struct scoutfs_lock *inode_lock = NULL;
struct scoutfs_lock *scoutfs_inode_lock = NULL;
SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
DECLARE_DATA_WAIT(dw);
int ret;
retry:
/* protect checked extents from release */
mutex_lock(&inode->i_mutex);
inode_lock(inode);
atomic_inc(&inode->i_dio_count);
mutex_unlock(&inode->i_mutex);
inode_unlock(inode);
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
SCOUTFS_LKF_REFRESH_INODE, inode, &inode_lock);
SCOUTFS_LKF_REFRESH_INODE, inode, &scoutfs_inode_lock);
if (ret)
goto out;
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock)) {
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, scoutfs_inode_lock)) {
ret = scoutfs_data_wait_check_iov(inode, iov, nr_segs, pos,
SEF_OFFLINE,
SCOUTFS_IOC_DWO_READ,
&dw, inode_lock);
&dw, scoutfs_inode_lock);
if (ret != 0)
goto out;
} else {
@@ -74,7 +75,7 @@ retry:
out:
inode_dio_done(inode);
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
scoutfs_unlock(sb, scoutfs_inode_lock, SCOUTFS_LOCK_READ);
if (scoutfs_data_wait_found(&dw)) {
ret = scoutfs_data_wait(inode, &dw);
@@ -92,7 +93,7 @@ ssize_t scoutfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
struct inode *inode = file_inode(file);
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
struct super_block *sb = inode->i_sb;
struct scoutfs_lock *inode_lock = NULL;
struct scoutfs_lock *scoutfs_inode_lock = NULL;
SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
DECLARE_DATA_WAIT(dw);
int ret;
@@ -101,22 +102,22 @@ ssize_t scoutfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
return 0;
retry:
mutex_lock(&inode->i_mutex);
inode_lock(inode);
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
SCOUTFS_LKF_REFRESH_INODE, inode, &inode_lock);
SCOUTFS_LKF_REFRESH_INODE, inode, &scoutfs_inode_lock);
if (ret)
goto out;
ret = scoutfs_complete_truncate(inode, inode_lock);
ret = scoutfs_complete_truncate(inode, scoutfs_inode_lock);
if (ret)
goto out;
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock)) {
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, scoutfs_inode_lock)) {
/* data_version is per inode, whole file must be online */
ret = scoutfs_data_wait_check(inode, 0, i_size_read(inode),
SEF_OFFLINE,
SCOUTFS_IOC_DWO_WRITE,
&dw, inode_lock);
&dw, scoutfs_inode_lock);
if (ret != 0)
goto out;
}
@@ -127,8 +128,8 @@ retry:
out:
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_WRITE);
mutex_unlock(&inode->i_mutex);
scoutfs_unlock(sb, scoutfs_inode_lock, SCOUTFS_LOCK_WRITE);
inode_unlock(inode);
if (scoutfs_data_wait_found(&dw)) {
ret = scoutfs_data_wait(inode, &dw);
@@ -146,6 +147,113 @@ out:
return ret;
}
#else
ssize_t scoutfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
struct super_block *sb = inode->i_sb;
struct scoutfs_lock *scoutfs_inode_lock = NULL;
SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
DECLARE_DATA_WAIT(dw);
int ret;
retry:
/* protect checked extents from release */
inode_lock(inode);
atomic_inc(&inode->i_dio_count);
inode_unlock(inode);
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
SCOUTFS_LKF_REFRESH_INODE, inode, &scoutfs_inode_lock);
if (ret)
goto out;
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, scoutfs_inode_lock)) {
ret = scoutfs_data_wait_check_iter(inode, iocb->ki_pos, to,
SEF_OFFLINE,
SCOUTFS_IOC_DWO_READ,
&dw, scoutfs_inode_lock);
if (ret != 0)
goto out;
} else {
WARN_ON_ONCE(true);
}
ret = generic_file_read_iter(iocb, to);
out:
inode_dio_end(inode);
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
scoutfs_unlock(sb, scoutfs_inode_lock, SCOUTFS_LOCK_READ);
if (scoutfs_data_wait_found(&dw)) {
ret = scoutfs_data_wait(inode, &dw);
if (ret == 0)
goto retry;
}
return ret;
}
ssize_t scoutfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
struct super_block *sb = inode->i_sb;
struct scoutfs_lock *scoutfs_inode_lock = NULL;
SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
DECLARE_DATA_WAIT(dw);
int ret;
int written;
retry:
inode_lock(inode);
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
SCOUTFS_LKF_REFRESH_INODE, inode, &scoutfs_inode_lock);
if (ret)
goto out;
ret = generic_write_checks(iocb, from);
if (ret <= 0)
goto out;
ret = scoutfs_complete_truncate(inode, scoutfs_inode_lock);
if (ret)
goto out;
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, scoutfs_inode_lock)) {
/* data_version is per inode, whole file must be online */
ret = scoutfs_data_wait_check_iter(inode, iocb->ki_pos, from,
SEF_OFFLINE,
SCOUTFS_IOC_DWO_WRITE,
&dw, scoutfs_inode_lock);
if (ret != 0)
goto out;
}
/* XXX: remove SUID bit */
written = __generic_file_write_iter(iocb, from);
out:
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
scoutfs_unlock(sb, scoutfs_inode_lock, SCOUTFS_LOCK_WRITE);
inode_unlock(inode);
if (scoutfs_data_wait_found(&dw)) {
ret = scoutfs_data_wait(inode, &dw);
if (ret == 0)
goto retry;
}
if (ret > 0 || ret == -EIOCBQUEUED)
ret = generic_write_sync(iocb, written);
return written ? written : ret;
}
#endif
int scoutfs_permission(struct inode *inode, int mask)
{

View File

@@ -1,10 +1,15 @@
#ifndef _SCOUTFS_FILE_H_
#define _SCOUTFS_FILE_H_
#ifdef KC_LINUX_HAVE_FOP_AIO_READ
ssize_t scoutfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
ssize_t scoutfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
#else
ssize_t scoutfs_file_read_iter(struct kiocb *, struct iov_iter *);
ssize_t scoutfs_file_write_iter(struct kiocb *, struct iov_iter *);
#endif
int scoutfs_permission(struct inode *inode, int mask);
loff_t scoutfs_file_llseek(struct file *file, loff_t offset, int whence);

View File

@@ -143,10 +143,12 @@ void scoutfs_destroy_inode(struct inode *inode)
static const struct inode_operations scoutfs_file_iops = {
.getattr = scoutfs_getattr,
.setattr = scoutfs_setattr,
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = scoutfs_listxattr,
.removexattr = generic_removexattr,
#endif
.listxattr = scoutfs_listxattr,
.get_acl = scoutfs_get_acl,
.fiemap = scoutfs_data_fiemap,
};
@@ -154,10 +156,12 @@ static const struct inode_operations scoutfs_file_iops = {
static const struct inode_operations scoutfs_special_iops = {
.getattr = scoutfs_getattr,
.setattr = scoutfs_setattr,
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = scoutfs_listxattr,
.removexattr = generic_removexattr,
#endif
.listxattr = scoutfs_listxattr,
.get_acl = scoutfs_get_acl,
};
@@ -174,8 +178,12 @@ static void set_inode_ops(struct inode *inode)
inode->i_fop = &scoutfs_file_fops;
break;
case S_IFDIR:
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
inode->i_op = &scoutfs_dir_iops.ops;
inode->i_flags |= S_IOPS_WRAPPER;
#else
inode->i_op = &scoutfs_dir_iops;
#endif
inode->i_fop = &scoutfs_dir_fops;
break;
case S_IFLNK:
@@ -247,7 +255,7 @@ static void load_inode(struct inode *inode, struct scoutfs_inode *cinode)
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
i_size_write(inode, le64_to_cpu(cinode->size));
inode->i_version = le64_to_cpu(cinode->version);
inode_set_iversion_queried(inode, le64_to_cpu(cinode->version));
set_nlink(inode, le32_to_cpu(cinode->nlink));
i_uid_write(inode, le32_to_cpu(cinode->uid));
i_gid_write(inode, le32_to_cpu(cinode->gid));
@@ -340,10 +348,17 @@ int scoutfs_inode_refresh(struct inode *inode, struct scoutfs_lock *lock)
return ret;
}
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
int scoutfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat)
{
struct inode *inode = dentry->d_inode;
#else
int scoutfs_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
#endif
struct super_block *sb = inode->i_sb;
struct scoutfs_lock *lock = NULL;
int ret;
@@ -384,7 +399,7 @@ static int set_inode_size(struct inode *inode, struct scoutfs_lock *lock,
scoutfs_inode_inc_data_version(inode);
truncate_setsize(inode, new_size);
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
inode->i_ctime = inode->i_mtime = current_time(inode);
if (truncate)
si->flags |= SCOUTFS_INO_FLAG_TRUNCATE;
scoutfs_inode_set_data_seq(inode);
@@ -467,8 +482,7 @@ retry:
SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
if (ret)
return ret;
ret = inode_change_ok(inode, attr);
ret = setattr_prepare(dentry, attr);
if (ret)
goto out;
@@ -496,9 +510,9 @@ retry:
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
/* XXX callee locks instead? */
mutex_unlock(&inode->i_mutex);
inode_unlock(inode);
ret = scoutfs_data_wait(inode, &dw);
mutex_lock(&inode->i_mutex);
inode_lock(inode);
if (ret == 0)
goto retry;
@@ -750,7 +764,7 @@ struct inode *scoutfs_iget(struct super_block *sb, u64 ino, int lkf, int igf)
/* XXX ensure refresh, instead clear in drop_inode? */
si = SCOUTFS_I(inode);
atomic64_set(&si->last_refreshed, 0);
inode->i_version = 0;
inode_set_iversion_queried(inode, 0);
}
ret = scoutfs_inode_refresh(inode, lock);
@@ -798,7 +812,7 @@ static void store_inode(struct scoutfs_inode *cinode, struct inode *inode)
scoutfs_inode_get_onoff(inode, &online_blocks, &offline_blocks);
cinode->size = cpu_to_le64(i_size_read(inode));
cinode->version = cpu_to_le64(inode->i_version);
cinode->version = cpu_to_le64(inode_peek_iversion(inode));
cinode->nlink = cpu_to_le32(inode->i_nlink);
cinode->uid = cpu_to_le32(i_uid_read(inode));
cinode->gid = cpu_to_le32(i_gid_read(inode));
@@ -1475,7 +1489,7 @@ int scoutfs_new_inode(struct super_block *sb, struct inode *dir, umode_t mode, d
inode->i_ino = ino; /* XXX overflow */
inode_init_owner(inode, dir, mode);
inode_set_bytes(inode, 0);
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
inode->i_rdev = rdev;
set_inode_ops(inode);

View File

@@ -22,7 +22,7 @@ struct scoutfs_inode_info {
u64 online_blocks;
u64 offline_blocks;
u32 flags;
struct timespec crtime;
struct kc_timespec crtime;
/*
* Protects per-inode extent items, most particularly readers
@@ -123,8 +123,13 @@ void scoutfs_inode_get_onoff(struct inode *inode, s64 *on, s64 *off);
int scoutfs_complete_truncate(struct inode *inode, struct scoutfs_lock *lock);
int scoutfs_inode_refresh(struct inode *inode, struct scoutfs_lock *lock);
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
int scoutfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
#else
int scoutfs_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags);
#endif
int scoutfs_setattr(struct dentry *dentry, struct iattr *attr);
int scoutfs_inode_orphan_create(struct super_block *sb, u64 ino, struct scoutfs_lock *lock,

View File

@@ -22,6 +22,7 @@
#include <linux/sched.h>
#include <linux/aio.h>
#include <linux/list_sort.h>
#include <linux/backing-dev.h>
#include "format.h"
#include "key.h"
@@ -302,7 +303,7 @@ static long scoutfs_ioc_release(struct file *file, unsigned long arg)
if (ret)
return ret;
mutex_lock(&inode->i_mutex);
inode_lock(inode);
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
@@ -351,7 +352,7 @@ static long scoutfs_ioc_release(struct file *file, unsigned long arg)
out:
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
mutex_unlock(&inode->i_mutex);
inode_unlock(inode);
mnt_drop_write_file(file);
trace_scoutfs_ioc_release_ret(sb, scoutfs_ino(inode), ret);
@@ -393,7 +394,7 @@ static long scoutfs_ioc_data_wait_err(struct file *file, unsigned long arg)
goto out;
}
mutex_lock(&inode->i_mutex);
inode_lock(inode);
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
@@ -411,7 +412,7 @@ static long scoutfs_ioc_data_wait_err(struct file *file, unsigned long arg)
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ);
unlock:
mutex_unlock(&inode->i_mutex);
inode_unlock(inode);
iput(inode);
out:
return ret;
@@ -448,7 +449,6 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
{
struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
struct address_space *mapping = inode->i_mapping;
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
struct scoutfs_ioctl_stage args;
@@ -480,8 +480,10 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
/* the iocb is really only used for the file pointer :P */
init_sync_kiocb(&kiocb, file);
kiocb.ki_pos = args.offset;
#ifdef KC_LINUX_AIO_KI_LEFT
kiocb.ki_left = args.length;
kiocb.ki_nbytes = args.length;
#endif
iov.iov_base = (void __user *)(unsigned long)args.buf_ptr;
iov.iov_len = args.length;
@@ -489,7 +491,7 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
if (ret)
return ret;
mutex_lock(&inode->i_mutex);
inode_lock(inode);
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
@@ -516,7 +518,7 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
}
si->staging = true;
current->backing_dev_info = mapping->backing_dev_info;
current->backing_dev_info = inode_to_bdi(inode);
pos = args.offset;
written = 0;
@@ -533,7 +535,7 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
out:
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
mutex_unlock(&inode->i_mutex);
inode_unlock(inode);
mnt_drop_write_file(file);
trace_scoutfs_ioc_stage_ret(sb, scoutfs_ino(inode), ret);
@@ -652,7 +654,7 @@ static long scoutfs_ioc_setattr_more(struct file *file, unsigned long arg)
if (ret)
goto out;
mutex_lock(&inode->i_mutex);
inode_lock(inode);
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
@@ -696,7 +698,7 @@ static long scoutfs_ioc_setattr_more(struct file *file, unsigned long arg)
unlock:
scoutfs_inode_index_unlock(sb, &ind_locks);
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
mutex_unlock(&inode->i_mutex);
inode_unlock(inode);
mnt_drop_write_file(file);
out:
@@ -1398,6 +1400,110 @@ out:
return ret ?: nr;
}
/*
* Copy entries that point to an inode to the user's buffer. We copy to
* userspace from copies of the entries that are acquired under a lock
* so that we don't fault while holding cluster locks. It also gives us
* a chance to limit the amount of work under each lock hold.
*/
static long scoutfs_ioc_get_referring_entries(struct file *file, unsigned long arg)
{
struct super_block *sb = file_inode(file)->i_sb;
struct scoutfs_ioctl_get_referring_entries gre;
struct scoutfs_link_backref_entry *bref = NULL;
struct scoutfs_link_backref_entry *bref_tmp;
struct scoutfs_ioctl_dirent __user *uent;
struct scoutfs_ioctl_dirent ent;
LIST_HEAD(list);
u64 copied;
int name_len;
int bytes;
long nr;
int ret;
if (!capable(CAP_DAC_READ_SEARCH))
return -EPERM;
if (copy_from_user(&gre, (void __user *)arg, sizeof(gre)))
return -EFAULT;
uent = (void __user *)(unsigned long)gre.entries_ptr;
copied = 0;
nr = 0;
/* use entry as cursor between calls */
ent.dir_ino = gre.dir_ino;
ent.dir_pos = gre.dir_pos;
for (;;) {
ret = scoutfs_dir_add_next_linkrefs(sb, gre.ino, ent.dir_ino, ent.dir_pos, 1024,
&list);
if (ret < 0) {
if (ret == -ENOENT)
ret = 0;
goto out;
}
/* _add_next adds each entry to the head, _reverse for key order */
list_for_each_entry_safe_reverse(bref, bref_tmp, &list, head) {
list_del_init(&bref->head);
name_len = bref->name_len;
bytes = ALIGN(offsetof(struct scoutfs_ioctl_dirent, name[name_len + 1]),
16);
if (copied + bytes > gre.entries_bytes) {
ret = -EINVAL;
goto out;
}
ent.dir_ino = bref->dir_ino;
ent.dir_pos = bref->dir_pos;
ent.ino = gre.ino;
ent.entry_bytes = bytes;
ent.flags = bref->last ? SCOUTFS_IOCTL_DIRENT_FLAG_LAST : 0;
ent.d_type = bref->d_type;
ent.name_len = name_len;
if (copy_to_user(uent, &ent, sizeof(struct scoutfs_ioctl_dirent)) ||
copy_to_user(&uent->name[0], bref->dent.name, name_len) ||
put_user('\0', &uent->name[name_len])) {
ret = -EFAULT;
goto out;
}
kfree(bref);
bref = NULL;
uent = (void __user *)uent + bytes;
copied += bytes;
nr++;
if (nr == LONG_MAX || (ent.flags & SCOUTFS_IOCTL_DIRENT_FLAG_LAST)) {
ret = 0;
goto out;
}
}
/* advance cursor pos from last copied entry */
if (++ent.dir_pos == 0) {
if (++ent.dir_ino == 0) {
ret = 0;
goto out;
}
}
}
ret = 0;
out:
kfree(bref);
list_for_each_entry_safe(bref, bref_tmp, &list, head) {
list_del_init(&bref->head);
kfree(bref);
}
return nr ?: ret;
}
long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
switch (cmd) {
@@ -1433,6 +1539,8 @@ long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return scoutfs_ioc_read_xattr_totals(file, arg);
case SCOUTFS_IOC_GET_ALLOCATED_INOS:
return scoutfs_ioc_get_allocated_inos(file, arg);
case SCOUTFS_IOC_GET_REFERRING_ENTRIES:
return scoutfs_ioc_get_referring_entries(file, arg);
}
return -ENOTTY;

View File

@@ -559,4 +559,118 @@ struct scoutfs_ioctl_get_allocated_inos {
#define SCOUTFS_IOC_GET_ALLOCATED_INOS \
_IOW(SCOUTFS_IOCTL_MAGIC, 16, struct scoutfs_ioctl_get_allocated_inos)
/*
* Get directory entries that refer to a specific inode.
*
* @ino: The target ino that we're finding referring entries to.
* Constant across all the calls that make up an iteration over all the
* inode's entries.
*
* @dir_ino: The inode number of a directory containing the entry to our
* inode to search from. If this parent directory contains no more
* entries to our inode then we'll search through other parent directory
* inodes in inode order.
*
* @dir_pos: The position in the dir_ino parent directory of the entry
* to our inode to search from. If there is no entry at this position
* then we'll search through other entry positions in increasing order.
* If we exhaust the parent directory then we'll search through
* additional parent directories in inode order.
*
* @entries_ptr: A pointer to the buffer where found entries will be
* stored. The pointer must be aligned to 16 bytes.
*
* @entries_bytes: The size of the buffer that will contain entries.
*
* To start iterating set the desired target ino, dir_ino to 0, dir_pos
* to 0, and point entries_ptr and entries_bytes at a sufficiently large buffer.
* Each entry struct that's stored in the buffer adds some overhead so a
* large multiple of the largest possible name is a reasonable choice.
* (A few multiples of PATH_MAX perhaps.)
*
* Each call returns the total number of entries that were stored in the
* entries buffer. Zero is returned when the search was successful and
* no referring entries were found. The entries can be iterated over by
* advancing each starting struct offset by the total number of bytes in
* each entry. If the _LAST flag is set on an entry then there were no
* more entries referring to the inode at the time of the call and
* iteration can be stopped.
*
* To resume iteration set the next call's starting dir_ino and dir_pos
* to one past the last entry seen. Increment the last entry's dir_pos,
* and if it wrapped to 0, increment its dir_ino.
*
* This does not check that the caller has permission to read the
* entries found in each containing directory. It requires
* CAP_DAC_READ_SEARCH which bypasses path traversal permissions
* checking.
*
* Entries returned by a single call can reflect any combination of
* racing creation and removal of entries. Each entry existed at the
* time it was read though it may have changed in the time it took to
* return from the call. The set of entries returned may no longer
* reflect the current set of entries and may not have existed at the
* same time.
*
* This has no knowledge of the life cycle of the inode. It can return
* 0 when there are no referring entries because either the target inode
* doesn't exist, it is in the process of being deleted, or because it
* is still open while being unlinked.
*
* On success this returns the number of entries filled in the buffer.
* A return of 0 indicates that no entries referred to the inode.
*
* EINVAL is returned when there is a problem with the buffer. Either
* it was not aligned or it was not large enough for the first entry.
*
* Many other errnos indicate hard failure to find the next entry.
*/
struct scoutfs_ioctl_get_referring_entries {
__u64 ino;
__u64 dir_ino;
__u64 dir_pos;
__u64 entries_ptr;
__u64 entries_bytes;
};
/*
* @dir_ino: The inode of the directory containing the entry.
*
* @dir_pos: The readdir f_pos position of the entry within the
* directory.
*
* @ino: The inode number of the target of the entry.
*
* @flags: Flags associated with this entry.
*
* @d_type: Inode type as specified with DT_ enum values in readdir(3).
*
* @entry_bytes: The total bytes taken by the entry in memory, including
* the name and any alignment padding. The start of a following entry
* will be found after this number of bytes.
*
* @name_len: The number of bytes in the name not including the trailing
* null, ala strlen(3).
*
* @name: The null terminated name of the referring entry. In the
* struct definition this array is sized to naturally align the struct.
* Those padding bytes are not necessarily present in the buffer
* returned by _get_referring_entries.
*/
struct scoutfs_ioctl_dirent {
__u64 dir_ino;
__u64 dir_pos;
__u64 ino;
__u16 entry_bytes;
__u8 flags;
__u8 d_type;
__u8 name_len;
__u8 name[3];
};
#define SCOUTFS_IOCTL_DIRENT_FLAG_LAST (1 << 0)
#define SCOUTFS_IOC_GET_REFERRING_ENTRIES \
_IOW(SCOUTFS_IOCTL_MAGIC, 17, struct scoutfs_ioctl_get_referring_entries)
#endif
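For reference, here is a minimal userspace sketch of the iteration protocol documented above. It is not part of the change itself; the 16 KiB buffer size and the list_referring_entries() helper name are illustrative, fs_fd is assumed to be any open descriptor on the scoutfs mount, the struct and ioctl definitions are taken from this header, and error handling is kept to a minimum.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

static int list_referring_entries(int fs_fd, uint64_t ino)
{
	struct scoutfs_ioctl_get_referring_entries gre;
	struct scoutfs_ioctl_dirent *ent;
	char *buf;
	long nr;
	long i;
	int done = 0;

	/* entries_ptr must be aligned to 16 bytes; a few PATH_MAX is plenty */
	if (posix_memalign((void **)&buf, 16, 16384))
		return -1;

	memset(&gre, 0, sizeof(gre));
	gre.ino = ino;
	gre.entries_ptr = (uintptr_t)buf;
	gre.entries_bytes = 16384;

	do {
		nr = ioctl(fs_fd, SCOUTFS_IOC_GET_REFERRING_ENTRIES, &gre);
		if (nr <= 0)
			break;		/* error, or no referring entries */

		ent = (struct scoutfs_ioctl_dirent *)buf;
		for (i = 0; i < nr; i++) {
			printf("dir %llu pos %llu -> %s\n",
			       (unsigned long long)ent->dir_ino,
			       (unsigned long long)ent->dir_pos,
			       (const char *)ent->name);
			if (ent->flags & SCOUTFS_IOCTL_DIRENT_FLAG_LAST)
				done = 1;
			/* resume one past the last entry we saw */
			gre.dir_pos = ent->dir_pos + 1;
			gre.dir_ino = ent->dir_ino + (gre.dir_pos == 0 ? 1 : 0);
			ent = (struct scoutfs_ioctl_dirent *)
				((char *)ent + ent->entry_bytes);
		}
	} while (!done);

	free(buf);
	return nr < 0 ? -1 : 0;
}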

View File

@@ -27,6 +27,7 @@
#include "trans.h"
#include "counters.h"
#include "scoutfs_trace.h"
#include "util.h"
/*
* The item cache maintains a consistent view of items that are read
@@ -76,8 +77,10 @@ struct item_cache_info {
/* almost always read, barely written */
struct super_block *sb;
struct item_percpu_pages __percpu *pcpu_pages;
struct shrinker shrinker;
KC_DEFINE_SHRINKER(shrinker);
#ifdef KC_CPU_NOTIFIER
struct notifier_block notifier;
#endif
/* often walked, but per-cpu refs are fast path */
rwlock_t rwlock;
@@ -2277,7 +2280,7 @@ int scoutfs_item_write_dirty(struct super_block *sb)
ret = -ENOMEM;
goto out;
}
list_add(&page->list, &pages);
list_add(&page->lru, &pages);
first = NULL;
prev = &first;
@@ -2290,7 +2293,7 @@ int scoutfs_item_write_dirty(struct super_block *sb)
ret = -ENOMEM;
goto out;
}
list_add(&second->list, &pages);
list_add(&second->lru, &pages);
}
/* read lock next sorted page, we're only dirty_list user */
@@ -2347,8 +2350,8 @@ int scoutfs_item_write_dirty(struct super_block *sb)
/* write all the dirty items into log btree blocks */
ret = scoutfs_forest_insert_list(sb, first);
out:
list_for_each_entry_safe(page, second, &pages, list) {
list_del_init(&page->list);
list_for_each_entry_safe(page, second, &pages, lru) {
list_del_init(&page->lru);
__free_page(page);
}
@@ -2530,27 +2533,35 @@ retry:
put_pg(sb, right);
}
static unsigned long item_cache_count_objects(struct shrinker *shrink,
struct shrink_control *sc)
{
struct item_cache_info *cinf = KC_SHRINKER_CONTAINER_OF(shrink, struct item_cache_info);
struct super_block *sb = cinf->sb;
scoutfs_inc_counter(sb, item_cache_count_objects);
return shrinker_min_long(cinf->lru_pages);
}
/*
* Shrink the size of the item cache.  We're operating against the fast
* path lock ordering and we skip pages if we can't acquire locks. We
* can run into dirty pages or pages with items that weren't visible to
* the earliest active reader which must be skipped.
*/
static int item_lru_shrink(struct shrinker *shrink,
struct shrink_control *sc)
static unsigned long item_cache_scan_objects(struct shrinker *shrink,
struct shrink_control *sc)
{
struct item_cache_info *cinf = container_of(shrink,
struct item_cache_info,
shrinker);
struct item_cache_info *cinf = KC_SHRINKER_CONTAINER_OF(shrink, struct item_cache_info);
struct super_block *sb = cinf->sb;
struct cached_page *tmp;
struct cached_page *pg;
unsigned long freed = 0;
u64 first_reader_seq;
int nr;
int nr = sc->nr_to_scan;
if (sc->nr_to_scan == 0)
goto out;
nr = sc->nr_to_scan;
scoutfs_inc_counter(sb, item_cache_scan_objects);
/* can't invalidate pages with items that weren't visible to first reader */
first_reader_seq = first_active_reader_seq(cinf);
@@ -2582,6 +2593,7 @@ static int item_lru_shrink(struct shrinker *shrink,
rbtree_erase(&pg->node, &cinf->pg_root);
invalidate_pcpu_page(pg);
write_unlock(&pg->rwlock);
freed++;
put_pg(sb, pg);
@@ -2591,10 +2603,11 @@ static int item_lru_shrink(struct shrinker *shrink,
write_unlock(&cinf->rwlock);
spin_unlock(&cinf->lru_lock);
out:
return min_t(unsigned long, cinf->lru_pages, INT_MAX);
return freed;
}
#ifdef KC_CPU_NOTIFIER
static int item_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
@@ -2609,6 +2622,7 @@ static int item_cpu_callback(struct notifier_block *nfb,
return NOTIFY_OK;
}
#endif
int scoutfs_item_setup(struct super_block *sb)
{
@@ -2638,11 +2652,13 @@ int scoutfs_item_setup(struct super_block *sb)
for_each_possible_cpu(cpu)
init_pcpu_pages(cinf, cpu);
cinf->shrinker.shrink = item_lru_shrink;
cinf->shrinker.seeks = DEFAULT_SEEKS;
register_shrinker(&cinf->shrinker);
KC_INIT_SHRINKER_FUNCS(&cinf->shrinker, item_cache_count_objects,
item_cache_scan_objects);
KC_REGISTER_SHRINKER(&cinf->shrinker);
#ifdef KC_CPU_NOTIFIER
cinf->notifier.notifier_call = item_cpu_callback;
register_hotcpu_notifier(&cinf->notifier);
#endif
sbi->item_cache_info = cinf;
return 0;
@@ -2662,8 +2678,10 @@ void scoutfs_item_destroy(struct super_block *sb)
if (cinf) {
BUG_ON(!list_empty(&cinf->active_list));
#ifdef KC_CPU_NOTIFIER
unregister_hotcpu_notifier(&cinf->notifier);
unregister_shrinker(&cinf->shrinker);
#endif
KC_UNREGISTER_SHRINKER(&cinf->shrinker);
for_each_possible_cpu(cpu)
drop_pcpu_pages(sb, cinf, cpu);

kmod/src/kernelcompat.c Normal file
View File

@@ -0,0 +1,84 @@
#include <linux/uio.h>
#include "kernelcompat.h"
#ifdef KC_SHRINKER_SHRINK
#include <linux/shrinker.h>
/*
* If a target doesn't have that .{count,scan}_objects() interface then
* we have a .shrink() helper that performs the shrink work in terms of
* count/scan.
*/
int kc_shrink_wrapper_fn(struct shrinker *shrink, struct shrink_control *sc)
{
struct kc_shrinker_wrapper *wrapper = container_of(shrink, struct kc_shrinker_wrapper, shrink);
unsigned long nr;
unsigned long rc;
if (sc->nr_to_scan != 0) {
rc = wrapper->scan_objects(shrink, sc);
/* translate magic values to the equivalent for older kernels */
if (rc == SHRINK_STOP)
return -1;
else if (rc == SHRINK_EMPTY)
return 0;
}
nr = wrapper->count_objects(shrink, sc);
return min_t(unsigned long, nr, INT_MAX);
}
#endif
#ifndef KC_CURRENT_TIME_INODE
struct timespec64 kc_current_time(struct inode *inode)
{
struct timespec64 now;
unsigned gran;
getnstimeofday64(&now);
if (unlikely(!inode->i_sb)) {
WARN(1, "current_time() called with uninitialized super_block in the inode");
return now;
}
gran = inode->i_sb->s_time_gran;
/* Avoid division in the common cases 1 ns and 1 s. */
if (gran == 1) {
/* nothing */
} else if (gran == NSEC_PER_SEC) {
now.tv_nsec = 0;
} else if (gran > 1 && gran < NSEC_PER_SEC) {
now.tv_nsec -= now.tv_nsec % gran;
} else {
WARN(1, "illegal file time granularity: %u", gran);
}
return now;
}
#endif
#ifndef KC_GENERIC_FILE_BUFFERED_WRITE
ssize_t
kc_generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos, loff_t *ppos,
size_t count, ssize_t written)
{
struct file *file = iocb->ki_filp;
ssize_t status;
struct iov_iter i;
iov_iter_init(&i, WRITE, iov, nr_segs, count);
status = generic_perform_write(file, &i, pos);
if (likely(status >= 0)) {
written += status;
*ppos = pos + status;
}
return written ? written : status;
}
#endif

View File

@@ -1,8 +1,35 @@
#ifndef _SCOUTFS_KERNELCOMPAT_H_
#define _SCOUTFS_KERNELCOMPAT_H_
#ifndef KC_ITERATE_DIR_CONTEXT
#include <linux/kernel.h>
#include <linux/fs.h>
/*
* v4.15-rc3-4-gae5e165d855d
*
* new API for handling inode->i_version.  The header has to be
* included wherever the i_version helpers are used; we include it
* here once for convenience instead of at each use site.
*/
#ifdef KC_NEED_LINUX_IVERSION_H
#include <linux/iversion.h>
#else
/*
* Kernels before the above version fall back to manipulating
* inode->i_version directly, using the older, less precise
* accessors.
*/
#define inode_set_iversion_queried(inode, val) \
do { \
(inode)->i_version = val; \
} while (0)
#define inode_peek_iversion(inode) \
({ \
(inode)->i_version; \
})
#endif
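/*
 * Illustrative sketch only (not part of this change): with either
 * definition above, loading and storing the on-disk version looks the
 * same, mirroring the load_inode()/store_inode() updates in this
 * series.  The example_* helpers are hypothetical.
 */
static inline void example_load_version(struct inode *inode, __le64 disk_version)
{
	inode_set_iversion_queried(inode, le64_to_cpu(disk_version));
}

static inline __le64 example_store_version(struct inode *inode)
{
	return cpu_to_le64(inode_peek_iversion(inode));
}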
#ifndef KC_ITERATE_DIR_CONTEXT
typedef filldir_t kc_readdir_ctx_t;
#define KC_DECLARE_READDIR(name, file, dirent, ctx) name(file, dirent, ctx)
#define KC_FOP_READDIR readdir
@@ -52,4 +79,198 @@ static inline int dir_emit_dots(struct file *file, void *dirent,
#define kc_posix_acl_valid(user_ns, acl) posix_acl_valid(acl)
#endif
/*
* v3.6-rc1-24-gdbf2576e37da
*
* All workqueues are now non-reentrant, and the bit flag is removed
* shortly after its uses were removed.
*/
#ifndef WQ_NON_REENTRANT
#define WQ_NON_REENTRANT 0
#endif
/*
* v3.18-rc2-19-gb5ae6b15bd73
*
* Folds d_materialise_unique into d_splice_alias. Note reversal
* of arguments (Also note Documentation/filesystems/porting.rst)
*/
#ifndef KC_D_MATERIALISE_UNIQUE
#define d_materialise_unique(dentry, inode) d_splice_alias(inode, dentry)
#endif
/*
* v4.8-rc1-29-g31051c85b5e2
*
* fall back to inode_change_ok() if setattr_prepare() isn't available
*/
#ifndef KC_SETATTR_PREPARE
#define setattr_prepare(dentry, attr) inode_change_ok(d_inode(dentry), attr)
#endif
#ifndef KC___POSIX_ACL_CREATE
#define __posix_acl_create posix_acl_create
#define __posix_acl_chmod posix_acl_chmod
#endif
#ifndef KC_PERCPU_COUNTER_ADD_BATCH
#define percpu_counter_add_batch __percpu_counter_add
#endif
#ifndef KC_MEMALLOC_NOFS_SAVE
#define memalloc_nofs_save memalloc_noio_save
#define memalloc_nofs_restore memalloc_noio_restore
#endif
#ifdef KC_BIO_BI_OPF
#define kc_bio_get_opf(bio) \
({ \
(bio)->bi_opf; \
})
#define kc_bio_set_opf(bio, opf) \
do { \
(bio)->bi_opf = opf; \
} while (0)
#define kc_bio_set_sector(bio, sect) \
do { \
(bio)->bi_iter.bi_sector = sect;\
} while (0)
#define kc_submit_bio(bio) submit_bio(bio)
#else
#define kc_bio_get_opf(bio) \
({ \
(bio)->bi_rw; \
})
#define kc_bio_set_opf(bio, opf) \
do { \
(bio)->bi_rw = opf; \
} while (0)
#define kc_bio_set_sector(bio, sect) \
do { \
(bio)->bi_sector = sect; \
} while (0)
#define kc_submit_bio(bio) \
do { \
submit_bio((bio)->bi_rw, bio); \
} while (0)
#define bio_set_dev(bio, bdev) \
do { \
(bio)->bi_bdev = (bdev); \
} while (0)
#endif
#ifdef KC_BIO_BI_STATUS
#define KC_DECLARE_BIO_END_IO(name, bio) name(bio)
#define kc_bio_get_errno(bio) ({ blk_status_to_errno((bio)->bi_status); })
#else
#define KC_DECLARE_BIO_END_IO(name, bio) name(bio, int _error_arg)
#define kc_bio_get_errno(bio) ({ (int)((void)(bio), _error_arg); })
#endif
/*
* v4.13-rc1-6-ge462ec50cb5f
*
* MS_* (mount) flags from <linux/mount.h> should not be used in the kernel
* anymore from 4.x onwards. Instead, we need to use the SB_* (superblock) flags
*/
#ifndef SB_POSIXACL
#define SB_POSIXACL MS_POSIXACL
#define SB_I_VERSION MS_I_VERSION
#endif
#ifndef KC_CURRENT_TIME_INODE
struct timespec64 kc_current_time(struct inode *inode);
#define current_time kc_current_time
#define kc_timespec timespec
#else
#define kc_timespec timespec64
#endif
#ifndef KC_SHRINKER_SHRINK
#define KC_DEFINE_SHRINKER(name) struct shrinker name
#define KC_INIT_SHRINKER_FUNCS(name, countfn, scanfn) do { \
__typeof__(name) _shrink = (name); \
_shrink->count_objects = (countfn); \
_shrink->scan_objects = (scanfn); \
_shrink->seeks = DEFAULT_SEEKS; \
} while (0)
#define KC_SHRINKER_CONTAINER_OF(ptr, type) container_of(ptr, type, shrinker)
#define KC_REGISTER_SHRINKER(ptr) (register_shrinker(ptr))
#define KC_UNREGISTER_SHRINKER(ptr) (unregister_shrinker(ptr))
#define KC_SHRINKER_FN(ptr) (ptr)
#else
#include <linux/shrinker.h>
#ifndef SHRINK_STOP
#define SHRINK_STOP (~0UL)
#define SHRINK_EMPTY (~0UL - 1)
#endif
int kc_shrink_wrapper_fn(struct shrinker *shrink, struct shrink_control *sc);
struct kc_shrinker_wrapper {
unsigned long (*count_objects)(struct shrinker *, struct shrink_control *sc);
unsigned long (*scan_objects)(struct shrinker *, struct shrink_control *sc);
struct shrinker shrink;
};
#define KC_DEFINE_SHRINKER(name) struct kc_shrinker_wrapper name;
#define KC_INIT_SHRINKER_FUNCS(name, countfn, scanfn) do { \
struct kc_shrinker_wrapper *_wrap = (name); \
_wrap->count_objects = (countfn); \
_wrap->scan_objects = (scanfn); \
_wrap->shrink.shrink = kc_shrink_wrapper_fn; \
_wrap->shrink.seeks = DEFAULT_SEEKS; \
} while (0)
#define KC_SHRINKER_CONTAINER_OF(ptr, type) container_of(container_of(ptr, struct kc_shrinker_wrapper, shrink), type, shrinker)
#define KC_REGISTER_SHRINKER(ptr) (register_shrinker(ptr.shrink))
#define KC_UNREGISTER_SHRINKER(ptr) (unregister_shrinker(ptr.shrink))
#define KC_SHRINKER_FN(ptr) (ptr.shrink)
#endif /* KC_SHRINKER_SHRINK */
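/*
 * Illustrative sketch only (not part of this change): a cache wires up
 * a shrinker through these macros in the same shape as the item cache
 * and lock changes in this series.  The example_* names are
 * hypothetical; the member must be named "shrinker" so that
 * KC_SHRINKER_CONTAINER_OF() can find the containing struct.
 */
struct example_cache {
	unsigned long nr_items;
	KC_DEFINE_SHRINKER(shrinker);
};

static unsigned long example_count_objects(struct shrinker *shrink,
					   struct shrink_control *sc)
{
	struct example_cache *cache = KC_SHRINKER_CONTAINER_OF(shrink, struct example_cache);

	return cache->nr_items;
}

static unsigned long example_scan_objects(struct shrinker *shrink,
					  struct shrink_control *sc)
{
	/* free up to sc->nr_to_scan items and return the number freed */
	return 0;
}

static void example_cache_setup(struct example_cache *cache)
{
	KC_INIT_SHRINKER_FUNCS(&cache->shrinker, example_count_objects,
			       example_scan_objects);
	KC_REGISTER_SHRINKER(&cache->shrinker);
}

static void example_cache_destroy(struct example_cache *cache)
{
	KC_UNREGISTER_SHRINKER(&cache->shrinker);
}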
#ifdef KC_KERNEL_GETSOCKNAME_ADDRLEN
#include <linux/net.h>
#include <linux/inet.h>
static inline int kc_kernel_getsockname(struct socket *sock, struct sockaddr *addr)
{
int addrlen = sizeof(struct sockaddr_in);
int ret = kernel_getsockname(sock, addr, &addrlen);
if (ret == 0 && addrlen != sizeof(struct sockaddr_in))
return -EAFNOSUPPORT;
else if (ret < 0)
return ret;
return sizeof(struct sockaddr_in);
}
static inline int kc_kernel_getpeername(struct socket *sock, struct sockaddr *addr)
{
int addrlen = sizeof(struct sockaddr_in);
int ret = kernel_getpeername(sock, addr, &addrlen);
if (ret == 0 && addrlen != sizeof(struct sockaddr_in))
return -EAFNOSUPPORT;
else if (ret < 0)
return ret;
return sizeof(struct sockaddr_in);
}
#else
#define kc_kernel_getsockname(sock, addr) kernel_getsockname(sock, addr)
#define kc_kernel_getpeername(sock, addr) kernel_getpeername(sock, addr)
#endif
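/*
 * Illustrative sketch only: with either definition above callers see a
 * single convention, as in the net.c changes below.  The helpers
 * return the (IPv4) address length on success or a negative errno.
 */
static inline int example_getsockname(struct socket *sock, struct sockaddr_in *sin)
{
	int ret;

	ret = kc_kernel_getsockname(sock, (struct sockaddr *)sin);
	if (ret < 0)
		return ret;

	return 0;
}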
#ifdef KC_SOCK_CREATE_KERN_NET
#define kc_sock_create_kern(family, type, proto, res) sock_create_kern(&init_net, family, type, proto, res)
#else
#define kc_sock_create_kern sock_create_kern
#endif
#ifndef KC_GENERIC_FILE_BUFFERED_WRITE
ssize_t kc_generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos, loff_t *ppos,
size_t count, ssize_t written);
#define generic_file_buffered_write kc_generic_file_buffered_write
#endif
#endif

View File

@@ -12,7 +12,6 @@
*/
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/preempt_mask.h> /* a rhel shed.h needed preempt_offset? */
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>
@@ -36,6 +35,7 @@
#include "xattr.h"
#include "item.h"
#include "omap.h"
#include "util.h"
/*
* scoutfs uses a lock service to manage item cache consistency between
@@ -77,7 +77,7 @@ struct lock_info {
bool unmounting;
struct rb_root lock_tree;
struct rb_root lock_range_tree;
struct shrinker shrinker;
KC_DEFINE_SHRINKER(shrinker);
struct list_head lru_list;
unsigned long long lru_nr;
struct workqueue_struct *workq;
@@ -1346,7 +1346,7 @@ void scoutfs_lock_del_coverage(struct super_block *sb,
bool scoutfs_lock_protected(struct scoutfs_lock *lock, struct scoutfs_key *key,
enum scoutfs_lock_mode mode)
{
signed char lock_mode = ACCESS_ONCE(lock->mode);
signed char lock_mode = READ_ONCE(lock->mode);
return lock_modes_match(lock_mode, mode) &&
scoutfs_key_compare_ranges(key, key,
@@ -1401,6 +1401,17 @@ static void lock_shrink_worker(struct work_struct *work)
}
}
static unsigned long lock_count_objects(struct shrinker *shrink,
struct shrink_control *sc)
{
struct lock_info *linfo = KC_SHRINKER_CONTAINER_OF(shrink, struct lock_info);
struct super_block *sb = linfo->sb;
scoutfs_inc_counter(sb, lock_count_objects);
return shrinker_min_long(linfo->lru_nr);
}
/*
* Start the shrinking process for locks on the lru. If a lock is on
* the lru then it can't have any active users. We don't want to block
@@ -1413,21 +1424,18 @@ static void lock_shrink_worker(struct work_struct *work)
* mode which will prevent the lock from being freed when the null
* response arrives.
*/
static int scoutfs_lock_shrink(struct shrinker *shrink,
struct shrink_control *sc)
static unsigned long lock_scan_objects(struct shrinker *shrink,
struct shrink_control *sc)
{
struct lock_info *linfo = container_of(shrink, struct lock_info,
shrinker);
struct lock_info *linfo = KC_SHRINKER_CONTAINER_OF(shrink, struct lock_info);
struct super_block *sb = linfo->sb;
struct scoutfs_lock *lock;
struct scoutfs_lock *tmp;
unsigned long nr;
unsigned long freed = 0;
unsigned long nr = sc->nr_to_scan;
bool added = false;
int ret;
nr = sc->nr_to_scan;
if (nr == 0)
goto out;
scoutfs_inc_counter(sb, lock_scan_objects);
spin_lock(&linfo->lock);
@@ -1445,6 +1453,7 @@ restart:
lock->request_pending = 1;
list_add_tail(&lock->shrink_head, &linfo->shrink_list);
added = true;
freed++;
scoutfs_inc_counter(sb, lock_shrink_attempted);
trace_scoutfs_lock_shrink(sb, lock);
@@ -1459,10 +1468,8 @@ restart:
if (added)
queue_work(linfo->workq, &linfo->shrink_work);
out:
ret = min_t(unsigned long, linfo->lru_nr, INT_MAX);
trace_scoutfs_lock_shrink_exit(sb, sc->nr_to_scan, ret);
return ret;
trace_scoutfs_lock_shrink_exit(sb, sc->nr_to_scan, freed);
return freed;
}
void scoutfs_free_unused_locks(struct super_block *sb)
@@ -1473,7 +1480,7 @@ void scoutfs_free_unused_locks(struct super_block *sb)
.nr_to_scan = INT_MAX,
};
linfo->shrinker.shrink(&linfo->shrinker, &sc);
lock_scan_objects(KC_SHRINKER_FN(&linfo->shrinker), &sc);
}
static void lock_tseq_show(struct seq_file *m, struct scoutfs_tseq_entry *ent)
@@ -1580,7 +1587,7 @@ void scoutfs_lock_shutdown(struct super_block *sb)
trace_scoutfs_lock_shutdown(sb, linfo);
/* stop the shrinker from queueing work */
unregister_shrinker(&linfo->shrinker);
KC_UNREGISTER_SHRINKER(&linfo->shrinker);
flush_work(&linfo->shrink_work);
/* cause current and future lock calls to return errors */
@@ -1699,9 +1706,9 @@ int scoutfs_lock_setup(struct super_block *sb)
spin_lock_init(&linfo->lock);
linfo->lock_tree = RB_ROOT;
linfo->lock_range_tree = RB_ROOT;
linfo->shrinker.shrink = scoutfs_lock_shrink;
linfo->shrinker.seeks = DEFAULT_SEEKS;
register_shrinker(&linfo->shrinker);
KC_INIT_SHRINKER_FUNCS(&linfo->shrinker, lock_count_objects,
lock_scan_objects);
KC_REGISTER_SHRINKER(&linfo->shrinker);
INIT_LIST_HEAD(&linfo->lru_list);
INIT_WORK(&linfo->inv_work, lock_invalidate_worker);
INIT_LIST_HEAD(&linfo->inv_list);
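
The lock.c hunks above show the split count_objects/scan_objects shrinker API as wrapped by the KC_ compat macros. A condensed sketch of the same registration pattern for a hypothetical item cache (only the KC_ macros and shrinker_min_long() come from the tree; the cache type and callbacks are invented for illustration):

struct example_cache {
	unsigned long nr_items;
	KC_DEFINE_SHRINKER(shrinker);
};

static unsigned long example_count_objects(struct shrinker *shrink,
					   struct shrink_control *sc)
{
	struct example_cache *cache = KC_SHRINKER_CONTAINER_OF(shrink, struct example_cache);

	/* keep the count well below the magic SHRINK_* return values */
	return shrinker_min_long(cache->nr_items);
}

static unsigned long example_scan_objects(struct shrinker *shrink,
					  struct shrink_control *sc)
{
	/* free up to sc->nr_to_scan items and return how many were freed */
	return 0;
}

static void example_cache_setup(struct example_cache *cache)
{
	KC_INIT_SHRINKER_FUNCS(&cache->shrinker, example_count_objects,
			       example_scan_objects);
	KC_REGISTER_SHRINKER(&cache->shrinker);
}

static void example_cache_teardown(struct example_cache *cache)
{
	KC_UNREGISTER_SHRINKER(&cache->shrinker);
}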

View File

@@ -549,12 +549,16 @@ static int recvmsg_full(struct socket *sock, void *buf, unsigned len)
while (len) {
memset(&msg, 0, sizeof(msg));
msg.msg_iov = (struct iovec *)&kv;
msg.msg_iovlen = 1;
msg.msg_flags = MSG_NOSIGNAL;
kv.iov_base = buf;
kv.iov_len = len;
#ifndef KC_MSGHDR_STRUCT_IOV_ITER
msg.msg_iov = (struct iovec *)&kv;
msg.msg_iovlen = 1;
#else
iov_iter_init(&msg.msg_iter, READ, (struct iovec *)&kv, len, 1);
#endif
ret = kernel_recvmsg(sock, &msg, &kv, 1, len, msg.msg_flags);
if (ret <= 0)
return -ECONNABORTED;
@@ -707,12 +711,16 @@ static int sendmsg_full(struct socket *sock, void *buf, unsigned len)
while (len) {
memset(&msg, 0, sizeof(msg));
msg.msg_iov = (struct iovec *)&kv;
msg.msg_iovlen = 1;
msg.msg_flags = MSG_NOSIGNAL;
kv.iov_base = buf;
kv.iov_len = len;
#ifndef KC_MSGHDR_STRUCT_IOV_ITER
msg.msg_iov = (struct iovec *)&kv;
msg.msg_iovlen = 1;
#else
iov_iter_init(&msg.msg_iter, WRITE, (struct iovec *)&kv, len, 1);
#endif
ret = kernel_sendmsg(sock, &msg, &kv, 1, len);
if (ret <= 0)
return -ECONNABORTED;
@@ -897,7 +905,6 @@ static int sock_opts_and_names(struct scoutfs_net_connection *conn,
struct socket *sock)
{
struct timeval tv;
int addrlen;
int optval;
int ret;
@@ -947,23 +954,18 @@ static int sock_opts_and_names(struct scoutfs_net_connection *conn,
if (ret)
goto out;
addrlen = sizeof(struct sockaddr_in);
ret = kernel_getsockname(sock, (struct sockaddr *)&conn->sockname,
&addrlen);
if (ret == 0 && addrlen != sizeof(struct sockaddr_in))
ret = -EAFNOSUPPORT;
if (ret)
ret = kc_kernel_getsockname(sock, (struct sockaddr *)&conn->sockname);
if (ret < 0)
goto out;
addrlen = sizeof(struct sockaddr_in);
ret = kernel_getpeername(sock, (struct sockaddr *)&conn->peername,
&addrlen);
if (ret == 0 && addrlen != sizeof(struct sockaddr_in))
ret = -EAFNOSUPPORT;
if (ret)
ret = kc_kernel_getpeername(sock, (struct sockaddr *)&conn->peername);
if (ret < 0)
goto out;
ret = 0;
conn->last_peername = conn->peername;
out:
return ret;
}
@@ -1052,7 +1054,7 @@ static void scoutfs_net_connect_worker(struct work_struct *work)
trace_scoutfs_net_connect_work_enter(sb, 0, 0);
ret = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
ret = kc_sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
if (ret)
goto out;
@@ -1453,7 +1455,7 @@ int scoutfs_net_bind(struct super_block *sb,
if (WARN_ON_ONCE(conn->sock))
return -EINVAL;
ret = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
ret = kc_sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
if (ret)
goto out;
@@ -1471,20 +1473,18 @@ int scoutfs_net_bind(struct super_block *sb,
goto out;
ret = kernel_listen(sock, 255);
if (ret)
if (ret < 0)
goto out;
addrlen = sizeof(struct sockaddr_in);
ret = kernel_getsockname(sock, (struct sockaddr *)&conn->sockname,
&addrlen);
if (ret == 0 && addrlen != sizeof(struct sockaddr_in))
ret = -EAFNOSUPPORT;
if (ret)
ret = kc_kernel_getsockname(sock, (struct sockaddr *)&conn->sockname);
if (ret < 0)
goto out;
ret = 0;
conn->sock = sock;
*sin = conn->sockname;
ret = 0;
out:
if (ret < 0 && sock)
sock_release(sock);

View File

@@ -131,10 +131,8 @@ static void init_default_options(struct scoutfs_mount_options *opts)
opts->quorum_slot_nr = -1;
}
static int set_quorum_heartbeat_timeout_ms(struct super_block *sb, int ret, u64 val)
static int verify_quorum_heartbeat_timeout_ms(struct super_block *sb, int ret, u64 val)
{
DECLARE_OPTIONS_INFO(sb, optinf);
if (ret < 0) {
scoutfs_err(sb, "failed to parse quorum_heartbeat_timeout_ms value");
return -EINVAL;
@@ -145,10 +143,6 @@ static int set_quorum_heartbeat_timeout_ms(struct super_block *sb, int ret, u64
return -EINVAL;
}
write_seqlock(&optinf->seqlock);
optinf->opts.quorum_heartbeat_timeout_ms = val;
write_sequnlock(&optinf->seqlock);
return 0;
}
@@ -175,7 +169,7 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m
switch (token) {
case Opt_acl:
sb->s_flags |= MS_POSIXACL;
sb->s_flags |= SB_POSIXACL;
break;
case Opt_data_prealloc_blocks:
@@ -209,7 +203,7 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m
break;
case Opt_noacl:
sb->s_flags &= ~MS_POSIXACL;
sb->s_flags &= ~SB_POSIXACL;
break;
case Opt_orphan_scan_delay_ms:
@@ -232,9 +226,10 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m
case Opt_quorum_heartbeat_timeout_ms:
ret = match_u64(args, &nr64);
ret = set_quorum_heartbeat_timeout_ms(sb, ret, nr64);
ret = verify_quorum_heartbeat_timeout_ms(sb, ret, nr64);
if (ret < 0)
return ret;
opts->quorum_heartbeat_timeout_ms = nr64;
break;
case Opt_quorum_slot_nr:
@@ -332,7 +327,7 @@ int scoutfs_options_show(struct seq_file *seq, struct dentry *root)
{
struct super_block *sb = root->d_sb;
struct scoutfs_mount_options opts;
const bool is_acl = !!(sb->s_flags & MS_POSIXACL);
const bool is_acl = !!(sb->s_flags & SB_POSIXACL);
scoutfs_options_read(sb, &opts);
@@ -493,6 +488,7 @@ static ssize_t quorum_heartbeat_timeout_ms_store(struct kobject *kobj, struct ko
const char *buf, size_t count)
{
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
DECLARE_OPTIONS_INFO(sb, optinf);
char nullterm[30]; /* more than enough for octal -U64_MAX */
u64 val;
int len;
@@ -503,9 +499,13 @@ static ssize_t quorum_heartbeat_timeout_ms_store(struct kobject *kobj, struct ko
nullterm[len] = '\0';
ret = kstrtoll(nullterm, 0, &val);
ret = set_quorum_heartbeat_timeout_ms(sb, ret, val);
if (ret == 0)
ret = verify_quorum_heartbeat_timeout_ms(sb, ret, val);
if (ret == 0) {
write_seqlock(&optinf->seqlock);
optinf->opts.quorum_heartbeat_timeout_ms = val;
write_sequnlock(&optinf->seqlock);
ret = count;
}
return ret;
}
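
The store handler above publishes the new timeout under optinf->seqlock with write_seqlock()/write_sequnlock(). A reader pairs that with the standard seqlock retry loop; a minimal sketch assuming the layout implied by the hunks above (the struct and function names here are placeholders, not the tree's own):

struct example_options_info {
	seqlock_t seqlock;
	struct scoutfs_mount_options opts;
};

static u64 example_read_hb_timeout(struct example_options_info *optinf)
{
	unsigned int seq;
	u64 val;

	do {
		seq = read_seqbegin(&optinf->seqlock);
		val = optinf->opts.quorum_heartbeat_timeout_ms;
	} while (read_seqretry(&optinf->seqlock, seq));

	return val;
}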

View File

@@ -183,7 +183,7 @@ static int create_socket(struct super_block *sb)
int addrlen;
int ret;
ret = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
ret = kc_sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
if (ret) {
scoutfs_err(sb, "quorum couldn't create udp socket: %d", ret);
goto out;
@@ -243,8 +243,10 @@ static int send_msg_members(struct super_block *sb, int type, u64 term, int only
};
struct sockaddr_in sin;
struct msghdr mh = {
#ifndef KC_MSGHDR_STRUCT_IOV_ITER
.msg_iov = (struct iovec *)&kv,
.msg_iovlen = 1,
#endif
.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL,
.msg_name = &sin,
.msg_namelen = sizeof(sin),
@@ -266,6 +268,9 @@ static int send_msg_members(struct super_block *sb, int type, u64 term, int only
scoutfs_quorum_slot_sin(&qinf->qconf, i, &sin);
now = ktime_get();
#ifdef KC_MSGHDR_STRUCT_IOV_ITER
iov_iter_init(&mh.msg_iter, WRITE, (struct iovec *)&kv, sizeof(qmes), 1);
#endif
ret = kernel_sendmsg(qinf->sock, &mh, &kv, 1, kv.iov_len);
if (ret != kv.iov_len)
failed++;
@@ -308,8 +313,10 @@ static int recv_msg(struct super_block *sb, struct quorum_host_msg *msg,
.iov_len = sizeof(struct scoutfs_quorum_message),
};
struct msghdr mh = {
#ifndef KC_MSGHDR_STRUCT_IOV_ITER
.msg_iov = (struct iovec *)&kv,
.msg_iovlen = 1,
#endif
.msg_flags = MSG_NOSIGNAL,
};
@@ -331,6 +338,9 @@ static int recv_msg(struct super_block *sb, struct quorum_host_msg *msg,
return ret;
}
#ifdef KC_MSGHDR_STRUCT_IOV_ITER
iov_iter_init(&mh.msg_iter, READ, (struct iovec *)&kv, sizeof(struct scoutfs_quorum_message), 1);
#endif
ret = kernel_recvmsg(qinf->sock, &mh, &kv, 1, kv.iov_len, mh.msg_flags);
if (ret < 0)
return ret;
@@ -719,11 +729,13 @@ static void scoutfs_quorum_worker(struct work_struct *work)
struct sockaddr_in unused;
struct quorum_host_msg msg;
struct quorum_status qst = {0,};
struct hb_recording hbr = {{0,},};
struct hb_recording hbr;
bool record_hb;
int ret;
int err;
memset(&hbr, 0, sizeof(struct hb_recording));
/* recording votes from slots as native single word bitmap */
BUILD_BUG_ON(SCOUTFS_QUORUM_MAX_SLOTS > BITS_PER_LONG);
@@ -771,8 +783,7 @@ static void scoutfs_quorum_worker(struct work_struct *work)
msg.type = SCOUTFS_QUORUM_MSG_INVALID;
trace_scoutfs_quorum_loop(sb, qst.role, qst.term, qst.vote_for,
qst.vote_bits,
ktime_to_timespec64(qst.timeout));
qst.vote_bits, ktime_to_ns(qst.timeout));
/* receiving greater terms resets term, becomes follower */
if (msg.type != SCOUTFS_QUORUM_MSG_INVALID &&

View File

@@ -817,22 +817,17 @@ TRACE_EVENT(scoutfs_advance_dirty_super,
TP_printk(SCSBF" super seq now %llu", SCSB_TRACE_ARGS, __entry->seq)
);
TRACE_EVENT(scoutfs_dir_add_next_linkref,
TRACE_EVENT(scoutfs_dir_add_next_linkref_found,
TP_PROTO(struct super_block *sb, __u64 ino, __u64 dir_ino,
__u64 dir_pos, int ret, __u64 found_dir_ino,
__u64 found_dir_pos, unsigned int name_len),
__u64 dir_pos, unsigned int name_len),
TP_ARGS(sb, ino, dir_ino, dir_pos, ret, found_dir_pos, found_dir_ino,
name_len),
TP_ARGS(sb, ino, dir_ino, dir_pos, name_len),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, ino)
__field(__u64, dir_ino)
__field(__u64, dir_pos)
__field(int, ret)
__field(__u64, found_dir_ino)
__field(__u64, found_dir_pos)
__field(unsigned int, name_len)
),
@@ -841,16 +836,43 @@ TRACE_EVENT(scoutfs_dir_add_next_linkref,
__entry->ino = ino;
__entry->dir_ino = dir_ino;
__entry->dir_pos = dir_pos;
__entry->ret = ret;
__entry->found_dir_ino = dir_ino;
__entry->found_dir_pos = dir_pos;
__entry->name_len = name_len;
),
TP_printk(SCSBF" ino %llu dir_ino %llu dir_pos %llu ret %d found_dir_ino %llu found_dir_pos %llu name_len %u",
SCSB_TRACE_ARGS, __entry->ino, __entry->dir_pos,
__entry->dir_ino, __entry->ret, __entry->found_dir_pos,
__entry->found_dir_ino, __entry->name_len)
TP_printk(SCSBF" ino %llu dir_ino %llu dir_pos %llu name_len %u",
SCSB_TRACE_ARGS, __entry->ino, __entry->dir_ino,
__entry->dir_pos, __entry->name_len)
);
TRACE_EVENT(scoutfs_dir_add_next_linkrefs,
TP_PROTO(struct super_block *sb, __u64 ino, __u64 dir_ino,
__u64 dir_pos, int count, int nr, int ret),
TP_ARGS(sb, ino, dir_ino, dir_pos, count, nr, ret),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, ino)
__field(__u64, dir_ino)
__field(__u64, dir_pos)
__field(int, count)
__field(int, nr)
__field(int, ret)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->ino = ino;
__entry->dir_ino = dir_ino;
__entry->dir_pos = dir_pos;
__entry->count = count;
__entry->nr = nr;
__entry->ret = ret;
),
TP_printk(SCSBF" ino %llu dir_ino %llu dir_pos %llu count %d nr %d ret %d",
SCSB_TRACE_ARGS, __entry->ino, __entry->dir_ino,
__entry->dir_pos, __entry->count, __entry->nr, __entry->ret)
);
TRACE_EVENT(scoutfs_write_begin,
@@ -1874,8 +1896,9 @@ DEFINE_EVENT(scoutfs_server_client_count_class, scoutfs_server_client_down,
DECLARE_EVENT_CLASS(scoutfs_server_commit_users_class,
TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
u32 avail_before, u32 freed_before, int exceeded),
TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, exceeded),
u32 avail_before, u32 freed_before, int committing, int exceeded),
TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, committing,
exceeded),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(int, holding)
@@ -1883,6 +1906,7 @@ DECLARE_EVENT_CLASS(scoutfs_server_commit_users_class,
__field(int, nr_holders)
__field(__u32, avail_before)
__field(__u32, freed_before)
__field(int, committing)
__field(int, exceeded)
),
TP_fast_assign(
@@ -1892,31 +1916,33 @@ DECLARE_EVENT_CLASS(scoutfs_server_commit_users_class,
__entry->nr_holders = nr_holders;
__entry->avail_before = avail_before;
__entry->freed_before = freed_before;
__entry->committing = !!committing;
__entry->exceeded = !!exceeded;
),
TP_printk(SCSBF" holding %u applying %u nr %u avail_before %u freed_before %u exceeded %u",
TP_printk(SCSBF" holding %u applying %u nr %u avail_before %u freed_before %u committing %u exceeded %u",
SCSB_TRACE_ARGS, __entry->holding, __entry->applying, __entry->nr_holders,
__entry->avail_before, __entry->freed_before, __entry->exceeded)
__entry->avail_before, __entry->freed_before, __entry->committing,
__entry->exceeded)
);
DEFINE_EVENT(scoutfs_server_commit_users_class, scoutfs_server_commit_hold,
TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
u32 avail_before, u32 freed_before, int exceeded),
TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, exceeded)
u32 avail_before, u32 freed_before, int committing, int exceeded),
TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, committing, exceeded)
);
DEFINE_EVENT(scoutfs_server_commit_users_class, scoutfs_server_commit_apply,
TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
u32 avail_before, u32 freed_before, int exceeded),
TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, exceeded)
u32 avail_before, u32 freed_before, int committing, int exceeded),
TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, committing, exceeded)
);
DEFINE_EVENT(scoutfs_server_commit_users_class, scoutfs_server_commit_start,
TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
u32 avail_before, u32 freed_before, int exceeded),
TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, exceeded)
u32 avail_before, u32 freed_before, int committing, int exceeded),
TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, committing, exceeded)
);
DEFINE_EVENT(scoutfs_server_commit_users_class, scoutfs_server_commit_end,
TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
u32 avail_before, u32 freed_before, int exceeded),
TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, exceeded)
u32 avail_before, u32 freed_before, int committing, int exceeded),
TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, committing, exceeded)
);
#define slt_symbolic(mode) \
@@ -1998,9 +2024,9 @@ DEFINE_EVENT(scoutfs_quorum_message_class, scoutfs_quorum_recv_message,
TRACE_EVENT(scoutfs_quorum_loop,
TP_PROTO(struct super_block *sb, int role, u64 term, int vote_for,
unsigned long vote_bits, struct timespec64 timeout),
unsigned long vote_bits, unsigned long long nsecs),
TP_ARGS(sb, role, term, vote_for, vote_bits, timeout),
TP_ARGS(sb, role, term, vote_for, vote_bits, nsecs),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
@@ -2009,8 +2035,7 @@ TRACE_EVENT(scoutfs_quorum_loop,
__field(int, vote_for)
__field(unsigned long, vote_bits)
__field(unsigned long, vote_count)
__field(unsigned long long, timeout_sec)
__field(int, timeout_nsec)
__field(unsigned long long, nsecs)
),
TP_fast_assign(
@@ -2020,14 +2045,13 @@ TRACE_EVENT(scoutfs_quorum_loop,
__entry->vote_for = vote_for;
__entry->vote_bits = vote_bits;
__entry->vote_count = hweight_long(vote_bits);
__entry->timeout_sec = timeout.tv_sec;
__entry->timeout_nsec = timeout.tv_nsec;
__entry->nsecs = nsecs;
),
TP_printk(SCSBF" term %llu role %d vote_for %d vote_bits 0x%lx vote_count %lu timeout %llu.%u",
TP_printk(SCSBF" term %llu role %d vote_for %d vote_bits 0x%lx vote_count %lu timeout %llu",
SCSB_TRACE_ARGS, __entry->term, __entry->role,
__entry->vote_for, __entry->vote_bits, __entry->vote_count,
__entry->timeout_sec, __entry->timeout_nsec)
__entry->nsecs)
);
TRACE_EVENT(scoutfs_trans_seq_last,

View File

@@ -67,6 +67,7 @@ struct commit_users {
unsigned int nr_holders;
u32 avail_before;
u32 freed_before;
bool committing;
bool exceeded;
};
@@ -84,7 +85,7 @@ do { \
__typeof__(cusers) _cusers = (cusers); \
trace_scoutfs_server_commit_##which(sb, !list_empty(&_cusers->holding), \
!list_empty(&_cusers->applying), _cusers->nr_holders, _cusers->avail_before, \
_cusers->freed_before, _cusers->exceeded); \
_cusers->freed_before, _cusers->committing, _cusers->exceeded); \
} while (0)
struct server_info {
@@ -282,6 +283,14 @@ struct commit_hold {
* per-holder allocation consumption tracking. The best we can do is
* flag all the current holders so that as they release we can see
* everyone involved in crossing the limit.
*
* The consumption of space to record freed blocks is tricky. The
* freed_before value was the space available as the holder started.
* But that happens before we actually dirty the first block in the
* freed list. If that block is too full then we just allocate a new
* empty first block. In that case the current remaining here can be a
* lot more than the initial freed_before. We account for that and
* treat freed_before as the maximum capacity.
*/
static void check_holder_budget(struct super_block *sb, struct server_info *server,
struct commit_users *cusers)
@@ -301,8 +310,13 @@ static void check_holder_budget(struct super_block *sb, struct server_info *serv
return;
scoutfs_alloc_meta_remaining(&server->alloc, &avail_now, &freed_now);
avail_used = cusers->avail_before - avail_now;
freed_used = cusers->freed_before - freed_now;
if (freed_now < cusers->freed_before)
freed_used = cusers->freed_before - freed_now;
else
freed_used = SCOUTFS_ALLOC_LIST_MAX_BLOCKS - freed_now;
budget = cusers->nr_holders * COMMIT_HOLD_ALLOC_BUDGET;
if (avail_used <= budget && freed_used <= budget)
return;
@@ -325,31 +339,18 @@ static void check_holder_budget(struct super_block *sb, struct server_info *serv
/*
* We don't have per-holder consumption. We allow commit holders as
* long as the total budget of all the holders doesn't exceed the alloc
* resources that were available
* resources that were available. If a hold is waiting for budget
* availability in the allocators then we try and kick off a commit to
* fill and use the next allocators after the current transaction.
*/
static bool commit_alloc_has_room(struct server_info *server, struct commit_users *cusers,
unsigned int more_holders)
{
u32 avail_before;
u32 freed_before;
u32 budget;
if (cusers->nr_holders > 0) {
avail_before = cusers->avail_before;
freed_before = cusers->freed_before;
} else {
scoutfs_alloc_meta_remaining(&server->alloc, &avail_before, &freed_before);
}
budget = (cusers->nr_holders + more_holders) * COMMIT_HOLD_ALLOC_BUDGET;
return avail_before >= budget && freed_before >= budget;
}
static bool hold_commit(struct super_block *sb, struct server_info *server,
struct commit_users *cusers, struct commit_hold *hold)
{
bool held = false;
bool has_room;
bool held;
u32 budget;
u32 av;
u32 fr;
spin_lock(&cusers->lock);
@@ -357,19 +358,39 @@ static bool hold_commit(struct super_block *sb, struct server_info *server,
check_holder_budget(sb, server, cusers);
if (cusers->nr_holders == 0) {
scoutfs_alloc_meta_remaining(&server->alloc, &av, &fr);
} else {
av = cusers->avail_before;
fr = cusers->freed_before;
}
/* +2 for our additional hold and then for the final commit work the server does */
if (list_empty(&cusers->applying) && commit_alloc_has_room(server, cusers, 2)) {
scoutfs_alloc_meta_remaining(&server->alloc, &hold->avail, &hold->freed);
budget = (cusers->nr_holders + 2) * COMMIT_HOLD_ALLOC_BUDGET;
has_room = av >= budget && fr >= budget;
/* checking applying so holders drain once an apply caller starts waiting */
held = !cusers->committing && has_room && list_empty(&cusers->applying);
if (held) {
if (cusers->nr_holders == 0) {
cusers->avail_before = hold->avail;
cusers->freed_before = hold->freed;
cusers->avail_before = av;
cusers->freed_before = fr;
hold->avail = av;
hold->freed = fr;
cusers->exceeded = false;
} else {
scoutfs_alloc_meta_remaining(&server->alloc, &hold->avail, &hold->freed);
}
hold->exceeded = false;
hold->start = ktime_get();
list_add_tail(&hold->entry, &cusers->holding);
cusers->nr_holders++;
held = true;
} else if (!has_room && cusers->nr_holders == 0 && !cusers->committing) {
cusers->committing = true;
queue_work(server->wq, &server->commit_work);
}
spin_unlock(&cusers->lock);
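
To make the budget arithmetic above concrete, here is a hedged sketch of the has_room check with an invented COMMIT_HOLD_ALLOC_BUDGET of 32 metadata blocks (the real constant isn't shown in these hunks). With 3 active holders, a new hold needs room for itself plus the server's final commit work, so both the avail and freed counts must cover (3 + 2) * 32 = 160 blocks:

#define EXAMPLE_HOLD_BUDGET 32	/* invented; stands in for COMMIT_HOLD_ALLOC_BUDGET */

static bool example_has_room(unsigned int nr_holders, u32 avail, u32 freed)
{
	/* +2 for the incoming hold and for the final commit work */
	u32 budget = (nr_holders + 2) * EXAMPLE_HOLD_BUDGET;

	return avail >= budget && freed >= budget;
}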
@@ -403,7 +424,6 @@ static int server_apply_commit(struct super_block *sb, struct commit_hold *hold,
DECLARE_SERVER_INFO(sb, server);
struct commit_users *cusers = &server->cusers;
struct timespec ts;
bool start_commit;
spin_lock(&cusers->lock);
@@ -424,12 +444,14 @@ static int server_apply_commit(struct super_block *sb, struct commit_hold *hold,
list_del_init(&hold->entry);
hold->ret = err;
}
cusers->nr_holders--;
start_commit = cusers->nr_holders == 0 && !list_empty(&cusers->applying);
spin_unlock(&cusers->lock);
if (start_commit)
cusers->nr_holders--;
if (cusers->nr_holders == 0 && !cusers->committing && !list_empty(&cusers->applying)) {
cusers->committing = true;
queue_work(server->wq, &server->commit_work);
}
spin_unlock(&cusers->lock);
wait_event(cusers->waitq, list_empty_careful(&hold->entry));
smp_rmb(); /* entry load before ret */
@@ -438,8 +460,8 @@ static int server_apply_commit(struct super_block *sb, struct commit_hold *hold,
/*
* Start a commit from the commit work. We should only have been queued
* while a holder is waiting to apply after all active holders have
* finished.
* while there are no active holders and someone started the commit.
* There may or may not be blocked apply callers waiting for the result.
*/
static int commit_start(struct super_block *sb, struct commit_users *cusers)
{
@@ -448,7 +470,7 @@ static int commit_start(struct super_block *sb, struct commit_users *cusers)
/* make sure holders held off once commit started */
spin_lock(&cusers->lock);
TRACE_COMMIT_USERS(sb, cusers, start);
if (WARN_ON_ONCE(list_empty(&cusers->applying) || cusers->nr_holders != 0))
if (WARN_ON_ONCE(!cusers->committing || cusers->nr_holders != 0))
ret = -EINVAL;
spin_unlock(&cusers->lock);
@@ -471,6 +493,7 @@ static void commit_end(struct super_block *sb, struct commit_users *cusers, int
smp_wmb(); /* ret stores before list updates */
list_for_each_entry_safe(hold, tmp, &cusers->applying, entry)
list_del_init(&hold->entry);
cusers->committing = false;
spin_unlock(&cusers->lock);
wake_up(&cusers->waitq);
@@ -543,7 +566,7 @@ static void set_stable_super(struct server_info *server, struct scoutfs_super_bl
* implement commits with a single pending work func.
*
* Processing paths hold the commit while they're making multiple
* dependent changes. When they're done and want it persistent they add
* dependent changes. When they're done and want it persistent they
* queue the commit work. This work runs, performs the commit, and
* wakes all the applying waiters with the result. Readers can run
* concurrently with these commits.
@@ -2058,6 +2081,13 @@ out:
* reset the next range key if there's still work to do. If the
* operation is complete then we tear down the input log_trees items and
* delete the status.
*
* Processing all the completions can take more than one transaction.
* We return -EINPROGRESS if we have to commit a transaction and the
* caller will apply the commit and immediately call back in so we can

* perform another commit. We need to be very careful to leave the
* status in a state where requests won't be issued at the wrong time
* (by forcing nr_completions to a batch while we delete them).
*/
static int splice_log_merge_completions(struct super_block *sb,
struct scoutfs_log_merge_status *stat,
@@ -2070,15 +2100,29 @@ static int splice_log_merge_completions(struct super_block *sb,
struct scoutfs_log_merge_range rng;
struct scoutfs_log_trees lt = {{{0,}}};
SCOUTFS_BTREE_ITEM_REF(iref);
bool upd_stat = true;
int einprogress = 0;
struct scoutfs_key key;
char *err_str = NULL;
u32 alloc_low;
u32 tmp;
u64 seq;
int ret;
int err;
/* mustn't rebalance fs tree parents while reqs rely on their key bounds */
if (WARN_ON_ONCE(le64_to_cpu(stat->nr_requests) > 0))
return -EIO;
/*
* Be overly conservative about how low the allocator can get
* before we commit. This gives us a lot of work to do in a
* commit while also allowing a pretty big smallest allocator to
* work with the theoretically unbounded alloc list splicing.
*/
scoutfs_alloc_meta_remaining(&server->alloc, &alloc_low, &tmp);
alloc_low = min(alloc_low, tmp) / 4;
/*
* Splice in all the completed subtrees at the initial parent
* blocks in the main fs_tree before rebalancing any of them.
@@ -2100,6 +2144,22 @@ static int splice_log_merge_completions(struct super_block *sb,
seq = le64_to_cpu(comp.seq);
/*
* Use having cleared the lists as an indication that
* we've already set the parents and don't need to dirty
* the btree blocks to do it all over again. This is
* safe because there is always an fs block that the
* merge dirties and frees into the meta_freed list.
*/
if (comp.meta_avail.ref.blkno == 0 && comp.meta_freed.ref.blkno == 0)
continue;
if (scoutfs_alloc_meta_low(sb, &server->alloc, alloc_low)) {
einprogress = -EINPROGRESS;
ret = 0;
goto out;
}
ret = scoutfs_btree_set_parent(sb, &server->alloc, &server->wri,
&super->fs_root, &comp.start,
&comp.root);
@@ -2134,6 +2194,14 @@ static int splice_log_merge_completions(struct super_block *sb,
}
}
/*
* Once we start rebalancing we force the number of completions
* to a batch so that requests won't be issued. Once we're done
* we clear the completion count and requests can flow again.
*/
if (le64_to_cpu(stat->nr_complete) < LOG_MERGE_SPLICE_BATCH)
stat->nr_complete = cpu_to_le64(LOG_MERGE_SPLICE_BATCH);
/*
* Now with all the parent blocks spliced in, rebalance items
* amongst parents that needed to split/join and delete the
@@ -2155,6 +2223,12 @@ static int splice_log_merge_completions(struct super_block *sb,
seq = le64_to_cpu(comp.seq);
if (scoutfs_alloc_meta_low(sb, &server->alloc, alloc_low)) {
einprogress = -EINPROGRESS;
ret = 0;
goto out;
}
/* balance when there was a remaining key range */
if (le64_to_cpu(comp.flags) & SCOUTFS_LOG_MERGE_COMP_REMAIN) {
ret = scoutfs_btree_rebalance(sb, &server->alloc,
@@ -2194,18 +2268,11 @@ static int splice_log_merge_completions(struct super_block *sb,
}
}
/* update the status once all completes are processed */
scoutfs_key_set_zeros(&stat->next_range_key);
stat->nr_complete = 0;
/* update counts and done if there's still ranges to process */
if (!no_ranges) {
init_log_merge_key(&key, SCOUTFS_LOG_MERGE_STATUS_ZONE, 0, 0);
ret = scoutfs_btree_update(sb, &server->alloc, &server->wri,
&super->log_merge, &key,
stat, sizeof(*stat));
if (ret < 0)
err_str = "update status";
scoutfs_key_set_zeros(&stat->next_range_key);
stat->nr_complete = 0;
ret = 0;
goto out;
}
@@ -2241,6 +2308,12 @@ static int splice_log_merge_completions(struct super_block *sb,
(le64_to_cpu(lt.finalize_seq) < le64_to_cpu(stat->seq))))
continue;
if (scoutfs_alloc_meta_low(sb, &server->alloc, alloc_low)) {
einprogress = -EINPROGRESS;
ret = 0;
goto out;
}
fr.root = lt.item_root;
scoutfs_key_set_zeros(&fr.key);
fr.seq = cpu_to_le64(scoutfs_server_next_seq(sb));
@@ -2274,9 +2347,10 @@ static int splice_log_merge_completions(struct super_block *sb,
}
le64_add_cpu(&super->inode_count, le64_to_cpu(lt.inode_count_delta));
}
/* everything's done, remove the merge operation */
upd_stat = false;
init_log_merge_key(&key, SCOUTFS_LOG_MERGE_STATUS_ZONE, 0, 0);
ret = scoutfs_btree_delete(sb, &server->alloc, &server->wri,
&super->log_merge, &key);
@@ -2285,12 +2359,23 @@ static int splice_log_merge_completions(struct super_block *sb,
else
err_str = "deleting merge status item";
out:
if (upd_stat) {
init_log_merge_key(&key, SCOUTFS_LOG_MERGE_STATUS_ZONE, 0, 0);
err = scoutfs_btree_update(sb, &server->alloc, &server->wri,
&super->log_merge, &key,
stat, sizeof(struct scoutfs_log_merge_status));
if (err && !ret) {
err_str = "updating merge status item";
ret = err;
}
}
if (ret < 0)
scoutfs_err(sb, "server error %d splicing log merge completion: %s", ret, err_str);
BUG_ON(ret); /* inconsistent */
return ret;
return ret ?: einprogress;
}
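
The splice path above is built around a throttle-and-resume pattern: before each potentially expensive step it checks scoutfs_alloc_meta_low() against the conservative alloc_low threshold and, if the allocator is getting thin, returns -EINPROGRESS so the caller can commit and call back in. A schematic sketch of that pattern; example_have_more_work() and example_do_one_step() are stand-ins for the real per-step work, not scoutfs functions:

/* stand-ins for the real per-step work */
static bool example_have_more_work(void);
static int example_do_one_step(struct super_block *sb, struct server_info *server);

static int example_process_until_low(struct super_block *sb, struct server_info *server,
				     u32 alloc_low)
{
	int ret;

	while (example_have_more_work()) {
		/* bail out and let the caller commit before the allocator runs dry */
		if (scoutfs_alloc_meta_low(sb, &server->alloc, alloc_low))
			return -EINPROGRESS;

		ret = example_do_one_step(sb, server);
		if (ret < 0)
			return ret;
	}

	return 0;
}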
/*
@@ -2465,6 +2550,12 @@ static void server_log_merge_free_work(struct work_struct *work)
}
/*
* Clients regularly ask if there is log merge work to do. We process
* completions inline before responding so that we don't create large
* delays between completion processing and the next request. We don't
* mind if the client get_log_merge request sees high latency, the
* blocked caller has nothing else to do.
*
* This will return ENOENT to the client if there is no work to do.
*/
static int server_get_log_merge(struct super_block *sb,
@@ -2532,14 +2623,22 @@ restart:
goto out;
}
/* maybe splice now that we know if there's ranges */
/* splice if we have a batch or ran out of ranges */
no_next = ret == -ENOENT;
no_ranges = scoutfs_key_is_zeros(&stat.next_range_key) && ret == -ENOENT;
if (le64_to_cpu(stat.nr_requests) == 0 &&
(no_next || le64_to_cpu(stat.nr_complete) >= LOG_MERGE_SPLICE_BATCH)) {
ret = splice_log_merge_completions(sb, &stat, no_ranges);
if (ret < 0)
if (ret == -EINPROGRESS) {
mutex_unlock(&server->logs_mutex);
ret = server_apply_commit(sb, &hold, 0);
if (ret < 0)
goto respond;
server_hold_commit(sb, &hold);
mutex_lock(&server->logs_mutex);
} else if (ret < 0) {
goto out;
}
/* splicing resets key and adds ranges, could finish status */
goto restart;
}
@@ -2741,6 +2840,7 @@ out:
mutex_unlock(&server->logs_mutex);
ret = server_apply_commit(sb, &hold, ret);
respond:
return scoutfs_net_response(sb, conn, cmd, id, ret, &req, sizeof(req));
}
@@ -4364,7 +4464,7 @@ void scoutfs_server_stop_wait(struct super_block *sb)
DECLARE_SERVER_INFO(sb, server);
stop_server(server);
flush_work_sync(&server->work);
flush_work(&server->work);
}
int scoutfs_server_setup(struct super_block *sb)

View File

@@ -1747,7 +1747,7 @@ static int compact_logs(struct super_block *sb,
goto out;
}
page->private = 0;
list_add_tail(&page->list, &pages);
list_add_tail(&page->lru, &pages);
nr_pages++;
scoutfs_inc_counter(sb, srch_compact_log_page);
}
@@ -1800,7 +1800,7 @@ static int compact_logs(struct super_block *sb,
/* sort page entries and reset private for _next */
i = 0;
list_for_each_entry(page, &pages, list) {
list_for_each_entry(page, &pages, lru) {
args[i++] = page;
if (atomic_read(&srinf->shutdown)) {
@@ -1821,7 +1821,7 @@ static int compact_logs(struct super_block *sb,
goto out;
/* make sure we finished all the pages */
list_for_each_entry(page, &pages, list) {
list_for_each_entry(page, &pages, lru) {
sre = page_priv_sre(page);
if (page->private < SRES_PER_PAGE && sre->ino != 0) {
ret = -ENOSPC;
@@ -1834,8 +1834,8 @@ static int compact_logs(struct super_block *sb,
out:
scoutfs_block_put(sb, bl);
vfree(args);
list_for_each_entry_safe(page, tmp, &pages, list) {
list_del(&page->list);
list_for_each_entry_safe(page, tmp, &pages, lru) {
list_del(&page->lru);
__free_page(page);
}

View File

@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/magic.h>
@@ -178,7 +179,7 @@ static void scoutfs_put_super(struct super_block *sb)
/*
* Wait for invalidation and iput to finish with any lingering
* inode references that escaped the evict_inodes in
* generic_shutdown_super. MS_ACTIVE is clear so final iput
* generic_shutdown_super. SB_ACTIVE is clear so final iput
* will always evict.
*/
scoutfs_lock_flush_invalidate(sb);
@@ -485,7 +486,7 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_d_op = &scoutfs_dentry_ops;
sb->s_export_op = &scoutfs_export_ops;
sb->s_xattr = scoutfs_xattr_handlers;
sb->s_flags |= MS_I_VERSION | MS_POSIXACL;
sb->s_flags |= SB_I_VERSION | SB_POSIXACL;
sb->s_time_gran = 1;
/* btree blocks use long lived bh->b_data refs */
@@ -674,14 +675,14 @@ out:
teardown_module();
return ret;
}
module_init(scoutfs_module_init)
module_init(scoutfs_module_init);
static void __exit scoutfs_module_exit(void)
{
unregister_filesystem(&scoutfs_fs_type);
teardown_module();
}
module_exit(scoutfs_module_exit)
module_exit(scoutfs_module_exit);
MODULE_AUTHOR("Zach Brown <zab@versity.com>");
MODULE_LICENSE("GPL");

View File

@@ -46,6 +46,23 @@ static struct scoutfs_tseq_entry *tseq_rb_next(struct scoutfs_tseq_entry *ent)
return rb_entry(node, struct scoutfs_tseq_entry, node);
}
#ifdef KC_RB_TREE_AUGMENTED_COMPUTE_MAX
static bool tseq_compute_total(struct scoutfs_tseq_entry *ent, bool exit)
{
loff_t total = 1 + tseq_node_total(ent->node.rb_left) +
tseq_node_total(ent->node.rb_right);
if (exit && ent->total == total)
return true;
ent->total = total;
return false;
}
RB_DECLARE_CALLBACKS(static, tseq_rb_callbacks, struct scoutfs_tseq_entry,
node, total, tseq_compute_total);
#else
static loff_t tseq_compute_total(struct scoutfs_tseq_entry *ent)
{
return 1 + tseq_node_total(ent->node.rb_left) +
@@ -53,7 +70,8 @@ static loff_t tseq_compute_total(struct scoutfs_tseq_entry *ent)
}
RB_DECLARE_CALLBACKS(static, tseq_rb_callbacks, struct scoutfs_tseq_entry,
node, loff_t, total, tseq_compute_total)
node, loff_t, total, tseq_compute_total);
#endif
void scoutfs_tseq_tree_init(struct scoutfs_tseq_tree *tree,
scoutfs_tseq_show_t show)

View File

@@ -17,4 +17,15 @@ static inline void down_write_two(struct rw_semaphore *a,
down_write_nested(b, SINGLE_DEPTH_NESTING);
}
/*
* When returning shrinker counts from scan_objects, we should steer
* clear of the magic SHRINK_STOP and SHRINK_EMPTY values, which are near
* ~0UL values. Hence, we cap count to LONG_MAX, which is arbitrarily high
* enough to avoid them.
*/
static inline long shrinker_min_long(long count)
{
return min(count, LONG_MAX);
}
#endif

View File

@@ -773,7 +773,7 @@ int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_
/* XXX do these want i_mutex or anything? */
inode_inc_iversion(inode);
inode->i_ctime = CURRENT_TIME;
inode->i_ctime = current_time(inode);
ret = 0;
out:
@@ -850,6 +850,7 @@ unlock:
return ret;
}
#ifndef KC_XATTR_STRUCT_XATTR_HANDLER
/*
* Future kernels have this amazing hack to rewind the name to get the
* skipped prefix. We're back in the stone ages without the handler
@@ -857,22 +858,41 @@ unlock:
* compat hook to either call the kernel's xattr_full_name(handler), or
* our hack to use the flags as the prefix length.
*/
static const char *full_name_hack(void *handler, const char *name, int len)
static const char *full_name_hack(const char *name, int len)
{
return name - len;
}
#endif
static int scoutfs_xattr_get_handler(struct dentry *dentry, const char *name,
void *value, size_t size, int handler_flags)
static int scoutfs_xattr_get_handler
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
(const struct xattr_handler *handler, struct dentry *dentry,
struct inode *inode, const char *name, void *value,
size_t size)
{
name = full_name_hack(NULL, name, handler_flags);
name = xattr_full_name(handler, name);
#else
(struct dentry *dentry, const char *name,
void *value, size_t size, int handler_flags)
{
name = full_name_hack(name, handler_flags);
#endif
return scoutfs_xattr_get(dentry, name, value, size);
}
static int scoutfs_xattr_set_handler(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags, int handler_flags)
static int scoutfs_xattr_set_handler
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
(const struct xattr_handler *handler, struct dentry *dentry,
struct inode *inode, const char *name, const void *value,
size_t size, int flags)
{
name = full_name_hack(NULL, name, handler_flags);
name = xattr_full_name(handler, name);
#else
(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags, int handler_flags)
{
name = full_name_hack(name, handler_flags);
#endif
return scoutfs_xattr_set(dentry, name, value, size, flags);
}
@@ -905,14 +925,22 @@ static const struct xattr_handler scoutfs_xattr_security_handler = {
};
static const struct xattr_handler scoutfs_xattr_acl_access_handler = {
#ifdef KC_XATTR_HANDLER_NAME
.name = XATTR_NAME_POSIX_ACL_ACCESS,
#else
.prefix = XATTR_NAME_POSIX_ACL_ACCESS,
#endif
.flags = ACL_TYPE_ACCESS,
.get = scoutfs_acl_get_xattr,
.set = scoutfs_acl_set_xattr,
};
static const struct xattr_handler scoutfs_xattr_acl_default_handler = {
#ifdef KC_XATTR_HANDLER_NAME
.name = XATTR_NAME_POSIX_ACL_DEFAULT,
#else
.prefix = XATTR_NAME_POSIX_ACL_DEFAULT,
#endif
.flags = ACL_TYPE_DEFAULT,
.get = scoutfs_acl_get_xattr,
.set = scoutfs_acl_set_xattr,

View File

@@ -35,7 +35,7 @@ t_fail()
t_quiet()
{
echo "# $*" >> "$T_TMPDIR/quiet.log"
"$@" > "$T_TMPDIR/quiet.log" 2>&1 || \
"$@" >> "$T_TMPDIR/quiet.log" 2>&1 || \
t_fail "quiet command failed"
}

View File

@@ -18,6 +18,7 @@ t_filter_dmesg()
# the kernel can just be noisy
re=" used greatest stack depth: "
re="$re|sched: RT throttling activated"
# mkfs/mount checks partition tables
re="$re|unknown partition table"
@@ -84,5 +85,11 @@ t_filter_dmesg()
re="$re|scoutfs .* error.*server failed to bind to.*"
re="$re|scoutfs .* critical transaction commit failure.*"
# change-devices causes loop device resizing
re="$re|loop[0-9].* detected capacity change from.*"
# ignore systemd-journal rotating
re="$re|systemd-journald.*"
egrep -v "($re)"
}

View File

@@ -153,7 +153,27 @@ t_mount()
test "$nr" -lt "$T_NR_MOUNTS" || \
t_fail "fs nr $nr invalid"
eval t_quiet mount -t scoutfs \$T_O$nr \$T_DB$nr \$T_M$nr
eval t_quiet mount -t scoutfs \$T_O$nr\$opt \$T_DB$nr \$T_M$nr
}
#
# Mount with an optional mount option string. If the string is empty
# then the saved mount options are used. If the string has contents
# then it is appended to the end of the saved options with a separating
# comma.
#
# Unlike t_mount this won't inherently fail in t_quiet; errors are
# returned so bad options can be tested.
#
t_mount_opt()
{
local nr="$1"
local opt="${2:+,$2}"
test "$nr" -lt "$T_NR_MOUNTS" || \
t_fail "fs nr $nr invalid"
eval mount -t scoutfs \$T_O$nr\$opt \$T_DB$nr \$T_M$nr
}
t_umount()

View File

@@ -47,7 +47,7 @@ four
--- dir within dir
--- overwrite file
--- can't overwrite non-empty dir
mv: cannot move /mnt/test/test/basic-posix-consistency/dir/c/clobber to /mnt/test/test/basic-posix-consistency/dir/a/dir: Directory not empty
mv: cannot move '/mnt/test/test/basic-posix-consistency/dir/c/clobber' to '/mnt/test/test/basic-posix-consistency/dir/a/dir': Directory not empty
--- can overwrite empty dir
--- can rename into root
== path resoluion

View File

@@ -24,3 +24,307 @@
/mnt/test/test/data-prealloc/file-2: 5 extents found
/mnt/test/test/data-prealloc/file-1: 3 extents found
/mnt/test/test/data-prealloc/file-2: 3 extents found
== block writes into region allocs hole
wrote blk 24
wrote blk 32
wrote blk 40
wrote blk 55
wrote blk 63
wrote blk 71
wrote blk 72
wrote blk 79
wrote blk 80
wrote blk 87
wrote blk 88
wrote blk 95
before:
24.. 1:
32.. 1:
40.. 1:
55.. 1:
63.. 1:
71.. 2:
79.. 2:
87.. 2:
95.. 1: eof
writing into existing 0 at pos 0
wrote blk 0
0.. 1:
1.. 7: unwritten
24.. 1:
32.. 1:
40.. 1:
55.. 1:
63.. 1:
71.. 2:
79.. 2:
87.. 2:
95.. 1: eof
writing into existing 0 at pos 1
wrote blk 15
0.. 1:
1.. 14: unwritten
15.. 1:
24.. 1:
32.. 1:
40.. 1:
55.. 1:
63.. 1:
71.. 2:
79.. 2:
87.. 2:
95.. 1: eof
writing into existing 0 at pos 2
wrote blk 19
0.. 1:
1.. 14: unwritten
15.. 1:
16.. 3: unwritten
19.. 1:
20.. 4: unwritten
24.. 1:
32.. 1:
40.. 1:
55.. 1:
63.. 1:
71.. 2:
79.. 2:
87.. 2:
95.. 1: eof
writing into existing 1 at pos 0
wrote blk 25
0.. 1:
1.. 14: unwritten
15.. 1:
16.. 3: unwritten
19.. 1:
20.. 4: unwritten
24.. 1:
25.. 1:
26.. 6: unwritten
32.. 1:
40.. 1:
55.. 1:
63.. 1:
71.. 2:
79.. 2:
87.. 2:
95.. 1: eof
writing into existing 1 at pos 1
wrote blk 39
0.. 1:
1.. 14: unwritten
15.. 1:
16.. 3: unwritten
19.. 1:
20.. 4: unwritten
24.. 1:
25.. 1:
26.. 6: unwritten
32.. 1:
39.. 1:
40.. 1:
55.. 1:
63.. 1:
71.. 2:
79.. 2:
87.. 2:
95.. 1: eof
writing into existing 1 at pos 2
wrote blk 44
0.. 1:
1.. 14: unwritten
15.. 1:
16.. 3: unwritten
19.. 1:
20.. 4: unwritten
24.. 1:
25.. 1:
26.. 6: unwritten
32.. 1:
39.. 1:
40.. 1:
44.. 1:
45.. 3: unwritten
55.. 1:
63.. 1:
71.. 2:
79.. 2:
87.. 2:
95.. 1: eof
writing into existing 2 at pos 0
wrote blk 48
0.. 1:
1.. 14: unwritten
15.. 1:
16.. 3: unwritten
19.. 1:
20.. 4: unwritten
24.. 1:
25.. 1:
26.. 6: unwritten
32.. 1:
39.. 1:
40.. 1:
44.. 1:
45.. 3: unwritten
48.. 1:
49.. 6: unwritten
55.. 1:
63.. 1:
71.. 2:
79.. 2:
87.. 2:
95.. 1: eof
writing into existing 2 at pos 1
wrote blk 62
0.. 1:
1.. 14: unwritten
15.. 1:
16.. 3: unwritten
19.. 1:
20.. 4: unwritten
24.. 1:
25.. 1:
26.. 6: unwritten
32.. 1:
39.. 1:
40.. 1:
44.. 1:
45.. 3: unwritten
48.. 1:
49.. 6: unwritten
55.. 1:
56.. 6: unwritten
62.. 1:
63.. 1:
71.. 2:
79.. 2:
87.. 2:
95.. 1: eof
writing into existing 2 at pos 2
wrote blk 67
0.. 1:
1.. 14: unwritten
15.. 1:
16.. 3: unwritten
19.. 1:
20.. 4: unwritten
24.. 1:
25.. 1:
26.. 6: unwritten
32.. 1:
39.. 1:
40.. 1:
44.. 1:
45.. 3: unwritten
48.. 1:
49.. 6: unwritten
55.. 1:
56.. 6: unwritten
62.. 1:
63.. 1:
64.. 3: unwritten
67.. 1:
68.. 3: unwritten
71.. 2:
79.. 2:
87.. 2:
95.. 1: eof
writing into existing 3 at pos 0
wrote blk 73
0.. 1:
1.. 14: unwritten
15.. 1:
16.. 3: unwritten
19.. 1:
20.. 4: unwritten
24.. 1:
25.. 1:
26.. 6: unwritten
32.. 1:
39.. 1:
40.. 1:
44.. 1:
45.. 3: unwritten
48.. 1:
49.. 6: unwritten
55.. 1:
56.. 6: unwritten
62.. 1:
63.. 1:
64.. 3: unwritten
67.. 1:
68.. 3: unwritten
71.. 2:
73.. 1:
74.. 5: unwritten
79.. 2:
87.. 2:
95.. 1: eof
writing into existing 3 at pos 1
wrote blk 86
0.. 1:
1.. 14: unwritten
15.. 1:
16.. 3: unwritten
19.. 1:
20.. 4: unwritten
24.. 1:
25.. 1:
26.. 6: unwritten
32.. 1:
39.. 1:
40.. 1:
44.. 1:
45.. 3: unwritten
48.. 1:
49.. 6: unwritten
55.. 1:
56.. 6: unwritten
62.. 1:
63.. 1:
64.. 3: unwritten
67.. 1:
68.. 3: unwritten
71.. 2:
73.. 1:
74.. 5: unwritten
79.. 2:
86.. 1:
87.. 2:
95.. 1: eof
writing into existing 3 at pos 2
wrote blk 92
0.. 1:
1.. 14: unwritten
15.. 1:
16.. 3: unwritten
19.. 1:
20.. 4: unwritten
24.. 1:
25.. 1:
26.. 6: unwritten
32.. 1:
39.. 1:
40.. 1:
44.. 1:
45.. 3: unwritten
48.. 1:
49.. 6: unwritten
55.. 1:
56.. 6: unwritten
62.. 1:
63.. 1:
64.. 3: unwritten
67.. 1:
68.. 3: unwritten
71.. 2:
73.. 1:
74.. 5: unwritten
79.. 2:
86.. 1:
87.. 2:
92.. 1:
93.. 2: unwritten
95.. 1: eof

View File

@@ -0,0 +1,18 @@
== root inode returns nothing
== crazy large unused inode does nothing
== basic entry
file
== rename
renamed
== hard link
file
link
== removal
== different dirs
== file types
type b name block
type c name char
type d name dir
type f name file
type l name symlink
== all name lengths work

View File

@@ -17,7 +17,7 @@ ino not found in dseq index
mount 0 contents after mount 1 rm: contents
ino found in dseq index
ino found in dseq index
stat: cannot stat /mnt/test/test/inode-deletion/file: No such file or directory
stat: cannot stat '/mnt/test/test/inode-deletion/file': No such file or directory
ino not found in dseq index
ino not found in dseq index
== lots of deletions use one open map

View File

@@ -20,10 +20,10 @@ offline waiting should now have two known entries:
data_wait_err found 2 waiters.
offline waiting should now have 0 known entries:
0
dd: error reading /mnt/test/test/offline-extent-waiting/dir/file: Input/output error
dd: error reading '/mnt/test/test/offline-extent-waiting/dir/file': Input/output error
0+0 records in
0+0 records out
dd: error reading /mnt/test/test/offline-extent-waiting/dir/file: Input/output error
dd: error reading '/mnt/test/test/offline-extent-waiting/dir/file': Input/output error
0+0 records in
0+0 records out
offline waiting should be empty again:

View File

@@ -1,2 +1,5 @@
== bad timeout values fail
== test different timeouts
== bad mount option fails
== mount option
== sysfs
== reset all options

View File

@@ -241,7 +241,6 @@ generic/312
generic/314
generic/316
generic/317
generic/318
generic/324
generic/326
generic/327

View File

@@ -1,5 +1,8 @@
#!/usr/bin/bash
# Force system tools to use ASCII quotes
export LC_ALL=C
#
# XXX
# - could have helper functions for waiting for pids

View File

@@ -5,6 +5,7 @@ inode-items-updated.sh
simple-inode-index.sh
simple-staging.sh
simple-release-extents.sh
get-referring-entries.sh
fallocate.sh
basic-truncate.sh
data-prealloc.sh

View File

@@ -48,7 +48,7 @@ struct our_handle {
static void exit_usage(void)
{
printf(" -h/-? output this usage message and exit\n"
" -e keep trying on enoent, consider success an error\n"
" -e keep trying on enoent and estale, consider success an error\n"
" -i <num> 64bit inode number for handle open, can be multiple\n"
" -m <string> scoutfs mount path string for ioctl fd\n"
" -n <string> optional xattr name string, defaults to \""DEFAULT_NAME"\"\n"
@@ -149,7 +149,7 @@ int main(int argc, char **argv)
fd = open_by_handle_at(mntfd, &handle.handle, O_RDWR);
if (fd == -1) {
if (!enoent_success_err || errno != ENOENT) {
if (!enoent_success_err || ( errno != ENOENT && errno != ESTALE )) {
perror("open_by_handle_at");
return 1;
}

View File

@@ -6,6 +6,15 @@
#
t_require_commands scoutfs stat filefrag dd touch truncate
write_block()
{
local file="$1"
local blk="$2"
dd if=/dev/zero of="$file" bs=4096 seek=$blk count=1 conv=notrunc status=none
echo "wrote blk $blk"
}
write_forwards()
{
local prefix="$1"
@@ -70,6 +79,25 @@ print_extents_found()
filefrag "$prefix"* 2>&1 | grep "extent.*found" | t_filter_fs
}
#
# print the logical start, len, and flags if they're there.
#
print_logical_extents()
{
local file="$1"
filefrag -v -b4096 "$file" 2>&1 | t_filter_fs | awk '
($1 ~ /[0-9]+:/) {
if ($NF !~ /[0-9]+:/) {
flags=$NF
} else {
flags=""
}
print $2, $6, flags
}
' | sed 's/last,eof/eof/'
}
t_save_all_sysfs_mount_options data_prealloc_blocks
t_save_all_sysfs_mount_options data_prealloc_contig_only
restore_options()
@@ -133,4 +161,71 @@ t_set_sysfs_mount_option 0 data_prealloc_contig_only 0
write_forwards $prefix 3
print_extents_found $prefix
#
# prepare aligned regions of 8 blocks that we'll write into.
# We'll write into the first, last, and middle block of each
# region which was prepared with no existing extents, one at
# the start, and one at the end.
#
# Let's keep this last because it creates a ton of output to read
# through. The correct output is tied to preallocation strategy so it
# has to be verified each time we change preallocation.
#
echo "== block writes into region allocs hole"
t_set_sysfs_mount_option 0 data_prealloc_blocks 8
t_set_sysfs_mount_option 0 data_prealloc_contig_only 1
touch "$prefix"
truncate -s 0 "$prefix"
# write initial blocks in regions
base=0
for sides in 0 1 2 3; do
for i in 0 1 2; do
case "$sides" in
# none
0) ;;
# left
1) write_block $prefix $((base + 0)) ;;
# right
2) write_block $prefix $((base + 7)) ;;
# both
3) write_block $prefix $((base + 0))
write_block $prefix $((base + 7)) ;;
esac
((base+=8))
done
done
echo before:
print_logical_extents "$prefix"
# now write into the first, middle, and last empty block of each
t_set_sysfs_mount_option 0 data_prealloc_contig_only 0
base=0
for sides in 0 1 2 3; do
for i in 0 1 2; do
echo "writing into existing $sides at pos $i"
case "$sides" in
# none
0) left=$base; right=$((base + 7));;
# left
1) left=$((base + 1)); right=$((base + 7));;
# right
2) left=$((base)); right=$((base + 6));;
# both
3) left=$((base + 1)); right=$((base + 6));;
esac
case "$i" in
# start
0) write_block $prefix $left ;;
# end
1) write_block $prefix $right ;;
# mid (both has 6 blocks internally)
2) write_block $prefix $((left + 3)) ;;
esac
print_logical_extents "$prefix"
((base+=8))
done
done
t_pass

View File

@@ -7,14 +7,11 @@ t_require_mounts 2
#
# Make sure that all mounts can read the results of a write from each
# mount. And make sure that the greatest of all the written seqs is
# visible after the writes were commited by remote reads.
# mount.
#
check_read_write()
{
local expected
local greatest=0
local seq
local path
local saw
local w
@@ -25,11 +22,6 @@ check_read_write()
eval path="\$T_D${w}/written"
echo "$expected" > "$path"
seq=$(scoutfs stat -s meta_seq $path)
if [ "$seq" -gt "$greatest" ]; then
greatest=$seq
fi
for r in $(t_fs_nrs); do
eval path="\$T_D${r}/written"
saw=$(cat "$path")
@@ -38,11 +30,6 @@ check_read_write()
fi
done
done
seq=$(scoutfs statfs -s committed_seq -p $T_D0)
if [ "$seq" -lt "$greatest" ]; then
echo "committed_seq $seq less than greatest $greatest"
fi
}
# verify that fenced ran our testing fence script

View File

@@ -0,0 +1,99 @@
#
# Test _GET_REFERRING_ENTRIES ioctl via the get-referring-entries cli
# command
#
# consistently print only entry names
filter_names() {
exec cut -d ' ' -f 8- | sort
}
# print entries with type characters to match find. not happy with hard
# coding, but abi won't change much.
filter_types() {
exec cut -d ' ' -f 5- | \
sed \
-e 's/type 1 /type p /' \
-e 's/type 2 /type c /' \
-e 's/type 4 /type d /' \
-e 's/type 6 /type b /' \
-e 's/type 8 /type f /' \
-e 's/type 10 /type l /' \
-e 's/type 12 /type s /' \
| \
sort
}
n_chars() {
local n="$1"
printf 'A%.0s' $(eval echo {1..\$n})
}
GRE="scoutfs get-referring-entries -p $T_M0"
echo "== root inode returns nothing"
$GRE 1
echo "== crazy large unused inode does nothing"
$GRE 4611686018427387904 # 1 << 62
echo "== basic entry"
touch $T_D0/file
ino=$(stat -c '%i' $T_D0/file)
$GRE $ino | filter_names
echo "== rename"
mv $T_D0/file $T_D0/renamed
$GRE $ino | filter_names
echo "== hard link"
mv $T_D0/renamed $T_D0/file
ln $T_D0/file $T_D0/link
$GRE $ino | filter_names
echo "== removal"
rm $T_D0/file $T_D0/link
$GRE $ino
echo "== different dirs"
touch $T_D0/file
ino=$(stat -c '%i' $T_D0/file)
for i in $(seq 1 10); do
mkdir $T_D0/dir-$i
ln $T_D0/file $T_D0/dir-$i/file-$i
done
diff -u <(find $T_D0 -type f -printf '%f\n' | sort) <($GRE $ino | filter_names)
rm $T_D0/file
echo "== file types"
mkdir $T_D0/dir
touch $T_D0/dir/file
mkdir $T_D0/dir/dir
ln -s $T_D0/dir/file $T_D0/dir/symlink
mknod $T_D0/dir/char c 1 3 # null
mknod $T_D0/dir/block b 7 0 # loop0
for name in $(ls -UA $T_D0/dir | sort); do
ino=$(stat -c '%i' $T_D0/dir/$name)
$GRE $ino | filter_types
done
rm -rf $T_D0/dir
echo "== all name lengths work"
mkdir $T_D0/dir
touch $T_D0/dir/file
ino=$(stat -c '%i' $T_D0/dir/file)
name=""
> $T_TMP.unsorted
for i in $(seq 1 255); do
name+="a"
echo "$name" >> $T_TMP.unsorted
ln $T_D0/dir/file $T_D0/dir/$name
done
sort $T_TMP.unsorted > $T_TMP.sorted
rm $T_D0/dir/file
$GRE $ino | filter_names > $T_TMP.gre
diff -u $T_TMP.sorted $T_TMP.gre
rm -rf $T_D0/dir
t_pass

View File

@@ -72,7 +72,7 @@ check_ino_index "$ino" "$dseq" "$T_M0"
check_ino_index "$ino" "$dseq" "$T_M1"
exec {FD}>&- # close
# we know that revalidating will unhash the remote dentry
stat "$T_D0/file" 2>&1 | t_filter_fs
stat "$T_D0/file" 2>&1 | sed 's/cannot statx/cannot stat/' | t_filter_fs
check_ino_index "$ino" "$dseq" "$T_M0"
check_ino_index "$ino" "$dseq" "$T_M1"

View File

@@ -17,43 +17,52 @@ set_bad_timeout() {
t_fail "set bad q hb to $to"
}
set_quorum_timeouts()
set_timeout()
{
local to="$1"
local was
local nr="$1"
local how="$2"
local to="$3"
local is
for nr in $(t_quorum_nrs); do
local mnt="$(eval echo \$T_M$nr)"
was=$(t_get_sysfs_mount_option $nr quorum_heartbeat_timeout_ms)
if [ $how == "sysfs" ]; then
t_set_sysfs_mount_option $nr quorum_heartbeat_timeout_ms $to
is=$(t_get_sysfs_mount_option $nr quorum_heartbeat_timeout_ms)
fi
if [ $how == "mount" ]; then
t_umount $nr
t_mount_opt $nr "quorum_heartbeat_timeout_ms=$to"
fi
if [ "$is" != "$to" ]; then
t_fail "tried to set qhbto on $nr to $to but got $is"
fi
done
is=$(t_get_sysfs_mount_option $nr quorum_heartbeat_timeout_ms)
if [ "$is" != "$to" ]; then
t_fail "tried to set qhbto on $nr via $how to $to but got $is"
fi
}
test_timeout()
{
local to="$1"
local orig_to
local how="$1"
local to="$2"
local start
local nr
local sv
local delay
local low
local high
# set new timeouts, saving original
orig_to=$(t_get_sysfs_mount_option 0 quorum_heartbeat_timeout_ms)
set_quorum_timeouts $to
# set timeout on non-server quorum mounts
sv=$(t_server_nr)
for nr in $(t_quorum_nrs); do
if [ $nr -ne $sv ]; then
set_timeout $nr $how $to
fi
done
# give followers time to recv heartbeats and reset timeouts
sleep 1
# tear down the current server/leader
nr=$(t_server_nr)
t_force_umount $nr
t_force_umount $sv
# see how long it takes for the next leader to start
start=$(time_ms)
@@ -64,15 +73,15 @@ test_timeout()
echo "to $to delay $delay" >> $T_TMP.delay
# restore the mount that we tore down
t_mount $nr
t_mount $sv
# reset the original timeouts
set_quorum_timeouts $orig_to
# make sure the new leader delay was reasonable, allowing for some slack
low=$((to - 1000))
high=$((to + 5000))
# make sure the new leader delay was reasonable
test "$delay" -gt "$to" || t_fail "delay $delay < to $to"
# allow 5 seconds of slop
test "$delay" -lt $(($to + 5000)) || t_fail "delay $delay > to $to + 5sec"
test "$delay" -lt "$low" && t_fail "delay $delay < low $low (to $to)"
test "$delay" -gt "$high" && t_fail "delay $delay > high $high (to $to)"
}
echo "== bad timeout values fail"
@@ -80,10 +89,29 @@ set_bad_timeout 0
set_bad_timeout -1
set_bad_timeout 1000000
echo "== test different timeouts"
echo "== bad mount option fails"
if [ "$(t_server_nr)" == 0 ]; then
nr=1
else
nr=0
fi
t_umount $nr
t_mount_opt $nr "quorum_heartbeat_timeout_ms=1000000" 2>/dev/null && \
t_fail "bad mount option succeeded"
t_mount $nr
echo "== mount option"
def=$(t_get_sysfs_mount_option 0 quorum_heartbeat_timeout_ms)
test_timeout $def
test_timeout 3000
test_timeout $((def + 19000))
test_timeout mount $def
test_timeout mount 3000
test_timeout mount $((def + 19000))
echo "== sysfs"
test_timeout sysfs $def
test_timeout sysfs 3000
test_timeout sysfs $((def + 19000))
echo "== reset all options"
t_remount_all
t_pass

View File

@@ -2,6 +2,8 @@
# Some basic tests of online resizing metadata and data devices.
#
t_require_commands bc
statfs_total() {
local single="total_$1_blocks"
local mnt="$2"

View File

@@ -55,10 +55,17 @@ scoutfs setattr -t 67305985.999999999 -V 1 -s 1 "$FILE" 2>&1 | t_filter_fs
TZ=GMT stat -c "%z" "$FILE"
rm "$FILE"
#
# With e2fsprogs-v1.42.10-10-g29758d2f, the output of filefrag 'flags' changes
# significantly. First, the _LAST flag is now output. Second, the 'unknown'
# flag is now printed out as 'unknown_loc'. To compensate for this, we check
# and replace the "correct" output for new versions here with the expected
# value.
#
echo "== large offline extents are created"
touch "$FILE"
scoutfs setattr -V 1 -o -s $((10007 * 4096)) "$FILE" 2>&1 | t_filter_fs
filefrag -v -b4096 "$FILE" 2>&1 | t_filter_fs
filefrag -v -b4096 "$FILE" 2>&1 | sed 's/last,unknown_loc,eof$/unknown,eof/' | t_filter_fs
rm "$FILE"
# had a bug where we were creating extents that were too long

View File

@@ -27,15 +27,9 @@ test_xattr_lengths() {
echo "key len $name_len val len $val_len" >> "$T_TMP.log"
setfattr -n $name -v \"$val\" "$FILE"
# grep has trouble with enormous args? so we dump the
# name=value to a file and compare with a known good file
getfattr -d --absolute-names "$FILE" | grep "$name" > "$T_TMP.got"
getfattr -d --only-values --absolute-names "$FILE" -n "$name" > "$T_TMP.got"
echo -n "$val" > "$T_TMP.good"
if [ $val_len == 0 ]; then
echo "$name" > "$T_TMP.good"
else
echo "$name=\"$val\"" > "$T_TMP.good"
fi
cmp "$T_TMP.good" "$T_TMP.got" || \
t_fail "cmp failed name len $name_len val len $val_len"

View File

@@ -75,6 +75,7 @@ generic/215 # mmap missing
generic/246 # mmap missing
generic/247 # mmap missing
generic/248 # mmap missing
generic/318 # can't support user namespaces until v5.11
generic/321 # requires selinux enabled for '+' in ls?
generic/325 # mmap missing
generic/338 # BUG_ON update inode error handling

View File

@@ -209,6 +209,29 @@ A path within a ScoutFS filesystem.
.RE
.PD
.TP
.BI "get-referring-entries [-p|--path PATH] INO"
.sp
Find directory entries that reference an inode number.
.sp
Display all the directory entries that refer to a given inode. Each
entry includes the inode number of the directory that contains it, the
d_off and d_type values for the entry as described by
.BR readdir (3),
and the name of the entry.
.RS 1.0i
.PD 0
.TP
.sp
.TP
.B "-p, --path PATH"
A path within a ScoutFS filesystem.
.TP
.B "INO"
The inode number of the target inode.
.RE
.PD
.TP
.BI "ino-path INODE-NUM [-p|--path PATH]"
.sp

View File

@@ -61,7 +61,7 @@ install -m 644 -D fenced/scoutfs-fenced.conf.example $RPM_BUILD_ROOT%{_sysconfdi
%files
%defattr(644,root,root,755)
%{_mandir}/man*/scoutfs*.gz
%{_unitdir}/scoutfs-fenced.service
/%{_unitdir}/scoutfs-fenced.service
%{_sysconfdir}/scoutfs
%defattr(755,root,root,755)
%{_sbindir}/scoutfs

View File

@@ -0,0 +1,150 @@
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
#include <limits.h>
#include <argp.h>
#include "sparse.h"
#include "parse.h"
#include "util.h"
#include "format.h"
#include "ioctl.h"
#include "parse.h"
#include "cmd.h"
struct gre_args {
char *path;
u64 ino;
};
static int do_get_referring_entries(struct gre_args *args)
{
struct scoutfs_ioctl_get_referring_entries gre;
struct scoutfs_ioctl_dirent *dent;
unsigned int bytes;
void *buf;
int ret;
int fd;
fd = get_path(args->path, O_RDONLY);
if (fd < 0)
return fd;
bytes = PATH_MAX * 1024;
buf = malloc(bytes);
if (!buf) {
fprintf(stderr, "couldn't allocate %u byte buffer\n", bytes);
ret = -ENOMEM;
goto out;
}
gre.ino = args->ino;
gre.dir_ino = 0;
gre.dir_pos = 0;
gre.entries_ptr = (intptr_t)buf;
gre.entries_bytes = bytes;
for (;;) {
ret = ioctl(fd, SCOUTFS_IOC_GET_REFERRING_ENTRIES, &gre);
if (ret <= 0) {
if (ret < 0) {
ret = -errno;
fprintf(stderr, "ioctl failed: %s (%d)\n", strerror(errno), errno);
}
goto out;
}
dent = buf;
while (ret-- > 0) {
printf("dir %llu pos %llu type %u name %s\n",
dent->dir_ino, dent->dir_pos, dent->d_type, dent->name);
gre.dir_ino = dent->dir_ino;
gre.dir_pos = dent->dir_pos;
if (dent->flags & SCOUTFS_IOCTL_DIRENT_FLAG_LAST) {
ret = 0;
goto out;
}
dent = (void *)dent + dent->entry_bytes;
}
if (++gre.dir_pos == 0) {
if (++gre.dir_ino == 0) {
ret = 0;
goto out;
}
}
}
out:
close(fd);
free(buf);
return ret;
}
static int parse_opt(int key, char *arg, struct argp_state *state)
{
struct gre_args *args = state->input;
int ret;
switch (key) {
case 'p':
args->path = strdup_or_error(state, arg);
break;
case ARGP_KEY_ARG:
if (args->ino)
argp_error(state, "more than one argument given");
ret = parse_u64(arg, &args->ino);
if (ret)
argp_error(state, "inode parse error");
break;
case ARGP_KEY_FINI:
if (!args->ino) {
argp_error(state, "must provide inode number");
}
break;
default:
break;
}
return 0;
}
static struct argp_option options[] = {
{ "path", 'p', "PATH", 0, "Path to ScoutFS filesystem"},
{ NULL }
};
static struct argp argp = {
options,
parse_opt,
"INODE-NUM",
"Print directory entries that refer to inode number"
};
static int get_referring_entries_cmd(int argc, char **argv)
{
struct gre_args args = {NULL};
int ret;
ret = argp_parse(&argp, argc, argv, 0, NULL, &args);
if (ret)
return ret;
return do_get_referring_entries(&args);
}
static void __attribute__((constructor)) get_referring_entries_ctor(void)
{
cmd_register_argp("get-referring-entries", &argp, GROUP_SEARCH, get_referring_entries_cmd);
}