Drop KC_HAS_SET_ACL

RHEL7 was the only conditional user of this define, but since support for that is removed, these can be dropped. Signed-off-by: Auke Kok <auke.kok@versity.com>
Collapse scoutfs_rename2 into scoutfs_rename_common.
2026-06-09 21:22:36 +00:00 · 2026-06-08 11:53:16 -07:00 · 2026-06-08 11:27:46 -07:00 · 2026-06-08 11:27:33 -07:00 · 2026-06-05 09:49:45 -07:00 · 2026-06-05 09:49:45 -07:00
52 changed files with 626 additions and 3197 deletions
@@ -1,6 +1,35 @@
 Versity ScoutFS Release Notes
 =============================

+---
+v1.32
+\
+*June 2, 2026*
+
+Fix writing POSIX ACLs over NFS mounts that export the scoutfs
+filesystem.
+
+Add support for kernels in the RHEL 9.8 minor release.
+
+Reduce unneeded block allocation when data\_prealloc\_contig\_only is
+set to 0. This helps achieve more efficient data space usage when
+writing small files.
+
+---
+v1.31
+\
+*May 5, 2026*
+
+Fix a race between modifying quota rules and internal reading of the
+rules that tripped an assertion.
+
+Fix a bug that could skip merging totl items under specific heavy write
+loads.  This could lead to merged totl items incorrectly tracking the
+sum of all the contributing totl xattrs.
+
+Fix many small low-risk bugs in error paths that were found with code
+analysis and testing.
+
 ---
 v1.30
 \
@@ -13,7 +13,6 @@ scoutfs-y +=			\
 	avl.o			\
 	alloc.o			\
 	block.o			\
-	bsearch_index.o		\
 	btree.o			\
 	client.o		\
 	counters.o		\
@@ -37,7 +36,6 @@ scoutfs-y +=			\
 	per_task.o		\
 	quorum.o		\
 	quota.o			\
-	raw.o			\
 	recov.o			\
 	scoutfs_trace.o		\
 	server.o		\
@@ -6,231 +6,6 @@

 ccflags-y += -include $(src)/kernelcompat.h

-#
-# v3.18-rc2-19-gb5ae6b15bd73
-# 
-# Folds d_materialise_unique into d_splice_alias. Note reversal
-# of arguments (Also note Documentation/filesystems/porting.rst)
-#
-ifneq (,$(shell grep 'd_materialise_unique' include/linux/dcache.h))
-ccflags-y += -DKC_D_MATERIALISE_UNIQUE=1
-endif
-
-#
-# RHEL extended the fop struct so to use it we have to set
-# a flag to indicate that the struct is large enough and
-# contains the pointer.
-#
-ifneq (,$(shell grep 'FMODE_KABI_ITERATE' include/linux/fs.h))
-ccflags-y += -DKC_FMODE_KABI_ITERATE
-endif
-
-#
-# v4.7-rc2-23-g0d4d717f2583
-#
-# Added user_ns argument to posix_acl_valid
-#
-ifneq (,$(shell grep 'posix_acl_valid.*user_namespace' include/linux/posix_acl.h))
-ccflags-y += -DKC_POSIX_ACL_VALID_USER_NS
-endif
-
-#
-# v5.3-12296-g6d2052d188d9
-#
-# The RBCOMPUTE function is now passed an extra flag, and should return a bool
-# to indicate whether the propagated callback should stop or not.
-#
-ifneq (,$(shell grep 'static inline bool RBNAME.*_compute_max' include/linux/rbtree_augmented.h))
-ccflags-y += -DKC_RB_TREE_AUGMENTED_COMPUTE_MAX
-endif
-
-#
-# v3.13-25-g37bc15392a23
-#
-# Renames posix_acl_create to __posix_acl_create and provide some
-# new interfaces for creating ACLs
-#
-ifneq (,$(shell grep '__posix_acl_create' include/linux/posix_acl.h))
-ccflags-y += -DKC___POSIX_ACL_CREATE
-endif
-
-#
-# v4.8-rc1-29-g31051c85b5e2
-#
-# inode_change_ok() removed - replace with setattr_prepare()
-# v5.11-rc4-7-g2f221d6f7b88 removes extern attribute
-#
-ifneq (,$(shell grep 'int setattr_prepare' include/linux/fs.h))
-ccflags-y += -DKC_SETATTR_PREPARE
-endif
-
-#
-# v4.15-rc3-4-gae5e165d855d
-#
-# linux/iversion.h needs to manually be included for code that
-# manipulates this field.
-#
-ifneq (,$(shell grep -s 'define _LINUX_IVERSION_H' include/linux/iversion.h))
-ccflags-y += -DKC_NEED_LINUX_IVERSION_H=1
-endif
-
-# v4.11-12447-g104b4e5139fe
-#
-# Renamed __percpu_counter_add to percpu_counter_add_batch to clarify
-# that the __ wasn't less safe, just took an extra parameter.
-#
-ifneq (,$(shell grep 'percpu_counter_add_batch' include/linux/percpu_counter.h))
-ccflags-y += -DKC_PERCPU_COUNTER_ADD_BATCH
-endif
-
-#
-# v4.11-4550-g7dea19f9ee63
-#
-# Introduced memalloc_nofs_{save,restore} preferred instead of _noio_.
-#
-ifneq (,$(shell grep 'memalloc_nofs_save' include/linux/sched/mm.h))
-ccflags-y += -DKC_MEMALLOC_NOFS_SAVE
-endif
-
-#
-# v4.7-12414-g1eff9d322a44
-#
-# Renamed bi_rw to bi_opf to force old code to catch up.  We use it as a
-# single switch between old and new bio structures.
-#
-ifneq (,$(shell grep 'bi_opf' include/linux/blk_types.h))
-ccflags-y += -DKC_BIO_BI_OPF
-endif
-
-#
-# v4.12-rc2-201-g4e4cbee93d56
-#
-# Moves to bi_status BLK_STS_ API instead of having a mix of error
-# end_io args or bi_error.
-#
-ifneq (,$(shell grep 'bi_status' include/linux/blk_types.h))
-ccflags-y += -DKC_BIO_BI_STATUS
-endif
-
-#
-# v3.11-8765-ga0b02131c5fc
-#
-# Remove the old ->shrink() API, ->{scan,count}_objects is preferred.
-#
-ifneq (,$(shell grep '(*shrink)' include/linux/shrinker.h))
-ccflags-y += -DKC_SHRINKER_SHRINK
-endif
-
-#
-# v3.19-4777-g6bec00352861
-#
-# backing_dev_info is removed from address_space. Instead we need to use
-# inode_to_bdi() inline from <backing-dev.h>.
-#
-ifneq (,$(shell grep 'struct backing_dev_info.*backing_dev_info' include/linux/fs.h))
-ccflags-y += -DKC_LINUX_BACKING_DEV_INFO=1
-endif
-
-#
-# v4.3-9290-ge409de992e3e
-#
-# xattr handlers are now passed a struct that contains `flags`
-#
-ifneq (,$(shell grep 'int...get..const struct xattr_handler.*struct dentry.*dentry,' include/linux/xattr.h))
-ccflags-y += -DKC_XATTR_STRUCT_XATTR_HANDLER=1
-endif
-
-#
-# v4.16-rc1-1-g9b2c45d479d0
-#
-# kernel_getsockname() and kernel_getpeername dropped addrlen arg
-#
-ifneq (,$(shell grep 'kernel_getsockname.*,$$' include/linux/net.h))
-ccflags-y += -DKC_KERNEL_GETSOCKNAME_ADDRLEN=1
-endif
-
-#
-# v4.1-rc1-410-geeb1bd5c40ed
-#
-# Adds a struct net parameter to sock_create_kern
-#
-ifneq (,$(shell grep 'sock_create_kern.*struct net' include/linux/net.h))
-ccflags-y += -DKC_SOCK_CREATE_KERN_NET=1
-endif
-
-#
-# v4.17-rc6-7-g95582b008388
-#
-# Kernel has current_time(inode) to uniformly retreive timespec in the right unit
-#
-ifneq (,$(shell grep 'struct timespec64 current_time' include/linux/fs.h))
-ccflags-y += -DKC_CURRENT_TIME_INODE=1
-endif
-
-#
-# v4.9-12228-g530e9b76ae8f
-#
-# register_cpu_notifier and family were all removed and to be
-# replaced with cpuhp_* API calls.
-#
-ifneq (,$(shell grep 'define register_hotcpu_notifier' include/linux/cpu.h))
-ccflags-y += -DKC_CPU_NOTIFIER
-endif
-
-#
-# v3.14-rc8-130-gccad2365668f
-#
-# generic_file_buffered_write is removed, backport it
-#
-ifneq (,$(shell grep 'extern ssize_t generic_file_buffered_write' include/linux/fs.h))
-ccflags-y += -DKC_GENERIC_FILE_BUFFERED_WRITE=1
-endif
-
-#
-# v5.7-438-g8151b4c8bee4
-#
-# struct address_space_operations switches away from .readpages to .readahead
-#
-# RHEL has backported this feature all the way to RHEL8, as part of RHEL_KABI,
-# which means we need to detect this very precisely
-#
-ifneq (,$(shell grep 'readahead.*struct readahead_control' include/linux/fs.h))
-ccflags-y += -DKC_FILE_AOPS_READAHEAD
-endif
-
-#
-# v4.0-rc7-1743-g8436318205b9
-#
-# .aio_read and .aio_write no longer exist. All reads and writes now use the
-# .read_iter and .write_iter methods, or must implement .read and .write (which
-# we don't).
-#
-ifneq (,$(shell grep 'ssize_t.*aio_read' include/linux/fs.h))
-ccflags-y += -DKC_LINUX_HAVE_FOP_AIO_READ=1
-endif
-
-#
-# rhel7 has a custom inode_operations_wrapper struct that is discarded
-# entirely in favor of upstream structure since rhel8.
-#
-ifneq (,$(shell grep 'void.*follow_link.*struct dentry' include/linux/fs.h))
-ccflags-y += -DKC_LINUX_HAVE_RHEL_IOPS_WRAPPER=1
-endif
-
-ifneq (,$(shell grep 'size_t.*ki_left;' include/linux/aio.h))
-ccflags-y += -DKC_LINUX_AIO_KI_LEFT=1
-endif
-
-#
-# v4.4-rc4-4-g98e9cb5711c6
-#
-# Introduces a new xattr_handler .name member that can be used to match the
-# entire field, instead of just a prefix. For these kernels, we must use
-# the new .name field instead.
-ifneq (,$(shell grep 'static inline const char .xattr_prefix' include/linux/xattr.h))
-ccflags-y += -DKC_XATTR_HANDLER_NAME=1
-endif
-
 #
 # v5.19-rc4-96-g342a72a33407
 #
@@ -334,14 +109,6 @@ ifneq (,$(shell grep 'int tcp_sock_set_keepintvl' include/linux/tcp.h))
 ccflags-y += -DKC_HAVE_TCP_SET_SOCKFN
 endif

-#
-# v4.16-rc3-13-ga84d1169164b
-#
-# Fixes y2038 issues with struct timeval.
-ifneq (,$(shell grep -s '^struct __kernel_old_timeval .' include/uapi/linux/time_types.h))
-ccflags-y += -DKC_KERNEL_OLD_TIMEVAL_STRUCT
-endif
-
 #
 # v5.19-rc4-52-ge33c267ab70d
 #
@@ -411,47 +178,6 @@ ifneq (,$(shell grep 'struct file.*bdev_file_open_by_path.const char.*path' incl
 ccflags-y += -DKC_BDEV_FILE_OPEN_BY_PATH
 endif

-# v4.0-rc7-1796-gfe0f07d08ee3
-#
-# direct-io changes modify inode_dio_done to now be called inode_dio_end
-ifneq (,$(shell grep 'void inode_dio_end.struct inode' include/linux/fs.h))
-ccflags-y += -DKC_INODE_DIO_END
-endif
-
-#
-# v5.0-6476-g3d3539018d2c
-#
-# page fault handlers return a bitmask vm_fault_t instead
-# Note: el8's header has a slightly modified prefix here
-ifneq (,$(shell grep 'typedef.*__bitwise unsigned.*int vm_fault_t' include/linux/mm_types.h))
-ccflags-y += -DKC_MM_VM_FAULT_T
-endif
-
-# v3.19-499-gd83a08db5ba6
-#
-# .remap pages becomes obsolete
-ifneq (,$(shell grep 'int ..remap_pages..struct vm_area_struct' include/linux/mm.h))
-ccflags-y += -DKC_MM_REMAP_PAGES
-endif
-
-#
-# v3.19-4742-g503c358cf192
-#
-# list_lru_shrink_count() and list_lru_shrink_walk() introduced
-#
-ifneq (,$(shell grep 'list_lru_shrink_count.*struct list_lru' include/linux/list_lru.h))
-ccflags-y += -DKC_LIST_LRU_SHRINK_COUNT_WALK
-endif
-
-#
-# v3.19-4757-g3f97b163207c
-#
-# lru_list_walk_cb lru arg added
-#
-ifneq (,$(shell grep 'struct list_head \*item, spinlock_t \*lock, void \*cb_arg' include/linux/list_lru.h))
-ccflags-y += -DKC_LIST_LRU_WALK_CB_ITEM_LOCK
-endif
-
 #
 # v6.7-rc4-153-g0a97c01cd20b
 #
@@ -470,15 +196,6 @@ ifneq (,$(shell grep 'struct list_lru_one \*list, spinlock_t \*lock, void \*cb_a
 ccflags-y += -DKC_LIST_LRU_WALK_CB_LIST_LOCK
 endif

-#
-# v5.1-rc4-273-ge9b98e162aa5
-#
-# introduce stack trace helpers
-#
-ifneq (,$(shell grep '^unsigned int stack_trace_save' include/linux/stacktrace.h))
-ccflags-y += -DKC_STACK_TRACE_SAVE
-endif
-
 #
 # v6.1-rc1-2-g138060ba92b3
 #
@@ -496,3 +213,12 @@ endif
 ifneq (,$(shell grep 'struct posix_acl.*get_inode_acl' include/linux/fs.h))
 ccflags-y += -DKC_GET_INODE_ACL
 endif
+
+#
+# v6.15-13744-g41cb08555c41
+#
+# from_timer renamed to timer_container_of.
+#
+ifneq (,$(shell grep 'define timer_container_of' include/linux/timer.h))
+ccflags-y += -DKC_TIMER_CONTAINER_OF
+endif
@@ -16,6 +16,7 @@
 #include <linux/xattr.h>
 #include <linux/posix_acl.h>
 #include <linux/posix_acl_xattr.h>
+#include <linux/iversion.h>

 #include "format.h"
 #include "super.h"
@@ -69,15 +70,6 @@ struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct s
 	char *name;
 	int ret;

-#ifndef KC___POSIX_ACL_CREATE
-	if (!IS_POSIXACL(inode))
-		return NULL;
-
-	acl = get_cached_acl(inode, type);
-	if (acl != ACL_NOT_CACHED)
-		return acl;
-#endif
-
 	ret = acl_xattr_name_len(type, &name, NULL);
 	if (ret < 0)
 		return ERR_PTR(ret);
@@ -123,11 +115,6 @@ struct posix_acl *scoutfs_get_acl(struct inode *inode, int type)
 		return ERR_PTR(-ECHILD);
 #endif

-#ifndef KC___POSIX_ACL_CREATE
-	if (!IS_POSIXACL(inode))
-		return NULL;
-#endif
-
 	ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, inode, &lock);
 	if (ret < 0) {
 		acl = ERR_PTR(ret);
@@ -216,7 +203,8 @@ int scoutfs_set_acl(KC_VFS_NS_DEF
 {
 	struct inode *inode = dentry->d_inode;
 #else
-int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+int scoutfs_set_acl(KC_VFS_NS_DEF
+		    struct inode *inode, struct posix_acl *acl, int type)
 {
 #endif
 	struct super_block *sb = inode->i_sb;
@@ -239,17 +227,11 @@ int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
 	return ret;
 }
-#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
 int scoutfs_acl_get_xattr(const struct xattr_handler *handler, struct dentry *dentry,
 			  struct inode *inode, const char *name, void *value,
 			  size_t size)
 {
 	int type = handler->flags;
-#else
-int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value, size_t size,
-			  int type)
-{
-#endif
 	struct posix_acl *acl;
 	int ret = 0;

@@ -272,7 +254,6 @@ int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value,
 	return ret;
 }

-#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
 int scoutfs_acl_set_xattr(const struct xattr_handler *handler,
 			  KC_VFS_NS_DEF
 			  struct dentry *dentry,
@@ -280,11 +261,6 @@ int scoutfs_acl_set_xattr(const struct xattr_handler *handler,
 			  size_t size, int flags)
 {
 	int type = handler->flags;
-#else
-int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *value, size_t size,
-			  int flags, int type)
-{
-#endif
 	struct posix_acl *acl = NULL;
 	int ret;

@@ -300,7 +276,7 @@ int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *v
 			return PTR_ERR(acl);

 		if (acl) {
-			ret = kc_posix_acl_valid(&init_user_ns, acl);
+			ret = posix_acl_valid(&init_user_ns, acl);
 			if (ret)
 				goto out;
 		}
@@ -309,7 +285,7 @@ int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *v
 #ifdef KC_SET_ACL_DENTRY
 	ret = scoutfs_set_acl(KC_VFS_INIT_NS dentry, acl, type);
 #else
-	ret = scoutfs_set_acl(dentry->d_inode, acl, type);
+	ret = scoutfs_set_acl(KC_VFS_INIT_NS dentry->d_inode, acl, type);
 #endif
 out:
 	posix_acl_release(acl);
@@ -5,7 +5,8 @@
 int scoutfs_set_acl(KC_VFS_NS_DEF
 		    struct dentry *dentry, struct posix_acl *acl, int type);
 #else
-int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+int scoutfs_set_acl(KC_VFS_NS_DEF
+		    struct inode *inode, struct posix_acl *acl, int type);
 #endif
 #ifdef KC_GET_INODE_ACL
 struct posix_acl *scoutfs_get_acl(struct inode *inode, int type, bool rcu);
@@ -15,7 +16,6 @@ struct posix_acl *scoutfs_get_acl(struct inode *inode, int type);
 struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct scoutfs_lock *lock);
 int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type,
 			   struct scoutfs_lock *lock, struct list_head *ind_locks);
-#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
 int scoutfs_acl_get_xattr(const struct xattr_handler *, struct dentry *dentry,
 			  struct inode *inode, const char *name, void *value,
 			  size_t size);
@@ -24,12 +24,6 @@ int scoutfs_acl_set_xattr(const struct xattr_handler *,
 			  struct dentry *dentry,
 			  struct inode *inode, const char *name, const void *value,
 			  size_t size, int flags);
-#else
-int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value, size_t size,
-			  int type);
-int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *value, size_t size,
-			  int flags, int type);
-#endif
 int scoutfs_acl_chmod_locked(struct inode *inode, struct iattr *attr,
 			     struct scoutfs_lock *lock, struct list_head *ind_locks);
 int scoutfs_init_acl_locked(struct inode *inode, struct inode *dir,
@@ -218,6 +218,7 @@ static void block_free_work(struct work_struct *work)

 	llist_for_each_entry_safe(bp, tmp, deleted, free_node) {
 		block_free(sb, bp);
+		cond_resched();
 	}
 }

@@ -443,13 +444,13 @@ static void block_end_io(struct super_block *sb, blk_opf_t opf,
 		wake_up(&binf->waitq);
 }

-static void KC_DECLARE_BIO_END_IO(block_bio_end_io, struct bio *bio)
+static void block_bio_end_io(struct bio *bio)
 {
 	struct block_private *bp = bio->bi_private;
 	struct super_block *sb = bp->sb;

 	TRACE_BLOCK(end_io, bp);
-	block_end_io(sb, kc_bio_get_opf(bio), bp, kc_bio_get_errno(bio));
+	block_end_io(sb, bio->bi_opf, bp, blk_status_to_errno(bio->bi_status));
 	bio_put(bio);
 }

@@ -467,9 +468,6 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
 	sector_t sector;
 	int ret = 0;

-	if (scoutfs_forcing_unmount(sb))
-		return -ENOLINK;
-
 	sector = bp->bl.blkno << (SCOUTFS_BLOCK_LG_SHIFT - 9);

 	WARN_ON_ONCE(bp->bl.blkno == U64_MAX);
@@ -480,6 +478,17 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
 	set_bit(BLOCK_BIT_IO_BUSY, &bp->bits);
 	block_get(bp);

+	/*
+	 * A second thread may already be waiting on this block's completion
+	 * after this thread won the race to submit the block.  We exit through
+	 * the block_end_io error path, which sets BLOCK_BIT_ERROR and ensures
+	 * that other callers in the waitq get woken up.
+	 */
+	if (scoutfs_forcing_unmount(sb)) {
+		ret = -ENOLINK;
+		goto end_io;
+	}
+
 	blk_start_plug(&plug);

 	for (off = 0; off < SCOUTFS_BLOCK_LG_SIZE; off += PAGE_SIZE) {
@@ -490,7 +499,7 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
 				break;
 			}

-			kc_bio_set_sector(bio, sector + (off >> 9));
+			bio->bi_iter.bi_sector = sector + (off >> 9);
 			bio->bi_end_io = block_bio_end_io;
 			bio->bi_private = bp;

@@ -507,16 +516,17 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
 			BUG();

 		if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
-			kc_submit_bio(bio);
+			submit_bio(bio);
 			bio = NULL;
 		}
 	}

 	if (bio)
-		kc_submit_bio(bio);
+		submit_bio(bio);

 	blk_finish_plug(&plug);

+end_io:
 	/* let racing end_io know we're done */
 	block_end_io(sb, opf, bp, ret);

@@ -1169,11 +1179,11 @@ struct sm_block_completion {
 	int err;
 };

-static void KC_DECLARE_BIO_END_IO(sm_block_bio_end_io, struct bio *bio)
+static void sm_block_bio_end_io(struct bio *bio)
 {
 	struct sm_block_completion *sbc = bio->bi_private;

-	sbc->err = kc_bio_get_errno(bio);
+	sbc->err = blk_status_to_errno(bio->bi_status);
 	complete(&sbc->comp);
 	bio_put(bio);
 }
@@ -1226,7 +1236,7 @@ static int sm_block_io(struct super_block *sb, struct block_device *bdev, blk_op
 		goto out;
 	}

-	kc_bio_set_sector(bio, blkno << (SCOUTFS_BLOCK_SM_SHIFT - 9));
+	bio->bi_iter.bi_sector = blkno << (SCOUTFS_BLOCK_SM_SHIFT - 9);
 	bio->bi_end_io = sm_block_bio_end_io;
 	bio->bi_private = &sbc;
 	bio_add_page(bio, page, SCOUTFS_BLOCK_SM_SIZE, 0);
@@ -1234,7 +1244,7 @@ static int sm_block_io(struct super_block *sb, struct block_device *bdev, blk_op
 	init_completion(&sbc.comp);
 	sbc.err = 0;

-	kc_submit_bio(bio);
+	submit_bio(bio);

 	wait_for_completion(&sbc.comp);
 	ret = sbc.err;
@@ -1,8 +1,6 @@
 #ifndef _SCOUTFS_BLOCK_H_
 #define _SCOUTFS_BLOCK_H_

-struct scoutfs_alloc;
-
 struct scoutfs_block_writer {
 	spinlock_t lock;
 	struct list_head dirty_list;
@@ -1,59 +0,0 @@
-/*
- * Copyright (C) 2026 Versity Software, Inc.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-#include <linux/kernel.h>
-#include <linux/bsearch.h>
-
-#include "bsearch_index.h"
-
-struct bsearch_index_key {
-	int (*cmp)(const void *key, const void *elt);
-	/* the key has to be const, so we have to update the index through a pointer */
-	void **index_elt;
-	const void *key;
-	size_t size;
-};
-
-static int cmp_index(const void *key, const void *elt)
-{
-	const struct bsearch_index_key *bik = key;
-	int cmp = bik->cmp(bik->key, elt);
-
-	if (cmp > 0)
-		*(bik->index_elt) = (void *)elt + bik->size;
-	else
-		*(bik->index_elt) = (void *)elt;
-
-	return cmp;
-}
-
-/*
- * A bsearch() wrapper that returns the index of the element of the
- * array that the key would be stored in to maintain sort order.  It's
- * the first element where the existing element is greater than the key.
- * It returns the size of the array if the key is greater than the last
- * element in the array.
- */
-size_t bsearch_index(const void *key, const void *base, size_t num, size_t size,
-		     int (*cmp)(const void *key, const void *elt))
-{
-	void *index_elt = (void *)base;
-	struct bsearch_index_key bik = {
-		.cmp = cmp,
-		.index_elt = &index_elt,
-		.key = key,
-		.size = size,
-	};
-
-	bsearch(&bik, base, num, size, cmp_index);
-	return ((unsigned long)index_elt - (unsigned long)base) / size;
-}
@@ -1,7 +0,0 @@
-#ifndef _SCOUTFS_BSEARCH_INDEX_H_
-#define _SCOUTFS_BSEARCH_INDEX_H_
-
-size_t bsearch_index(const void *key, const void *base, size_t num, size_t size,
-		     int (*cmp)(const void *key, const void *elt));
-
-#endif
@@ -1816,11 +1816,6 @@ int scoutfs_btree_dirty(struct super_block *sb,
 * Call the users callback on all the items in the leaf that we find.
 * We also set the caller's keys for the first and last possible keys
 * that could exist in the leaf block.
- *
- * The callback can set a new key to continue reading from rather than
- * iterating over all the items.  It modifies the key and returns
- * -ESRCH, which performs a new avl search.  If the modified key falls
- * outside of the range of keys in the block then we return.
 */
 int scoutfs_btree_read_items(struct super_block *sb,
 			     struct scoutfs_btree_root *root,
@@ -1834,7 +1829,6 @@ int scoutfs_btree_read_items(struct super_block *sb,
 	struct scoutfs_avl_node *next_node;
 	struct scoutfs_avl_node *node;
 	struct btree_walk_key_range kr;
-	struct scoutfs_key cb_key;
 	struct scoutfs_block *bl;
 	int ret;

@@ -1848,32 +1842,22 @@ int scoutfs_btree_read_items(struct super_block *sb,
 	if (scoutfs_key_compare(&kr.end, end) < 0)
 		*end = kr.end;

-	cb_key = *start;
-search:
-	node = scoutfs_avl_search(&bt->item_root, cmp_key_item, &cb_key, NULL,
+	node = scoutfs_avl_search(&bt->item_root, cmp_key_item, start, NULL,
 				  NULL, &next_node, NULL) ?: next_node;
 	while (node) {
 		item = node_item(node);
 		if (scoutfs_key_compare(&item->key, end) > 0)
 			break;

-		cb_key = *item_key(item);
-		ret = cb(sb, &cb_key, le64_to_cpu(item->seq), item->flags,
+		ret = cb(sb, item_key(item), le64_to_cpu(item->seq), item->flags,
 			 item_val(bt, item), item_val_len(item), arg);
-		if (ret < 0) {
-			if (ret == -ESRCH) {
-				if (scoutfs_key_compare(&cb_key, start) >= 0)
-					goto search;
-				ret = 0;
-			}
-			goto out;
-		}
+		if (ret < 0)
+			break;

 		node = scoutfs_avl_next(&bt->item_root, node);
 	}

 	scoutfs_block_put(sb, bl);
-	ret = 0;
 out:
 	return ret;
 }
@@ -23,6 +23,7 @@
 #include <linux/fiemap.h>
 #include <linux/writeback.h>
 #include <linux/overflow.h>
+#include <linux/iversion.h>

 #include "format.h"
 #include "super.h"
@@ -422,6 +423,8 @@ static int alloc_block(struct super_block *sb, struct inode *inode,

 	mutex_lock(&datinf->mutex);

+	scoutfs_inode_get_onoff(inode, &online, &offline);
+
 	/* default to single allocation at the written block */
 	start = iblock;
 	count = 1;
@@ -444,7 +447,6 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
 		 * the preallocation size to the number of online
 		 * blocks.
 		 */
-		scoutfs_inode_get_onoff(inode, &online, &offline);
 		if (iblock > 1 && iblock == online) {
 			ret = scoutfs_ext_next(sb, &data_ext_ops, &args,
 					       iblock, 1, &found);
@@ -486,6 +488,13 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
 		/* trim count by next extent after iblock */
 		if (found.len && found.start > start && found.start < start + count)
 			count = (found.start - start);
+
+		/*
+		 * Ramp the aligned region size up proportionally with
+		 * the file's online block count rather than jumping to
+		 * the full prealloc size.
+		 */
+		count = max_t(u64, 1, min(count, online));
 	}

 	/* overall prealloc limit */
@@ -749,54 +758,6 @@ static int scoutfs_readpage(struct file *file, struct page *page)
 	return ret;
 }

-#ifndef KC_FILE_AOPS_READAHEAD
-/*
- * This is used for opportunistic read-ahead which can throw the pages
- * away if it needs to.  If the caller didn't deal with offline extents
- * then we drop those pages rather than trying to wait.  Whoever is
- * staging offline extents should be doing it in enormous chunks so that
- * read-ahead can ramp up within each staged region.  The check for
- * offline extents is cheap when the inode has no offline extents.
- */
-static int scoutfs_readpages(struct file *file, struct address_space *mapping,
-			     struct list_head *pages, unsigned nr_pages)
-{
-	struct inode *inode = file->f_inode;
-	struct super_block *sb = inode->i_sb;
-	struct scoutfs_lock *inode_lock = NULL;
-	struct page *page;
-	struct page *tmp;
-	int ret;
-
-	ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
-				 SCOUTFS_LKF_REFRESH_INODE, inode, &inode_lock);
-	if (ret)
-		goto out;
-
-	list_for_each_entry_safe(page, tmp, pages, lru) {
-		ret = scoutfs_data_wait_check(inode, page_offset(page),
-					      PAGE_SIZE, SEF_OFFLINE,
-					      SCOUTFS_IOC_DWO_READ, NULL,
-					      inode_lock);
-		if (ret < 0)
-			goto out;
-		if (ret > 0) {
-			list_del(&page->lru);
-			put_page(page);
-			if (--nr_pages == 0) {
-				ret = 0;
-				goto out;
-			}
-		}
-	}
-
-	ret = mpage_readpages(mapping, pages, nr_pages, scoutfs_get_block_read);
-out:
-	scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
-	BUG_ON(!list_empty(pages));
-	return ret;
-}
-#else
 static void scoutfs_readahead(struct readahead_control *rac)
 {
 	struct inode *inode = rac->file->f_inode;
@@ -818,7 +779,6 @@ static void scoutfs_readahead(struct readahead_control *rac)

 	scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
 }
-#endif

 static int scoutfs_writepage(struct page *page, struct writeback_control *wbc)
 {
@@ -1259,7 +1219,7 @@ int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
 	struct data_ext_args from_args;
 	struct data_ext_args to_args;
 	struct scoutfs_extent ext;
-	struct kc_timespec cur_time;
+	struct timespec64 cur_time;
 	LIST_HEAD(locks);
 	bool done = false;
 	loff_t from_size;
@@ -2055,15 +2015,9 @@ int scoutfs_data_waiting(struct super_block *sb, u64 ino, u64 iblock,
 	return ret;
 }

-#ifdef KC_MM_VM_FAULT_T
 static vm_fault_t scoutfs_data_page_mkwrite(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-#else
-static int scoutfs_data_page_mkwrite(struct vm_area_struct *vma,
-				     struct vm_fault *vmf)
-{
-#endif
 	struct page *page = vmf->page;
 	struct file *file = vma->vm_file;
 	struct inode *inode = file_inode(file);
@@ -2205,14 +2159,9 @@ out:
 	return ret;
 }

-#ifdef KC_MM_VM_FAULT_T
 static vm_fault_t scoutfs_data_filemap_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
-#else
-static int scoutfs_data_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-#endif
 	struct file *file = vma->vm_file;
 	struct inode *inode = file_inode(file);
 	struct scoutfs_inode_info *si = SCOUTFS_I(inode);
@@ -2247,15 +2196,11 @@ retry:
 		}
 	}

-#ifdef KC_MM_VM_FAULT_T
 	ret = filemap_fault(vmf);
-#else
-	ret = filemap_fault(vma, vmf);
-#endif

 out:
 	if (scoutfs_per_task_del(&si->pt_data_lock, &pt_ent))
-		kc_inode_dio_end(inode);
+		inode_dio_end(inode);
 	scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
 	if (scoutfs_data_wait_found(&dw)) {
 		err = scoutfs_data_wait(inode, &dw);
@@ -2273,9 +2218,6 @@ out:
 static const struct vm_operations_struct scoutfs_data_file_vm_ops = {
 	.fault		= scoutfs_data_filemap_fault,
 	.page_mkwrite	= scoutfs_data_page_mkwrite,
-#ifdef KC_MM_REMAP_PAGES
-	.remap_pages	= generic_file_remap_pages,
-#endif
 };

 static int scoutfs_file_mmap(struct file *file, struct vm_area_struct *vma)
@@ -2293,11 +2235,7 @@ const struct address_space_operations scoutfs_file_aops = {
 #else
 	.readpage		= scoutfs_readpage,
 #endif
-#ifndef KC_FILE_AOPS_READAHEAD
-	.readpages		= scoutfs_readpages,
-#else
 	.readahead		= scoutfs_readahead,
-#endif
 	.writepage		= scoutfs_writepage,
 	.writepages		= scoutfs_writepages,
 	.write_begin		= scoutfs_write_begin,
@@ -2305,17 +2243,10 @@ const struct address_space_operations scoutfs_file_aops = {
 };

 const struct file_operations scoutfs_file_fops = {
-#ifdef KC_LINUX_HAVE_FOP_AIO_READ
-	.read		= do_sync_read,
-	.write		= do_sync_write,
-	.aio_read	= scoutfs_file_aio_read,
-	.aio_write	= scoutfs_file_aio_write,
-#else
 	.read_iter	= scoutfs_file_read_iter,
 	.write_iter	= scoutfs_file_write_iter,
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= iter_file_splice_write,
-#endif
 	.mmap		= scoutfs_file_mmap,
 	.unlocked_ioctl	= scoutfs_ioctl,
 	.fsync		= scoutfs_file_fsync,
@@ -18,6 +18,7 @@
 #include <linux/xattr.h>
 #include <linux/namei.h>
 #include <linux/mm.h>
+#include <linux/iversion.h>

 #include "format.h"
 #include "file.h"
@@ -422,18 +423,7 @@ out:
 	else
 		inode = scoutfs_iget(sb, ino, 0, 0);

-	/*
-	 * We can't splice dir aliases into the dcache.  dir entries
-	 * might have changed on other nodes so our dcache could still
-	 * contain them, rather than having been moved in rename.  For
-	 * dirs, we use d_materialize_unique to remove any existing
-	 * aliases which must be stale.  Our inode numbers aren't reused
-	 * so inodes pointed to by entries can't change types.
-	 */
-	if (!IS_ERR_OR_NULL(inode) && S_ISDIR(inode->i_mode))
-		return d_materialise_unique(dentry, inode);
-	else
-		return d_splice_alias(inode, dentry);
+	return d_splice_alias(inode, dentry);
 }

 /*
@@ -962,7 +952,7 @@ static int scoutfs_unlink(struct inode *dir, struct dentry *dentry)
 {
 	struct super_block *sb = dir->i_sb;
 	struct inode *inode = dentry->d_inode;
-	struct kc_timespec ts = current_time(inode);
+	struct timespec64 ts = current_time(inode);
 	struct scoutfs_lock *inode_lock = NULL;
 	struct scoutfs_lock *orph_lock = NULL;
 	struct scoutfs_lock *dir_lock = NULL;
@@ -1197,24 +1187,6 @@ out:
 	return path;
 }

-#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
-static void *scoutfs_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-	char *path;
-
-	path = scoutfs_get_link_target(dentry);
-	if (!IS_ERR_OR_NULL(path))
-		nd_set_link(nd, path);
-	return path;
-}
-
-static void scoutfs_put_link(struct dentry *dentry, struct nameidata *nd,
-			     void *cookie)
-{
-	if (!IS_ERR_OR_NULL(cookie))
-		kfree(cookie);
-}
-#else
 static const char *scoutfs_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done)
 {
 	char *path;
@@ -1225,7 +1197,6 @@ static const char *scoutfs_get_link(struct dentry *dentry, struct inode *inode,

 	return path;
 }
-#endif

 /*
 * Symlink target paths can be annoyingly large.  We store relatively
@@ -1635,7 +1606,7 @@ static int scoutfs_rename_common(KC_VFS_NS_DEF
 	struct scoutfs_lock *orph_lock = NULL;
 	struct scoutfs_dirent new_dent;
 	struct scoutfs_dirent old_dent;
-	struct kc_timespec now;
+	struct timespec64 now;
 	bool ins_new = false;
 	bool del_new = false;
 	bool ins_old = false;
@@ -1647,6 +1618,9 @@ static int scoutfs_rename_common(KC_VFS_NS_DEF
 	int ret;
 	int err;

+	if (flags & ~RENAME_NOREPLACE)
+		return -EINVAL;
+
 	trace_scoutfs_rename(sb, old_dir, old_dentry, new_dir, new_dentry);

 	old_hash = dirent_name_hash(old_dentry->d_name.name,
@@ -1892,36 +1866,7 @@ out_unlock:
 	return ret;
 }

-#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
-static int scoutfs_rename(struct inode *old_dir,
-			  struct dentry *old_dentry, struct inode *new_dir,
-			  struct dentry *new_dentry)
-{
-	return scoutfs_rename_common(KC_VFS_INIT_NS
-				     old_dir, old_dentry, new_dir, new_dentry, 0);
-}
-#endif

-static int scoutfs_rename2(KC_VFS_NS_DEF
-			  struct inode *old_dir,
-			  struct dentry *old_dentry, struct inode *new_dir,
-			  struct dentry *new_dentry, unsigned int flags)
-{
-	if (flags & ~RENAME_NOREPLACE)
-		return -EINVAL;
-
-	return scoutfs_rename_common(KC_VFS_NS
-				     old_dir, old_dentry, new_dir, new_dentry, flags);
-}
-
-#ifdef KC_FMODE_KABI_ITERATE
-/* we only need this to set the iterate flag for kabi :/ */
-static int scoutfs_dir_open(struct inode *inode, struct file *file)
-{
-        file->f_mode |= FMODE_KABI_ITERATE;
-        return 0;
-}
-#endif

 static int scoutfs_tmpfile(KC_VFS_NS_DEF
 			   struct inode *dir,
@@ -1991,29 +1936,15 @@ out:
 }

 const struct inode_operations scoutfs_symlink_iops = {
-#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
-	.readlink       = generic_readlink,
-	.follow_link    = scoutfs_follow_link,
-	.put_link       = scoutfs_put_link,
-#else
 	.get_link	= scoutfs_get_link,
-#endif
 	.getattr	= scoutfs_getattr,
 	.setattr	= scoutfs_setattr,
-#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
-	.setxattr	= generic_setxattr,
-	.getxattr	= generic_getxattr,
-#endif
 	.listxattr	= scoutfs_listxattr,
-#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
-	.removexattr	= generic_removexattr,
-#endif
 #ifdef KC_GET_INODE_ACL
 	.get_inode_acl	= scoutfs_get_acl,
 #else
 	.get_acl	= scoutfs_get_acl,
 #endif
-#ifndef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
 	.tmpfile	= scoutfs_tmpfile,
 	.rename		= scoutfs_rename_common,
 	.symlink	= scoutfs_symlink,
@@ -2022,26 +1953,17 @@ const struct inode_operations scoutfs_symlink_iops = {
 	.mkdir		= scoutfs_mkdir,
 	.create		= scoutfs_create,
 	.lookup		= scoutfs_lookup,
-#endif
 };

 const struct file_operations scoutfs_dir_fops = {
 	.iterate	= scoutfs_readdir,
-#ifdef KC_FMODE_KABI_ITERATE
-	.open		= scoutfs_dir_open,
-#endif
 	.unlocked_ioctl	= scoutfs_ioctl,
 	.fsync		= scoutfs_file_fsync,
 	.llseek		= generic_file_llseek,
 };


-#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
-const struct inode_operations_wrapper scoutfs_dir_iops = {
-	.ops = {
-#else
 const struct inode_operations scoutfs_dir_iops = {
-#endif
 	.lookup		= scoutfs_lookup,
 	.mknod		= scoutfs_mknod,
 	.create		= scoutfs_create,
@@ -2051,30 +1973,15 @@ const struct inode_operations scoutfs_dir_iops = {
 	.rmdir		= scoutfs_unlink,
 	.getattr	= scoutfs_getattr,
 	.setattr	= scoutfs_setattr,
-#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
-	.rename		= scoutfs_rename,
-	.setxattr	= generic_setxattr,
-	.getxattr	= generic_getxattr,
-	.removexattr	= generic_removexattr,
-#endif
 	.listxattr	= scoutfs_listxattr,
 #ifdef KC_GET_INODE_ACL
 	.get_inode_acl	= scoutfs_get_acl,
 #else
 	.get_acl	= scoutfs_get_acl,
 #endif
-#ifdef KC_SET_ACL_DENTRY
 	.set_acl	= scoutfs_set_acl,
-#endif
 	.symlink	= scoutfs_symlink,
 	.permission	= scoutfs_permission,
-#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
-	},
-#endif
 	.tmpfile	= scoutfs_tmpfile,
-#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
-	.rename2	= scoutfs_rename2,
-#else
-	.rename		= scoutfs_rename2,
-#endif
+	.rename		= scoutfs_rename_common,
 };
@@ -5,11 +5,7 @@
 #include "lock.h"

 extern const struct file_operations scoutfs_dir_fops;
-#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
-extern const struct inode_operations_wrapper scoutfs_dir_iops;
-#else
 extern const struct inode_operations scoutfs_dir_iops;
-#endif
 extern const struct inode_operations scoutfs_symlink_iops;

 extern const struct dentry_operations scoutfs_dentry_ops;
@@ -222,7 +222,7 @@ static struct attribute *fence_attrs[] = {

 static void fence_timeout(struct timer_list *timer)
 {
-	struct pending_fence *fence = from_timer(fence, timer, timer);
+	struct pending_fence *fence = timer_container_of(fence, timer, timer);
 	struct super_block *sb = fence->sb;
 	DECLARE_FENCE_INFO(sb, fi);

@@ -424,8 +424,7 @@ int scoutfs_fence_setup(struct super_block *sb)
 		goto out;
 	}

-	fi->wq = alloc_workqueue("scoutfs_fence",
-				 WQ_UNBOUND | WQ_NON_REENTRANT, 0);
+	fi->wq = alloc_workqueue("scoutfs_fence", WQ_UNBOUND, 0);
 	if (!fi->wq) {
 		ret = -ENOMEM;
 		goto out;
@@ -30,133 +30,6 @@
 #include "omap.h"
 #include "quota.h"

-#ifdef KC_LINUX_HAVE_FOP_AIO_READ
-/*
- * Start a high level file read.  We check for offline extents in the
- * read region here so that we only check the extents once.  We use the
- * dio count to prevent releasing while we're reading after we've
- * checked the extents.
- */
-ssize_t scoutfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
-			      unsigned long nr_segs, loff_t pos)
-{
-	struct file *file = iocb->ki_filp;
-	struct inode *inode = file_inode(file);
-	struct scoutfs_inode_info *si = SCOUTFS_I(inode);
-	struct super_block *sb = inode->i_sb;
-	struct scoutfs_lock *scoutfs_inode_lock = NULL;
-	SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
-	DECLARE_DATA_WAIT(dw);
-	int ret;
-
-retry:
-	/* protect checked extents from release */
-	inode_lock(inode);
-	atomic_inc(&inode->i_dio_count);
-	inode_unlock(inode);
-
-	ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
-				 SCOUTFS_LKF_REFRESH_INODE, inode, &scoutfs_inode_lock);
-	if (ret)
-		goto out;
-
-	if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, scoutfs_inode_lock)) {
-		ret = scoutfs_data_wait_check_iov(inode, iov, nr_segs, pos,
-						  SEF_OFFLINE,
-						  SCOUTFS_IOC_DWO_READ,
-						  &dw, scoutfs_inode_lock);
-		if (ret != 0)
-			goto out;
-	} else {
-		WARN_ON_ONCE(true);
-	}
-
-	ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
-
-out:
-	inode_dio_done(inode);
-	scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
-	scoutfs_unlock(sb, scoutfs_inode_lock, SCOUTFS_LOCK_READ);
-
-	if (scoutfs_data_wait_found(&dw)) {
-		ret = scoutfs_data_wait(inode, &dw);
-		if (ret == 0)
-			goto retry;
-	}
-
-	return ret;
-}
-
-ssize_t scoutfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-			       unsigned long nr_segs, loff_t pos)
-{
-	struct file *file = iocb->ki_filp;
-	struct inode *inode = file_inode(file);
-	struct scoutfs_inode_info *si = SCOUTFS_I(inode);
-	struct super_block *sb = inode->i_sb;
-	struct scoutfs_lock *scoutfs_inode_lock = NULL;
-	SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
-	DECLARE_DATA_WAIT(dw);
-	int ret;
-
-	if (iocb->ki_left == 0) /* Does this even happen? */
-		return 0;
-
-retry:
-	inode_lock(inode);
-	ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
-				 SCOUTFS_LKF_REFRESH_INODE, inode, &scoutfs_inode_lock);
-	if (ret)
-		goto out;
-
-	ret = scoutfs_inode_check_retention(inode);
-	if (ret < 0)
-		goto out;
-
-	ret = scoutfs_complete_truncate(inode, scoutfs_inode_lock);
-	if (ret)
-		goto out;
-
-	if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, scoutfs_inode_lock)) {
-		/* data_version is per inode, whole file must be online */
-		ret = scoutfs_data_wait_check(inode, 0, i_size_read(inode),
-					      SEF_OFFLINE,
-					      SCOUTFS_IOC_DWO_WRITE,
-					      &dw, scoutfs_inode_lock);
-		if (ret != 0)
-			goto out;
-	}
-
-	ret = scoutfs_quota_check_data(sb, inode);
-	if (ret)
-		goto out;
-
-	/* XXX: remove SUID bit */
-
-	ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
-
-out:
-	scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
-	scoutfs_unlock(sb, scoutfs_inode_lock, SCOUTFS_LOCK_WRITE);
-	inode_unlock(inode);
-
-	if (scoutfs_data_wait_found(&dw)) {
-		ret = scoutfs_data_wait(inode, &dw);
-		if (ret == 0)
-			goto retry;
-	}
-
-	if (ret > 0 || ret == -EIOCBQUEUED) {
-		ssize_t err;
-
-		err = generic_write_sync(file, pos, ret);
-		if (err < 0 && ret > 0)
-			ret = err;
-	}
-
-	return ret;
-}
-#else
 ssize_t scoutfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct file *file = iocb->ki_filp;
@@ -265,7 +138,6 @@ out:

 	return ret;
 }
-#endif

 int scoutfs_permission(KC_VFS_NS_DEF
 		       struct inode *inode, int mask)
@@ -1,15 +1,8 @@
 #ifndef _SCOUTFS_FILE_H_
 #define _SCOUTFS_FILE_H_

-#ifdef KC_LINUX_HAVE_FOP_AIO_READ
-ssize_t scoutfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
-			      unsigned long nr_segs, loff_t pos);
-ssize_t scoutfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-			       unsigned long nr_segs, loff_t pos);
-#else
 ssize_t scoutfs_file_read_iter(struct kiocb *, struct iov_iter *);
 ssize_t scoutfs_file_write_iter(struct kiocb *, struct iov_iter *);
-#endif
 int scoutfs_permission(KC_VFS_NS_DEF
 		       struct inode *inode, int mask);
 loff_t scoutfs_file_llseek(struct file *file, loff_t offset, int whence);
@@ -114,42 +114,6 @@ static struct scoutfs_block *read_bloom_ref(struct super_block *sb, struct scout
 	return bl;
 }

-/*
- * Returns >0 if there was a bloom block and all the bits were present.
- */
-static int all_bloom_bits_present(struct super_block *sb, struct scoutfs_block_ref *ref,
-				  struct forest_bloom_nrs *bloom)
-{
-	struct scoutfs_bloom_block *bb;
-	struct scoutfs_block *bl;
-	int i;
-
-	if (ref->blkno == 0)
-		return 0;
-
-	bl = read_bloom_ref(sb, ref);
-	if (IS_ERR(bl))
-		return PTR_ERR(bl);
-
-	bb = bl->data;
-
-	for (i = 0; i < ARRAY_SIZE(bloom->nrs); i++) {
-		if (!test_bit_le(bloom->nrs[i], bb->bits))
-			break;
-	}
-
-	scoutfs_block_put(sb, bl);
-
-	/* one of the bloom bits wasn't set */
-	if (i != ARRAY_SIZE(bloom->nrs)) {
-		scoutfs_inc_counter(sb, forest_bloom_fail);
-		return 0;
-	}
-
-	scoutfs_inc_counter(sb, forest_bloom_pass);
-	return 1;
-}
-
 /*
 * This is an unlocked iteration across all the btrees to find a hint at
 * the next key that the caller could read.  It's used to find out what
@@ -263,13 +227,9 @@ static int forest_read_items(struct super_block *sb, struct scoutfs_key *key, u6
 }

 /*
- * Call the caller's callback for every item in the leaf blocks in each
- * forest btree that contain the caller's key.
- *
- * If a bloom key is provided then each log tree's bloom block is
- * checked and only trees with all the bloom key's bloom bits set will
- * be read from.  When the bloom key is null all trees will be read
- * from.
+ * For each forest btree whose bloom block has all of the bloom key's
+ * bits set, and so might store items for it, call the caller's callback
+ * for every item in the leaf block of that tree which contains the key.
 *
 * The btree iter calls clamp the caller's range to the tightest range
 * that covers all the blocks.  Any keys outside of this range can't be
@@ -288,17 +248,24 @@ int scoutfs_forest_read_items_roots(struct super_block *sb, struct scoutfs_net_r
 		.cb_arg = arg,
 	};
 	struct scoutfs_log_trees lt;
+	struct scoutfs_bloom_block *bb;
 	struct forest_bloom_nrs bloom;
 	SCOUTFS_BTREE_ITEM_REF(iref);
+	struct scoutfs_block *bl;
 	struct scoutfs_key ltk;
+	struct scoutfs_key orig_start = *start;
+	struct scoutfs_key orig_end = *end;
 	int ret;
+	int i;

 	scoutfs_inc_counter(sb, forest_read_items);
-	if (bloom_key)
-		calc_bloom_nrs(&bloom, bloom_key);
+	calc_bloom_nrs(&bloom, bloom_key);

 	trace_scoutfs_forest_using_roots(sb, &roots->fs_root, &roots->logs_root);

+	*start = orig_start;
+	*end = orig_end;
+
 	/* start with fs root items */
 	rid.fic |= FIC_FS_ROOT;
 	ret = scoutfs_btree_read_items(sb, &roots->fs_root, key, start, end,
@@ -325,17 +292,30 @@ int scoutfs_forest_read_items_roots(struct super_block *sb, struct scoutfs_net_r
 			goto out; /* including stale */
 		}

-		/* we're not expecting -ENOENT from _read_items */
-		if (lt.item_root.ref.blkno == 0)
+		if (lt.bloom_ref.blkno == 0)
 			continue;

-		if (bloom_key) {
-			ret = all_bloom_bits_present(sb, &lt.bloom_ref, &bloom);
-			if (ret < 0)
-				goto out;
-			if (ret == 0)
-				continue;
+		bl = read_bloom_ref(sb, &lt.bloom_ref);
+		if (IS_ERR(bl)) {
+			ret = PTR_ERR(bl);
+			goto out;
 		}
+		bb = bl->data;
+
+		for (i = 0; i < ARRAY_SIZE(bloom.nrs); i++) {
+			if (!test_bit_le(bloom.nrs[i], bb->bits))
+				break;
+		}
+
+		scoutfs_block_put(sb, bl);
+
+		/* one of the bloom bits wasn't set */
+		if (i != ARRAY_SIZE(bloom.nrs)) {
+			scoutfs_inc_counter(sb, forest_bloom_fail);
+			continue;
+		}
+
+		scoutfs_inc_counter(sb, forest_bloom_pass);

 		if ((le64_to_cpu(lt.flags) & SCOUTFS_LOG_TREES_FINALIZED) &&
 		    (merge_input_seq == 0 ||
@@ -803,7 +783,7 @@ int scoutfs_forest_setup(struct super_block *sb)
 			  scoutfs_forest_log_merge_worker);
 	sbi->forest_info = finf;

-	finf->workq = alloc_workqueue("scoutfs_log_merge", WQ_NON_REENTRANT |
+	finf->workq = alloc_workqueue("scoutfs_log_merge",
 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
 	if (!finf->workq) {
 		ret = -ENOMEM;
@@ -21,6 +21,7 @@
 #include <linux/list_sort.h>
 #include <linux/workqueue.h>
 #include <linux/buffer_head.h>
+#include <linux/iversion.h>

 #include "format.h"
 #include "super.h"
@@ -143,40 +144,26 @@ void scoutfs_destroy_inode(struct inode *inode)
 static const struct inode_operations scoutfs_file_iops = {
 	.getattr	= scoutfs_getattr,
 	.setattr	= scoutfs_setattr,
-#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
-	.setxattr	= generic_setxattr,
-	.getxattr	= generic_getxattr,
-	.removexattr	= generic_removexattr,
-#endif
 	.listxattr	= scoutfs_listxattr,
 #ifdef KC_GET_INODE_ACL
 	.get_inode_acl	= scoutfs_get_acl,
 #else
 	.get_acl	= scoutfs_get_acl,
 #endif
-#ifdef KC_SET_ACL_DENTRY
 	.set_acl	= scoutfs_set_acl,
-#endif
 	.fiemap		= scoutfs_data_fiemap,
 };

 static const struct inode_operations scoutfs_special_iops = {
 	.getattr	= scoutfs_getattr,
 	.setattr	= scoutfs_setattr,
-#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
-	.setxattr	= generic_setxattr,
-	.getxattr	= generic_getxattr,
-	.removexattr	= generic_removexattr,
-#endif
 	.listxattr	= scoutfs_listxattr,
 #ifdef KC_GET_INODE_ACL
 	.get_inode_acl	= scoutfs_get_acl,
 #else
 	.get_acl	= scoutfs_get_acl,
 #endif
-#ifdef KC_SET_ACL_DENTRY
 	.set_acl	= scoutfs_set_acl,
-#endif
 };

 /*
@@ -192,12 +179,7 @@ static void set_inode_ops(struct inode *inode)
 		inode->i_fop = &scoutfs_file_fops;
 		break;
 	case S_IFDIR:
-#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
-		inode->i_op = &scoutfs_dir_iops.ops;
-		inode->i_flags |= S_IOPS_WRAPPER;
-#else
 		inode->i_op = &scoutfs_dir_iops;
-#endif
 		inode->i_fop = &scoutfs_dir_fops;
 		break;
 	case S_IFLNK:
@@ -381,18 +363,11 @@ int scoutfs_inode_refresh(struct inode *inode, struct scoutfs_lock *lock)
 	return ret;
 }

-#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
-int scoutfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
-		    struct kstat *stat)
-{
-	struct inode *inode = dentry->d_inode;
-#else
 int scoutfs_getattr(KC_VFS_NS_DEF
 		    const struct path *path, struct kstat *stat,
 		    u32 request_mask, unsigned int query_flags)
 {
 	struct inode *inode = d_inode(path->dentry);
-#endif
 	struct super_block *sb = inode->i_sb;
 	struct scoutfs_lock *lock = NULL;
 	int ret;
@@ -549,6 +524,7 @@ retry:
 				goto out;
 			if (scoutfs_data_wait_found(&dw)) {
 				scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
+				lock = NULL;

 				/* XXX callee locks instead? */
 				inode_unlock(inode);
@@ -23,7 +23,7 @@ struct scoutfs_inode_info {
 	u64 offline_blocks;
 	u64 proj;
 	u32 flags;
-	struct kc_timespec crtime;
+	struct timespec64 crtime;

 	/*
 	 * Protects per-inode extent items, most particularly readers
@@ -131,14 +131,9 @@ int scoutfs_complete_truncate(struct inode *inode, struct scoutfs_lock *lock);
 int scoutfs_inode_check_retention(struct inode *inode);

 int scoutfs_inode_refresh(struct inode *inode, struct scoutfs_lock *lock);
-#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
-int scoutfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
-		    struct kstat *stat);
-#else
 int scoutfs_getattr(KC_VFS_NS_DEF
 		    const struct path *path, struct kstat *stat,
 		    u32 request_mask, unsigned int query_flags);
-#endif
 int scoutfs_setattr(KC_VFS_NS_DEF
 		    struct dentry *dentry, struct iattr *attr);

@@ -49,7 +49,6 @@
 #include "quota.h"
 #include "scoutfs_trace.h"
 #include "util.h"
-#include "raw.h"

 /*
 * We make inode index items coherent by locking fixed size regions of
@@ -490,6 +489,7 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
 	struct scoutfs_lock *lock = NULL;
 	struct kiocb kiocb;
 	struct iovec iov;
+	struct iov_iter iter;
 	size_t written;
 	loff_t end_size;
 	loff_t isize;
@@ -515,10 +515,6 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
 	/* the iocb is really only used for the file pointer :P */
 	init_sync_kiocb(&kiocb, file);
 	kiocb.ki_pos = args.offset;
-#ifdef KC_LINUX_AIO_KI_LEFT
-	kiocb.ki_left = args.length;
-	kiocb.ki_nbytes = args.length;
-#endif
 	iov.iov_base = (void __user *)(unsigned long)args.buf_ptr;
 	iov.iov_len = args.length;

@@ -560,8 +556,9 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
 	pos = args.offset;
 	written = 0;
 	do {
-		ret = generic_file_buffered_write(&kiocb, &iov, 1, pos, &pos,
-						  args.length, written);
+		iov_iter_init(&iter, WRITE, &iov, 1, args.length);
+		ret = kc_generic_perform_write(&kiocb, &iter, pos);
+
 		BUG_ON(ret == -EIOCBQUEUED);
 		if (ret > 0)
 			written += ret;
@@ -1740,65 +1737,39 @@ out:
 	return ret;
 }

-static long scoutfs_ioc_raw_read_meta_seq(struct file *file, unsigned long arg)
+static long scoutfs_ioc_inject_totl_delta(struct file *file, unsigned long arg)
 {
 	struct super_block *sb = file_inode(file)->i_sb;
-	struct scoutfs_ioctl_raw_read_meta_seq __user *urms = (void __user *)arg;
-	struct scoutfs_ioctl_raw_read_meta_seq rms;
+	struct scoutfs_ioctl_inject_totl_delta __user *uitd = (void __user *)arg;
+	struct scoutfs_ioctl_inject_totl_delta itd;
+	struct scoutfs_xattr_totl_val tval;
+	struct scoutfs_lock *lock = NULL;
+	struct scoutfs_key key;
 	int ret;

-	if (!capable(CAP_SYS_ADMIN)) {
-		ret = -EPERM;
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (copy_from_user(&itd, uitd, sizeof(itd)))
+		return -EFAULT;
+
+	scoutfs_xattr_init_totl_key(&key, itd.name);
+	tval.total = cpu_to_le64((u64)itd.total);
+	tval.count = cpu_to_le64((u64)itd.count);
+
+	ret = scoutfs_lock_xattr_totl(sb, SCOUTFS_LOCK_WRITE_ONLY, 0, &lock);
+	if (ret < 0)
 		goto out;
-	}

-	if (copy_from_user(&rms, urms, sizeof(rms))) {
-		ret = -EFAULT;
-		goto out;
-	}
+	ret = scoutfs_hold_trans(sb, true);
+	if (ret < 0)
+		goto unlock;

-	if (rms.results_size == 0) {
-		ret = 0;
-		goto out;
-	}
+	ret = scoutfs_item_delta(sb, &key, &tval, sizeof(tval), lock);

-	if (rms.results_size < sizeof(struct scoutfs_ioctl_meta_seq) ||
-	    rms.results_size > INT_MAX) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	ret = scoutfs_raw_read_meta_seq(sb, &rms, &rms.last);
-	if (ret >= 0 && copy_to_user(&urms->last, &rms.last, sizeof(rms.last)))
-		ret = -EFAULT;
-out:
-	return ret;
-}
-
-static long scoutfs_ioc_raw_read_inode_info(struct file *file, unsigned long arg)
-{
-	struct super_block *sb = file_inode(file)->i_sb;
-	struct scoutfs_ioctl_raw_read_inode_info __user *urii = (void __user *)arg;
-	struct scoutfs_ioctl_raw_read_inode_info rii;
-	int ret;
-
-	if (!capable(CAP_SYS_ADMIN)) {
-		ret = -EPERM;
-		goto out;
-	}
-
-	if (copy_from_user(&rii, urii, sizeof(rii))) {
-		ret = -EFAULT;
-		goto out;
-	}
-
-	if (rii.inos_count == 0 || rii.results_size > INT_MAX ||
-	    !IS_ALIGNED(rii.inos_ptr, __alignof__(__u64))) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	ret = scoutfs_raw_read_inode_info(sb, &rii);
+	scoutfs_release_trans(sb);
+unlock:
+	scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE_ONLY);
 out:
 	return ret;
 }
@@ -1854,10 +1825,8 @@ long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		return scoutfs_ioc_read_xattr_index(file, arg);
 	case SCOUTFS_IOC_PUNCH_OFFLINE:
 		return scoutfs_ioc_punch_offline(file, arg);
-	case SCOUTFS_IOC_RAW_READ_META_SEQ:
-		return scoutfs_ioc_raw_read_meta_seq(file, arg);
-	case SCOUTFS_IOC_RAW_READ_INODE_INFO:
-		return scoutfs_ioc_raw_read_inode_info(file, arg);
+	case SCOUTFS_IOC_INJECT_TOTL_DELTA:
+		return scoutfs_ioc_inject_totl_delta(file, arg);
 	}

 	return -ENOTTY;
@@ -15,6 +15,20 @@

 #define SCOUTFS_IOCTL_MAGIC 0xE8  /* arbitrarily chosen hole in ioctl-number.rst */

+/*
+ * Packed scoutfs keys rarely cross the ioctl boundary so we have a
+ * translation struct.
+ */
+struct scoutfs_ioctl_key {
+	__le64	_sk_first;
+	__le64	_sk_second;
+	__le64	_sk_third;
+	__u8	_sk_fourth;
+	__u8	sk_type;
+	__u8	sk_zone;
+	__u8	_pad[5];
+};
+
 struct scoutfs_ioctl_walk_inodes_entry {
 	__u64 major;
 	__u64 ino;
@@ -863,168 +877,16 @@ struct scoutfs_ioctl_punch_offline {
 	_IOW(SCOUTFS_IOCTL_MAGIC, 24, struct scoutfs_ioctl_punch_offline)

 /*
- * Read meta_seq items without cluster locking.
- *
- * @start is the first meta_seq item value that could be returned.
- * {0,0} is the minimum.
- *
- * @end is the last meta_seq item value that could be returned.
- * {U64_MAX, U64_MAX} is the maximum.
- *
- * @last is only set on success from the call.  It's the last meta_seq
- * item that could have been returned.  This lets the caller detect that
- * the full input range wasn't explored.  Another call can be made with
- * start set to just after this.
- *
- * @results_ptr is a pointer to an array of (struct
- * scoutfs_ioctl_meta_seq) elements that were found in the input range.
- *
- * @results_size is the count of elements in the results_ptr array and
- * the maximum number of results that can be returned.  There must be
- * room for at least one result.
- *
- * Return existing meta_seq items starting from @start until @last.
- * Partial results can be returned and is indicated by @last being set
- * to an item before @last.
- *
- * The results are sorted first by increasing meta_seq and then by
- * increasing ino.  All of the results are from one version of file
- * system metadata.  This means that an inode can not be found multiple
- * times within the results of one call.
- *
- * This call ignores currently dirty transactions and reads persistent
- * items directly.  A transaction can be written after this call and
- * cause meta_seq items to appear before or within the results from this
- * call.
- *
- * The number of meta_seq items stored in the results buffer is returned
- * and @last is updated.  0 items can be returned if none are found
- * within the input range.
- *
- * Unique errors:
- *
- *  -EINVAL: The result count was 0 or greater than INT_MAX.
- *
- *  -ESTALE: The results could not be read from one stable version of
- *    file system metadata.  Decrease the number of inodes requested.
+ * Inject a signed (total, count) delta into the totl item keyed by @name,
+ * whose elements match the trailing dotted u64s (a.b.c) of a totl xattr name.
 */
-struct scoutfs_ioctl_meta_seq {
-	__u64 meta_seq;
-	__u64 ino;
-};
-struct scoutfs_ioctl_raw_read_meta_seq {
-	struct scoutfs_ioctl_meta_seq start;
-	struct scoutfs_ioctl_meta_seq end;
-	struct scoutfs_ioctl_meta_seq last;
-	__u64 results_ptr;
-	__u32 results_size;
-	__u32 _pad;
-};
-#define SCOUTFS_IOC_RAW_READ_META_SEQ \
-	_IOR(SCOUTFS_IOCTL_MAGIC, 25, struct scoutfs_ioctl_raw_read_meta_seq)
-
-
-/*
- * Read inode metadata without cluster locking.
- *
- * @inos_ptr is a pointer to an aligned array of 64bit inode numbers.
- *
- * @inos_count is the number of elements in the array.  The inode
- * numbers must not be zero, must strictly increase, and must not
- * contain any duplicates.
- *
- * @names_ptr is a pointer to a byte array of xattr names to return with
- * each inode.  The names are identical to those used in
- * {get,set}xattr(2).  The names must be null terminated and no two
- * names may be equal.
- *
- * @names_count is the number of names that will be found in the
- * names_ptr buffer.
- *
- * @results_ptr is a pointer to a buffer that will be filled by the read
- * inode info results.  The result structs and payloads are not aligned.
- * Callers will almost certainly need to copy them into aligned
- * addresses before referencing their contents.
- *
- * @results_size is the number of bytes available in the results_ptr
- * buffer.
- *
- * For each inode an _INODE result will always be returned.  Then a
- * _XATTR result will be returned for each xattr on the inode that
- * matches one of the given input names.
- *
- * Each call will not return partial results. -ERANGE is returned if the
- * results for the requested inodes do not fit in the results buffer.
- *
- * The info for one call is from one consistent version of the file
- * system metadata.  The call can have to retry if it sees metadata
- * change during its call.  -ESTALE will be returned if it was not able
- * to read all the inodes info from one metadata version.  The number of
- * inodes being read can be decreased to avoid this.
- *
- * Inodes with an nlink of 0 are not returned.
- *
- * The size in bytes of filled results is returned.  A non-zero return
- * will always include at least one full
- * (struct scoutfs_ioctl_raw_read_result) header.
- *
- * Unique errors:
- *
- *  -EINVAL: The inode count can't be zero. The inos ptr must be aligned
- *    to __u64 alignment.  The results buffer size can't be larger than
- *    INT_MAX.  Inode numbers can't be zero, must be sorted, and can't
- *    have duplicates.  The xattr names must be unique, null terminated,
- *    and less than 256 bytes long.
- *
- *  -ERANGE: The results for the requested inodes do not fit in the
- *    results buffer.  Increase the buffer size (perhaps allowing for all
- *    xattrs with large values) or decrease the number of inodes per call.
- *
- *  -ESTALE: The results could not be read from one stable version of
- *    file system metadata.  Decrease the number of inodes requested.
- *
- *  -EUCLEAN: Internal xattr metadata is inconsistent.
- */
-
-struct scoutfs_ioctl_raw_read_inode_info {
-	__u64 inos_ptr;
-	__u32 inos_count;
-	__u32 names_count;
-	__u64 names_ptr;
-	__u64 results_ptr;
-	__u32 results_size;
-	__u8  _pad[4];
+struct scoutfs_ioctl_inject_totl_delta {
+	__u64	name[SCOUTFS_IOCTL_XATTR_TOTAL_NAME_NR];
+	__s64	total;
+	__s64	count;
 };

-/*
- * @type is one of the enums that determines the type of the following
- * result payload.
- *
- * @size is the number of bytes of result payload immediately following
- * the result struct.  It does not include the size of the result struct
- * header.
- */
-struct scoutfs_ioctl_raw_read_result {
-	__u32 size;
-	__u8  _pad[7];
-	__u8 type;
-};
-
-/*
- * The _INODE result contains an initial 64bit inode number followed by a
- * struct scoutfs_inode as defined in format.h.  The size includes the
- * 8byte initial inode number.  With that subtracted the size of the
- * inode struct defines its version (and so the fields it supports).
- */
-#define SCOUTFS_IOC_RAW_READ_RESULT_INODE	1
-/*
- * The result payload contains the null terminated name and the value.
- * The value size can be found by subtracting the null terminated name
- * length from the result size.
- */
-#define SCOUTFS_IOC_RAW_READ_RESULT_XATTR	2
-
-#define SCOUTFS_IOC_RAW_READ_INODE_INFO \
-	_IOR(SCOUTFS_IOCTL_MAGIC, 25, struct scoutfs_ioctl_raw_read_inode_info)
+#define SCOUTFS_IOC_INJECT_TOTL_DELTA \
+	_IOW(SCOUTFS_IOCTL_MAGIC, 25, struct scoutfs_ioctl_inject_totl_delta)

 #endif
@@ -79,9 +79,6 @@ struct item_cache_info {
 	struct super_block *sb;
 	struct item_percpu_pages __percpu *pcpu_pages;
 	KC_DEFINE_SHRINKER(shrinker);
-#ifdef KC_CPU_NOTIFIER
-	struct notifier_block notifier;
-#endif

 	/* often walked, but per-cpu refs are fast path */
 	rwlock_t rwlock;
@@ -2584,22 +2581,6 @@ static unsigned long item_cache_scan_objects(struct shrinker *shrink,
 	return freed;
 }

-#ifdef KC_CPU_NOTIFIER
-static int item_cpu_callback(struct notifier_block *nfb,
-			     unsigned long action, void *hcpu)
-{
-	struct item_cache_info *cinf = container_of(nfb,
-						    struct item_cache_info,
-						    notifier);
-	struct super_block *sb = cinf->sb;
-	unsigned long cpu = (unsigned long)hcpu;
-
-        if (action == CPU_DEAD)
-		drop_pcpu_pages(sb, cinf, cpu);
-
-	return NOTIFY_OK;
-}
-#endif

 int scoutfs_item_setup(struct super_block *sb)
 {
@@ -2630,10 +2611,6 @@ int scoutfs_item_setup(struct super_block *sb)
 	KC_INIT_SHRINKER_FUNCS(&cinf->shrinker, item_cache_count_objects,
 			       item_cache_scan_objects);
 	KC_REGISTER_SHRINKER(&cinf->shrinker, "scoutfs-item:" SCSBF, SCSB_ARGS(sb));
-#ifdef KC_CPU_NOTIFIER
-        cinf->notifier.notifier_call = item_cpu_callback;
-        register_hotcpu_notifier(&cinf->notifier);
-#endif

 	sbi->item_cache_info = cinf;
 	return 0;
@@ -2651,9 +2628,6 @@ void scoutfs_item_destroy(struct super_block *sb)
 	int cpu;

 	if (cinf) {
-#ifdef KC_CPU_NOTIFIER
-		unregister_hotcpu_notifier(&cinf->notifier);
-#endif
 		KC_UNREGISTER_SHRINKER(&cinf->shrinker);

 		for_each_possible_cpu(cpu)
@@ -3,119 +3,8 @@

 #include "kernelcompat.h"

-#ifdef KC_SHRINKER_SHRINK
-#include <linux/shrinker.h>
-/*
- * If a target doesn't have that .{count,scan}_objects() interface then
- * we have a .shrink() helper that performs the shrink work in terms of
- * count/scan.
- */
-int kc_shrink_wrapper_fn(struct shrinker *shrink, struct shrink_control *sc)
-{
-	struct kc_shrinker_wrapper *wrapper = container_of(shrink, struct kc_shrinker_wrapper, shrink);
-	unsigned long nr;
-	unsigned long rc;
-
-	if (sc->nr_to_scan != 0) {
-		rc = wrapper->scan_objects(shrink, sc);
-		/* translate magic values to the equivalent for older kernels */
-		if (rc == SHRINK_STOP)
-			return -1;
-		else if (rc == SHRINK_EMPTY)
-			return 0;
-	}
-
-	nr = wrapper->count_objects(shrink, sc);
-
-	return min_t(unsigned long, nr, INT_MAX);
-}
-#endif
-
-#ifndef KC_CURRENT_TIME_INODE
-struct timespec64 kc_current_time(struct inode *inode)
-{
-	struct timespec64 now;
-	unsigned gran;
-
-	getnstimeofday64(&now);
-
-	if (unlikely(!inode->i_sb)) {
-		WARN(1, "current_time() called with uninitialized super_block in the inode");
-		return now;
-	}
-
-	gran = inode->i_sb->s_time_gran;
-
-	/* Avoid division in the common cases 1 ns and 1 s. */
-	if (gran == 1) {
-		/* nothing */
-	} else if (gran == NSEC_PER_SEC) {
-		now.tv_nsec = 0;
-	} else if (gran > 1 && gran < NSEC_PER_SEC) {
-		now.tv_nsec -= now.tv_nsec % gran;
-	} else {
-		WARN(1, "illegal file time granularity: %u", gran);
-	}
-
-	return now;
-}
-#endif
-
-#ifndef KC_GENERIC_FILE_BUFFERED_WRITE
-ssize_t
-kc_generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
-			       unsigned long nr_segs, loff_t pos, loff_t *ppos,
-			       size_t count, ssize_t written)
-{
-	ssize_t status;
-	struct iov_iter i;
-
-	iov_iter_init(&i, WRITE, iov, nr_segs, count);
-	status = kc_generic_perform_write(iocb, &i, pos);
-
-	if (likely(status >= 0)) {
-		written += status;
-		*ppos = pos + status;
-	}
-
-	return written ? written : status;
-}
-#endif
-
 #include <linux/list_lru.h>

-#ifdef KC_LIST_LRU_WALK_CB_ITEM_LOCK
-static enum lru_status kc_isolate(struct list_head *item, spinlock_t *lock, void *cb_arg)
-{
-	struct kc_isolate_args *args = cb_arg;
-
-	/* isolate doesn't use list, nr_items updated in caller */
-	return args->isolate(item, NULL, args->cb_arg);
-}
-
-unsigned long kc_list_lru_walk(struct list_lru *lru, kc_list_lru_walk_cb_t isolate, void *cb_arg,
-				      unsigned long nr_to_walk)
-{
-	struct kc_isolate_args args = {
-		.isolate = isolate,
-		.cb_arg = cb_arg,
-	};
-
-	return list_lru_walk(lru, kc_isolate, &args, nr_to_walk);
-}
-
-unsigned long kc_list_lru_shrink_walk(struct list_lru *lru, struct shrink_control *sc,
-				      kc_list_lru_walk_cb_t isolate, void *cb_arg)
-{
-	struct kc_isolate_args args = {
-		.isolate = isolate,
-		.cb_arg = cb_arg,
-	};
-
-	return list_lru_shrink_walk(lru, sc, kc_isolate, &args);
-}
-#endif
-
 #ifdef KC_LIST_LRU_WALK_CB_LIST_LOCK
 static enum lru_status kc_isolate(struct list_head *item, struct list_lru_one *list,
 				  spinlock_t *lock, void *cb_arg)
@@ -4,146 +4,6 @@
 #include <linux/kernel.h>
 #include <linux/fs.h>

-/*
- * v4.15-rc3-4-gae5e165d855d
- *
- * new API for handling inode->i_version. This forces us to
- * include this API where we need. We include it here for
- * convenience instead of where it's needed.
- */
-#ifdef KC_NEED_LINUX_IVERSION_H
-#include <linux/iversion.h>
-#else
-/*
- * Kernels before above version will need to fall back to
- * manipulating inode->i_version as previous with degraded
- * methods.
- */
-#define inode_set_iversion_queried(inode, val)	\
-do {						\
-	(inode)->i_version = val;		\
-} while (0)
-#define inode_peek_iversion(inode)		\
-({						\
-	(inode)->i_version;			\
-})
-#endif
-
-#ifdef KC_POSIX_ACL_VALID_USER_NS
-#define kc_posix_acl_valid(user_ns, acl) posix_acl_valid(user_ns, acl)
-#else
-#define kc_posix_acl_valid(user_ns, acl) posix_acl_valid(acl)
-#endif
-
-/*
- * v3.6-rc1-24-gdbf2576e37da
- *
- * All workqueues are now non-reentrant, and the bit flag is removed
- * shortly after its uses were removed.
- */
-#ifndef WQ_NON_REENTRANT
-#define WQ_NON_REENTRANT 0
-#endif
-
-/*
- * v3.18-rc2-19-gb5ae6b15bd73
- *
- * Folds d_materialise_unique into d_splice_alias. Note reversal
- * of arguments (Also note Documentation/filesystems/porting.rst)
- */
-#ifndef KC_D_MATERIALISE_UNIQUE
-#define d_materialise_unique(dentry, inode) d_splice_alias(inode, dentry)
-#endif
-
-/*
- * v4.8-rc1-29-g31051c85b5e2
- *
- * fall back to inode_change_ok() if setattr_prepare() isn't available
- */
-#ifndef KC_SETATTR_PREPARE
-#define setattr_prepare(dentry, attr) inode_change_ok(d_inode(dentry), attr)
-#endif
-
-#ifndef KC___POSIX_ACL_CREATE
-#define __posix_acl_create posix_acl_create
-#define __posix_acl_chmod posix_acl_chmod
-#endif
-
-#ifndef KC_PERCPU_COUNTER_ADD_BATCH
-#define percpu_counter_add_batch __percpu_counter_add
-#endif
-
-#ifndef KC_MEMALLOC_NOFS_SAVE
-#define memalloc_nofs_save memalloc_noio_save
-#define memalloc_nofs_restore memalloc_noio_restore
-#endif
-
-#ifdef KC_BIO_BI_OPF
-#define kc_bio_get_opf(bio)		\
-({					\
-	(bio)->bi_opf;			\
-})
-#define kc_bio_set_opf(bio, opf)	\
-do {					\
-	(bio)->bi_opf = opf;		\
-} while (0)
-#define kc_bio_set_sector(bio, sect)	\
-do {					\
-	(bio)->bi_iter.bi_sector = sect;\
-} while (0)
-#define kc_submit_bio(bio) submit_bio(bio)
-#else
-#define kc_bio_get_opf(bio)		\
-({					\
-	(bio)->bi_rw;			\
-})
-#define kc_bio_set_opf(bio, opf)	\
-do {					\
-	(bio)->bi_rw = opf;		\
-} while (0)
-#define kc_bio_set_sector(bio, sect)	\
-do {					\
-	(bio)->bi_sector = sect;	\
-} while (0)
-#define kc_submit_bio(bio)		\
-do {					\
-	submit_bio((bio)->bi_rw, bio);	\
-} while (0)
-#define bio_set_dev(bio, bdev)		\
-do {					\
-	(bio)->bi_bdev = (bdev);	\
-} while (0)
-#endif
-
-#ifdef KC_BIO_BI_STATUS
-#define KC_DECLARE_BIO_END_IO(name, bio)	name(bio)
-#define kc_bio_get_errno(bio)			({ blk_status_to_errno((bio)->bi_status); })
-#else
-#define KC_DECLARE_BIO_END_IO(name, bio)	name(bio, int _error_arg)
-#define kc_bio_get_errno(bio)			({ (int)((void)(bio), _error_arg); })
-#endif
-
-/*
- * v4.13-rc1-6-ge462ec50cb5f
- *
- * MS_* (mount) flags from <linux/mount.h> should not be used in the kernel
- * anymore from 4.x onwards. Instead, we need to use the SB_* (superblock) flags
- */
-#ifndef SB_POSIXACL
-#define SB_POSIXACL MS_POSIXACL
-#define SB_I_VERSION MS_I_VERSION
-#endif
-
-#ifndef KC_CURRENT_TIME_INODE
-struct timespec64 kc_current_time(struct inode *inode);
-#define current_time kc_current_time
-#define kc_timespec timespec
-#else
-#define kc_timespec timespec64
-#endif
-
-#ifndef KC_SHRINKER_SHRINK
-
 #define KC_DEFINE_SHRINKER(name) struct shrinker name
 #define KC_INIT_SHRINKER_FUNCS(name, countfn, scanfn) do {	\
 	__typeof__(name) _shrink = (name);			\
@@ -160,77 +20,7 @@ struct timespec64 kc_current_time(struct inode *inode);
 #endif /* KC_SHRINKER_NAME */
 #define KC_UNREGISTER_SHRINKER(ptr) (unregister_shrinker(ptr))
 #define KC_SHRINKER_FN(ptr) (ptr)
-#else

-#include <linux/shrinker.h>
-#ifndef SHRINK_STOP
-#define SHRINK_STOP (~0UL)
-#define SHRINK_EMPTY (~0UL - 1)
-#endif
-
-int kc_shrink_wrapper_fn(struct shrinker *shrink, struct shrink_control *sc);
-struct kc_shrinker_wrapper {
-	unsigned long (*count_objects)(struct shrinker *, struct shrink_control *sc);
-	unsigned long (*scan_objects)(struct shrinker *, struct shrink_control *sc);
-	struct shrinker shrink;
-};
-
-#define KC_DEFINE_SHRINKER(name) struct kc_shrinker_wrapper name;
-#define KC_INIT_SHRINKER_FUNCS(name, countfn, scanfn) do {	\
-	struct kc_shrinker_wrapper *_wrap = (name);		\
-	_wrap->count_objects = (countfn);			\
-	_wrap->scan_objects = (scanfn);				\
-	_wrap->shrink.shrink = kc_shrink_wrapper_fn;		\
-	_wrap->shrink.seeks = DEFAULT_SEEKS;			\
-} while (0)
-#define KC_SHRINKER_CONTAINER_OF(ptr, type) container_of(container_of(ptr, struct kc_shrinker_wrapper, shrink), type, shrinker)
-#define KC_REGISTER_SHRINKER(ptr, fmt, ...) (register_shrinker(ptr.shrink))
-#define KC_UNREGISTER_SHRINKER(ptr) (unregister_shrinker(ptr.shrink))
-#define KC_SHRINKER_FN(ptr) (ptr.shrink)
-
-#endif /* KC_SHRINKER_SHRINK */
-
-#ifdef KC_KERNEL_GETSOCKNAME_ADDRLEN
-#include <linux/net.h>
-#include <linux/inet.h>
-static inline int kc_kernel_getsockname(struct socket *sock, struct sockaddr *addr)
-{
-	int addrlen = sizeof(struct sockaddr_in);
-	int ret = kernel_getsockname(sock, addr, &addrlen);
-	if (ret == 0 && addrlen != sizeof(struct sockaddr_in))
-		return -EAFNOSUPPORT;
-	else if (ret < 0)
-		return ret;
-
-	return sizeof(struct sockaddr_in);
-}
-static inline int kc_kernel_getpeername(struct socket *sock, struct sockaddr *addr)
-{
-	int addrlen = sizeof(struct sockaddr_in);
-	int ret = kernel_getpeername(sock, addr, &addrlen);
-	if (ret == 0 && addrlen != sizeof(struct sockaddr_in))
-		return -EAFNOSUPPORT;
-	else if (ret < 0)
-		return ret;
-
-	return sizeof(struct sockaddr_in);
-}
-#else
-#define kc_kernel_getsockname(sock, addr) kernel_getsockname(sock, addr)
-#define kc_kernel_getpeername(sock, addr) kernel_getpeername(sock, addr)
-#endif
-
-#ifdef KC_SOCK_CREATE_KERN_NET
-#define kc_sock_create_kern(family, type, proto, res) sock_create_kern(&init_net, family, type, proto, res)
-#else
-#define kc_sock_create_kern sock_create_kern
-#endif
-
-#ifndef KC_GENERIC_FILE_BUFFERED_WRITE
-ssize_t kc_generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
-               unsigned long nr_segs, loff_t pos, loff_t *ppos,
-               size_t count, ssize_t written);
-#define generic_file_buffered_write kc_generic_file_buffered_write
 #ifdef KC_GENERIC_PERFORM_WRITE_KIOCB_IOV_ITER
 static inline int kc_generic_perform_write(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
 {
@@ -244,7 +34,6 @@ static inline int kc_generic_perform_write(struct kiocb *iocb, struct iov_iter *
 	return generic_perform_write(file, iter, pos);
 }
 #endif
-#endif // KC_GENERIC_FILE_BUFFERED_WRITE

 #ifndef KC_HAVE_BLK_OPF_T
 /* typedef __u32 __bitwise blk_opf_t; */
@@ -288,7 +77,7 @@ static inline struct bio *kc_bio_alloc(struct block_device *bdev, unsigned short
 {
 	struct bio *b = bio_alloc(gfp_mask, nr_vecs);
 	if (b) {
-		kc_bio_set_opf(b, opf);
+		b->bi_opf = opf;
 		bio_set_dev(b, bdev);
 	}
 	return b;
@@ -299,11 +88,6 @@ static inline struct bio *kc_bio_alloc(struct block_device *bdev, unsigned short
 #define fiemap_prep(inode, fieinfo, start, len, flags) fiemap_check_flags(fieinfo, flags)
 #endif

-#ifndef KC_KERNEL_OLD_TIMEVAL_STRUCT
-#define __kernel_old_timeval timeval
-#define ns_to_kernel_old_timeval(ktime) ns_to_timeval(ktime.tv64)
-#endif
-
 #ifdef KC_SOCK_SET_SNDTIMEO
 #include <net/sock.h>
 static inline int kc_sock_set_sndtimeo(struct socket *sock, s64 secs)
@@ -400,45 +184,14 @@ static inline int kc_tcp_sock_set_nodelay(struct socket *sock)
 }
 #endif

-#ifdef KC_INODE_DIO_END
-#define kc_inode_dio_end inode_dio_end
-#else
-#define kc_inode_dio_end inode_dio_done
-#endif
-
-#ifndef KC_MM_VM_FAULT_T
-typedef unsigned int vm_fault_t;
-static inline vm_fault_t vmf_error(int err)
-{
-	if (err == -ENOMEM)
-		return VM_FAULT_OOM;
-	return VM_FAULT_SIGBUS;
-}
-#endif
-
 #include <linux/list_lru.h>

-#ifndef KC_LIST_LRU_SHRINK_COUNT_WALK
-/* we don't bother with sc->{nid,memcg} (which doesn't exist in oldest kernels) */
-static inline unsigned long list_lru_shrink_count(struct list_lru *lru,
-                                                  struct shrink_control *sc)
-{
-        return list_lru_count(lru);
-}
-static inline unsigned long
-list_lru_shrink_walk(struct list_lru *lru, struct shrink_control *sc,
-		     list_lru_walk_cb isolate, void *cb_arg)
-{
-	return list_lru_walk(lru, isolate, cb_arg, sc->nr_to_scan);
-}
-#endif
-
 #ifndef KC_LIST_LRU_ADD_OBJ
 #define list_lru_add_obj list_lru_add
 #define list_lru_del_obj list_lru_del
 #endif

-#if defined(KC_LIST_LRU_WALK_CB_LIST_LOCK) || defined(KC_LIST_LRU_WALK_CB_ITEM_LOCK)
+#if defined(KC_LIST_LRU_WALK_CB_LIST_LOCK)
 struct list_lru_one;
 typedef enum lru_status (*kc_list_lru_walk_cb_t)(struct list_head *item, struct list_lru_one *list,
 						 void *cb_arg);
@@ -454,39 +207,9 @@ unsigned long kc_list_lru_shrink_walk(struct list_lru *lru, struct shrink_contro
 #define kc_list_lru_shrink_walk list_lru_shrink_walk
 #endif

-#if defined(KC_LIST_LRU_WALK_CB_ITEM_LOCK)
-/* isolate moved by hand, nr_items updated in walk as _REMOVE returned */
-static inline void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
-					 struct list_head *head)
-{
-        list_move(item, head);
-}
-#endif
-
-#ifndef KC_STACK_TRACE_SAVE
-#include <linux/stacktrace.h>
-static inline unsigned int stack_trace_save(unsigned long *store, unsigned int size,
-					    unsigned int skipnr)
-{
-        struct stack_trace trace = {
-                .entries        = store,
-                .max_entries    = size,
-                .skip           = skipnr,
-        };
-
-        save_stack_trace(&trace);
-        return trace.nr_entries;
-}
-
-static inline void stack_trace_print(unsigned long *entries, unsigned int nr_entries, int spaces)
-{
-        struct stack_trace trace = {
-                .entries        = entries,
-                .nr_entries     = nr_entries,
-        };
-
-	print_stack_trace(&trace, spaces);
-}
+#ifndef KC_TIMER_CONTAINER_OF
+#define timer_container_of(var, callback_timer, timer_fieldname) \
+	from_timer(var, callback_timer, timer_fieldname)
 #endif

 #endif
@@ -813,6 +813,7 @@ int scoutfs_lock_invalidate_request(struct super_block *sb, u64 net_id,

 out:
 	if (!lock) {
+		kfree(ireq);
 		ret = scoutfs_client_lock_response(sb, net_id, nl);
 		BUG_ON(ret); /* lock server doesn't fence timed out client requests */
 	}
@@ -1093,24 +1094,19 @@ out_unlock:
 	return ret;
 }

-void scoutfs_lock_get_fs_item_range(u64 ino, struct scoutfs_key *start, struct scoutfs_key *end)
-{
-	scoutfs_key_set_zeros(start);
-	start->sk_zone = SCOUTFS_FS_ZONE;
-	start->ski_ino = cpu_to_le64(ino & ~(u64)SCOUTFS_LOCK_INODE_GROUP_MASK);
-
-	scoutfs_key_set_ones(end);
-	end->sk_zone = SCOUTFS_FS_ZONE;
-	end->ski_ino = cpu_to_le64(ino | SCOUTFS_LOCK_INODE_GROUP_MASK);
-}
-
 int scoutfs_lock_ino(struct super_block *sb, enum scoutfs_lock_mode mode, int flags, u64 ino,
 		     struct scoutfs_lock **ret_lock)
 {
 	struct scoutfs_key start;
 	struct scoutfs_key end;

-	scoutfs_lock_get_fs_item_range(ino, &start, &end);
+	scoutfs_key_set_zeros(&start);
+	start.sk_zone = SCOUTFS_FS_ZONE;
+	start.ski_ino = cpu_to_le64(ino & ~(u64)SCOUTFS_LOCK_INODE_GROUP_MASK);
+
+	scoutfs_key_set_ones(&end);
+	end.sk_zone = SCOUTFS_FS_ZONE;
+	end.ski_ino = cpu_to_le64(ino | SCOUTFS_LOCK_INODE_GROUP_MASK);

 	return lock_key_range(sb, mode, flags, &start, &end, ret_lock);
 }
@@ -1697,8 +1693,7 @@ int scoutfs_lock_setup(struct super_block *sb)
 	}

 	linfo->workq = alloc_workqueue("scoutfs_lock_client_work",
-				       WQ_NON_REENTRANT | WQ_UNBOUND |
-				       WQ_HIGHPRI, 0);
+				       WQ_UNBOUND | WQ_HIGHPRI, 0);
 	if (!linfo->workq) {
 		ret = -ENOMEM;
 		goto out;
@@ -65,7 +65,6 @@ int scoutfs_lock_invalidate_request(struct super_block *sb, u64 net_id,
 int scoutfs_lock_recover_request(struct super_block *sb, u64 net_id,
 				 struct scoutfs_key *key);

-void scoutfs_lock_get_fs_item_range(u64 ino, struct scoutfs_key *start, struct scoutfs_key *end);
 int scoutfs_lock_inode(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
 		       struct inode *inode, struct scoutfs_lock **ret_lock);
 int scoutfs_lock_ino(struct super_block *sb, enum scoutfs_lock_mode mode, int flags, u64 ino,
@@ -525,7 +525,7 @@ static int process_response(struct scoutfs_net_connection *conn,
 	struct super_block *sb = conn->sb;
 	struct message_send *msend;
 	scoutfs_net_response_t resp_func = NULL;
-	void *resp_data;
+	void *resp_data = NULL;

 	spin_lock(&conn->lock);

@@ -804,7 +804,7 @@ static void scoutfs_net_recv_worker(struct work_struct *work)
 			if (invalid_message(conn, nh)) {
 				scoutfs_inc_counter(sb, net_recv_invalid_message);
 				ret = -EBADMSG;
-				break;
+				goto out;
 			}

 			data_len = le16_to_cpu(nh->data_len);
@@ -1113,11 +1113,11 @@ static int sock_opts_and_names(struct super_block *sb,
 	if (ret)
 		goto out;

-	ret = kc_kernel_getsockname(sock, (struct sockaddr *)&conn->sockname);
+	ret = kernel_getsockname(sock, (struct sockaddr *)&conn->sockname);
 	if (ret < 0)
 		goto out;

-	ret = kc_kernel_getpeername(sock, (struct sockaddr *)&conn->peername);
+	ret = kernel_getpeername(sock, (struct sockaddr *)&conn->peername);
 	if (ret < 0)
 		goto out;

@@ -1218,7 +1218,7 @@ static void scoutfs_net_connect_worker(struct work_struct *work)

 	trace_scoutfs_net_connect_work_enter(sb, 0, 0);

-	ret = kc_sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
+	ret = sock_create_kern(&init_net, AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
 	if (ret)
 		goto out;

@@ -1517,8 +1517,7 @@ scoutfs_net_alloc_conn(struct super_block *sb,
 		conn->ordered_proc_wlists = kmalloc_array(nr, sizeof(struct scoutfs_work_list),
 							  GFP_NOFS);
 		conn->workq = alloc_workqueue("scoutfs_net_%s",
-					      WQ_UNBOUND | WQ_NON_REENTRANT, 0,
-					      name_suffix);
+					      WQ_UNBOUND, 0, name_suffix);
 	}
 	if (!conn || (info_size && !conn->info) || !conn->workq || !conn->ordered_proc_wlists) {
 		if (conn) {
@@ -1630,7 +1629,7 @@ int scoutfs_net_bind(struct super_block *sb,
 	if (WARN_ON_ONCE(conn->sock))
 		return -EINVAL;

-	ret = kc_sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
+	ret = sock_create_kern(&init_net, AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
 	if (ret)
 		goto out;

@@ -1651,7 +1650,7 @@ int scoutfs_net_bind(struct super_block *sb,
 	if (ret < 0)
 		goto out;

-	ret = kc_kernel_getsockname(sock, (struct sockaddr *)&conn->sockname);
+	ret = kernel_getsockname(sock, (struct sockaddr *)&conn->sockname);
 	if (ret < 0)
 		goto out;

@@ -2099,11 +2098,9 @@ int scoutfs_net_setup(struct super_block *sb)
 	scoutfs_tseq_tree_init(&ninf->msg_tseq_tree, net_tseq_show_msg);

 	ninf->shutdown_workq = alloc_workqueue("scoutfs_net_shutdown",
-					       WQ_UNBOUND | WQ_NON_REENTRANT,
-					       0);
+					       WQ_UNBOUND, 0);
 	ninf->destroy_workq = alloc_workqueue("scoutfs_net_destroy",
-					       WQ_UNBOUND | WQ_NON_REENTRANT,
-					       0);
+					       WQ_UNBOUND, 0);
 	if (!ninf->shutdown_workq || !ninf->destroy_workq) {
 		ret = -ENOMEM;
 		goto out;
@@ -183,7 +183,7 @@ static int create_socket(struct super_block *sb)
 	int addrlen;
 	int ret;

-	ret = kc_sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+	ret = sock_create_kern(&init_net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
 	if (ret) {
 		scoutfs_err(sb, "quorum couldn't create udp socket: %d", ret);
 		goto out;
@@ -1332,8 +1332,7 @@ int scoutfs_quorum_setup(struct super_block *sb)

 	/* a high priority single threaded context without mem reclaim */
 	qinf->workq = alloc_workqueue("scoutfs_quorum_work",
-				       WQ_NON_REENTRANT | WQ_UNBOUND |
-				       WQ_HIGHPRI, 1);
+				       WQ_UNBOUND | WQ_HIGHPRI, 1);
 	if (!qinf->workq) {
 		ret = -ENOMEM;
 		goto out;
@@ -1114,6 +1114,7 @@ int scoutfs_quota_mod_rule(struct super_block *sb, bool is_add,
 			goto release;
 	}

+	wait_event(qtinf->waitq, !ruleset_is_busy(qtinf));
 	scoutfs_quota_invalidate(sb);
 	ret = 0;

@@ -1142,12 +1143,17 @@ void scoutfs_quota_get_lock_range(struct scoutfs_key *start, struct scoutfs_key
 }

 /*
- * This is called during cluster lock invalidation to indicate that the
- * ruleset is no longer protected by cluster locking and might have been
- * modified.  We mark the ruleset invalid and free it once all readers
- * drain.  The next check will acquire the cluster lock and read the
- * rules.  Because this is called during invalidation this is serialized
- * with write holders of cluster locks so we can never see -EBUSY here.
+ * Mark the cached ruleset invalid and free the previous one once readers
+ * drain.  Called from cluster lock invalidation and from quota rule
+ * modification.
+ *
+ * Cluster lock invalidation runs only after the lock layer has drained
+ * local READ users.  Since EBUSY is set only while a reader holds READ,
+ * the reader has already published by the time we run.
+ *
+ * Quota rule modification waits on the waitq for any in-flight reader
+ * to publish before calling here, so the next check rebuilds against
+ * the newly written rules rather than the reader's stale result.
 */
 void scoutfs_quota_invalidate(struct super_block *sb)
 {
@@ -1161,13 +1167,10 @@ void scoutfs_quota_invalidate(struct super_block *sb)

 	spin_lock(&qtinf->lock);
 	rs = rcu_dereference_protected(qtinf->ruleset, lockdep_is_held(&qtinf->lock));
-	if (rs != ERR_PTR(-EINVAL))
+	if (rs == ERR_PTR(-ENOENT) || !IS_ERR(rs))
 		rcu_assign_pointer(qtinf->ruleset, ERR_PTR(-EINVAL));
 	spin_unlock(&qtinf->lock);

-	/* cluster locking should have prevented this */
-	BUG_ON(rs == ERR_PTR(-EBUSY));
-
 	if (!IS_ERR(rs))
 		call_rcu(&rs->rcu, free_ruleset_rcu);

@@ -1,744 +0,0 @@
-/*
- * Copyright (C) 2026 Versity Software, Inc.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-#include <linux/kernel.h>
-#include <linux/fs.h>
-#include <linux/list_sort.h>
-#include <linux/sort.h>
-
-#include "format.h"
-#include "key.h"
-#include "block.h"
-#include "inode.h"
-#include "forest.h"
-#include "client.h"
-#include "ioctl.h"
-#include "lock.h"
-#include "xattr.h"
-#include "attr_x.h"
-#include "bsearch_index.h"
-#include "raw.h"
-
-struct fs_item {
-	struct list_head head;
-	struct scoutfs_key key;
-	u64 seq;
-	int val_len;
-	bool deletion;
-	/* val is aligned so we can deref structs in vals */
-	u8 val[0] __aligned(ARCH_KMALLOC_MINALIGN);
-};
-
-static int save_fs_item(struct list_head *list, struct scoutfs_key *key, u64 seq, u8 flags,
-			void *val, int val_len)
-{
-	struct fs_item *fsi;
-
-	/* max btree val len is hundreds of bytes */
-	fsi = kmalloc(offsetof(struct fs_item, val[val_len]), GFP_NOFS);
-	if (!fsi)
-		return -ENOMEM;
-
-	fsi->key = *key;
-	fsi->seq = seq;
-	fsi->val_len = val_len;
-	fsi->deletion = !!(flags & SCOUTFS_ITEM_FLAG_DELETION);
-	if (val_len > 0)
-		memcpy(fsi->val, val, val_len);
-	list_add_tail(&fsi->head, list);
-
-	return 0;
-}
-
-static void free_fs_item(struct fs_item *fsi)
-{
-	if (!list_empty(&fsi->head))
-		list_del_init(&fsi->head);
-	kfree(fsi);
-}
-
-static void free_fs_items(struct list_head *list)
-{
-	struct fs_item *fsi;
-	struct fs_item *tmp;
-
-	list_for_each_entry_safe(fsi, tmp, list, head)
-		free_fs_item(fsi);
-}
-
-static struct fs_item *next_fs_item(struct list_head *list, struct fs_item *fsi)
-{
-	list_for_each_entry_continue(fsi, list, head)
-		return fsi;
-	return NULL;
-}
-
-static int cmp_fs_items(void *priv, KC_LIST_CMP_CONST struct list_head *A,
-			KC_LIST_CMP_CONST struct list_head *B)
-{
-	KC_LIST_CMP_CONST struct fs_item *a =
-		container_of(A, KC_LIST_CMP_CONST struct fs_item, head);
-	KC_LIST_CMP_CONST struct fs_item *b =
-		container_of(B, KC_LIST_CMP_CONST struct fs_item, head);
-
-	return scoutfs_key_compare(&a->key, &b->key) ?: -scoutfs_cmp(a->seq, b->seq);
-}
-
-static void sort_and_remove(struct list_head *list, struct scoutfs_key *end)
-{
-	struct fs_item *prev;
-	struct fs_item *fsi;
-	struct fs_item *tmp;
-
-	list_sort(NULL, list, cmp_fs_items);
-
-	/* start by removing any items read before end was decreased by later blocks */
-	list_for_each_entry_safe_reverse(fsi, tmp, list, head) {
-		if (scoutfs_key_compare(&fsi->key, end) > 0)
-			free_fs_item(fsi);
-		else
-			break;
-	}
-
-	prev = NULL;
-	list_for_each_entry_safe(fsi, tmp, list, head) {
-		/* remove this item if it's an older version of previous item */
-		if (prev && scoutfs_key_compare(&prev->key, &fsi->key) == 0) {
-			free_fs_item(fsi);
-			continue;
-		}
-
-		/* remove previous deletion item once it has removed all older versions */
-		if (prev && prev->deletion)
-			free_fs_item(prev);
-
-		/* next item might match this, record to compare */
-		prev = fsi;
-	}
-
-	/* remove the last item if it's a deletion */
-	list_for_each_entry_reverse(fsi, list, head) {
-		if (fsi->deletion)
-			free_fs_item(fsi);
-		break;
-	}
-}
-
-static int save_all_items(struct super_block *sb, struct scoutfs_key *key, u64 seq, u8 flags,
-			  void *val, int val_len, int fic, void *arg)
-{
-	struct list_head *list = arg;
-
-	return save_fs_item(list, key, seq, flags, val, val_len);
-}
-
-/* -------------- */
-
-static void ms_from_key(struct scoutfs_ioctl_meta_seq *ms, struct scoutfs_key *key)
-{
-	ms->meta_seq = le64_to_cpu(key->skii_major);
-	ms->ino = le64_to_cpu(key->skii_ino);
-}
-
-/*
- * Increment the key's ino->meta_seq so that we don't land between items.
- */
-static void inc_meta_seq(struct scoutfs_key *key)
-{
-	le64_add_cpu(&key->skii_ino, 1);
-	if (key->skii_ino == 0)
-		le64_add_cpu(&key->skii_major, 1);
-}
-
-int scoutfs_raw_read_meta_seq(struct super_block *sb,
-			      struct scoutfs_ioctl_raw_read_meta_seq *rms,
-			      struct scoutfs_ioctl_meta_seq *last_ret)
-{
-	struct scoutfs_ioctl_meta_seq __user *ums;
-	struct scoutfs_ioctl_meta_seq ms;
-	struct scoutfs_net_roots roots;
-	DECLARE_SAVED_REFS(saved);
-	struct scoutfs_key start;
-	struct scoutfs_key last;
-	struct scoutfs_key key;
-	struct scoutfs_key end;
-	struct fs_item *fsi;
-	struct fs_item *tmp;
-	LIST_HEAD(list);
-	int retries;
-	int copied;
-	int count;
-	int ret;
-
-	ums = (void __user *)rms->results_ptr;
-	count = rms->results_size / sizeof(struct scoutfs_ioctl_meta_seq);
-	retries = 10;
-	copied = 0;
-
-	scoutfs_inode_init_index_key(&last, SCOUTFS_INODE_INDEX_META_SEQ_TYPE,
-				     rms->end.meta_seq, 0, rms->end.ino);
-
-retry:
-	ret = scoutfs_client_get_roots(sb, &roots);
-	if (ret)
-		goto out;
-
-	scoutfs_inode_init_index_key(&key, SCOUTFS_INODE_INDEX_META_SEQ_TYPE,
-				     rms->start.meta_seq, 0, rms->start.ino);
-
-	for (;;) {
-		start = key;
-		end = last;
-		ret = scoutfs_forest_read_items_roots(sb, &roots, 0, &key, NULL, &start, &end,
-						      save_all_items, &list);
-		if (ret < 0)
-			goto out;
-
-		sort_and_remove(&list, &end);
-
-		list_for_each_entry_safe(fsi, tmp, &list, head) {
-
-			if (copied == count) {
-				/* results are full, set end to before item can't return */
-				end = fsi->key;
-				le64_add_cpu(&end.skii_ino, -1ULL);
-				ret = 0;
-				goto out;
-			}
-
-			ms_from_key(&ms, &fsi->key);
-			if (copy_to_user(&ums[copied], &ms, sizeof(ms))) {
-				ret = -EFAULT;
-				goto out;
-			}
-
-			free_fs_item(fsi);
-			copied++;
-		}
-
-		if (scoutfs_key_compare(&end, &last) >= 0) {
-			end = last;
-			break;
-		}
-
-		key = end;
-		inc_meta_seq(&key);
-	}
-
-	ret = 0;
-out:
-	free_fs_items(&list);
-
-	ret = scoutfs_block_check_stale(sb, ret, &saved, &roots.fs_root.ref, &roots.logs_root.ref);
-	if (ret == -ESTALE && copied == 0 && retries-- > 0)
-		goto retry;
-
-	ms_from_key(last_ret, &end);
-
-	return ret ?: copied;
-}
-
-/* -------------- */
-
-struct inode_info_context {
-	size_t nr_inos;
-	u64 *inos;
-
-	size_t nr_names;
-	struct xattr_name {
-		u64 hash;
-		char *name;
-		u8 name_len; /* no null */
-	} *names;
-
-	struct list_head fs_items;
-};
-
-static int cmp_u64(const void *A, const void *B)
-{
-	const u64 *a = A;
-	const u64 *b = B;
-
-	return scoutfs_cmp(*a, *b);
-}
-
-static int cmp_name_hash(const void *A, const void *B)
-{
-	const struct xattr_name *a = A;
-	const struct xattr_name *b = B;
-
-	return scoutfs_cmp(a->hash, b->hash);
-}
-
-static int cmp_name_string(const void *A, const void *B)
-{
-	const struct xattr_name *a = A;
-	const struct xattr_name *b = B;
-
-	return scoutfs_cmp(a->name_len, b->name_len) ?: memcmp(a->name, b->name, a->name_len);
-}
-
-static int setup_context(struct inode_info_context *ctx,
-			 struct scoutfs_ioctl_raw_read_inode_info *rii)
-{
-	__u64 __user *uinos = (void __user *)rii->inos_ptr;
-	char __user *uname;
-	long len_null;
-	long len;
-	int ret;
-	u32 i;
-
-	ctx->nr_inos = rii->inos_count;
-	ctx->nr_names = rii->names_count;
-	INIT_LIST_HEAD(&ctx->fs_items);
-
-	ctx->inos = kvmalloc_array(ctx->nr_inos, sizeof(ctx->inos[0]), GFP_KERNEL);
-	ctx->names = kvcalloc(ctx->nr_names, sizeof(ctx->names[0]), GFP_KERNEL);
-	if (!ctx->inos || !ctx->names) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	if (copy_from_user(ctx->inos, uinos, ctx->nr_inos * sizeof(ctx->inos[0]))) {
-		ret = -EFAULT;
-		goto out;
-	}
-
-	/* inos must not be 0 and must increase and contain no duplicates */
-	if (ctx->inos[0] == 0) {
-		ret = -EINVAL;
-		goto out;
-	}
-	for (i = 1; i < ctx->nr_inos; i++) {
-		if (ctx->inos[i] <= ctx->inos[i - 1]) {
-			ret = -EINVAL;
-			goto out;
-		}
-	}
-
-	uname = (void __user *)rii->names_ptr;
-	for (i = 0; i < ctx->nr_names; i++) {
-		len_null = SCOUTFS_XATTR_MAX_NAME_LEN + 1;
-		ret = strnlen_user(uname, len_null);
-		if (ret <= 1 || ret > len_null) {
-			if (ret >= 0)
-				ret = -EINVAL;
-			goto out;
-		}
-		len_null = ret;
-		len = len_null - 1;
-
-		ctx->names[i].name_len = len;
-		ctx->names[i].name = kmalloc(len_null, GFP_KERNEL);
-		if (!ctx->names[i].name) {
-			ret = -ENOMEM;
-			goto out;
-		}
-
-		ret = strncpy_from_user(ctx->names[i].name, uname, len_null);
-		if (ret != len) {
-			if (ret >= 0)
-				ret = -EINVAL;
-			goto out;
-		}
-
-		ctx->names[i].hash = scoutfs_xattr_name_hash(ctx->names[i].name, len);
-		uname += len_null;
-	}
-
-	/* make sure all the names differ */
-	sort(ctx->names, ctx->nr_names, sizeof(ctx->names[0]), cmp_name_string, NULL);
-	for (i = 1; i < ctx->nr_names; i++) {
-		if (cmp_name_string(&ctx->names[i - 1], &ctx->names[i]) == 0) {
-			ret = -EINVAL;
-			goto out;
-		}
-	}
-
-	/* then leave them sorted by hash */
-	sort(ctx->names, ctx->nr_names, sizeof(ctx->names[0]), cmp_name_hash, NULL);
-
-	ret = 0;
-out:
-	return ret;
-}
-
-static void free_context(struct inode_info_context *ctx)
-{
-	int i;
-
-	kvfree(ctx->inos);
-
-	if (ctx->names) {
-		for (i = 0; i < ctx->nr_names; i++) {
-			if (!ctx->names[i].name)
-				break;
-			kfree(ctx->names[i].name);
-		}
-		kvfree(ctx->names);
-	}
-}
-
-/*
- * Iterate over fs items and save any that we're interested in.  We want
- * inode struct items and any xattr items whose hashes collide with the
- * xattr names we're searching for.
- *
- * Our forest calls can be advancing through the key space as we see
- * slices that intersect with blocks in trees.  And each forest caller
- * can be resetting the key position to the start of each forest block
- * it reads in an intersection.
- *
- * From this callback's perspective, the key can be jumping all over the
- * place.  We don't have any iterative position state.  For each key we
- * decide if we want to save it and then set the key to the next key we
- * want after the current key.  We'll combine all the saved keys later.
- */
-static int save_info_items(struct super_block *sb, struct scoutfs_key *key, u64 seq,
-			   u8 flags, void *val, int val_len, int fic, void *arg)
-{
-	u64 ino = le64_to_cpu(key->_sk_first);
-	struct inode_info_context *ctx = arg;
-	struct xattr_name name;
-	size_t name_ind;
-	size_t ino_ind;
-	bool hash_match;
-	bool ino_match;
-	int ret;
-
-	ino_ind = bsearch_index(&ino, ctx->inos, ctx->nr_inos, sizeof(ctx->inos[0]), cmp_u64);
-	ino_match = ino_ind < ctx->nr_inos && ctx->inos[ino_ind] == ino;
-
-	/* jump to to next ino, could be for this key if we're before the ino struct */
-	if (!ino_match || key->sk_type < SCOUTFS_INODE_TYPE)
-		goto next_inode;
-
-	/* find our search position in xattrs */
-	if (key->sk_type < SCOUTFS_XATTR_TYPE) {
-		name_ind = 0;
-		hash_match = false;
-
-	} else if (key->sk_type == SCOUTFS_XATTR_TYPE) {
-		name = (struct xattr_name) { .hash = le64_to_cpu(key->skx_name_hash) };
-		name_ind = bsearch_index(&name, ctx->names, ctx->nr_names, sizeof(ctx->names[0]),
-					 cmp_name_hash);
-		hash_match = name_ind < ctx->nr_names && ctx->names[name_ind].hash == name.hash;
-	} else {
-		name_ind = ctx->nr_names;
-		hash_match = false;
-	}
-
-	/* save inode items for our search and all xattr items that match search hashes */
-	if (key->sk_type == SCOUTFS_INODE_TYPE || hash_match) {
-		ret = save_fs_item(&ctx->fs_items, key,  seq, flags, val, val_len);
-		if (ret < 0)
-			goto out;
-	}
-
-	/* let the caller continue iterating through matching xattr items */
-	if (hash_match) {
-		ret = 0;
-		goto out;
-	}
-
-	/* jump to the next xattr */
-	if (name_ind < ctx->nr_names) {
-		scoutfs_xattr_init_key(key, ino, ctx->names[name_ind].hash, 0);
-		ret = -ESRCH;
-		goto out;
-	}
-
-	/* no more xattrs, must be done with this ino */
-	ino_ind++;
-
-next_inode:
-	/* now jump to next inode struct key, or we're done */
-	if (ino_ind < ctx->nr_inos)
-		scoutfs_inode_init_key(key, ctx->inos[ino_ind]);
-	else
-		scoutfs_key_set_ones(key);
-
-	ret = -ESRCH;
-out:
-	return ret;
-}
-
-static int copy_to_user_off(void __user *dst, size_t *dst_off, size_t dst_size,
-			    void *src, size_t copy_size)
-{
-	if (copy_size == 0)
-		return 0;
-	if (*dst_off + copy_size > dst_size)
-		return -ERANGE;
-	if (copy_to_user(dst + *dst_off, src, copy_size))
-		return -EFAULT;
-
-	*dst_off += copy_size;
-	return 0;
-}
-
-static int copy_result_to_user(void __user *ures, size_t *off, size_t size, u8 type,
-			       void *a_data, size_t a_len, void *b_data, size_t b_len,
-			       size_t extra_size)
-{
-	struct scoutfs_ioctl_raw_read_result res;
-	const size_t szof_res = sizeof(struct scoutfs_ioctl_raw_read_result);
-
-	memzero_explicit(&res, szof_res);
-	res = (struct scoutfs_ioctl_raw_read_result) {
-		.size = a_len + b_len + extra_size,
-		.type = type,
-	};
-
-	return copy_to_user_off(ures, off, size, &res, szof_res) ?:
-	       (a_len ? copy_to_user_off(ures, off, size, a_data, a_len) : 0) ?:
-	       (b_len ? copy_to_user_off(ures, off, size, b_data, b_len) : 0);
-}
-
-static int copy_item_results_to_user(struct super_block *sb, struct inode_info_context *ctx,
-				     void __user *ures, size_t *off, size_t size,
-				     struct fs_item *fsi)
-{
-	struct scoutfs_inode *cinode;
-	struct scoutfs_xattr *xat;
-	static char null = '\0';
-	size_t len;
-	u64 ino;
-	int ret = 0;
-
-	if (fsi->key.sk_type == SCOUTFS_INODE_TYPE) {
-		cinode = (void *)fsi->val;
-		ino = le64_to_cpu(fsi->key.ski_ino);
-
-		ret = copy_result_to_user(ures, off, size, SCOUTFS_IOC_RAW_READ_RESULT_INODE,
-					  &ino, sizeof(ino), cinode, sizeof(struct scoutfs_inode),
-					  0);
-
-	} else if (fsi->key.sk_type == SCOUTFS_XATTR_TYPE) {
-		if (fsi->key.skx_part == 0) {
-			xat = (void *)fsi->val;
-			ret = copy_result_to_user(ures, off, size,
-						  SCOUTFS_IOC_RAW_READ_RESULT_XATTR, xat->name,
-						  xat->name_len, &null, sizeof(null),
-						  le16_to_cpu(xat->val_len));
-			if (ret == 0 && xat->val_len != 0) {
-				/* then append the start of the value */
-				len = fsi->val_len -
-				      offsetof(struct scoutfs_xattr, name[xat->name_len]);
-				ret = copy_to_user_off(ures, off, size, xat->name + xat->name_len,
-						       len);
-			}
-		} else {
-			/* continue appending partial values */
-			ret = copy_to_user_off(ures, off, size, fsi->val, fsi->val_len);
-		}
-	}
-
-	return ret;
-}
-
-static bool ignore_zero_nlink(struct inode_info_context *ctx, struct fs_item *fsi)
-{
-	struct scoutfs_inode *cinode = (void *)fsi->val;
-
-	return cinode->nlink == 0;
-}
-
-static bool ignore_xattr_name(struct inode_info_context *ctx, struct fs_item *fsi)
-{
-	struct scoutfs_xattr *xat = (void *)fsi->val;
-	struct xattr_name name = {
-		.hash = le64_to_cpu(fsi->key.skx_name_hash),
-		.name = xat->name,
-		.name_len = xat->name_len,
-	};
-	size_t i;
-
-	for (i = bsearch_index(&name, ctx->names, ctx->nr_names, sizeof(ctx->names[0]),
-			       cmp_name_hash);
-	     i < ctx->nr_names && name.hash == ctx->names[i].hash; i++) {
-		if (cmp_name_string(&name, &ctx->names[i]) == 0)
-			return false;
-	}
-
-	return true;
-}
-
-static int copy_results_to_user(struct super_block *sb, struct inode_info_context *ctx,
-				struct scoutfs_ioctl_raw_read_inode_info *rii)
-{
-	void __user *ures = (void __user *)rii->results_ptr;
-	struct scoutfs_xattr *xat;
-	struct fs_item *next;
-	struct fs_item *fsi;
-	struct fs_item *tmp;
-	size_t xattr_end;
-	size_t off;
-	__le64 in_ino;
-	__le64 in_id;
-	int ret;
-
-	in_ino = 0;
-	xattr_end = 0;
-	in_id = 0;
-	off = 0;
-
-	list_for_each_entry_safe(fsi, tmp, &ctx->fs_items, head) {
-		/*
-		 * ignore:
-		 *  - inodes with an nlink of 0
-		 *  - all items for an ino after the inode struct that we're ignoring 
-		 *  - first xattr parts with a name we don't need
-		 *  - additional xattr parts when we ignored the first
-		 */
-		if ((fsi->key.sk_type == SCOUTFS_INODE_TYPE && ignore_zero_nlink(ctx, fsi)) ||
-		    (fsi->key.sk_type > SCOUTFS_INODE_TYPE && fsi->key._sk_first != in_ino) ||
-		    (fsi->key.sk_type == SCOUTFS_XATTR_TYPE &&
-		     ((fsi->key.skx_part == 0 && ignore_xattr_name(ctx, fsi)) ||
-		      (fsi->key.skx_part > 0 && fsi->key.skx_id != in_id)))) {
-			free_fs_item(fsi);
-			in_ino = 0;
-			in_id = 0;
-			continue;
-		}
-
-		/* advance ino/xattr stream context state machine */
-		if (fsi->key.sk_type == SCOUTFS_INODE_TYPE) {
-			in_ino = fsi->key.ski_ino;
-			in_id = 0;
-		} else if (fsi->key.sk_type == SCOUTFS_XATTR_TYPE && fsi->key.skx_part == 0) {
-			in_id = fsi->key.skx_id;
-			/* save the required offset after the complete xattr */
-			xat = (void *)fsi->val;
-			xattr_end = off + sizeof(struct scoutfs_ioctl_raw_read_result) +
-				    xat->name_len + 1 + le16_to_cpu(xat->val_len);
-		}
-
-		/* copy results, usually with header, but additional xattr parts copied raw */
-		ret = copy_item_results_to_user(sb, ctx, ures, &off, rii->results_size, fsi);
-		if (ret < 0)
-			goto out;
-
-		/* make sure we saw all xattr parts and copied the correct size */
-		if (xattr_end > 0 &&
-		    !((next = next_fs_item(&ctx->fs_items, fsi)) &&
-		      next->key.sk_type == SCOUTFS_XATTR_TYPE && next->key.skx_ino == in_ino &&
-		      next->key.skx_id == in_id)) {
-			if (off != xattr_end) {
-				ret = -EUCLEAN;
-				goto out;
-			}
-			xattr_end = 0;
-		}
-	}
-
-	ret = 0;
-out:
-	return ret ?: off;
-}
-
-/*
- * If the key is for an inode we're not interested in, or if its past
- * the xattr items, then advance to the next inode.  This is used
- * between forest read items calls to avoid leaf blocks.  The callback
- * takes care of iterating through the items for an inode across
- * multiple leaves.
- */
-static void advance_key_ino(struct scoutfs_key *key, struct inode_info_context *ctx)
-{
-	u64 ino = le64_to_cpu(key->_sk_first);
-	size_t ino_ind;
-
-	ino_ind = bsearch_index(&ino, ctx->inos, ctx->nr_inos, sizeof(ctx->inos[0]), cmp_u64);
-	if (ino_ind < ctx->nr_inos && ctx->inos[ino_ind] == ino) {
-		if (key->sk_type <= SCOUTFS_XATTR_TYPE)
-			return;
-		else
-			ino_ind++;
-	}
-
-	if (ino_ind < ctx->nr_inos)
-		scoutfs_inode_init_key(key, ctx->inos[ino_ind]);
-	else
-		scoutfs_key_set_ones(key);
-}
-
-int scoutfs_raw_read_inode_info(struct super_block *sb,
-				struct scoutfs_ioctl_raw_read_inode_info *rii)
-{
-	struct inode_info_context ctx = {0, };
-	struct scoutfs_net_roots roots;
-	DECLARE_SAVED_REFS(saved);
-	struct scoutfs_key lock_start;
-	struct scoutfs_key lock_end;
-	struct scoutfs_key start;
-	struct scoutfs_key last;
-	struct scoutfs_key key;
-	struct scoutfs_key end;
-	LIST_HEAD(list);
-	int retries = 10;
-	int ret;
-
-	ret = setup_context(&ctx, rii);
-	if (ret < 0)
-		goto out;
-
-	if (ctx.nr_names > 0)
-		scoutfs_xattr_init_key(&last, ctx.inos[ctx.nr_inos -1],
-				       ctx.names[ctx.nr_names - 1].hash, U64_MAX);
-	else
-		scoutfs_inode_init_key(&last, ctx.inos[ctx.nr_inos - 1]);
-
-retry:
-	ret = scoutfs_client_get_roots(sb, &roots);
-	if (ret)
-		goto out;
-
-	scoutfs_inode_init_key(&key, ctx.inos[0]);
-
-	while (scoutfs_key_compare(&key, &last) <= 0) {
-		scoutfs_lock_get_fs_item_range(le64_to_cpu(key._sk_first), &lock_start, &lock_end);
-
-		start = key;
-		end = last;
-		if (scoutfs_key_compare(&lock_end, &end) < 0)
-			end = lock_end;
-
-		ret = scoutfs_forest_read_items_roots(sb, &roots, 0, &key, &lock_start,
-						      &start, &end, save_info_items, &ctx);
-		if (ret < 0)
-			goto out;
-
-		/* save each sorted batch, might have partial results for an inode */
-		sort_and_remove(&ctx.fs_items, &end);
-		list_splice_tail_init(&ctx.fs_items, &list);
-
-		key = end;
-		if (!scoutfs_key_is_ones(&key)) {
-			scoutfs_key_inc(&key);
-			advance_key_ino(&key, &ctx);
-		}
-	}
-
-	list_splice_tail_init(&list, &ctx.fs_items);
-	ret = copy_results_to_user(sb, &ctx, rii);
-out:
-	free_fs_items(&list);
-	free_fs_items(&ctx.fs_items);
-
-	ret = scoutfs_block_check_stale(sb, ret, &saved, &roots.fs_root.ref, &roots.logs_root.ref);
-	if (ret == -ESTALE && retries-- > 0)
-		goto retry;
-
-	free_context(&ctx);
-	return ret;
-}
@@ -1,10 +0,0 @@
-#ifndef _SCOUTFS_RAW_H_
-#define _SCOUTFS_RAW_H_
-
-int scoutfs_raw_read_meta_seq(struct super_block *sb,
-			      struct scoutfs_ioctl_raw_read_meta_seq *rms,
-			      struct scoutfs_ioctl_meta_seq *last_ret);
-int scoutfs_raw_read_inode_info(struct super_block *sb,
-				struct scoutfs_ioctl_raw_read_inode_info *rii);
-
-#endif
@@ -134,7 +134,7 @@ static int recov_finished(struct recov_info *recinf)

 static void timer_callback(struct timer_list *timer)
 {
-	struct recov_info *recinf = from_timer(recinf, timer, timer);
+	struct recov_info *recinf = timer_container_of(recinf, timer, timer);

 	recinf->timeout_fn(recinf->sb);
 }
@@ -1077,8 +1077,7 @@ static int next_log_merge_range(struct super_block *sb, struct scoutfs_btree_roo
 	struct scoutfs_key key;
 	int ret;

-	key = *start;
-	key.sk_zone = SCOUTFS_LOG_MERGE_RANGE_ZONE;
+	init_log_merge_key(&key, SCOUTFS_LOG_MERGE_RANGE_ZONE, 0, 0);
 	scoutfs_key_set_ones(&rng->start);

 	do {
@@ -4752,7 +4751,7 @@ int scoutfs_server_setup(struct super_block *sb)
 	INIT_DELAYED_WORK(&server->reclaim_dwork, reclaim_worker);

 	server->wq = alloc_workqueue("scoutfs_server",
-				     WQ_UNBOUND | WQ_NON_REENTRANT, 0);
+				     WQ_UNBOUND, 0);
 	if (!server->wq) {
 		kfree(server);
 		return -ENOMEM;
@@ -2392,8 +2392,7 @@ int scoutfs_srch_setup(struct super_block *sb)
 		goto out;

 	srinf->workq = alloc_workqueue("scoutfs_srch_compact",
-				       WQ_NON_REENTRANT | WQ_UNBOUND |
-				       WQ_HIGHPRI, 0);
+				       WQ_UNBOUND | WQ_HIGHPRI, 0);
 	if (!srinf->workq) {
 		ret = -ENOMEM;
 		goto out;
@@ -46,7 +46,6 @@ static struct scoutfs_tseq_entry *tseq_rb_next(struct scoutfs_tseq_entry *ent)
 	return rb_entry(node, struct scoutfs_tseq_entry, node);
 }

-#ifdef KC_RB_TREE_AUGMENTED_COMPUTE_MAX
 static bool tseq_compute_total(struct scoutfs_tseq_entry *ent, bool exit)
 {
 	loff_t total = 1 + tseq_node_total(ent->node.rb_left) +
@@ -61,17 +60,6 @@ static bool tseq_compute_total(struct scoutfs_tseq_entry *ent, bool exit)

 RB_DECLARE_CALLBACKS(static, tseq_rb_callbacks, struct scoutfs_tseq_entry,
 		     node, total, tseq_compute_total);
-#else
-
-static loff_t tseq_compute_total(struct scoutfs_tseq_entry *ent)
-{
-	return 1 + tseq_node_total(ent->node.rb_left) +
-	       tseq_node_total(ent->node.rb_right);
-}
-
-RB_DECLARE_CALLBACKS(static, tseq_rb_callbacks, struct scoutfs_tseq_entry,
-		     node, loff_t, total, tseq_compute_total);
-#endif

 void scoutfs_tseq_tree_init(struct scoutfs_tseq_tree *tree,
 			    scoutfs_tseq_show_t show)
@@ -16,6 +16,7 @@
 #include <linux/xattr.h>
 #include <linux/crc32c.h>
 #include <linux/posix_acl.h>
+#include <linux/iversion.h>

 #include "format.h"
 #include "inode.h"
@@ -47,7 +48,7 @@
 *  - add acl support and call generic xattr->handlers for SYSTEM
 */

-u32 scoutfs_xattr_name_hash(const char *name, unsigned int name_len)
+static u32 xattr_name_hash(const char *name, unsigned int name_len)
 {
 	return crc32c(U32_MAX, name, name_len);
 }
@@ -65,7 +66,8 @@ static unsigned int xattr_nr_parts(struct scoutfs_xattr *xat)
 				      le16_to_cpu(xat->val_len));
 }

-void scoutfs_xattr_init_key(struct scoutfs_key *key, u64 ino, u32 name_hash, u64 id)
+static void init_xattr_key(struct scoutfs_key *key, u64 ino, u32 name_hash,
+			   u64 id)
 {
 	*key = (struct scoutfs_key) {
 		.sk_zone = SCOUTFS_FS_ZONE,
@@ -186,10 +188,10 @@ static int get_next_xattr(struct inode *inode, struct scoutfs_key *key,
 		return -EINVAL;

 	if (name_len)
-		name_hash = scoutfs_xattr_name_hash(name, name_len);
+		name_hash = xattr_name_hash(name, name_len);

-	scoutfs_xattr_init_key(key, scoutfs_ino(inode), name_hash, id);
-	scoutfs_xattr_init_key(&last, scoutfs_ino(inode), U32_MAX, U64_MAX);
+	init_xattr_key(key, scoutfs_ino(inode), name_hash, id);
+	init_xattr_key(&last, scoutfs_ino(inode), U32_MAX, U64_MAX);

 	for (;;) {
 		ret = scoutfs_item_next(sb, key, &last, xat, xat_bytes, lock);
@@ -334,8 +336,8 @@ static int create_xattr_items(struct inode *inode, u64 id, struct scoutfs_xattr
 	int len;
 	int i;

-	scoutfs_xattr_init_key(&key, scoutfs_ino(inode),
-		       scoutfs_xattr_name_hash(xat->name, xat->name_len), id);
+	init_xattr_key(&key, scoutfs_ino(inode),
+		       xattr_name_hash(xat->name, xat->name_len), id);

 	for (i = 0; i < new_parts; i++) {
 		key.skx_part = i;
@@ -364,7 +366,7 @@ static int delete_xattr_items(struct inode *inode, u32 name_hash, u64 id,
 	int ret = 0;
 	int i;

-	scoutfs_xattr_init_key(&key, scoutfs_ino(inode), name_hash, id);
+	init_xattr_key(&key, scoutfs_ino(inode), name_hash, id);

 	/* dirty additional existing old items */
 	for (i = 1; i < nr_parts; i++) {
@@ -406,8 +408,8 @@ static int change_xattr_items(struct inode *inode, u64 id,
 	int i;
 	int ret;

-	scoutfs_xattr_init_key(&key, scoutfs_ino(inode),
-			       scoutfs_xattr_name_hash(xat->name, xat->name_len), id);
+	init_xattr_key(&key, scoutfs_ino(inode),
+		       xattr_name_hash(xat->name, xat->name_len), id);

 	/* dirty existing old items */
 	for (i = 0; i < old_parts; i++) {
@@ -993,38 +995,17 @@ unlock:
 	return ret;
 }

-#ifndef KC_XATTR_STRUCT_XATTR_HANDLER
-/*
- * Future kernels have this amazing hack to rewind the name to get the
- * skipped prefix.  We're back in the stone ages without the handler
- * arg, so we Just Know that this is possible.  This will become a
- * compat hook to either call the kernel's xattr_full_name(handler), or
- * our hack to use the flags as the prefix length.
- */
-static const char *full_name_hack(const char *name, int len)
-{
-	return name - len;
-}
-#endif

 static int scoutfs_xattr_get_handler
-#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
 		(const struct xattr_handler *handler, struct dentry *dentry,
 		 struct inode *inode, const char *name, void *value,
 		 size_t size)
 {
 	name = xattr_full_name(handler, name);
-#else
-		(struct dentry *dentry, const char *name,
-		 void *value, size_t size, int handler_flags)
-{
-	name = full_name_hack(name, handler_flags);
-#endif
 	return scoutfs_xattr_get(dentry, name, value, size);
 }

 static int scoutfs_xattr_set_handler
-#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
 		(const struct xattr_handler *handler,
 		 KC_VFS_NS_DEF
 		 struct dentry *dentry,
@@ -1032,12 +1013,6 @@ static int scoutfs_xattr_set_handler
 		 size_t size, int flags)
 {
 	name = xattr_full_name(handler, name);
-#else
-		(struct dentry *dentry, const char *name,
-		 const void *value, size_t size, int flags, int handler_flags)
-{
-	name = full_name_hack(name, handler_flags);
-#endif
 	return scoutfs_xattr_set(dentry, name, value, size, flags);
 }

@@ -1070,22 +1045,14 @@ static const struct xattr_handler scoutfs_xattr_security_handler = {
 };

 static const struct xattr_handler scoutfs_xattr_acl_access_handler = {
-#ifdef KC_XATTR_HANDLER_NAME
 	.name   = XATTR_NAME_POSIX_ACL_ACCESS,
-#else
-	.prefix = XATTR_NAME_POSIX_ACL_ACCESS,
-#endif
 	.flags  = ACL_TYPE_ACCESS,
 	.get    = scoutfs_acl_get_xattr,
 	.set    = scoutfs_acl_set_xattr,
 };

 static const struct xattr_handler scoutfs_xattr_acl_default_handler = {
-#ifdef KC_XATTR_HANDLER_NAME
 	.name   = XATTR_NAME_POSIX_ACL_DEFAULT,
-#else
-	.prefix = XATTR_NAME_POSIX_ACL_DEFAULT,
-#endif
 	.flags  = ACL_TYPE_DEFAULT,
 	.get    = scoutfs_acl_get_xattr,
 	.set    = scoutfs_acl_set_xattr,
@@ -1223,8 +1190,8 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
 		goto out;
 	}

-	scoutfs_xattr_init_key(&key, ino, 0, 0);
-	scoutfs_xattr_init_key(&last, ino, U32_MAX, U64_MAX);
+	init_xattr_key(&key, ino, 0, 0);
+	init_xattr_key(&last, ino, U32_MAX, U64_MAX);

 	for (;;) {
 		ret = scoutfs_item_next(sb, &key, &last, (void *)xat, bytes,
@@ -10,9 +10,6 @@ struct scoutfs_xattr_prefix_tags {

 extern const struct xattr_handler *scoutfs_xattr_handlers[];

-u32 scoutfs_xattr_name_hash(const char *name, unsigned int name_len);
-void scoutfs_xattr_init_key(struct scoutfs_key *key, u64 ino, u32 name_hash, u64 id);
-
 int scoutfs_xattr_get_locked(struct inode *inode, const char *name, void *buffer, size_t size,
 			     struct scoutfs_lock *lck);
 int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_len,
@@ -12,4 +12,4 @@ src/o_tmpfile_umask
 src/o_tmpfile_linkat
 src/mmap_stress
 src/mmap_validate
-src/watch_raw_inode_change
+src/totl-delta-inject
@@ -16,7 +16,7 @@ BIN := src/createmany			\
 	src/o_tmpfile_linkat		\
 	src/mmap_stress			\
 	src/mmap_validate		\
-	src/watch_raw_inode_change
+	src/totl-delta-inject

 DEPS := $(wildcard src/*.d)

@@ -171,6 +171,13 @@ t_filter_dmesg()
 	# orphan log trees reclaim is handled, not an error
 	re="$re|scoutfs .* reclaiming orphan log trees"

+	# nfs can emit a whole range of messages we can ignore
+	re="$re|Installing knfsd .*"
+	re="$re|nfsd: .*"
+	re="$re|NFSD: .*"
+	re="$re|RPC: .*"
+	re="$re|FS-Cache: .*"
+
 	# fencing tests force unmounts and trigger timeouts
 	re="$re|scoutfs .* forcing unmount"
 	re="$re|scoutfs .* reconnect timed out"
@@ -0,0 +1,32 @@
+== write via NFS, read both sides
+== POSIX ACL set via NFS, read both sides
+user::rw-
+user:22222:rw-
+group::r--
+mask::rw-
+other::r--
+
+user::rw-
+user:22222:rw-
+group::r--
+mask::rw-
+other::r--
+
+== POSIX ACL set on scoutfs, read via NFS
+user::rw-
+user:22222:rw-
+group::r--
+group:44444:r--
+mask::rw-
+other::r--
+
+== default ACL inheritance via NFS
+user::rw-
+user:22222:rwx	#effective:rw-
+group::r-x	#effective:r--
+mask::rw-
+other::r--
+
+== NFS read demand-stages a released file
+1
+== cleanup
@@ -8,10 +8,10 @@
 /mnt/test/test/data-prealloc/file-1: extents: 32
 /mnt/test/test/data-prealloc/file-2: extents: 32
 == any writes to region prealloc get full extents
-/mnt/test/test/data-prealloc/file-1: extents: 4
-/mnt/test/test/data-prealloc/file-2: extents: 4
-/mnt/test/test/data-prealloc/file-1: extents: 4
-/mnt/test/test/data-prealloc/file-2: extents: 4
+/mnt/test/test/data-prealloc/file-1: extents: 8
+/mnt/test/test/data-prealloc/file-2: extents: 8
+/mnt/test/test/data-prealloc/file-1: extents: 8
+/mnt/test/test/data-prealloc/file-2: extents: 8
 == streaming offline writes get full extents either way
 /mnt/test/test/data-prealloc/file-1: extents: 4
 /mnt/test/test/data-prealloc/file-2: extents: 4
@@ -20,8 +20,8 @@
 == goofy preallocation amounts work
 /mnt/test/test/data-prealloc/file-1: extents: 6
 /mnt/test/test/data-prealloc/file-2: extents: 6
-/mnt/test/test/data-prealloc/file-1: extents: 6
-/mnt/test/test/data-prealloc/file-2: extents: 6
+/mnt/test/test/data-prealloc/file-1: extents: 10
+/mnt/test/test/data-prealloc/file-2: extents: 10
 /mnt/test/test/data-prealloc/file-1: extents: 3
 /mnt/test/test/data-prealloc/file-2: extents: 3
 == block writes into region allocs hole
@@ -1,4 +0,0 @@
-== ensuring utils and module for old versions
-== unmounting test fs and removing test module
-== testing combinations of old and new format versions
-== restoring test module and mount
@@ -0,0 +1,6 @@
+== setup
+== concurrent quota mod and check across mounts
+== verify quota rules are consistent after race
+== verify file creation still works under quota
+file visible on mount 1
+== cleanup
@@ -0,0 +1,10 @@
+== setup three files contributing to totl 8888.0.0
+== merge baseline into fs_root
+8888.0.0 = 42, 3
+== inject (+128, +2) unbalances totl 8888.0.0
+8888.0.0 = 170, 5
+== unlink f3 (value 32) produces a -32/-1 delta
+8888.0.0 = 138, 4
+== inject (-128, -2) restores accounting for the remaining files
+8888.0.0 = 10, 2
+== cleanup
@@ -3,6 +3,7 @@ basic-block-counts.sh
 basic-bad-mounts.sh
 basic-posix-acl.sh
 basic-acl-consistency.sh
+basic-nfs.sh
 inode-items-updated.sh
 simple-inode-index.sh
 simple-staging.sh
@@ -18,7 +19,6 @@ offline-extent-waiting.sh
 move-blocks.sh
 projects.sh
 large-fragmented-free.sh
-format-version-forward-back.sh
 enospc.sh
 mmap.sh
 srch-safe-merge-pos.sh
@@ -29,6 +29,8 @@ totl-xattr-tag.sh
 basic-xattr-indx.sh
 quota.sh
 totl-merge-read.sh
+quota-invalidate-race.sh
+totl-delta-inject.sh
 lock-refleak.sh
 lock-shrink-consistency.sh
 lock-shrink-read-race.sh
@@ -0,0 +1,121 @@
+/*
+ * Test helper that calls SCOUTFS_IOC_INJECT_TOTL_DELTA to seed
+ * arbitrary totl deltas.
+ *
+ * Copyright (C) 2026 Versity Software, Inc.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <linux/types.h>
+
+#include "ioctl.h"
+
+/* Print the command line synopsis to stderr and exit with status 2. */
+static void usage(const char *prog)
+{
+	fprintf(stderr, "Usage: %s <mountpoint> <a>.<b>.<c> <total> <count>\n",
+		prog);
+	exit(2);
+}
+
+static int parse_s64(const char *s, int64_t *out)
+{
+	char *tail;
+	int64_t parsed;
+
+	errno = 0;	/* so a stale errno is not mistaken for a strtoll() error */
+	parsed = strtoll(s, &tail, 0);
+	if (errno != 0 || tail == s || *tail != '\0')
+		return -1;	/* strtoll() error, no digits, or trailing junk */
+	*out = parsed;	/* *out is only written once all of s has parsed */
+	return 0;
+}
+
+/*
+ * Parse "<a>.<b>.<c>" into abc[0..2] (skxt_a, skxt_b, skxt_c).  Each
+ * component must be an unsigned base-0 integer that starts with a digit:
+ * strtoull() alone would skip leading whitespace and accept a sign,
+ * silently wrapping "-1" to a huge value.  Returns 0 on success or -1 if
+ * s is malformed or out of range; abc[] may be partially written then.
+ */
+static int parse_dotted_name(const char *s, uint64_t abc[3])
+{
+	const char *p = s;
+	char *end;
+	int i;
+
+	for (i = 0; i < 3; i++) {
+		/* also rejects empty components and a truncated string */
+		if (*p < '0' || *p > '9')
+			return -1;
+		errno = 0;
+		abc[i] = strtoull(p, &end, 0);
+		if (errno || end == p)
+			return -1;
+
+		/* '.' follows the first two components, the NUL ends the last */
+		if (*end != (i < 2 ? '.' : '\0'))
+			return -1;
+		p = end + 1;
+	}
+	return 0;
+}
+
+/* Parse argv and issue a single INJECT_TOTL_DELTA ioctl on the mount. */
+int main(int argc, char **argv)
+{
+	struct scoutfs_ioctl_inject_totl_delta itd = {{0,}};
+	uint64_t abc[3];
+	int64_t total, count;
+	int status = 0;
+	int fd;
+	int i;
+
+	if (argc != 5)
+		usage(argv[0]);
+
+	if (parse_dotted_name(argv[2], abc) != 0 ||
+	    parse_s64(argv[3], &total) != 0 ||
+	    parse_s64(argv[4], &count) != 0) {
+		fprintf(stderr, "could not parse arguments\n");
+		usage(argv[0]);
+	}
+
+	/* fill in the ioctl argument from the parsed command line */
+	for (i = 0; i < 3; i++)
+		itd.name[i] = abc[i];
+	itd.total = total;
+	itd.count = count;
+
+	fd = open(argv[1], O_RDONLY | O_DIRECTORY);
+	if (fd < 0) {
+		fprintf(stderr, "open(%s): %s\n", argv[1], strerror(errno));
+		return 1;
+	}
+
+	if (ioctl(fd, SCOUTFS_IOC_INJECT_TOTL_DELTA, &itd) < 0) {
+		fprintf(stderr,
+			"INJECT_TOTL_DELTA(%" PRIu64 ".%" PRIu64 ".%" PRIu64
+			", total=%" PRId64 ", count=%" PRId64 "): %s\n",
+			abc[0], abc[1], abc[2], total, count, strerror(errno));
+		status = 1;
+	}
+
+	close(fd);
+	return status;
+}
@@ -1,664 +0,0 @@
-/*
- * Copyright (C) 2026 Versity Software, Inc.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
-#endif
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <time.h>
-#include <linux/types.h>
-#include <assert.h>
-#include <stdbool.h>
-
-#include "../../utils/src/util.h"
-#include "ioctl.h"
-#include "format.h"
-
-/*
- * This is a quick example of using the raw reading ioctls to get info
- * on inodes as they change.  We maintain an array of meta_seq items for
- * inodes that we've seen.  If we read the current meta_seq items and
- * see differences then we get inode info and update our array with what
- * we find.
- *
- * This only maintains one array and sorts it back and forth as we walk
- * the meta_seq items and then search by inode number.  This will
- * eventually use far too much cpu as the number of inodes increases.
- */
-
-#define MSF		"%llu.%llu"
-#define MSA(ms)		(ms)->meta_seq, (ms)->ino
-#define NERRF		"nerr %d (\"%s\")"
-#define NERRA(nerr)	nerr, strerror(-nerr)
-
-#define prerror(fmt, args...) \
-	fprintf(stderr, "error: "fmt"\n", ##args)
-
-#define prdebug(fmt, args...) \
-do { \
-	if (opts.debug) \
-		printf(fmt"\n", ##args); \
-} while (0)
-
-static struct opts {
-	bool debug;
-	char *path;
-	char *names;
-	size_t names_size;
-	size_t names_count;
-} opts;
-
-struct stats {
-	__u64 start;
-	__u64 last;
-
-	struct per_call {
-		__u64 begin;
-		__u64 calls;
-		__u64 time;
-		__u64 inos;
-	} rms, rii;
-
-	__u64 inodes;
-	__u64 add;
-	__u64 remove;
-	__u64 update;
-
-	unsigned lines;
-} stats;
-
-struct meta_seq_array {
-	size_t nr;
-	size_t alloc;
-	struct scoutfs_ioctl_meta_seq *ms;
-};
-
-#define INO_BATCH	1000
-/* *2 for gratuitous allowance for struct expansion */
-#define RESULTS_SIZE	(INO_BATCH * 2 * (sizeof(struct scoutfs_ioctl_raw_read_result) + \
-		                          sizeof(__u64) + \
-		                          180 /* ~= sizeof(struct scoutfs_inode) */ + \
-		                          sizeof(struct scoutfs_ioctl_inode_attr_x)))
-
-#define NSEC_PER_SEC 1000000000
-
-static __u64 get_ns(void)
-{
-	struct timespec tp;
-	int ret;
-
-	ret = clock_gettime(CLOCK_MONOTONIC, &tp);
-	if (ret != 0) {
-		ret = -errno;
-		prerror("clock_gettime() error: "NERRF, NERRA(ret));
-		exit(2);
-	}
-
-	return ((__u64)tp.tv_sec * NSEC_PER_SEC) + (__u64)tp.tv_nsec;
-}
-static void begin_call(struct per_call *pc)
-{
-	pc->begin = get_ns();
-}
-
-static void end_call(struct per_call *pc)
-{
-	pc->calls++;
-	pc->time += get_ns() - pc->begin;
-}
-
-static int expand_array(struct meta_seq_array *arr, size_t additional)
-{
-#define ALLOC_BATCH	(1024 * 1024 / (sizeof(struct scoutfs_ioctl_meta_seq)))
-	struct scoutfs_ioctl_meta_seq *ms;
-	size_t expand;
-
-	if (arr->nr + additional <= arr->alloc)
-		return 0;
-
-	expand = arr->alloc + ALLOC_BATCH;
-	ms = reallocarray(arr->ms, expand, sizeof(arr->ms[0]));
-	if (!ms) {
-		prerror("allocating ms array with %zu elements failed", expand);
-		return -ENOMEM;
-	}
-
-	arr->alloc = expand;
-	arr->ms = ms;
-
-	return 0;
-}
-
-static void inc_ms(struct scoutfs_ioctl_meta_seq *ms)
-{
-	if (++ms->ino == 0)
-		ms->meta_seq++;
-}
-
-static void set_ms(struct scoutfs_ioctl_meta_seq *ms, __u64 meta_seq, __u64 ino)
-{
-	ms->meta_seq = meta_seq;
-	ms->ino = ino;
-}
-
-static int compar_ms_ino(const void *A, const void *B)
-{
-	const struct scoutfs_ioctl_meta_seq *a = A;
-	const struct scoutfs_ioctl_meta_seq *b = B;
-
-	return a->ino < b->ino ? -1 : a->ino > b->ino ? 1 : 0;
-}
-
-static int compar_ms_meta_seq(const void *A, const void *B)
-{
-	const struct scoutfs_ioctl_meta_seq *a = A;
-	const struct scoutfs_ioctl_meta_seq *b = B;
-
-	return a->meta_seq < b->meta_seq ? -1 : a->meta_seq > b->meta_seq ? 1 :
-	       compar_ms_ino(A, B);
-}
-
-static int compar_u64(const void *A, const void *B)
-{
-	const __u64 *a = A;
-	const __u64 *b = B;
-
-	return *a < *b ? -1 : *a > *b ? 1 : 0;
-}
-
-struct bsearch_ind_key {
-	int (*compar)(const void *a, const void *b);
-	void *key;
-	size_t size;
-	void **index;
-};
-
-static int bsearch_ind_compar(const void *a, const void *b)
-{
-	const struct bsearch_ind_key *bik = (const void *)((unsigned long)a ^ 1);
-	int cmp;
-
-	/* this key hack only works if compar is always called where a is key and b is &base[..] */
-	assert((unsigned long)a & 1);
-	assert(!((unsigned long)b & 1));
-
-	cmp = bik->compar(bik->key, b);
-	if (cmp > 0)
-		*(bik->index) = (void *)b + bik->size;
-	else
-		*(bik->index) = (void *)b;
-
-	return cmp;
-}
-
-static size_t bsearch_ind(const void *key, const void *base, size_t nmemb, size_t size,
-			  int (*compar)(const void *a, const void *b))
-{
-	void *index = (void *)base;
-	struct bsearch_ind_key bik = {
-		.compar = compar,
-		.key = (void *)key,
-		.size = size,
-		.index = &index,
-	};
-
-	bsearch((void *)(((unsigned long)&bik) | 1), base, nmemb, size, bsearch_ind_compar);
-
-	return (index - base) / size;
-}
-
-/*
- * Generate a sorted list of inode numbers for the meta_seq items that
- * differ between the results from raw_read_meta_seq and the items we
- * have saved in our array. 
- */
-static int differing_inos(__u64 *inos, struct meta_seq_array *arr,
-			  struct scoutfs_ioctl_meta_seq *start,
-			  struct scoutfs_ioctl_meta_seq *last,
-			  struct scoutfs_ioctl_meta_seq *ms, size_t nr)
-{
-	size_t arr_last;
-	size_t a;
-	size_t m;
-	int nr_inos;
-	int cmp;
-	int i;
-	int n;
-
-	/* find where we're going to stop in arr */
-	arr_last = bsearch_ind(last, arr->ms, arr->nr, sizeof(arr->ms[0]), compar_ms_meta_seq);
-	if (arr_last < arr->nr && compar_ms_meta_seq(&arr->ms[arr_last], last) == 0)
-		arr_last++;
-
-	a = bsearch_ind(start, arr->ms, arr->nr, sizeof(arr->ms[0]), compar_ms_meta_seq);
-
-	for (m = 0, nr_inos = 0; (a < arr_last || m < nr) && nr_inos < INO_BATCH; ) {
-
-		prdebug("diffing: m %zu nr %zu | a %zu arr_last %zu | nr_inos %d",
-			m, nr, a, arr_last, nr_inos);
-		if (a < arr_last)
-			prdebug("  arr->ms[%zu] = "MSF, a, MSA(&arr->ms[a]));
-		if (m < nr)
-			prdebug("  ms[%zu] = "MSF, m, MSA(&ms[m]));
-
-		/* setup comparison to copy lesser or only */
-		if (a < arr_last && m < nr)
-			cmp = compar_ms_meta_seq(&arr->ms[a], &ms[m]);
-		else if (a < arr_last)
-			cmp = -1;
-		else
-			cmp = 1;
-
-		prdebug("  cmp %d", cmp);
-
-		if (cmp == 0) {
-			/* ignore both when they match */
-			a++;
-			m++;
-		} else if (cmp < 0) {
-			inos[nr_inos++] = arr->ms[a++].ino;
-		} else { /* cmp > 0 */
-			inos[nr_inos++] = ms[m++].ino;
-		}
-	}
-
-	/* if we didn't consume all the read meta_seq then we might need to clamp last */
-	if (m < nr && compar_ms_meta_seq(&ms[m], last) <= 0) {
-		*last = ms[m];
-		last->ino--; /* must be non-zero, can't wrap */
-	}
-
-	/* sort and remove duplicate inode numbers */
-	if (nr_inos > 0) {
-		qsort(inos, nr_inos, sizeof(inos[0]), compar_u64);
-		for (i = 1, n = 1; i < nr_inos; i++) {
-			if (inos[i] != inos[n - 1])
-				inos[n++] = inos[i];
-		}
-		nr_inos = n;
-	}
-
-	return nr_inos;
-}
-
-/*
- * We're not really validating the result stream.  We assume that the offset currently
- * points at an inode.  We fill the caller's ms with its info then iterate through
- * all its results until the next ino.
- */
-static ssize_t read_inode_results(void *buf, size_t off, size_t size,
-				  struct scoutfs_ioctl_meta_seq *found)
-{
-	struct scoutfs_ioctl_raw_read_result res;
-	size_t len;
-	__le64 ms;
-
-	found->ino = 0;
-
-	while (off < size) {
-		memcpy(&res, buf + off, sizeof(res));
-		prdebug("res %u %u", res.type, res.size);
-
-		if (res.type == SCOUTFS_IOC_RAW_READ_RESULT_INODE && found->ino != 0)
-			break;
-
-		off += sizeof(res);
-
-		switch(res.type) {
-			case SCOUTFS_IOC_RAW_READ_RESULT_INODE:
-				memcpy(&found->ino, buf + off, sizeof(__u64));
-				memcpy(&ms, buf + off + sizeof(__u64) +
-				       offsetof(struct scoutfs_inode, meta_seq), sizeof(__le64));
-				found->meta_seq = le64_to_cpu(ms);
-				prdebug("res ino %llu ms %llu", found->ino, found->meta_seq);
-				break;
-
-			case SCOUTFS_IOC_RAW_READ_RESULT_XATTR:
-				len = strlen((char *)buf + off) + 1;
-				prdebug("res xattr '%s' len %d: '%.*s'",
-					(char *)buf + off, 
-					(int)(res.size - len),
-					(int)(res.size - len),
-					(char *)buf + off + len);
-				break;
-		};
-		off += res.size;
-	}
-
-	return off;
-}
-
-/*
- * inos[] contains the inode numbers that we're interested in.  Get
- * their info and update our array with what we find.
- */
-static int read_inode_info(int fd, void *buf, struct meta_seq_array *arr, __u64 *inos, int nr_inos)
-{
-	struct scoutfs_ioctl_raw_read_inode_info rii;
-	struct scoutfs_ioctl_meta_seq found;
-	struct scoutfs_ioctl_meta_seq ms;
-	ssize_t off;
-	size_t size;
-	size_t ind;
-	size_t added;
-	int i;
-	int ret;
-
-	rii = (struct scoutfs_ioctl_raw_read_inode_info) {
-		.inos_ptr = (unsigned long)inos,
-		.inos_count = nr_inos,
-		.names_ptr = (unsigned long)opts.names,
-		.names_count = opts.names_count,
-		.results_ptr = (unsigned long)buf,
-		.results_size = RESULTS_SIZE,
-	};
-
-	begin_call(&stats.rii);
-	ret = ioctl(fd, SCOUTFS_IOC_RAW_READ_INODE_INFO, &rii);
-	if (ret < 0) {
-		ret = -errno;
-		prerror("READ_INODE_INFO ioctl failed: "NERRF, NERRA(ret));
-		goto out;
-	}
-	end_call(&stats.rii);
-
-	prdebug("gii ret %d", ret);
-
-	off = 0;
-	size = ret;
-	set_ms(&found, 0, 0);
-	added = 0;
-	i = 0;
-
-	/* sort by ino so we can search by ino for updates */
-	qsort(arr->ms, arr->nr, sizeof(arr->ms[0]), compar_ms_ino);
-
-	while (i < nr_inos) {
-		/* find next ino */
-		if (!found.ino && off < size) {
-			off = read_inode_results(buf, off, size, &found);
-			if (off < 0) {
-				ret = off;
-				goto out;
-			}
-			stats.rii.inos++;
-		}
-
-		if (i < nr_inos && (!found.ino || inos[i] < found.ino)) {
-			/* delete any record of inodes we didn't find */
-			set_ms(&ms, UINT64_MAX, inos[i]);
-			i++;
-
-		} else if (found.ino) {
-			/* update/add arr to match the found ino */
-			ms = found;
-			if (i < nr_inos && inos[i] == found.ino)
-				i++;
-			set_ms(&found, 0, 0);
-		}
-
-		/* find existing record */
-		ind = bsearch_ind(&ms, arr->ms, arr->nr, sizeof(arr->ms[0]), compar_ms_ino);
-		if (ind < arr->nr && arr->ms[ind].ino == ms.ino) {
-			/* update existing ino, can be marking for deletion */
-			prdebug("updating arr [%zu] ino %llu ms %llu -> %llu",
-					ind, ms.ino, arr->ms[ind].meta_seq, ms.meta_seq);
-			arr->ms[ind].meta_seq = ms.meta_seq;
-			if (ms.meta_seq == UINT64_MAX)
-				stats.remove++;
-			else
-				stats.update++;
-
-		} else if (ms.meta_seq != UINT64_MAX) {
-			/* append new found, maintaining existing sorting */
-			arr->ms[arr->nr + added] = ms;
-			prdebug("adding arr [%zu] ino %llu ms %llu",
-					arr->nr + added, ms.ino, ms.meta_seq);
-			added++;
-			stats.add++;
-		}
-	}
-
-	/* sort by seq again for next meta seq read */
-	arr->nr += added;
-	qsort(arr->ms, arr->nr, sizeof(arr->ms[0]), compar_ms_meta_seq);
-
-	/* and trim off any deletions */
-	while (arr->nr > 0 && arr->ms[arr->nr - 1].meta_seq == UINT64_MAX)
-		arr->nr--;
-
-	ret = 0;
-out:
-	return ret;
-}
-
-static double secs(u64 a_ns, u64 b_ns)
-{
-	return (double)(a_ns - b_ns) / NSEC_PER_SEC;
-}
-
-static double nr_per_sec(u64 nr, __u64 nsec)
-{
-	if (nsec == 0)
-		return 0;
-
-	return (double)nr / secs(nsec, 0);
-}
-
-static void print_stats(void)
-{
-	u64 now = get_ns();
-
-	if (secs(now, stats.last) < 1.0)
-		return;
-
-	if ((stats.lines++ % 16) == 0) {
-		printf("%6s | %-29s | %-23s | %-23s\n",
-			"", "inodes", "meta_seq", "inode_info");
-		printf("%6s | %8s %6s %6s %6s | %7s %7s %7s | %7s %7s %7s\n",
-			"now",
-			"total", "add", "remove", "update",
-			"calls", "inos", "inos/s",
-			"calls", "inos", "inos/s");
-	}
-
-	printf("%6.3lf | %8llu %6llu %6llu %6llu | %7llu %7llu %7.0lf | %7llu %7llu %7.0lf\n",
-		secs(now, stats.start),
-		stats.inodes, stats.add, stats.remove, stats.update,
-		stats.rms.calls, stats.rms.inos, nr_per_sec(stats.rms.inos, stats.rms.time),
-		stats.rii.calls, stats.rii.inos, nr_per_sec(stats.rms.inos, stats.rii.time));
-
-	stats.last = now;
-
-	{
-		struct stats save = stats;
-		stats = (struct stats) {
-			.start = save.start,
-			.last = save.last,
-			.lines = save.lines,
-		};
-	}
-}
-
-static void add_xattr(char *name)
-{
-	size_t len_null;
-	char *names;
-	int ret;
-
-	len_null = strlen(name) + 1;
-	names = realloc(opts.names, opts.names_size + len_null);
-	if (!names) {
-		ret = -errno;
-		prerror("allocation of xattr names buffer failed: "NERRF, NERRA(ret));
-		exit(3);
-	}
-
-	memcpy(names + opts.names_size, name, len_null);
-
-	opts.names = names;
-	opts.names_size += len_null;
-	opts.names_count++;
-}
-
-static bool parse_opts(int argc, char **argv)
-{
-	bool usage = false;
-	int c;
-
-	opts = (struct opts) {
-		.debug = false,
-	};
-
-        while ((c = getopt(argc, argv, "dp:x:")) != -1) {
-                switch(c) {
-                case 'd':
-                        opts.debug = true;
-                        break;
-                case 'p':
-                        opts.path = strdup(optarg);
-                        break;
-                case 'x':
-			add_xattr(optarg);
-                        break;
-                case '?':
-                        printf("Unknown option '%c'\n", optopt);
-			usage = true;
-                }
-	}
-
-	if (!usage) {
-		usage = true;
-		if (!opts.path)
-			printf("need -p path option\n");
-		else
-			usage = false;
-	}
-
-	if (usage) {
-		printf("\nusage:\n"
-		       " -d      | enable verbose debugging output\n"
-		       " -p PATH | path to file system to watch\n"
-		       " -x NAME | try to read named xattr with inodes, can be many\n"
-		      );
-		return false;
-	}
-
-	return true;
-}
-
-int main(int argc, char **argv)
-{
-	struct scoutfs_ioctl_raw_read_meta_seq rms = {0,};
-	struct scoutfs_ioctl_meta_seq *ms;
-	struct meta_seq_array arr = {0,};
-	__u64 *inos = NULL;
-	void *buf = NULL;
-	int fd = -1;
-	int nr_inos;
-	int nr;
-	int i;
-	int ret;
-
-	if (!parse_opts(argc, argv))
-		exit(1);
-
-	inos = calloc(INO_BATCH, sizeof(inos[0]));
-	buf = malloc(RESULTS_SIZE);
-	if (!inos || !buf) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	rms.results_ptr = (unsigned long)buf;
-	rms.results_size = min(RESULTS_SIZE, INO_BATCH * sizeof(struct scoutfs_ioctl_meta_seq));
-
-	fd = open(opts.path, O_RDONLY);
-	if (fd == -1) {
-		perror("error");
-		exit(1);
-	}
-
-	stats.start = get_ns();
-
-	for (;;) {
-		set_ms(&rms.start, 0, 0);
-		set_ms(&rms.end, UINT64_MAX, UINT64_MAX);
-
-		do {
-			begin_call(&stats.rms);
-			ret = ioctl(fd, SCOUTFS_IOC_RAW_READ_META_SEQ, &rms);
-			if (ret < 0) {
-				ret = -errno;
-				prerror("READ_META_SEQ ioctl failed, "
-					"start "MSF" end "MSF", "NERRF,
-					MSA(&rms.start), MSA(&rms.end), NERRA(ret));
-				goto out;
-			}
-			end_call(&stats.rms);
-			stats.rms.inos += ret;
-
-			prdebug("RMS last "MSF" ret %d:", MSA(&rms.last), ret);
-
-			nr = ret;
-			ms = buf;
-
-			if (opts.debug && nr > 0) {
-				for (i = 0; i < nr; i++)
-					prdebug(" [%u] "MSF"", i, MSA(&ms[i]));
-			}
-
-			nr_inos = differing_inos(inos, &arr, &rms.start, &rms.last, ms, nr);
-
-			if (nr_inos > 0) {
-				prdebug("diff inos %d:", nr_inos);
-				for (i = 0; i < nr_inos; i++)
-					prdebug(" [%u] %llu", i, inos[i]);
-
-				ret = expand_array(&arr, nr_inos) ?:
-				      read_inode_info(fd, buf, &arr, inos, nr_inos);
-				if (ret < 0)
-					goto out;
-			}
-
-			stats.inodes = arr.nr;
-			print_stats();
-
-			rms.start = rms.last;
-			inc_ms(&rms.start);
-
-		} while (rms.last.meta_seq != UINT64_MAX || rms.last.ino != UINT64_MAX);
-
-
-		sleep(1);
-	}
-
-	ret = 0;
-out:
-	if (fd >= 0)
-		close(fd);
-
-	free(inos);
-	free(buf);
-	free(arr.ms);
-	free(opts.names);
-
-	return ret;
-}
@@ -0,0 +1,86 @@
+#
+# Test basic scoutfs-nfs interactions:
+# - read/write
+# - stage/release and data wait
+# - nfs setacl/getacl mapping
+#
+
+t_require_commands scoutfs setfacl getfacl exportfs mount.nfs umount \
+		   stat dd cmp systemctl
+
+systemctl start nfs-server >> "$T_TMPDIR/nfs.log" 2>&1 || \
+	t_skip "nfs-server not available"
+
+# Keep file creation modes deterministic for the ACL golden output.
+umask 022
+
+EXPORT_OPTS="rw,async,no_root_squash,no_subtree_check,fsid=42"
+NFS_MNT="$T_TMP.nfs"
+NFS_DIR="$NFS_MNT/test/basic-nfs"
+
+filter() { sed "s@$T_TMPDIR@T_TMPDIR@g" | t_filter_fs; }
+gf() { getfacl -n --omit-header "$@" 2>/dev/null; }
+
+teardown_nfs()
+{
+	(
+		umount "$NFS_MNT"
+		exportfs -u "127.0.0.1:$T_M0"
+		exportfs -f
+		systemctl stop nfs-server
+		rmdir "$NFS_MNT"
+	) >> "$T_TMPDIR/nfs.log" 2>&1
+}
+trap teardown_nfs EXIT
+
+exportfs -u "127.0.0.1:$T_M0" >> "$T_TMPDIR/nfs.log" 2>&1 || true
+t_quiet mkdir -p "$NFS_MNT"
+exportfs -o "$EXPORT_OPTS" "127.0.0.1:$T_M0" >> "$T_TMPDIR/nfs.log" 2>&1
+mount.nfs -o vers=3,noac,actimeo=0 "127.0.0.1:$T_M0" "$NFS_MNT" >> "$T_TMPDIR/nfs.log" 2>&1
+
+test -d "$NFS_DIR" || t_fail "test dir $NFS_DIR not visible over NFS"
+
+echo "== write via NFS, read both sides"
+dd if=/dev/urandom bs=4096 count=1 of="$T_TMP.data" status=none
+cp "$T_TMP.data" "$NFS_DIR/file"
+cmp "$T_TMP.data" "$T_D0/file"
+cmp "$T_TMP.data" "$NFS_DIR/file"
+
+echo "== POSIX ACL set via NFS, read both sides"
+setfacl -m u:22222:rw "$NFS_DIR/file" 2>&1 | filter
+gf "$NFS_DIR/file"
+gf "$T_D0/file"
+
+echo "== POSIX ACL set on scoutfs, read via NFS"
+setfacl -m g:44444:r "$T_D0/file" 2>&1 | filter
+gf "$NFS_DIR/file"
+
+echo "== default ACL inheritance via NFS"
+mkdir "$NFS_DIR/d"
+setfacl -d -m u:22222:rwx "$NFS_DIR/d" 2>&1 | filter
+touch "$NFS_DIR/d/child"
+gf "$NFS_DIR/d/child"
+
+echo "== NFS read demand-stages a released file"
+dd if=/dev/urandom bs=4096 count=1 of="$T_TMP.big" status=none
+cp "$T_TMP.big" "$T_D0/big"
+sync
+vers=$(scoutfs stat -s data_version "$T_D0/big")
+t_quiet scoutfs release "$T_D0/big" -V "$vers" -o 0 -l 4K
+
+# NFS read against the offline file blocks in scoutfs_read waiting
+# for the data to come back online.
+cat "$NFS_DIR/big" > "$T_TMP.read" &
+read_pid=$!
+sleep 1
+scoutfs data-waiting -B 0 -I 0 -p "$T_D0" | wc -l
+
+t_quiet scoutfs stage "$T_TMP.big" "$T_D0/big" -V "$vers" -o 0 -l 4096
+wait "$read_pid"
+cmp "$T_TMP.big" "$T_TMP.read"
+
+echo "== cleanup"
+rm -f "$T_D0/file" "$T_D0/big"
+rm -rf "$T_D0/d"
+
+t_pass
@@ -1,184 +0,0 @@
-#
-# Test our basic ability to work with different format versions.
-#
-# The current code being tested has a range of supported format
-# versions.   For each of the older supported format versions we have a
-# git hash of the commit before the next greater version was introduced.
-# We build versions of the scoutfs utility and kernel module for the
-# last commit in tree that had a lesser supported version as its max
-# supported version.   We use those binaries to test forward and back
-# compat as new and old code works with a persistent volume with a given
-# format version.
-#
-
-# not supported on el8 or higher
-if [ $(source /etc/os-release ; echo ${VERSION_ID:0:1}) -gt 7 ]; then
-	t_skip_permitted "Unsupported OS version"
-fi
-
-mount_has_format_version()
-{
-	local mnt="$1"
-	local vers="$2"
-	local sysfs_fmt_vers="$(t_sysfs_path_from_mnt $SCR)/format_version"
-
-	test "$(cat $sysfs_fmt_vers)" == "$vers"
-}
-
-SCR="/mnt/scoutfs.scratch"
-
-MIN=$(modinfo $T_MODULE | awk '($1 == "scoutfs_format_version_min:"){print $2}')
-MAX=$(modinfo $T_MODULE | awk '($1 == "scoutfs_format_version_max:"){print $2}')
-
-echo "min: $MIN max: $MAX" > "$T_TMP.log"
-
-test "$MIN" -gt 0 -a "$MAX" -gt 0 -a "$MIN" -le "$MAX" || \
-	t_fail "parsed bad versions, min: $MIN max: $MAX"
-
-test "$MIN" == "$MAX" && \
-	t_skip "only one supported format version: $MIN"
-
-# prepare dir and wipe any weird old partial state
-builds="$T_RESULTS/format_version_builds"
-mkdir -p "$builds"
-
-echo "== ensuring utils and module for old versions"
-declare -A commits
-commits[1]=c3c4b080
-for vers in $(seq $MIN $((MAX - 1))); do
-	dir="$builds/$vers"
-	platform=$(uname -rp)
-	buildmark="$dir/buildmark"
-	commit="${commits[$vers]}"
-
-	test -n "$commit" || \
-		t_fail "no commit for vers $vers"
-
-	# have our files for this version
-	test "$(cat $buildmark 2>&1)" == "$platform" && \
-		continue
-
-	# build as one big sequence of commands that can return failure
-	(
-		set -o pipefail
-
-		rm -rf $dir							&&
-		mkdir -p $dir/building						&&
-		cd "$T_TESTS/.."						&&
-		git archive --format=tar "$commit" | tar -C "$dir/building" -xf - &&
-		cd -								&&
-		find $dir							&&
-		make -C "$dir/building"						&&
-		mv $dir/building/utils/src/scoutfs $dir				&&
-		mv $dir/building/kmod/src/scoutfs.ko $dir			&&
-		rm -rf $dir/building						&&
-		echo "$platform" > $buildmark					&&
-		find $dir							&&
-		cat $buildmark
-	) >> "$T_TMP.log" 2>&1 || t_fail "version $vers build failed"
-done
-
-echo "== unmounting test fs and removing test module"
-t_quiet t_umount_all
-t_quiet rmmod scoutfs
-
-echo "== testing combinations of old and new format versions"
-mkdir -p "$SCR"
-for vers in $(seq $MIN $((MAX - 1))); do
-	old_scoutfs="$builds/$vers/scoutfs"
-	old_module="$builds/$vers/scoutfs.ko"
-
-	echo "mkfs $vers" >> "$T_TMP.log"
-	t_quiet $old_scoutfs mkfs -f -Q 0,127.0.0.1,$T_SCRATCH_PORT "$T_EX_META_DEV" "$T_EX_DATA_DEV" \
-		|| t_fail "mkfs $vers failed"
-
-	echo "mount $vers with $vers" >> "$T_TMP.log"
-	t_quiet insmod $old_module
-	t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
-		"$T_EX_DATA_DEV" "$SCR"
-	t_quiet mount_has_format_version "$SCR" "$vers"
-
-	echo "creating files in $vers" >> "$T_TMP.log"
-	t_quiet touch "$SCR/file-"{1,2,3}
-	stat "$SCR"/file-* > "$T_TMP.stat" || \
-		t_fail "stat in $vers failed"
-
-	echo "remounting $vers fs with $MAX" >> "$T_TMP.log"
-	t_quiet umount "$SCR"
-	rmmod scoutfs
-	insmod "$T_MODULE"
-	t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
-		"$T_EX_DATA_DEV" "$SCR"
-	t_quiet mount_has_format_version "$SCR" "$vers"
-
-	echo "verifying stat in $vers with $MAX" >> "$T_TMP.log"
-	diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
-
-	echo "keep/update/del existing, create new in $vers" >> "$T_TMP.log"
-	t_quiet touch "$SCR/file-2"
-	t_quiet rm -f "$SCR/file-3"
-	t_quiet touch "$SCR/file-4"
-	stat "$SCR"/file-* > "$T_TMP.stat" || \
-		t_fail "stat in $vers failed"
-
-	echo "remounting $vers fs with $vers" >> "$T_TMP.log"
-	t_quiet umount "$SCR"
-	rmmod scoutfs
-	insmod "$old_module"
-	t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
-		"$T_EX_DATA_DEV" "$SCR"
-	t_quiet mount_has_format_version "$SCR" "$vers"
-
-	echo "verifying stat in $vers with $vers" >> "$T_TMP.log"
-	diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
-
-	echo "changing format vers to $MAX" >> "$T_TMP.log"
-	t_quiet umount "$SCR"
-	rmmod scoutfs
-	t_quiet scoutfs change-format-version -F -V $MAX $T_EX_META_DEV "$T_EX_DATA_DEV"
-
-	echo "mount fs $MAX with old $vers should fail" >> "$T_TMP.log"
-	insmod "$old_module"
-	mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
-		"$T_EX_DATA_DEV" "$SCR" >> "$T_TMP.log" 2>&1
-	if [ "$?" == "0" ]; then
-		umount "$SCR"
-		t_fail "old code ver $vers able to mount new ver $MAX"
-	fi
-
-	echo "remounting $MAX fs with $MAX" >> "$T_TMP.log"
-	rmmod scoutfs
-	insmod "$T_MODULE"
-	t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
-		"$T_EX_DATA_DEV" "$SCR"
-	t_quiet mount_has_format_version "$SCR" "$MAX"
-
-	echo "verifying stat in $MAX with $MAX" >> "$T_TMP.log"
-	diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
-
-	echo "keep/update/del existing, create new in $MAX" >> "$T_TMP.log"
-	t_quiet touch "$SCR/file-2"
-	t_quiet rm -f "$SCR/file-4"
-	t_quiet touch "$SCR/file-5"
-	stat "$SCR"/file-* > "$T_TMP.stat" || \
-		t_fail "stat in $MAX failed"
-
-	echo "remounting $MAX fs with $MAX again" >> "$T_TMP.log"
-	t_quiet umount "$SCR"
-	t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
-		"$T_EX_DATA_DEV" "$SCR"
-	t_quiet mount_has_format_version "$SCR" "$MAX"
-
-	echo "verifying stat in $MAX with $MAX again" >> "$T_TMP.log"
-	diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
-
-	echo "done with old vers $vers" >> "$T_TMP.log"
-	t_quiet umount "$SCR"
-	rmmod scoutfs
-done
-
-echo "== restoring test module and mount"
-insmod "$T_MODULE"
-t_mount_all
-
-t_pass
@@ -0,0 +1,70 @@
+#
+# Regression for the BUG_ON in scoutfs_quota_invalidate when a concurrent
+# ruleset read on one mount races with a quota rule modification.
+#
+
+t_require_mounts 2
+
+TEST_UID=22222
+SET_UID="--ruid=$TEST_UID --euid=$TEST_UID"
+
+echo "== setup"
+mkdir -p "$T_D0/dir"
+chown --quiet $TEST_UID "$T_D0/dir"
+
+# totl xattr gives quota checks something to consult
+setfattr -n scoutfs.totl.test.1.1.1 -v 1 "$T_D0/dir"
+
+echo "== concurrent quota mod and check across mounts"
+
+(
+	for i in $(seq 1 20); do
+		scoutfs quota-add -p "$T_M0" \
+			-r "1 1,L,- 1,L,- $i,L,- I 999999 -" 2>/dev/null
+		scoutfs quota-del -p "$T_M0" \
+			-r "1 1,L,- 1,L,- $i,L,- I 999999 -" 2>/dev/null
+	done
+) &
+MOD_PID=$!
+
+# same mount as the mod: races local read against invalidate
+(
+	for i in $(seq 1 50); do
+		setpriv $SET_UID touch "$T_D0/dir/race0_$i" 2>/dev/null
+		rm -f "$T_D0/dir/race0_$i"
+	done
+) &
+CHECK0_PID=$!
+
+# other mount: drives cross-node lock traffic
+(
+	for i in $(seq 1 50); do
+		setpriv $SET_UID touch "$T_D1/dir/race1_$i" 2>/dev/null
+		rm -f "$T_D1/dir/race1_$i"
+	done
+) &
+CHECK1_PID=$!
+
+t_quiet wait $MOD_PID
+t_quiet wait $CHECK0_PID
+t_quiet wait $CHECK1_PID
+
+echo "== verify quota rules are consistent after race"
+scoutfs quota-wipe -p "$T_M0"
+scoutfs quota-list -p "$T_M0"
+
+echo "== verify file creation still works under quota"
+scoutfs quota-add -p "$T_M0" -r "1 1,L,- 1,L,- 1,L,- I 999999 -"
+sync
+echo 1 > $(t_debugfs_path)/drop_weak_item_cache
+echo 1 > $(t_debugfs_path)/drop_quota_check_cache
+setpriv $SET_UID touch "$T_D0/dir/verify_file"
+test -f "$T_D1/dir/verify_file" && echo "file visible on mount 1"
+rm -f "$T_D0/dir/verify_file"
+scoutfs quota-wipe -p "$T_M0"
+
+echo "== cleanup"
+setfattr -x scoutfs.totl.test.1.1.1 "$T_D0/dir"
+rm -rf "$T_D0/dir"
+
+t_pass
@@ -0,0 +1,43 @@
+#
+# Exercise the SCOUTFS_IOC_INJECT_TOTL_DELTA ioctl that injects totl
+# deltas directly via totl-delta-inject(1).
+#
+
+t_require_commands setfattr scoutfs sync rm touch totl-delta-inject
+
+# force a log merge then read-xattr-totals filtered to our own keys
+read_totals()
+{
+	t_force_log_merge
+	sync
+	echo 1 > $(t_debugfs_path)/drop_weak_item_cache
+	scoutfs read-xattr-totals -p "$T_M0" | \
+		grep -E '^8888\.' || true
+}
+
+echo "== setup three files contributing to totl 8888.0.0"
+touch "$T_D0/f1" "$T_D0/f2" "$T_D0/f3"
+setfattr -n scoutfs.totl.inj.8888.0.0 -v 2  "$T_D0/f1"
+setfattr -n scoutfs.totl.inj.8888.0.0 -v 8  "$T_D0/f2"
+setfattr -n scoutfs.totl.inj.8888.0.0 -v 32 "$T_D0/f3"
+
+echo "== merge baseline into fs_root"
+read_totals
+
+echo "== inject (+128, +2) unbalances totl 8888.0.0"
+totl-delta-inject "$T_M0" 8888.0.0 128 2
+read_totals
+
+echo "== unlink f3 (value 32) produces a -32/-1 delta"
+rm -f "$T_D0/f3"
+read_totals
+
+echo "== inject (-128, -2) restores accounting for the remaining files"
+totl-delta-inject "$T_M0" 8888.0.0 -128 -2
+read_totals
+
+echo "== cleanup"
+rm -f "$T_D0/f1" "$T_D0/f2"
+read_totals
+
+t_pass
Author	SHA1	Message	Date
Auke Kok	99b87371c6	Drop KC_HAS_SET_ACL RHEL7 was the only conditional user of this define, but since support for that is removed, these can be dropped. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-08 11:53:16 -07:00
Auke Kok	d929b06292	Collapse scoutfs_rename2 into scoutfs_rename_common. scoutfs_rename2 was a thin shim that validated flags and forwarded to scoutfs_rename_common. It existed because the old el7 RHEL_IOPS_WRAPPER path used a non-flag-taking .rename op alongside .rename2; with that path gone there is only one rename method, and the wrapper has no purpose. Move the RENAME_NOREPLACE flag validation into scoutfs_rename_common and point the directory inode_operations .rename slot at it directly. The symlink inode_operations already used scoutfs_rename_common, so this also makes symlink rename consistently reject unknown flags instead of silently accepting them. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-08 11:27:46 -07:00
Auke Kok	f4a28f1f04	Remove KC_STACK_TRACE_SAVE compat for el7. el8 already provides stack_trace_save() and stack_trace_print() in linux/stacktrace.h, so the legacy save_stack_trace/print_stack_trace fallback inlines are dead. Drop the detection stanza and the inlines. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-08 11:27:33 -07:00
Auke Kok	56526e617f	Remove KC_MM_VM_FAULT_T compat for el7. el8 already provides vm_fault_t and vmf_error(), so the fallback typedef and inline are dead. Drop the detection stanza and the two function-signature ifdefs in data.c that switched between the pre-v4.11 and modern fault handler prototypes. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	77b8d3cb7e	Remove format-version 1 test. Only el7 was capable of testing this format version, and el7 is no longer supported. Remove the test. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	77156d695f	Remove list_lru_walk_* el7 compat. The original patch added compat for both el7 and higher kernels, so this just drops the el7 parts. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	9f6d46ab66	Remove list_lru_shrink_count() and list_lru_shrink_walk() compat for el7 Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	0b669980d0	Remove .remap_pages method for el7 Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	99eb9c2c45	Remove kc_inode_dio_end compat for el7. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	2ce496167c	Remove __kernel_old_timeval compat for el7. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	e7c8881ca4	Remove KC_XATTR_HANDLER_NAME compat for el7 Every kernel now supports .name instead of .prefix. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	505d7f8198	Remove el7 RHEL_IOPS_WRAPPER and KC_LINUX_AIO_KI_LEFT These were kind of globbed together. This removes a large amount of duplicate method definitions. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	32845f85c3	Remove .aio_read and .aio_write methods. This removes two large duplicate code blocks. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	1af1040631	Remove KC_FILE_AOPS_READAHEAD compat for el7 Thanks to RH backporting this 5.x kernel feature, we can drop a large chunk of code here. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	c81832a2c6	Remove MS_* (mount) flags for el7 These are obsolete, we won't need their backwards compatible versions anymore. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	986efe1ce1	All workqueues are non-reentrant since el8 Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	2944ea6424	remove generic_file_buffered_write backport for el7 We still need to keep KC_GENERIC_PERFORM_WRITE_KIOCB_IOV_ITER for el8. So kc_generic_perform_write remains in place for now. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	16af2fe5ff	Remove KC_CPU_NOTIFIER compat for el7 Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	99415423ae	Remove timespec64 compat for el7 Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	c83e37c1c7	Remove kc_sock_create_kern compat for el7 Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	d611cf1368	Remove kc_get_sock/peername compat for el7 Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	a3b77c224f	Remove KC_XATTR_STRUCT_XATTR_HANDLER compat for el7 This includes removal of a lot of double function definitions. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	1824980d29	Remove KC_BIO_BI_STATUS compat for el7 Unwrap the remaining stubs. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	166238abc7	Remove KC_BIO_BI_OPF compat for el7 Unwrap the remaining stubs everywhere used. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	84a3e73086	Remove memalloc_noio_save/_restore compat for el7 Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	7c0c1e6f38	Remove __percpu_counter_add compat for el7 Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	276034f89d	Remove iversion.h compat for el7 For simplicity we just include the header here now that we don't need it in the kernelcompat anymore. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	9102dc0136	Remove setattr_prepare() compat for el7 Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	94d06e65bc	Remove KC_RB_TREE_AUGMENTED_COMPUTE_MAX compat for el7 Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	fcf94498fb	Remove kc_posix_acl_valid compat. This is a pre-el7 remnant, possibly from a really old rhel9 version, and was already effectively a stub. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	3d9a7918c4	Remove KC_FMODE_KABI_ITERATE compat for el7 Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	64587c505a	Remove backing_dev_info compat for el7 Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	31c9ea40ce	Remove shrinker compat for el7 Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	e1a8b17e0c	Remove posix_acl_create We're now using __posix_acl_create instead. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Auke Kok	845b43d29c	Remove use of d_materialise_unique This is no longer used. We always d_splice_alias. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-05 09:49:45 -07:00
Zach Brown	9ea53a160f	Merge pull request #320 from versity/auke/v1_32_release v1.32 Release	2026-06-03 11:51:18 -07:00
Auke Kok	cd73319d4d	v1.32 Release Finish the release notes for the 1.32 release. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-06-03 11:34:21 -07:00
Zach Brown	24aeb0175f	Merge pull request #319 from versity/auke/nfs_setfacl nfs setfacl + test (needs nfs-utils)	2026-06-03 10:06:17 -07:00
Auke Kok	f8f661d79c	Add basic NFS tests. This depends on nfs-utils being installed on the host. Without it it will skip, and count as a failure. It starts nfs-server and does a bare exportfs. - Tests basic read/write/stage/release/data wait. - Tests setfacl/getfacl. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-05-28 12:01:29 -07:00
Zach Brown	07e90422ee	Merge pull request #293 from versity/auke/data_prealloc_min scaling prealloc.	2026-05-28 09:40:31 -07:00
Zach Brown	634ca720c9	Merge pull request #318 from versity/auke/timer_container_of Use timer_container_of with fallback for from_timer -> el9.8 support	2026-05-28 09:38:19 -07:00
Auke Kok	fa560016d4	Register .set_acl unconditionally to fix POSIX ACL writes over NFS Scoutfs has supported posix ACLs through the xattr handler table, which allowed NFS to fetch them through this sideband, which worked for older kernels. With recent changes we've pulled in .get_acl because the mainline kernel is changing how ACL ops are called. But we still left .set_acl unreachable. This meant that on el9.7 nfs clients could now reach .get_acl, but still not set them. With this change, we're finally exposing .set_acl consistently across all el releases and allowing nfs clients to both get and set posix ACLs. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-05-28 09:02:47 -07:00
Auke Kok	1f1e3e9c6a	Use timer_container_of with fallback for from_timer El9.8 backported the upstream v6.15.* rename of from_timer to timer_container_of. Switch the two callers in fence.c and recov.c to the new style and add a simple kcompat define for older kernels. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-05-26 17:45:53 -04:00
Auke Kok	5a6523ecf4	Ramping up data preallocation Ramps up data preallocation based on the number of online blocks. This results in a simple 2<<n block allocation pattern until n=11 (2048) - the default value of data_prealloc_blocks. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-05-19 19:18:53 -07:00
Zach Brown	fece0a9372	Merge pull request #310 from versity/zab/v1.31 v1.31 Release	2026-05-06 10:37:07 -07:00
Zach Brown	aa432727f2	v1.31 Release Finish the release notes for the 1.31 release. Signed-off-by: Zach Brown <zab@versity.com>	2026-05-05 14:29:18 -07:00
Zach Brown	ceebadd139	Merge pull request #308 from versity/auke/totl-delta-repair totl key repair	2026-05-05 13:05:57 -07:00
Zach Brown	4b4ddc9ded	Merge pull request #298 from versity/auke/double_unlock_dw_truncate Fix double unlock in scoutfs_setattr data_wait error path	2026-05-04 09:52:29 -07:00
Zach Brown	94d3ece590	Merge pull request #299 from versity/auke/cond_resched_block_free Add cond_resched in block_free_work	2026-05-04 09:49:43 -07:00
Auke Kok	6d5517614b	Fix double unlock in scoutfs_setattr data_wait error path When scoutfs_setattr truncates a file with offline extents, it unlocks the inode lock before calling scoutfs_data_wait to wait for the data to be staged. If data_wait returns any error, the code jumps to 'goto out' which calls scoutfs_unlock again, thus double-unlocking the lock. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-05-04 09:48:54 -07:00
Auke Kok	10279d0b23	Add test exercising the totl delta inject ioctl. Skews a totl twice, restores it, and intersperses setfattr/unlink to exercise both injected and naturally-produced deltas. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-05-04 09:43:01 -07:00
Zach Brown	443c34309f	Merge pull request #303 from versity/auke/clang_build_werr 3 minor clang things	2026-05-04 09:42:43 -07:00
Auke Kok	5c81a979d5	Add SCOUTFS_IOC_INJECT_TOTL_DELTA ioctl. Inject a signed (total, count) delta at a totl key. No validity checking. Requires CAP_SYS_ADMIN. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-05-04 09:42:42 -07:00
Zach Brown	ec38b6e1c8	Merge pull request #305 from versity/auke/block_submit_bio_err Set BLOCK_BIT_ERROR on bio submit failure during forced unmount	2026-05-04 09:35:43 -07:00
Zach Brown	8e0066b231	Merge pull request #309 from versity/auke/quota_invalidate_race fix and test - quota invalidate race	2026-05-04 09:34:26 -07:00
Zach Brown	a0fda5b735	Merge pull request #307 from versity/zab/next_merge_range_zero Search all merge range items for next	2026-05-04 09:29:54 -07:00
Auke Kok	fc56a69d8f	Add quota invalidate race regression test Run concurrent quota add/del on one mount against rapid file creation and deletion on both mounts to exercise the race fixed in the previous commit. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-05-02 13:19:31 -07:00
Auke Kok	c8bc42ccdb	Fix quota invalidate race with concurrent ruleset read A quota check holds the quota cluster lock for READ and marks the cached ruleset EBUSY while loading rules. A quota mod on the same mount holds the lock for WRITE (compatible with the local READ) and calls scoutfs_quota_invalidate(), tripping BUG_ON(rs == ERR_PTR(-EBUSY)). Make invalidate skip EBUSY so the reader's claim is preserved, and have scoutfs_quota_mod_rule wait for the reader to finish before calling invalidate. Without the wait, the in-flight reader would publish its stale ruleset after invalidate runs, leaving the cache stale until the next invalidation. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-05-02 13:19:31 -07:00
Zach Brown	4db0a48fe4	Search all merge range items for next When searching for the next least merge range we need to sweep all the stored items because they're interleaved with respect to key sorting because we've clobbered the zone. To search all of them we need to start from 0, not from the caller's start key after setting the zone. If the caller happens to provide a start key with a small zone but large other fields (totl keys with sufficiently large identifiers) we can miss ranges. Signed-off-by: Zach Brown <zab@zabbo.net>	2026-04-29 10:17:38 -07:00
Auke Kok	ac1ab8e87f	Add cond_resched in block_free_work I'm seeing consistent CPU soft lockups in block_free_work on my bare metal system that aren't reached by VM instances. The reason is that the bare metal machine has a ton more memory available causing the block free work queue to grow much larger in size, and then it has so much work that it can take 30+ seconds before it goes through it all. This is all with a debug kernel. A non debug kernel will likely zoom through the outstanding work here at a much faster rate. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-04-22 13:39:32 -07:00
Auke Kok	8bfd35db0b	Set BLOCK_BIT_ERROR on bio submit failure during forced unmount block_submit_bio will return -ENOLINK if called during a forced shutdown, the bio is never submitted, and thus no completion callback will fire to set BLOCK_BIT_ERROR. Any other task waiting for this specific bp will end up waiting forever. To fix, fall through to the existing block_end_io call on the error path instead of returning directly. That means moving the forcing_unmount check past the setup calls so block_end_io's bookkeeping stays balanced. block_end_io then sets BLOCK_BIT_ERROR and wakes up waiters just as it would on a failed async completion. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-04-20 17:01:12 -07:00
Auke Kok	019125d86d	Don't swallow invalid message error A malformed message encountered here increases the counter, but doesn't tear down the connection because of the nested for loops. The comments indicate that that is the expected behavior - a misbehaving client should not be tolerated. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-04-15 17:02:40 -07:00
Auke Kok	347e27acec	Fix leak in client side lock invalidation Clang's scan-build found this leak when we get an invalidation for a lock we no longer have. Free ireq to fix. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-04-15 16:35:10 -07:00
Auke Kok	3ce5d47f2c	Initialize resp_data to silence clang uninitialized warning Clang flow analysis flags resp_data in process_response as possibly uninitialized when find_request returns NULL. kmod/src/net.c:533:6: error: variable 'resp_data' is used uninitialized whenever 'if' condition is false [-Werror,-Wsometimes-uninitialized] In practice the read is harmless because resp_func stays NULL in that path and call_resp_func only dereferences resp_data when resp_func is non-NULL. Initialize at declaration. Signed-off-by: Auke Kok <auke.kok@versity.com>	2026-04-15 14:06:46 -07:00