Remove update_finalized_and_inc_nr() missed during rebase

Signed-off-by: Chris Kirby <ckirby@versity.com>
Finalize open log tree in one commit
2026-06-09 21:22:36 +00:00 · 2025-11-03 14:42:27 -06:00 · 2025-11-03 14:17:32 -06:00 · 2025-11-03 14:16:52 -06:00 · 2025-10-31 09:08:17 -07:00 · 2025-10-30 14:47:18 -07:00
29 changed files with 638 additions and 126 deletions
@@ -14,6 +14,7 @@ scoutfs-y +=			\
 	alloc.o			\
 	block.o			\
 	btree.o			\
+	check.o			\
 	client.o		\
 	counters.o		\
 	data.o			\
@@ -278,6 +278,14 @@ ifneq (,$(shell grep 'int ..mknod. .struct user_namespace' include/linux/fs.h))
 ccflags-y += -DKC_VFS_METHOD_USER_NAMESPACE_ARG
 endif

+#
+# v6.2-rc1-2-gabf08576afe3
+#
+# fs: vfs methods use struct mnt_idmap instead of struct user_namespace
+ifneq (,$(shell grep 'int vfs_mknod.struct mnt_idmap' include/linux/fs.h))
+ccflags-y += -DKC_VFS_METHOD_MNT_IDMAP_ARG
+endif
+
 #
 # v5.17-rc2-21-g07888c665b40
 #
@@ -462,3 +470,19 @@ ifneq (,$(shell grep 'struct list_lru_one \*list, spinlock_t \*lock, void \*cb_a
 ccflags-y += -DKC_LIST_LRU_WALK_CB_LIST_LOCK
 endif

+#
+# v5.1-rc4-273-ge9b98e162aa5
+#
+# introduce stack trace helpers
+#
+ifneq (,$(shell grep '^unsigned int stack_trace_save' include/linux/stacktrace.h))
+ccflags-y += -DKC_STACK_TRACE_SAVE
+endif
+
+# v6.1-rc1-4-g7420332a6ff4
+#
+# .get_acl() method now has dentry arg (and mnt_idmap). The old get_acl has been renamed
+# to get_inode_acl() and is still available as well, but has an extra rcu param.
+ifneq (,$(shell grep 'struct posix_acl ...get_acl..struct mnt_idmap ., struct dentry' include/linux/fs.h))
+ccflags-y += -DKC_GET_ACL_DENTRY
+endif
@@ -107,8 +107,15 @@ struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct s
 	return acl;
 }

+#ifdef KC_GET_ACL_DENTRY
+struct posix_acl *scoutfs_get_acl(KC_VFS_NS_DEF
+				  struct dentry *dentry, int type)
+{
+	struct inode *inode = dentry->d_inode;
+#else
 struct posix_acl *scoutfs_get_acl(struct inode *inode, int type)
 {
+#endif
 	struct super_block *sb = inode->i_sb;
 	struct scoutfs_lock *lock = NULL;
 	struct posix_acl *acl;
@@ -201,8 +208,15 @@ out:
 	return ret;
 }

+#ifdef KC_GET_ACL_DENTRY
+int scoutfs_set_acl(KC_VFS_NS_DEF
+		    struct dentry *dentry, struct posix_acl *acl, int type)
+{
+	struct inode *inode = dentry->d_inode;
+#else
 int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 {
+#endif
 	struct super_block *sb = inode->i_sb;
 	struct scoutfs_lock *lock = NULL;
 	LIST_HEAD(ind_locks);
@@ -240,7 +254,12 @@ int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value,
 	if (!IS_POSIXACL(dentry->d_inode))
 		return -EOPNOTSUPP;

+#ifdef KC_GET_ACL_DENTRY
+	acl = scoutfs_get_acl(KC_VFS_INIT_NS
+			      dentry, type);
+#else
 	acl = scoutfs_get_acl(dentry->d_inode, type);
+#endif
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl == NULL)
@@ -286,7 +305,11 @@ int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *v
 		}
 	}

+#ifdef KC_GET_ACL_DENTRY
+	ret = scoutfs_set_acl(KC_VFS_INIT_NS dentry, acl, type);
+#else
 	ret = scoutfs_set_acl(dentry->d_inode, acl, type);
+#endif
 out:
 	posix_acl_release(acl);

@@ -1,9 +1,14 @@
 #ifndef _SCOUTFS_ACL_H_
 #define _SCOUTFS_ACL_H_

+#ifdef KC_GET_ACL_DENTRY
+struct posix_acl *scoutfs_get_acl(KC_VFS_NS_DEF struct dentry *dentry, int type);
+int scoutfs_set_acl(KC_VFS_NS_DEF struct dentry *dentry, struct posix_acl *acl, int type);
+#else
 struct posix_acl *scoutfs_get_acl(struct inode *inode, int type);
-struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct scoutfs_lock *lock);
 int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+#endif
+struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct scoutfs_lock *lock);
 int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type,
 			   struct scoutfs_lock *lock, struct list_head *ind_locks);
 #ifdef KC_XATTR_STRUCT_XATTR_HANDLER
@@ -857,7 +857,7 @@ static int find_zone_extent(struct super_block *sb, struct scoutfs_alloc_root *r
 		.zone = SCOUTFS_FREE_EXTENT_ORDER_ZONE,
 	};
 	struct scoutfs_extent found;
-	struct scoutfs_extent ext;
+	struct scoutfs_extent ext = {0,};
 	u64 start;
 	u64 len;
 	int nr;
@@ -23,6 +23,7 @@
 #include <linux/random.h>
 #include <linux/sched/mm.h>
 #include <linux/list_lru.h>
+#include <linux/stacktrace.h>

 #include "format.h"
 #include "super.h"
@@ -80,6 +81,8 @@ struct block_private {
 		struct page *page;
 		void *virt;
 	};
+	unsigned int stack_len;
+	unsigned long stack[10];
 };

 #define TRACE_BLOCK(which, bp)									\
@@ -100,7 +103,17 @@ static __le32 block_calc_crc(struct scoutfs_block_header *hdr, u32 size)
 	return cpu_to_le32(calc);
 }

-static struct block_private *block_alloc(struct super_block *sb, u64 blkno)
+/*
+ * Record the current call stack in the block so that it can be printed
+ * later by print_block_stack().  The final argument asks
+ * stack_trace_save() to skip the first two entries.
+ * NOTE(review): presumably noinline keeps that skip count stable so the
+ * saved trace starts at the caller of block_alloc() -- confirm.
+ */
+static noinline void save_block_stack(struct block_private *bp)
+{
+	bp->stack_len = stack_trace_save(bp->stack, ARRAY_SIZE(bp->stack), 2);
+}
+
+/*
+ * Print the stack entries that save_block_stack() stored in the block,
+ * passing 1 as the indentation ("spaces") argument.
+ */
+static void print_block_stack(struct block_private *bp)
+{
+	stack_trace_print(bp->stack, bp->stack_len, 1);
+}
+
+static noinline struct block_private *block_alloc(struct super_block *sb, u64 blkno)
 {
 	struct block_private *bp;
 	unsigned int nofs_flags;
@@ -156,6 +169,7 @@ static struct block_private *block_alloc(struct super_block *sb, u64 blkno)
 	atomic_set(&bp->io_count, 0);

 	TRACE_BLOCK(allocate, bp);
+	save_block_stack(bp);

 out:
 	if (!bp)
@@ -1113,6 +1127,19 @@ static unsigned long block_scan_objects(struct shrinker *shrink, struct shrink_c
 	return freed;
 }

+/*
+ * lru walk callback that logs a block's blkno, refcount, io_count and
+ * bits, followed by the stack that was saved when the block was
+ * allocated.  It never modifies the list and always returns LRU_SKIP.
+ */
+static enum lru_status dump_lru_block(struct list_head *item, struct list_lru_one *list,
+					 void *cb_arg)
+{
+	struct block_private *bp;
+
+	bp = container_of(item, struct block_private, lru_head);
+
+	printk("blkno %llu refcount 0x%x io_count %d bits 0x%lx\n",
+	       bp->bl.blkno,
+	       atomic_read(&bp->refcount),
+	       atomic_read(&bp->io_count),
+	       bp->bits);
+	print_block_stack(bp);
+
+	return LRU_SKIP;
+}
+
 /*
 * Called during shutdown with no other users.  The isolating walk must
 * find blocks on the lru that only have references for presence on the
@@ -1122,11 +1149,19 @@ static void block_shrink_all(struct super_block *sb)
 {
 	DECLARE_BLOCK_INFO(sb, binf);
 	DECLARE_ISOLATE_ARGS(sb, ia);
+	long count;

+	count = DIV_ROUND_UP(list_lru_count(&binf->lru), 128) * 2;
 	do {
 		kc_list_lru_walk(&binf->lru, isolate_lru_block, &ia, 128);
 		shrink_dispose_blocks(sb, &ia.dispose);
-        } while (list_lru_count(&binf->lru) > 0);
+	} while (list_lru_count(&binf->lru) > 0 && --count > 0);
+
+	count = list_lru_count(&binf->lru);
+	if (count > 0) {
+		scoutfs_err(sb, "failed to isolate/dispose %ld blocks", count);
+		kc_list_lru_walk(&binf->lru, dump_lru_block, sb, count);
+	}
 }

 struct sm_block_completion {
@@ -0,0 +1,356 @@
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/vmalloc.h>
+#include <linux/bitmap.h>
+
+#include "super.h"
+#include "format.h"
+#include "block.h"
+#include "msg.h"
+#include "avl.h"
+#include "check.h"
+
+/*
+ * Bitmap with one bit per metadata block number.  Bits are set as block
+ * references are found while walking the persistent structures.
+ */
+struct bit_map {
+	/* vmalloc()ed bit array, allocated in scoutfs_check_meta_refs() */
+	unsigned long *addr;
+	/* number of bits tracked, taken from the super's total_meta_blocks */
+	long size;
+	/* NOTE(review): never assigned in the visible code -- confirm it is still needed */
+	long bytes;
+};
+
+/* cleared by warn_once_disable() the first time any check fails */
+static bool enabled = true;
+
+/*
+ * Evaluate cond exactly once and yield its value.  The first time cond
+ * is true at a given call site this logs the formatted message with a
+ * "check: " prefix and clears 'enabled'.  Later hits at the same call
+ * site are silent because every expansion has its own static warned_
+ * flag.
+ */
+#define warn_once_disable(sb, cond, fmt, args...)	\
+({							\
+	bool cond_ = (cond);				\
+	static bool warned_ = false;			\
+							\
+	if (cond_ && !warned_) {			\
+		scoutfs_err(sb, "check: " fmt, ##args);	\
+		warned_ = true;				\
+		enabled = false;			\
+	}						\
+							\
+	cond_;						\
+})
+
+/*
+ * Record a reference to a single block number.  A blkno of 0 is
+ * ignored.  Numbers outside the map and numbers whose bit was already
+ * set are reported through warn_once_disable().
+ */
+static void check_blkno(struct super_block *sb, struct bit_map *map, long nr)
+{
+	if (nr == 0)
+		return;
+
+	if (warn_once_disable(sb, nr < 0 || nr >= map->size,
+			      "nr %ld outside map->size %ld", nr, map->size))
+		return;
+
+	warn_once_disable(sb, test_and_set_bit(nr, map->addr),
+			  "nr %ld already set", nr);
+}
+
+/*
+ * Record references to the 'len' blocks starting at 'start'.
+ *
+ * An extent that doesn't fit inside the map is reported and otherwise
+ * ignored.  start and len are each compared against the map size before
+ * their sum is, so a wrapped sum can't slip past the range check.  For
+ * an extent that fits, any bit already set inside it is reported, and
+ * then every bit in the extent is set.
+ */
+static void check_extent(struct super_block *sb, struct bit_map *map, u64 start, u64 len)
+{
+	unsigned long nr;
+
+	if (!warn_once_disable(sb, start >= map->size || len > map->size ||
+				   (start + len) > map->size,
+				   "start %llu len %llu outside map->size %ld",
+				   start, len, map->size)) {
+
+		/* the first set bit at or after start tells us if the extent overlaps */
+		nr = find_next_bit(map->addr, map->size, start);
+		warn_once_disable(sb, nr < start + len,
+				  "start %llu len %llu has bits already set, first %lu",
+				  start, len, nr);
+
+		bitmap_set(map->addr, start, len);
+	}
+}
+
+/* Record the block number stored in a block ref. */
+static void check_block_ref(struct super_block *sb, struct bit_map *map,
+			    struct scoutfs_block_ref *ref)
+{
+	long nr = le64_to_cpu(ref->blkno);
+
+	check_blkno(sb, map, nr);
+}
+
+/*
+ * Record the ref and then try to read the block that it points to.
+ *
+ * We're not handling errors, so the return is just the pointer to the
+ * block data when the ref is non-zero and the read succeeded, with the
+ * block itself stored in *bl_ret.  In every other case null is returned
+ * and the caller backs off.
+ */
+static void *read_block_ref(struct super_block *sb, struct bit_map *map,
+			    struct scoutfs_block_ref *ref, u32 magic,
+			    struct scoutfs_block **bl_ret)
+{
+	check_block_ref(sb, map, ref);
+
+	if (ref->blkno == 0)
+		return NULL;
+
+	if (scoutfs_block_read_ref(sb, ref, magic, bl_ret) != 0)
+		return NULL;
+
+	return (*bl_ret)->data;
+}
+
+/* returns false if caller should stop iterating */
+typedef bool (*check_btree_item_cb)(struct super_block *sb, struct bit_map *map,
+				    struct scoutfs_key *key, void *val, u16 val_len);
+
+/*
+ * Record the btree block that the ref points to and then walk its
+ * items.  In parent blocks (level > 0) each item's value is treated as
+ * a block ref to a child and we recurse into it.  In leaf blocks the
+ * optional item callback is called for each item.
+ *
+ * We walk the items in key order via the avl so that the item callbacks
+ * can have us stop iterating based on their knowledge of key ordering.
+ *
+ * Blocks that can't be read, or whose level doesn't match the level the
+ * caller expected, are silently skipped.
+ *
+ * NOTE(review): val_off and val_len come straight from the block and
+ * are not validated here -- confirm the read path verifies them.
+ */
+static void check_btree_block_ref(struct super_block *sb, struct bit_map *map,
+				  u8 level, struct scoutfs_block_ref *ref,
+				  check_btree_item_cb item_cb)
+{
+	struct scoutfs_block *bl = NULL;
+	struct scoutfs_btree_block *bt;
+	struct scoutfs_btree_item *item;
+	struct scoutfs_avl_node *node;
+	void *val;
+	u16 val_off;
+	u16 val_len;
+
+	if (!(bt = read_block_ref(sb, map, ref, SCOUTFS_BLOCK_MAGIC_BTREE, &bl)))
+		return;
+
+	if (bt->level != level)
+		goto out;
+
+	for (node = scoutfs_avl_first(&bt->item_root);
+	     node != NULL;
+	     node = scoutfs_avl_next(&bt->item_root, node)) {
+		item = container_of(node, struct scoutfs_btree_item, node);
+
+		val_off = le16_to_cpu(item->val_off);
+		val_len = le16_to_cpu(item->val_len);
+		/* the value offset is applied from the start of the block */
+		val = (void *)bt + val_off;
+
+		if (bt->level > 0)
+			check_btree_block_ref(sb, map, bt->level - 1, val, item_cb);
+		else if (item_cb && !item_cb(sb, map, &item->key, val, val_len))
+			break;
+	}
+out:
+	scoutfs_block_put(sb, bl);
+}
+
+/*
+ * Walk all the blocks of a btree, starting from the root's ref.  A root
+ * with a height of 0 has no blocks to walk.
+ */
+static void check_btree_root(struct super_block *sb, struct bit_map *map,
+			     struct scoutfs_btree_root *root, check_btree_item_cb item_cb)
+{
+	if (root->height == 0)
+		return;
+
+	check_btree_block_ref(sb, map, root->height - 1, &root->ref, item_cb);
+}
+
+/*
+ * Item callback for allocator btrees.  Items in the blkno zone describe
+ * a free extent by its last block and length, which is recorded in the
+ * map.  The first item from any other zone stops the iteration.
+ */
+static bool check_alloc_extent_item(struct super_block *sb, struct bit_map *map,
+				    struct scoutfs_key *key, void *val, u16 val_len)
+{
+	u64 end;
+	u64 len;
+
+	/* XXX only checking primary blkno items */
+	if (key->sk_zone != SCOUTFS_FREE_EXTENT_BLKNO_ZONE) {
+		/* have the caller stop iterating over items */
+		return false;
+	}
+
+	end = le64_to_cpu(key->skfb_end);
+	len = le64_to_cpu(key->skfb_len);
+	check_extent(sb, map, end - len + 1, len);
+
+	return true;
+}
+
+/* Walk an allocator's btree, recording its blocks and its free extents. */
+static void check_alloc_root(struct super_block *sb, struct bit_map *map,
+			     struct scoutfs_alloc_root *root)
+{
+	struct scoutfs_btree_root *btree = &root->root;
+
+	check_btree_root(sb, map, btree, check_alloc_extent_item);
+}
+
+/*
+ * Walk a chain of alloc list blocks starting from the caller's ref.
+ * Each list block is recorded, as are the 'nr' block numbers that it
+ * stores starting at index 'start'.  The walk ends at the first ref
+ * that is zero or that can't be read.
+ *
+ * NOTE(review): start and nr are taken from the block without being
+ * checked against the size of blknos[] -- confirm they're validated
+ * elsewhere.
+ * NOTE(review): a cycle in the next refs would presumably never
+ * terminate; the duplicate bit is reported but the walk continues.
+ */
+static void check_alloc_list_block_ref(struct super_block *sb, struct bit_map *map,
+				       struct scoutfs_block_ref *caller_ref)
+{
+	struct scoutfs_alloc_list_block *lblk;
+	struct scoutfs_block_ref ref;
+	struct scoutfs_block *bl;
+	u32 start;
+	u32 nr;
+	u32 i;
+
+	/* work on a copy so that the caller's ref isn't modified as we advance */
+	ref = *caller_ref;
+
+	while ((lblk = read_block_ref(sb, map, &ref, SCOUTFS_BLOCK_MAGIC_ALLOC_LIST, &bl))) {
+
+		start = le32_to_cpu(lblk->start);
+		nr = le32_to_cpu(lblk->nr);
+
+		/* could sort and combine into extents */
+		for (i = 0; i < nr; i++)
+			check_blkno(sb, map, le64_to_cpu(lblk->blknos[start + i]));
+
+		/* copy the next ref out of the block before dropping it */
+		ref = lblk->next;
+		scoutfs_block_put(sb, bl);
+	}
+}
+
+/* Walk all the list blocks reachable from an alloc list head. */
+static void check_alloc_list_head(struct super_block *sb, struct bit_map *map,
+				  struct scoutfs_alloc_list_head *lhead)
+{
+	struct scoutfs_block_ref *first = &lhead->ref;
+
+	check_alloc_list_block_ref(sb, map, first);
+}
+
+/*
+ * Item callback for the log_merge btree.  The zone of the key decides
+ * how the value is interpreted and which of its references are walked.
+ * Items in other zones are ignored.  Iteration always continues.
+ */
+static bool check_log_merge_item(struct super_block *sb, struct bit_map *map,
+				 struct scoutfs_key *key, void *val, u16 val_len)
+{
+	if (key->sk_zone == SCOUTFS_LOG_MERGE_REQUEST_ZONE) {
+		struct scoutfs_log_merge_request *req = val;
+
+		check_alloc_list_head(sb, map, &req->meta_avail);
+		check_alloc_list_head(sb, map, &req->meta_freed);
+		/* logs_root and root are shared refs */
+
+	} else if (key->sk_zone == SCOUTFS_LOG_MERGE_COMPLETE_ZONE) {
+		struct scoutfs_log_merge_complete *comp = val;
+
+		check_alloc_list_head(sb, map, &comp->meta_avail);
+		check_alloc_list_head(sb, map, &comp->meta_freed);
+		/* XXX merged subtree?   hmm. */
+
+	} else if (key->sk_zone == SCOUTFS_LOG_MERGE_FREEING_ZONE) {
+		struct scoutfs_log_merge_freeing *fr = val;
+
+		check_btree_root(sb, map, &fr->root, NULL);
+	}
+
+	return true;
+}
+
+/*
+ * Walk the blocks of a srch file from the given ref.  Blocks at level 0
+ * are only recorded, they aren't read.  Parent blocks are recorded and
+ * read, and then each of their refs is walked at the next lower level.
+ */
+static void check_srch_file_block_ref(struct super_block *sb, struct bit_map *map,
+				      u8 level, struct scoutfs_block_ref *ref)
+{
+	struct scoutfs_srch_parent *parent;
+	struct scoutfs_block *bl = NULL;
+	int slot;
+
+	if (level == 0) {
+		check_block_ref(sb, map, ref);
+		return;
+	}
+
+	parent = read_block_ref(sb, map, ref, SCOUTFS_BLOCK_MAGIC_SRCH_PARENT, &bl);
+	if (parent == NULL)
+		return;
+
+	for (slot = 0; slot < SCOUTFS_SRCH_PARENT_REFS; slot++)
+		check_srch_file_block_ref(sb, map, level - 1, &parent->refs[slot]);
+
+	scoutfs_block_put(sb, bl);
+}
+
+/*
+ * Walk all the blocks of a srch file, starting from its root ref.  A
+ * file with a height of 0 has no blocks to walk.
+ */
+static void check_srch_file(struct super_block *sb, struct bit_map *map,
+			    struct scoutfs_srch_file *sfl)
+{
+	if (sfl->height == 0)
+		return;
+
+	check_srch_file_block_ref(sb, map, sfl->height - 1, &sfl->ref);
+}
+
+/*
+ * Item callback for the srch btree.  Blocks and log items hold a srch
+ * file.  Pending and busy items hold a compaction with allocator lists
+ * and an output file.  Items of other types are ignored.  Iteration
+ * always continues.
+ */
+static bool check_srch_item(struct super_block *sb, struct bit_map *map,
+			    struct scoutfs_key *key, void *val, u16 val_len)
+{
+	if (key->sk_type == SCOUTFS_SRCH_BLOCKS_TYPE ||
+	    key->sk_type == SCOUTFS_SRCH_LOG_TYPE) {
+		struct scoutfs_srch_file *sfl = val;
+
+		check_srch_file(sb, map, sfl);
+
+	} else if (key->sk_type == SCOUTFS_SRCH_PENDING_TYPE ||
+		   key->sk_type == SCOUTFS_SRCH_BUSY_TYPE) {
+		struct scoutfs_srch_compact *sc = val;
+
+		check_alloc_list_head(sb, map, &sc->meta_avail);
+		check_alloc_list_head(sb, map, &sc->meta_freed);
+		check_srch_file(sb, map, &sc->out);
+	}
+
+	return true;
+}
+
+/*
+ * Item callback for the logs btree.  Every value is treated as a
+ * log_trees struct and each of the metadata structures that it
+ * references is walked.  Iteration always continues.
+ */
+static bool check_log_trees_item(struct super_block *sb, struct bit_map *map,
+				 struct scoutfs_key *key, void *val, u16 val_len)
+{
+	struct scoutfs_log_trees *lt = val;
+
+	check_alloc_list_head(sb, map, &lt->meta_avail);
+	check_alloc_list_head(sb, map, &lt->meta_freed);
+	check_btree_root(sb, map, &lt->item_root, NULL);
+	/* the bloom ref is only recorded, the block isn't read */
+	check_block_ref(sb, map, &lt->bloom_ref);
+	/* no item callback: only the btree blocks of the data allocators are recorded */
+	check_btree_root(sb, map, &lt->data_avail.root, NULL);
+	check_btree_root(sb, map, &lt->data_freed.root, NULL);
+	check_srch_file(sb, map, &lt->srch_file);
+
+	return true;
+}
+
+/*
+ * Walk every metadata structure referenced by the super block.  The
+ * meta allocators also record their free extents.  The data allocator
+ * is walked without an item callback, so only its btree blocks are
+ * recorded.
+ */
+static void check_super(struct super_block *sb, struct bit_map *map,
+			struct scoutfs_super_block *super)
+{
+	int i;
+
+	for (i = 0; i < 2; i++)
+		check_alloc_root(sb, map, &super->meta_alloc[i]);
+
+	check_btree_root(sb, map, &super->data_alloc.root, NULL);
+
+	for (i = 0; i < 2; i++)
+		check_alloc_list_head(sb, map, &super->server_meta_avail[i]);
+	for (i = 0; i < 2; i++)
+		check_alloc_list_head(sb, map, &super->server_meta_freed[i]);
+
+	check_btree_root(sb, map, &super->fs_root, NULL);
+	check_btree_root(sb, map, &super->logs_root, check_log_trees_item);
+	check_btree_root(sb, map, &super->log_merge, check_log_merge_item);
+	check_btree_root(sb, map, &super->mounted_clients, NULL);
+	check_btree_root(sb, map, &super->srch_root, check_srch_item);
+}
+
+/* Report the first block number, if any, that was never recorded in the map. */
+static void check_map(struct super_block *sb, struct bit_map *map)
+{
+	unsigned long first_zero;
+
+	first_zero = find_next_zero_bit(map->addr, map->size, 0);
+
+	warn_once_disable(sb, first_zero < map->size,
+			  "final map has missing bits, first %lu", first_zero);
+}
+
+/*
+ * Walk all the metadata block references reachable from the given super
+ * block and verify that every metadata block number is referenced
+ * exactly once: either in use by a structure or free in an allocator.
+ * Any inconsistency is logged and then the system panics.
+ *
+ * This is called while the persistent block structures are stable.
+ * While we might have to drop stale cache as we read these blocks, we
+ * should be able to walk stable block references from the super.
+ *
+ * Once any check has failed, including failing to allocate the bitmap,
+ * checking is disabled and later calls return immediately.
+ *
+ * NOTE(review): the bitmap is a function-local static that is reused
+ * across calls and is never freed here -- confirm callers are
+ * serialized and that the allocation is acceptable to leave in place.
+ */
+void scoutfs_check_meta_refs(struct super_block *sb, struct scoutfs_super_block *super)
+{
+	static struct bit_map map = {NULL,};
+	unsigned long bytes;
+	u64 size;
+
+	if (!enabled)
+		return;
+
+	size = le64_to_cpu(super->total_meta_blocks);
+
+	if (warn_once_disable(sb, size <= SCOUTFS_META_DEV_START_BLKNO,
+			       "total_meta %llu too small", size) ||
+	    warn_once_disable(sb, size > LONG_MAX,
+			       "total_meta %llu too large", size))
+		return;
+
+	/*
+	 * The bitmap helpers read and write whole unsigned longs, so the
+	 * buffer has to be sized in longs rather than rounded up to bytes.
+	 */
+	bytes = BITS_TO_LONGS(size) * sizeof(unsigned long);
+	if (size != map.size) {
+		if (map.addr) {
+			vfree(map.addr);
+			map.addr = NULL;
+		}
+		/* don't leave a stale size paired with a missing buffer */
+		map.size = 0;
+
+		map.addr = vmalloc(bytes);
+		if (warn_once_disable(sb, !map.addr, "couldn't alloc %lu byte vmalloc", bytes))
+			return;
+
+		map.size = size;
+	}
+
+	memset(map.addr, 0, bytes);
+	/* initial large block numbers used by padding and 4k super and quorum blocks */
+	bitmap_set(map.addr, 0, SCOUTFS_META_DEV_START_BLKNO);
+
+	check_super(sb, &map, super);
+	check_map(sb, &map);
+
+	/* any failed check along the way cleared enabled */
+	if (!enabled)
+		panic("found inconsistent meta refs");
+}
@@ -0,0 +1,6 @@
+#ifndef _SCOUTFS_CHECK_H_
+#define _SCOUTFS_CHECK_H_
+
+void scoutfs_check_meta_refs(struct super_block *sb, struct scoutfs_super_block *super);
+
+#endif
@@ -435,8 +435,8 @@ static int lookup_mounted_client_item(struct super_block *sb, u64 rid)
 	if (ret == -ENOENT)
 		ret = 0;

-	kfree(super);
 out:
+	kfree(super);
 	return ret;
 }

@@ -2053,6 +2053,9 @@ const struct inode_operations scoutfs_dir_iops = {
 #endif
 	.listxattr	= scoutfs_listxattr,
 	.get_acl	= scoutfs_get_acl,
+#ifdef KC_GET_ACL_DENTRY
+	.set_acl	= scoutfs_set_acl,
+#endif
 	.symlink	= scoutfs_symlink,
 	.permission	= scoutfs_permission,
 #ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
@@ -150,6 +150,9 @@ static const struct inode_operations scoutfs_file_iops = {
 #endif
 	.listxattr	= scoutfs_listxattr,
 	.get_acl	= scoutfs_get_acl,
+#ifdef KC_GET_ACL_DENTRY
+	.set_acl	= scoutfs_set_acl,
+#endif
 	.fiemap		= scoutfs_data_fiemap,
 };

@@ -163,6 +166,9 @@ static const struct inode_operations scoutfs_special_iops = {
 #endif
 	.listxattr	= scoutfs_listxattr,
 	.get_acl	= scoutfs_get_acl,
+#ifdef KC_GET_ACL_DENTRY
+	.set_acl	= scoutfs_set_acl,
+#endif
 };

 /*
@@ -2188,7 +2194,7 @@ int scoutfs_inode_walk_writeback(struct super_block *sb, bool write)
 	struct scoutfs_inode_info *si;
 	struct scoutfs_inode_info *tmp;
 	struct inode *inode;
-	int ret;
+	int ret = 0;

 	spin_lock(&inf->writeback_lock);

@@ -954,6 +954,9 @@ static int copy_alloc_detail_to_user(struct super_block *sb, void *arg,
 	if (args->copied == args->nr)
 		return -EOVERFLOW;

+	/* .type and .pad need clearing */
+	memset(&ade, 0, sizeof(struct scoutfs_ioctl_alloc_detail_entry));
+
 	ade.blocks = blocks;
 	ade.id = id;
 	ade.meta = !!meta;
@@ -1369,7 +1372,7 @@ static long scoutfs_ioc_get_referring_entries(struct file *file, unsigned long a
 			ent.d_type = bref->d_type;
 			ent.name_len = name_len;

-			if (copy_to_user(uent, &ent, sizeof(struct scoutfs_ioctl_dirent)) ||
+			if (copy_to_user(uent, &ent, offsetof(struct scoutfs_ioctl_dirent, name[0])) ||
 			    copy_to_user(&uent->name[0], bref->dent.name, name_len) ||
 			    put_user('\0', &uent->name[name_len])) {
 				ret = -EFAULT;
@@ -86,6 +86,8 @@ struct item_cache_info {
 	/* often walked, but per-cpu refs are fast path */
 	rwlock_t rwlock;
 	struct rb_root pg_root;
+	/* stop readers from caching stale items behind reclaimed cleaned written items */
+	u64 read_dirty_barrier;

 	/* page-granular modification by writers, then exclusive to commit */
 	spinlock_t dirty_lock;
@@ -96,9 +98,6 @@ struct item_cache_info {
 	spinlock_t lru_lock;
 	struct list_head lru_list;
 	unsigned long lru_pages;
-
-	/* stop readers from caching stale items behind reclaimed cleaned written items */
-	atomic64_t read_dirty_barrier;
 };

 #define DECLARE_ITEM_CACHE_INFO(sb, name) \
@@ -1431,7 +1430,9 @@ static int read_pages(struct super_block *sb, struct item_cache_info *cinf,
 	pg->end = lock->end;
 	rbtree_insert(&pg->node, NULL, &root.rb_node, &root);

-	rdbar = atomic64_read(&cinf->read_dirty_barrier);
+	read_lock(&cinf->rwlock);
+	rdbar = cinf->read_dirty_barrier;
+	read_unlock(&cinf->rwlock);

 	start = lock->start;
 	end = lock->end;
@@ -1470,19 +1471,18 @@ static int read_pages(struct super_block *sb, struct item_cache_info *cinf,
 retry:
 	write_lock(&cinf->rwlock);

-	ret = 0;
+	/* can't insert if write has cleaned since we read */
+	if (cinf->read_dirty_barrier != rdbar) {
+		scoutfs_inc_counter(sb, item_read_pages_barrier);
+		ret = -ESTALE;
+		goto unlock;
+	}
+
 	while ((rd = first_page(&root))) {

 		pg = page_rbtree_walk(sb, &cinf->pg_root, &rd->start, &rd->end,
 				      NULL, NULL, &par, &pnode);
 		if (!pg) {
-			/* can't insert if write is cleaning (write_lock is read barrier) */
-			if (atomic64_read(&cinf->read_dirty_barrier) != rdbar) {
-				scoutfs_inc_counter(sb, item_read_pages_barrier);
-				ret = -ESTALE;
-				break;
-			}
-
 			/* insert read pages that don't intersect */
 			rbtree_erase(&rd->node, &root);
 			rbtree_insert(&rd->node, par, pnode, &cinf->pg_root);
@@ -1515,6 +1515,9 @@ retry:
 		}
 	}

+	ret = 0;
+
+unlock:
 	write_unlock(&cinf->rwlock);

 out:
@@ -2358,9 +2361,10 @@ int scoutfs_item_write_done(struct super_block *sb)
 	struct cached_item *tmp;
 	struct cached_page *pg;

-	/* don't let read_pages insert possibly stale items */
-	atomic64_inc(&cinf->read_dirty_barrier);
-	smp_mb__after_atomic();
+	/* don't let read_pages miss written+cleaned items */
+	write_lock(&cinf->rwlock);
+	cinf->read_dirty_barrier++;
+	write_unlock(&cinf->rwlock);

 	spin_lock(&cinf->dirty_lock);
 	while ((pg = list_first_entry_or_null(&cinf->dirty_list, struct cached_page, dirty_head))) {
@@ -2615,7 +2619,6 @@ int scoutfs_item_setup(struct super_block *sb)
 	atomic_set(&cinf->dirty_pages, 0);
 	spin_lock_init(&cinf->lru_lock);
 	INIT_LIST_HEAD(&cinf->lru_list);
-	atomic64_set(&cinf->read_dirty_barrier, 0);

 	cinf->pcpu_pages = alloc_percpu(struct item_percpu_pages);
 	if (!cinf->pcpu_pages)
@@ -263,6 +263,11 @@ typedef unsigned int blk_opf_t;
 #define kc__vmalloc __vmalloc
 #endif

+#ifdef KC_VFS_METHOD_MNT_IDMAP_ARG
+#define KC_VFS_NS_DEF struct mnt_idmap *mnt_idmap,
+#define KC_VFS_NS mnt_idmap,
+#define KC_VFS_INIT_NS &nop_mnt_idmap,
+#else
 #ifdef KC_VFS_METHOD_USER_NAMESPACE_ARG
 #define KC_VFS_NS_DEF struct user_namespace *mnt_user_ns,
 #define KC_VFS_NS mnt_user_ns,
@@ -272,6 +277,7 @@ typedef unsigned int blk_opf_t;
 #define KC_VFS_NS
 #define KC_VFS_INIT_NS
 #endif
+#endif /* KC_VFS_METHOD_MNT_IDMAP_ARG */

 #ifdef KC_BIO_ALLOC_DEV_OPF_ARGS
 #define kc_bio_alloc bio_alloc
@@ -457,4 +463,30 @@ static inline void list_lru_isolate_move(struct list_lru_one *list, struct list_
 }
 #endif

+#ifndef KC_STACK_TRACE_SAVE
+#include <linux/stacktrace.h>
+/*
+ * Fallback for kernels without stack_trace_save(): fill 'store' with up
+ * to 'size' entries of the current stack via save_stack_trace(),
+ * skipping the first 'skipnr' entries, and return the number of entries
+ * that were stored.
+ */
+static inline unsigned int stack_trace_save(unsigned long *store, unsigned int size,
+					    unsigned int skipnr)
+{
+	struct stack_trace trace = {0,};
+
+	trace.entries = store;
+	trace.max_entries = size;
+	trace.skip = skipnr;
+
+	save_stack_trace(&trace);
+
+	return trace.nr_entries;
+}
+
+/*
+ * Fallback for kernels without stack_trace_print(): wrap the entries in
+ * a struct stack_trace and print them with print_stack_trace().
+ */
+static inline void stack_trace_print(unsigned long *entries, unsigned int nr_entries, int spaces)
+{
+	struct stack_trace trace = {0,};
+
+	trace.entries = entries;
+	trace.nr_entries = nr_entries;
+
+	print_stack_trace(&trace, spaces);
+}
+#endif
+
 #endif
@@ -1105,9 +1105,15 @@ static void scoutfs_net_listen_worker(struct work_struct *work)
 						  conn->notify_down,
 						  conn->info_size,
 						  conn->req_funcs, "accepted");
+		/*
+		 * scoutfs_net_alloc_conn() can fail due to ENOMEM. If this
+		 * is the only thing that does so, there's no harm in trying
+		 * to see if kernel_accept() can get enough memory to try accepting
+		 * a new connection again. If that then fails with ENOMEM, it'll
+		 * shut down the conn anyway. So just retry here.
+		 */
 		if (!acc_conn) {
 			sock_release(acc_sock);
-			ret = -ENOMEM;
 			continue;
 		}

@@ -592,7 +592,7 @@ static int handle_request(struct super_block *sb, struct omap_request *req)
 	ret = 0;
 out:
 	free_rids(&priv_rids);
-	if (ret < 0) {
+	if ((ret < 0) && (req != NULL)) {
 		ret = scoutfs_server_send_omap_response(sb, req->client_rid, req->client_id,
 							NULL, ret);
 		free_req(req);
@@ -128,7 +128,7 @@ static void free_options(struct scoutfs_mount_options *opts)
 #define MIN_DATA_PREALLOC_BLOCKS	1ULL
 #define MAX_DATA_PREALLOC_BLOCKS	((unsigned long long)SCOUTFS_BLOCK_SM_MAX)

-#define DEFAULT_TCP_KEEPALIVE_TIMEOUT_MS	(10 * MSEC_PER_SEC)
+#define DEFAULT_TCP_KEEPALIVE_TIMEOUT_MS	(60 * MSEC_PER_SEC)

 static void init_default_options(struct scoutfs_mount_options *opts)
 {
@@ -507,10 +507,10 @@ static int update_quorum_block(struct super_block *sb, int event, u64 term, bool
 		set_quorum_block_event(sb, &blk, event, term);
 		ret = write_quorum_block(sb, blkno, &blk);
 		if (ret < 0)
-			scoutfs_err(sb, "error %d reading quorum block %llu to update event %d term %llu",
+			scoutfs_err(sb, "error %d writing quorum block %llu after updating event %d term %llu",
 				    ret, blkno, event, term);
 	} else {
-		scoutfs_err(sb, "error %d writing quorum block %llu after updating event %d term %llu",
+		scoutfs_err(sb, "error %d reading quorum block %llu to update event %d term %llu",
 			    ret, blkno, event, term);
 	}

@@ -713,8 +713,6 @@ static void scoutfs_quorum_worker(struct work_struct *work)
 	struct quorum_status qst = {0,};
 	struct hb_recording hbr;
 	bool record_hb;
-	bool recv_failed;
-	bool initializing = true;
 	int ret;
 	int err;

@@ -747,8 +745,6 @@ static void scoutfs_quorum_worker(struct work_struct *work)

 		update_show_status(qinf, &qst);

-		recv_failed = false;
-
 		ret = recv_msg(sb, &msg, qst.timeout);
 		if (ret < 0) {
 			if (ret != -ETIMEDOUT && ret != -EAGAIN) {
@@ -756,9 +752,6 @@ static void scoutfs_quorum_worker(struct work_struct *work)
 				scoutfs_inc_counter(sb, quorum_recv_error);
 				goto out;
 			}
-
-			recv_failed = true;
-
 			msg.type = SCOUTFS_QUORUM_MSG_INVALID;
 			ret = 0;
 		}
@@ -816,13 +809,13 @@ static void scoutfs_quorum_worker(struct work_struct *work)

 		/* followers and candidates start new election on timeout */
 		if (qst.role != LEADER &&
-		    (initializing || recv_failed) &&
+		    msg.type == SCOUTFS_QUORUM_MSG_INVALID &&
 		    ktime_after(ktime_get(), qst.timeout)) {
 			/* .. but only if their server has stopped */
 			if (!scoutfs_server_is_down(sb)) {
 				qst.timeout = election_timeout();
 				scoutfs_inc_counter(sb, quorum_candidate_server_stopping);
-				goto again;
+				continue;
 			}

 			qst.role = CANDIDATE;
@@ -959,9 +952,6 @@ static void scoutfs_quorum_worker(struct work_struct *work)
 		}

 		record_hb_delay(sb, qinf, &hbr, record_hb, qst.role);
-
-again:
-		initializing = false;
 	}

 	update_show_status(qinf, &qst);
@@ -980,7 +970,10 @@ again:
 	}

 	/* record that this slot no longer has an active quorum */
-	update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_END, qst.term, true);
+	err = update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_END, qst.term, true);
+	if (err < 0 && ret == 0)
+		ret = err;
+
 out:
 	if (ret < 0) {
 		scoutfs_err(sb, "quorum service saw error %d, shutting down.  This mount is no longer participating in quorum.  It should be remounted to restore service.",
@@ -1069,7 +1062,7 @@ static char *role_str(int role)
 		[LEADER] = "leader",
 	};

-	if (role < 0 || role > ARRAY_SIZE(roles) || !roles[role])
+	if (role < 0 || role >= ARRAY_SIZE(roles) || !roles[role])
 		return "invalid";

 	return roles[role];
@@ -2134,7 +2134,7 @@ static int server_srch_commit_compact(struct super_block *sb,
 					  &super->srch_root, rid, sc,
 					  &av, &fr);
 	mutex_unlock(&server->srch_mutex);
-	if (ret < 0) /* XXX very bad, leaks allocators */
+	if (ret < 0)
 		goto apply;

 	/* reclaim allocators if they were set by _srch_commit_ */
@@ -2144,10 +2144,10 @@ static int server_srch_commit_compact(struct super_block *sb,
 	      scoutfs_alloc_splice_list(sb, &server->alloc, &server->wri,
 					server->other_freed, &fr);
 	mutex_unlock(&server->alloc_mutex);
+	WARN_ON(ret < 0); /* XXX leaks allocators */
 apply:
 	ret = server_apply_commit(sb, &hold, ret);
 out:
-	WARN_ON(ret < 0); /* XXX leaks allocators */
 	return scoutfs_net_response(sb, conn, cmd, id, ret, NULL, 0);
 }

@@ -537,23 +537,35 @@ out:
 * the pairs cancel each other out by all readers (the second encoding
 * looks like deletion) so they aren't visible to the first/last bounds of
 * the block or file.
+ *
+ * We use the same entry repeatedly, so the diff between them will be empty.
+ * This lets us just emit the two-byte count word, leaving the other bytes
+ * as zero.
+ *
+ * Split the desired total len into two pieces, adding any remainder to the
+ * first four-bit value.
 */
-static int append_padded_entry(struct scoutfs_srch_file *sfl, u64 blk,
-			       struct scoutfs_srch_block *srb, struct scoutfs_srch_entry *sre)
+static void append_padded_entry(struct scoutfs_srch_file *sfl,
+				struct scoutfs_srch_block *srb,
+				int len)
 {
-	int ret;
+	int each;
+	int rem;
+	u16 lengths = 0;
+	u8 *buf = srb->entries + le32_to_cpu(srb->entry_bytes);

-	ret = encode_entry(srb->entries + le32_to_cpu(srb->entry_bytes),
-			   sre, &srb->tail);
-	if (ret > 0) {
-		srb->tail = *sre;
-		le32_add_cpu(&srb->entry_nr, 1);
-		le32_add_cpu(&srb->entry_bytes, ret);
-		le64_add_cpu(&sfl->entries, 1);
-		ret = 0;
-	}
+	each = (len - 2) >> 1;
+	rem = (len - 2) & 1;

-	return ret;
+	lengths |= each + rem;
+	lengths |= each << 4;
+
+	memset(buf, 0, len);
+	put_unaligned_le16(lengths, buf);
+
+	le32_add_cpu(&srb->entry_nr, 1);
+	le32_add_cpu(&srb->entry_bytes, len);
+	le64_add_cpu(&sfl->entries, 1);
 }

 /*
@@ -564,61 +576,41 @@ static int append_padded_entry(struct scoutfs_srch_file *sfl, u64 blk,
 * This is called when there is a single existing entry in the block.
 * We have the entire block to work with.  We encode pairs of matching
 * entries.  This hides them from readers (both searches and merging) as
- * they're interpreted as creation and deletion and are deleted.  We use
- * the existing hash value of the first entry in the block but then set
- * the inode to an impossibly large number so it doesn't interfere with
- * anything.
+ * they're interpreted as creation and deletion and are deleted.
 *
- * To hit the specific offset we very carefully manage the amount of
- * bytes of change between fields in the entry.  We know that if we
- * change all the byte of the ino and id we end up with a 20 byte
- * (2+8+8,2) encoding of the pair of entries.  To have the last entry
- * start at the _SAFE_POS offset we know that the final 20 byte pair
- * encoding needs to end at 2 bytes (second entry encoding) after the
- * _SAFE_POS offset.
+ * For simplicity and to maintain sort ordering within the block, we reuse
+ * the existing entry. This lets us skip the encoding step, because we know
+ * the diff will be zero. We can zero-pad the resulting entries to hit the
+ * target offset exactly.
 *
- * So as we encode pairs we watch the delta of our current offset from
- * that desired final offset of 2 past _SAFE_POS.  If we're a multiple
- * of 20 away then we encode the full 20 byte pairs.  If we're not, then
- * we drop a byte to encode 19 bytes.  That'll slowly change the offset
- * to be a multiple of 20 again while encoding large entries.
+ * Because we can't predict the exact number of entry_bytes when we start,
+ * we adjust the byte count of subsequent entries until we wind up at a
+ * multiple of 20 bytes away from our goal and then use that length for
+ * the remaining entries.
+ *
+ * We could just use a single pair of unnaturally large entries to consume
+ * the needed space, adjusting for an odd number of entry_bytes if necessary.
+ * The use of 19 or 20 bytes for the entry pair matches what we would see with
+ * real (non-zero) entries that vary from the existing entry.
 */
-static void pad_entries_at_safe(struct scoutfs_srch_file *sfl, u64 blk,
+static void pad_entries_at_safe(struct scoutfs_srch_file *sfl,
 				struct scoutfs_srch_block *srb)
 {
-	struct scoutfs_srch_entry sre;
 	u32 target;
 	s32 diff;
-	u64 hash;
-	u64 ino;
-	u64 id;
-	int ret;
-
-	hash = le64_to_cpu(srb->tail.hash);
-	ino = le64_to_cpu(srb->tail.ino) | (1ULL << 62);
-	id = le64_to_cpu(srb->tail.id);

 	target = SCOUTFS_SRCH_BLOCK_SAFE_BYTES + 2;

 	while ((diff = target - le32_to_cpu(srb->entry_bytes)) > 0) {
-		ino ^= 1ULL << (7 * 8);
+		append_padded_entry(sfl, srb, 10);
 		if (diff % 20 == 0) {
-			id ^= 1ULL << (7 * 8);
+			append_padded_entry(sfl, srb, 10);
 		} else {
-			id ^= 1ULL << (6 * 8);
+			append_padded_entry(sfl, srb, 9);
 		}
-
-		sre.hash = cpu_to_le64(hash);
-		sre.ino = cpu_to_le64(ino);
-		sre.id = cpu_to_le64(id);
-
-		ret = append_padded_entry(sfl, blk, srb, &sre);
-		if (ret == 0)
-			ret = append_padded_entry(sfl, blk, srb, &sre);
-		BUG_ON(ret != 0);
-
-		diff = target - le32_to_cpu(srb->entry_bytes);
 	}
+
+	WARN_ON_ONCE(diff != 0);
 }

 /*
@@ -864,14 +856,14 @@ static int search_sorted_file(struct super_block *sb,
 		if (pos > SCOUTFS_SRCH_BLOCK_SAFE_BYTES) {
 			/* can only be inconsistency :/ */
 			ret = -EIO;
-			break;
+			goto out;
 		}

 		ret = decode_entry(srb->entries + pos, &sre, &prev);
 		if (ret <= 0) {
 			/* can only be inconsistency :/ */
 			ret = -EIO;
-			break;
+			goto out;
 		}
 		pos += ret;
 		prev = sre;
@@ -1414,7 +1406,7 @@ int scoutfs_srch_commit_compact(struct super_block *sb,
 			ret = -EIO;
 		scoutfs_btree_put_iref(&iref);
 	}
-	if (ret < 0) /* XXX leaks allocators */
+	if (ret < 0)
 		goto out;

 	/* restore busy to pending if the operation failed */
@@ -1434,10 +1426,8 @@ int scoutfs_srch_commit_compact(struct super_block *sb,
 	/* update file references if we finished compaction (!deleting) */
 	if (!(res->flags & SCOUTFS_SRCH_COMPACT_FLAG_DELETE)) {
 		ret = commit_files(sb, alloc, wri, root, res);
-		if (ret < 0) {
-			/* XXX we can't commit, shutdown? */
+		if (ret < 0)
 			goto out;
-		}

 		/* transition flags for deleting input files */
 		for (i = 0; i < res->nr; i++) {
@@ -1464,7 +1454,7 @@ update:
 			      le64_to_cpu(pending->id), 0);
 		ret = scoutfs_btree_insert(sb, alloc, wri, root, &key,
 					   pending, sizeof(*pending));
-		if (ret < 0)
+		if (WARN_ON_ONCE(ret < 0)) /* XXX inconsistency */
 			goto out;
 	}

@@ -1477,7 +1467,6 @@ update:
 		BUG_ON(err); /* both busy and pending present */
 	}
 out:
-	WARN_ON_ONCE(ret < 0); /* XXX inconsistency */
 	kfree(busy);
 	return ret;
 }
@@ -1675,7 +1664,7 @@ static int kway_merge(struct super_block *sb,
 			/* end sorted block on _SAFE offset for testing */
 			if (bl && le32_to_cpu(srb->entry_nr) == 1 && logs_input &&
 			    scoutfs_trigger(sb, SRCH_COMPACT_LOGS_PAD_SAFE)) {
-				pad_entries_at_safe(sfl, blk, srb);
+				pad_entries_at_safe(sfl, srb);
 				scoutfs_block_put(sb, bl);
 				bl = NULL;
 				blk++;
@@ -1873,7 +1862,7 @@ static int compact_logs(struct super_block *sb,
 		if (pos > SCOUTFS_SRCH_BLOCK_SAFE_BYTES) {
 			/* can only be inconsistency :/ */
 			ret = -EIO;
-			break;
+			goto out;
 		}

 		ret = decode_entry(srb->entries + pos, sre, &prev);
@@ -2287,12 +2276,11 @@ static void scoutfs_srch_compact_worker(struct work_struct *work)
 	} else {
 		ret = -EINVAL;
 	}
-	if (ret < 0)
-		goto commit;

-	ret = scoutfs_alloc_prepare_commit(sb, &alloc, &wri) ?:
-	      scoutfs_block_writer_write(sb, &wri);
+	scoutfs_alloc_prepare_commit(sb, &alloc, &wri);
+	if (ret == 0) /* keep write errors in ret so they aren't treated as success */
+		ret = scoutfs_block_writer_write(sb, &wri);
-commit:
+
 	/* the server won't use our partial compact if _ERROR is set */
 	sc->meta_avail = alloc.avail;
 	sc->meta_freed = alloc.freed;
@@ -2309,7 +2297,7 @@ out:
 		scoutfs_inc_counter(sb, srch_compact_error);

 	scoutfs_block_writer_forget_all(sb, &wri);
-	queue_compact_work(srinf, sc->nr > 0 && ret == 0);
+	queue_compact_work(srinf, sc != NULL && sc->nr > 0 && ret == 0);

 	kfree(sc);
 }
@@ -512,9 +512,9 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)

 	sbi = kzalloc(sizeof(struct scoutfs_sb_info), GFP_KERNEL);
 	sb->s_fs_info = sbi;
-	sbi->sb = sb;
 	if (!sbi)
 		return -ENOMEM;
+	sbi->sb = sb;

 	ret = assign_random_id(sbi);
 	if (ret < 0)
@@ -69,6 +69,7 @@ $(basename $0) options:
    -r <dir>  | Specify the directory in which to store results of
              | test runs.  The directory will be created if it doesn't
              | exist.  Previous results will be deleted as each test runs.
+    -R        | shuffle the test order randomly using shuf
    -s        | Skip git repo checkouts.
    -t        | Enabled trace events that match the given glob argument.
              | Multiple options enable multiple globbed events.
@@ -89,6 +90,7 @@ done
 # set some T_ defaults
 T_TRACE_DUMP="0"
 T_TRACE_PRINTK="0"
+T_PORT_START="19700"

 # array declarations to be able to use array ops
 declare -a T_TRACE_GLOB
@@ -164,6 +166,9 @@ while true; do
 		T_RESULTS="$2"
 		shift
 		;;
+	-R)
+		T_SHUF="1"
+		;;
 	-s)
 	        T_SKIP_CHECKOUT="1"
 		;;
@@ -261,13 +266,37 @@ for e in T_META_DEVICE T_DATA_DEVICE T_EX_META_DEV T_EX_DATA_DEV T_KMOD T_RESULT
 	eval $e=\"$(readlink -f "${!e}")\"
 done

+# best-effort check that our listening ports avoid the local dynamic port range
+T_TEST_PORT="$T_PORT_START"
+T_SCRATCH_PORT="$((T_PORT_START + 100))"
+T_DEV_PORT="$((T_PORT_START + 200))"
+read local_start local_end < /proc/sys/net/ipv4/ip_local_port_range
+if [ -n "$local_start" -a -n "$local_end" -a "$local_start" -lt "$local_end" ]; then
+	if [ ! "$T_DEV_PORT" -lt "$local_start" -a ! "$T_TEST_PORT" -gt "$local_end" ]; then
+		die "listening port range $T_TEST_PORT - $T_DEV_PORT is within local dynamic port range $local_start - $local_end in /proc/sys/net/ipv4/ip_local_port_range"
+	fi
+fi
+
+# permute sequence?
+T_SEQUENCE=sequence
+if [ -n "$T_SHUF" ]; then
+	msg "shuffling test order"
+	shuf sequence -o sequence.shuf
+	# keep xfstests at the end
+	if grep -q 'xfstests.sh' sequence.shuf ; then
+		sed -i '/xfstests.sh/d' sequence.shuf
+		echo "xfstests.sh" >> sequence.shuf
+	fi
+	T_SEQUENCE=sequence.shuf
+fi
+
 # include everything by default
 test -z "$T_INCLUDE" && T_INCLUDE="-e '.*'"
 # (quickly) exclude nothing by default
 test -z "$T_EXCLUDE" && T_EXCLUDE="-e '\Zx'"

 # eval to strip re ticks but not expand
-tests=$(grep -v "^#" sequence |
+tests=$(grep -v "^#" $T_SEQUENCE |
 	eval grep "$T_INCLUDE" | eval grep -v "$T_EXCLUDE")
 test -z "$tests" && \
 	die "no tests found by including $T_INCLUDE and excluding $T_EXCLUDE"
@@ -346,7 +375,7 @@ fi
 quo=""
 if [ -n "$T_MKFS" ]; then
 	for i in $(seq -0 $((T_QUORUM - 1))); do
-		quo="$quo -Q $i,127.0.0.1,$((42000 + i))"
+		quo="$quo -Q $i,127.0.0.1,$((T_TEST_PORT + i))"
 	done

 	msg "making new filesystem with $T_QUORUM quorum members"
@@ -15,7 +15,7 @@ echo "== prepare devices, mount point, and logs"
 SCR="$T_TMPDIR/mnt.scratch"
 mkdir -p "$SCR"
 > $T_TMP.mount.out
-scoutfs mkfs -f -Q 0,127.0.0.1,53000 "$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 \
+scoutfs mkfs -f -Q 0,127.0.0.1,$T_SCRATCH_PORT "$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 \
 	|| t_fail "mkfs failed"

 echo "== bad devices, bad options"
@@ -11,7 +11,7 @@ truncate -s $sz "$T_TMP.equal"
 truncate -s $large_sz "$T_TMP.large"

 echo "== make scratch fs"
-t_quiet scoutfs mkfs -f -Q 0,127.0.0.1,53000 "$T_EX_META_DEV" "$T_EX_DATA_DEV"
+t_quiet scoutfs mkfs -f -Q 0,127.0.0.1,$T_SCRATCH_PORT "$T_EX_META_DEV" "$T_EX_DATA_DEV"
 SCR="$T_TMPDIR/mnt.scratch"
 mkdir -p "$SCR"

@@ -57,7 +57,7 @@ test "$before" == "$after" || \
 # XXX this is all pretty manual, would be nice to have helpers
 echo "== make small meta fs"
 # meta device just big enough for reserves and the metadata we'll fill
-scoutfs mkfs -A -f -Q 0,127.0.0.1,53000 -m 10G "$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 || \
+scoutfs mkfs -A -f -Q 0,127.0.0.1,$T_SCRATCH_PORT -m 10G "$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 || \
 	t_fail "mkfs failed"
 SCR="$T_TMPDIR/mnt.scratch"
 mkdir -p "$SCR"
@@ -89,7 +89,7 @@ for vers in $(seq $MIN $((MAX - 1))); do
 	old_module="$builds/$vers/scoutfs.ko"

 	echo "mkfs $vers" >> "$T_TMP.log"
-	t_quiet $old_scoutfs mkfs -f -Q 0,127.0.0.1,53000 "$T_EX_META_DEV" "$T_EX_DATA_DEV" \
+	t_quiet $old_scoutfs mkfs -f -Q 0,127.0.0.1,$T_SCRATCH_PORT "$T_EX_META_DEV" "$T_EX_DATA_DEV" \
 		|| t_fail "mkfs $vers failed"

 	echo "mount $vers with $vers" >> "$T_TMP.log"
@@ -72,7 +72,7 @@ quarter_data=$(echo "$size_data / 4" | bc)

 # XXX this is all pretty manual, would be nice to have helpers
 echo "== make initial small fs"
-scoutfs mkfs -A -f -Q 0,127.0.0.1,53000 -m $quarter_meta -d $quarter_data \
+scoutfs mkfs -A -f -Q 0,127.0.0.1,$T_SCRATCH_PORT -m $quarter_meta -d $quarter_data \
 	"$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 || \
 		t_fail "mkfs failed"
 SCR="$T_TMPDIR/mnt.scratch"
@@ -50,9 +50,9 @@ t_quiet sync
 cat << EOF > local.config
 export FSTYP=scoutfs
 export MKFS_OPTIONS="-f"
-export MKFS_TEST_OPTIONS="-Q 0,127.0.0.1,42000"
-export MKFS_SCRATCH_OPTIONS="-Q 0,127.0.0.1,43000"
-export MKFS_DEV_OPTIONS="-Q 0,127.0.0.1,44000"
+export MKFS_TEST_OPTIONS="-Q 0,127.0.0.1,$T_TEST_PORT"
+export MKFS_SCRATCH_OPTIONS="-Q 0,127.0.0.1,$T_SCRATCH_PORT"
+export MKFS_DEV_OPTIONS="-Q 0,127.0.0.1,$T_DEV_PORT"
 export TEST_DEV=$T_DB0
 export TEST_DIR=$T_M0
 export SCRATCH_META_DEV=$T_EX_META_DEV
@@ -137,11 +137,10 @@ connection will wait for active TCP packets, before deciding that
 the connection is dead. This setting is per-mount and only changes
 the behavior of that mount.
 .sp
-The default value of this setting is 10000msec (10s). Any precision
+The default value of this setting is 60000msec (60s). Any precision
 beyond a whole second is likely unrealistic due to the nature of
 TCP keepalive mechanisms in the Linux kernel. Valid values are any
-value higher than 3000 (3s). Values that are higher than 30000msec
-(30s) will likely interfere with other embedded timeout values.
+value higher than 3000 (3s).
 .sp
 The TCP keepalive mechanism is complex and observing a lost connection
 quickly is important to maintain cluster stability. If the local