Introduce meta_reserve_blocks mount option, default value.

This change adds a mount option, meta_reserve_blocks, that reserves an additional number of blocks on the meta device. The default value is 16384, which corresponds to 1GB of space and roughly doubles the internal reserve that is calculated dynamically from the number of clients/mounts in typical configurations. It also comprises less than 2% of the meta device size for the smallest meta device size. A suggested value for larger deployments is around 256 blocks per GB of meta device size, i.e. 1/64 of the meta device space, or about 1.6%. Customers who are running into issues can adjust their mount options to increase the value for a larger safety buffer, or decrease it to temporarily get out of a low space condition. Obviously one would want to raise the value of this option again as soon as possible after the low space condition has been resolved. Our test suite will run with meta_reserve_blocks=0, so that the behavior of our tests is functionally unaffected by this change and the reserve won't interfere with diagnosing and resolving underlying ENOSPC issues. The addition of this option does, however, allow us to artificially create ENOSPC conditions at will, and we may want to add tests that specifically do so. Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-01-10 13:47:27 +00:00 · 2025-04-17 16:06:33 -04:00
12 changed files with 122 additions and 582 deletions
--- a/kmod/src/options.c
+++ b/kmod/src/options.c
@@ -39,6 +39,7 @@ enum {
 	Opt_orphan_scan_delay_ms,
 	Opt_quorum_heartbeat_timeout_ms,
 	Opt_quorum_slot_nr,
+	Opt_meta_reserve_blocks,
 	Opt_err,
 };

@@ -52,6 +53,7 @@ static const match_table_t tokens = {
 	{Opt_orphan_scan_delay_ms, "orphan_scan_delay_ms=%s"},
 	{Opt_quorum_heartbeat_timeout_ms, "quorum_heartbeat_timeout_ms=%s"},
 	{Opt_quorum_slot_nr, "quorum_slot_nr=%s"},
+	{Opt_meta_reserve_blocks, "meta_reserve_blocks=%s"},
 	{Opt_err, NULL}
 };

@@ -126,6 +128,9 @@ static void free_options(struct scoutfs_mount_options *opts)
 #define MIN_DATA_PREALLOC_BLOCKS	1ULL
 #define MAX_DATA_PREALLOC_BLOCKS	((unsigned long long)SCOUTFS_BLOCK_SM_MAX)

+#define SCOUTFS_META_RESERVE_DEFAULT_BLOCKS 16384
+
+
 static void init_default_options(struct scoutfs_mount_options *opts)
 {
 	memset(opts, 0, sizeof(*opts));
@@ -136,6 +141,7 @@ static void init_default_options(struct scoutfs_mount_options *opts)
 	opts->orphan_scan_delay_ms = -1;
 	opts->quorum_heartbeat_timeout_ms = SCOUTFS_QUORUM_DEF_HB_TIMEO_MS;
 	opts->quorum_slot_nr = -1;
+	opts->meta_reserve_blocks = SCOUTFS_META_RESERVE_DEFAULT_BLOCKS;
 }

 static int verify_log_merge_wait_timeout_ms(struct super_block *sb, int ret, int val)
@@ -167,6 +173,24 @@ static int verify_quorum_heartbeat_timeout_ms(struct super_block *sb, int ret, u

 	return 0;
 }
+static int verify_meta_reserve_blocks(struct super_block *sb, int ret, int val)
+{
+	/*
+	 * Reject parse failures and negative values.  Ideally we'd also cap this at
+	 * ~1/2 of total_meta_blocks, but we can't get that info when mount is called.
+	 */
+	if (ret < 0) {
+		scoutfs_err(sb, "failed to parse meta_reserve_blocks value");
+		return -EINVAL;
+	}
+	if (val < 0) {	/* an int can never exceed INT_MAX, so only the lower bound is tested */
+		scoutfs_err(sb, "invalid meta_reserve_blocks value %d, must be between 0 and %d",
+			    val, INT_MAX);
+		return -EINVAL;
+	}
+
+	return 0;
+}

 /*
 * Parse the option string into our options struct.   This can allocate
@@ -279,6 +303,14 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m
 			opts->quorum_slot_nr = nr;
 			break;

+		case Opt_meta_reserve_blocks:
+			ret = match_int(args, &nr);
+			ret = verify_meta_reserve_blocks(sb, ret, nr);
+			if (ret < 0)
+				return ret;
+			opts->meta_reserve_blocks = nr;
+			break;
+
 		default:
 			scoutfs_err(sb, "Unknown or malformed option, \"%s\"", p);
 			return -EINVAL;
@@ -371,6 +403,7 @@ int scoutfs_options_show(struct seq_file *seq, struct dentry *root)
 	seq_printf(seq, ",orphan_scan_delay_ms=%u", opts.orphan_scan_delay_ms);
 	if (opts.quorum_slot_nr >= 0)
 		seq_printf(seq, ",quorum_slot_nr=%d", opts.quorum_slot_nr);
+	seq_printf(seq, ",meta_reserve_blocks=%llu", opts.meta_reserve_blocks);

 	return 0;
 }
@@ -589,6 +622,17 @@ static ssize_t quorum_slot_nr_show(struct kobject *kobj, struct kobj_attribute *
 }
 SCOUTFS_ATTR_RO(quorum_slot_nr);

+static ssize_t meta_reserve_blocks_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
+	struct scoutfs_mount_options opts;
+
+	scoutfs_options_read(sb, &opts);
+	/* sysfs read of the current option value; the field is a u64, so format it unsigned */
+	return snprintf(buf, PAGE_SIZE, "%llu\n", opts.meta_reserve_blocks);
+}
+SCOUTFS_ATTR_RO(meta_reserve_blocks);
+
 static struct attribute *options_attrs[] = {
 	SCOUTFS_ATTR_PTR(data_prealloc_blocks),
 	SCOUTFS_ATTR_PTR(data_prealloc_contig_only),
@@ -597,6 +641,7 @@ static struct attribute *options_attrs[] = {
 	SCOUTFS_ATTR_PTR(orphan_scan_delay_ms),
 	SCOUTFS_ATTR_PTR(quorum_heartbeat_timeout_ms),
 	SCOUTFS_ATTR_PTR(quorum_slot_nr),
+	SCOUTFS_ATTR_PTR(meta_reserve_blocks),
 	NULL,
 };

--- a/kmod/src/options.h
+++ b/kmod/src/options.h
@@ -13,6 +13,7 @@ struct scoutfs_mount_options {
 	unsigned int orphan_scan_delay_ms;
 	int quorum_slot_nr;
 	u64 quorum_heartbeat_timeout_ms;
+	u64 meta_reserve_blocks;
 };

 void scoutfs_options_read(struct super_block *sb, struct scoutfs_mount_options *opts);
--- a/kmod/src/server.c
+++ b/kmod/src/server.c
@@ -772,11 +772,14 @@ static int alloc_move_empty(struct super_block *sb,
 u64 scoutfs_server_reserved_meta_blocks(struct super_block *sb)
 {
 	DECLARE_SERVER_INFO(sb, server);
+	struct scoutfs_mount_options opts;
 	u64 server_blocks;
 	u64 client_blocks;
 	u64 log_blocks;
 	u64 nr_clients;

+	scoutfs_options_read(sb, &opts);
+
 	/* server has two meta_avail lists it swaps between */
 	server_blocks = SCOUTFS_SERVER_META_FILL_TARGET * 2;

@@ -801,7 +804,7 @@ u64 scoutfs_server_reserved_meta_blocks(struct super_block *sb)
 	nr_clients = server->nr_clients;
 	spin_unlock(&server->lock);

-	return server_blocks + (max(1ULL, nr_clients) * client_blocks);
+	return server_blocks + (max(1ULL, nr_clients) * client_blocks) + opts.meta_reserve_blocks;
 }

 /*
@@ -1299,10 +1302,12 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
 * is nested inside holding commits so we recheck the persistent item
 * each time we commit to make sure it's still what we think.   The
 * caller is still going to send the item to the client so we update the
- * caller's each time we make progress.  If we hit an error applying the
- * changes we make then we can't send the log_trees to the client.
+ * caller's each time we make progress.  This is a best-effort attempt
+ * to clean up and it's valid to leave extents in data_freed, so we don't
+ * return errors to the caller.  The client will continue the work later
+ * in get_log_trees or as the rid is reclaimed.
 */
-static int try_drain_data_freed(struct super_block *sb, struct scoutfs_log_trees *lt)
+static void try_drain_data_freed(struct super_block *sb, struct scoutfs_log_trees *lt)
 {
 	DECLARE_SERVER_INFO(sb, server);
 	struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
@@ -1311,7 +1316,6 @@ static int try_drain_data_freed(struct super_block *sb, struct scoutfs_log_trees
 	struct scoutfs_log_trees drain;
 	struct scoutfs_key key;
 	COMMIT_HOLD(hold);
-	bool apply = false;
 	int ret = 0;
 	int err;

@@ -1320,27 +1324,22 @@ static int try_drain_data_freed(struct super_block *sb, struct scoutfs_log_trees
 	while (lt->data_freed.total_len != 0) {
 		server_hold_commit(sb, &hold);
 		mutex_lock(&server->logs_mutex);
-		apply = true;

 		ret = find_log_trees_item(sb, &super->logs_root, false, rid, U64_MAX, &drain);
-		if (ret < 0) {
-			ret = 0;
+		if (ret < 0)
 			break;
-		}

 		/* careful to only keep draining the caller's specific open trans */
 		if (drain.nr != lt->nr || drain.get_trans_seq != lt->get_trans_seq ||
 		    drain.commit_trans_seq != lt->commit_trans_seq || drain.flags != lt->flags) {
-			ret = 0;
+			ret = -ENOENT;
 			break;
 		}

 		ret = scoutfs_btree_dirty(sb, &server->alloc, &server->wri,
 					  &super->logs_root, &key);
-		if (ret < 0) {
-			ret = 0;
+		if (ret < 0)
 			break;
-		}

 		/* moving can modify and return errors, always update caller and item */
 		mutex_lock(&server->alloc_mutex);
@@ -1356,19 +1355,19 @@ static int try_drain_data_freed(struct super_block *sb, struct scoutfs_log_trees
 		BUG_ON(err < 0); /* dirtying must guarantee success */

 		mutex_unlock(&server->logs_mutex);
+
 		ret = server_apply_commit(sb, &hold, ret);
-		apply = false;
-
-		if (ret < 0)
+		if (ret < 0) {
+			ret = 0; /* don't try to abort, ignoring ret */
 			break;
+		}
 	}

-	if (apply) {
+	/* try to cleanly abort and write any partial dirty btree blocks, but ignore result */
+	if (ret < 0) {
 		mutex_unlock(&server->logs_mutex);
-		server_apply_commit(sb, &hold, ret);
+		server_apply_commit(sb, &hold, 0);
 	}
-
-	return ret;
 }

 /*
@@ -1576,9 +1575,9 @@ out:
 		scoutfs_err(sb, "error %d getting log trees for rid %016llx: %s",
 			    ret, rid, err_str);

-	/* try to drain excessive data_freed with additional commits, if needed */
+	/* try to drain excessive data_freed with additional commits, if needed, ignoring err */
 	if (ret == 0)
-		ret = try_drain_data_freed(sb, &lt);
+		try_drain_data_freed(sb, &lt);

 	return scoutfs_net_response(sb, conn, cmd, id, ret, &lt, sizeof(lt));
 }
@@ -4153,7 +4152,7 @@ static void fence_pending_recov_worker(struct work_struct *work)
 	struct server_info *server = container_of(work, struct server_info,
 						  fence_pending_recov_work);
 	struct super_block *sb = server->sb;
-	union scoutfs_inet_addr addr = {{0,}};
+	union scoutfs_inet_addr addr;
 	u64 rid = 0;
 	int ret = 0;

--- a/kmod/src/trans.c
+++ b/kmod/src/trans.c
@@ -159,58 +159,6 @@ static bool drained_holders(struct trans_info *tri)
 	return holders == 0;
 }

-static int commit_current_log_trees(struct super_block *sb, char **str)
-{
-	DECLARE_TRANS_INFO(sb, tri);
-
-	return (*str = "data submit", scoutfs_inode_walk_writeback(sb, true)) ?:
-	       (*str = "item dirty", scoutfs_item_write_dirty(sb))  ?:
-	       (*str = "data prepare", scoutfs_data_prepare_commit(sb))  ?:
-	       (*str = "alloc prepare", scoutfs_alloc_prepare_commit(sb, &tri->alloc, &tri->wri)) ?:
-	       (*str = "meta write", scoutfs_block_writer_write(sb, &tri->wri))  ?:
-	       (*str = "data wait", scoutfs_inode_walk_writeback(sb, false)) ?:
-	       (*str = "commit log trees", commit_btrees(sb)) ?:
-	       scoutfs_item_write_done(sb);
-}
-
-static int get_next_log_trees(struct super_block *sb, char **str)
-{
-	return (*str = "get log trees", scoutfs_trans_get_log_trees(sb));
-}
-
-static int retry_forever(struct super_block *sb, int (*func)(struct super_block *sb, char **str))
-{
-	bool retrying = false;
-	char *str;
-	int ret;
-
-	do {
-		str = NULL;
-
-		ret = func(sb, &str);
-		if (ret < 0) {
-			if (!retrying) {
-				scoutfs_warn(sb, "critical transaction commit failure: %s = %d, retrying",
-					    str, ret);
-				retrying = true;
-			}
-
-			if (scoutfs_forcing_unmount(sb)) {
-				ret = -EIO;
-				break;
-			}
-
-			msleep(2 * MSEC_PER_SEC);
-
-		} else if (retrying) {
-			scoutfs_info(sb, "retried transaction commit succeeded");
-		}
-
-	} while (ret < 0);
-
-	return ret;
-}
-
 /*
 * This work func is responsible for writing out all the dirty blocks
 * that make up the current dirty transaction.  It prevents writers from
@@ -236,6 +184,8 @@ void scoutfs_trans_write_func(struct work_struct *work)
 	struct trans_info *tri = container_of(work, struct trans_info, write_work.work);
 	struct super_block *sb = tri->sb;
 	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
+	bool retrying = false;
+	char *s = NULL;
 	int ret = 0;

 	tri->task = current;
@@ -264,9 +214,37 @@ void scoutfs_trans_write_func(struct work_struct *work)

 	scoutfs_inc_counter(sb, trans_commit_written);

-	/* retry {commit,get}_log_trees until they succeeed, can only fail when forcing unmount */
-	ret = retry_forever(sb, commit_current_log_trees) ?:
-	      retry_forever(sb, get_next_log_trees);
+	do {
+		ret = (s = "data submit", scoutfs_inode_walk_writeback(sb, true)) ?:
+		      (s = "item dirty", scoutfs_item_write_dirty(sb))  ?:
+		      (s = "data prepare", scoutfs_data_prepare_commit(sb))  ?:
+		      (s = "alloc prepare", scoutfs_alloc_prepare_commit(sb, &tri->alloc,
+									 &tri->wri))  ?:
+		      (s = "meta write", scoutfs_block_writer_write(sb, &tri->wri))  ?:
+		      (s = "data wait", scoutfs_inode_walk_writeback(sb, false)) ?:
+		      (s = "commit log trees", commit_btrees(sb)) ?:
+		      scoutfs_item_write_done(sb) ?:
+		      (s = "get log trees", scoutfs_trans_get_log_trees(sb));
+		if (ret < 0) {
+			if (!retrying) {
+				scoutfs_warn(sb, "critical transaction commit failure: %s = %d, retrying",
+					    s, ret);
+				retrying = true;
+			}
+
+			if (scoutfs_forcing_unmount(sb)) {
+				ret = -EIO;
+				break;
+			}
+
+			msleep(2 * MSEC_PER_SEC);
+
+		} else if (retrying) {
+			scoutfs_info(sb, "retried transaction commit succeeded");
+		}
+
+	} while (ret < 0);
+
 out:
 	spin_lock(&tri->write_lock);
 	tri->write_count++;
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -15,8 +15,7 @@ BIN := src/createmany			\
 	src/o_tmpfile_umask		\
 	src/o_tmpfile_linkat		\
 	src/mmap_stress			\
-	src/mmap_validate		\
-	src/walk_inodes_for_estale
+	src/mmap_validate

 DEPS := $(wildcard src/*.d)

--- a/tests/funcs/exec.sh
+++ b/tests/funcs/exec.sh
@@ -80,15 +80,3 @@ t_compare_output()
 {
 	"$@" >&7 2>&1
 }
-
-#
-# usually bash prints an annoying output message when jobs
-# are killed.  We can avoid that by redirecting stderr for
-# the bash process when it reaps the jobs that are killed.
-#
-t_silent_kill() {
-	exec {ERR}>&2 2>/dev/null
-	kill "$@"
-	wait "$@"
-	exec 2>&$ERR {ERR}>&-
-}
--- a/tests/funcs/filter.sh
+++ b/tests/funcs/filter.sh
@@ -160,9 +160,6 @@ t_filter_dmesg()
 	re="$re|Pipe handler or fully qualified core dump path required.*"
 	re="$re|Set kernel.core_pattern before fs.suid_dumpable.*"

-	# perf warning that it adjusted sample rate
-	re="$re|perf: interrupt took too long.*lowering kernel.perf_event_max_sample_rate.*"
-
 	egrep -v "($re)" | \
 		ignore_harmless_unwind_kasan_stack_oob
 }
--- a/tests/run-tests.sh
+++ b/tests/run-tests.sh
@@ -464,6 +464,7 @@ for i in $(seq 0 $((T_NR_MOUNTS - 1))); do
 	if [ "$i" -lt "$T_QUORUM" ]; then
 		opts="$opts,quorum_slot_nr=$i"
 	fi
+	opts="$opts,meta_reserve_blocks=0"
 	opts="${opts}${T_MNT_OPTIONS}"

 	msg "mounting $meta_dev|$data_dev on $dir"
@@ -532,15 +533,12 @@ for t in $tests; do
 	cmd rm -rf "$T_TMPDIR"
 	cmd mkdir -p "$T_TMPDIR"

-	# create a test name dir in the fs, clean up old data as needed
+	# create a test name dir in the fs
 	T_DS=""
 	for i in $(seq 0 $((T_NR_MOUNTS - 1))); do
 		dir="${T_M[$i]}/test/$test_name"

-		test $i == 0 && (
-			test -d "$dir" && cmd rm -rf "$dir"
-			cmd mkdir -p "$dir"
-		)
+		test $i == 0 && cmd mkdir -p "$dir"

 		eval T_D$i=$dir
 		T_D[$i]=$dir
--- a/tests/src/walk_inodes_for_estale.c
+++ b/tests/src/walk_inodes_for_estale.c
@@ -1,464 +0,0 @@
-
-/*
- * Copyright (C) 2025 Versity Software, Inc.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License v2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
-#endif
-#include <errno.h>
-#include <fcntl.h>
-#include <inttypes.h>
-#include <limits.h>
-#include <signal.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-#include <unistd.h>
-#include <linux/types.h>
-#include <sys/ioctl.h>
-#include <sys/stat.h>
-#include <sys/syscall.h>
-
-#include "ioctl.h"
-
-#define array_size(arr) (sizeof(arr) / sizeof(arr[0]))
-
-#define FILEID_SCOUTFS			0x81
-#define FILEID_SCOUTFS_WITH_PARENT	0x82
-
-static uint64_t meta_seq = 0;
-static bool sig_received = false;
-static bool tracing_on = false;
-static bool exit_on_current = false;
-static bool exiting = false;
-static uint64_t count = 0;
-
-struct our_handle {
-	struct file_handle handle;
-	/*
-	 * scoutfs file handle can be ino or ino/parent. The
-	 * handle_type field of struct file_handle denotes which
-	 * version is in use. We only use the ino variant here.
-	 */
-	__le64 scoutfs_ino;
-};
-
-static void exit_usage(void)
-{
-	printf(
-		" -e            exit once stable meta_seq has been reached\n"
-		" -m <string>   scoutfs mount path string for seq walk\n"
-		" -s <number>   start from meta_seq number, instead of 0\n"
-		);
-	exit(1);
-}
-
-static int write_at(int tracefd, char *path, char *val)
-{
-	int fd = -1;
-	int ret;
-
-	fd = openat(tracefd, path, O_TRUNC | O_RDWR);
-	if (fd < 0)
-		return errno;
-	ret = write(fd, val, strlen(val));
-	if (ret < 0)
-		ret = errno;
-
-	close(fd);
-	return 0;
-}
-
-static int do_trace(int fd, uint64_t ino)
-{
-	struct our_handle handle;
-	int tracefd = -1;
-	int targetfd = -1;
-	int outfd = -1;
-	int infd = -1;
-	char *pidstr;
-	char *name;
-	char *buf;
-	ssize_t bytes;
-	ssize_t written;
-	ssize_t off = 0;
-	unsigned long e = 0;
-	int ret;
-
-	if (asprintf(&pidstr, "%u", getpid()) < 0)
-		return ENOMEM;
-
-	if (asprintf(&name, "trace.scoutfs.open_by_handle_at.ino-%lu", ino) < 0)
-		return ENOMEM;
-
-	buf = malloc(4096);
-	if (!buf)
-		return ENOMEM;
-
-	handle.handle.handle_bytes = sizeof(struct our_handle);
-	handle.handle.handle_type = FILEID_SCOUTFS;
-	handle.scoutfs_ino = htole64(ino);
-
-	/* keep a quick dirfd around for easy writing sysfs files */
-	tracefd = open("/sys/kernel/debug/tracing", 0);
-	if (tracefd < 0)
-		return errno;
-
-	/* start tracing */
-	ret = write_at(tracefd, "current_tracer", "nop") ?:
-	      write_at(tracefd, "current_tracer", "function_graph") ?:
-	      write_at(tracefd, "set_ftrace_pid", pidstr) ?:
-	      write_at(tracefd, "tracing_on", "1");
-
-	tracing_on = true;
-
-	if (ret)
-		goto out;
-
-	targetfd = open_by_handle_at(fd, &handle.handle, O_RDWR);
-	e = errno;
-
-out:
-	/* turn off tracing first */
-	ret = write_at(tracefd, "tracing_on", "0");
-	if (ret)
-		return ret;
-
-	tracing_on = false;
-
-	if (targetfd != -1) {
-		close(targetfd);
-		return 0;
-	}
-
-	if (e == ESTALE) {
-		/* capture trace */
-		outfd = open(name, O_CREAT | O_TRUNC | O_RDWR, 0644);
-		if (outfd < 0) {
-			fprintf(stderr, "Error opening trace\n");
-			return errno;
-		}
-		infd = openat(tracefd, "trace", O_RDONLY);
-		if (infd < 0) {
-			fprintf(stderr, "Error opening trace output\n");
-			return errno;
-		}
-		for (;;) {
-			bytes = pread(infd, buf, 4096, off);
-			if (bytes < 0)
-				return errno;
-			if (bytes == 0)
-				break;
-			written = pwrite(outfd, buf, bytes, off);
-			if (written < 0)
-				return errno;
-			if (written != bytes)
-				return EIO;
-			off += bytes;
-		}
-		close(outfd);
-		close(infd);
-
-		fprintf(stderr, "Wrote \"%s\"\n", name);
-	}
-
-	/* cleanup */
-	ret = write_at(tracefd, "current_tracer", "nop");
-
-	free(pidstr);
-	free(name);
-	free(buf);
-	close(tracefd);
-	/* collect trace output */
-	return ret;
-}
-
-/*
- * lookup path for ino using ino_path
- */
-struct ino_args {
-	char *path;
-	__u64 ino;
-};
-
-static int do_resolve(int fd, uint64_t ino, char **path)
-{
-	struct scoutfs_ioctl_ino_path ioctl_args = {0};
-	struct scoutfs_ioctl_ino_path_result *res;
-	unsigned int result_bytes;
-	int ret;
-
-	result_bytes = offsetof(struct scoutfs_ioctl_ino_path_result,
-				path[PATH_MAX]);
-
-	res = malloc(result_bytes);
-	if (!res)
-		return ENOMEM;
-
-	ioctl_args.ino = ino;
-	ioctl_args.dir_ino = 0;
-	ioctl_args.dir_pos = 0;
-	ioctl_args.result_ptr = (intptr_t)res;
-	ioctl_args.result_bytes = result_bytes;
-
-	ret = ioctl(fd, SCOUTFS_IOC_INO_PATH, &ioctl_args);
-	if (ret < 0) {
-		if (errno == ENOENT) {
-			*path = NULL;
-			return 0;
-		}
-		return errno;
-	}
-
-	ret = asprintf(path, "%.*s", res->path_bytes, res->path);
-	if (ret <= 0)
-		return ENOMEM;
-
-	free(res);
-
-	return 0;
-}
-
-static int do_test_ino(int fd, uint64_t ino)
-{
-	struct our_handle handle = {{0}};
-	struct stat sb = {0};
-	char *path = NULL;
-	int targetfd = -1;
-	int ret;
-
-	/* filter: open_by_handle_at() must fail */
-	handle.handle.handle_bytes = sizeof(struct our_handle);
-	handle.handle.handle_type = FILEID_SCOUTFS;
-	handle.scoutfs_ino = htole64(ino);
-
-	targetfd = open_by_handle_at(fd, &handle.handle, O_RDWR);
-	if (targetfd != -1) {
-		close(targetfd);
-		return 0;
-	}
-
-	/* filter: errno must be ESTALE */
-	if (errno != ESTALE)
-		return 0;
-
-	/* filter: path resolution succeeds to an actual file entry */
-	ret = do_resolve(fd, ino, &path);
-	if (path == NULL)
-		return 0;
-	if (ret)
-		return ret;
-
-	/* filter: stat() must succeed on resolved path */
-	ret = fstatat(fd, path, &sb, AT_SYMLINK_NOFOLLOW);
-	free(path);
-	if (ret != 0) {
-		if (errno == ENOENT)
-			/* doesn't exist */
-			return 0;
-		return errno;
-	}
-
-	return do_trace(fd, ino);
-}
-
-static uint64_t do_get_meta_seq_stable(int fd)
-{
-	struct scoutfs_ioctl_stat_more stm;
-
-	if (ioctl(fd, SCOUTFS_IOC_STAT_MORE, &stm) < 0)
-		return errno;
-
-	return stm.meta_seq;
-}
-
-static int do_walk_seq(int fd)
-{
-	struct scoutfs_ioctl_walk_inodes_entry ents[128];
-	struct scoutfs_ioctl_walk_inodes walk = {{0}};
-	struct timespec ts;
-	time_t seconds;
-	int ret;
-	uint64_t total = 0;
-	uint64_t stable;
-	int i;
-	int j;
-
-	walk.index = SCOUTFS_IOC_WALK_INODES_META_SEQ;
-
-	/* make sure not to advance to stable meta_seq, we can just trail behind */
-	stable = do_get_meta_seq_stable(fd);
-	if (stable == 0)
-		return 0;
-	if (meta_seq >= stable - 1) {
-		if (exit_on_current)
-			exiting = true;
-		return 0;
-	}
-
-	meta_seq = meta_seq ? meta_seq + 1 : 0;
-
-	walk.first.major = meta_seq;
-	walk.first.minor = 0;
-	walk.first.ino = 0;
-
-	walk.last.major = stable - 1;
-	walk.last.minor = ~0;
-	walk.last.ino = ~0ULL;
-
-	walk.entries_ptr = (unsigned long)ents;
-	walk.nr_entries = array_size(ents);
-
-	clock_gettime(CLOCK_REALTIME, &ts);
-	seconds = ts.tv_sec;
-
-	for (j = 0;; j++) {
-		if (sig_received)
-			return 0;
-
-		ret = ioctl(fd, SCOUTFS_IOC_WALK_INODES, &walk);
-		if (ret < 0)
-			return ret;
-
-		if (ret == 0)
-			break;
-
-		for (i = 0; i < ret; i++) {
-			meta_seq = ents[i].major;
-			if (ents[i].ino == 1)
-				continue;
-
-			/* poke at it */
-			ret = do_test_ino(fd, ents[i].ino);
-
-			count++;
-
-			if (ret < 0)
-				return ret;
-		}
-
-		total += i;
-
-		walk.first = ents[i - 1];
-		if (++walk.first.ino == 0 && ++walk.first.minor == 0)
-			walk.first.major++;
-
-		/* yield once in a while */
-		if (j % 32 == 0) {
-			clock_gettime(CLOCK_REALTIME, &ts);
-			if (ts.tv_sec > seconds + 1)
-				break;
-		}
-	}
-
-	return 0;
-}
-
-void handle_signal(int sig)
-{
-	int tracefd = -1;
-
-	sig_received = true;
-
-	if (!tracing_on)
-		return;
-
-	tracefd = open("/sys/kernel/debug/tracing", 0);
-	write_at(tracefd, "tracing_on", "0");
-	close(tracefd);
-}
-
-int main(int argc, char **argv)
-{
-	char *mnt = NULL;
-	char c;
-	int mntfd;
-	int ret;
-
-	meta_seq = 0;
-
-	/* All we need is the mount point arg */
-	while ((c = getopt(argc, argv, "+em:s:")) != -1) {
-		switch (c) {
-			case 'e':
-				exit_on_current = true;
-				break;
-			case 'm':
-				mnt = strdup(optarg);
-				break;
-			case 's':
-				meta_seq = strtoull(optarg, NULL, 0);
-				break;
-			case '?':
-				printf("unknown argument: %c\n", optind);
-			case 'h':
-				exit_usage();
-		}
-	}
-
-	if (!mnt) {
-		fprintf(stderr, "Must provide a mount point with -m\n");
-		exit(EXIT_FAILURE);
-	}
-
-	if (meta_seq > 0)
-		fprintf(stdout, "Starting from meta_seq = %lu\n", meta_seq);
-
-	/* lower prio */
-	ret = nice(10);
-	if (ret == -1)
-		fprintf(stderr, "Error setting nice value\n");
-	ret = syscall(SYS_ioprio_set, 1, 0, 0); /* IOPRIO_WHO_PROCESS = 1, IOPRIO_PRIO_CLASS(IOPRIO_CLASS_IDLE) = 0 */
-	if (ret == -1)
-		fprintf(stderr, "Error setting ioprio value\n");
-
-	signal(SIGINT, handle_signal);
-	signal(SIGTERM, handle_signal);
-
-	for (;;) {
-		if (sig_received)
-			break;
-
-		mntfd = open(mnt, O_RDONLY);
-		if (mntfd == -1) {
-			perror("open(mntfd)");
-			exit(EXIT_FAILURE);
-		}
-
-		ret = do_walk_seq(mntfd);
-		/* handle unmounts? EAGAIN? */
-		if (ret)
-			break;
-
-		close(mntfd);
-
-		if (exiting)
-			break;
-
-		/* yield */
-		if (!sig_received)
-			sleep(5);
-	}
-
-	free(mnt);
-
-	fprintf(stdout, "Last meta_seq = %lu\n", meta_seq);
-
-	if (ret)
-		fprintf(stderr, "Error walking inodes: %s(%d)\n", strerror(errno), ret);
-
-	exit(ret);
-}
--- a/tests/tests/enospc.sh
+++ b/tests/tests/enospc.sh
@@ -88,11 +88,6 @@ rm -rf "$SCR/xattrs"

 echo "== make sure we can create again"
 file="$SCR/file-after"
-C=120
-while (( C-- )); do
-	touch $file 2> /dev/null && break
-	sleep 1
-done
 touch $file
 setfattr -n user.scoutfs-enospc -v 1 "$file"
 sync
--- a/tests/tests/lock-recover-invalidate.sh
+++ b/tests/tests/lock-recover-invalidate.sh
@@ -38,6 +38,6 @@ while [ "$SECONDS" -lt "$END" ]; do
 done

 echo "== stopping background load"
-t_silent_kill $load_pids
+kill $load_pids

 t_pass
--- a/tests/tests/orphan-inodes.sh
+++ b/tests/tests/orphan-inodes.sh
@@ -5,6 +5,18 @@
 t_require_commands sleep touch sync stat handle_cat kill rm
 t_require_mounts 2

+#
+# usually bash prints an annoying output message when jobs
+# are killed.  We can avoid that by redirecting stderr for
+# the bash process when it reaps the jobs that are killed.
+#
+silent_kill() {
+	exec {ERR}>&2 2>/dev/null
+	kill "$@"
+	wait "$@"
+	exec 2>&$ERR {ERR}>&-
+}
+
 #
 # We don't have a great way to test that inode items still exist.   We
 # don't prevent opening handles with nlink 0 today, so we'll use that.
@@ -40,7 +52,7 @@ inode_exists $ino || echo "$ino didn't exist"

 echo "== orphan from failed evict deletion is picked up"
 # pending kill signal stops evict from getting locks and deleting
-t_silent_kill $pid
+silent_kill $pid
 t_set_sysfs_mount_option 0 orphan_scan_delay_ms 1000
 sleep 5
 inode_exists $ino && echo "$ino still exists"
@@ -58,7 +70,7 @@ for nr in $(t_fs_nrs); do
 	rm -f "$path"
 done
 sync
-t_silent_kill $pids
+silent_kill $pids
 for nr in $(t_fs_nrs); do
 	t_force_umount $nr
 done
@@ -70,15 +82,7 @@ done
 # wait for orphan scans to run
 t_set_all_sysfs_mount_options orphan_scan_delay_ms 1000
 # also have to wait for delayed log merge work from mount
-C=120
-while (( C-- )); do
-	brk=1
-	for ino in $inos; do
-		inode_exists $ino && brk=0
-	done
-	test $brk -eq 1 && break
-	sleep 1
-done
+sleep 15
 for ino in $inos; do
 	inode_exists $ino && echo "$ino still exists"
 done
@@ -127,7 +131,7 @@ while [ $SECONDS -lt $END ]; do
 	done

 	# trigger eviction deletion of each file in each mount
-	t_silent_kill $pids
+	silent_kill $pids

 	wait || t_fail "handle_fsetxattr failed"