Test export-lookup-evict-race in a loop with tracing.

This is the test that most consistently hits the unmount hang in our CI, so run it in a tight loop with tracing enabled. Discard the traces whenever a run succeeds. The run will blow up if a hung task timeout occurs, so we should crash on panic and then extract the traces from the crash. Also make sure we don't wait for an hour before that happens. Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-05-01 10:25:43 +00:00 · 2025-12-10 14:22:05 -08:00
9 changed files with 18 additions and 142 deletions
--- a/kmod/src/server.c
+++ b/kmod/src/server.c
@@ -1618,8 +1618,7 @@ static int server_get_log_trees(struct super_block *sb,
 		goto update;
 	}

-	ret = alloc_move_empty(sb, &super->data_alloc, &lt.data_freed,
-			       COMMIT_HOLD_ALLOC_BUDGET / 2);
+	ret = alloc_move_empty(sb, &super->data_alloc, &lt.data_freed, 100);
 	if (ret == -EINPROGRESS)
 		ret = 0;
 	if (ret < 0) {
@@ -1914,11 +1913,9 @@ static int reclaim_open_log_tree(struct super_block *sb, u64 rid)
 	       scoutfs_alloc_splice_list(sb, &server->alloc, &server->wri, server->other_freed,
 					 &lt.meta_avail)) ?:
 	      (err_str = "empty data_avail",
-	       alloc_move_empty(sb, &super->data_alloc, &lt.data_avail,
-				COMMIT_HOLD_ALLOC_BUDGET / 2)) ?:
+	       alloc_move_empty(sb, &super->data_alloc, &lt.data_avail, 100)) ?:
 	      (err_str = "empty data_freed",
-	       alloc_move_empty(sb, &super->data_alloc, &lt.data_freed,
-				COMMIT_HOLD_ALLOC_BUDGET / 2));
+	       alloc_move_empty(sb, &super->data_alloc, &lt.data_freed, 100));
 	mutex_unlock(&server->alloc_mutex);

 	/* only finalize, allowing merging, once the allocators are fully freed */
--- a/tests/funcs/filter.sh
+++ b/tests/funcs/filter.sh
@@ -170,9 +170,6 @@ t_filter_dmesg()
 	# some ci test guests are unresponsive
 	re="$re|longest quorum heartbeat .* delay"

-	# creating block devices may trigger this
-	re="$re|block device autoloading is deprecated and will be removed."
-
 	egrep -v "($re)" | \
 		ignore_harmless_unwind_kasan_stack_oob
 }
--- a/tests/funcs/tap.sh
+++ b/tests/funcs/tap.sh
@@ -43,14 +43,9 @@ t_tap_progress()
 	local testname=$1
 	local result=$2

-	local stmsg=""
 	local diff=""
 	local dmsg=""

-	if [[ -s $T_RESULTS/tmp/${testname}/status.msg ]]; then
-		stmsg="1"
-	fi
-
 	if [[ -s "$T_RESULTS/tmp/${testname}/dmesg.new" ]]; then
 		dmsg="1"
 	fi
@@ -66,7 +61,6 @@ t_tap_progress()
 		echo "# ${testname} ** skipped - permitted **"
 	else
 		echo "not ok ${i} - ${testname}"
-
 		case ${result} in
 		101)
 			echo "# ${testname} ** skipped **"
@@ -76,13 +70,6 @@ t_tap_progress()
 			;;
 		esac

-		if [[ -n "${stmsg}" ]]; then
-			echo "#"
-			echo "# status:"
-			echo "#"
-			cat $T_RESULTS/tmp/${testname}/status.msg | sed 's/^/# - /'
-		fi
-
 		if [[ -n "${diff}" ]]; then
 			echo "#"
 			echo "# diff:"
--- a/tests/run-tests.sh
+++ b/tests/run-tests.sh
@@ -92,10 +92,14 @@ done
 T_TRACE_DUMP="0"
 T_TRACE_PRINTK="0"
 T_PORT_START="19700"
-T_LOOP_ITER="1"
+T_LOOP_ITER="100"

 # array declarations to be able to use array ops
 declare -a T_TRACE_GLOB
+T_TRACE_GLOB=( "scoutfs*" )
+
+# CI sets this to 3600, but here we want it very short so hung tasks are reported quickly
+echo 30 > /proc/sys/kernel/hung_task_timeout_secs

 while true; do
 	case $1 in
@@ -493,6 +497,11 @@ crash_monitor()
 			bad=1
 		fi

+		if dmesg | grep -q "blocked for more than"; then
+			echo "run-tests monitor saw blocked task message"
+			bad=1
+		fi
+
 		if dmesg | grep -q "error indicated by fence action" ; then
 			echo "run-tests monitor saw fence agent error message"
 			bad=1
@@ -504,6 +513,8 @@ crash_monitor()
 		fi

 		if [ "$bad" != 0 ]; then
+			sync & # maybe this gets logs synced...
+			sleep .1
 			echo "run-tests monitor triggering crash"
 			echo c > /proc/sysrq-trigger
 			exit 1
@@ -706,6 +717,8 @@ for t in $tests; do
 		# stop looping if we didn't pass
 		if [ "$sts" != "$T_PASS_STATUS" ]; then
 			break;
+		else
+			echo > /sys/kernel/debug/tracing/trace
 		fi
 	done

--- a/tests/sequence
+++ b/tests/sequence
@@ -1,60 +1 @@
-export-get-name-parent.sh
-basic-block-counts.sh
-basic-bad-mounts.sh
-basic-posix-acl.sh
-inode-items-updated.sh
-simple-inode-index.sh
-simple-staging.sh
-simple-release-extents.sh
-simple-readdir.sh
-get-referring-entries.sh
-fallocate.sh
-basic-truncate.sh
-data-prealloc.sh
-setattr_more.sh
-offline-extent-waiting.sh
-move-blocks.sh
-projects.sh
-large-fragmented-free.sh
-format-version-forward-back.sh
-enospc.sh
-mmap.sh
-srch-safe-merge-pos.sh
-srch-basic-functionality.sh
-simple-xattr-unit.sh
-retention-basic.sh
-totl-xattr-tag.sh
-quota.sh
-lock-refleak.sh
-lock-shrink-consistency.sh
-lock-shrink-read-race.sh
-lock-pr-cw-conflict.sh
-lock-revoke-getcwd.sh
-lock-recover-invalidate.sh
 export-lookup-evict-race.sh
-createmany-parallel.sh
-createmany-large-names.sh
-createmany-rename-large-dir.sh
-stage-release-race-alloc.sh
-stage-multi-part.sh
-o_tmpfile.sh
-basic-posix-consistency.sh
-dirent-consistency.sh
-mkdir-rename-rmdir.sh
-lock-ex-race-processes.sh
-cross-mount-data-free.sh
-persistent-item-vers.sh
-setup-error-teardown.sh
-resize-devices.sh
-change-devices.sh
-fence-and-reclaim.sh
-quorum-heartbeat-timeout.sh
-orphan-inodes.sh
-mount-unmount-race.sh
-client-unmount-recovery.sh
-createmany-parallel-mounts.sh
-archive-light-cycle.sh
-block-stale-reads.sh
-inode-deletion.sh
-renameat2-noreplace.sh
-xfstests.sh
--- a/tests/tests/get-referring-entries.sh
+++ b/tests/tests/get-referring-entries.sh
@@ -72,7 +72,7 @@ touch $T_D0/dir/file
 mkdir $T_D0/dir/dir
 ln -s $T_D0/dir/file $T_D0/dir/symlink
 mknod $T_D0/dir/char c 1 3 # null
-mknod $T_D0/dir/block b 42 0 # SAMPLE block dev - nonexistant/demo use only number
+mknod $T_D0/dir/block b 7 0 # loop0
 for name in $(ls -UA $T_D0/dir | sort); do
 	ino=$(stat -c '%i' $T_D0/dir/$name)
 	$GRE $ino | filter_types
--- a/utils/scoutfs-utils.spec.in
+++ b/utils/scoutfs-utils.spec.in
@@ -4,12 +4,6 @@

 %{!?_release: %global _release 0.%{pkg_date}git%{pkg_git_hash}}

-%if 0%{?rhel} && 0%{?rhel} < 10
-%global tuned_profiles_dir %{_prefix}/lib/tuned
-%else
-%global tuned_profiles_dir %{_prefix}/lib/tuned/profiles
-%endif
-
 Name:           scoutfs-utils
 Summary:        scoutfs user space utilities
 Version:        %{pkg_version}
@@ -63,8 +57,6 @@ install -m 644 -D src/format.h $RPM_BUILD_ROOT%{_includedir}/scoutfs/format.h
 install -m 755 -D fenced/scoutfs-fenced $RPM_BUILD_ROOT%{_libexecdir}/scoutfs-fenced/scoutfs-fenced
 install -m 644 -D fenced/scoutfs-fenced.service $RPM_BUILD_ROOT%{_unitdir}/scoutfs-fenced.service
 install -m 644 -D fenced/scoutfs-fenced.conf.example $RPM_BUILD_ROOT%{_sysconfdir}/scoutfs/scoutfs-fenced.conf.example
-install -m 644 -D tuned/tuned.conf $RPM_BUILD_ROOT%{tuned_profiles_dir}/scoutfs/tuned.conf
-install -m 644 -D tuned/40-scoutfs.conf $RPM_BUILD_ROOT%{_prefix}/lib/tuned/recommend.d/40-scoutfs.conf

 %files
 %defattr(644,root,root,755)
@@ -74,8 +66,6 @@ install -m 644 -D tuned/40-scoutfs.conf $RPM_BUILD_ROOT%{_prefix}/lib/tuned/reco
 %defattr(755,root,root,755)
 %{_sbindir}/scoutfs
 %{_libexecdir}/scoutfs-fenced
-%{tuned_profiles_dir}/scoutfs/tuned.conf
-%{_prefix}/lib/tuned/recommend.d/40-scoutfs.conf

 %files -n scoutfs-devel
 %defattr(644,root,root,755)
--- a/utils/tuned/40-scoutfs.conf
+++ b/utils/tuned/40-scoutfs.conf
@@ -1,9 +0,0 @@
-#
-# scoutfs tuned recommendation
-#
-
-# If the system has support for mounting scoutfs filesystems, which is
-# valid for client mounts and quorum mounts. We then always recommend
-# the scoutfs profile.
-[scoutfs]
-/proc/filesystems=scoutfs
--- a/utils/tuned/tuned.conf
+++ b/utils/tuned/tuned.conf
@@ -1,40 +0,0 @@
-#
-# ScoutFS specific tuned profile
-#
-
-# The parameters below are a mix of settings present in the throughput-performance
-# profile as well as the latency-performance profile. Generally speaking, we
-# want to encourage the system to avoid swap and accumulating large amounts of
-# dirty data, as this can cause reclaim to lead to congestion.
-
-# Enable this profile with `$ sudo tuned-adm profile scoutfs`
-
-# linux default values are marked with [<value>] for reference.
-
-[main]
-summary=Optimize for production scoutfs deployment
-description=Configures the system for production scoutfs filesystem server deployment.
-
-# network-throughput sets some larger buffers useful for 40gbe deployments
-# network-throughput also inherits throughput-performance
-include=network-throughput
-
-[vm]
-# throughput-performance sets dirty_bytes to 40% (much larger than linux default), but
-# this allows the accumulation of large backlogs of writeback. We prefer to writeback
-# often and early to avoid congestion [20%]
-dirty_bytes = 10%
-# start writing back at this amount [10%]
-dirty_background_bytes = 5%
-
-[sysctl]
-# the kernel default is 60. Lower it to instruct the kernel that swapping is
-# expensive and we want to avoid it. We assume scoutfs deployments have ample
-# available RAM. [60]
-vm.swappiness = 10
-
-# increase pdflush runs so it can more aggressively write out dirty data [500]
-vm.dirty_writeback_centisecs = 300
-
-# decrease time dirty data will linger before being written back [3000]
-vm.dirty_expire_centisecs = 2000