Add orphan-log-trees test and reclaim_skip_finalize trigger

Add a reclaim_skip_finalize trigger that prevents reclaim from
setting FINALIZED on log_trees entries.  The test arms this trigger,
force-unmounts a client to create an orphan, and verifies the log
merge succeeds without timeout and the orphan reclaim message
appears in dmesg.

Signed-off-by: Auke Kok <auke.kok@versity.com>
This commit is contained in:
Auke Kok
2026-03-25 06:23:40 -07:00
parent daea8d5bc1
commit 8a730464ab
7 changed files with 62 additions and 1 deletions

View File

@@ -2023,7 +2023,7 @@ static int reclaim_open_log_tree(struct super_block *sb, u64 rid)
mutex_unlock(&server->alloc_mutex);
/* only finalize, allowing merging, once the allocators are fully freed */
-if (ret == 0) {
+if (ret == 0 && !scoutfs_trigger(sb, RECLAIM_SKIP_FINALIZE)) {
/* the transaction is no longer open */
lt.commit_trans_seq = lt.get_trans_seq;

View File

@@ -45,6 +45,7 @@ static char *names[] = {
[SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE] = "srch_force_log_rotate",
[SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE] = "srch_merge_stop_safe",
[SCOUTFS_TRIGGER_STATFS_LOCK_PURGE] = "statfs_lock_purge",
[SCOUTFS_TRIGGER_RECLAIM_SKIP_FINALIZE] = "reclaim_skip_finalize",
};
bool scoutfs_trigger_test_and_clear(struct super_block *sb, unsigned int t)

View File

@@ -8,6 +8,7 @@ enum scoutfs_trigger {
SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE,
SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE,
SCOUTFS_TRIGGER_STATFS_LOCK_PURGE,
SCOUTFS_TRIGGER_RECLAIM_SKIP_FINALIZE,
SCOUTFS_TRIGGER_NR,
};

View File

@@ -123,6 +123,9 @@ t_filter_dmesg()
re="$re|hrtimer: interrupt took .*"
re="$re|clocksource: Long readout interval"
# orphan log trees reclaim is handled, not an error
re="$re|scoutfs .* reclaiming orphan log trees"
# fencing tests force unmounts and trigger timeouts
re="$re|scoutfs .* forcing unmount"
re="$re|scoutfs .* reconnect timed out"

View File

@@ -0,0 +1,3 @@
== create orphan log_trees entry via trigger
== verify orphan is reclaimed and merge completes
== verify orphan reclaim was logged

View File

@@ -50,6 +50,7 @@ setup-error-teardown.sh
resize-devices.sh
change-devices.sh
fence-and-reclaim.sh
orphan-log-trees.sh
quorum-heartbeat-timeout.sh
orphan-inodes.sh
mount-unmount-race.sh

View File

@@ -0,0 +1,52 @@
#
# Test that orphaned log_trees entries from unmounted rids are
# finalized and merged.
#
# An orphan log_trees entry is one whose rid has no mounted_clients
# entry. This can happen from incomplete reclaim across server
# failovers. We simulate it with the reclaim_skip_finalize trigger
# which makes reclaim_open_log_tree skip the finalization step.
#
# Flow: record the client's rid, arm the trigger on the server mount,
# force unmount the client, wait for the fence log to mention that rid,
# remount the client, force a log merge, then look for the orphan
# reclaim message for the old rid in dmesg.
#
# The "== ..." echo lines are the test's visible output; presumably
# they are compared against a golden file, so keep them unchanged.
#
t_require_commands touch scoutfs
t_require_mounts 2
# upper bound handed to t_wait_until_timeout below (presumably seconds -- confirm)
TIMEOUT=90
echo "== create orphan log_trees entry via trigger"
sv=$(t_server_nr)
cl=$(t_first_client_nr)
# capture the rid before the unmount; the remounted client gets a new
# rid, and this old one is what the fence log and dmesg checks look for
rid=$(t_mount_rid $cl)
# NOTE(review): presumably this gives both mounts dirty items so each
# has log_trees contents to commit -- confirm against the harness
touch "$T_D0/file" "$T_D1/file"
sync
# arm the trigger so reclaim skips finalization
t_trigger_arm_silent reclaim_skip_finalize $sv
# force unmount the client, server will fence and reclaim it
# but the trigger makes reclaim leave log_trees unfinalized
t_force_umount $cl
# wait for fencing to run
# verify_fenced succeeds once the fenced log contains a "running rid"
# line for the old rid.  grep errors (such as the log not existing yet)
# are discarded and just make the check fail for this attempt.
verify_fenced() {
	grep -q "running rid '$rid'" "$T_FENCED_LOG" 2>/dev/null
}
t_wait_until_timeout $TIMEOUT verify_fenced
# give the server time to complete reclaim after fence
# NOTE(review): this is a fixed delay, nothing here confirms that
# reclaim actually finished before the remount
sleep 5
# remount the client so t_force_log_merge can sync all mounts.
# the client gets a new rid; the old rid's log_trees is the orphan.
t_mount $cl
echo "== verify orphan is reclaimed and merge completes"
t_force_log_merge
echo "== verify orphan reclaim was logged"
# the message must name the old rid captured above, not the new one
if ! dmesg | grep -q "reclaiming orphan log trees for rid $rid"; then
	t_fail "expected orphan reclaim message for rid $rid in dmesg"
fi
t_pass