# Patch summary (descriptive header: text ahead of the first "diff --git"
# line is not part of any hunk).
#
# Adds a "reclaim_skip_finalize" trigger and a test that uses it to leave
# an unfinalized log_trees entry behind for a fenced client.
#
# kmod/src/triggers.h, kmod/src/triggers.c:
#   New enum value SCOUTFS_TRIGGER_RECLAIM_SKIP_FINALIZE, inserted before
#   SCOUTFS_TRIGGER_NR, with the matching names[] entry
#   "reclaim_skip_finalize".
#
# kmod/src/server.c, reclaim_open_log_tree():
#   The finalize branch now runs only when ret == 0 and
#   scoutfs_trigger(sb, RECLAIM_SKIP_FINALIZE) evaluates false.  Because
#   && short-circuits, the trigger is not evaluated when ret != 0.
#   NOTE(review): scoutfs_trigger() is not defined in this patch;
#   presumably it wraps scoutfs_trigger_test_and_clear() so an armed
#   trigger fires once -- confirm.
#
# tests/funcs/filter.sh, t_filter_dmesg():
#   Appends the alternative "scoutfs .* reclaiming orphan log trees" to
#   $re, next to the other patterns built up in that function.
#
# tests/tests/orphan-log-trees.sh (new), tests/golden/orphan-log-trees
# (new), tests/sequence:
#   The test is listed after fence-and-reclaim.sh and requires two mounts.
#   It records the rid of the mount chosen by t_first_client_nr, arms the
#   trigger on the mount chosen by t_server_nr, force-unmounts the client,
#   waits (TIMEOUT=90) for "running rid '<rid>'" to show up in
#   $T_FENCED_LOG, sleeps 5 seconds, remounts the client, runs
#   t_force_log_merge, and fails unless dmesg contains
#   "reclaiming orphan log trees for rid <rid>".
#   NOTE(review): the code that logs "reclaiming orphan log trees" is not
#   part of this patch -- confirm it exists in the tree this applies to.
#   NOTE(review): the fixed "sleep 5" assumes reclaim has finished by
#   then; nothing in the script checks for that.
#
# NOTE(review): the hunks below appear with their line breaks flattened;
# they need to be re-split, one diff line per physical line, before the
# patch can be applied.
diff --git a/kmod/src/server.c b/kmod/src/server.c index ea9a0746..e4791980 100644 --- a/kmod/src/server.c +++ b/kmod/src/server.c @@ -2023,7 +2023,7 @@ static int reclaim_open_log_tree(struct super_block *sb, u64 rid) mutex_unlock(&server->alloc_mutex); /* only finalize, allowing merging, once the allocators are fully freed */ - if (ret == 0) { + if (ret == 0 && !scoutfs_trigger(sb, RECLAIM_SKIP_FINALIZE)) { /* the transaction is no longer open */ lt.commit_trans_seq = lt.get_trans_seq; diff --git a/kmod/src/triggers.c b/kmod/src/triggers.c index 317f0911..4616d013 100644 --- a/kmod/src/triggers.c +++ b/kmod/src/triggers.c @@ -45,6 +45,7 @@ static char *names[] = { [SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE] = "srch_force_log_rotate", [SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE] = "srch_merge_stop_safe", [SCOUTFS_TRIGGER_STATFS_LOCK_PURGE] = "statfs_lock_purge", + [SCOUTFS_TRIGGER_RECLAIM_SKIP_FINALIZE] = "reclaim_skip_finalize", }; bool scoutfs_trigger_test_and_clear(struct super_block *sb, unsigned int t) diff --git a/kmod/src/triggers.h b/kmod/src/triggers.h index eeb33b49..64a798b7 100644 --- a/kmod/src/triggers.h +++ b/kmod/src/triggers.h @@ -8,6 +8,7 @@ enum scoutfs_trigger { SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE, SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE, SCOUTFS_TRIGGER_STATFS_LOCK_PURGE, + SCOUTFS_TRIGGER_RECLAIM_SKIP_FINALIZE, SCOUTFS_TRIGGER_NR, }; diff --git a/tests/funcs/filter.sh b/tests/funcs/filter.sh index 3dc593c8..cb766300 100644 --- a/tests/funcs/filter.sh +++ b/tests/funcs/filter.sh @@ -123,6 +123,9 @@ t_filter_dmesg() re="$re|hrtimer: interrupt took .*" re="$re|clocksource: Long readout interval" + # orphan log trees reclaim is handled, not an error + re="$re|scoutfs .* reclaiming orphan log trees" + + # fencing tests force unmounts and trigger timeouts re="$re|scoutfs .* forcing unmount" re="$re|scoutfs .* reconnect timed out" diff --git a/tests/golden/orphan-log-trees b/tests/golden/orphan-log-trees new file mode 100644 index 00000000..08003b10 --- 
/dev/null +++ b/tests/golden/orphan-log-trees @@ -0,0 +1,3 @@ +== create orphan log_trees entry via trigger +== verify orphan is reclaimed and merge completes +== verify orphan reclaim was logged diff --git a/tests/sequence b/tests/sequence index 1b6a74f3..e296553a 100644 --- a/tests/sequence +++ b/tests/sequence @@ -50,6 +50,7 @@ setup-error-teardown.sh resize-devices.sh change-devices.sh fence-and-reclaim.sh +orphan-log-trees.sh quorum-heartbeat-timeout.sh orphan-inodes.sh mount-unmount-race.sh diff --git a/tests/tests/orphan-log-trees.sh b/tests/tests/orphan-log-trees.sh new file mode 100644 index 00000000..14927eb6 --- /dev/null +++ b/tests/tests/orphan-log-trees.sh @@ -0,0 +1,52 @@ +# +# Test that orphaned log_trees entries from unmounted rids are +# finalized and merged. +# +# An orphan log_trees entry is one whose rid has no mounted_clients +# entry. This can happen from incomplete reclaim across server +# failovers. We simulate it with the reclaim_skip_finalize trigger +# which makes reclaim_open_log_tree skip the finalization step. +# + +t_require_commands touch scoutfs +t_require_mounts 2 + +TIMEOUT=90 + +echo "== create orphan log_trees entry via trigger" +sv=$(t_server_nr) +cl=$(t_first_client_nr) +rid=$(t_mount_rid $cl) + +touch "$T_D0/file" "$T_D1/file" +sync + +# arm the trigger so reclaim skips finalization +t_trigger_arm_silent reclaim_skip_finalize $sv + +# force unmount the client, server will fence and reclaim it +# but the trigger makes reclaim leave log_trees unfinalized +t_force_umount $cl + +# wait for fencing to run +verify_fenced() { + grep -q "running rid '$rid'" "$T_FENCED_LOG" 2>/dev/null +} +t_wait_until_timeout $TIMEOUT verify_fenced + +# give the server time to complete reclaim after fence +sleep 5 + +# remount the client so t_force_log_merge can sync all mounts. +# the client gets a new rid; the old rid's log_trees is the orphan. 
+t_mount $cl + +echo "== verify orphan is reclaimed and merge completes" +t_force_log_merge + +echo "== verify orphan reclaim was logged" +if ! dmesg | grep -q "reclaiming orphan log trees for rid $rid"; then + t_fail "expected orphan reclaim message for rid $rid in dmesg" +fi + +t_pass