diff --git a/tests/fenced-local-force-unmount.sh b/tests/fenced-local-force-unmount.sh
index 9d97a79b..f5553be1 100755
--- a/tests/fenced-local-force-unmount.sh
+++ b/tests/fenced-local-force-unmount.sh
@@ -1,5 +1,21 @@
 #!/usr/bin/bash
 
+#
+# This fencing script is used for testing clusters of multiple mounts on
+# a single host. It finds mounts to fence by looking for their rids and
+# only knows how to "fence" by using forced unmount.
+#
+
+echo "$0 running rid '$SCOUTFS_FENCED_REQ_RID' ip '$SCOUTFS_FENCED_REQ_IP' args '$@'"
+
+# print a message to stderr without ending the script
+log() {
+	echo "$@" >&2
+}
+
+# print a message to stderr and exit with failure; write to the
+# inherited stderr because reopening /dev/stderr with ">" can truncate
+# a log file that stderr was redirected to
 echo_fail() {
-	echo "$@" > /dev/stderr
+	echo "$@" >&2
 	exit 1
@@ -7,29 +23,30 @@ echo_fail() {
 
 rid="$SCOUTFS_FENCED_REQ_RID"
 
-#
-# Look for a local mount with the rid to fence. Typically we'll at
-# least find the mount with the server that requested the fence that
-# we're processing. But it's possible that mounts are unmounted
-# before, or while, we're running.
-#
-mnts=$(findmnt -l -n -t scoutfs -o TARGET) || \
-	echo_fail "findmnt -t scoutfs failed" > /dev/stderr
+#
+# Walk the per-mount sysfs directories looking for the requested rid,
+# then force unmount every mount point of that mount's data device.
+#
+for fs in /sys/fs/scoutfs/*; do
+	# skip the literal pattern when the glob matches nothing
+	[ ! -d "$fs" ] && continue
 
-for mnt in $mnts; do
-	mnt_rid=$(scoutfs statfs -p "$mnt" -s rid) || \
-		echo_fail "scoutfs statfs $mnt failed"
-
-	if [ "$mnt_rid" == "$rid" ]; then
-		umount -f "$mnt" || \
-			echo_fail "umout -f $mnt"
-
-		exit 0
+	fs_rid="$(cat "$fs/rid")" || \
+		echo_fail "failed to get rid in $fs"
+	if [ "$fs_rid" != "$rid" ]; then
+		continue
 	fi
+
+	nr="$(cat "$fs/data_device_maj_min")" || \
+		echo_fail "failed to get data device major:minor in $fs"
+
+	# findmnt -S accepts the source as a maj:min pair
+	mnts=$(findmnt -l -n -t scoutfs -o TARGET -S "$nr") || \
+		echo_fail "findmnt -t scoutfs -S $nr failed"
+	for mnt in $mnts; do
+		umount -f "$mnt" || \
+			echo_fail "umount -f $mnt failed"
+	done
 done
 
-#
-# If the mount doesn't exist on this host then it can't access the
-# devices by definition and can be considered fenced.
-#
 exit 0
diff --git a/tests/run-tests.sh b/tests/run-tests.sh
index 5f826474..a8aa02af 100755
--- a/tests/run-tests.sh
+++ b/tests/run-tests.sh
@@ -380,13 +380,14 @@ cmd grep . /sys/kernel/debug/tracing/options/trace_printk \
 # Build a fenced config that runs scripts out of the repository rather
 # than the default system directory
 #
-conf="$T_RESULTS/scoutfs-fencd.conf"
+conf="$T_RESULTS/scoutfs-fenced.conf"
 cat > $conf << EOF
 SCOUTFS_FENCED_DELAY=1
 SCOUTFS_FENCED_RUN=$T_TESTS/fenced-local-force-unmount.sh
-SCOUTFS_FENCED_RUN_ARGS=""
+SCOUTFS_FENCED_RUN_ARGS="ignored run args"
 EOF
 export SCOUTFS_FENCED_CONFIG_FILE="$conf"
+T_FENCED_LOG="$T_RESULTS/fenced.log"
 
 #
 # Run the agent in the background, log its output, an kill it if we
@@ -394,7 +395,7 @@ export SCOUTFS_FENCED_CONFIG_FILE="$conf"
 #
 fenced_log()
 {
-	echo "[$(timestamp)] $*" >> "$T_RESULTS/fenced.stdout.log"
+	echo "[$(timestamp)] $*" >> "$T_FENCED_LOG"
 }
 fenced_pid=""
 kill_fenced()
@@ -405,7 +406,10 @@ kill_fenced()
 	fi
 }
 trap kill_fenced EXIT
-$T_UTILS/fenced/scoutfs-fenced > "$T_RESULTS/fenced.stdout.log" 2> "$T_RESULTS/fenced.stderr.log" &
+# start with an empty log, then have fenced append to it so that its
+# writes land after, not on top of, the lines fenced_log appends
+: > "$T_FENCED_LOG"
+$T_UTILS/fenced/scoutfs-fenced >> "$T_FENCED_LOG" 2>&1 &
 fenced_pid=$!
 fenced_log "started fenced pid $fenced_pid in the background"
 
diff --git a/tests/tests/fence-and-reclaim.sh b/tests/tests/fence-and-reclaim.sh
index 1ce52048..1fe1ad2e 100644
--- a/tests/tests/fence-and-reclaim.sh
+++ b/tests/tests/fence-and-reclaim.sh
@@ -45,6 +45,18 @@ check_read_write()
 	fi
 }
 
+# verify that fenced ran our testing fence script
+verify_fenced_run()
+{
+	local rids="$*"
+	local rid
+
+	for rid in $rids; do
+		grep -q ".* running rid '$rid'.* args 'ignored run args'" "$T_FENCED_LOG" || \
+			t_fail "fenced didn't execute RUN script for rid $rid"
+	done
+}
+
 echo "== make sure all mounts can see each other"
 check_read_write
 
@@ -62,12 +74,14 @@ done
 while t_rid_is_fencing $rid; do
 	sleep .5
 done
+verify_fenced_run $rid
 t_mount $cl
 check_read_write
 
 echo "== force unmount all non-server, connection timeout, fence nop, mount"
 sv=$(t_server_nr)
 pattern="nonsense"
+rids=""
 sync
 for cl in $(t_fs_nrs); do
 	if [ $cl == $sv ]; then
@@ -75,6 +89,7 @@ for cl in $(t_fs_nrs); do
 	fi
 
 	rid=$(t_mount_rid $cl)
+	rids="$rids $rid"
 	pattern="$pattern|$rid"
 	echo "cl $cl sv $sv rid $rid" >> "$T_TMP.log"
 
@@ -89,6 +104,7 @@ done
 while test -d $(echo /sys/fs/scoutfs/*/fence/* | cut -d " " -f 1); do
 	sleep .5
 done
+verify_fenced_run $rids
 # remount all the clients
 for cl in $(t_fs_nrs); do
 	if [ $cl == $sv ]; then
@@ -109,11 +125,17 @@ t_wait_for_leader
 while t_rid_is_fencing $rid; do
 	sleep .5
 done
+verify_fenced_run $rid
 t_mount $sv
 check_read_write
 
 echo "== force unmount everything, new server fences all previous"
 sync
+rids=""
+# get rids before forced unmount breaks scoutfs statfs
+for nr in $(t_fs_nrs); do
+	rids="$rids $(t_mount_rid $nr)"
+done
 for nr in $(t_fs_nrs); do
 	t_force_umount $nr
 done
@@ -122,6 +144,7 @@ t_mount_all
 while test -d $(echo /sys/fs/scoutfs/*/fence/* | cut -d " " -f 1); do
 	sleep .5
 done
+verify_fenced_run $rids
 check_read_write
 
 t_pass
diff --git a/utils/fenced/scoutfs-fenced b/utils/fenced/scoutfs-fenced
index 6e53099a..fa866e25 100755
--- a/utils/fenced/scoutfs-fenced
+++ b/utils/fenced/scoutfs-fenced
@@ -55,9 +55,21 @@ test -x "$SCOUTFS_FENCED_RUN" || \
 	error_exit "SCOUTFS_FENCED_RUN '$SCOUTFS_FENCED_RUN' isn't executable"
 
 #
-# main loop watching for fence request across all filesystems
+# Main loop watching for fence requests across all filesystems. The
+# server can shut down without waiting for pending fence requests to
+# finish. All of the interaction with the fence directory and files can
+# fail at any moment. We will generate log messages when the dir or
+# files disappear.
 #
 
+# generate failure messages to stderr while still echoing 0 for the caller
+careful_cat()
+{
+	local path="$1"
+
+	cat "$path" || echo 0
+}
+
 while sleep $SCOUTFS_FENCED_DELAY; do
 	for fence in /sys/fs/scoutfs/*/fence/*; do
 		# catches unmatched regex when no dirs
@@ -66,7 +78,8 @@ while sleep $SCOUTFS_FENCED_DELAY; do
 		fi
 
 		# skip requests that have been handled
-		if [ $(cat "$fence/fenced") == 1 -o $(cat "$fence/error") == 1 ]; then
+		if [ "$(careful_cat "$fence/fenced")" == 1 -o \
+		     "$(careful_cat "$fence/error")" == 1 ]; then
 			continue
 		fi
 
@@ -81,10 +94,10 @@ while sleep $SCOUTFS_FENCED_DELAY; do
 		export SCOUTFS_FENCED_REQ_RID="$rid"
 		export SCOUTFS_FENCED_REQ_IP="$ip"
 
-		$run $SCOUTFS_FENCED_RUN_ARGS
+		$SCOUTFS_FENCED_RUN $SCOUTFS_FENCED_RUN_ARGS
 		rc=$?
 		if [ "$rc" != 0 ]; then
-			log_message "server $srv fencing rid $rid saw error status $rc from $run"
+			log_message "server $srv fencing rid $rid saw error status $rc"
 			echo 1 > "$fence/error"
 			continue
 		fi