diff --git a/tests/run-tests.sh b/tests/run-tests.sh index cbfad39e..cace8dfb 100755 --- a/tests/run-tests.sh +++ b/tests/run-tests.sh @@ -39,6 +39,20 @@ cmd() { die "cmd failed (check the run.log)" } +# we can record pids to kill as we exit, we kill in reverse added order +declare -a atexit_kill_pids +atexit_kill() +{ + local pid + + for pid in $(echo ${atexit_kill_pids[*]} | rev); do + if test -e "/proc/$pid/status" ; then + kill "$pid" + fi + done +} +trap atexit_kill EXIT + show_help() { cat << EOF @@ -451,26 +465,44 @@ EOF export SCOUTFS_FENCED_CONFIG_FILE="$conf" T_FENCED_LOG="$T_RESULTS/fenced.log" -# -# Run the agent in the background, log its output, an kill it if we -# exit -# -fenced_log() -{ - echo "[$(timestamp)] $*" >> "$T_FENCED_LOG" -} -fenced_pid="" -kill_fenced() -{ - if test -n "$fenced_pid" -a -d "/proc/$fenced_pid" ; then - fenced_log "killing fenced pid $fenced_pid" - kill "$fenced_pid" - fi -} -trap kill_fenced EXIT $T_UTILS/fenced/scoutfs-fenced > "$T_FENCED_LOG" 2>&1 & fenced_pid=$! -fenced_log "started fenced pid $fenced_pid in the background" +atexit_kill_pids+=($fenced_pid) + +# +# some critical failures will cause fs operations to hang. We can watch +# for evidence of them and cause the system to crash, at least. +# +crash_monitor() +{ + local bad=0 + + while sleep 1; do + if dmesg | grep -q "inserting extent.*overlaps existing"; then + echo "run-tests monitor saw overlapping extent message" + bad=1 + fi + + if dmesg | grep -q "error indicated by fence action" ; then + echo "run-tests monitor saw fence agent error message" + bad=1 + fi + + if [ ! -e "/proc/${fenced_pid}/status" ]; then + echo "run-tests monitor didn't see fenced pid $fenced_pid /proc dir" + bad=1 + fi + + if [ "$bad" != 0 ]; then + echo "run-tests monitor triggering crash" + echo c > /proc/sysrq-trigger + # bg function doesn't reload bash, $$ is parent run-tests.sh + kill -9 $$ + fi + done +} +crash_monitor & +atexit_kill_pids+=($!) # setup dm tables echo "0 $(blockdev --getsz $T_META_DEVICE) linear $T_META_DEVICE 0" > \