mirror of
https://github.com/versity/scoutfs.git
synced 2026-01-05 03:44:05 +00:00
The fence script we use for our single node multi-mount tests only knows how to fence by using forced unmount to destroy a mount. As of now, the tests only generate failing nodes that need to be fenced by using forced unmount as well. This results in the awkward situation where the testing fence script doesn't have anything to do because the mount is already gone. When the test fence script has nothing to do we might not notice if it isn't run. This adds explicit verification to the fencing tests that the script was really run. It adds per-invocation logging to the fence script and the test makes sure that it was run. While we're at it, we take the opportunity to tidy up some of the scripting around this. We use a sysfs file with the data device major:minor numbers so that the fencing script can find and unmount mounts without having to ask them for their rid. They may not be operational. Signed-off-by: Zach Brown <zab@versity.com>
108 lines
2.4 KiB
Bash
Executable File
108 lines
2.4 KiB
Bash
Executable File
#!/usr/bin/bash
|
|
|
|
# Print one timestamped line to stdout in the form "[<date> <time>] <text>".
#
# Arguments: the words of the message.  They are joined into a single
# string with "$*" (separated by the first character of IFS, a space by
# default).  Passing "$@" to printf instead would make it re-apply the
# format for every extra argument and emit additional, mis-bracketed lines.
message_output()
{
	printf "[%s] %s\n" "$(date '+%F %T.%N')" "$*"
}
|
|
|
|
# Emit a timestamped message through message_output, with its output
# redirected to stderr instead of stdout.
error_message()
{
	{ message_output "$@"; } >&2
}
|
|
|
|
# Report a fatal error through error_message and terminate the script with
# exit status 1.
#
# Arguments: the words of the message.  They are joined into one string
# with "$*" before ", exiting" is appended; "$@, exiting" would only attach
# the suffix to the last argument and pass the others as separate words.
error_exit()
{
	error_message "$*, exiting"
	exit 1
}
|
|
|
|
# Write an informational message by handing every argument through to
# message_output unchanged; the output stays on stdout.
log_message()
{
	message_output "${@}"
}
|
|
|
|
# restart if we catch hup to re-read the config
#
# Remember how this script was started.  The trap invokes the handler with
# no arguments and "$@" inside a function is the function's own argument
# list, so the handler needs this saved copy to have something to exec.
_fenced_restart_cmd=("$BASH" "$0" "$@")

# Log the restart and replace this process with a new command.
# Arguments: an optional command line to exec.  When none is given (as
# when called from the SIGHUP trap) the saved invocation of this script
# is used, which makes the new process source the config file again.
hup_restart()
{
	log_message "caught SIGHUP, restarting"
	if [ "$#" -eq 0 ]; then
		set -- "${_fenced_restart_cmd[@]}"
	fi
	exec "$@"
}
trap hup_restart SIGHUP
|
|
|
|
# defaults; a config file path already present in the environment wins
: "${SCOUTFS_FENCED_CONFIG_FILE:=/etc/scoutfs/scoutfs-fenced.conf}"
SCOUTFS_FENCED_DELAY=2
#SCOUTFS_FENCED_RUN
#SCOUTFS_FENCED_RUN_ARGS

# the config file has to be named and readable before it is sourced
if [ -z "$SCOUTFS_FENCED_CONFIG_FILE" ]; then
	error_exit "SCOUTFS_FENCED_CONFIG_FILE isn't set"
fi
if [ ! -r "$SCOUTFS_FENCED_CONFIG_FILE" ]; then
	error_exit "SCOUTFS_FENCED_CONFIG_FILE isn't readable file"
fi

log_message "reading config file $SCOUTFS_FENCED_CONFIG_FILE"

# the config file is sourced as bash, so it can override the defaults above
if ! . "$SCOUTFS_FENCED_CONFIG_FILE"; then
	error_exit "error sourcing $SCOUTFS_FENCED_CONFIG_FILE as bash script"
fi

# log every variable whose name starts with SCOUTFS_FENCED_
for conf in "${!SCOUTFS_FENCED_@}"; do
	log_message " config var $conf=${!conf}"
done

# the run command is mandatory and must be an executable file
if [ -z "$SCOUTFS_FENCED_RUN" ]; then
	error_exit "SCOUTFS_FENCED_RUN must be set"
fi
if [ ! -x "$SCOUTFS_FENCED_RUN" ]; then
	error_exit "SCOUTFS_FENCED_RUN '$SCOUTFS_FENCED_RUN' isn't executable"
fi
|
|
|
|
#
|
|
# Main loop watching for fence requests across all filesystems. The
|
|
# server can shut down without waiting for pending fence requests to
|
|
# finish. All of the interaction with the fence directory and files can
|
|
# fail at any moment. We will generate log messages when the dir or
|
|
# files disappear.
|
|
#
|
|
|
|
# generate failure messages to stderr while still echoing 0 for the caller
#
# Arguments: the path(s) to read, passed straight through to cat.
# Outputs: the file contents on stdout, or "0" when cat fails; cat's own
# error message is left on stderr.
careful_cat()
{
	cat "$@" || echo 0
}
|
|
|
|
# Poll for fence request directories every SCOUTFS_FENCED_DELAY seconds and
# run SCOUTFS_FENCED_RUN for each request that isn't yet marked fenced or
# error.  The loop ends only if sleep itself fails.
while sleep "$SCOUTFS_FENCED_DELAY"; do
	for fence in /sys/fs/scoutfs/*/fence/*; do
		# catches the literal, unmatched glob when there are no dirs
		if [[ ! -d "$fence" ]]; then
			continue
		fi

		# skip requests that have been handled
		if [[ "$(careful_cat "$fence/fenced")" == 1 ]] || \
		   [[ "$(careful_cat "$fence/error")" == 1 ]]; then
			continue
		fi

		# the mount's sysfs dir name is two levels above the request dir
		srv=$(basename "$(dirname "$(dirname "$fence")")")

		# the request files can fail to read at any moment; never run
		# the fence command with a rid or address we couldn't read
		if ! rid=$(cat "$fence/rid") || ! ip=$(cat "$fence/ipv4_addr"); then
			log_message "server $srv couldn't read rid or ipv4_addr from $fence, skipping"
			continue
		fi
		# reason is only used in the log message below
		reason=$(cat "$fence/reason")

		log_message "server $srv fencing rid $rid at IP $ip for $reason"

		# export _REQ_ vars for run to use
		export SCOUTFS_FENCED_REQ_RID="$rid"
		export SCOUTFS_FENCED_REQ_IP="$ip"

		# The command path is quoted to match the quoted -x check made
		# at startup.  RUN_ARGS is deliberately left unquoted so that
		# it splits into separate arguments.
		# shellcheck disable=SC2086
		"$SCOUTFS_FENCED_RUN" $SCOUTFS_FENCED_RUN_ARGS
		rc=$?
		if [ "$rc" != 0 ]; then
			log_message "server $srv fencing rid $rid saw error status $rc"
			echo 1 > "$fence/error"
			continue
		fi

		echo 1 > "$fence/fenced"
	done
done
|