mirror of
https://github.com/versity/scoutfs.git
synced 2025-12-23 05:25:18 +00:00
Handle disappearing fencing requests and targets
The userspace fencing process wasn't careful about handling underlying directories that disappear while it is working. On the server/fenced side, fencing requests can linger after they've been resolved by writing 1 to fenced or error. The script could come back around to see the directory before the server finally removes it, causing all later uses of the request dir to fail. We saw this in the logs as a bunch of cat errors for the various request files. On the local fence script side, all the mounts can be in the process of being unmounted, so both the /sys/fs dirs and the mount itself can be removed while we're working. For both, when we're working with the /sys/fs files we read them without logging errors and then test that the dir still exists before using what we read. When fencing a mount, we stop if findmnt doesn't find the mount and then raise a umount error if the /sys/fs dir exists after umount fails. And while we're at it, we have each script's logging append instead of truncating (if, say, it's a log file instead of an interactive tty). Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
@@ -8,36 +8,34 @@
|
||||
|
||||
echo "$0 running rid '$SCOUTFS_FENCED_REQ_RID' ip '$SCOUTFS_FENCED_REQ_IP' args '$@'"
|
||||
|
||||
log() {
|
||||
echo "$@" > /dev/stderr
|
||||
echo_fail() {
|
||||
echo "$@" >> /dev/stderr
|
||||
exit 1
|
||||
}
|
||||
|
||||
echo_fail() {
|
||||
echo "$@" > /dev/stderr
|
||||
exit 1
|
||||
# Print the named files with cat while discarding cat's own error output,
# so a file that vanishes while being read produces no log noise. Only
# stderr is redirected; stdout and cat's exit status pass through to the
# caller, which is expected to re-check that the directory still exists
# before trusting what was read.
|
||||
quiet_cat()
|
||||
{
|
||||
cat "$@" 2>/dev/null
|
||||
}
|
||||
|
||||
rid="$SCOUTFS_FENCED_REQ_RID"
|
||||
|
||||
shopt -s nullglob
|
||||
for fs in /sys/fs/scoutfs/*; do
|
||||
[ ! -d "$fs" ] && continue
|
||||
fs_rid="$(quiet_cat $fs/rid)"
|
||||
nr="$(quiet_cat $fs/data_device_maj_min)"
|
||||
[ ! -d "$fs" -o "$fs_rid" != "$rid" ] && continue
|
||||
|
||||
fs_rid="$(cat $fs/rid)" || \
|
||||
echo_fail "failed to get rid in $fs"
|
||||
if [ "$fs_rid" != "$rid" ]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
nr="$(cat $fs/data_device_maj_min)" || \
|
||||
echo_fail "failed to get data device major:minor in $fs"
|
||||
|
||||
mnts=$(findmnt -l -n -t scoutfs -o TARGET -S $nr) || \
|
||||
mnt=$(findmnt -l -n -t scoutfs -o TARGET -S $nr) || \
|
||||
echo_fail "findmnt -t scoutfs -S $nr failed"
|
||||
for mnt in $mnts; do
|
||||
umount -f "$mnt" || \
|
||||
echo_fail "umout -f $mnt failed"
|
||||
done
|
||||
[ -z "$mnt" ] && continue
|
||||
|
||||
if ! umount -qf "$mnt"; then
|
||||
if [ -d "$fs" ]; then
|
||||
echo_fail "umount -qf $mnt failed"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
exit 0
|
||||
|
||||
@@ -7,7 +7,7 @@ message_output()
|
||||
|
||||
error_message()
|
||||
{
|
||||
message_output "$@" >&2
|
||||
message_output "$@" >> /dev/stderr
|
||||
}
|
||||
|
||||
error_exit()
|
||||
@@ -62,31 +62,27 @@ test -x "$SCOUTFS_FENCED_RUN" || \
|
||||
# files disappear.
|
||||
#
|
||||
|
||||
# generate failure messages to stderr while still echoing 0 for the caller
|
||||
careful_cat()
|
||||
# silence error messages
|
||||
quiet_cat()
|
||||
{
|
||||
local path="$@"
|
||||
|
||||
cat "$@" || echo 0
|
||||
cat "$@" 2>/dev/null
|
||||
}
|
||||
|
||||
while sleep $SCOUTFS_FENCED_DELAY; do
|
||||
shopt -s nullglob
|
||||
for fence in /sys/fs/scoutfs/*/fence/*; do
|
||||
# catches unmatched regex when no dirs
|
||||
if [ ! -d "$fence" ]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
# skip requests that have been handled
|
||||
if [ "$(careful_cat $fence/fenced)" == 1 -o \
|
||||
"$(careful_cat $fence/error)" == 1 ]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
srv=$(basename $(dirname $(dirname $fence)))
|
||||
rid="$(cat $fence/rid)"
|
||||
ip="$(cat $fence/ipv4_addr)"
|
||||
reason="$(cat $fence/reason)"
|
||||
fenced="$(quiet_cat $fence/fenced)"
|
||||
error="$(quiet_cat $fence/error)"
|
||||
rid="$(quiet_cat $fence/rid)"
|
||||
ip="$(quiet_cat $fence/ipv4_addr)"
|
||||
reason="$(quiet_cat $fence/reason)"
|
||||
|
||||
# request dirs can linger then disappear after fenced/error is set
|
||||
if [ ! -d "$fence" -o "$fenced" == "1" -o "$error" == "1" ]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
log_message "server $srv fencing rid $rid at IP $ip for $reason"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user