Update quorum heartbeat test for mount option

Update the quorum_heartbeat_timeout_ms test to also test the mount
option, not just updating the timeout via sysfs.  This takes some
reworking as we have to avoid the active leader/server when setting the
timeout via the mount option.  We also allow for a bit more slack around
comparing kernel sleeps and userspace wall clocks.

Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
Zach Brown
2023-05-22 16:08:39 -07:00
parent e52435b993
commit 950963375b
2 changed files with 61 additions and 30 deletions

View File

@@ -1,2 +1,5 @@
== bad timeout values fail
== test different timeouts
== bad mount option fails
== mount option
== sysfs
== reset all options

View File

@@ -17,43 +17,52 @@ set_bad_timeout() {
t_fail "set bad q hb to $to"
}
set_quorum_timeouts()
set_timeout()
{
local to="$1"
local was
local nr="$1"
local how="$2"
local to="$3"
local is
for nr in $(t_quorum_nrs); do
local mnt="$(eval echo \$T_M$nr)"
was=$(t_get_sysfs_mount_option $nr quorum_heartbeat_timeout_ms)
if [ $how == "sysfs" ]; then
t_set_sysfs_mount_option $nr quorum_heartbeat_timeout_ms $to
is=$(t_get_sysfs_mount_option $nr quorum_heartbeat_timeout_ms)
fi
if [ $how == "mount" ]; then
t_umount $nr
t_mount_opt $nr "quorum_heartbeat_timeout_ms=$to"
fi
if [ "$is" != "$to" ]; then
t_fail "tried to set qhbto on $nr to $to but got $is"
fi
done
is=$(t_get_sysfs_mount_option $nr quorum_heartbeat_timeout_ms)
if [ "$is" != "$to" ]; then
t_fail "tried to set qhbto on $nr via $how to $to but got $is"
fi
}
test_timeout()
{
local to="$1"
local orig_to
local how="$1"
local to="$2"
local start
local nr
local sv
local delay
local low
local high
# set new timeouts, saving original
orig_to=$(t_get_sysfs_mount_option 0 quorum_heartbeat_timeout_ms)
set_quorum_timeouts $to
# set timeout on non-server quorum mounts
sv=$(t_server_nr)
for nr in $(t_quorum_nrs); do
if [ $nr -ne $sv ]; then
set_timeout $nr $how $to
fi
done
# give followers time to recv heartbeats and reset timeouts
sleep 1
# tear down the current server/leader
nr=$(t_server_nr)
t_force_umount $nr
t_force_umount $sv
# see how long it takes for the next leader to start
start=$(time_ms)
@@ -64,15 +73,15 @@ test_timeout()
echo "to $to delay $delay" >> $T_TMP.delay
# restore the mount that we tore down
t_mount $nr
t_mount $sv
# reset the original timeouts
set_quorum_timeouts $orig_to
# make sure the new leader delay was reasonable, allowing for some slack
low=$((to - 1000))
high=$((to + 3000))
# make sure the new leader delay was reasonable
test "$delay" -gt "$to" || t_fail "delay $delay < to $to"
# allow 5 seconds of slop
test "$delay" -lt $(($to + 5000)) || t_fail "delay $delay > to $to + 5sec"
test "$delay" -lt "$low" && t_fail "delay $delay < low $low (to $to)"
test "$delay" -gt "$high" && t_fail "delay $delay > high $high (to $to)"
}
echo "== bad timeout values fail"
@@ -80,10 +89,29 @@ set_bad_timeout 0
set_bad_timeout -1
set_bad_timeout 1000000
echo "== test different timeouts"
echo "== bad mount option fails"
if [ "$(t_server_nr)" == 0 ]; then
nr=1
else
nr=0
fi
t_umount $nr
t_mount_opt $nr "quorum_heartbeat_timeout_ms=1000000" 2>/dev/null && \
t_fail "bad mount option succeeded"
t_mount $nr
echo "== mount option"
def=$(t_get_sysfs_mount_option 0 quorum_heartbeat_timeout_ms)
test_timeout $def
test_timeout 3000
test_timeout $((def + 19000))
test_timeout mount $def
test_timeout mount 3000
test_timeout mount $((def + 19000))
echo "== sysfs"
test_timeout sysfs $def
test_timeout sysfs 3000
test_timeout sysfs $((def + 19000))
echo "== reset all options"
t_remount_all
t_pass