Compare commits

..

1 Commits

Author SHA1 Message Date
Auke Kok e011c80452 Do not fence connections without valid greeting
There is no reason to fence any connection that hasn't sent a valid
greeting, since they haven't progressed far enough for it to make
sense. Skip the fence call for these connections and let the existing
destroy path tear them down. Any real client will reconnect.

server_notify_down() previously treated any zero-rid teardown as
the listening socket going down and called stop_server(). That is
correct for the listener itself, but not for a connection that
never completed a greeting.

Adds a test that sends a short garbage payload to the first quorum port
that accepts a connection, then verifies that no greeting-less connection
remains past the reconnect timeout (20s) and that the filesystem still
works afterwards.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-05-06 16:44:37 -07:00
6 changed files with 146 additions and 94 deletions
+17 -9
View File
@@ -1452,15 +1452,23 @@ restart:
set_conn_fl(acc, reconn_freeing);
spin_unlock(&conn->lock);
if (!test_conn_fl(conn, shutting_down)) {
scoutfs_info(sb, "client "SIN_FMT" reconnect timed out, fencing",
SIN_ARG(&acc->last_peername));
ret = scoutfs_fence_start(sb, acc->rid,
acc->last_peername.sin_addr.s_addr,
SCOUTFS_FENCE_CLIENT_RECONNECT);
if (ret) {
scoutfs_err(sb, "client fence returned err %d, shutting down server",
ret);
scoutfs_server_stop(sb);
/*
* Connections that never completed a valid greeting
* (port scans, malformed traffic, half-open peers)
* haven't progressed far enough to warrant fencing.
* Drop them. Any real client will reconnect.
*/
if (test_conn_fl(acc, valid_greeting)) {
scoutfs_info(sb, "client "SIN_FMT" reconnect timed out, fencing",
SIN_ARG(&acc->last_peername));
ret = scoutfs_fence_start(sb, acc->rid,
acc->last_peername.sin_addr.s_addr,
SCOUTFS_FENCE_CLIENT_RECONNECT);
if (ret) {
scoutfs_err(sb, "client fence returned err %d, shutting down server",
ret);
scoutfs_server_stop(sb);
}
}
}
destroy_conn(acc);
+2 -1
View File
@@ -4315,7 +4315,8 @@ static void server_notify_down(struct super_block *sb,
spin_unlock(&server->lock);
free_farewell_requests(sb, rid);
} else {
} else if (!conn->listening_conn) {
/* only the listener going down should stop the server */
stop_server(server);
}
}
+2
View File
@@ -0,0 +1,2 @@
== send empty payload to a quorum port
== greeting-less connections still in reconn_wait
+78 -84
View File
@@ -90,7 +90,7 @@ done
# set some T_ defaults
T_TRACE_DUMP="0"
T_TRACE_PRINTK=""
T_TRACE_PRINTK="0"
T_PORT_START="19700"
T_LOOP_ITER="1"
@@ -137,9 +137,6 @@ while true; do
test -n "$2" || die "-l must have a nr iterations argument"
test "$2" -eq "$2" 2>/dev/null || die "-l <nr> argument must be an integer"
T_LOOP_ITER="$2"
# when looping, break after first failure
T_ABORT="1"
shift
;;
-M)
@@ -402,44 +399,31 @@ if [ -n "$T_INSMOD" ]; then
cmd insmod "$T_MODULE"
fi
start_tracing() {
if [ -n "$T_TRACE_MULT" ]; then
orig_trace_size=1408
mult_trace_size=$((orig_trace_size * T_TRACE_MULT))
msg "increasing trace buffer size from $orig_trace_size KiB to $mult_trace_size KiB"
echo $mult_trace_size > /sys/kernel/debug/tracing/buffer_size_kb
fi
if [ -n "$T_TRACE_MULT" ]; then
# orig_trace_size=$(cat /sys/kernel/debug/tracing/buffer_size_kb)
orig_trace_size=1408
mult_trace_size=$((orig_trace_size * T_TRACE_MULT))
msg "increasing trace buffer size from $orig_trace_size KiB to $mult_trace_size KiB"
echo $mult_trace_size > /sys/kernel/debug/tracing/buffer_size_kb
fi
nr_globs=${#T_TRACE_GLOB[@]}
if [ $nr_globs -gt 0 ]; then
echo 0 > /sys/kernel/debug/tracing/events/scoutfs/enable
nr_globs=${#T_TRACE_GLOB[@]}
if [ $nr_globs -gt 0 ]; then
echo 0 > /sys/kernel/debug/tracing/events/scoutfs/enable
for g in "${T_TRACE_GLOB[@]}"; do
for e in /sys/kernel/debug/tracing/events/scoutfs/$g/enable; do
if test -w "$e"; then
echo 1 > "$e"
else
die "-t glob '$g' matched no scoutfs events"
fi
done
for g in "${T_TRACE_GLOB[@]}"; do
for e in /sys/kernel/debug/tracing/events/scoutfs/$g/enable; do
if test -w "$e"; then
echo 1 > "$e"
else
die "-t glob '$g' matched no scoutfs events"
fi
done
done
nr_events=$(cat /sys/kernel/debug/tracing/set_event | wc -l)
msg "enabled $nr_events trace events from $nr_globs -t globs"
fi
}
stop_tracing() {
if [ -n "$T_TRACE_GLOB" -o -n "$T_TRACE_PRINTK" ]; then
msg "saving traces and disabling tracing"
echo 0 > /sys/kernel/debug/tracing/events/scoutfs/enable
echo 0 > /sys/kernel/debug/tracing/options/trace_printk
cat /sys/kernel/debug/tracing/trace | gzip > "$T_RESULTS/traces.gz"
if [ -n "$orig_trace_size" ]; then
echo $orig_trace_size > /sys/kernel/debug/tracing/buffer_size_kb
fi
fi
}
nr_events=$(cat /sys/kernel/debug/tracing/set_event | wc -l)
msg "enabled $nr_events trace events from $nr_globs -t globs"
fi
if [ -n "$T_TRACE_PRINTK" ]; then
echo "$T_TRACE_PRINTK" > /sys/kernel/debug/tracing/options/trace_printk
@@ -619,26 +603,24 @@ passed=0
skipped=0
failed=0
skipped_permitted=0
for iter in $(seq 1 $T_LOOP_ITER); do
for t in $tests; do
# tests has basenames from sequence, get path and name
t="tests/$t"
test_name=$(basename "$t" | sed -e 's/.sh$//')
start_tracing
# get stats from previous pass
last="$T_RESULTS/last-passed-test-stats"
stats=$(grep -s "^$test_name " "$last" | cut -d " " -f 2-)
test -n "$stats" && stats="last: $stats"
printf " %-30s $stats" "$test_name"
for t in $tests; do
# tests has basenames from sequence, get path and name
t="tests/$t"
test_name=$(basename "$t" | sed -e 's/.sh$//')
# mark in dmesg as to what test we are running
echo "run scoutfs test $test_name" > /dev/kmsg
# get stats from previous pass
last="$T_RESULTS/last-passed-test-stats"
stats=$(grep -s "^$test_name " "$last" | cut -d " " -f 2-)
test -n "$stats" && stats="last: $stats"
printf " %-30s $stats" "$test_name"
# let the test get at its extra files
T_EXTRA="$T_TESTS/extra/$test_name"
# mark in dmesg as to what test we are running
echo "run scoutfs test $test_name" > /dev/kmsg
# let the test get at its extra files
T_EXTRA="$T_TESTS/extra/$test_name"
for iter in $(seq 1 $T_LOOP_ITER); do
# create a temporary dir and file path for the test
T_TMPDIR="$T_RESULTS/tmp/$test_name"
@@ -728,43 +710,55 @@ for iter in $(seq 1 $T_LOOP_ITER); do
sts=$T_FAIL_STATUS
fi
# show and record the result of the test
if [ "$sts" == "$T_PASS_STATUS" ]; then
echo " passed: $stats"
((passed++))
# save stats for passed test
grep -s -v "^$test_name " "$last" > "$last.tmp"
echo "$test_name $stats" >> "$last.tmp"
mv -f "$last.tmp" "$last"
elif [ "$sts" == "$T_SKIP_PERMITTED_STATUS" ]; then
echo " [ skipped (permitted): $message ]"
echo "$test_name skipped (permitted) $message " >> "$T_RESULTS/skip.log"
((skipped_permitted++))
elif [ "$sts" == "$T_SKIP_STATUS" ]; then
echo " [ skipped: $message ]"
echo "$test_name $message" >> "$T_RESULTS/skip.log"
((skipped++))
elif [ "$sts" == "$T_FAIL_STATUS" ]; then
echo " [ failed: $message ]"
echo "$test_name $message" >> "$T_RESULTS/fail.log"
((failed++))
if [ -n "$T_ABORT" ]; then
stop_tracing
die "aborting after first failure"
fi
# stop looping if we didn't pass
if [ "$sts" != "$T_PASS_STATUS" ]; then
break;
fi
# record results for TAP format output
t_tap_progress $test_name $sts
((testcount++))
done
stop_tracing
# show and record the result of the test
if [ "$sts" == "$T_PASS_STATUS" ]; then
echo " passed: $stats"
((passed++))
# save stats for passed test
grep -s -v "^$test_name " "$last" > "$last.tmp"
echo "$test_name $stats" >> "$last.tmp"
mv -f "$last.tmp" "$last"
elif [ "$sts" == "$T_SKIP_PERMITTED_STATUS" ]; then
echo " [ skipped (permitted): $message ]"
echo "$test_name skipped (permitted) $message " >> "$T_RESULTS/skip.log"
((skipped_permitted++))
elif [ "$sts" == "$T_SKIP_STATUS" ]; then
echo " [ skipped: $message ]"
echo "$test_name $message" >> "$T_RESULTS/skip.log"
((skipped++))
elif [ "$sts" == "$T_FAIL_STATUS" ]; then
echo " [ failed: $message ]"
echo "$test_name $message" >> "$T_RESULTS/fail.log"
((failed++))
test -n "$T_ABORT" && die "aborting after first failure"
fi
# record results for TAP format output
t_tap_progress $test_name $sts
((testcount++))
done
msg "all tests run: $passed passed, $skipped skipped, $skipped_permitted skipped (permitted), $failed failed"
if [ -n "$T_TRACE_GLOB" -o -n "$T_TRACE_PRINTK" ]; then
msg "saving traces and disabling tracing"
echo 0 > /sys/kernel/debug/tracing/events/scoutfs/enable
echo 0 > /sys/kernel/debug/tracing/options/trace_printk
cat /sys/kernel/debug/tracing/trace > "$T_RESULTS/traces"
if [ -n "$orig_trace_size" ]; then
echo $orig_trace_size > /sys/kernel/debug/tracing/buffer_size_kb
fi
fi
if [ "$skipped" == 0 -a "$failed" == 0 ]; then
msg "all tests passed"
unmount_all
+1
View File
@@ -64,4 +64,5 @@ archive-light-cycle.sh
block-stale-reads.sh
inode-deletion.sh
renameat2-noreplace.sh
portscan.sh
xfstests.sh
+46
View File
@@ -0,0 +1,46 @@
#
# portscan tests - ensure malformed packets do not cause issues
#
# Send a short garbage payload to a scoutfs server quorum port. The
# accepted connection never completes a valid greeting, so after the
# reconnect timeout the kernel must drop it silently rather than
# fence it (which would restart the server).
#
# Declare the external commands this test relies on.
# NOTE(review): presumably the helper skips or fails the test early when
# any listed command is unavailable - confirm against t_require_commands.
t_require_commands scoutfs grep wc seq
#
# send_garbage <port>
#
# Open a TCP connection to 127.0.0.1:<port> through bash's /dev/tcp
# redirection, write a tiny payload that is not a valid greeting, and
# close the connection again.
#
# Everything runs in a subshell so that fd 3 never leaks into the
# caller, and the subshell's stderr is discarded so a refused connection
# stays quiet.  Returns non-zero when the connection can't be opened.
#
send_garbage()
{
	local target_port="$1"

	(
		# bail out of the subshell if nothing accepts on the port
		if ! exec 3<>"/dev/tcp/127.0.0.1/$target_port"; then
			exit 1
		fi

		printf ' ' >&3

		# closing the fd tears the connection down from our side
		exec 3>&-
	) 2>/dev/null
}
echo "== send empty payload to a quorum port"

# Walk the quorum ports in slot order and stop at the first one that
# accepts our connection; remember which slot that was.
slot=-1
for ((nr = 0; nr < T_QUORUM; nr++)); do
	if send_garbage "$((T_TEST_PORT + nr))"; then
		slot=$nr
		break
	fi
done
if ! test "$slot" -ge 0; then
	t_fail "no quorum port accepted"
fi

# CLIENT_RECONNECT_TIMEOUT_MS is 20s.  Poll once a second, for up to 25
# attempts, until no connection without a valid greeting ("vg 0") is
# left waiting for a reconnect ("rw 1").
echo "== greeting-less connections still in reconn_wait"
for ((attempt = 1; attempt <= 25; attempt++)); do
	n=$(grep -h 'vg 0 .* rw 1' /sys/kernel/debug/scoutfs/*/connections | wc -l)
	if [ "$n" = 0 ]; then
		break
	fi
	sleep 1
done
if ! test "$n" -eq 0; then
	t_fail "$n greeting-less conns remain in reconn_wait"
fi

# The mount whose port we hit should still be up and connected, so a
# simple file creation in its T_D<slot> directory has to succeed.
mnt_var="T_D$slot"
dir=${!mnt_var}
if ! touch "$dir/portscan-after" 2>/dev/null; then
	t_fail "fs on $dir not responsive after portscan"
fi

t_pass