Compare commits

...

1 Commits

Author SHA1 Message Date
Auke Kok
e011c80452 Do not fence connections without valid greeting
There is no reason to fence any connection that hasn't sent a valid
greeting, since they haven't progressed far enough for it to make
sense. Skip the fence call for these connections and let the existing
destroy path tear them down. Any real client will reconnect.

server_notify_down() previously treated any zero rid teardown as
the listening socket going down and called stop_server(). That's
correct for the listener legitimately, but not for any conn that
never completed a greeting.

Adds a test that sends a short garbage payload to the first quorum port
that accepts a connection and verifies that no greeting-less connection
remains past the reconnect timeout (20s) and that the filesystem still
works afterwards.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-05-06 16:44:37 -07:00
5 changed files with 68 additions and 10 deletions

View File

@@ -1452,15 +1452,23 @@ restart:
set_conn_fl(acc, reconn_freeing);
spin_unlock(&conn->lock);
if (!test_conn_fl(conn, shutting_down)) {
scoutfs_info(sb, "client "SIN_FMT" reconnect timed out, fencing",
SIN_ARG(&acc->last_peername));
ret = scoutfs_fence_start(sb, acc->rid,
acc->last_peername.sin_addr.s_addr,
SCOUTFS_FENCE_CLIENT_RECONNECT);
if (ret) {
scoutfs_err(sb, "client fence returned err %d, shutting down server",
ret);
scoutfs_server_stop(sb);
/*
* Connections that never completed a valid greeting
* (port scans, malformed traffic, half-open peers)
* haven't progressed far enough to warrant fencing.
* Drop them. Any real client will reconnect.
*/
if (test_conn_fl(acc, valid_greeting)) {
scoutfs_info(sb, "client "SIN_FMT" reconnect timed out, fencing",
SIN_ARG(&acc->last_peername));
ret = scoutfs_fence_start(sb, acc->rid,
acc->last_peername.sin_addr.s_addr,
SCOUTFS_FENCE_CLIENT_RECONNECT);
if (ret) {
scoutfs_err(sb, "client fence returned err %d, shutting down server",
ret);
scoutfs_server_stop(sb);
}
}
}
destroy_conn(acc);

View File

@@ -4315,7 +4315,8 @@ static void server_notify_down(struct super_block *sb,
spin_unlock(&server->lock);
free_farewell_requests(sb, rid);
} else {
} else if (!conn->listening_conn) {
/* only the listener going down should stop the server */
stop_server(server);
}
}

2
tests/golden/portscan Normal file
View File

@@ -0,0 +1,2 @@
== send empty payload to a quorum port
== greeting-less connections still in reconn_wait

View File

@@ -64,4 +64,5 @@ archive-light-cycle.sh
block-stale-reads.sh
inode-deletion.sh
renameat2-noreplace.sh
portscan.sh
xfstests.sh

46
tests/tests/portscan.sh Normal file
View File

@@ -0,0 +1,46 @@
#
# portscan tests - ensure malformed packets do not cause issues
#
# Send a short garbage payload to a scoutfs server quorum port. The
# accepted connection never completes a valid greeting, so after the
# reconnect timeout the kernel must drop it silently rather than
# fence it (which would restart the server).
#
# External commands this test invokes; sleep and touch are used by the
# polling loop and the final responsiveness check respectively.
t_require_commands scoutfs grep wc seq sleep touch
#
# send_garbage PORT
#
# Open a TCP connection to 127.0.0.1:PORT, write a single space
# character to it and close the descriptor again.  Everything runs in
# a subshell with stderr discarded, so a refused connection is quiet.
# The subshell exits with status 1 when the connection cannot be opened.
#
send_garbage()
{
	local target_port="$1"

	(
		if ! exec 3<>"/dev/tcp/127.0.0.1/$target_port"; then
			exit 1
		fi
		printf ' ' >&3
		exec 3>&-
	) 2>/dev/null
}
echo "== send empty payload to a quorum port"

# Walk the quorum slots in order and remember the first one whose port
# accepted our connection; stop probing as soon as one does.
slot=-1
for ((i = 0; i < T_QUORUM; i++)); do
	send_garbage "$((T_TEST_PORT + i))" || continue
	slot=$i
	break
done
if ! [ "$slot" -ge 0 ]; then
	t_fail "no quorum port accepted"
fi

# CLIENT_RECONNECT_TIMEOUT_MS is 20s - wait until that happens.
echo "== greeting-less connections still in reconn_wait"

# Poll up to 25 times, one second apart, counting debugfs connection
# lines that match 'vg 0 ... rw 1'.  Stop early once none are left.
for ((attempt = 1; attempt <= 25; attempt++)); do
	n=$(grep -h 'vg 0 .* rw 1' /sys/kernel/debug/scoutfs/*/connections | wc -l)
	if [ "$n" = 0 ]; then
		break
	fi
	sleep 1
done
if ! [ "$n" -eq 0 ]; then
	t_fail "$n greeting-less conns remain in reconn_wait"
fi

# the mount whose port we hit should be up and not disconnected now.
# T_D<slot> names that mount's directory; look it up indirectly.
mnt_var="T_D$slot"
dir="${!mnt_var}"
if ! touch "$dir/portscan-after" 2>/dev/null; then
	t_fail "fs on $dir not responsive after portscan"
fi

t_pass