Compare commits

..

14 Commits

Author SHA1 Message Date
Chris Kirby  5465350ba2  block alloc fix  2026-01-14 13:25:00 -06:00
Chris Kirby  bf4f2ef8bb  more dbg  2026-01-14 13:14:48 -06:00
Chris Kirby  c5207243f0  typo  2026-01-14 13:14:48 -06:00
Chris Kirby  8d330861ba  track blkno  2026-01-14 13:14:48 -06:00
Chris Kirby  b7beadb22c  track entry_bytes  2026-01-14 13:14:48 -06:00
Chris Kirby  926bb6b9ec  debug  2026-01-14 13:14:48 -06:00
Chris Kirby  a420f5389e  More debug  2026-01-14 13:14:48 -06:00
Chris Kirby  544362dfaa  grab traces at exit  2026-01-14 13:14:48 -06:00
Chris Kirby  79593b6ecc  And the output  2026-01-14 13:14:48 -06:00
Chris Kirby  b6e7d5b2d0  Temporarily make srch-safe triggers silent  2026-01-14 13:14:48 -06:00
Chris Kirby  487d38a4a5  init trace buffer size when starting tracing  2026-01-14 13:14:48 -06:00
Chris Kirby  9cddf6d9c9  Track srch_compact_error  2026-01-14 13:14:48 -06:00
Chris Kirby  d81c318c3e  Beef up get_file_block() tracing (Signed-off-by: Chris Kirby <ckirby@versity.com>)  2026-01-14 13:14:48 -06:00
Chris Kirby  279b093600  Debug stuff for srch-basic (Signed-off-by: Chris Kirby <ckirby@versity.com>)  2026-01-14 13:14:48 -06:00
6 changed files with 158 additions and 174 deletions

View File

@@ -1,39 +1,6 @@
Versity ScoutFS Release Notes
=============================
---
v1.27
\
*Jan 15, 2026*
Switch away from using the general VM cache reclaim machinery to reduce
idle cluster locks in the client. The VM treated locks like a cache and
let many accumulate, presuming that it would be efficient to free them
in batches. Lock freeing requires network communication so this could
result in enormous backlogs of network messages (on the order of
hundreds of thousands) and could result in significant delays of other
network messaging.

Fix inefficient network receive processing while many messages are in
the send queue. This consumed sufficient CPU to cause significant
stalls, perhaps resulting in hung task warning messages due to delayed
lock message delivery.

Fix a server livelock case that could happen while committing client
transactions that contain a large amount of freed file data extents.
This would present as client tasks hanging and a server task spinning
and consuming CPU.

Fix a rare server request processing failure that didn't deal with
retransmission of a request that a previous server partially processed.
This would present as hung client tasks and repeated "error -2
committing log merge: getting merge status item" kernel messages.

Fix an unnecessary server shutdown during specific circumstances in
client lock recovery. The shutdown was due to server state and was
ultimately harmless. The next server that started up would proceed
accordingly.
---
v1.26
\

View File

@@ -2620,24 +2620,44 @@ TRACE_EVENT(scoutfs_block_dirty_ref,
);
TRACE_EVENT(scoutfs_get_file_block,
TP_PROTO(struct super_block *sb, u64 blkno, int flags),
TP_PROTO(struct super_block *sb, u64 blkno, int flags,
struct scoutfs_srch_block *srb),
TP_ARGS(sb, blkno, flags),
TP_ARGS(sb, blkno, flags, srb),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, blkno)
__field(__u32, entry_nr)
__field(__u32, entry_bytes)
__field(int, flags)
__field(__u64, first_hash)
__field(__u64, first_ino)
__field(__u64, first_id)
__field(__u64, last_hash)
__field(__u64, last_ino)
__field(__u64, last_id)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->blkno = blkno;
__entry->entry_nr = __le32_to_cpu(srb->entry_nr);
__entry->entry_bytes = __le32_to_cpu(srb->entry_bytes);
__entry->flags = flags;
__entry->first_hash = __le64_to_cpu(srb->first.hash);
__entry->first_ino = __le64_to_cpu(srb->first.ino);
__entry->first_id = __le64_to_cpu(srb->first.id);
__entry->last_hash = __le64_to_cpu(srb->last.hash);
__entry->last_ino = __le64_to_cpu(srb->last.ino);
__entry->last_id = __le64_to_cpu(srb->last.id);
),
TP_printk(SCSBF" blkno %llu flags 0x%x",
SCSB_TRACE_ARGS, __entry->blkno, __entry->flags)
TP_printk(SCSBF" blkno %llu nr %u bytes %u flags 0x%x first_hash 0x%llx first_ino %llu first_id 0x%llx last_hash 0x%llx last_ino %llu last_id 0x%llx",
SCSB_TRACE_ARGS, __entry->blkno, __entry->entry_nr,
__entry->entry_bytes, __entry->flags,
__entry->first_hash, __entry->first_ino, __entry->first_id,
__entry->last_hash, __entry->last_ino, __entry->last_id)
);
TRACE_EVENT(scoutfs_block_stale,

View File

@@ -443,7 +443,7 @@ out:
sfl->blocks = cpu_to_le64(blk + 1);
if (bl) {
trace_scoutfs_get_file_block(sb, bl->blkno, flags);
trace_scoutfs_get_file_block(sb, bl->blkno, flags, bl->data);
}
*bl_ret = bl;
@@ -745,6 +745,7 @@ static int search_log_file(struct super_block *sb,
for (i = 0; i < le32_to_cpu(srb->entry_nr); i++) {
if (pos > SCOUTFS_SRCH_BLOCK_SAFE_BYTES) {
/* can only be inconsistency :/ */
scoutfs_err(sb, "@@@ pos %d > %ld", pos, SCOUTFS_SRCH_BLOCK_SAFE_BYTES);
ret = -EIO;
break;
}
@@ -752,6 +753,7 @@ static int search_log_file(struct super_block *sb,
ret = decode_entry(srb->entries + pos, &sre, &prev);
if (ret <= 0) {
/* can only be inconsistency :/ */
scoutfs_err(sb, "@@@ decode_entry -> %d", ret);
ret = -EIO;
break;
}
@@ -855,6 +857,7 @@ static int search_sorted_file(struct super_block *sb,
if (pos > SCOUTFS_SRCH_BLOCK_SAFE_BYTES) {
/* can only be inconsistency :/ */
scoutfs_err(sb, "@@@ 2: pos %d > %ld", pos, SCOUTFS_SRCH_BLOCK_SAFE_BYTES);
ret = -EIO;
goto out;
}
@@ -862,6 +865,7 @@ static int search_sorted_file(struct super_block *sb,
ret = decode_entry(srb->entries + pos, &sre, &prev);
if (ret <= 0) {
/* can only be inconsistency :/ */
scoutfs_err(sb, "@@@ 2: decode -> %d", ret);
ret = -EIO;
goto out;
}
@@ -1001,6 +1005,7 @@ retry:
scoutfs_key_inc(&key);
memcpy(&sfl, iref.val, iref.val_len);
} else {
scoutfs_err(sb, "@@@ btree_next bad?");
ret = -EIO;
}
scoutfs_btree_put_iref(&iref);
@@ -1036,6 +1041,7 @@ retry:
scoutfs_key_inc(&key);
memcpy(&lt, iref.val, iref.val_len);
} else {
scoutfs_err(sb, "@@@ btree_next bad 2");
ret = -EIO;
}
scoutfs_btree_put_iref(&iref);
@@ -1151,6 +1157,7 @@ int scoutfs_srch_get_compact(struct super_block *sb,
key = *iref.key;
memcpy(sc, iref.val, iref.val_len);
} else {
scoutfs_err(sb, "@@@ btree_next bad 3");
ret = -EIO;
}
scoutfs_btree_put_iref(&iref);
@@ -1209,6 +1216,7 @@ int scoutfs_srch_get_compact(struct super_block *sb,
key = *iref.key;
memcpy(&sfl, iref.val, iref.val_len);
} else {
scoutfs_err(sb, "@@@ btree_next bad 4");
ret = -EIO;
}
scoutfs_btree_put_iref(&iref);
@@ -1402,8 +1410,10 @@ int scoutfs_srch_commit_compact(struct super_block *sb,
if (ret == 0) {
if (iref.val_len == sizeof(struct scoutfs_srch_compact))
memcpy(busy, iref.val, iref.val_len);
else
else {
scoutfs_err(sb, "@@@ btree_lookup bad");
ret = -EIO;
}
scoutfs_btree_put_iref(&iref);
}
if (ret < 0)
@@ -1496,6 +1506,7 @@ int scoutfs_srch_cancel_compact(struct super_block *sb,
if (scoutfs_key_compare(iref.key, &last) > 0) {
ret = -ENOENT;
} else if (iref.val_len != sizeof(*sc)) {
scoutfs_err(sb, "@@@ btree_next bad 5");
ret = -EIO;
} else {
key = *iref.key;
@@ -1609,27 +1620,29 @@ static int kway_merge(struct super_block *sb,
/* always append new blocks */
blk = le64_to_cpu(sfl->blocks);
while (empty < nr) {
if (bl == NULL) {
if (atomic_read(&srinf->shutdown)) {
ret = -ESHUTDOWN;
goto out;
if (srb == NULL || sre_cmp(&root->sre, &srb->last) != 0) {
if (bl == NULL) {
if (atomic_read(&srinf->shutdown)) {
ret = -ESHUTDOWN;
goto out;
}
/* could grow and dirty to a leaf */
if (should_commit(sb, alloc, wri,
sfl->height + 1)) {
ret = 0;
goto out;
}
ret = get_file_block(sb, alloc, wri, sfl,
GFB_INSERT | GFB_DIRTY,
blk, &bl);
if (ret < 0)
goto out;
srb = bl->data;
scoutfs_inc_counter(sb, srch_compact_dirty_block);
}
/* could grow and dirty to a leaf */
if (should_commit(sb, alloc, wri, sfl->height + 1)) {
ret = 0;
goto out;
}
ret = get_file_block(sb, alloc, wri, sfl,
GFB_INSERT | GFB_DIRTY, blk, &bl);
if (ret < 0)
goto out;
srb = bl->data;
scoutfs_inc_counter(sb, srch_compact_dirty_block);
}
if (sre_cmp(&root->sre, &srb->last) != 0) {
last_bytes = le32_to_cpu(srb->entry_bytes);
last_tail = srb->last;
ret = encode_entry(srb->entries +
@@ -1637,6 +1650,7 @@ static int kway_merge(struct super_block *sb,
&root->sre, &srb->tail);
if (WARN_ON_ONCE(ret <= 0)) {
/* shouldn't happen */
scoutfs_err(sb, "@@@ encode -> %d", ret);
ret = -EIO;
goto out;
}
@@ -1861,6 +1875,7 @@ static int compact_logs(struct super_block *sb,
if (pos > SCOUTFS_SRCH_BLOCK_SAFE_BYTES) {
/* can only be inconsistency :/ */
scoutfs_err(sb, "@@@ 3: pos %d > %ld", pos, SCOUTFS_SRCH_BLOCK_SAFE_BYTES);
ret = -EIO;
goto out;
}
@@ -1868,6 +1883,7 @@ static int compact_logs(struct super_block *sb,
ret = decode_entry(srb->entries + pos, sre, &prev);
if (ret <= 0) {
/* can only be inconsistency :/ */
scoutfs_err(sb, "@@@ new decode -> %d", ret);
ret = -EIO;
goto out;
}
@@ -1982,6 +1998,11 @@ static int kway_get_reader(struct super_block *sb,
rdr->skip > SCOUTFS_SRCH_BLOCK_SAFE_BYTES ||
rdr->skip >= le32_to_cpu(srb->entry_bytes)) {
/* XXX inconsistency */
scoutfs_err(sb, "@@@ blkno %llu pos %u vs %ld, skip %u , %u",
__le64_to_cpu(srb->hdr.blkno),
rdr->pos, SCOUTFS_SRCH_BLOCK_SAFE_BYTES,
rdr->skip,
le32_to_cpu(srb->entry_bytes));
return -EIO;
}
@@ -1997,6 +2018,7 @@ static int kway_get_reader(struct super_block *sb,
&rdr->decoded_sre, &rdr->prev);
if (ret <= 0) {
/* XXX inconsistency */
scoutfs_err(sb, "@@@ decode new2 ret %d", ret);
return -EIO;
}
@@ -2293,8 +2315,10 @@ static void scoutfs_srch_compact_worker(struct work_struct *work)
out:
/* our allocators and files should be stable */
WARN_ON_ONCE(ret == -ESTALE);
if (ret < 0)
if (ret < 0) {
scoutfs_err(sb, "@@@ srch_compact_error %d", ret);
scoutfs_inc_counter(sb, srch_compact_error);
}
scoutfs_block_writer_forget_all(sb, &wri);
queue_compact_work(srinf, sc != NULL && sc->nr > 0 && ret == 0);
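
The kway_merge() hunk above reorders the merge step so the duplicate check against the block's last entry happens before a new block is allocated; a run of duplicate entries no longer forces get_file_block() to dirty a fresh block. Below is a minimal standalone sketch of that ordering, assuming a simplified entry/block layout (the types, block capacity, and input values are illustrative stand-ins, not the kernel's):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    /* stand-ins for scoutfs_srch_entry / scoutfs_srch_block; illustrative only */
    struct entry { uint64_t hash, ino, id; };
    struct blk   { struct entry last; int nr; };

    static int entry_cmp(const struct entry *a, const struct entry *b)
    {
            /* plain integer fields with no padding, so memcmp works for the demo */
            return memcmp(a, b, sizeof(*a));
    }

    int main(void)
    {
            /* already-sorted merge output containing duplicate entries */
            struct entry in[] = {
                    {1, 10, 0}, {1, 10, 0}, {2, 11, 0}, {2, 11, 0}, {3, 12, 0},
            };
            struct blk blocks[8];
            struct blk *bl = NULL;   /* no output block allocated yet */
            int nr_blocks = 0;

            for (size_t i = 0; i < sizeof(in) / sizeof(in[0]); i++) {
                    /* new ordering: skip a duplicate of the last written entry
                     * without allocating anything; the old ordering allocated
                     * a block first and only then noticed the duplicate */
                    if (bl && entry_cmp(&in[i], &bl->last) == 0)
                            continue;

                    if (!bl || bl->nr == 2) {          /* tiny block capacity for the demo */
                            bl = &blocks[nr_blocks++]; /* stands in for get_file_block() */
                            bl->nr = 0;
                    }
                    bl->last = in[i];
                    bl->nr++;
            }

            /* 3 unique entries at capacity 2 -> 2 blocks */
            printf("wrote %d blocks\n", nr_blocks);
            return 0;
    }

Compiled with any C compiler this prints "wrote 2 blocks"; the only point it illustrates is the ordering of the duplicate check relative to block allocation.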

View File

@@ -1,37 +1,7 @@
== initialize per-mount values
== arm compaction triggers
trigger srch_compact_logs_pad_safe armed: 1
trigger srch_merge_stop_safe armed: 1
trigger srch_compact_logs_pad_safe armed: 1
trigger srch_merge_stop_safe armed: 1
trigger srch_compact_logs_pad_safe armed: 1
trigger srch_merge_stop_safe armed: 1
trigger srch_compact_logs_pad_safe armed: 1
trigger srch_merge_stop_safe armed: 1
trigger srch_compact_logs_pad_safe armed: 1
trigger srch_merge_stop_safe armed: 1
== compact more often
== create padded sorted inputs by forcing log rotation
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_compact_logs_pad_safe armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_compact_logs_pad_safe armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_compact_logs_pad_safe armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_compact_logs_pad_safe armed: 1
== compaction of padded should stop at safe
== verify no compaction errors
== cleanup

View File

@@ -137,6 +137,9 @@ while true; do
test -n "$2" || die "-l must have a nr iterations argument"
test "$2" -eq "$2" 2>/dev/null || die "-l <nr> argument must be an integer"
T_LOOP_ITER="$2"
# when looping, break after first failure
T_ABORT="1"
shift
;;
-M)
@@ -399,31 +402,44 @@ if [ -n "$T_INSMOD" ]; then
cmd insmod "$T_MODULE"
fi
if [ -n "$T_TRACE_MULT" ]; then
# orig_trace_size=$(cat /sys/kernel/debug/tracing/buffer_size_kb)
orig_trace_size=1408
mult_trace_size=$((orig_trace_size * T_TRACE_MULT))
msg "increasing trace buffer size from $orig_trace_size KiB to $mult_trace_size KiB"
echo $mult_trace_size > /sys/kernel/debug/tracing/buffer_size_kb
fi
start_tracing() {
if [ -n "$T_TRACE_MULT" ]; then
orig_trace_size=1408
mult_trace_size=$((orig_trace_size * T_TRACE_MULT))
msg "increasing trace buffer size from $orig_trace_size KiB to $mult_trace_size KiB"
echo $mult_trace_size > /sys/kernel/debug/tracing/buffer_size_kb
fi
nr_globs=${#T_TRACE_GLOB[@]}
if [ $nr_globs -gt 0 ]; then
echo 0 > /sys/kernel/debug/tracing/events/scoutfs/enable
nr_globs=${#T_TRACE_GLOB[@]}
if [ $nr_globs -gt 0 ]; then
echo 0 > /sys/kernel/debug/tracing/events/scoutfs/enable
for g in "${T_TRACE_GLOB[@]}"; do
for e in /sys/kernel/debug/tracing/events/scoutfs/$g/enable; do
if test -w "$e"; then
echo 1 > "$e"
else
die "-t glob '$g' matched no scoutfs events"
fi
for g in "${T_TRACE_GLOB[@]}"; do
for e in /sys/kernel/debug/tracing/events/scoutfs/$g/enable; do
if test -w "$e"; then
echo 1 > "$e"
else
die "-t glob '$g' matched no scoutfs events"
fi
done
done
done
nr_events=$(cat /sys/kernel/debug/tracing/set_event | wc -l)
msg "enabled $nr_events trace events from $nr_globs -t globs"
fi
nr_events=$(cat /sys/kernel/debug/tracing/set_event | wc -l)
msg "enabled $nr_events trace events from $nr_globs -t globs"
fi
}
stop_tracing() {
if [ -n "$T_TRACE_GLOB" -o -n "$T_TRACE_PRINTK" ]; then
msg "saving traces and disabling tracing"
echo 0 > /sys/kernel/debug/tracing/events/scoutfs/enable
echo 0 > /sys/kernel/debug/tracing/options/trace_printk
cat /sys/kernel/debug/tracing/trace | gzip > "$T_RESULTS/traces.gz"
if [ -n "$orig_trace_size" ]; then
echo $orig_trace_size > /sys/kernel/debug/tracing/buffer_size_kb
fi
fi
}
if [ -n "$T_TRACE_PRINTK" ]; then
echo "$T_TRACE_PRINTK" > /sys/kernel/debug/tracing/options/trace_printk
@@ -505,10 +521,7 @@ crash_monitor()
fi
if [ "$bad" != 0 ]; then
echo "run-tests monitor syncing and triggering crash"
# hail mary, the sync could well hang
(echo s > /proc/sysrq-trigger) &
sleep 5
echo "run-tests monitor triggering crash"
echo c > /proc/sysrq-trigger
exit 1
fi
@@ -603,24 +616,26 @@ passed=0
skipped=0
failed=0
skipped_permitted=0
for t in $tests; do
# tests has basenames from sequence, get path and name
t="tests/$t"
test_name=$(basename "$t" | sed -e 's/.sh$//')
for iter in $(seq 1 $T_LOOP_ITER); do
# get stats from previous pass
last="$T_RESULTS/last-passed-test-stats"
stats=$(grep -s "^$test_name " "$last" | cut -d " " -f 2-)
test -n "$stats" && stats="last: $stats"
printf " %-30s $stats" "$test_name"
start_tracing
# mark in dmesg as to what test we are running
echo "run scoutfs test $test_name" > /dev/kmsg
for t in $tests; do
# tests has basenames from sequence, get path and name
t="tests/$t"
test_name=$(basename "$t" | sed -e 's/.sh$//')
# let the test get at its extra files
T_EXTRA="$T_TESTS/extra/$test_name"
# get stats from previous pass
last="$T_RESULTS/last-passed-test-stats"
stats=$(grep -s "^$test_name " "$last" | cut -d " " -f 2-)
test -n "$stats" && stats="last: $stats"
printf " %-30s $stats" "$test_name"
for iter in $(seq 1 $T_LOOP_ITER); do
# mark in dmesg as to what test we are running
echo "run scoutfs test $test_name" > /dev/kmsg
# let the test get at its extra files
T_EXTRA="$T_TESTS/extra/$test_name"
# create a temporary dir and file path for the test
T_TMPDIR="$T_RESULTS/tmp/$test_name"
@@ -707,55 +722,43 @@ for t in $tests; do
sts=$T_FAIL_STATUS
fi
# stop looping if we didn't pass
if [ "$sts" != "$T_PASS_STATUS" ]; then
break;
# show and record the result of the test
if [ "$sts" == "$T_PASS_STATUS" ]; then
echo " passed: $stats"
((passed++))
# save stats for passed test
grep -s -v "^$test_name " "$last" > "$last.tmp"
echo "$test_name $stats" >> "$last.tmp"
mv -f "$last.tmp" "$last"
elif [ "$sts" == "$T_SKIP_PERMITTED_STATUS" ]; then
echo " [ skipped (permitted): $message ]"
echo "$test_name skipped (permitted) $message " >> "$T_RESULTS/skip.log"
((skipped_permitted++))
elif [ "$sts" == "$T_SKIP_STATUS" ]; then
echo " [ skipped: $message ]"
echo "$test_name $message" >> "$T_RESULTS/skip.log"
((skipped++))
elif [ "$sts" == "$T_FAIL_STATUS" ]; then
echo " [ failed: $message ]"
echo "$test_name $message" >> "$T_RESULTS/fail.log"
((failed++))
if [ -n "$T_ABORT" ]; then
stop_tracing
die "aborting after first failure"
fi
fi
# record results for TAP format output
t_tap_progress $test_name $sts
((testcount++))
done
# show and record the result of the test
if [ "$sts" == "$T_PASS_STATUS" ]; then
echo " passed: $stats"
((passed++))
# save stats for passed test
grep -s -v "^$test_name " "$last" > "$last.tmp"
echo "$test_name $stats" >> "$last.tmp"
mv -f "$last.tmp" "$last"
elif [ "$sts" == "$T_SKIP_PERMITTED_STATUS" ]; then
echo " [ skipped (permitted): $message ]"
echo "$test_name skipped (permitted) $message " >> "$T_RESULTS/skip.log"
((skipped_permitted++))
elif [ "$sts" == "$T_SKIP_STATUS" ]; then
echo " [ skipped: $message ]"
echo "$test_name $message" >> "$T_RESULTS/skip.log"
((skipped++))
elif [ "$sts" == "$T_FAIL_STATUS" ]; then
echo " [ failed: $message ]"
echo "$test_name $message" >> "$T_RESULTS/fail.log"
((failed++))
test -n "$T_ABORT" && die "aborting after first failure"
fi
# record results for TAP format output
t_tap_progress $test_name $sts
((testcount++))
stop_tracing
done
msg "all tests run: $passed passed, $skipped skipped, $skipped_permitted skipped (permitted), $failed failed"
if [ -n "$T_TRACE_GLOB" -o -n "$T_TRACE_PRINTK" ]; then
msg "saving traces and disabling tracing"
echo 0 > /sys/kernel/debug/tracing/events/scoutfs/enable
echo 0 > /sys/kernel/debug/tracing/options/trace_printk
cat /sys/kernel/debug/tracing/trace > "$T_RESULTS/traces"
if [ -n "$orig_trace_size" ]; then
echo $orig_trace_size > /sys/kernel/debug/tracing/buffer_size_kb
fi
fi
if [ "$skipped" == 0 -a "$failed" == 0 ]; then
msg "all tests passed"
unmount_all

View File

@@ -31,8 +31,8 @@ trap restore_compact_delay EXIT
echo "== arm compaction triggers"
for nr in $(t_fs_nrs); do
t_trigger_arm srch_compact_logs_pad_safe $nr
t_trigger_arm srch_merge_stop_safe $nr
t_trigger_arm_silent srch_compact_logs_pad_safe $nr
t_trigger_arm_silent srch_merge_stop_safe $nr
done
echo "== compact more often"
@@ -44,7 +44,7 @@ echo "== create padded sorted inputs by forcing log rotation"
sv=$(t_server_nr)
for i in $(seq 1 $COMPACT_NR); do
for j in $(seq 1 $COMPACT_NR); do
t_trigger_arm srch_force_log_rotate $sv
t_trigger_arm_silent srch_force_log_rotate $sv
seq -f "f-$i-$j-$SEQF" 1 10 | \
bulk_create_paths -X "scoutfs.srch.t-srch-safe-merge-pos" -d "$T_D0" > \
@@ -59,7 +59,7 @@ for i in $(seq 1 $COMPACT_NR); do
while test $padded == 0 && sleep .5; do
for nr in $(t_fs_nrs); do
if [ "$(t_trigger_get srch_compact_logs_pad_safe $nr)" == "0" ]; then
t_trigger_arm srch_compact_logs_pad_safe $nr
t_trigger_arm_silent srch_compact_logs_pad_safe $nr
padded=1
break
fi