Compare commits

9 Commits

Author SHA1 Message Date
Chris Kirby
4a1cdbdf8f Suppress another forced shutdown error message
The "server error emptying freed" error was causing a
fence-and-reclaim test failure. In this case, the error
was -ENOLINK, which we should ignore for messaging purposes.

Signed-off-by: Chris Kirby <ckirby@versity.com>
2026-01-30 08:32:25 -06:00
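The fix follows the usual pattern of exempting an expected errno before logging while still letting the error propagate to the caller. A minimal standalone sketch of the idea (the helper name and message here are illustrative, not the ScoutFS code; the actual one-line change is in the server.c hunk below):

#include <errno.h>
#include <stdio.h>

/* hypothetical helper: report an error unless it is one we expect
 * during a forced shutdown, such as -ENOLINK */
static int report_empty_ret(int ret)
{
	if (ret && ret != -ENOLINK)
		fprintf(stderr, "server error emptying freed: %d\n", ret);
	return ret;
}

int main(void)
{
	report_empty_ret(-ENOLINK);	/* silent: expected at shutdown */
	report_empty_ret(-EIO);		/* logged: a real failure */
	return 0;
}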
Chris Kirby
de6899bbae Don't emit empty blocks in kway_merge()
It's possible for a srch compaction to collapse down to nothing
if given evenly paired create/delete entries. In this case, we
were emitting an empty block. This could cause problems for
search_sorted_file(), which assumes that every block it sees
has a valid first and last entry.

Fix this by keeping a temp entry and only emitting it if it differs
from the next entry in the block. Be sure to flush out a straggling
temp entry if we have one when we're done with the last block of
the merge.

Signed-off-by: Chris Kirby <ckirby@versity.com>
2026-01-30 08:32:25 -06:00
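The fix is a deferred-emit pattern: instead of writing each merged entry immediately, hold one pending entry and only flush it once the next entry proves it isn't cancelled by a matching delete. A minimal standalone sketch of the idea (struct entry, emit(), and merge_stream() are illustrative stand-ins, not the ScoutFS code):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* illustrative stand-in for a srch entry; equal entries cancel out */
struct entry { unsigned long long hash, ino, id; };

static bool entry_eq(const struct entry *a, const struct entry *b)
{
	return a->hash == b->hash && a->ino == b->ino && a->id == b->id;
}

/* pretend block emitter; the real code encodes into srch file blocks */
static void emit(const struct entry *e)
{
	printf("emit %llu.%llu.%llu\n", e->hash, e->ino, e->id);
}

/*
 * Merge a sorted stream, treating adjacent duplicates as create/delete
 * pairs that cancel.  Nothing is emitted until the next entry shows the
 * pending one survived, so a stream that cancels down to nothing emits
 * no entries, and thus no empty block.
 */
static void merge_stream(const struct entry *in, size_t n)
{
	struct entry tmp;
	bool have_tmp = false;
	size_t i;

	for (i = 0; i < n; i++) {
		if (have_tmp && entry_eq(&tmp, &in[i])) {
			/* duplicate means deletion: drop both */
			have_tmp = false;
			continue;
		}
		if (have_tmp)
			emit(&tmp);
		tmp = in[i];
		have_tmp = true;
	}
	/* flush a straggling pending entry at the end of the merge */
	if (have_tmp)
		emit(&tmp);
}

int main(void)
{
	/* evenly paired entries cancel; only the unpaired one is emitted */
	struct entry in[] = {
		{ 1, 10, 1 }, { 1, 10, 1 },	/* create + delete */
		{ 2, 11, 1 },			/* survives */
		{ 3, 12, 1 }, { 3, 12, 1 },	/* create + delete */
	};

	merge_stream(in, sizeof(in) / sizeof(in[0]));
	return 0;
}

As in the kernel change, cancellation only applies to equal neighbors within a run; pairs that straddle block boundaries are left for search to filter out, which the sketch doesn't model.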
Chris Kirby
c9575cda2e Change the looping logic in run-tests.sh
If a set of tests is provided, loop over the entire set the
requested number of times. Start and stop any requested tracing
across the set boundary.

Signed-off-by: Chris Kirby <ckirby@versity.com>
2026-01-28 14:51:01 -06:00
Chris Kirby
d53bb9363e Improve tracing for get_file_block()
Print the first and last entries, the entry_nr and entry_bytes.

Signed-off-by: Chris Kirby <ckirby@versity.com>
2026-01-21 09:59:11 -06:00
Chris Kirby
645022a0b3 Fix trigger firing race in srch-safe-merge-pos
Because the srch triggers are inherently async to the test,
we can't be sure they won't fire prematurely just because a
compact worker started running at an inconvenient time.

Make the trigger arming silent to avoid spurious test failures.
Move the trigger arming closer to the point of interest to
increase the chances that we're actually testing what we want.

Signed-off-by: Chris Kirby <ckirby@versity.com>
2026-01-21 09:59:11 -06:00
Zach Brown
9b569415f2 Merge pull request #276 from versity/zab/v1.27
v1.27 Release
2026-01-15 19:36:38 -08:00
Zach Brown
6a1e136085 v1.27 Release
Finish the release notes for the 1.27 release.

Signed-off-by: Zach Brown <zab@versity.com>
2026-01-15 14:21:53 -08:00
Zach Brown
7ca789c837 Merge pull request #278 from versity/zab/test_sync_before_crash
Have run-tests monitor sync before crashing
2026-01-15 14:03:26 -08:00
Zach Brown
4d55fe6251 Have run-tests monitor sync before crashing
There have been a few failures where output is generated just before we
crash but it didn't have a chance to be written.  Add a best-effort
background sync before crashing.  There's a good chance it'll hang if
the system is stuck so we don't wait for it directly, just for .. some
time to pass.

Signed-off-by: Zach Brown <zab@versity.com>
2026-01-15 10:41:44 -08:00
6 changed files with 279 additions and 105 deletions

View File

@@ -1,6 +1,39 @@
Versity ScoutFS Release Notes
=============================
---
v1.27
\
*Jan 15, 2026*
Switch away from using the general VM cache reclaim machinery to reduce
idle cluster locks in the client. The VM treated locks like a cache and
let many accumulate, presuming that it would be efficient to free them
in batches. Lock freeing requires network communication so this could
result in enormous backlogs in network messages (on the order of
hundreds of thousands) and could result in significant delays of other
network messaging.

Fix inefficient network receive processing while many messages are in
the send queue. This consumed sufficient CPU to cause significant
stalls, perhaps resulting in hung task warning messages due to delayed
lock message delivery.

Fix a server livelock case that could happen while committing client
transactions that contain a large number of freed file data extents.
This would present as client tasks hanging and a server task spinning
consuming CPU.

Fix a rare server request processing failure that didn't deal with
retransmission of a request that a previous server partially processed.
This would present as hung client tasks and repeated "error -2
committing log merge: getting merge status item" kernel messages.

Fix an unnecessary server shutdown during specific circumstances in
client lock recovery. The shutdown was due to server state and was
ultimately harmless. The next server that started up would proceed
accordingly.
---
v1.26
\

View File

@@ -2660,6 +2660,110 @@ TRACE_EVENT(scoutfs_get_file_block,
__entry->last_hash, __entry->last_ino, __entry->last_id)
);
TRACE_EVENT(scoutfs_srch_new_merge,
	TP_PROTO(struct super_block *sb),
	TP_ARGS(sb),
	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
	),
	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
	),
	TP_printk(SCSBF, SCSB_TRACE_ARGS)
);

TRACE_EVENT(scoutfs_srch_emit_entry,
	TP_PROTO(struct super_block *sb, struct scoutfs_srch_entry *sre,
		 struct scoutfs_srch_block *srb, u64 blkno),
	TP_ARGS(sb, sre, srb, blkno),
	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		__field(__u32, entry_nr)
		__field(__u64, blkno)
		__field(__u64, hash)
		__field(__u64, ino)
		__field(__u64, id)
	),
	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		__entry->entry_nr = __le32_to_cpu(srb->entry_nr);
		__entry->blkno = blkno;
		__entry->hash = __le64_to_cpu(sre->hash);
		__entry->ino = __le64_to_cpu(sre->ino);
		__entry->id = __le64_to_cpu(sre->id);
	),
	TP_printk(SCSBF" nr %u blkno %llu hash 0x%llx ino %llu id 0x%llx",
		  SCSB_TRACE_ARGS, __entry->entry_nr, __entry->blkno,
		  __entry->hash, __entry->ino, __entry->id)
);

TRACE_EVENT(scoutfs_srch_clr_tmp,
	TP_PROTO(struct super_block *sb, struct scoutfs_srch_entry *tmp),
	TP_ARGS(sb, tmp),
	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		__field(__u64, tmp_hash)
		__field(__u64, tmp_ino)
		__field(__u64, tmp_id)
	),
	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		__entry->tmp_hash = __le64_to_cpu(tmp->hash);
		__entry->tmp_ino = __le64_to_cpu(tmp->ino);
		__entry->tmp_id = __le64_to_cpu(tmp->id);
	),
	TP_printk(SCSBF" tmp hash 0x%llx tmp ino %llu tmp id 0x%llx",
		  SCSB_TRACE_ARGS,
		  __entry->tmp_hash, __entry->tmp_ino, __entry->tmp_id)
);

TRACE_EVENT(scoutfs_srch_cmp,
	TP_PROTO(struct super_block *sb, struct scoutfs_srch_entry *root,
		 struct scoutfs_srch_entry *tmp, void *bl),
	TP_ARGS(sb, root, tmp, bl),
	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		__field(__u64, root_hash)
		__field(__u64, root_ino)
		__field(__u64, root_id)
		__field(__u64, tmp_hash)
		__field(__u64, tmp_ino)
		__field(__u64, tmp_id)
		__field(void *, bl)
	),
	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		__entry->root_hash = __le64_to_cpu(root->hash);
		__entry->root_ino = __le64_to_cpu(root->ino);
		__entry->root_id = __le64_to_cpu(root->id);
		__entry->tmp_hash = __le64_to_cpu(tmp->hash);
		__entry->tmp_ino = __le64_to_cpu(tmp->ino);
		__entry->tmp_id = __le64_to_cpu(tmp->id);
		__entry->bl = bl;
	),
	TP_printk(SCSBF" root hash 0x%llx root ino %llu root id 0x%llx tmp hash 0x%llx tmp ino %llu tmp id 0x%llx, bl %p",
		  SCSB_TRACE_ARGS,
		  __entry->root_hash, __entry->root_ino, __entry->root_id,
		  __entry->tmp_hash, __entry->tmp_ino, __entry->tmp_id,
		  __entry->bl)
);
TRACE_EVENT(scoutfs_block_stale,
TP_PROTO(struct super_block *sb, struct scoutfs_block_ref *ref,
struct scoutfs_block_header *hdr, u32 magic, u32 crc),

View File

@@ -630,7 +630,7 @@ static void scoutfs_server_commit_func(struct work_struct *work)
ret = scoutfs_alloc_empty_list(sb, &server->alloc, &server->wri,
server->meta_freed,
server->other_freed);
if (ret) {
if (ret && ret != -ENOLINK) {
scoutfs_err(sb, "server error emptying freed: %d", ret);
goto out;
}

View File

@@ -745,7 +745,6 @@ static int search_log_file(struct super_block *sb,
for (i = 0; i < le32_to_cpu(srb->entry_nr); i++) {
if (pos > SCOUTFS_SRCH_BLOCK_SAFE_BYTES) {
/* can only be inconsistency :/ */
scoutfs_err(sb, "@@@ pos %d > %ld", pos, SCOUTFS_SRCH_BLOCK_SAFE_BYTES);
ret = -EIO;
break;
}
@@ -753,7 +752,6 @@ static int search_log_file(struct super_block *sb,
ret = decode_entry(srb->entries + pos, &sre, &prev);
if (ret <= 0) {
/* can only be inconsistency :/ */
scoutfs_err(sb, "@@@ decode_entry -> %d", ret);
ret = -EIO;
break;
}
@@ -857,7 +855,6 @@ static int search_sorted_file(struct super_block *sb,
if (pos > SCOUTFS_SRCH_BLOCK_SAFE_BYTES) {
/* can only be inconsistency :/ */
scoutfs_err(sb, "@@@ 2: pos %d > %ld", pos, SCOUTFS_SRCH_BLOCK_SAFE_BYTES);
ret = -EIO;
goto out;
}
@@ -865,7 +862,6 @@ static int search_sorted_file(struct super_block *sb,
ret = decode_entry(srb->entries + pos, &sre, &prev);
if (ret <= 0) {
/* can only be inconsistency :/ */
scoutfs_err(sb, "@@@ 2: decode -> %d", ret);
ret = -EIO;
goto out;
}
@@ -1005,7 +1001,6 @@ retry:
scoutfs_key_inc(&key);
memcpy(&sfl, iref.val, iref.val_len);
} else {
scoutfs_err(sb, "@@@ btree_next bad?");
ret = -EIO;
}
scoutfs_btree_put_iref(&iref);
@@ -1041,7 +1036,6 @@ retry:
scoutfs_key_inc(&key);
memcpy(&lt, iref.val, iref.val_len);
} else {
scoutfs_err(sb, "@@@ btree_next bad 2");
ret = -EIO;
}
scoutfs_btree_put_iref(&iref);
@@ -1157,7 +1151,6 @@ int scoutfs_srch_get_compact(struct super_block *sb,
key = *iref.key;
memcpy(sc, iref.val, iref.val_len);
} else {
scoutfs_err(sb, "@@@ btree_next bad 3");
ret = -EIO;
}
scoutfs_btree_put_iref(&iref);
@@ -1216,7 +1209,6 @@ int scoutfs_srch_get_compact(struct super_block *sb,
key = *iref.key;
memcpy(&sfl, iref.val, iref.val_len);
} else {
scoutfs_err(sb, "@@@ btree_next bad 4");
ret = -EIO;
}
scoutfs_btree_put_iref(&iref);
@@ -1410,10 +1402,8 @@ int scoutfs_srch_commit_compact(struct super_block *sb,
if (ret == 0) {
if (iref.val_len == sizeof(struct scoutfs_srch_compact))
memcpy(busy, iref.val, iref.val_len);
else {
scoutfs_err(sb, "@@@ btree_lookup bad");
else
ret = -EIO;
}
scoutfs_btree_put_iref(&iref);
}
if (ret < 0)
@@ -1506,7 +1496,6 @@ int scoutfs_srch_cancel_compact(struct super_block *sb,
if (scoutfs_key_compare(iref.key, &last) > 0) {
ret = -ENOENT;
} else if (iref.val_len != sizeof(*sc)) {
scoutfs_err(sb, "@@@ btree_next bad 5");
ret = -EIO;
} else {
key = *iref.key;
@@ -1536,6 +1525,66 @@ static bool should_commit(struct super_block *sb, struct scoutfs_alloc *alloc,
scoutfs_alloc_meta_low(sb, alloc, nr);
}
static int alloc_srch_block(struct super_block *sb, struct scoutfs_alloc *alloc,
			    struct scoutfs_block_writer *wri,
			    struct scoutfs_srch_file *sfl,
			    struct scoutfs_block **bl,
			    u64 blk)
{
	DECLARE_SRCH_INFO(sb, srinf);
	int ret;

	if (atomic_read(&srinf->shutdown))
		return -ESHUTDOWN;

	/* could grow and dirty to a leaf */
	if (should_commit(sb, alloc, wri, sfl->height + 1))
		return -EAGAIN;

	ret = get_file_block(sb, alloc, wri, sfl, GFB_INSERT | GFB_DIRTY,
			     blk, bl);
	if (ret < 0)
		return ret;

	scoutfs_inc_counter(sb, srch_compact_dirty_block);
	return 0;
}

static int emit_srch_entry(struct super_block *sb,
			   struct scoutfs_srch_file *sfl,
			   struct scoutfs_srch_block *srb,
			   struct scoutfs_srch_entry *sre,
			   u64 blk)
{
	int ret;

	ret = encode_entry(srb->entries + le32_to_cpu(srb->entry_bytes),
			   sre, &srb->tail);
	if (WARN_ON_ONCE(ret <= 0)) {
		/* shouldn't happen */
		return -EIO;
	}

	if (srb->entry_bytes == 0) {
		if (blk == 0)
			sfl->first = *sre;
		srb->first = *sre;
	}
	le32_add_cpu(&srb->entry_nr, 1);
	le32_add_cpu(&srb->entry_bytes, ret);
	srb->last = *sre;
	srb->tail = *sre;
	sfl->last = *sre;
	le64_add_cpu(&sfl->entries, 1);

	scoutfs_inc_counter(sb, srch_compact_entry);
	trace_scoutfs_srch_emit_entry(sb, sre, srb, blk);
	return 0;
}
struct tourn_node {
struct scoutfs_srch_entry sre;
int ind;
@@ -1570,20 +1619,18 @@ static int kway_merge(struct super_block *sb,
kway_get_t kway_get, kway_advance_t kway_adv,
void **args, int nr, bool logs_input)
{
DECLARE_SRCH_INFO(sb, srinf);
struct scoutfs_srch_block *srb = NULL;
struct scoutfs_srch_entry last_tail;
struct scoutfs_srch_entry tmp_entry = {0};
struct scoutfs_block *bl = NULL;
struct tourn_node *tnodes;
struct tourn_node *leaves;
struct tourn_node *root;
struct tourn_node *tn;
int last_bytes = 0;
bool have_tmp = false;
int nr_parents;
int nr_nodes;
int empty = 0;
int ret = 0;
int diff;
u64 blk;
int ind;
int i;
@@ -1617,100 +1664,73 @@ static int kway_merge(struct super_block *sb,
}
}
trace_scoutfs_srch_new_merge(sb);
/* always append new blocks */
blk = le64_to_cpu(sfl->blocks);
while (empty < nr) {
if (srb == NULL || sre_cmp(&root->sre, &srb->last) != 0) {
if (bl == NULL) {
if (atomic_read(&srinf->shutdown)) {
ret = -ESHUTDOWN;
goto out;
trace_scoutfs_srch_cmp(sb, &root->sre, &tmp_entry, bl);
if (sre_cmp(&root->sre, &tmp_entry) != 0) {
if (have_tmp) {
if (bl == NULL) {
ret = alloc_srch_block(sb, alloc, wri,
sfl, &bl, blk);
if (ret < 0) {
if (ret == -EAGAIN)
ret = 0;
goto out;
}
srb = bl->data;
}
/* could grow and dirty to a leaf */
if (should_commit(sb, alloc, wri,
sfl->height + 1)) {
ret = 0;
goto out;
}
ret = get_file_block(sb, alloc, wri, sfl,
GFB_INSERT | GFB_DIRTY,
blk, &bl);
ret = emit_srch_entry(sb, sfl, srb, &tmp_entry,
blk);
if (ret < 0)
goto out;
srb = bl->data;
scoutfs_inc_counter(sb, srch_compact_dirty_block);
if (le32_to_cpu(srb->entry_bytes) >
SCOUTFS_SRCH_BLOCK_SAFE_BYTES) {
scoutfs_block_put(sb, bl);
bl = NULL;
blk++;
memset(&tmp_entry, 0, sizeof(tmp_entry));
have_tmp = false;
continue;
}
/*
* end sorted block on _SAFE offset for
* testing
*/
if (bl && le32_to_cpu(srb->entry_nr) == 1 &&
logs_input &&
scoutfs_trigger(sb, SRCH_COMPACT_LOGS_PAD_SAFE)) {
pad_entries_at_safe(sfl, srb);
scoutfs_block_put(sb, bl);
bl = NULL;
blk++;
memset(&tmp_entry, 0, sizeof(tmp_entry));
have_tmp = false;
continue;
}
}
last_bytes = le32_to_cpu(srb->entry_bytes);
last_tail = srb->last;
ret = encode_entry(srb->entries +
le32_to_cpu(srb->entry_bytes),
&root->sre, &srb->tail);
if (WARN_ON_ONCE(ret <= 0)) {
/* shouldn't happen */
scoutfs_err(sb, "@@@ encode -> %d", ret);
ret = -EIO;
goto out;
}
if (srb->entry_bytes == 0) {
if (blk == 0)
sfl->first = root->sre;
srb->first = root->sre;
}
le32_add_cpu(&srb->entry_nr, 1);
le32_add_cpu(&srb->entry_bytes, ret);
srb->last = root->sre;
srb->tail = root->sre;
sfl->last = root->sre;
le64_add_cpu(&sfl->entries, 1);
ret = 0;
if (le32_to_cpu(srb->entry_bytes) >
SCOUTFS_SRCH_BLOCK_SAFE_BYTES) {
scoutfs_block_put(sb, bl);
bl = NULL;
blk++;
}
/* end sorted block on _SAFE offset for testing */
if (bl && le32_to_cpu(srb->entry_nr) == 1 && logs_input &&
scoutfs_trigger(sb, SRCH_COMPACT_LOGS_PAD_SAFE)) {
pad_entries_at_safe(sfl, srb);
scoutfs_block_put(sb, bl);
bl = NULL;
blk++;
}
scoutfs_inc_counter(sb, srch_compact_entry);
tmp_entry = root->sre;
have_tmp = true;
} else {
/*
* Duplicate entries indicate deletion so we
* undo the previously encoded entry and ignore
* undo the previously cached tmp entry and ignore
* this entry. This only happens within each
* block. Deletions can span block boundaries
* and will be filtered out by search and
* hopefully removed in future compactions.
*/
diff = le32_to_cpu(srb->entry_bytes) - last_bytes;
if (diff) {
memset(srb->entries + last_bytes, 0, diff);
if (srb->entry_bytes == 0) {
/* last_tail will be 0 */
if (blk == 0)
sfl->first = last_tail;
srb->first = last_tail;
}
le32_add_cpu(&srb->entry_nr, -1);
srb->entry_bytes = cpu_to_le32(last_bytes);
srb->last = last_tail;
srb->tail = last_tail;
sfl->last = last_tail;
le64_add_cpu(&sfl->entries, -1);
}
trace_scoutfs_srch_clr_tmp(sb, &tmp_entry);
memset(&tmp_entry, 0, sizeof(tmp_entry));
have_tmp = false;
scoutfs_inc_counter(sb, srch_compact_removed_entry);
}
@@ -1753,6 +1773,24 @@ static int kway_merge(struct super_block *sb,
	/* could stream a final index.. arguably a small portion of work */
out:
	if (have_tmp) {
		bool emit = true;

		if (bl == NULL) {
			ret = alloc_srch_block(sb, alloc, wri, sfl, &bl, blk);
			if (ret) {
				emit = false;
				if (ret == -EAGAIN)
					ret = 0;
			} else {
				srb = bl->data;
			}
		}
		if (emit)
			ret = emit_srch_entry(sb, sfl, srb, &tmp_entry, blk);
	}

	scoutfs_block_put(sb, bl);
	vfree(tnodes);
	return ret;
@@ -1875,7 +1913,6 @@ static int compact_logs(struct super_block *sb,
if (pos > SCOUTFS_SRCH_BLOCK_SAFE_BYTES) {
/* can only be inconsistency :/ */
scoutfs_err(sb, "@@@ 3: pos %d > %ld", pos, SCOUTFS_SRCH_BLOCK_SAFE_BYTES);
ret = -EIO;
goto out;
}
@@ -1883,7 +1920,6 @@ static int compact_logs(struct super_block *sb,
ret = decode_entry(srb->entries + pos, sre, &prev);
if (ret <= 0) {
/* can only be inconsistency :/ */
scoutfs_err(sb, "@@@ new decode -> %d", ret);
ret = -EIO;
goto out;
}
@@ -1998,7 +2034,7 @@ static int kway_get_reader(struct super_block *sb,
rdr->skip > SCOUTFS_SRCH_BLOCK_SAFE_BYTES ||
rdr->skip >= le32_to_cpu(srb->entry_bytes)) {
/* XXX inconsistency */
scoutfs_err(sb, "@@@ blkno %llu pos %u vs %ld, skip %u , %u",
scoutfs_err(sb, "blkno %llu pos %u vs %ld, skip %u, bytes %u",
__le64_to_cpu(srb->hdr.blkno),
rdr->pos, SCOUTFS_SRCH_BLOCK_SAFE_BYTES,
rdr->skip,
@@ -2018,7 +2054,6 @@ static int kway_get_reader(struct super_block *sb,
&rdr->decoded_sre, &rdr->prev);
if (ret <= 0) {
/* XXX inconsistency */
scoutfs_err(sb, "@@@ decode new2 ret %d", ret);
return -EIO;
}
@@ -2315,10 +2350,8 @@ static void scoutfs_srch_compact_worker(struct work_struct *work)
out:
/* our allocators and files should be stable */
WARN_ON_ONCE(ret == -ESTALE);
if (ret < 0) {
scoutfs_err(sb, "@@@ srch_compact_error %d", ret);
if (ret < 0)
scoutfs_inc_counter(sb, srch_compact_error);
}
scoutfs_block_writer_forget_all(sb, &wri);
queue_compact_work(srinf, sc != NULL && sc->nr > 0 && ret == 0);

View File

@@ -90,7 +90,7 @@ done
# set some T_ defaults
T_TRACE_DUMP="0"
T_TRACE_PRINTK="0"
T_TRACE_PRINTK=""
T_PORT_START="19700"
T_LOOP_ITER="1"
@@ -521,7 +521,10 @@ crash_monitor()
fi
if [ "$bad" != 0 ]; then
echo "run-tests monitor triggering crash"
echo "run-tests monitor syncing and triggering crash"
# hail mary, the sync could well hang
(echo s > /proc/sysrq-trigger) &
sleep 5
echo c > /proc/sysrq-trigger
exit 1
fi

View File

@@ -44,11 +44,12 @@ echo "== create padded sorted inputs by forcing log rotation"
sv=$(t_server_nr)
for i in $(seq 1 $COMPACT_NR); do
for j in $(seq 1 $COMPACT_NR); do
t_trigger_arm_silent srch_force_log_rotate $sv
seq -f "f-$i-$j-$SEQF" 1 10 | \
bulk_create_paths -X "scoutfs.srch.t-srch-safe-merge-pos" -d "$T_D0" > \
/dev/null
t_trigger_arm_silent srch_force_log_rotate $sv
sync
test "$(t_trigger_get srch_force_log_rotate $sv)" == "0" || \