Compare commits


14 Commits

Author SHA1 Message Date
Chris Kirby
5465350ba2 block alloc fix 2026-01-14 13:25:00 -06:00
Chris Kirby
bf4f2ef8bb more dbg 2026-01-14 13:14:48 -06:00
Chris Kirby
c5207243f0 typo 2026-01-14 13:14:48 -06:00
Chris Kirby
8d330861ba track blkno 2026-01-14 13:14:48 -06:00
Chris Kirby
b7beadb22c track entry_bytes 2026-01-14 13:14:48 -06:00
Chris Kirby
926bb6b9ec debug 2026-01-14 13:14:48 -06:00
Chris Kirby
a420f5389e More debug 2026-01-14 13:14:48 -06:00
Chris Kirby
544362dfaa grab traces at exit 2026-01-14 13:14:48 -06:00
Chris Kirby
79593b6ecc And the output 2026-01-14 13:14:48 -06:00
Chris Kirby
b6e7d5b2d0 Temporarily make srch-safe triggers silent 2026-01-14 13:14:48 -06:00
Chris Kirby
487d38a4a5 init trace buffer size when starting tracing 2026-01-14 13:14:48 -06:00
Chris Kirby
9cddf6d9c9 Track srch_compact_error 2026-01-14 13:14:48 -06:00
Chris Kirby
d81c318c3e Beef up get_file_block() tracing
Signed-off-by: Chris Kirby <ckirby@versity.com>
2026-01-14 13:14:48 -06:00
Chris Kirby
279b093600 Debug stuff for srch-basic
Signed-off-by: Chris Kirby <ckirby@versity.com>
2026-01-14 13:14:48 -06:00
5 changed files with 103 additions and 275 deletions

View File

@@ -1,39 +1,6 @@
Versity ScoutFS Release Notes
=============================
---
v1.27
\
*Jan 15, 2026*
Switch away from using the general VM cache reclaim machinery to reduce
idle cluster locks in the client. The VM treated locks like a cache and
let many accumulate, presuming that it would be efficient to free them
in batches. Lock freeing requires network communication so this could
result in enormous backlogs in network messages (on the order of
hundreds of thousands) and could result in significant delays of other
network messaging.

Fix inefficient network receive processing while many messages are in
the send queue. This consumed sufficient CPU to cause significant
stalls, perhaps resulting in hung task warning messages due to delayed
lock message delivery.

Fix a server livelock case that could happen while committing client
transactions that contain a large number of freed file data extents.
This would present as client tasks hanging and a server task spinning
consuming CPU.

Fix a rare server request processing failure that doesn't deal with
retransmission of a request that a previous server partially processed.
This would present as hung client tasks and repeated "error -2
committing log merge: getting merge status item" kernel messages.

Fix an unnecessary server shutdown during specific circumstances in
client lock recovery. The shutdown was due to server state and was
ultimately harmless. The next server that started up would proceed
accordingly.
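
A minimal sketch of the idea behind the first note above, assuming nothing about the real ScoutFS lock structures: rather than letting generic cache reclaim hand back an arbitrarily large pile of idle locks at once (each release costs a network message), a dedicated pass frees at most a small fixed batch per call, so release traffic cannot back up by the hundreds of thousands.

```c
/* Sketch only -- not ScoutFS code: bounded-batch freeing of idle locks. */
#include <stdio.h>
#include <stdlib.h>

#define FREE_BATCH 32			/* max release messages per pass */

struct idle_lock {
	unsigned long long id;
	struct idle_lock *next;
};

/* stand-in for sending one lock-release message over the network */
static void send_release(const struct idle_lock *lck)
{
	printf("releasing lock %llu\n", lck->id);
}

/* free at most FREE_BATCH locks; a worker calls this repeatedly until empty */
static int free_idle_locks_batched(struct idle_lock **idle_list)
{
	int nr = 0;

	while (*idle_list && nr < FREE_BATCH) {
		struct idle_lock *lck = *idle_list;

		*idle_list = lck->next;
		send_release(lck);	/* network I/O happens per lock */
		free(lck);
		nr++;
	}
	return nr;			/* > 0: more work remains, requeue the pass */
}

int main(void)
{
	struct idle_lock *list = NULL;
	int i;

	for (i = 0; i < 100; i++) {
		struct idle_lock *lck = malloc(sizeof(*lck));

		lck->id = i;
		lck->next = list;
		list = lck;
	}
	while (free_idle_locks_batched(&list) > 0)
		;			/* each pass releases a bounded batch */
	return 0;
}
```
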
---
v1.26
\

View File

@@ -2660,110 +2660,6 @@ TRACE_EVENT(scoutfs_get_file_block,
__entry->last_hash, __entry->last_ino, __entry->last_id)
);
TRACE_EVENT(scoutfs_srch_new_merge,
TP_PROTO(struct super_block *sb),
TP_ARGS(sb),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
),
TP_printk(SCSBF, SCSB_TRACE_ARGS)
);
TRACE_EVENT(scoutfs_srch_emit_entry,
TP_PROTO(struct super_block *sb, struct scoutfs_srch_entry *sre,
struct scoutfs_srch_block *srb, u64 blkno),
TP_ARGS(sb, sre, srb, blkno),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u32, entry_nr)
__field(__u64, blkno)
__field(__u64, hash)
__field(__u64, ino)
__field(__u64, id)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->entry_nr = __le32_to_cpu(srb->entry_nr);
__entry->blkno = blkno;
__entry->hash = __le64_to_cpu(sre->hash);
__entry->ino = __le64_to_cpu(sre->ino);
__entry->id = __le64_to_cpu(sre->id);
),
TP_printk(SCSBF" nr %u blkno %llu hash 0x%llx ino %llu id 0x%llx",
SCSB_TRACE_ARGS, __entry->entry_nr, __entry->blkno,
__entry->hash, __entry->ino, __entry->id)
);
TRACE_EVENT(scoutfs_srch_clr_tmp,
TP_PROTO(struct super_block *sb, struct scoutfs_srch_entry *tmp),
TP_ARGS(sb, tmp),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, tmp_hash)
__field(__u64, tmp_ino)
__field(__u64, tmp_id)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->tmp_hash = __le64_to_cpu(tmp->hash);
__entry->tmp_ino = __le64_to_cpu(tmp->ino);
__entry->tmp_id = __le64_to_cpu(tmp->id);
),
TP_printk(SCSBF" tmp hash 0x%llx tmp ino %llu tmp hash 0x%llx",
SCSB_TRACE_ARGS,
__entry->tmp_hash, __entry->tmp_ino, __entry->tmp_id)
);
TRACE_EVENT(scoutfs_srch_cmp,
TP_PROTO(struct super_block *sb, struct scoutfs_srch_entry *root,
struct scoutfs_srch_entry *tmp, void *bl),
TP_ARGS(sb, root, tmp, bl),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, root_hash)
__field(__u64, root_ino)
__field(__u64, root_id)
__field(__u64, tmp_hash)
__field(__u64, tmp_ino)
__field(__u64, tmp_id)
__field(void *, bl)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->root_hash = __le64_to_cpu(root->hash);
__entry->root_ino = __le64_to_cpu(root->ino);
__entry->root_id = __le64_to_cpu(root->id);
__entry->tmp_hash = __le64_to_cpu(tmp->hash);
__entry->tmp_ino = __le64_to_cpu(tmp->ino);
__entry->tmp_id = __le64_to_cpu(tmp->id);
__entry->bl = bl;
),
TP_printk(SCSBF" root hash 0x%llx root ino %llu root id 0x%llx tmp hash 0x%llx tmp ino %llu tmp hash 0x%llx, bl %p",
SCSB_TRACE_ARGS,
__entry->root_hash, __entry->root_ino, __entry->root_id,
__entry->tmp_hash, __entry->tmp_ino, __entry->tmp_id,
__entry->bl)
);
TRACE_EVENT(scoutfs_block_stale,
TP_PROTO(struct super_block *sb, struct scoutfs_block_ref *ref,
struct scoutfs_block_header *hdr, u32 magic, u32 crc),

View File

@@ -745,6 +745,7 @@ static int search_log_file(struct super_block *sb,
for (i = 0; i < le32_to_cpu(srb->entry_nr); i++) {
if (pos > SCOUTFS_SRCH_BLOCK_SAFE_BYTES) {
/* can only be inconsistency :/ */
scoutfs_err(sb, "@@@ pos %d > %ld", pos, SCOUTFS_SRCH_BLOCK_SAFE_BYTES);
ret = -EIO;
break;
}
@@ -752,6 +753,7 @@ static int search_log_file(struct super_block *sb,
ret = decode_entry(srb->entries + pos, &sre, &prev);
if (ret <= 0) {
/* can only be inconsistency :/ */
scoutfs_err(sb, "@@@ decode_entry -> %d", ret);
ret = -EIO;
break;
}
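
The decode_entry() failures instrumented above walk entries packed back to back in a srch block; judging from the prev argument here and the tail bookkeeping later in this diff, each entry is encoded relative to the one before it, so the decoder returns the number of bytes it consumed and a non-positive return can only mean the block contents are inconsistent (hence -EIO). The sketch below illustrates that shape with a simple varint delta scheme; it is not the actual ScoutFS on-disk encoding.

```c
/* Illustration only -- not the ScoutFS on-disk format. */
#include <stdint.h>
#include <stddef.h>

struct srch_entry_model { uint64_t hash, ino, id; };

/* unsigned LEB128-style varint; returns bytes written (1..10) */
static int put_varint(uint8_t *dst, uint64_t v)
{
	int n = 0;

	do {
		dst[n] = v & 0x7f;
		v >>= 7;
		if (v)
			dst[n] |= 0x80;
		n++;
	} while (v);
	return n;
}

/* returns bytes consumed, or 0 if the buffer can't hold a valid varint */
static int get_varint(const uint8_t *src, size_t len, uint64_t *v)
{
	uint64_t val = 0;
	int n = 0;

	while ((size_t)n < len && n < 10) {
		val |= (uint64_t)(src[n] & 0x7f) << (7 * n);
		if (!(src[n++] & 0x80)) {
			*v = val;
			return n;
		}
	}
	return 0;
}

/* encode sre as deltas against prev; returns bytes written */
static int encode_entry_model(uint8_t *dst, const struct srch_entry_model *sre,
			      const struct srch_entry_model *prev)
{
	int n = 0;

	n += put_varint(dst + n, sre->hash - prev->hash);
	n += put_varint(dst + n, sre->ino - prev->ino);
	n += put_varint(dst + n, sre->id - prev->id);
	return n;
}

/*
 * Decode one entry relative to prev, updating prev to the decoded entry.
 * Returns bytes consumed, or <= 0 when the bytes can't be a valid entry;
 * the callers above treat that as on-disk inconsistency (-EIO).
 */
static int decode_entry_model(const uint8_t *src, size_t len,
			      struct srch_entry_model *sre,
			      struct srch_entry_model *prev)
{
	uint64_t dh, di, dd;
	int n = 0, r;

	if ((r = get_varint(src + n, len - n, &dh)) <= 0) return 0;
	n += r;
	if ((r = get_varint(src + n, len - n, &di)) <= 0) return 0;
	n += r;
	if ((r = get_varint(src + n, len - n, &dd)) <= 0) return 0;
	n += r;

	sre->hash = prev->hash + dh;
	sre->ino = prev->ino + di;
	sre->id = prev->id + dd;
	*prev = *sre;
	return n;
}

int main(void)
{
	struct srch_entry_model in[2] = { { 1000, 50, 1 }, { 1003, 52, 2 } };
	struct srch_entry_model prev = { 0 }, sre;
	uint8_t buf[64];
	int pos = 0, i, ret;

	for (i = 0; i < 2; i++)
		pos += encode_entry_model(buf + pos, &in[i], i ? &in[i - 1] : &prev);

	prev = (struct srch_entry_model){ 0 };
	for (i = 0, pos = 0; i < 2; i++) {
		ret = decode_entry_model(buf + pos, sizeof(buf) - pos, &sre, &prev);
		if (ret <= 0)
			return 1;	/* would map to -EIO in the kernel */
		pos += ret;
	}
	return 0;
}
```
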
@@ -855,6 +857,7 @@ static int search_sorted_file(struct super_block *sb,
if (pos > SCOUTFS_SRCH_BLOCK_SAFE_BYTES) {
/* can only be inconsistency :/ */
scoutfs_err(sb, "@@@ 2: pos %d > %ld", pos, SCOUTFS_SRCH_BLOCK_SAFE_BYTES);
ret = -EIO;
goto out;
}
@@ -862,6 +865,7 @@ static int search_sorted_file(struct super_block *sb,
ret = decode_entry(srb->entries + pos, &sre, &prev);
if (ret <= 0) {
/* can only be inconsistency :/ */
scoutfs_err(sb, "@@@ 2: decode -> %d", ret);
ret = -EIO;
goto out;
}
@@ -1001,6 +1005,7 @@ retry:
scoutfs_key_inc(&key);
memcpy(&sfl, iref.val, iref.val_len);
} else {
scoutfs_err(sb, "@@@ btree_next bad?");
ret = -EIO;
}
scoutfs_btree_put_iref(&iref);
@@ -1036,6 +1041,7 @@ retry:
scoutfs_key_inc(&key);
memcpy(&lt, iref.val, iref.val_len);
} else {
scoutfs_err(sb, "@@@ btree_next bad 2");
ret = -EIO;
}
scoutfs_btree_put_iref(&iref);
@@ -1151,6 +1157,7 @@ int scoutfs_srch_get_compact(struct super_block *sb,
key = *iref.key;
memcpy(sc, iref.val, iref.val_len);
} else {
scoutfs_err(sb, "@@@ btree_next bad 3");
ret = -EIO;
}
scoutfs_btree_put_iref(&iref);
@@ -1209,6 +1216,7 @@ int scoutfs_srch_get_compact(struct super_block *sb,
key = *iref.key;
memcpy(&sfl, iref.val, iref.val_len);
} else {
scoutfs_err(sb, "@@@ btree_next bad 4");
ret = -EIO;
}
scoutfs_btree_put_iref(&iref);
@@ -1402,8 +1410,10 @@ int scoutfs_srch_commit_compact(struct super_block *sb,
if (ret == 0) {
if (iref.val_len == sizeof(struct scoutfs_srch_compact))
memcpy(busy, iref.val, iref.val_len);
else
else {
scoutfs_err(sb, "@@@ btree_lookup bad");
ret = -EIO;
}
scoutfs_btree_put_iref(&iref);
}
if (ret < 0)
@@ -1496,6 +1506,7 @@ int scoutfs_srch_cancel_compact(struct super_block *sb,
if (scoutfs_key_compare(iref.key, &last) > 0) {
ret = -ENOENT;
} else if (iref.val_len != sizeof(*sc)) {
scoutfs_err(sb, "@@@ btree_next bad 5");
ret = -EIO;
} else {
key = *iref.key;
@@ -1525,66 +1536,6 @@ static bool should_commit(struct super_block *sb, struct scoutfs_alloc *alloc,
scoutfs_alloc_meta_low(sb, alloc, nr);
}
static int alloc_srch_block(struct super_block *sb, struct scoutfs_alloc *alloc,
struct scoutfs_block_writer *wri,
struct scoutfs_srch_file *sfl,
struct scoutfs_block **bl,
u64 blk)
{
DECLARE_SRCH_INFO(sb, srinf);
int ret;
if (atomic_read(&srinf->shutdown))
return -ESHUTDOWN;
/* could grow and dirty to a leaf */
if (should_commit(sb, alloc, wri, sfl->height + 1))
return -EAGAIN;
ret = get_file_block(sb, alloc, wri, sfl, GFB_INSERT | GFB_DIRTY,
blk, bl);
if (ret < 0)
return ret;
scoutfs_inc_counter(sb, srch_compact_dirty_block);
return 0;
}
static int emit_srch_entry(struct super_block *sb,
struct scoutfs_srch_file *sfl,
struct scoutfs_srch_block *srb,
struct scoutfs_srch_entry *sre,
u64 blk)
{
int ret;
ret = encode_entry(srb->entries + le32_to_cpu(srb->entry_bytes),
sre, &srb->tail);
if (WARN_ON_ONCE(ret <= 0)) {
/* shouldn't happen */
return -EIO;
}
if (srb->entry_bytes == 0) {
if (blk == 0)
sfl->first = *sre;
srb->first = *sre;
}
le32_add_cpu(&srb->entry_nr, 1);
le32_add_cpu(&srb->entry_bytes, ret);
srb->last = *sre;
srb->tail = *sre;
sfl->last = *sre;
le64_add_cpu(&sfl->entries, 1);
scoutfs_inc_counter(sb, srch_compact_entry);
trace_scoutfs_srch_emit_entry(sb, sre, srb, blk);
return 0;
}
struct tourn_node {
struct scoutfs_srch_entry sre;
int ind;
@@ -1619,18 +1570,20 @@ static int kway_merge(struct super_block *sb,
kway_get_t kway_get, kway_advance_t kway_adv,
void **args, int nr, bool logs_input)
{
DECLARE_SRCH_INFO(sb, srinf);
struct scoutfs_srch_block *srb = NULL;
struct scoutfs_srch_entry tmp_entry = {0};
struct scoutfs_srch_entry last_tail;
struct scoutfs_block *bl = NULL;
struct tourn_node *tnodes;
struct tourn_node *leaves;
struct tourn_node *root;
struct tourn_node *tn;
bool have_tmp = false;
int last_bytes = 0;
int nr_parents;
int nr_nodes;
int empty = 0;
int ret = 0;
int diff;
u64 blk;
int ind;
int i;
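
struct tourn_node and the leaves/parents/root variables above suggest kway_merge() drives the merge with a tournament (winner) tree: the input streams sit at the leaves, each internal node remembers the smaller of its two children, and after the root's entry is consumed only one leaf-to-root path has to be replayed. Below is a self-contained sketch of that structure merging sorted integer arrays rather than srch entries; it illustrates the general technique, not the ScoutFS implementation.

```c
/*
 * Sketch: k-way merge with a winner (tournament) tree.  Merges sorted
 * int arrays; note the duplicate 4s survive the merge, which is what
 * the duplicate-cancellation logic in kway_merge() then deals with.
 */
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>

struct stream { const int *vals; int len, pos; };

/* current head of stream idx, or INT_MAX if it doesn't exist / is empty */
static int head_of(const struct stream *streams, int nr, int idx)
{
	if (idx >= nr || streams[idx].pos >= streams[idx].len)
		return INT_MAX;
	return streams[idx].vals[streams[idx].pos];
}

int main(void)
{
	const int a[] = { 1, 4, 9 }, b[] = { 2, 4, 8 }, c[] = { 3, 5, 7 };
	struct stream streams[] = { { a, 3, 0 }, { b, 3, 0 }, { c, 3, 0 } };
	int nr = 3, leaves = 1, *tree, n, v;

	while (leaves < nr)			/* complete binary tree */
		leaves *= 2;
	tree = malloc(2 * leaves * sizeof(*tree));

	/* leaves hold stream indices; internal nodes hold the winner below them */
	for (n = 0; n < leaves; n++)
		tree[leaves + n] = n;
	for (n = leaves - 1; n >= 1; n--)
		tree[n] = head_of(streams, nr, tree[2 * n]) <=
			  head_of(streams, nr, tree[2 * n + 1]) ?
			  tree[2 * n] : tree[2 * n + 1];

	/* pop the overall smallest, then replay only that leaf's path to the root */
	while ((v = head_of(streams, nr, tree[1])) != INT_MAX) {
		int win = tree[1];

		printf("%d ", v);		/* emit (or cancel a duplicate) */
		streams[win].pos++;
		for (n = (leaves + win) / 2; n >= 1; n /= 2)
			tree[n] = head_of(streams, nr, tree[2 * n]) <=
				  head_of(streams, nr, tree[2 * n + 1]) ?
				  tree[2 * n] : tree[2 * n + 1];
	}
	printf("\n");				/* prints: 1 2 3 4 4 5 7 8 9 */
	free(tree);
	return 0;
}
```
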
@@ -1664,73 +1617,100 @@ static int kway_merge(struct super_block *sb,
}
}
trace_scoutfs_srch_new_merge(sb);
/* always append new blocks */
blk = le64_to_cpu(sfl->blocks);
while (empty < nr) {
trace_scoutfs_srch_cmp(sb, &root->sre, &tmp_entry, bl);
if (sre_cmp(&root->sre, &tmp_entry) != 0) {
if (have_tmp) {
if (bl == NULL) {
ret = alloc_srch_block(sb, alloc, wri,
sfl, &bl, blk);
if (ret < 0) {
if (ret == -EAGAIN)
ret = 0;
goto out;
}
srb = bl->data;
if (srb == NULL || sre_cmp(&root->sre, &srb->last) != 0) {
if (bl == NULL) {
if (atomic_read(&srinf->shutdown)) {
ret = -ESHUTDOWN;
goto out;
}
ret = emit_srch_entry(sb, sfl, srb, &tmp_entry,
blk);
/* could grow and dirty to a leaf */
if (should_commit(sb, alloc, wri,
sfl->height + 1)) {
ret = 0;
goto out;
}
ret = get_file_block(sb, alloc, wri, sfl,
GFB_INSERT | GFB_DIRTY,
blk, &bl);
if (ret < 0)
goto out;
if (le32_to_cpu(srb->entry_bytes) >
SCOUTFS_SRCH_BLOCK_SAFE_BYTES) {
scoutfs_block_put(sb, bl);
bl = NULL;
blk++;
memset(&tmp_entry, 0, sizeof(tmp_entry));
have_tmp = false;
continue;
}
/*
* end sorted block on _SAFE offset for
* testing
*/
if (bl && le32_to_cpu(srb->entry_nr) == 1 &&
logs_input &&
scoutfs_trigger(sb, SRCH_COMPACT_LOGS_PAD_SAFE)) {
pad_entries_at_safe(sfl, srb);
scoutfs_block_put(sb, bl);
bl = NULL;
blk++;
memset(&tmp_entry, 0, sizeof(tmp_entry));
have_tmp = false;
continue;
}
srb = bl->data;
scoutfs_inc_counter(sb, srch_compact_dirty_block);
}
tmp_entry = root->sre;
have_tmp = true;
last_bytes = le32_to_cpu(srb->entry_bytes);
last_tail = srb->last;
ret = encode_entry(srb->entries +
le32_to_cpu(srb->entry_bytes),
&root->sre, &srb->tail);
if (WARN_ON_ONCE(ret <= 0)) {
/* shouldn't happen */
scoutfs_err(sb, "@@@ encode -> %d", ret);
ret = -EIO;
goto out;
}
if (srb->entry_bytes == 0) {
if (blk == 0)
sfl->first = root->sre;
srb->first = root->sre;
}
le32_add_cpu(&srb->entry_nr, 1);
le32_add_cpu(&srb->entry_bytes, ret);
srb->last = root->sre;
srb->tail = root->sre;
sfl->last = root->sre;
le64_add_cpu(&sfl->entries, 1);
ret = 0;
if (le32_to_cpu(srb->entry_bytes) >
SCOUTFS_SRCH_BLOCK_SAFE_BYTES) {
scoutfs_block_put(sb, bl);
bl = NULL;
blk++;
}
/* end sorted block on _SAFE offset for testing */
if (bl && le32_to_cpu(srb->entry_nr) == 1 && logs_input &&
scoutfs_trigger(sb, SRCH_COMPACT_LOGS_PAD_SAFE)) {
pad_entries_at_safe(sfl, srb);
scoutfs_block_put(sb, bl);
bl = NULL;
blk++;
}
scoutfs_inc_counter(sb, srch_compact_entry);
} else {
/*
* Duplicate entries indicate deletion so we
* undo the previously cached tmp entry and ignore
* undo the previously encoded entry and ignore
* this entry. This only happens within each
* block. Deletions can span block boundaries
* and will be filtered out by search and
* hopefully removed in future compactions.
*/
trace_scoutfs_srch_clr_tmp(sb, &tmp_entry);
memset(&tmp_entry, 0, sizeof(tmp_entry));
have_tmp = false;
diff = le32_to_cpu(srb->entry_bytes) - last_bytes;
if (diff) {
memset(srb->entries + last_bytes, 0, diff);
if (srb->entry_bytes == 0) {
/* last_tail will be 0 */
if (blk == 0)
sfl->first = last_tail;
srb->first = last_tail;
}
le32_add_cpu(&srb->entry_nr, -1);
srb->entry_bytes = cpu_to_le32(last_bytes);
srb->last = last_tail;
srb->tail = last_tail;
sfl->last = last_tail;
le64_add_cpu(&sfl->entries, -1);
}
scoutfs_inc_counter(sb, srch_compact_removed_entry);
}
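
To make the new rollback branch easier to follow: the old code held each candidate in tmp_entry and only emitted it once the next key proved it wasn't half of a duplicate (deletion) pair, while the new code encodes immediately and, on seeing the duplicate, erases what it just wrote -- last_bytes and last_tail are the snapshot taken just before the append. A self-contained sketch of that append-then-undo pattern, with fixed-size records standing in for the delta-encoded entries and assuming nothing about the real block layout:

```c
/* Sketch of the "encode eagerly, undo on duplicate" pattern; illustration only. */
#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct entry { uint64_t hash, ino, id; };

struct block_model {
	uint32_t entry_nr;
	uint32_t entry_bytes;
	struct entry last;		/* last appended entry ("tail") */
	uint8_t entries[4096];
};

static int entry_eq(const struct entry *a, const struct entry *b)
{
	return a->hash == b->hash && a->ino == b->ino && a->id == b->id;
}

/*
 * Append e unless it duplicates the previous append; a duplicate pair
 * marks a deletion, so zero the bytes written for the first copy and
 * restore the pre-append snapshot -- the role last_bytes/last_tail play
 * in the diff above.
 */
static void append_or_cancel(struct block_model *blk, const struct entry *e,
			     uint32_t *last_bytes, struct entry *last_tail)
{
	if (blk->entry_nr && entry_eq(e, &blk->last)) {
		uint32_t diff = blk->entry_bytes - *last_bytes;

		memset(blk->entries + *last_bytes, 0, diff);
		blk->entry_nr--;
		blk->entry_bytes = *last_bytes;
		blk->last = *last_tail;		/* tail goes back one entry */
		return;
	}

	*last_bytes = blk->entry_bytes;		/* snapshot before appending */
	*last_tail = blk->last;

	memcpy(blk->entries + blk->entry_bytes, e, sizeof(*e));
	blk->entry_nr++;
	blk->entry_bytes += sizeof(*e);
	blk->last = *e;
}

int main(void)
{
	struct block_model blk = { 0 };
	uint32_t last_bytes = 0;
	struct entry last_tail = { 0 };
	struct entry in[] = {			/* merged, sorted input */
		{ 1, 100, 1 }, { 2, 200, 1 }, { 2, 200, 1 }, { 3, 300, 1 },
	};
	size_t i;

	for (i = 0; i < sizeof(in) / sizeof(in[0]); i++)
		append_or_cancel(&blk, &in[i], &last_bytes, &last_tail);

	/* the duplicate pair {2,200,1} cancelled out: 2 entries remain */
	printf("entries %u bytes %u\n", blk.entry_nr, blk.entry_bytes);
	return 0;
}
```
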
@@ -1773,22 +1753,6 @@ static int kway_merge(struct super_block *sb,
/* could stream a final index.. arguably a small portion of work */
out:
if (have_tmp) {
bool emit = true;
if (bl == NULL) {
ret = alloc_srch_block(sb, alloc, wri, sfl, &bl, blk);
if (ret) {
emit = false;
if (ret == -EAGAIN)
ret = 0;
}
}
if (emit)
ret = emit_srch_entry(sb, sfl, srb, &tmp_entry, blk);
}
scoutfs_block_put(sb, bl);
vfree(tnodes);
return ret;
@@ -1911,6 +1875,7 @@ static int compact_logs(struct super_block *sb,
if (pos > SCOUTFS_SRCH_BLOCK_SAFE_BYTES) {
/* can only be inconsistency :/ */
scoutfs_err(sb, "@@@ 3: pos %d > %ld", pos, SCOUTFS_SRCH_BLOCK_SAFE_BYTES);
ret = -EIO;
goto out;
}
@@ -1918,6 +1883,7 @@ static int compact_logs(struct super_block *sb,
ret = decode_entry(srb->entries + pos, sre, &prev);
if (ret <= 0) {
/* can only be inconsistency :/ */
scoutfs_err(sb, "@@@ new decode -> %d", ret);
ret = -EIO;
goto out;
}
@@ -2032,7 +1998,7 @@ static int kway_get_reader(struct super_block *sb,
rdr->skip > SCOUTFS_SRCH_BLOCK_SAFE_BYTES ||
rdr->skip >= le32_to_cpu(srb->entry_bytes)) {
/* XXX inconsistency */
scoutfs_err(sb, "blkno %llu pos %u vs %ld, skip %u, bytes %u",
scoutfs_err(sb, "@@@ blkno %llu pos %u vs %ld, skip %u , %u",
__le64_to_cpu(srb->hdr.blkno),
rdr->pos, SCOUTFS_SRCH_BLOCK_SAFE_BYTES,
rdr->skip,
@@ -2052,6 +2018,7 @@ static int kway_get_reader(struct super_block *sb,
&rdr->decoded_sre, &rdr->prev);
if (ret <= 0) {
/* XXX inconsistency */
scoutfs_err(sb, "@@@ decode new2 ret %d", ret);
return -EIO;
}
@@ -2348,8 +2315,10 @@ static void scoutfs_srch_compact_worker(struct work_struct *work)
out:
/* our allocators and files should be stable */
WARN_ON_ONCE(ret == -ESTALE);
if (ret < 0)
if (ret < 0) {
scoutfs_err(sb, "@@@ srch_compact_error %d", ret);
scoutfs_inc_counter(sb, srch_compact_error);
}
scoutfs_block_writer_forget_all(sb, &wri);
queue_compact_work(srinf, sc != NULL && sc->nr > 0 && ret == 0);

View File

@@ -521,10 +521,7 @@ crash_monitor()
fi
if [ "$bad" != 0 ]; then
echo "run-tests monitor syncing and triggering crash"
# hail mary, the sync could well hang
(echo s > /proc/sysrq-trigger) &
sleep 5
echo "run-tests monitor triggering crash"
echo c > /proc/sysrq-trigger
exit 1
fi

View File

@@ -44,12 +44,11 @@ echo "== create padded sorted inputs by forcing log rotation"
sv=$(t_server_nr)
for i in $(seq 1 $COMPACT_NR); do
for j in $(seq 1 $COMPACT_NR); do
t_trigger_arm_silent srch_force_log_rotate $sv
seq -f "f-$i-$j-$SEQF" 1 10 | \
bulk_create_paths -X "scoutfs.srch.t-srch-safe-merge-pos" -d "$T_D0" > \
/dev/null
t_trigger_arm_silent srch_force_log_rotate $sv
sync
test "$(t_trigger_get srch_force_log_rotate $sv)" == "0" || \