Compare commits

..

10 Commits

Author SHA1 Message Date
Chris Kirby
f58a208ddb grab traces at exit 2026-01-09 11:20:59 -06:00
Chris Kirby
0ac35782a6 And the output 2026-01-08 17:52:27 -06:00
Chris Kirby
02ac482964 Temporarily make srch-safe triggers silent 2026-01-08 17:51:50 -06:00
Chris Kirby
124f15a5c0 init trace buffer size when starting tracing 2026-01-07 19:06:11 -06:00
Chris Kirby
1f8b26e8a3 Track srch_compact_error 2026-01-07 17:08:19 -06:00
Chris Kirby
6027fa3e9d Beef up get_file_block() tracing
Signed-off-by: Chris Kirby <ckirby@versity.com>
2026-01-07 17:08:06 -06:00
Chris Kirby
c24f129558 Debug stuff for srch-basic
Signed-off-by: Chris Kirby <ckirby@versity.com>
2026-01-07 17:07:29 -06:00
Zach Brown
3dafeaac5b Merge pull request #273 from versity/clk/inode_deletion
Clk/inode deletion
2026-01-07 12:20:12 -08:00
Chris Kirby
ef0f6f8ac2 Fix race in inode-deletion test
Due to an iput race, the "unlink wait for open on other mount"
subtest can fail. If the unlink happens inline, then the test
passes. But if the orphan scanner has to complete the unlink
work, it's possible that there won't be enough log merge work
for the scanner to do the cleanup before we look at the seq index.

Add SCOUTFS_TRIGGER_LOG_MERGE_FORCE_FINALIZE_OURS, to allow
forcing a log merge. Add new counters, log_merges_start and
log_merge_complete, so that tests can see that a merge has happened.

Then we have to wait for the orphan scanner to do its work.
Add a new counter, orphan_scan_empty, that increments each time
the scanner walks the entire inode space without finding any
orphans. Once the test sees that counter increment, it should be
safe to check the seq index and see that the unlinked inode is gone.

Signed-off-by: Chris Kirby <ckirby@versity.com>
2026-01-07 08:29:38 -06:00
Chris Kirby
c0cd29aa1b Fix run-test.sh buffer multiplier breakage
The /sys/kernel/debug/tracing/buffer_size_kb file always reads as
"7 (expanded: 1408)". So the -T option to run-test.sh won't work,
because it tries to multiply that string by the given factor.

It always defaults to 1408 on every platform we currently support.
Just use that value so we can specify -T in CI runs.

Signed-off-by: Chris Kirby <ckirby@versity.com>
2025-12-18 15:05:48 -06:00
16 changed files with 499 additions and 196 deletions

View File

@@ -145,7 +145,9 @@
EXPAND_COUNTER(lock_shrink_work) \
EXPAND_COUNTER(lock_unlock) \
EXPAND_COUNTER(lock_wait) \
EXPAND_COUNTER(log_merge_complete) \
EXPAND_COUNTER(log_merge_no_finalized) \
EXPAND_COUNTER(log_merge_start) \
EXPAND_COUNTER(log_merge_wait_timeout) \
EXPAND_COUNTER(net_dropped_response) \
EXPAND_COUNTER(net_send_bytes) \
@@ -160,6 +162,7 @@
EXPAND_COUNTER(orphan_scan) \
EXPAND_COUNTER(orphan_scan_attempts) \
EXPAND_COUNTER(orphan_scan_cached) \
EXPAND_COUNTER(orphan_scan_empty) \
EXPAND_COUNTER(orphan_scan_error) \
EXPAND_COUNTER(orphan_scan_item) \
EXPAND_COUNTER(orphan_scan_omap_set) \

View File

@@ -1637,10 +1637,14 @@ int scoutfs_inode_orphan_delete(struct super_block *sb, u64 ino, struct scoutfs_
struct scoutfs_lock *primary)
{
struct scoutfs_key key;
int ret;
init_orphan_key(&key, ino);
return scoutfs_item_delete_force(sb, &key, lock, primary);
ret = scoutfs_item_delete_force(sb, &key, lock, primary);
trace_scoutfs_inode_orphan_delete(sb, ino, ret);
return ret;
}
/*
@@ -1722,6 +1726,8 @@ out:
scoutfs_release_trans(sb);
scoutfs_inode_index_unlock(sb, &ind_locks);
trace_scoutfs_delete_inode_end(sb, ino, mode, size, ret);
return ret;
}
@@ -1817,6 +1823,9 @@ out:
* they've checked that the inode could really be deleted. We serialize
* on a bit in the lock data so that we only have one deletion attempt
* per inode under this mount's cluster lock.
*
* Returns -EAGAIN if we either did some cleanup work or are unable to finish
* cleaning up this inode right now.
*/
static int try_delete_inode_items(struct super_block *sb, u64 ino)
{
@@ -1830,6 +1839,8 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino)
int bit_nr;
int ret;
trace_scoutfs_try_delete(sb, ino);
ret = scoutfs_lock_ino(sb, SCOUTFS_LOCK_WRITE, 0, ino, &lock);
if (ret < 0)
goto out;
@@ -1842,27 +1853,32 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino)
/* only one local attempt per inode at a time */
if (test_and_set_bit(bit_nr, ldata->trying)) {
ret = 0;
trace_scoutfs_try_delete_local_busy(sb, ino);
ret = -EAGAIN;
goto out;
}
clear_trying = true;
/* can't delete if it's cached in local or remote mounts */
if (scoutfs_omap_test(sb, ino) || test_bit_le(bit_nr, ldata->map.bits)) {
ret = 0;
trace_scoutfs_try_delete_cached(sb, ino);
ret = -EAGAIN;
goto out;
}
scoutfs_inode_init_key(&key, ino);
ret = lookup_inode_item(sb, &key, &sinode, lock);
if (ret < 0) {
if (ret == -ENOENT)
if (ret == -ENOENT) {
trace_scoutfs_try_delete_no_item(sb, ino);
ret = 0;
}
goto out;
}
if (le32_to_cpu(sinode.nlink) > 0) {
ret = 0;
trace_scoutfs_try_delete_has_links(sb, ino, le32_to_cpu(sinode.nlink));
ret = -EAGAIN;
goto out;
}
@@ -1871,8 +1887,10 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino)
goto out;
ret = delete_inode_items(sb, ino, &sinode, lock, orph_lock);
if (ret == 0)
if (ret == 0) {
ret = -EAGAIN;
scoutfs_inc_counter(sb, inode_deleted);
}
out:
if (clear_trying)
@@ -2074,6 +2092,10 @@ void scoutfs_inode_schedule_orphan_dwork(struct super_block *sb)
* a locally cached inode. Then we ask the server for the open map
* containing the inode. Only if we don't see any cached users do we do
* the expensive work of acquiring locks to try and delete the items.
*
* We need to track whether there is any orphan cleanup work remaining so
* that tests such as inode-deletion can watch the orphan_scan_empty counter
* to determine when inode cleanup from open-unlink scenarios is complete.
*/
static void inode_orphan_scan_worker(struct work_struct *work)
{
@@ -2085,11 +2107,14 @@ static void inode_orphan_scan_worker(struct work_struct *work)
SCOUTFS_BTREE_ITEM_REF(iref);
struct scoutfs_key last;
struct scoutfs_key key;
bool work_todo = false;
u64 group_nr;
int bit_nr;
u64 ino;
int ret;
trace_scoutfs_orphan_scan_start(sb);
scoutfs_inc_counter(sb, orphan_scan);
init_orphan_key(&last, U64_MAX);
@@ -2109,8 +2134,10 @@ static void inode_orphan_scan_worker(struct work_struct *work)
init_orphan_key(&key, ino);
ret = scoutfs_btree_next(sb, &roots.fs_root, &key, &iref);
if (ret < 0) {
if (ret == -ENOENT)
if (ret == -ENOENT) {
trace_scoutfs_orphan_scan_work(sb, 0);
break;
}
goto out;
}
@@ -2125,6 +2152,7 @@ static void inode_orphan_scan_worker(struct work_struct *work)
/* locally cached inodes will try to delete as they evict */
if (scoutfs_omap_test(sb, ino)) {
work_todo = true;
scoutfs_inc_counter(sb, orphan_scan_cached);
continue;
}
@@ -2140,13 +2168,22 @@ static void inode_orphan_scan_worker(struct work_struct *work)
/* remote cached inodes will also try to delete */
if (test_bit_le(bit_nr, omap.bits)) {
work_todo = true;
scoutfs_inc_counter(sb, orphan_scan_omap_set);
continue;
}
/* seemingly orphaned and unused, get locks and check for sure */
scoutfs_inc_counter(sb, orphan_scan_attempts);
trace_scoutfs_orphan_scan_work(sb, ino);
ret = try_delete_inode_items(sb, ino);
if (ret == -EAGAIN) {
work_todo = true;
ret = 0;
}
trace_scoutfs_orphan_scan_end(sb, ino, ret);
}
ret = 0;
@@ -2155,6 +2192,11 @@ out:
if (ret < 0)
scoutfs_inc_counter(sb, orphan_scan_error);
if (!work_todo)
scoutfs_inc_counter(sb, orphan_scan_empty);
trace_scoutfs_orphan_scan_stop(sb, work_todo);
scoutfs_inode_schedule_orphan_dwork(sb);
}

View File

@@ -789,6 +789,80 @@ TRACE_EVENT(scoutfs_inode_walk_writeback,
__entry->ino, __entry->write, __entry->ret)
);
TRACE_EVENT(scoutfs_orphan_scan_start,
TP_PROTO(struct super_block *sb),
TP_ARGS(sb),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
),
TP_printk(SCSBF, SCSB_TRACE_ARGS)
);
TRACE_EVENT(scoutfs_orphan_scan_stop,
TP_PROTO(struct super_block *sb, bool work_todo),
TP_ARGS(sb, work_todo),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(bool, work_todo)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->work_todo = work_todo;
),
TP_printk(SCSBF" work_todo %d", SCSB_TRACE_ARGS, __entry->work_todo)
);
TRACE_EVENT(scoutfs_orphan_scan_work,
TP_PROTO(struct super_block *sb, __u64 ino),
TP_ARGS(sb, ino),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, ino)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->ino = ino;
),
TP_printk(SCSBF" ino %llu", SCSB_TRACE_ARGS,
__entry->ino)
);
TRACE_EVENT(scoutfs_orphan_scan_end,
TP_PROTO(struct super_block *sb, __u64 ino, int ret),
TP_ARGS(sb, ino, ret),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, ino)
__field(int, ret)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->ino = ino;
__entry->ret = ret;
),
TP_printk(SCSBF" ino %llu ret %d", SCSB_TRACE_ARGS,
__entry->ino, __entry->ret)
);
DECLARE_EVENT_CLASS(scoutfs_lock_info_class,
TP_PROTO(struct super_block *sb, struct lock_info *linfo),
@@ -1036,6 +1110,82 @@ TRACE_EVENT(scoutfs_orphan_inode,
MINOR(__entry->dev), __entry->ino)
);
DECLARE_EVENT_CLASS(scoutfs_try_delete_class,
TP_PROTO(struct super_block *sb, u64 ino),
TP_ARGS(sb, ino),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, ino)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->ino = ino;
),
TP_printk(SCSBF" ino %llu", SCSB_TRACE_ARGS, __entry->ino)
);
DEFINE_EVENT(scoutfs_try_delete_class, scoutfs_try_delete,
TP_PROTO(struct super_block *sb, u64 ino),
TP_ARGS(sb, ino)
);
DEFINE_EVENT(scoutfs_try_delete_class, scoutfs_try_delete_local_busy,
TP_PROTO(struct super_block *sb, u64 ino),
TP_ARGS(sb, ino)
);
DEFINE_EVENT(scoutfs_try_delete_class, scoutfs_try_delete_cached,
TP_PROTO(struct super_block *sb, u64 ino),
TP_ARGS(sb, ino)
);
DEFINE_EVENT(scoutfs_try_delete_class, scoutfs_try_delete_no_item,
TP_PROTO(struct super_block *sb, u64 ino),
TP_ARGS(sb, ino)
);
TRACE_EVENT(scoutfs_try_delete_has_links,
TP_PROTO(struct super_block *sb, u64 ino, unsigned int nlink),
TP_ARGS(sb, ino, nlink),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, ino)
__field(unsigned int, nlink)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->ino = ino;
__entry->nlink = nlink;
),
TP_printk(SCSBF" ino %llu nlink %u", SCSB_TRACE_ARGS, __entry->ino,
__entry->nlink)
);
TRACE_EVENT(scoutfs_inode_orphan_delete,
TP_PROTO(struct super_block *sb, u64 ino, int ret),
TP_ARGS(sb, ino, ret),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, ino)
__field(int, ret)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->ino = ino;
__entry->ret = ret;
),
TP_printk(SCSBF" ino %llu ret %d", SCSB_TRACE_ARGS, __entry->ino,
__entry->ret)
);
TRACE_EVENT(scoutfs_delete_inode,
TP_PROTO(struct super_block *sb, u64 ino, umode_t mode, u64 size),
@@ -1060,6 +1210,32 @@ TRACE_EVENT(scoutfs_delete_inode,
__entry->mode, __entry->size)
);
TRACE_EVENT(scoutfs_delete_inode_end,
TP_PROTO(struct super_block *sb, u64 ino, umode_t mode, u64 size, int ret),
TP_ARGS(sb, ino, mode, size, ret),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(__u64, ino)
__field(umode_t, mode)
__field(__u64, size)
__field(int, ret)
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->ino = ino;
__entry->mode = mode;
__entry->size = size;
__entry->ret = ret;
),
TP_printk("dev %d,%d ino %llu, mode 0x%x size %llu, ret %d",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino,
__entry->mode, __entry->size, __entry->ret)
);
DECLARE_EVENT_CLASS(scoutfs_key_class,
TP_PROTO(struct super_block *sb, struct scoutfs_key *key),
TP_ARGS(sb, key),
@@ -2466,24 +2642,39 @@ TRACE_EVENT(scoutfs_block_dirty_ref,
);
TRACE_EVENT(scoutfs_get_file_block,
TP_PROTO(struct super_block *sb, u64 blkno, int flags),
TP_PROTO(struct super_block *sb, u64 blkno, int flags,
struct scoutfs_srch_block *srb),
TP_ARGS(sb, blkno, flags),
TP_ARGS(sb, blkno, flags, srb),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, blkno)
__field(int, flags)
__field(__u64, first_hash)
__field(__u64, first_ino)
__field(__u64, first_id)
__field(__u64, last_hash)
__field(__u64, last_ino)
__field(__u64, last_id)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->blkno = blkno;
__entry->flags = flags;
__entry->first_hash = __le64_to_cpu(srb->first.hash);
__entry->first_ino = __le64_to_cpu(srb->first.ino);
__entry->first_id = __le64_to_cpu(srb->first.id);
__entry->last_hash = __le64_to_cpu(srb->last.hash);
__entry->last_ino = __le64_to_cpu(srb->last.ino);
__entry->last_id = __le64_to_cpu(srb->last.id);
),
TP_printk(SCSBF" blkno %llu flags 0x%x",
SCSB_TRACE_ARGS, __entry->blkno, __entry->flags)
TP_printk(SCSBF" blkno %llu flags 0x%x first_hash 0x%llx first_ino %llu fist_id 0x%llx last_hash 0x%llx last_ino %llu last_id 0x%llx",
SCSB_TRACE_ARGS, __entry->blkno, __entry->flags,
__entry->first_hash, __entry->first_ino, __entry->first_id,
__entry->last_hash, __entry->last_ino, __entry->last_id)
);
TRACE_EVENT(scoutfs_block_stale,
@@ -3097,6 +3288,24 @@ TRACE_EVENT(scoutfs_ioc_search_xattrs,
__entry->ino, __entry->last_ino)
);
TRACE_EVENT(scoutfs_trigger_fired,
TP_PROTO(struct super_block *sb, const char *name),
TP_ARGS(sb, name),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(const char *, name)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->name = name;
),
TP_printk(SCSBF" %s", SCSB_TRACE_ARGS, __entry->name)
);
#endif /* _TRACE_SCOUTFS_H */
/* This part must be outside protection */

View File

@@ -41,6 +41,7 @@
#include "recov.h"
#include "omap.h"
#include "fence.h"
#include "triggers.h"
/*
* Every active mount can act as the server that listens on a net
@@ -1291,9 +1292,13 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
* meta was low so that deleted items are merged
* promptly and freed blocks can bring the client out of
* enospc.
*
* The trigger can be used to force a log merge in cases where
* a test only generates small amounts of change.
*/
finalize_ours = (lt->item_root.height > 2) ||
(le32_to_cpu(lt->meta_avail.flags) & SCOUTFS_ALLOC_FLAG_LOW);
(le32_to_cpu(lt->meta_avail.flags) & SCOUTFS_ALLOC_FLAG_LOW) ||
scoutfs_trigger(sb, LOG_MERGE_FORCE_FINALIZE_OURS);
trace_scoutfs_server_finalize_decision(sb, rid, saw_finalized, others_active,
ours_visible, finalize_ours, delay_ms,
@@ -1402,6 +1407,8 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
BUG_ON(err); /* inconsistent */
}
scoutfs_inc_counter(sb, log_merge_start);
/* we're done, caller can make forward progress */
break;
}
@@ -2509,6 +2516,8 @@ static int splice_log_merge_completions(struct super_block *sb,
queue_work(server->wq, &server->log_merge_free_work);
else
err_str = "deleting merge status item";
scoutfs_inc_counter(sb, log_merge_complete);
out:
if (upd_stat) {
init_log_merge_key(&key, SCOUTFS_LOG_MERGE_STATUS_ZONE, 0, 0);

View File

@@ -443,7 +443,7 @@ out:
sfl->blocks = cpu_to_le64(blk + 1);
if (bl) {
trace_scoutfs_get_file_block(sb, bl->blkno, flags);
trace_scoutfs_get_file_block(sb, bl->blkno, flags, bl->data);
}
*bl_ret = bl;
@@ -2293,8 +2293,10 @@ static void scoutfs_srch_compact_worker(struct work_struct *work)
out:
/* our allocators and files should be stable */
WARN_ON_ONCE(ret == -ESTALE);
if (ret < 0)
if (ret < 0) {
scoutfs_err(sb, "@@@ srch_compact_error %d", ret);
scoutfs_inc_counter(sb, srch_compact_error);
}
scoutfs_block_writer_forget_all(sb, &wri);
queue_compact_work(srinf, sc != NULL && sc->nr > 0 && ret == 0);

View File

@@ -18,6 +18,7 @@
#include "super.h"
#include "triggers.h"
#include "scoutfs_trace.h"
/*
* We have debugfs files we can write to which arm triggers which
@@ -39,6 +40,7 @@ struct scoutfs_triggers {
static char *names[] = {
[SCOUTFS_TRIGGER_BLOCK_REMOVE_STALE] = "block_remove_stale",
[SCOUTFS_TRIGGER_LOG_MERGE_FORCE_FINALIZE_OURS] = "log_merge_force_finalize_ours",
[SCOUTFS_TRIGGER_SRCH_COMPACT_LOGS_PAD_SAFE] = "srch_compact_logs_pad_safe",
[SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE] = "srch_force_log_rotate",
[SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE] = "srch_merge_stop_safe",
@@ -51,6 +53,7 @@ bool scoutfs_trigger_test_and_clear(struct super_block *sb, unsigned int t)
atomic_t *atom;
int old;
int mem;
bool fired;
BUG_ON(t >= SCOUTFS_TRIGGER_NR);
atom = &triggers->atomics[t];
@@ -64,7 +67,12 @@ bool scoutfs_trigger_test_and_clear(struct super_block *sb, unsigned int t)
mem = atomic_cmpxchg(atom, old, 0);
} while (mem && mem != old);
return !!mem;
fired = !!mem;
if (fired)
trace_scoutfs_trigger_fired(sb, names[t]);
return fired;
}
int scoutfs_setup_triggers(struct super_block *sb)

View File

@@ -3,6 +3,7 @@
enum scoutfs_trigger {
SCOUTFS_TRIGGER_BLOCK_REMOVE_STALE,
SCOUTFS_TRIGGER_LOG_MERGE_FORCE_FINALIZE_OURS,
SCOUTFS_TRIGGER_SRCH_COMPACT_LOGS_PAD_SAFE,
SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE,
SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE,

View File

@@ -498,3 +498,100 @@ t_restore_all_sysfs_mount_options() {
t_set_sysfs_mount_option $i $name "${_saved_opts[$ind]}"
done
}
t_force_log_merge() {
local sv=$(t_server_nr)
local merges_started
local last_merges_started
local merges_completed
local last_merges_completed
while true; do
last_merges_started=$(t_counter log_merge_start $sv)
last_merges_completed=$(t_counter log_merge_complete $sv)
t_trigger_arm_silent log_merge_force_finalize_ours $sv
t_sync_seq_index
while test "$(t_trigger_get log_merge_force_finalize_ours $sv)" == "1"; do
sleep .5
done
merges_started=$(t_counter log_merge_start $sv)
if (( merges_started > last_merges_started )); then
merges_completed=$(t_counter log_merge_complete $sv)
while (( merges_completed == last_merges_completed )); do
sleep .5
merges_completed=$(t_counter log_merge_complete $sv)
done
break
fi
done
}
declare -A _last_scan
t_get_orphan_scan_runs() {
local i
for i in $(t_fs_nrs); do
_last_scan[$i]=$(t_counter orphan_scan $i)
done
}
t_wait_for_orphan_scan_runs() {
local i
local scan
t_get_orphan_scan_runs
for i in $(t_fs_nrs); do
while true; do
scan=$(t_counter orphan_scan $i)
if (( scan != _last_scan[$i] )); then
break
fi
sleep .5
done
done
}
declare -A _last_empty
t_get_orphan_scan_empty() {
local i
for i in $(t_fs_nrs); do
_last_empty[$i]=$(t_counter orphan_scan_empty $i)
done
}
t_wait_for_no_orphans() {
local i;
local working;
local empty;
t_get_orphan_scan_empty
while true; do
working=0
t_wait_for_orphan_scan_runs
for i in $(t_fs_nrs); do
empty=$(t_counter orphan_scan_empty $i)
if (( empty == _last_empty[$i] )); then
(( working++ ))
else
(( _last_empty[$i] = empty ))
fi
done
if (( working == 0 )); then
break
fi
sleep 1
done
}

View File

@@ -17,7 +17,7 @@ ino not found in dseq index
mount 0 contents after mount 1 rm: contents
ino found in dseq index
ino found in dseq index
stat: cannot stat '/mnt/test/test/inode-deletion/file': No such file or directory
stat: cannot stat '/mnt/test/test/inode-deletion/badfile': No such file or directory
ino not found in dseq index
ino not found in dseq index
== lots of deletions use one open map

View File

@@ -1,37 +1,7 @@
== initialize per-mount values
== arm compaction triggers
trigger srch_compact_logs_pad_safe armed: 1
trigger srch_merge_stop_safe armed: 1
trigger srch_compact_logs_pad_safe armed: 1
trigger srch_merge_stop_safe armed: 1
trigger srch_compact_logs_pad_safe armed: 1
trigger srch_merge_stop_safe armed: 1
trigger srch_compact_logs_pad_safe armed: 1
trigger srch_merge_stop_safe armed: 1
trigger srch_compact_logs_pad_safe armed: 1
trigger srch_merge_stop_safe armed: 1
== compact more often
== create padded sorted inputs by forcing log rotation
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_compact_logs_pad_safe armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_compact_logs_pad_safe armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_compact_logs_pad_safe armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_force_log_rotate armed: 1
trigger srch_compact_logs_pad_safe armed: 1
== compaction of padded should stop at safe
== verify no compaction errors
== cleanup

View File

@@ -137,6 +137,9 @@ while true; do
test -n "$2" || die "-l must have a nr iterations argument"
test "$2" -eq "$2" 2>/dev/null || die "-l <nr> argument must be an integer"
T_LOOP_ITER="$2"
# when looping, break after first failure
T_ABORT="1"
shift
;;
-M)
@@ -399,30 +402,44 @@ if [ -n "$T_INSMOD" ]; then
cmd insmod "$T_MODULE"
fi
if [ -n "$T_TRACE_MULT" ]; then
orig_trace_size=$(cat /sys/kernel/debug/tracing/buffer_size_kb)
mult_trace_size=$((orig_trace_size * T_TRACE_MULT))
msg "increasing trace buffer size from $orig_trace_size KiB to $mult_trace_size KiB"
echo $mult_trace_size > /sys/kernel/debug/tracing/buffer_size_kb
fi
start_tracing() {
if [ -n "$T_TRACE_MULT" ]; then
orig_trace_size=1408
mult_trace_size=$((orig_trace_size * T_TRACE_MULT))
msg "increasing trace buffer size from $orig_trace_size KiB to $mult_trace_size KiB"
echo $mult_trace_size > /sys/kernel/debug/tracing/buffer_size_kb
fi
nr_globs=${#T_TRACE_GLOB[@]}
if [ $nr_globs -gt 0 ]; then
echo 0 > /sys/kernel/debug/tracing/events/scoutfs/enable
nr_globs=${#T_TRACE_GLOB[@]}
if [ $nr_globs -gt 0 ]; then
echo 0 > /sys/kernel/debug/tracing/events/scoutfs/enable
for g in "${T_TRACE_GLOB[@]}"; do
for e in /sys/kernel/debug/tracing/events/scoutfs/$g/enable; do
if test -w "$e"; then
echo 1 > "$e"
else
die "-t glob '$g' matched no scoutfs events"
fi
for g in "${T_TRACE_GLOB[@]}"; do
for e in /sys/kernel/debug/tracing/events/scoutfs/$g/enable; do
if test -w "$e"; then
echo 1 > "$e"
else
die "-t glob '$g' matched no scoutfs events"
fi
done
done
done
nr_events=$(cat /sys/kernel/debug/tracing/set_event | wc -l)
msg "enabled $nr_events trace events from $nr_globs -t globs"
fi
nr_events=$(cat /sys/kernel/debug/tracing/set_event | wc -l)
msg "enabled $nr_events trace events from $nr_globs -t globs"
fi
}
stop_tracing() {
if [ -n "$T_TRACE_GLOB" -o -n "$T_TRACE_PRINTK" ]; then
msg "saving traces and disabling tracing"
echo 0 > /sys/kernel/debug/tracing/events/scoutfs/enable
echo 0 > /sys/kernel/debug/tracing/options/trace_printk
cat /sys/kernel/debug/tracing/trace | gzip > "$T_RESULTS/traces.gz"
if [ -n "$orig_trace_size" ]; then
echo $orig_trace_size > /sys/kernel/debug/tracing/buffer_size_kb
fi
fi
}
if [ -n "$T_TRACE_PRINTK" ]; then
echo "$T_TRACE_PRINTK" > /sys/kernel/debug/tracing/options/trace_printk
@@ -599,24 +616,26 @@ passed=0
skipped=0
failed=0
skipped_permitted=0
for t in $tests; do
# tests has basenames from sequence, get path and name
t="tests/$t"
test_name=$(basename "$t" | sed -e 's/.sh$//')
for iter in $(seq 1 $T_LOOP_ITER); do
# get stats from previous pass
last="$T_RESULTS/last-passed-test-stats"
stats=$(grep -s "^$test_name " "$last" | cut -d " " -f 2-)
test -n "$stats" && stats="last: $stats"
printf " %-30s $stats" "$test_name"
start_tracing
# mark in dmesg as to what test we are running
echo "run scoutfs test $test_name" > /dev/kmsg
for t in $tests; do
# tests has basenames from sequence, get path and name
t="tests/$t"
test_name=$(basename "$t" | sed -e 's/.sh$//')
# let the test get at its extra files
T_EXTRA="$T_TESTS/extra/$test_name"
# get stats from previous pass
last="$T_RESULTS/last-passed-test-stats"
stats=$(grep -s "^$test_name " "$last" | cut -d " " -f 2-)
test -n "$stats" && stats="last: $stats"
printf " %-30s $stats" "$test_name"
for iter in $(seq 1 $T_LOOP_ITER); do
# mark in dmesg as to what test we are running
echo "run scoutfs test $test_name" > /dev/kmsg
# let the test get at its extra files
T_EXTRA="$T_TESTS/extra/$test_name"
# create a temporary dir and file path for the test
T_TMPDIR="$T_RESULTS/tmp/$test_name"
@@ -703,55 +722,43 @@ for t in $tests; do
sts=$T_FAIL_STATUS
fi
# stop looping if we didn't pass
if [ "$sts" != "$T_PASS_STATUS" ]; then
break;
# show and record the result of the test
if [ "$sts" == "$T_PASS_STATUS" ]; then
echo " passed: $stats"
((passed++))
# save stats for passed test
grep -s -v "^$test_name " "$last" > "$last.tmp"
echo "$test_name $stats" >> "$last.tmp"
mv -f "$last.tmp" "$last"
elif [ "$sts" == "$T_SKIP_PERMITTED_STATUS" ]; then
echo " [ skipped (permitted): $message ]"
echo "$test_name skipped (permitted) $message " >> "$T_RESULTS/skip.log"
((skipped_permitted++))
elif [ "$sts" == "$T_SKIP_STATUS" ]; then
echo " [ skipped: $message ]"
echo "$test_name $message" >> "$T_RESULTS/skip.log"
((skipped++))
elif [ "$sts" == "$T_FAIL_STATUS" ]; then
echo " [ failed: $message ]"
echo "$test_name $message" >> "$T_RESULTS/fail.log"
((failed++))
if [ -n "$T_ABORT" ]; then
stop_tracing
die "aborting after first failure"
fi
fi
# record results for TAP format output
t_tap_progress $test_name $sts
((testcount++))
done
# show and record the result of the test
if [ "$sts" == "$T_PASS_STATUS" ]; then
echo " passed: $stats"
((passed++))
# save stats for passed test
grep -s -v "^$test_name " "$last" > "$last.tmp"
echo "$test_name $stats" >> "$last.tmp"
mv -f "$last.tmp" "$last"
elif [ "$sts" == "$T_SKIP_PERMITTED_STATUS" ]; then
echo " [ skipped (permitted): $message ]"
echo "$test_name skipped (permitted) $message " >> "$T_RESULTS/skip.log"
((skipped_permitted++))
elif [ "$sts" == "$T_SKIP_STATUS" ]; then
echo " [ skipped: $message ]"
echo "$test_name $message" >> "$T_RESULTS/skip.log"
((skipped++))
elif [ "$sts" == "$T_FAIL_STATUS" ]; then
echo " [ failed: $message ]"
echo "$test_name $message" >> "$T_RESULTS/fail.log"
((failed++))
test -n "$T_ABORT" && die "aborting after first failure"
fi
# record results for TAP format output
t_tap_progress $test_name $sts
((testcount++))
stop_tracing
done
msg "all tests run: $passed passed, $skipped skipped, $skipped_permitted skipped (permitted), $failed failed"
if [ -n "$T_TRACE_GLOB" -o -n "$T_TRACE_PRINTK" ]; then
msg "saving traces and disabling tracing"
echo 0 > /sys/kernel/debug/tracing/events/scoutfs/enable
echo 0 > /sys/kernel/debug/tracing/options/trace_printk
cat /sys/kernel/debug/tracing/trace > "$T_RESULTS/traces"
if [ -n "$orig_trace_size" ]; then
echo $orig_trace_size > /sys/kernel/debug/tracing/buffer_size_kb
fi
fi
if [ "$skipped" == 0 -a "$failed" == 0 ]; then
msg "all tests passed"
unmount_all

View File

@@ -61,18 +61,28 @@ rm -f "$T_D1/file"
check_ino_index "$ino" "$dseq" "$T_M0"
check_ino_index "$ino" "$dseq" "$T_M1"
# Hurry along the orphan scanners. If any are currently asleep, we will
# have to wait at least their current scan interval before they wake up,
# run, and notice their new interval.
t_save_all_sysfs_mount_options orphan_scan_delay_ms
t_set_all_sysfs_mount_options orphan_scan_delay_ms 500
t_wait_for_orphan_scan_runs
echo "== unlink wait for open on other mount"
echo "contents" > "$T_D0/file"
ino=$(stat -c "%i" "$T_D0/file")
dseq=$(scoutfs stat -s data_seq "$T_D0/file")
exec {FD}<"$T_D0/file"
rm -f "$T_D1/file"
echo "contents" > "$T_D0/badfile"
ino=$(stat -c "%i" "$T_D0/badfile")
dseq=$(scoutfs stat -s data_seq "$T_D0/badfile")
exec {FD}<"$T_D0/badfile"
rm -f "$T_D1/badfile"
echo "mount 0 contents after mount 1 rm: $(cat <&$FD)"
check_ino_index "$ino" "$dseq" "$T_M0"
check_ino_index "$ino" "$dseq" "$T_M1"
exec {FD}>&- # close
# we know that revalidating will unhash the remote dentry
stat "$T_D0/file" 2>&1 | sed 's/cannot statx/cannot stat/' | t_filter_fs
stat "$T_D0/badfile" 2>&1 | sed 's/cannot statx/cannot stat/' | t_filter_fs
t_force_log_merge
# wait for orphan scanners to pick up the unlinked inode and become idle
t_wait_for_no_orphans
check_ino_index "$ino" "$dseq" "$T_M0"
check_ino_index "$ino" "$dseq" "$T_M1"
@@ -83,16 +93,20 @@ rm -f "$T_D0/dir"/files-*
rmdir "$T_D0/dir"
echo "== open files survive remote scanning orphans"
echo "contents" > "$T_D0/file"
ino=$(stat -c "%i" "$T_D0/file")
dseq=$(scoutfs stat -s data_seq "$T_D0/file")
exec {FD}<"$T_D0/file"
rm -f "$T_D0/file"
echo "contents" > "$T_D0/lastfile"
ino=$(stat -c "%i" "$T_D0/lastfile")
dseq=$(scoutfs stat -s data_seq "$T_D0/lastfile")
exec {FD}<"$T_D0/lastfile"
rm -f "$T_D0/lastfile"
t_umount 1
t_mount 1
echo "mount 0 contents after mount 1 remounted: $(cat <&$FD)"
exec {FD}>&- # close
t_force_log_merge
t_wait_for_no_orphans
check_ino_index "$ino" "$dseq" "$T_M0"
check_ino_index "$ino" "$dseq" "$T_M1"
t_restore_all_sysfs_mount_options orphan_scan_delay_ms
t_pass

View File

@@ -31,8 +31,8 @@ trap restore_compact_delay EXIT
echo "== arm compaction triggers"
for nr in $(t_fs_nrs); do
t_trigger_arm srch_compact_logs_pad_safe $nr
t_trigger_arm srch_merge_stop_safe $nr
t_trigger_arm_silent srch_compact_logs_pad_safe $nr
t_trigger_arm_silent srch_merge_stop_safe $nr
done
echo "== compact more often"
@@ -44,7 +44,7 @@ echo "== create padded sorted inputs by forcing log rotation"
sv=$(t_server_nr)
for i in $(seq 1 $COMPACT_NR); do
for j in $(seq 1 $COMPACT_NR); do
t_trigger_arm srch_force_log_rotate $sv
t_trigger_arm_silent srch_force_log_rotate $sv
seq -f "f-$i-$j-$SEQF" 1 10 | \
bulk_create_paths -X "scoutfs.srch.t-srch-safe-merge-pos" -d "$T_D0" > \
@@ -59,7 +59,7 @@ for i in $(seq 1 $COMPACT_NR); do
while test $padded == 0 && sleep .5; do
for nr in $(t_fs_nrs); do
if [ "$(t_trigger_get srch_compact_logs_pad_safe $nr)" == "0" ]; then
t_trigger_arm srch_compact_logs_pad_safe $nr
t_trigger_arm_silent srch_compact_logs_pad_safe $nr
padded=1
break
fi

View File

@@ -4,12 +4,6 @@
%{!?_release: %global _release 0.%{pkg_date}git%{pkg_git_hash}}
%if 0%{?rhel} && 0%{?rhel} < 10
%global tuned_profiles_dir %{_prefix}/lib/tuned
%else
%global tuned_profiles_dir %{_prefix}/lib/tuned/profiles
%endif
Name: scoutfs-utils
Summary: scoutfs user space utilities
Version: %{pkg_version}
@@ -63,8 +57,6 @@ install -m 644 -D src/format.h $RPM_BUILD_ROOT%{_includedir}/scoutfs/format.h
install -m 755 -D fenced/scoutfs-fenced $RPM_BUILD_ROOT%{_libexecdir}/scoutfs-fenced/scoutfs-fenced
install -m 644 -D fenced/scoutfs-fenced.service $RPM_BUILD_ROOT%{_unitdir}/scoutfs-fenced.service
install -m 644 -D fenced/scoutfs-fenced.conf.example $RPM_BUILD_ROOT%{_sysconfdir}/scoutfs/scoutfs-fenced.conf.example
install -m 644 -D tuned/tuned.conf $RPM_BUILD_ROOT%{tuned_profiles_dir}/scoutfs/tuned.conf
install -m 644 -D tuned/40-scoutfs.conf $RPM_BUILD_ROOT%{_prefix}/lib/tuned/recommend.d/40-scoutfs.conf
%files
%defattr(644,root,root,755)
@@ -74,8 +66,6 @@ install -m 644 -D tuned/40-scoutfs.conf $RPM_BUILD_ROOT%{_prefix}/lib/tuned/reco
%defattr(755,root,root,755)
%{_sbindir}/scoutfs
%{_libexecdir}/scoutfs-fenced
%{tuned_profiles_dir}/scoutfs/tuned.conf
%{_prefix}/lib/tuned/recommend.d/40-scoutfs.conf
%files -n scoutfs-devel
%defattr(644,root,root,755)

View File

@@ -1,9 +0,0 @@
#
# scoutfs tuned recommendation
#
# If the system has support for mounting scoutfs filesystems, which is
# valid for client mounts and quorum mounts. We then always recommend
# the scoutfs profile.
[scoutfs]
/proc/filesystems=scoutfs

View File

@@ -1,40 +0,0 @@
#
# ScoutFS specific tuned profile
#
# The parameters below are a mix of settings present in the throughput-performance
# profile as well as the latency-performance profile. Generally speaking, we
# want to encourage the system to avoid swap and accumulating large amounts of
# dirty data, as this can cause reclaim to lead to congestion.
# Enable this profile with `$ sudo tuned-adm profile scoutfs`
# linux default values are marked with [<value>] for reference.
[main]
summary=Optimize for production scoutfs deployment
description=Configures the system for production scoutfs filesystem server deployment.
# network-throughput sets some larger buffers useful for 40gbe deployments
# network-throughput also inherits throughput-performance
include=network-throughput
[vm]
# throughput-performance sets dirty_bytes to 40% (much larger than linux default), but
# this allows the accumulation of large backlogs of writeback. We prefer to writeback
# often and early to avoid congestion [20%]
dirty_bytes = 10%
# start writing back at this amount [10%]
dirty_background_bytes = 5%
[sysctl]
# the kernel default is 60. Lower it to instruct the kernel that swapping is
# expensive and we want to avoid it. We assume scoutfs deployments have ample
# available RAM. [60]
vm.swappiness = 10
# increase pdflush runs so it can more aggressively write out dirty data [500]
vm.dirty_writeback_centisecs = 300
# decrease time dirty data will linger before being written back [3000]
vm.dirty_expire_centisecs = 2000