diff --git a/kmod/src/counters.h b/kmod/src/counters.h index 5c048393..2dbc1036 100644 --- a/kmod/src/counters.h +++ b/kmod/src/counters.h @@ -147,6 +147,8 @@ EXPAND_COUNTER(lock_wait) \ EXPAND_COUNTER(log_merge_no_finalized) \ EXPAND_COUNTER(log_merge_wait_timeout) \ + EXPAND_COUNTER(merges_started) \ + EXPAND_COUNTER(merges_completed) \ EXPAND_COUNTER(net_dropped_response) \ EXPAND_COUNTER(net_send_bytes) \ EXPAND_COUNTER(net_send_error) \ @@ -160,6 +162,7 @@ EXPAND_COUNTER(orphan_scan) \ EXPAND_COUNTER(orphan_scan_attempts) \ EXPAND_COUNTER(orphan_scan_cached) \ + EXPAND_COUNTER(orphan_scan_empty) \ EXPAND_COUNTER(orphan_scan_error) \ EXPAND_COUNTER(orphan_scan_item) \ EXPAND_COUNTER(orphan_scan_omap_set) \ diff --git a/kmod/src/inode.c b/kmod/src/inode.c index 15d29d64..9f7cb18f 100644 --- a/kmod/src/inode.c +++ b/kmod/src/inode.c @@ -1637,10 +1637,14 @@ int scoutfs_inode_orphan_delete(struct super_block *sb, u64 ino, struct scoutfs_ struct scoutfs_lock *primary) { struct scoutfs_key key; + int ret; init_orphan_key(&key, ino); - return scoutfs_item_delete_force(sb, &key, lock, primary); + ret = scoutfs_item_delete_force(sb, &key, lock, primary); + trace_scoutfs_inode_orphan_delete(sb, ino, ret); + + return ret; } /* @@ -1722,6 +1726,8 @@ out: scoutfs_release_trans(sb); scoutfs_inode_index_unlock(sb, &ind_locks); + trace_scoutfs_delete_inode_end(sb, ino, mode, size, ret); + return ret; } @@ -1818,7 +1824,8 @@ out: * on a bit in the lock data so that we only have one deletion attempt * per inode under this mount's cluster lock. */ -static int try_delete_inode_items(struct super_block *sb, u64 ino) +static int try_delete_inode_items(struct super_block *sb, u64 ino, + bool *work_todo) { struct inode_deletion_lock_data *ldata = NULL; struct scoutfs_lock *orph_lock = NULL; @@ -1830,6 +1837,8 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino) int bit_nr; int ret; + trace_scoutfs_try_delete(sb, ino); + ret = scoutfs_lock_ino(sb, SCOUTFS_LOCK_WRITE, 0, ino, &lock); if (ret < 0) goto out; @@ -1842,6 +1851,9 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino) /* only one local attempt per inode at a time */ if (test_and_set_bit(bit_nr, ldata->trying)) { + trace_scoutfs_try_delete_local_busy(sb, ino); + if (work_todo) + *work_todo = true; ret = 0; goto out; } @@ -1849,6 +1861,9 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino) /* can't delete if it's cached in local or remote mounts */ if (scoutfs_omap_test(sb, ino) || test_bit_le(bit_nr, ldata->map.bits)) { + trace_scoutfs_try_delete_cached(sb, ino); + if (work_todo) + *work_todo = true; ret = 0; goto out; } @@ -1856,12 +1871,17 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino) scoutfs_inode_init_key(&key, ino); ret = lookup_inode_item(sb, &key, &sinode, lock); if (ret < 0) { - if (ret == -ENOENT) + if (ret == -ENOENT) { + trace_scoutfs_try_delete_no_item(sb, ino); ret = 0; + } goto out; } if (le32_to_cpu(sinode.nlink) > 0) { + if (work_todo) + *work_todo = true; + trace_scoutfs_try_delete_has_links(sb, ino, le32_to_cpu(sinode.nlink)); ret = 0; goto out; } @@ -1871,8 +1891,12 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino) goto out; ret = delete_inode_items(sb, ino, &sinode, lock, orph_lock); - if (ret == 0) + if (ret == 0) { + if (work_todo) + *work_todo = true; + scoutfs_inc_counter(sb, inode_deleted); + } out: if (clear_trying) @@ -1913,7 +1937,7 @@ void scoutfs_evict_inode(struct inode *inode) scoutfs_omap_clear(sb, ino); if (scoutfs_lock_is_covered(sb, &si->ino_lock_cov) && inode->i_nlink == 0) - try_delete_inode_items(sb, scoutfs_ino(inode)); + try_delete_inode_items(sb, scoutfs_ino(inode), NULL); } clear_inode(inode); @@ -2085,11 +2109,14 @@ static void inode_orphan_scan_worker(struct work_struct *work) SCOUTFS_BTREE_ITEM_REF(iref); struct scoutfs_key last; struct scoutfs_key key; + bool work_todo = false; u64 group_nr; int bit_nr; u64 ino; int ret; + trace_scoutfs_orphan_scan_start(sb); + scoutfs_inc_counter(sb, orphan_scan); init_orphan_key(&last, U64_MAX); @@ -2109,8 +2136,10 @@ static void inode_orphan_scan_worker(struct work_struct *work) init_orphan_key(&key, ino); ret = scoutfs_btree_next(sb, &roots.fs_root, &key, &iref); if (ret < 0) { - if (ret == -ENOENT) + if (ret == -ENOENT) { + trace_scoutfs_orphan_scan_work(sb, 0); break; + } goto out; } @@ -2125,6 +2154,7 @@ static void inode_orphan_scan_worker(struct work_struct *work) /* locally cached inodes will try to delete as they evict */ if (scoutfs_omap_test(sb, ino)) { + work_todo = true; scoutfs_inc_counter(sb, orphan_scan_cached); continue; } @@ -2140,13 +2170,16 @@ static void inode_orphan_scan_worker(struct work_struct *work) /* remote cached inodes will also try to delete */ if (test_bit_le(bit_nr, omap.bits)) { + work_todo = true; scoutfs_inc_counter(sb, orphan_scan_omap_set); continue; } /* seemingly orphaned and unused, get locks and check for sure */ scoutfs_inc_counter(sb, orphan_scan_attempts); - ret = try_delete_inode_items(sb, ino); + trace_scoutfs_orphan_scan_work(sb, ino); + ret = try_delete_inode_items(sb, ino, &work_todo); + trace_scoutfs_orphan_scan_end(sb, ino, ret); } ret = 0; @@ -2155,6 +2188,13 @@ out: if (ret < 0) scoutfs_inc_counter(sb, orphan_scan_error); + trace_scoutfs_orphan_scan_empty(sb, !work_todo); + + if (!work_todo) + scoutfs_inc_counter(sb, orphan_scan_empty); + + trace_scoutfs_orphan_scan_stop(sb); + scoutfs_inode_schedule_orphan_dwork(sb); } diff --git a/kmod/src/scoutfs_trace.h b/kmod/src/scoutfs_trace.h index 1316418b..ac88a82b 100644 --- a/kmod/src/scoutfs_trace.h +++ b/kmod/src/scoutfs_trace.h @@ -789,6 +789,97 @@ TRACE_EVENT(scoutfs_inode_walk_writeback, __entry->ino, __entry->write, __entry->ret) ); +TRACE_EVENT(scoutfs_orphan_scan_start, + TP_PROTO(struct super_block *sb), + + TP_ARGS(sb), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + ), + + TP_printk(SCSBF, SCSB_TRACE_ARGS) +); + +TRACE_EVENT(scoutfs_orphan_scan_stop, + TP_PROTO(struct super_block *sb), + + TP_ARGS(sb), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + ), + + TP_printk(SCSBF, SCSB_TRACE_ARGS) +); + +TRACE_EVENT(scoutfs_orphan_scan_work, + TP_PROTO(struct super_block *sb, __u64 ino), + + TP_ARGS(sb, ino), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + __field(__u64, ino) + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + __entry->ino = ino; + ), + + TP_printk(SCSBF" ino %llu", SCSB_TRACE_ARGS, + __entry->ino) +); + +TRACE_EVENT(scoutfs_orphan_scan_end, + TP_PROTO(struct super_block *sb, __u64 ino, int ret), + + TP_ARGS(sb, ino, ret), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + __field(__u64, ino) + __field(int, ret) + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + __entry->ino = ino; + __entry->ret = ret; + ), + + TP_printk(SCSBF" ino %llu ret %d", SCSB_TRACE_ARGS, + __entry->ino, __entry->ret) +); + +TRACE_EVENT(scoutfs_orphan_scan_empty, + TP_PROTO(struct super_block *sb, bool found_work), + + TP_ARGS(sb, found_work), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + __field(bool, found_work) + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + __entry->found_work = found_work; + ), + + TP_printk(SCSBF" found_work %d", SCSB_TRACE_ARGS, + __entry->found_work) +); + DECLARE_EVENT_CLASS(scoutfs_lock_info_class, TP_PROTO(struct super_block *sb, struct lock_info *linfo), @@ -1036,6 +1127,136 @@ TRACE_EVENT(scoutfs_orphan_inode, MINOR(__entry->dev), __entry->ino) ); +TRACE_EVENT(scoutfs_try_delete, + TP_PROTO(struct super_block *sb, u64 ino), + + TP_ARGS(sb, ino), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + __field(__u64, ino) + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + __entry->ino = ino; + ), + + TP_printk(SCSBF" ino %llu", SCSB_TRACE_ARGS, __entry->ino) +); + +TRACE_EVENT(scoutfs_try_delete_local_busy, + TP_PROTO(struct super_block *sb, u64 ino), + + TP_ARGS(sb, ino), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + __field(__u64, ino) + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + __entry->ino = ino; + ), + + TP_printk(SCSBF" ino %llu", SCSB_TRACE_ARGS, __entry->ino) +); + +TRACE_EVENT(scoutfs_try_delete_cached, + TP_PROTO(struct super_block *sb, u64 ino), + + TP_ARGS(sb, ino), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + __field(__u64, ino) + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + __entry->ino = ino; + ), + + TP_printk(SCSBF" ino %llu", SCSB_TRACE_ARGS, __entry->ino) +); + +TRACE_EVENT(scoutfs_try_delete_no_item, + TP_PROTO(struct super_block *sb, u64 ino), + + TP_ARGS(sb, ino), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + __field(__u64, ino) + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + __entry->ino = ino; + ), + + TP_printk(SCSBF" ino %llu", SCSB_TRACE_ARGS, __entry->ino) +); + +TRACE_EVENT(scoutfs_try_delete_has_links, + TP_PROTO(struct super_block *sb, u64 ino, unsigned int nlink), + + TP_ARGS(sb, ino, nlink), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + __field(__u64, ino) + __field(unsigned int, nlink) + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + __entry->ino = ino; + __entry->nlink = nlink; + ), + + TP_printk(SCSBF" ino %llu nlink %u", SCSB_TRACE_ARGS, __entry->ino, + __entry->nlink) +); + +TRACE_EVENT(scoutfs_inode_orphan_delete, + TP_PROTO(struct super_block *sb, u64 ino, int ret), + + TP_ARGS(sb, ino, ret), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + __field(__u64, ino) + __field(int, ret) + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + __entry->ino = ino; + __entry->ret = ret; + ), + + TP_printk(SCSBF" ino %llu ret %d", SCSB_TRACE_ARGS, __entry->ino, + __entry->ret) +); + +TRACE_EVENT(scoutfs_server_finalize_trigger, + TP_PROTO(struct super_block *sb), + + TP_ARGS(sb), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + ), + + TP_printk(SCSBF, SCSB_TRACE_ARGS) +); + TRACE_EVENT(scoutfs_delete_inode, TP_PROTO(struct super_block *sb, u64 ino, umode_t mode, u64 size), @@ -1060,6 +1281,32 @@ TRACE_EVENT(scoutfs_delete_inode, __entry->mode, __entry->size) ); +TRACE_EVENT(scoutfs_delete_inode_end, + TP_PROTO(struct super_block *sb, u64 ino, umode_t mode, u64 size, int ret), + + TP_ARGS(sb, ino, mode, size, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(__u64, ino) + __field(umode_t, mode) + __field(__u64, size) + __field(int, ret) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->ino = ino; + __entry->mode = mode; + __entry->size = size; + __entry->ret = ret; + ), + + TP_printk("dev %d,%d ino %llu, mode 0x%x size %llu, ret %d", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, + __entry->mode, __entry->size, __entry->ret) +); + DECLARE_EVENT_CLASS(scoutfs_key_class, TP_PROTO(struct super_block *sb, struct scoutfs_key *key), TP_ARGS(sb, key), diff --git a/kmod/src/server.c b/kmod/src/server.c index ac604e74..79d1eec0 100644 --- a/kmod/src/server.c +++ b/kmod/src/server.c @@ -41,6 +41,7 @@ #include "recov.h" #include "omap.h" #include "fence.h" +#include "triggers.h" /* * Every active mount can act as the server that listens on a net @@ -1291,9 +1292,17 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l * meta was low so that deleted items are merged * promptly and freed blocks can bring the client out of * enospc. + * + * The trigger can be used to force a log merge in cases where + * a test only generates small amounts of change. */ finalize_ours = (lt->item_root.height > 2) || (le32_to_cpu(lt->meta_avail.flags) & SCOUTFS_ALLOC_FLAG_LOW); + if (!finalize_ours) { + finalize_ours = scoutfs_trigger(sb, FINALIZE_OURS); + if (finalize_ours) + trace_scoutfs_server_finalize_trigger(sb); + } trace_scoutfs_server_finalize_decision(sb, rid, saw_finalized, others_active, ours_visible, finalize_ours, delay_ms, @@ -1402,6 +1411,8 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l BUG_ON(err); /* inconsistent */ } + scoutfs_inc_counter(sb, merges_started); + /* we're done, caller can make forward progress */ break; } @@ -2509,6 +2520,8 @@ static int splice_log_merge_completions(struct super_block *sb, queue_work(server->wq, &server->log_merge_free_work); else err_str = "deleting merge status item"; + + scoutfs_inc_counter(sb, merges_completed); out: if (upd_stat) { init_log_merge_key(&key, SCOUTFS_LOG_MERGE_STATUS_ZONE, 0, 0); diff --git a/kmod/src/triggers.c b/kmod/src/triggers.c index 8e46236d..6c490c02 100644 --- a/kmod/src/triggers.c +++ b/kmod/src/triggers.c @@ -43,6 +43,7 @@ static char *names[] = { [SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE] = "srch_force_log_rotate", [SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE] = "srch_merge_stop_safe", [SCOUTFS_TRIGGER_STATFS_LOCK_PURGE] = "statfs_lock_purge", + [SCOUTFS_TRIGGER_FINALIZE_OURS] = "finalize_ours", }; bool scoutfs_trigger_test_and_clear(struct super_block *sb, unsigned int t) diff --git a/kmod/src/triggers.h b/kmod/src/triggers.h index f2374dec..0e821ddb 100644 --- a/kmod/src/triggers.h +++ b/kmod/src/triggers.h @@ -7,6 +7,7 @@ enum scoutfs_trigger { SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE, SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE, SCOUTFS_TRIGGER_STATFS_LOCK_PURGE, + SCOUTFS_TRIGGER_FINALIZE_OURS, SCOUTFS_TRIGGER_NR, }; diff --git a/tests/funcs/fs.sh b/tests/funcs/fs.sh index 78a8420b..17274ed9 100644 --- a/tests/funcs/fs.sh +++ b/tests/funcs/fs.sh @@ -498,3 +498,100 @@ t_restore_all_sysfs_mount_options() { t_set_sysfs_mount_option $i $name "${_saved_opts[$ind]}" done } + +t_force_log_merge() { + local sv=$(t_server_nr) + local merges_started + local last_merges_started + local merges_completed + local last_merges_completed + + while true; do + last_merges_started=$(t_counter merges_started $sv) + last_merges_completed=$(t_counter merges_completed $sv) + + t_trigger_arm_silent finalize_ours $sv + + t_sync_seq_index + + while test "$(t_trigger_get finalize_ours $sv)" == "1"; do + sleep .5 + done + + merges_started=$(t_counter merges_started $sv) + + if (( merges_started > last_merges_started )); then + merges_completed=$(t_counter merges_completed $sv) + + while (( merges_completed == last_merges_completed )); do + sleep .5 + merges_completed=$(t_counter merges_completed $sv) + done + break + fi + done +} + +declare -A _last_scan +t_get_orphan_scan_runs() { + local i + + for i in $(t_fs_nrs); do + _last_scan[$i]=$(t_counter orphan_scan $i) + done +} + +t_wait_for_orphan_scan_runs() { + local i + local scan + + t_get_orphan_scan_runs + + for i in $(t_fs_nrs); do + while true; do + scan=$(t_counter orphan_scan $i) + if (( scan != _last_scan[$i] )); then + break + fi + sleep .5 + done + done +} + +declare -A _last_empty +t_get_orphan_scan_empty() { + local i + + for i in $(t_fs_nrs); do + _last_empty[$i]=$(t_counter orphan_scan_empty $i) + done +} + +t_wait_for_no_orphans() { + local i; + local working; + local empty; + + t_get_orphan_scan_empty + + while true; do + working=0 + + t_wait_for_orphan_scan_runs + + for i in $(t_fs_nrs); do + empty=$(t_counter orphan_scan_empty $i) + if (( empty == _last_empty[$i] )); then + (( working++ )) + else + (( _last_empty[$i] = empty )) + fi + done + + if (( working == 0 )); then + break + fi + + sleep 1 + done +} diff --git a/tests/golden/inode-deletion b/tests/golden/inode-deletion index f586d563..930df05e 100644 --- a/tests/golden/inode-deletion +++ b/tests/golden/inode-deletion @@ -17,7 +17,7 @@ ino not found in dseq index mount 0 contents after mount 1 rm: contents ino found in dseq index ino found in dseq index -stat: cannot stat '/mnt/test/test/inode-deletion/file': No such file or directory +stat: cannot stat '/mnt/test/test/inode-deletion/badfile': No such file or directory ino not found in dseq index ino not found in dseq index == lots of deletions use one open map diff --git a/tests/tests/inode-deletion.sh b/tests/tests/inode-deletion.sh index 9324d035..eb6a60d6 100644 --- a/tests/tests/inode-deletion.sh +++ b/tests/tests/inode-deletion.sh @@ -61,18 +61,30 @@ rm -f "$T_D1/file" check_ino_index "$ino" "$dseq" "$T_M0" check_ino_index "$ino" "$dseq" "$T_M1" +# Hurry along the orphan scanners. If any are currently asleep, we will +# have to wait at least their current scan interval before they wake up, +# run, and notice their new interval. Assume they're all currently the +# same interval and sleep for two of those (so it has some time to run). +cur_delay=$(t_get_sysfs_mount_option 0 orphan_scan_delay_ms) +t_save_all_sysfs_mount_options orphan_scan_delay_ms +t_set_all_sysfs_mount_options orphan_scan_delay_ms 500 +sleep $(( (cur_delay / 1000) * 2 )) + echo "== unlink wait for open on other mount" -echo "contents" > "$T_D0/file" -ino=$(stat -c "%i" "$T_D0/file") -dseq=$(scoutfs stat -s data_seq "$T_D0/file") -exec {FD}<"$T_D0/file" -rm -f "$T_D1/file" +echo "contents" > "$T_D0/badfile" +ino=$(stat -c "%i" "$T_D0/badfile") +dseq=$(scoutfs stat -s data_seq "$T_D0/badfile") +exec {FD}<"$T_D0/badfile" +rm -f "$T_D1/badfile" echo "mount 0 contents after mount 1 rm: $(cat <&$FD)" check_ino_index "$ino" "$dseq" "$T_M0" check_ino_index "$ino" "$dseq" "$T_M1" exec {FD}>&- # close # we know that revalidating will unhash the remote dentry -stat "$T_D0/file" 2>&1 | sed 's/cannot statx/cannot stat/' | t_filter_fs +stat "$T_D0/badfile" 2>&1 | sed 's/cannot statx/cannot stat/' | t_filter_fs +t_force_log_merge +# wait for orphan scanners to pick up the unlinked inode and become idle +t_wait_for_no_orphans check_ino_index "$ino" "$dseq" "$T_M0" check_ino_index "$ino" "$dseq" "$T_M1" @@ -83,16 +95,20 @@ rm -f "$T_D0/dir"/files-* rmdir "$T_D0/dir" echo "== open files survive remote scanning orphans" -echo "contents" > "$T_D0/file" -ino=$(stat -c "%i" "$T_D0/file") -dseq=$(scoutfs stat -s data_seq "$T_D0/file") -exec {FD}<"$T_D0/file" -rm -f "$T_D0/file" +echo "contents" > "$T_D0/lastfile" +ino=$(stat -c "%i" "$T_D0/lastfile") +dseq=$(scoutfs stat -s data_seq "$T_D0/lastfile") +exec {FD}<"$T_D0/lastfile" +rm -f "$T_D0/lastfile" t_umount 1 t_mount 1 echo "mount 0 contents after mount 1 remounted: $(cat <&$FD)" exec {FD}>&- # close +t_force_log_merge +t_wait_for_no_orphans check_ino_index "$ino" "$dseq" "$T_M0" check_ino_index "$ino" "$dseq" "$T_M1" +t_restore_all_sysfs_mount_options orphan_scan_delay_ms + t_pass