scoutfs: more carefully trace backref walk loop

We were only issuing one kernel warning when we couldn't resolve a path
to an inode due to excessive retries.  The warning was hard to capture
and we only saw details from the first instance.

This adds a counter that is incremented each time we see excessive
retries and returns -ELOOP in that case.  We also extend the trace point
for adding link backrefs to include the found entry, if any.

Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
Zach Brown
2018-04-13 09:31:01 -07:00
committed by Zach Brown
parent c118f7cc03
commit 676d1e32ef
3 changed files with 29 additions and 19 deletions

View File

@@ -36,6 +36,7 @@
EXPAND_COUNTER(dentry_revalidate_rcu) \
EXPAND_COUNTER(dentry_revalidate_root) \
EXPAND_COUNTER(dentry_revalidate_valid) \
EXPAND_COUNTER(dir_backref_excessive_retries) \
EXPAND_COUNTER(item_alloc) \
EXPAND_COUNTER(item_batch_duplicate) \
EXPAND_COUNTER(item_batch_inserted) \

View File

@@ -1206,8 +1206,10 @@ int scoutfs_dir_add_next_linkref(struct super_block *sb, u64 ino,
ent = kmalloc(offsetof(struct scoutfs_link_backref_entry,
dent.name[SCOUTFS_NAME_LEN]), GFP_KERNEL);
if (!ent)
return -ENOMEM;
if (!ent) {
ret = -ENOMEM;
goto out;
}
INIT_LIST_HEAD(&ent->head);
@@ -1223,8 +1225,6 @@ int scoutfs_dir_add_next_linkref(struct super_block *sb, u64 ino,
ret = scoutfs_item_next(sb, &key, &last_key, &val, lock);
scoutfs_unlock(sb, lock, DLM_LOCK_PR);
lock = NULL;
trace_scoutfs_dir_add_next_linkref(sb, ino, dir_ino, dir_pos, ret);
if (ret < 0)
goto out;
@@ -1241,7 +1241,12 @@ int scoutfs_dir_add_next_linkref(struct super_block *sb, u64 ino,
ent->name_len = len;
ret = 0;
out:
if (list_empty(&ent->head))
trace_scoutfs_dir_add_next_linkref(sb, ino, dir_ino, dir_pos, ret,
ent ? ent->dir_ino : 0,
ent ? ent->dir_pos : 0,
ent ? ent->name_len : 0);
if (ent && list_empty(&ent->head))
kfree(ent);
return ret;
}
@@ -1306,19 +1311,14 @@ void scoutfs_dir_free_backref_path(struct super_block *sb,
int scoutfs_dir_get_backref_path(struct super_block *sb, u64 ino, u64 dir_ino,
u64 dir_pos, struct list_head *list)
{
int retries = 10;
u64 par_ino;
int ret;
int iters = 0;
retry:
/*
* Debugging for SCOUT-107, can be removed later when we're
* confident we won't hit an endless loop here again.
*/
if (WARN_ONCE(++iters >= 4000, "scoutfs: Excessive retries in "
"dir_get_backref_path. ino %llu dir_ino %llu pos %llu\n",
ino, dir_ino, dir_pos)) {
ret = -EINVAL;
if (retries-- == 0) {
scoutfs_inc_counter(sb, dir_backref_excessive_retries);
ret = -ELOOP;
goto out;
}

View File

@@ -1060,9 +1060,11 @@ TRACE_EVENT(scoutfs_advance_dirty_super,
TRACE_EVENT(scoutfs_dir_add_next_linkref,
TP_PROTO(struct super_block *sb, __u64 ino, __u64 dir_ino,
__u64 dir_pos, int ret),
__u64 dir_pos, int ret, __u64 found_dir_ino,
__u64 found_dir_pos, unsigned int name_len),
TP_ARGS(sb, ino, dir_ino, dir_pos, ret),
TP_ARGS(sb, ino, dir_ino, dir_pos, ret, found_dir_pos, found_dir_ino,
name_len),
TP_STRUCT__entry(
__field(__u64, fsid)
@@ -1070,6 +1072,9 @@ TRACE_EVENT(scoutfs_dir_add_next_linkref,
__field(__u64, dir_ino)
__field(__u64, dir_pos)
__field(int, ret)
__field(__u64, found_dir_ino)
__field(__u64, found_dir_pos)
__field(unsigned int, name_len)
),
TP_fast_assign(
@@ -1078,11 +1083,15 @@ TRACE_EVENT(scoutfs_dir_add_next_linkref,
__entry->dir_ino = dir_ino;
__entry->dir_pos = dir_pos;
__entry->ret = ret;
__entry->found_dir_ino = dir_ino;
__entry->found_dir_pos = dir_pos;
__entry->name_len = name_len;
),
TP_printk(FSID_FMT" ino %llu dir_ino %llu dis_pos %llu ret %d",
__entry->fsid, __entry->ino, __entry->dir_ino,
__entry->dir_ino, __entry->ret)
TP_printk("fsid "FSID_FMT" ino %llu dir_ino %llu dir_pos %llu ret %d found_dir_ino %llu found_dir_pos %llu name_len %u",
__entry->fsid, __entry->ino, __entry->dir_pos,
__entry->dir_ino, __entry->ret, __entry->found_dir_pos,
__entry->found_dir_ino, __entry->name_len)
);
TRACE_EVENT(scoutfs_compact_func,