mirror of https://github.com/versity/scoutfs.git
Dump block alloc stacks for leaked blocks
The typical pattern of spinning while isolating a list_lru results in a livelock if there are blocks with leaked refcounts. We've seen this in testing, though rarely.

Add a modest array to each block that records the stack of the caller that initially allocated the block, and dump that stack for any blocks that we're unable to shrink/isolate. Instead of spinning on shrinking, give it a good try, then print the blocks that remain and carry on with unmount, leaking those few blocks. (Past events have involved 2 blocks.)

Signed-off-by: Zach Brown <zab@versity.com>
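Before the diff, it may help to see the stacktrace pattern in isolation. This is a minimal sketch, not scoutfs code: each tracked object carries a small fixed array that stack_trace_save() fills at allocation time, and stack_trace_print() dumps later if the object turns out to have leaked. "struct tracked" and both helpers are hypothetical names.

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/stacktrace.h>

struct tracked {
	unsigned int stack_len;
	unsigned long stack[10];
};

static noinline struct tracked *tracked_alloc(gfp_t gfp)
{
	struct tracked *t = kzalloc(sizeof(*t), gfp);

	if (t) {
		/* skipnr == 1 drops tracked_alloc() itself so the trace
		 * starts at our caller; the patch below skips 2 because
		 * it saves from a separate noinline helper */
		t->stack_len = stack_trace_save(t->stack,
						ARRAY_SIZE(t->stack), 1);
	}
	return t;
}

static void tracked_dump(struct tracked *t)
{
	/* print each saved frame, indented by one space */
	stack_trace_print(t->stack, t->stack_len, 1);
}

In the patch itself, save_block_stack() is marked noinline, presumably so the number of frames to skip past the helper stays predictable.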
@@ -23,6 +23,7 @@
 #include <linux/random.h>
 #include <linux/sched/mm.h>
 #include <linux/list_lru.h>
+#include <linux/stacktrace.h>
 
 #include "format.h"
 #include "super.h"
@@ -80,6 +81,8 @@ struct block_private {
 		struct page *page;
 		void *virt;
 	};
+	unsigned int stack_len;
+	unsigned long stack[10];
 };
 
 #define TRACE_BLOCK(which, bp) \
@@ -100,7 +103,17 @@ static __le32 block_calc_crc(struct scoutfs_block_header *hdr, u32 size)
 	return cpu_to_le32(calc);
 }
 
-static struct block_private *block_alloc(struct super_block *sb, u64 blkno)
+static noinline void save_block_stack(struct block_private *bp)
+{
+	bp->stack_len = stack_trace_save(bp->stack, ARRAY_SIZE(bp->stack), 2);
+}
+
+static void print_block_stack(struct block_private *bp)
+{
+	stack_trace_print(bp->stack, bp->stack_len, 1);
+}
+
+static noinline struct block_private *block_alloc(struct super_block *sb, u64 blkno)
 {
 	struct block_private *bp;
 	unsigned int nofs_flags;
@@ -156,6 +169,7 @@ static struct block_private *block_alloc(struct super_block *sb, u64 blkno)
 	atomic_set(&bp->io_count, 0);
 
 	TRACE_BLOCK(allocate, bp);
+	save_block_stack(bp);
 
 out:
 	if (!bp)
@@ -1113,6 +1127,19 @@ static unsigned long block_scan_objects(struct shrinker *shrink, struct shrink_c
 	return freed;
 }
 
+static enum lru_status dump_lru_block(struct list_head *item, struct list_lru_one *list,
+				      void *cb_arg)
+{
+	struct block_private *bp = container_of(item, struct block_private, lru_head);
+
+	printk("blkno %llu refcount 0x%x io_count %d bits 0x%lx\n",
+	       bp->bl.blkno, atomic_read(&bp->refcount), atomic_read(&bp->io_count),
+	       bp->bits);
+	print_block_stack(bp);
+
+	return LRU_SKIP;
+}
+
 /*
  * Called during shutdown with no other users.  The isolating walk must
  * find blocks on the lru that only have references for presence on the
@@ -1122,11 +1149,19 @@ static void block_shrink_all(struct super_block *sb)
 {
 	DECLARE_BLOCK_INFO(sb, binf);
 	DECLARE_ISOLATE_ARGS(sb, ia);
+	long count;
 
+	count = DIV_ROUND_UP(list_lru_count(&binf->lru), 128) * 2;
 	do {
 		kc_list_lru_walk(&binf->lru, isolate_lru_block, &ia, 128);
 		shrink_dispose_blocks(sb, &ia.dispose);
-	} while (list_lru_count(&binf->lru) > 0);
+	} while (list_lru_count(&binf->lru) > 0 && --count > 0);
+
+	count = list_lru_count(&binf->lru);
+	if (count > 0) {
+		scoutfs_err(sb, "failed to isolate/dispose %ld blocks", count);
+		kc_list_lru_walk(&binf->lru, dump_lru_block, sb, count);
+	}
 }
 
 struct sm_block_completion {
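The behavioral change in block_shrink_all() above is the bounded retry: rather than spinning until the lru empties, which livelocks when a block holds a leaked reference, the walk gets roughly two full passes over the starting population and then reports and leaks whatever remains. A condensed sketch of that shape, with hypothetical helpers standing in for the scoutfs isolate/dispose machinery:

#include <linux/kernel.h>
#include <linux/printk.h>

#define BATCH	128

/* hypothetical stand-ins: the number of objects still pending, and one
 * isolate+dispose pass over up to nr of them */
extern unsigned long pending_count(void);
extern void free_one_batch(unsigned long nr);

static void shrink_all_bounded(void)
{
	long passes = DIV_ROUND_UP(pending_count(), BATCH) * 2;
	long remaining;

	/* two passes over the starting population is a "good try";
	 * objects with leaked references never isolate, so looping
	 * until the count hits zero could spin forever */
	do {
		free_one_batch(BATCH);
	} while (pending_count() > 0 && --passes > 0);

	remaining = pending_count();
	if (remaining > 0)
		pr_err("leaking %ld objects at shutdown\n", remaining);
}

Note that the dump walk in the patch passes dump_lru_block, which returns LRU_SKIP, so each leftover block is printed in place without the walk modifying the lru.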