diff --git a/kmod/src/block.c b/kmod/src/block.c index 3413bb3c..3ecf6a7b 100644 --- a/kmod/src/block.c +++ b/kmod/src/block.c @@ -47,6 +47,7 @@ struct scoutfs_block { struct rw_semaphore rwsem; atomic_t refcount; + struct list_head lru_entry; u64 blkno; unsigned long bits; @@ -80,6 +81,7 @@ static struct scoutfs_block *alloc_block(struct super_block *sb, u64 blkno) if (page) { init_rwsem(&bl->rwsem); atomic_set(&bl->refcount, 1); + INIT_LIST_HEAD(&bl->lru_entry); bl->blkno = blkno; bl->sb = sb; bl->page = page; @@ -98,12 +100,60 @@ void scoutfs_block_put(struct scoutfs_block *bl) { if (!IS_ERR_OR_NULL(bl) && atomic_dec_and_test(&bl->refcount)) { trace_printk("freeing bl %p\n", bl); + WARN_ON_ONCE(!list_empty(&bl->lru_entry)); __free_pages(bl->page, SCOUTFS_BLOCK_PAGE_ORDER); kfree(bl); scoutfs_inc_counter(bl->sb, block_mem_free); } } +static void lru_add(struct scoutfs_sb_info *sbi, struct scoutfs_block *bl) +{ + if (list_empty(&bl->lru_entry)) { + list_add_tail(&bl->lru_entry, &sbi->block_lru_list); + sbi->block_lru_nr++; + } +} + +static void lru_del(struct scoutfs_sb_info *sbi, struct scoutfs_block *bl) +{ + if (!list_empty(&bl->lru_entry)) { + list_del_init(&bl->lru_entry); + sbi->block_lru_nr--; + } +} + +/* + * The caller is referencing a block but doesn't know if its in the LRU + * or not. If it is move it to the tail so it's last to be dropped by + * the shrinker. + */ +static void lru_move(struct scoutfs_sb_info *sbi, struct scoutfs_block *bl) +{ + if (!list_empty(&bl->lru_entry)) + list_move_tail(&bl->lru_entry, &sbi->block_lru_list); +} + +static void radix_insert(struct scoutfs_sb_info *sbi, struct scoutfs_block *bl, + bool dirty) +{ + radix_tree_insert(&sbi->block_radix, bl->blkno, bl); + if (dirty) + radix_tree_tag_set(&sbi->block_radix, bl->blkno, + DIRTY_RADIX_TAG); + else + lru_add(sbi, bl); + atomic_inc(&bl->refcount); +} + +/* deleting the blkno from the radix also clears the dirty tag if it was set */ +static void radix_delete(struct scoutfs_sb_info *sbi, struct scoutfs_block *bl) +{ + lru_del(sbi, bl); + radix_tree_delete(&sbi->block_radix, bl->blkno); + scoutfs_block_put(bl); +} + static int verify_block_header(struct super_block *sb, struct scoutfs_block *bl) { struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); @@ -165,6 +215,7 @@ static void block_write_end_io(struct bio *bio, int err) spin_lock_irqsave(&sbi->block_lock, flags); radix_tree_tag_clear(&sbi->block_radix, bl->blkno, DIRTY_RADIX_TAG); + lru_add(sbi, bl); spin_unlock_irqrestore(&sbi->block_lock, flags); } @@ -227,10 +278,10 @@ struct scoutfs_block *scoutfs_block_read(struct super_block *sb, u64 blkno) bl = radix_tree_lookup(&sbi->block_radix, blkno); if (bl) { if (test_bit(BLOCK_BIT_ERROR, &bl->bits)) { - radix_tree_delete(&sbi->block_radix, bl->blkno); - scoutfs_block_put(bl); + radix_delete(sbi, bl); bl = NULL; } else { + lru_move(sbi, bl); atomic_inc(&bl->refcount); } } @@ -255,10 +306,10 @@ struct scoutfs_block *scoutfs_block_read(struct super_block *sb, u64 blkno) if (found) { scoutfs_block_put(bl); bl = found; + lru_move(sbi, bl); atomic_inc(&bl->refcount); } else { - radix_tree_insert(&sbi->block_radix, blkno, bl); - atomic_inc(&bl->refcount); + radix_insert(sbi, bl, false); } spin_unlock_irqrestore(&sbi->block_lock, flags); @@ -531,14 +582,9 @@ struct scoutfs_block *scoutfs_block_dirty(struct super_block *sb, u64 blkno) spin_lock_irqsave(&sbi->block_lock, flags); found = radix_tree_lookup(&sbi->block_radix, blkno); - if (found) { - radix_tree_delete(&sbi->block_radix, blkno); - scoutfs_block_put(found); - } - - radix_tree_insert(&sbi->block_radix, blkno, bl); - radix_tree_tag_set(&sbi->block_radix, blkno, DIRTY_RADIX_TAG); - atomic_inc(&bl->refcount); + if (found) + radix_delete(sbi, found); + radix_insert(sbi, bl, true); spin_unlock_irqrestore(&sbi->block_lock, flags); radix_tree_preload_end(); @@ -592,13 +638,65 @@ void scoutfs_block_forget(struct scoutfs_block *bl) spin_lock_irqsave(&sbi->block_lock, flags); found = radix_tree_lookup(&sbi->block_radix, blkno); - if (found == bl) { - radix_tree_delete(&sbi->block_radix, blkno); - radix_tree_tag_clear(&sbi->block_radix, blkno, DIRTY_RADIX_TAG); - scoutfs_block_put(found); + if (found == bl) + radix_delete(sbi, bl); + spin_unlock_irqrestore(&sbi->block_lock, flags); +} + +/* + * We maintain an LRU of blocks so that the shrinker can free the oldest + * under memory pressure. We can't reclaim dirty blocks so only clean + * blocks are kept in the LRU. Blocks are only in the LRU while their + * presence in the radix holds a reference. We don't care if a reader + * has an active ref on a clean block that gets reclaimed. All we're + * doing is removing from the radix. The caller can still work with the + * block and it will be freed once they drop their ref. + * + * If this is called with nr_to_scan == 0 then it only returns the nr. + * We avoid acquiring the lock in that case. + * + * Lookup code only moves blocks around in the LRU while they're in the + * radix. Once we remove the block from the radix we're able to use the + * lru_entry to drop all the blocks outside the lock. + * + * XXX: + * - are sc->nr_to_scan and our return meant to be in units of pages? + * - should we sync a transaction here? + */ +int scoutfs_block_shrink(struct shrinker *shrink, struct shrink_control *sc) +{ + struct scoutfs_sb_info *sbi = container_of(shrink, + struct scoutfs_sb_info, + block_shrinker); + struct scoutfs_block *tmp; + struct scoutfs_block *bl; + unsigned long flags; + unsigned long nr; + LIST_HEAD(list); + + nr = sc->nr_to_scan; + if (!nr) + goto out; + + spin_lock_irqsave(&sbi->block_lock, flags); + + list_for_each_entry_safe(bl, tmp, &sbi->block_lru_list, lru_entry) { + if (nr-- == 0) + break; + atomic_inc(&bl->refcount); + radix_delete(sbi, bl); + list_add(&bl->lru_entry, &list); } spin_unlock_irqrestore(&sbi->block_lock, flags); + + list_for_each_entry_safe(bl, tmp, &list, lru_entry) { + list_del_init(&bl->lru_entry); + scoutfs_block_put(bl); + } + +out: + return min_t(unsigned long, sbi->block_lru_nr, INT_MAX); } void scoutfs_block_set_crc(struct scoutfs_block *bl) @@ -681,9 +779,8 @@ void scoutfs_block_destroy(struct super_block *sb) blkno, ARRAY_SIZE(blocks)); for (i = 0; i < nr; i++) { bl = blocks[i]; - radix_tree_delete(&sbi->block_radix, bl->blkno); blkno = bl->blkno + 1; - scoutfs_block_put(bl); + radix_delete(sbi, bl); } } while (nr); } diff --git a/kmod/src/block.h b/kmod/src/block.h index 5d760f52..0eb86837 100644 --- a/kmod/src/block.h +++ b/kmod/src/block.h @@ -32,6 +32,7 @@ void *scoutfs_block_data_from_contents(const void *ptr); void scoutfs_block_forget(struct scoutfs_block *bl); void scoutfs_block_put(struct scoutfs_block *bl); +int scoutfs_block_shrink(struct shrinker *shrink, struct shrink_control *sc); void scoutfs_block_destroy(struct super_block *sb); #endif diff --git a/kmod/src/super.c b/kmod/src/super.c index aa7c2bcc..ca085815 100644 --- a/kmod/src/super.c +++ b/kmod/src/super.c @@ -192,6 +192,7 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent) INIT_RADIX_TREE(&sbi->block_radix, GFP_ATOMIC); init_waitqueue_head(&sbi->block_wq); atomic_set(&sbi->block_writes, 0); + INIT_LIST_HEAD(&sbi->block_lru_list); init_rwsem(&sbi->btree_rwsem); atomic_set(&sbi->trans_holds, 0); init_waitqueue_head(&sbi->trans_hold_wq); @@ -200,6 +201,10 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent) init_waitqueue_head(&sbi->trans_write_wq); spin_lock_init(&sbi->file_alloc_lock); + sbi->block_shrinker.shrink = scoutfs_block_shrink; + sbi->block_shrinker.seeks = DEFAULT_SEEKS; + register_shrinker(&sbi->block_shrinker); + /* XXX can have multiple mounts of a device, need mount id */ sbi->kset = kset_create_and_add(sb->s_id, NULL, &scoutfs_kset->kobj); if (!sbi->kset) @@ -241,6 +246,8 @@ static void scoutfs_kill_sb(struct super_block *sb) if (sbi) { scoutfs_shutdown_trans(sb); scoutfs_buddy_destroy(sb); + if (sbi->block_shrinker.shrink == scoutfs_block_shrink) + unregister_shrinker(&sbi->block_shrinker); scoutfs_block_destroy(sb); scoutfs_destroy_counters(sb); if (sbi->kset) diff --git a/kmod/src/super.h b/kmod/src/super.h index 141a606d..db453cb2 100644 --- a/kmod/src/super.h +++ b/kmod/src/super.h @@ -23,6 +23,10 @@ struct scoutfs_sb_info { wait_queue_head_t block_wq; atomic_t block_writes; int block_write_err; + /* block cache lru */ + struct shrinker block_shrinker; + struct list_head block_lru_list; + unsigned long block_lru_nr; struct buddy_info *buddy_info;