diff --git a/kmod/src/block.c b/kmod/src/block.c
index 641d9c3d..8249a3d6 100644
--- a/kmod/src/block.c
+++ b/kmod/src/block.c
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include <linux/rbtree.h>
 
 #include "format.h"
 #include "super.h"
@@ -45,7 +46,7 @@
 struct block_info {
 	struct super_block *sb;
 	spinlock_t lock;
-	struct radix_tree_root radix;
+	struct rb_root root;
 	struct list_head lru_list;
 	u64 lru_nr;
 	u64 lru_move_counter;
@@ -63,7 +64,7 @@ enum {
 	BLOCK_BIT_NEW, /* newly allocated, contents undefined */
 	BLOCK_BIT_DIRTY, /* dirty, writer will write */
 	BLOCK_BIT_ERROR, /* saw IO error */
-	BLOCK_BIT_DELETED, /* has been deleted from radix tree */
+	BLOCK_BIT_DELETED, /* has been deleted from rbtree */
 	BLOCK_BIT_PAGE_ALLOC, /* page (possibly high order) allocation */
 	BLOCK_BIT_VIRT, /* mapped virt allocation */
 	BLOCK_BIT_CRC_VALID, /* crc has been verified */
@@ -72,6 +73,7 @@
 
 struct block_private {
 	struct scoutfs_block bl;
+	struct rb_node node;
 	struct super_block *sb;
 	atomic_t refcount;
 	union {
@@ -180,6 +182,7 @@ static struct block_private *block_alloc(struct super_block *sb, u64 blkno)
 	}
 
 	bp->bl.blkno = blkno;
+	RB_CLEAR_NODE(&bp->node);
 	bp->sb = sb;
 	atomic_set(&bp->refcount, 1);
 	INIT_LIST_HEAD(&bp->lru_entry);
@@ -249,9 +252,39 @@ static void block_put(struct super_block *sb, struct block_private *bp)
 	}
 }
 
+static struct block_private *walk_block_rbtree(struct rb_root *root,
+					       u64 blkno,
+					       struct block_private *ins)
+{
+	struct rb_node **node = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct block_private *bp;
+	int cmp;
+
+	while (*node) {
+		parent = *node;
+		bp = container_of(*node, struct block_private, node);
+
+		cmp = scoutfs_cmp_u64s(bp->bl.blkno, blkno);
+		if (cmp == 0)
+			return bp;
+		else if (cmp < 0)
+			node = &(*node)->rb_left;
+		else
+			node = &(*node)->rb_right;
+	}
+
+	if (ins) {
+		rb_link_node(&ins->node, parent, node);
+		rb_insert_color(&ins->node, root);
+		return ins;
+	}
+
+	return NULL;
+}
+
 /*
- * Add a new block into the cache. The caller holds the lock and has
- * preloaded the radix.
+ * Add a new block into the cache. The caller holds the lock.
  */
 static void block_insert(struct super_block *sb, struct block_private *bp,
 			 u64 blkno)
@@ -260,9 +293,10 @@ static void block_insert(struct super_block *sb, struct block_private *bp,
 
 	assert_spin_locked(&binf->lock);
 	BUG_ON(!list_empty(&bp->lru_entry));
+	BUG_ON(!RB_EMPTY_NODE(&bp->node));
 
 	atomic_inc(&bp->refcount);
-	radix_tree_insert(&binf->radix, blkno, bp);
+	walk_block_rbtree(&binf->root, blkno, bp);
 	list_add_tail(&bp->lru_entry, &binf->lru_list);
 	bp->lru_moved = ++binf->lru_move_counter;
 	binf->lru_nr++;
@@ -310,11 +344,10 @@ static void block_remove(struct super_block *sb, struct block_private *bp)
 {
 	DECLARE_BLOCK_INFO(sb, binf);
 
-	assert_spin_locked(&binf->lock);
-
 	if (!test_and_set_bit(BLOCK_BIT_DELETED, &bp->bits)) {
 		BUG_ON(list_empty(&bp->lru_entry));
-		radix_tree_delete(&binf->radix, bp->bl.blkno);
+		rb_erase(&bp->node, &binf->root);
+		RB_CLEAR_NODE(&bp->node);
 		list_del_init(&bp->lru_entry);
 		binf->lru_nr--;
 		block_put(sb, bp);
@@ -328,19 +361,18 @@ static void block_remove_all(struct super_block *sb)
 {
 	DECLARE_BLOCK_INFO(sb, binf);
 	struct block_private *bp;
+	struct rb_node *node;
 
-	spin_lock(&binf->lock);
-
-	while (radix_tree_gang_lookup(&binf->radix, (void **)&bp, 0, 1) == 1) {
+	for (node = rb_first(&binf->root); node; ) {
+		bp = container_of(node, struct block_private, node);
+		node = rb_next(node);
 		wait_event(binf->waitq, atomic_read(&bp->io_count) == 0);
 		block_remove(sb, bp);
 	}
 
-	spin_unlock(&binf->lock);
-
 	WARN_ON_ONCE(!list_empty(&binf->lru_list));
 	WARN_ON_ONCE(binf->lru_nr != 0);
-	WARN_ON_ONCE(binf->radix.rnode != NULL);
+	WARN_ON_ONCE(!RB_EMPTY_ROOT(&binf->root));
 }
 
 /*
@@ -457,8 +489,8 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
 
 /*
  * Return a reference to a cached block in the system, allocating a new
- * block if one isn't found in the radix. Its contents are undefined if
- * it's newly allocated.
+ * block if one isn't found in the rbtree. Its contents are undefined
+ * if it's newly allocated.
  */
 static struct block_private *block_get(struct super_block *sb, u64 blkno)
 {
@@ -467,11 +499,11 @@ static struct block_private *block_get(struct super_block *sb, u64 blkno)
 	struct block_private *bp;
 	int ret;
 
-	rcu_read_lock();
-	bp = radix_tree_lookup(&binf->radix, blkno);
+	spin_lock(&binf->lock);
+	bp = walk_block_rbtree(&binf->root, blkno, NULL);
 	if (bp)
 		atomic_inc(&bp->refcount);
-	rcu_read_unlock();
+	spin_unlock(&binf->lock);
 
 	/* drop failed reads that interrupted waiters abandoned */
 	if (bp && (test_bit(BLOCK_BIT_ERROR, &bp->bits) &&
@@ -490,20 +522,15 @@ static struct block_private *block_get(struct super_block *sb, u64 blkno)
 		goto out;
 	}
 
-	ret = radix_tree_preload(GFP_NOFS);
-	if (ret)
-		goto out;
-
-	/* could use slot instead of lookup/insert */
+	/* could refactor to insert in one walk */
 	spin_lock(&binf->lock);
-	found = radix_tree_lookup(&binf->radix, blkno);
+	found = walk_block_rbtree(&binf->root, blkno, NULL);
 	if (found) {
 		atomic_inc(&found->refcount);
 	} else {
 		block_insert(sb, bp, blkno);
 	}
 	spin_unlock(&binf->lock);
-	radix_tree_preload_end();
 
 	if (found) {
 		block_put(sb, bp);
@@ -850,17 +877,8 @@ int scoutfs_block_setup(struct super_block *sb)
 {
 	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
 	struct block_info *binf;
-	loff_t size;
 	int ret;
 
-	/* we store blknos in longs in the radix */
-	size = i_size_read(sb->s_bdev->bd_inode);
-	if ((size >> SCOUTFS_BLOCK_SHIFT) >= LONG_MAX) {
-		scoutfs_err(sb, "Cant reference all blocks in %llu byte device with %u bit long radix tree indexes",
-			    size, BITS_PER_LONG);
-		return -EINVAL;
-	}
-
 	binf = kzalloc(sizeof(struct block_info), GFP_KERNEL);
 	if (!binf) {
 		ret = -ENOMEM;
@@ -869,7 +887,7 @@ int scoutfs_block_setup(struct super_block *sb)
 
 	binf->sb = sb;
 	spin_lock_init(&binf->lock);
-	INIT_RADIX_TREE(&binf->radix, GFP_ATOMIC); /* insertion preloads */
+	binf->root = RB_ROOT;
 	INIT_LIST_HEAD(&binf->lru_list);
 	init_waitqueue_head(&binf->waitq);
 	binf->shrinker.shrink = block_shrink;
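
Note: the new "could refactor to insert in one walk" comment in block_get() hints at a follow-up that this patch does not make. Below is a rough, untested sketch of what that could look like, reusing walk_block_rbtree()'s insert mode so the miss path only descends the tree once; block_find_or_insert() and new_bp are hypothetical names, not part of this patch, and the lru/refcount bookkeeping is copied from block_insert()/block_get() above.

/* hypothetical helper, not in this patch: find blkno or link new_bp in one walk */
static struct block_private *block_find_or_insert(struct super_block *sb,
						  struct block_private *new_bp,
						  u64 blkno)
{
	DECLARE_BLOCK_INFO(sb, binf);
	struct block_private *bp;

	spin_lock(&binf->lock);
	bp = walk_block_rbtree(&binf->root, blkno, new_bp);
	if (bp == new_bp) {
		/* the walk linked new_bp: take the cache's ref and add it to the lru */
		atomic_inc(&new_bp->refcount);
		list_add_tail(&new_bp->lru_entry, &binf->lru_list);
		new_bp->lru_moved = ++binf->lru_move_counter;
		binf->lru_nr++;
	} else {
		/* an existing block was found first: give the caller a ref on it */
		atomic_inc(&bp->refcount);
	}
	spin_unlock(&binf->lock);

	/* as in block_get() today, the caller still puts new_bp when it wasn't inserted */
	return bp;
}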