From 71df879f0705d25065d87cb69716d583f52cf956 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Tue, 23 Feb 2016 19:39:02 -0800 Subject: [PATCH] scoutfs: update format.h to remove bricks Update to the format.h from the recent -utils changes that moved from the clumsy 'brick' terminology to the more reasonable 'block/chunk/segment' terminology. Signed-off-by: Zach Brown --- kmod/src/format.h | 120 +++++++++++++++++++++++----------------------- kmod/src/super.c | 25 ++++------ kmod/src/super.h | 4 +- 3 files changed, 72 insertions(+), 77 deletions(-) diff --git a/kmod/src/format.h b/kmod/src/format.h index bff0b7cb..d27748c0 100644 --- a/kmod/src/format.h +++ b/kmod/src/format.h @@ -7,53 +7,69 @@ #define SCOUTFS_SUPER_ID 0x2e736674756f6373ULL /* "scoutfs." */ /* - * Some fs structures are stored in smaller fixed size 4k bricks. + * Everything is stored in and addressed as 4k fixed size blocks. This + * avoids having to manage contiguous cpu mappings of larger blocks. + * Larger structures are read and written as multiple blocks. */ -#define SCOUTFS_BRICK_SHIFT 12 -#define SCOUTFS_BRICK_SIZE (1 << SCOUTFS_BRICK_SHIFT) - -/* - * A large block size reduces the amount of per-block overhead throughout - * the system: block IO, manifest communications and storage, etc. - */ -#define SCOUTFS_BLOCK_SHIFT 22 +#define SCOUTFS_BLOCK_SHIFT 12 #define SCOUTFS_BLOCK_SIZE (1 << SCOUTFS_BLOCK_SHIFT) -/* for shifting between brick and block numbers */ -#define SCOUTFS_BLOCK_BRICK (SCOUTFS_BLOCK_SHIFT - SCOUTFS_BRICK_SHIFT) +/* + * The allocator works on larger chunks. Smaller metadata structures + * like the super blocks and the ring are stored in chunks. + * + * A log segment is a collection of smaller blocks (bloom filter, item blocks) + * stored in a chunk. 
+ */ +#define SCOUTFS_CHUNK_SHIFT 22 +#define SCOUTFS_CHUNK_SIZE (1 << SCOUTFS_CHUNK_SHIFT) +#define SCOUTFS_CHUNK_BLOCK_SHIFT (SCOUTFS_CHUNK_SHIFT - SCOUTFS_BLOCK_SHIFT) +#define SCOUTFS_BLOCKS_PER_CHUNK (1 << SCOUTFS_CHUNK_BLOCK_SHIFT) /* - * The super bricks leave a bunch of room at the start of the first - * block for platform structures like boot loaders. + * The super blocks leave some room at the start of the first block for + * platform structures like boot loaders. */ -#define SCOUTFS_SUPER_BRICK 16 +#define SCOUTFS_SUPER_BLKNO ((64 * 1024) >> SCOUTFS_BLOCK_SHIFT) +#define SCOUTFS_SUPER_NR 2 /* - * This header is found at the start of every brick and block - * so that we can verify that it's what we were looking for. + * This header is found at the start of every block so that we can + * verify that it's what we were looking for. The crc and padding + * start the block so that its calculation operates on a nice 64bit + * aligned region. */ -struct scoutfs_header { +struct scoutfs_block_header { __le32 crc; + __le32 _pad; __le64 fsid; __le64 seq; - __le64 nr; + __le64 blkno; } __packed; #define SCOUTFS_UUID_BYTES 16 /* - * The super is stored in a pair of bricks in the first block. + * The super is stored in a pair of blocks in the first chunk on the + * device. + * + * The ring map blocks describe the chunks that make up the ring. + * + * The rest of the ring fields describe the state of the ring blocks + * that are stored in their chunks. The active portion of the ring + * describes the current state of the system and is replayed on mount. 
*/ -struct scoutfs_super { - struct scoutfs_header hdr; +struct scoutfs_super_block { + struct scoutfs_block_header hdr; __le64 id; __u8 uuid[SCOUTFS_UUID_BYTES]; - __le64 total_blocks; - __le64 ring_layout_block; - __le64 ring_layout_seq; - __le64 last_ring_brick; - __le64 last_ring_seq; - __le64 last_block_seq; + __le64 total_chunks; + __le64 ring_map_blkno; + __le64 ring_map_seq; + __le64 ring_first_block; + __le64 ring_active_blocks; + __le64 ring_total_blocks; + __le64 ring_seq; } __packed; /* @@ -71,10 +87,10 @@ struct scoutfs_key { #define SCOUTFS_INODE_KEY 128 #define SCOUTFS_DIRENT_KEY 192 -struct scoutfs_ring_layout { - struct scoutfs_header hdr; - __le32 nr_blocks; - __le64 blocks[0]; +struct scoutfs_ring_map_block { + struct scoutfs_block_header hdr; + __le32 nr_chunks; + __le64 blknos[0]; } __packed; struct scoutfs_ring_entry { @@ -83,16 +99,15 @@ struct scoutfs_ring_entry { } __packed; /* - * Ring blocks are 4k blocks stored inside the large ring blocks - * referenced by the ring descriptor block. + * Ring blocks are stored in chunks described by the ring map blocks. * - * The manifest entries describe the position of a given block in the - * manifest. They're keyed by the block number so that we can log - * movement of a block in the manifest with one log entry and we can log - * deletion with just the block number. + * The manifest entries describe the position of a given log segment in + * the manifest. They're keyed by the block number so that we can + * record movement of a log segment in the manifest with one ring entry + * and we can record deletion with just the block number. */ -struct scoutfs_ring_brick { - struct scoutfs_header hdr; +struct scoutfs_ring_block { + struct scoutfs_block_header hdr; __le16 nr_entries; } __packed; @@ -102,13 +117,8 @@ enum { SCOUTFS_RING_BITMAP, }; -/* - * Manifest entries are logged by their block number. This lets us log - * a change with one entry and a removal with a tiny block number - * without the key. 
- */ struct scoutfs_ring_remove_manifest { - __le64 block; + __le64 blkno; } __packed; /* @@ -119,7 +129,7 @@ struct scoutfs_ring_remove_manifest { * blocks when we didn't need to. */ struct scoutfs_ring_add_manifest { - __le64 block; + __le64 blkno; __le64 seq; __u8 level; struct scoutfs_key first; @@ -132,23 +142,15 @@ struct scoutfs_ring_bitmap { } __packed; /* - * This bloom size is chosen to have a roughly 1% false positive rate - * for ~90k items which is roughly the worst case for a block full of - * dirents with reasonably small names. Pathologically smaller items - * could be even more dense. + * To start, the log segments are a trivial single item block. We'll + * flesh this out into larger blocks once the rest of the architecture + * is in place. */ -#define SCOUTFS_BLOOM_FILTER_BYTES (128 * 1024) -#define SCOUTFS_BLOOM_FILTER_BITS (SCOUTFS_BLOOM_FILTER_BYTES * 8) -#define SCOUTFS_BLOOM_INDEX_BITS (ilog2(SCOUTFS_BLOOM_FILTER_BITS)) -#define SCOUTFS_BLOOM_INDEX_MASK ((1 << SCOUTFS_BLOOM_INDEX_BITS) - 1) -#define SCOUTFS_BLOOM_INDEX_NR 7 - -struct scoutfs_lsm_block { - struct scoutfs_header hdr; +struct scoutfs_item_block { + struct scoutfs_block_header hdr; struct scoutfs_key first; struct scoutfs_key last; __le32 nr_items; - /* u8 bloom[SCOUTFS_BLOOM_BYTES]; */ /* struct scoutfs_item_header items[0] .. 
*/ } __packed; diff --git a/kmod/src/super.c b/kmod/src/super.c index 82d0bd98..b14f495d 100644 --- a/kmod/src/super.c +++ b/kmod/src/super.c @@ -33,33 +33,33 @@ static const struct super_operations scoutfs_super_ops = { static int read_supers(struct super_block *sb) { struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); + struct scoutfs_super_block *super; struct buffer_head *bh = NULL; - struct scoutfs_super *super; int found = -1; u32 crc; int i; - for (i = 0; i < 2; i++) { + for (i = 0; i < SCOUTFS_SUPER_NR; i++) { if (bh) brelse(bh); - bh = sb_bread(sb, SCOUTFS_SUPER_BRICK + i); + bh = sb_bread(sb, SCOUTFS_SUPER_BLKNO + i); if (!bh) { - scoutfs_warn(sb, "couldn't read super brick %u", i); + scoutfs_warn(sb, "couldn't read super block %u", i); continue; } super = (void *)bh->b_data; if (super->id != cpu_to_le64(SCOUTFS_SUPER_ID)) { - scoutfs_warn(sb, "super brick %u has invalid id %llx", + scoutfs_warn(sb, "super block %u has invalid id %llx", i, le64_to_cpu(super->id)); continue; } crc = crc32c(~0, (char *)&super->hdr.crc + sizeof(crc), - SCOUTFS_BRICK_SIZE - sizeof(crc)); + SCOUTFS_BLOCK_SIZE - sizeof(crc)); if (crc != le32_to_cpu(super->hdr.crc)) { - scoutfs_warn(sb, "super brick %u has bad crc %x (expected %x)", + scoutfs_warn(sb, "super block %u has bad crc %x (expected %x)", i, crc, le32_to_cpu(super->hdr.crc)); continue; } @@ -67,7 +67,7 @@ static int read_supers(struct super_block *sb) if (found < 0 || (le64_to_cpu(super->hdr.seq) > le64_to_cpu(sbi->super.hdr.seq))) { memcpy(&sbi->super, super, - sizeof(struct scoutfs_super)); + sizeof(struct scoutfs_super_block)); found = i; } } @@ -76,7 +76,7 @@ static int read_supers(struct super_block *sb) brelse(bh); if (found < 0) { - scoutfs_err(sb, "unable to read valid super brick"); + scoutfs_err(sb, "unable to read valid super block"); return -EINVAL; } @@ -89,11 +89,6 @@ static int read_supers(struct super_block *sb) atomic64_set(&sbi->next_ino, SCOUTFS_ROOT_INO + 1); atomic64_set(&sbi->next_blkno, 2); - for (i = 
0; i < ARRAY_SIZE(sbi->bloom_hash_keys); i++) { - get_random_bytes(&sbi->bloom_hash_keys[i], - sizeof(sbi->bloom_hash_keys[i])); - } - return 0; } @@ -116,7 +111,7 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent) sbi->item_root = RB_ROOT; sbi->dirty_item_root = RB_ROOT; - if (!sb_set_blocksize(sb, SCOUTFS_BRICK_SIZE)) { + if (!sb_set_blocksize(sb, SCOUTFS_BLOCK_SIZE)) { printk(KERN_ERR "couldn't set blocksize\n"); return -EINVAL; } diff --git a/kmod/src/super.h b/kmod/src/super.h index fd1fe36d..538dd773 100644 --- a/kmod/src/super.h +++ b/kmod/src/super.h @@ -5,13 +5,11 @@ #include "format.h" struct scoutfs_sb_info { - struct scoutfs_super super; + struct scoutfs_super_block super; atomic64_t next_ino; atomic64_t next_blkno; - __le64 bloom_hash_keys[6]; /* XXX */ - spinlock_t item_lock; struct rb_root item_root; struct rb_root dirty_item_root;