From ff436db49b6db773292c258afa1295aaf63c0170 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Sun, 16 Feb 2020 17:32:54 -0800 Subject: [PATCH] scoutfs-utils: add support for radix alloc Add support for initializing radix allocator blocks that describe free space in mkfs and support for printing them out. Signed-off-by: Zach Brown --- utils/src/format.h | 126 +++++++++---------- utils/src/mkfs.c | 292 ++++++++++++++++++++++++++++++++++++--------- utils/src/print.c | 263 ++++++++++++++++++++-------------------- utils/src/radix.c | 106 ++++++++++++++++ utils/src/radix.h | 13 ++ 5 files changed, 542 insertions(+), 258 deletions(-) create mode 100644 utils/src/radix.c create mode 100644 utils/src/radix.h diff --git a/utils/src/format.h b/utils/src/format.h index d801c40d..6cfb7322 100644 --- a/utils/src/format.h +++ b/utils/src/format.h @@ -8,6 +8,7 @@ #define SCOUTFS_BLOCK_MAGIC_SUPER 0x103c428b #define SCOUTFS_BLOCK_MAGIC_BTREE 0xe597f96d #define SCOUTFS_BLOCK_MAGIC_BLOOM 0x31995604 +#define SCOUTFS_BLOCK_MAGIC_RADIX 0xebeb5e65 /* * The super block and btree blocks are fixed 4k. @@ -132,6 +133,43 @@ struct scoutfs_key { #define skpe_base _sk_second #define skpe_part _sk_fourth +struct scoutfs_radix_block { + struct scoutfs_block_header hdr; + __le32 sm_first; + __le32 lg_first; + union { + struct scoutfs_radix_ref { + __le64 blkno; + __le64 seq; + __le64 sm_total; + __le64 lg_total; + } __packed refs[0]; + __le64 bits[0]; + } __packed; +} __packed; + +struct scoutfs_radix_root { + __u8 height; + __le64 next_find_bit; + struct scoutfs_radix_ref ref; +} __packed; + +#define SCOUTFS_RADIX_REFS \ + ((SCOUTFS_BLOCK_SIZE - offsetof(struct scoutfs_radix_block, refs[0])) /\ + sizeof(struct scoutfs_radix_ref)) + +/* 8 meg regions with 4k data blocks */ +#define SCOUTFS_RADIX_LG_SHIFT 11 +#define SCOUTFS_RADIX_LG_BITS (1 << SCOUTFS_RADIX_LG_SHIFT) +#define SCOUTFS_RADIX_LG_MASK (SCOUTFS_RADIX_LG_BITS - 1) + +/* round block bits down to a multiple of large ranges */ +#define SCOUTFS_RADIX_BITS \ + (((SCOUTFS_BLOCK_SIZE - \ + offsetof(struct scoutfs_radix_block, bits[0])) * 8) & \ + ~(__u64)SCOUTFS_RADIX_LG_MASK) +#define SCOUTFS_RADIX_BITS_BYTES (SCOUTFS_RADIX_BITS / 8) + /* * The btree still uses memcmp() to compare keys. We should fix that * before too long. @@ -208,55 +246,6 @@ struct scoutfs_btree_block { struct scoutfs_btree_item_header item_hdrs[0]; } __packed; -/* - * Free metadata blocks are tracked by block allocator items. - */ -struct scoutfs_balloc_root { - struct scoutfs_btree_root root; - __le64 total_free; -} __packed; -struct scoutfs_balloc_item_key { - __be64 base; -} __packed; - -#define SCOUTFS_BALLOC_ITEM_BYTES 256 -#define SCOUTFS_BALLOC_ITEM_U64S (SCOUTFS_BALLOC_ITEM_BYTES / \ - sizeof(__u64)) -#define SCOUTFS_BALLOC_ITEM_BITS (SCOUTFS_BALLOC_ITEM_BYTES * 8) -#define SCOUTFS_BALLOC_ITEM_BASE_SHIFT ilog2(SCOUTFS_BALLOC_ITEM_BITS) -#define SCOUTFS_BALLOC_ITEM_BIT_MASK (SCOUTFS_BALLOC_ITEM_BITS - 1) - -struct scoutfs_balloc_item_val { - __le64 bits[SCOUTFS_BALLOC_ITEM_U64S]; -} __packed; - -/* - * Free data blocks are tracked in bitmaps stored in btree items. 
- */ -struct scoutfs_block_bitmap_key { - __u8 type; - __be64 base; -} __packed; - -#define SCOUTFS_BLOCK_BITMAP_BIG 0 -#define SCOUTFS_BLOCK_BITMAP_LITTLE 1 - -#define SCOUTFS_PACKED_BITMAP_WORDS 32 -#define SCOUTFS_PACKED_BITMAP_BITS (SCOUTFS_PACKED_BITMAP_WORDS * 64) -#define SCOUTFS_PACKED_BITMAP_MAX_BYTES \ - offsetof(struct scoutfs_packed_bitmap, \ - words[SCOUTFS_PACKED_BITMAP_WORDS]) - -#define SCOUTFS_BLOCK_BITMAP_BITS SCOUTFS_PACKED_BITMAP_BITS -#define SCOUTFS_BLOCK_BITMAP_BIT_MASK (SCOUTFS_PACKED_BITMAP_BITS - 1) -#define SCOUTFS_BLOCK_BITMAP_BASE_SHIFT (ilog2(SCOUTFS_PACKED_BITMAP_BITS)) - -struct scoutfs_packed_bitmap { - __le64 present; - __le64 set; - __le64 words[0]; -}; - /* * The lock server keeps a persistent record of connected clients so that * server failover knows who to wait for before resuming operations. @@ -293,12 +282,12 @@ struct scoutfs_mounted_client_btree_val { * about item logs, it's about clients making changes to trees. */ struct scoutfs_log_trees { - struct scoutfs_balloc_root alloc_root; - struct scoutfs_balloc_root free_root; + struct scoutfs_radix_root meta_avail; + struct scoutfs_radix_root meta_freed; struct scoutfs_btree_root item_root; struct scoutfs_btree_ref bloom_ref; - struct scoutfs_balloc_root data_alloc; - struct scoutfs_balloc_root data_free; + struct scoutfs_radix_root data_avail; + struct scoutfs_radix_root data_freed; __le64 rid; __le64 nr; } __packed; @@ -309,12 +298,12 @@ struct scoutfs_log_trees_key { } __packed; struct scoutfs_log_trees_val { - struct scoutfs_balloc_root alloc_root; - struct scoutfs_balloc_root free_root; + struct scoutfs_radix_root meta_avail; + struct scoutfs_radix_root meta_freed; struct scoutfs_btree_root item_root; struct scoutfs_btree_ref bloom_ref; - struct scoutfs_balloc_root data_alloc; - struct scoutfs_balloc_root data_free; + struct scoutfs_radix_root data_avail; + struct scoutfs_radix_root data_freed; } __packed; struct scoutfs_log_item_value { @@ -489,25 +478,22 @@ struct scoutfs_super_block { __u8 uuid[SCOUTFS_UUID_BYTES]; __le64 next_ino; __le64 next_trans_seq; - __le64 total_blocks; - __le64 next_uninit_meta_blkno; - __le64 last_uninit_meta_blkno; - __le64 next_uninit_data_blkno; - __le64 last_uninit_data_blkno; - __le64 core_balloc_cursor; - __le64 core_data_alloc_cursor; + __le64 total_meta_blocks; /* both static and dynamic */ + __le64 first_meta_blkno; /* first dynamically allocated */ + __le64 last_meta_blkno; + __le64 total_data_blocks; + __le64 first_data_blkno; + __le64 last_data_blkno; __le64 free_blocks; - __le64 first_fs_blkno; - __le64 last_fs_blkno; __le64 quorum_fenced_term; __le64 quorum_server_term; __le64 unmount_barrier; __u8 quorum_count; struct scoutfs_inet_addr server_addr; - struct scoutfs_balloc_root core_balloc_alloc; - struct scoutfs_balloc_root core_balloc_free; - struct scoutfs_balloc_root core_data_alloc; - struct scoutfs_balloc_root core_data_free; + struct scoutfs_radix_root core_meta_avail; + struct scoutfs_radix_root core_meta_freed; + struct scoutfs_radix_root core_data_avail; + struct scoutfs_radix_root core_data_freed; struct scoutfs_btree_root fs_root; struct scoutfs_btree_root logs_root; struct scoutfs_btree_root lock_clients; diff --git a/utils/src/mkfs.c b/utils/src/mkfs.c index 72fde3a0..63a97aec 100644 --- a/utils/src/mkfs.c +++ b/utils/src/mkfs.c @@ -26,6 +26,7 @@ #include "dev.h" #include "key.h" #include "bitops.h" +#include "radix.h" static int write_raw_block(int fd, u64 blkno, void *blk) { @@ -84,6 +85,199 @@ static char *size_str(u64 nr, unsigned size) 
#define SIZE_FMT "%llu (%.2f %s)" #define SIZE_ARGS(nr, sz) (nr), size_flt(nr, sz), size_str(nr, sz) +/* + * Update a reference to a block of references that has been modified. We + * walk all the references and rebuild the ref tracking. + */ +static void update_parent_ref(struct scoutfs_radix_ref *ref, + struct scoutfs_radix_block *rdx) +{ + int i; + + ref->sm_total = cpu_to_le64(0); + ref->lg_total = cpu_to_le64(0); + + rdx->sm_first = cpu_to_le32(SCOUTFS_RADIX_REFS); + rdx->lg_first = cpu_to_le32(SCOUTFS_RADIX_REFS); + + for (i = 0; i < SCOUTFS_RADIX_REFS; i++) { + if (le32_to_cpu(rdx->sm_first) == SCOUTFS_RADIX_REFS && + rdx->refs[i].sm_total != 0) + rdx->sm_first = cpu_to_le32(i); + if (le32_to_cpu(rdx->lg_first) == SCOUTFS_RADIX_REFS && + rdx->refs[i].lg_total != 0) + rdx->lg_first = cpu_to_le32(i); + + le64_add_cpu(&ref->sm_total, + le64_to_cpu(rdx->refs[i].sm_total)); + le64_add_cpu(&ref->lg_total, + le64_to_cpu(rdx->refs[i].lg_total)); + } +} + +/* + * Initialize all the blocks in a path to a leaf with the given blocks + * set. We know that we're being called to set all the bits in a region + * by setting the left and right partial leafs of the region. We first + * set the left and set full references down the left path, then we're + * called on the right and set full to the left and clear full refs past + * the right. + * + * The caller provides an array of block buffers and a starting block + * number to allocate blocks from and reference blocks within. It's the + * world's dumbest block cache. + */ +static void set_radix_path(struct scoutfs_super_block *super, int *inds, + struct scoutfs_radix_ref *ref, int level, bool left, + void **blocks, u64 blkno_base, u64 *next_blkno, + u64 first, u64 last) +{ + struct scoutfs_radix_block *rdx; + int lg_ind; + int lg_after; + u64 bno; + int ind; + int end; + int i; + + if (ref->blkno == 0) { + bno = (*next_blkno)++; + ref->blkno = cpu_to_le64(bno); + ref->seq = cpu_to_le64(1); + } + + rdx = blocks[le64_to_cpu(ref->blkno) - blkno_base]; + + if (level) { + ind = inds[level]; + + /* initialize empty parent blocks with empty refs */ + if (ref->sm_total == 0) { + for (i = 0; i < SCOUTFS_RADIX_REFS; i++) + radix_init_ref(&rdx->refs[i], level - 1, false); + } + + if (left) { + /* initialize full refs from left to end */ + for (i = ind + 1; i < SCOUTFS_RADIX_REFS; i++) + radix_init_ref(&rdx->refs[i], level - 1, true); + } else { + /* initialize full refs from start or left to right */ + for (i = le32_to_cpu(rdx->sm_first) != + SCOUTFS_RADIX_REFS ? + le32_to_cpu(rdx->sm_first) + 1 : 0; + i < ind; i++) + radix_init_ref(&rdx->refs[i], level - 1, true); + + /* wipe full refs from right (maybe including) to end */ + for (i = le64_to_cpu(rdx->refs[ind].blkno) == U64_MAX ? 
+ ind : ind + 1; i < SCOUTFS_RADIX_REFS; i++) + radix_init_ref(&rdx->refs[i], level - 1, false); + } + + set_radix_path(super, inds, &rdx->refs[ind], level - 1, left, + blocks, blkno_base, next_blkno, first, last); + update_parent_ref(ref, rdx); + + } else { + ind = first - radix_calc_leaf_bit(first); + end = last - radix_calc_leaf_bit(last); + for (i = ind; i <= end; i++) + set_bit_le(i, rdx->bits); + + rdx->sm_first = cpu_to_le32(ind); + ref->sm_total = cpu_to_le64(end - ind + 1); + + lg_ind = round_up(ind, SCOUTFS_RADIX_LG_BITS); + lg_after = round_down(end + 1, SCOUTFS_RADIX_LG_BITS); + + if (lg_ind < SCOUTFS_RADIX_BITS) + rdx->lg_first = cpu_to_le32(lg_ind); + else + rdx->lg_first = cpu_to_le32(SCOUTFS_RADIX_BITS); + ref->lg_total = cpu_to_le64(lg_after - lg_ind); + } +} + +/* + * Initialize a new radix allocator with the region of bits set. We + * initialize and write populated blocks down the paths to the two ends + * of the interval and write full refs in between. + */ +static int write_radix_blocks(struct scoutfs_super_block *super, int fd, + struct scoutfs_radix_root *root, + u64 blkno, u64 first, u64 last) +{ + struct scoutfs_radix_block *rdx; + void **blocks; + u64 next_blkno; + u64 edge; + u8 height; + int alloced; + int used; + int *inds; + int ret; + int i; + + height = radix_height_from_last(last); + inds = alloca(sizeof(inds[0]) * height); + alloced = height * 2; + next_blkno = blkno; + + /* allocate all the blocks we might need */ + blocks = calloc(alloced, sizeof(*blocks)); + if (!blocks) + return -ENOMEM; + + for (i = 0; i < alloced; i++) { + blocks[i] = calloc(1, SCOUTFS_BLOCK_SIZE); + if (blocks[i] == NULL) { + ret = -ENOMEM; + goto out; + } + } + + /* initialize empty root ref */ + memset(root, 0, sizeof(struct scoutfs_radix_root)); + root->height = height; + radix_init_ref(&root->ref, height - 1, false); + + edge = radix_calc_leaf_bit(first) + SCOUTFS_RADIX_BITS - 1; + radix_calc_level_inds(inds, height, first); + set_radix_path(super, inds, &root->ref, root->height - 1, true, blocks, + blkno, &next_blkno, first, min(edge, last)); + + edge = radix_calc_leaf_bit(last); + radix_calc_level_inds(inds, height, last); + set_radix_path(super, inds, &root->ref, root->height - 1, false, blocks, + blkno, &next_blkno, max(first, edge), last); + + used = next_blkno - blkno; + + /* write out all the dirtied blocks */ + for (i = 0; i < used; i++) { + rdx = blocks[i]; + rdx->hdr.magic = cpu_to_le32(SCOUTFS_BLOCK_MAGIC_RADIX); + rdx->hdr.fsid = super->hdr.fsid; + rdx->hdr.seq = cpu_to_le64(1); + rdx->hdr.blkno = cpu_to_le64(blkno + i); + rdx->hdr.crc = cpu_to_le32(crc_block(&rdx->hdr)); + ret = write_raw_block(fd, blkno + i, rdx); + if (ret < 0) + goto out; + } + + ret = used; +out: + if (blocks) { + for (i = 0; i < alloced && blocks[i]; i++) + free(blocks[i]); + free(blocks); + } + + return ret; +} + /* * Make a new file system by writing: * - super blocks @@ -97,8 +291,6 @@ static int write_new_fs(char *path, int fd, u8 quorum_count) struct scoutfs_inode *inode; struct scoutfs_btree_block *bt; struct scoutfs_btree_item *btitem; - struct scoutfs_balloc_item_key *bik; - struct scoutfs_balloc_item_val *biv; struct scoutfs_key key; struct timeval tv; char uuid_str[37]; @@ -107,6 +299,7 @@ static int write_new_fs(char *path, int fd, u8 quorum_count) u64 limit; u64 size; u64 total_blocks; + u64 meta_alloc_blocks; u64 next_meta; u64 last_meta; u64 next_data; @@ -142,6 +335,13 @@ static int write_new_fs(char *path, int fd, u8 quorum_count) } total_blocks = size / SCOUTFS_BLOCK_SIZE; + /* 
metadata blocks start after the quorum blocks */ + next_meta = SCOUTFS_QUORUM_BLKNO + SCOUTFS_QUORUM_BLOCKS; + /* data blocks are after metadata, we'll say 1:4 for now */ + next_data = round_up(next_meta + ((total_blocks - next_meta) / 5), + SCOUTFS_RADIX_BITS); + last_meta = next_data - 1; + last_data = total_blocks - 1; /* partially initialize the super so we can use it to init others */ memset(super, 0, SCOUTFS_BLOCK_SIZE); @@ -152,18 +352,14 @@ static int write_new_fs(char *path, int fd, u8 quorum_count) uuid_generate(super->uuid); super->next_ino = cpu_to_le64(SCOUTFS_ROOT_INO + 1); super->next_trans_seq = cpu_to_le64(1); - super->total_blocks = cpu_to_le64(total_blocks); + super->total_meta_blocks = cpu_to_le64(last_meta + 1); + super->first_meta_blkno = cpu_to_le64(next_meta); + super->last_meta_blkno = cpu_to_le64(last_meta); + super->total_data_blocks = cpu_to_le64(last_data - next_data + 1); + super->first_data_blkno = cpu_to_le64(next_data); + super->last_data_blkno = cpu_to_le64(last_data); super->quorum_count = quorum_count; - /* metadata blocks start after the quorum blocks */ - next_meta = SCOUTFS_QUORUM_BLKNO + SCOUTFS_QUORUM_BLOCKS; - - /* data blocks are after metadata, we'll say 1:4 for now */ - next_data = round_up(next_meta + ((total_blocks - next_meta) / 5), - SCOUTFS_BLOCK_BITMAP_BITS); - last_meta = next_data - 1; - last_data = total_blocks - 1; - /* fs root starts with root inode and its index items */ blkno = next_meta++; @@ -224,47 +420,31 @@ static int write_new_fs(char *path, int fd, u8 quorum_count) if (ret) goto out; - /* metadata block allocator has single item, server continues init */ - blkno = next_meta++; - - super->core_balloc_alloc.root.ref.blkno = cpu_to_le64(blkno); - super->core_balloc_alloc.root.ref.seq = cpu_to_le64(1); - super->core_balloc_alloc.root.height = 1; - - /* XXX magic */ - - memset(bt, 0, SCOUTFS_BLOCK_SIZE); - bt->hdr.fsid = super->hdr.fsid; - bt->hdr.blkno = cpu_to_le64(blkno); - bt->hdr.seq = cpu_to_le64(1); - bt->nr_items = cpu_to_le32(1); - - /* btree item allocated from the back of the block */ - biv = (void *)bt + SCOUTFS_BLOCK_SIZE - sizeof(*biv); - bik = (void *)biv - sizeof(*bik); - btitem = (void *)bik - sizeof(*btitem); - - bt->item_hdrs[0].off = cpu_to_le32((long)btitem - (long)bt); - btitem->key_len = cpu_to_le16(sizeof(*bik)); - btitem->val_len = cpu_to_le16(sizeof(*biv)); - - bik->base = cpu_to_be64(0); /* XXX true? */ - - /* set all the bits past our final used blkno */ - super->core_balloc_free.total_free = - cpu_to_le64(SCOUTFS_BALLOC_ITEM_BITS - next_meta); - for (i = next_meta; i < SCOUTFS_BALLOC_ITEM_BITS; i++) - set_bit_le(i, &biv->bits); - next_meta = i; - - bt->free_end = bt->item_hdrs[le32_to_cpu(bt->nr_items) - 1].off; - - bt->hdr.magic = cpu_to_le32(SCOUTFS_BLOCK_MAGIC_BTREE); - bt->hdr.crc = cpu_to_le32(crc_block(&bt->hdr)); - - ret = write_raw_block(fd, blkno, bt); - if (ret) + /* write out radix allocator blocks for data */ + ret = write_radix_blocks(super, fd, &super->core_data_avail, next_meta, + next_data, last_data); + if (ret < 0) goto out; + next_meta += ret; + + super->core_data_freed.height = super->core_data_avail.height; + radix_init_ref(&super->core_data_freed.ref, 0, false); + + meta_alloc_blocks = radix_blocks_needed(next_meta, last_meta); + + /* + * Write out radix alloc blocks, knowing that the region we mark + * has to start after the blocks we store the allocator itself in. 
+ */ + ret = write_radix_blocks(super, fd, &super->core_meta_avail, + next_meta, next_meta + meta_alloc_blocks, + last_meta); + if (ret < 0) + goto out; + next_meta += ret; + + super->core_meta_freed.height = super->core_meta_avail.height; + radix_init_ref(&super->core_meta_freed.ref, 0, false); /* zero out quorum blocks */ for (i = 0; i < SCOUTFS_QUORUM_BLOCKS; i++) { @@ -277,10 +457,6 @@ static int write_new_fs(char *path, int fd, u8 quorum_count) } /* fill out allocator fields now that we've written our blocks */ - super->next_uninit_meta_blkno = cpu_to_le64(next_meta); - super->last_uninit_meta_blkno = cpu_to_le64(last_meta); - super->next_uninit_data_blkno = cpu_to_le64(next_data); - super->last_uninit_data_blkno = cpu_to_le64(last_data); super->free_blocks = cpu_to_le64(total_blocks - next_meta); /* write the super block */ @@ -312,9 +488,9 @@ static int write_new_fs(char *path, int fd, u8 quorum_count) le64_to_cpu(super->format_hash), uuid_str, SIZE_ARGS(total_blocks, SCOUTFS_BLOCK_SIZE), - SIZE_ARGS(last_meta - next_meta + 1, + SIZE_ARGS(le64_to_cpu(super->total_meta_blocks), SCOUTFS_BLOCK_SIZE), - SIZE_ARGS(last_data - next_data + 1, + SIZE_ARGS(le64_to_cpu(super->total_data_blocks), SCOUTFS_BLOCK_SIZE), super->quorum_count); diff --git a/utils/src/print.c b/utils/src/print.c index dd2989c3..8a4a3e62 100644 --- a/utils/src/print.c +++ b/utils/src/print.c @@ -20,6 +20,7 @@ #include "cmd.h" #include "crc.h" #include "key.h" +#include "radix.h" static void *read_block(int fd, u64 blkno) { @@ -258,6 +259,18 @@ static int print_logs_item(void *key, unsigned key_len, void *val, return 0; } +#define RADREF_F \ + "blkno %llu seq %llu sm_total %llu lg_total %llu" +#define RADREF_A(ref) \ + le64_to_cpu((ref)->blkno), le64_to_cpu((ref)->seq), \ + le64_to_cpu((ref)->sm_total), le64_to_cpu((ref)->lg_total) + +#define RADROOT_F \ + "height %u next_find_bit %llu ref: "RADREF_F +#define RADROOT_A(root) \ + (root)->height, le64_to_cpu((root)->next_find_bit), \ + RADREF_A(&(root)->ref) + /* same as fs item but with a small header in the value */ static int print_log_trees_item(void *key, unsigned key_len, void *val, unsigned val_len, void *arg) @@ -270,33 +283,21 @@ static int print_log_trees_item(void *key, unsigned key_len, void *val, /* only items in leaf blocks have values */ if (val) { - printf(" alloc_root: total_free %llu root: height %u blkno %llu seq %llu\n" - " free_root: total_free %llu root: height %u blkno %llu seq %llu\n" + printf(" meta_avail: "RADROOT_F"\n" + " meta_freed: "RADROOT_F"\n" " item_root: height %u blkno %llu seq %llu\n" " bloom_ref: blkno %llu seq %llu\n" - " data_alloc: total_free %llu root: height %u blkno %llu seq %llu\n" - " data_free: total_free %llu root: height %u blkno %llu seq %llu\n", - le64_to_cpu(ltv->alloc_root.total_free), - ltv->alloc_root.root.height, - le64_to_cpu(ltv->alloc_root.root.ref.blkno), - le64_to_cpu(ltv->alloc_root.root.ref.seq), - le64_to_cpu(ltv->free_root.total_free), - ltv->free_root.root.height, - le64_to_cpu(ltv->free_root.root.ref.blkno), - le64_to_cpu(ltv->free_root.root.ref.seq), + " data_avail: "RADROOT_F"\n" + " data_freed: "RADROOT_F"\n", + RADROOT_A(&ltv->meta_avail), + RADROOT_A(&ltv->meta_freed), ltv->item_root.height, le64_to_cpu(ltv->item_root.ref.blkno), le64_to_cpu(ltv->item_root.ref.seq), le64_to_cpu(ltv->bloom_ref.blkno), le64_to_cpu(ltv->bloom_ref.seq), - le64_to_cpu(ltv->data_alloc.total_free), - ltv->data_alloc.root.height, - le64_to_cpu(ltv->data_alloc.root.ref.blkno), - le64_to_cpu(ltv->data_alloc.root.ref.seq), -
le64_to_cpu(ltv->data_free.total_free), - ltv->data_free.root.height, - le64_to_cpu(ltv->data_free.root.ref.blkno), - le64_to_cpu(ltv->data_free.root.ref.seq)); + RADROOT_A(&ltv->data_avail), + RADROOT_A(&ltv->data_freed)); } return 0; @@ -323,31 +324,6 @@ static int print_trans_seqs_entry(void *key, unsigned key_len, void *val, return 0; } -static int print_balloc_entry(void *key, unsigned key_len, void *val, - unsigned val_len, void *arg) -{ - struct scoutfs_balloc_item_key *bik = key; -// struct scoutfs_balloc_item_val *biv = val; - - printf(" base %llu\n", - be64_to_cpu(bik->base)); - - return 0; -} - -static int print_bitmap_entry(void *key, unsigned key_len, void *val, - unsigned val_len, void *arg) -{ - struct scoutfs_block_bitmap_key *bbk = key; - struct scoutfs_packed_bitmap *pb = val; - - printf(" type %u base %llu present 0x%016llx set 0x%016llx\n", - bbk->type, be64_to_cpu(bbk->base), - le64_to_cpu(pb->present), le64_to_cpu(pb->set)); - - return 0; -} - static int print_mounted_client_entry(void *key, unsigned key_len, void *val, unsigned val_len, void *arg) { @@ -460,6 +436,71 @@ static int print_btree(int fd, struct scoutfs_super_block *super, char *which, return ret; } +static int print_radix_block(int fd, struct scoutfs_radix_ref *par, int level) +{ + struct scoutfs_radix_block *rdx; + u64 blkno; + int prev; + int ret; + int err; + int i; + + /* XXX not printing bitmap leaf blocks */ + blkno = le64_to_cpu(par->blkno); + if (blkno == 0 || blkno == U64_MAX || level == 0) + return 0; + + rdx = read_block(fd, le64_to_cpu(par->blkno)); + if (!rdx) { + ret = -ENOMEM; + goto out; + } + + printf("radix parent block blkno %llu\n", le64_to_cpu(par->blkno)); + print_block_header(&rdx->hdr); + printf(" sm_first %u lg_first %u\n", + le32_to_cpu(rdx->sm_first), le32_to_cpu(rdx->lg_first)); + + prev = 0; + for (i = 0; i < SCOUTFS_RADIX_REFS; i++) { + /* only skip if the next ref is identically full/empty */ + if ((le64_to_cpu(rdx->refs[i].blkno) == 0 || + le64_to_cpu(rdx->refs[i].blkno) == U64_MAX) && + (i + 1) < SCOUTFS_RADIX_REFS && + (le64_to_cpu(rdx->refs[i].blkno) == + le64_to_cpu(rdx->refs[i + 1].blkno))) { + prev++; + continue; + } + + if (prev) { + printf(" [%u - %u]: (%s): ", i - prev, i, + (le64_to_cpu(rdx->refs[i].blkno) == 0) ?
"empty" : + "full"); + prev = 0; + } else { + printf(" [%u]: ", i); + } + + printf(RADREF_F"\n", RADREF_A(&rdx->refs[i])); + } + + ret = 0; + for (i = 0; i < SCOUTFS_RADIX_REFS; i++) { + if (le64_to_cpu(rdx->refs[i].blkno) != 0 && + le64_to_cpu(rdx->refs[i].blkno) != U64_MAX) { + err = print_radix_block(fd, &rdx->refs[i], level - 1); + if (err < 0 && ret == 0) + ret = err; + } + } + +out: + free(rdx); + + return ret; +} + struct print_recursion_args { struct scoutfs_super_block *super; int fd; @@ -469,52 +510,35 @@ struct print_recursion_args { static int print_log_trees_roots(void *key, unsigned key_len, void *val, unsigned val_len, void *arg) { - struct scoutfs_log_trees_key *ltk = key; +// struct scoutfs_log_trees_key *ltk = key; struct scoutfs_log_trees_val *ltv = val; struct print_recursion_args *pa = arg; - struct log_trees_roots { - char *fmt; - struct scoutfs_btree_root *root; - print_item_func func; - } roots[] = { - { "log_tree_rid:%llu_nr:%llu_alloc", - &ltv->alloc_root.root, - print_balloc_entry, - }, - { "log_tree_rid:%llu_nr:%llu_free", - &ltv->free_root.root, - print_balloc_entry, - }, - { "log_tree_rid:%llu_nr:%llu_data_alloc", - &ltv->data_alloc.root, - print_bitmap_entry, - }, - { "log_tree_rid:%llu_nr:%llu_data_free", - &ltv->data_free.root, - print_bitmap_entry, - }, - { "log_tree_rid:%llu_nr:%llu_item", - &ltv->item_root, - print_logs_item, - }, - }; - char which[100]; - int ret; + int ret = 0; int err; - int i; /* XXX doesn't print the bloom block */ - ret = 0; - for (i = 0; i < array_size(roots); i++) { - snprintf(which, sizeof(which) - 1, roots[i].fmt, - be64_to_cpu(ltk->rid), be64_to_cpu(ltk->nr)); + err = print_radix_block(pa->fd, &ltv->meta_avail.ref, + ltv->meta_avail.height - 1); + if (err && !ret) + ret = err; + err = print_radix_block(pa->fd, &ltv->meta_freed.ref, + ltv->meta_freed.height - 1); + if (err && !ret) + ret = err; + err = print_radix_block(pa->fd, &ltv->data_avail.ref, + ltv->data_avail.height - 1); + if (err && !ret) + ret = err; + err = print_radix_block(pa->fd, &ltv->data_freed.ref, + ltv->data_freed.height - 1); + if (err && !ret) + ret = err; - err = print_btree(pa->fd, pa->super, which, roots[i].root, - roots[i].func, NULL); - if (err && !ret) - ret = err; - } + err = print_btree(pa->fd, pa->super, "", &ltv->item_root, + print_logs_item, NULL); + if (err && !ret) + ret = err; return ret; } @@ -657,51 +681,37 @@ static void print_super_block(struct scoutfs_super_block *super, u64 blkno) /* XXX these are all in a crazy order */ printf(" next_ino %llu next_trans_seq %llu\n" - " total_blocks %llu free_blocks %llu\n" - " next_uninit_meta_blkno %llu last_uninit_meta_blkno %llu\n" - " next_uninit_data_blkno %llu last_uninit_data_blkno %llu\n" - " core_balloc_cursor %llu core_data_alloc_cursor %llu\n" + " total_meta_blocks %llu first_meta_blkno %llu last_meta_blkno %llu\n" + " total_data_blocks %llu first_data_blkno %llu last_data_blkno %llu\n" + " free_blocks %llu\n" " quorum_fenced_term %llu quorum_server_term %llu unmount_barrier %llu\n" " quorum_count %u server_addr %s\n" - " core_balloc_alloc: total_free %llu root: height %u blkno %llu seq %llu\n" - " core_balloc_free: total_free %llu root: height %u blkno %llu seq %llu\n" - " core_data_alloc: total_free %llu root: height %u blkno %llu seq %llu\n" - " core_data_free: total_free %llu root: height %u blkno %llu seq %llu\n" + " core_meta_avail: "RADROOT_F"\n" + " core_meta_freed: "RADROOT_F"\n" + " core_data_avail: "RADROOT_F"\n" + " core_data_freed: "RADROOT_F"\n" " lock_clients root: height %u blkno %llu seq
%llu\n" " mounted_clients root: height %u blkno %llu seq %llu\n" " trans_seqs root: height %u blkno %llu seq %llu\n" " fs_root btree root: height %u blkno %llu seq %llu\n", le64_to_cpu(super->next_ino), le64_to_cpu(super->next_trans_seq), - le64_to_cpu(super->total_blocks), + le64_to_cpu(super->total_meta_blocks), + le64_to_cpu(super->first_meta_blkno), + le64_to_cpu(super->last_meta_blkno), + le64_to_cpu(super->total_data_blocks), + le64_to_cpu(super->first_data_blkno), + le64_to_cpu(super->last_data_blkno), le64_to_cpu(super->free_blocks), - le64_to_cpu(super->next_uninit_meta_blkno), - le64_to_cpu(super->last_uninit_meta_blkno), - le64_to_cpu(super->next_uninit_data_blkno), - le64_to_cpu(super->last_uninit_data_blkno), - le64_to_cpu(super->core_balloc_cursor), - le64_to_cpu(super->core_data_alloc_cursor), le64_to_cpu(super->quorum_fenced_term), le64_to_cpu(super->quorum_server_term), le64_to_cpu(super->unmount_barrier), super->quorum_count, server_addr, - le64_to_cpu(super->core_balloc_alloc.total_free), - super->core_balloc_alloc.root.height, - le64_to_cpu(super->core_balloc_alloc.root.ref.blkno), - le64_to_cpu(super->core_balloc_alloc.root.ref.seq), - le64_to_cpu(super->core_balloc_free.total_free), - super->core_balloc_free.root.height, - le64_to_cpu(super->core_balloc_free.root.ref.blkno), - le64_to_cpu(super->core_balloc_free.root.ref.seq), - le64_to_cpu(super->core_data_alloc.total_free), - super->core_data_alloc.root.height, - le64_to_cpu(super->core_data_alloc.root.ref.blkno), - le64_to_cpu(super->core_data_alloc.root.ref.seq), - le64_to_cpu(super->core_data_free.total_free), - super->core_data_free.root.height, - le64_to_cpu(super->core_data_free.root.ref.blkno), - le64_to_cpu(super->core_data_free.root.ref.seq), + RADROOT_A(&super->core_meta_avail), + RADROOT_A(&super->core_meta_freed), + RADROOT_A(&super->core_data_avail), + RADROOT_A(&super->core_data_freed), super->lock_clients.height, le64_to_cpu(super->lock_clients.ref.blkno), le64_to_cpu(super->lock_clients.ref.seq), @@ -748,27 +758,20 @@ static int print_volume(int fd) if (err && !ret) ret = err; - err = print_btree(fd, super, "core_balloc_alloc", - &super->core_balloc_alloc.root, - print_balloc_entry, NULL); + err = print_radix_block(fd, &super->core_meta_avail.ref, + super->core_meta_avail.height - 1); if (err && !ret) ret = err; - - err = print_btree(fd, super, "core_balloc_free", - &super->core_balloc_free.root, - print_balloc_entry, NULL); + err = print_radix_block(fd, &super->core_meta_freed.ref, + super->core_meta_freed.height - 1); if (err && !ret) ret = err; - - err = print_btree(fd, super, "core_data_alloc", - &super->core_data_alloc.root, - print_bitmap_entry, NULL); + err = print_radix_block(fd, &super->core_data_avail.ref, + super->core_data_avail.height - 1); if (err && !ret) ret = err; - - err = print_btree(fd, super, "core_data_free", - &super->core_data_free.root, - print_bitmap_entry, NULL); + err = print_radix_block(fd, &super->core_data_freed.ref, + super->core_data_freed.height - 1); if (err && !ret) ret = err; diff --git a/utils/src/radix.c b/utils/src/radix.c new file mode 100644 index 00000000..66a400a3 --- /dev/null +++ b/utils/src/radix.c @@ -0,0 +1,106 @@ +#include + +#include "sparse.h" +#include "util.h" +#include "format.h" +#include "radix.h" + +/* return the height of a tree needed to store the last bit */ +u8 radix_height_from_last(u64 last) +{ + u64 bit = SCOUTFS_RADIX_BITS - 1; + u64 mult = SCOUTFS_RADIX_BITS; + int i; + + for (i = 1; i <= U8_MAX; i++) { + if (bit >= last) + return i; 
+ bit += (u64)(SCOUTFS_RADIX_REFS - 1) * mult; + mult *= SCOUTFS_RADIX_REFS; + } + + return U8_MAX; +} + +u64 radix_full_subtree_total(int level) +{ + u64 total = SCOUTFS_RADIX_BITS; + int i; + + for (i = 1; i <= level; i++) + total *= SCOUTFS_RADIX_REFS; + + return total; +} + +/* + * Initialize a reference to a block at the given level. + */ +void radix_init_ref(struct scoutfs_radix_ref *ref, int level, bool full) +{ + u64 tot; + + if (full) { + tot = radix_full_subtree_total(level); + + ref->blkno = cpu_to_le64(U64_MAX); + ref->seq = cpu_to_le64(0); + ref->sm_total = cpu_to_le64(tot); + ref->lg_total = cpu_to_le64(tot); + } else { + ref->blkno = cpu_to_le64(0); + ref->seq = cpu_to_le64(0); + ref->sm_total = cpu_to_le64(0); + ref->lg_total = cpu_to_le64(0); + } +} + +void radix_calc_level_inds(int *inds, u8 height, u64 bit) +{ + u32 ind; + int i; + + ind = bit % SCOUTFS_RADIX_BITS; + bit = bit / SCOUTFS_RADIX_BITS; + inds[0] = ind; + + for (i = 1; i < height; i++) { + ind = bit % SCOUTFS_RADIX_REFS; + bit = bit / SCOUTFS_RADIX_REFS; + inds[i] = ind; + } +} + +u64 radix_calc_leaf_bit(u64 bit) +{ + return bit - (bit % SCOUTFS_RADIX_BITS); +} + +/* + * The number of blocks needed to initialize a radix with left and right + * paths. The first time we find a level where the parent refs are at + * different indices determines where the paths diverge at lower levels. + * If the refs never diverge then the two paths traverse the same blocks + * and we just need blocks for the height of the tree. + */ +int radix_blocks_needed(u64 a, u64 b) +{ + u8 height = radix_height_from_last(b); + int *a_inds; + int *b_inds; + int i; + + a_inds = alloca(sizeof(a_inds[0]) * height); + b_inds = alloca(sizeof(b_inds[0]) * height); + + radix_calc_level_inds(a_inds, height, a); + radix_calc_level_inds(b_inds, height, b); + + for (i = height - 1; i > 0; i--) { + if (a_inds[i] != b_inds[i]) { + return (i * 2) + (height - i); + } + } + + return height; +} diff --git a/utils/src/radix.h b/utils/src/radix.h new file mode 100644 index 00000000..31f4db8c --- /dev/null +++ b/utils/src/radix.h @@ -0,0 +1,13 @@ +#ifndef _RADIX_H_ +#define _RADIX_H_ + +#include + +u8 radix_height_from_last(u64 last); +u64 radix_full_subtree_total(int level); +void radix_init_ref(struct scoutfs_radix_ref *ref, int level, bool full); +void radix_calc_level_inds(int *inds, u8 height, u64 bit); +u64 radix_calc_leaf_bit(u64 bit); +int radix_blocks_needed(u64 a, u64 b); + +#endif
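
The radix geometry is easiest to see by poking at the helpers directly. The harness below is illustrative only and not part of the patch: the main() wrapper, the default block numbers, and the build line are assumptions, but it calls radix_height_from_last(), radix_calc_leaf_bit(), radix_calc_level_inds(), and radix_blocks_needed() the same way mkfs does, and it borrows the includes that radix.c uses.

/*
 * Illustrative harness, not part of the patch: exercise the radix
 * geometry helpers the way mkfs does.  Borrows radix.c's includes.
 */
#include <stdio.h>
#include <stdlib.h>

#include "sparse.h"
#include "util.h"
#include "format.h"
#include "radix.h"

int main(int argc, char **argv)
{
	u64 first = 2048;	/* hypothetical first blkno of a free region */
	u64 last = 1048575;	/* hypothetical last blkno of the region */
	u8 height;
	int *inds;
	int i;

	if (argc == 3) {
		first = strtoull(argv[1], NULL, 0);
		last = strtoull(argv[2], NULL, 0);
	}

	/* height of the tree whose bit space covers the last block */
	height = radix_height_from_last(last);
	inds = calloc(height, sizeof(inds[0]));
	if (!inds)
		return 1;

	printf("%llu bits per leaf, %llu refs per parent, height %u\n",
	       (unsigned long long)SCOUTFS_RADIX_BITS,
	       (unsigned long long)SCOUTFS_RADIX_REFS, height);

	/* the leaf containing 'first' starts at this aligned bit */
	printf("bit %llu is in the leaf starting at bit %llu\n",
	       (unsigned long long)first,
	       (unsigned long long)radix_calc_leaf_bit(first));

	/* path indices down to 'last': leaf bit offset at level 0, ref index above */
	radix_calc_level_inds(inds, height, last);
	for (i = height - 1; i >= 0; i--)
		printf("  level %d index %d\n", i, inds[i]);

	/* how many blocks mkfs would write to mark [first, last] */
	printf("radix_blocks_needed(%llu, %llu) = %d\n",
	       (unsigned long long)first, (unsigned long long)last,
	       radix_blocks_needed(first, last));

	free(inds);
	return 0;
}

Built against radix.c with something like cc -I utils/src test_radix_geom.c utils/src/radix.c (file name and paths assumed), it prints the tree height needed to cover the last bit, the indices along the path to that bit, and the number of radix blocks write_radix_blocks() would have to populate for the interval.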
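
One convention worth spelling out is how a ref encodes an entirely empty or entirely full subtree without any backing block: blkno 0 means empty, blkno U64_MAX means full, and sm_total/lg_total carry the whole subtree's counts, which is why set_radix_path() and print_radix_block() never have to descend into them. A minimal check of that bookkeeping, again illustrative and not part of the patch (the main() wrapper is an assumption):

/*
 * Illustrative check, not part of the patch: a ref with blkno U64_MAX
 * stands in for a completely full subtree and carries its totals, a
 * ref with blkno 0 stands in for a completely empty one.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#include "sparse.h"
#include "util.h"
#include "format.h"
#include "radix.h"

int main(void)
{
	struct scoutfs_radix_ref ref;
	int level = 2;		/* a parent two levels above the leaves */

	/* full: no block is written, but the totals cover the subtree */
	radix_init_ref(&ref, level, true);
	assert(ref.blkno == cpu_to_le64(U64_MAX));
	assert(ref.sm_total == cpu_to_le64(radix_full_subtree_total(level)));
	assert(ref.lg_total == cpu_to_le64(radix_full_subtree_total(level)));

	/* empty: no block and no counts */
	radix_init_ref(&ref, level, false);
	assert(ref.blkno == cpu_to_le64(0));
	assert(ref.sm_total == cpu_to_le64(0));

	printf("a full level-%d subtree covers %llu bits\n", level,
	       (unsigned long long)radix_full_subtree_total(level));
	return 0;
}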