scoutfs-utils: add support for radix alloc

Add support for initializing radix allocator blocks that describe free
space in mkfs and support for printing them out.

Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
Zach Brown
2020-02-16 17:32:54 -08:00
committed by Zach Brown
parent 34c3d903d9
commit ff436db49b
5 changed files with 542 additions and 258 deletions

View File

@@ -8,6 +8,7 @@
#define SCOUTFS_BLOCK_MAGIC_SUPER 0x103c428b
#define SCOUTFS_BLOCK_MAGIC_BTREE 0xe597f96d
#define SCOUTFS_BLOCK_MAGIC_BLOOM 0x31995604
#define SCOUTFS_BLOCK_MAGIC_RADIX 0xebeb5e65
/*
* The super block and btree blocks are fixed 4k.
@@ -132,6 +133,43 @@ struct scoutfs_key {
#define skpe_base _sk_second
#define skpe_part _sk_fourth
/*
 * An on-disk radix allocator block.  Parent blocks use the refs[]
 * array of references to child blocks; leaf blocks use the bits[]
 * bitmap where a set bit marks a free block.  sm_first and lg_first
 * cache the first index worth searching for a small or large region
 * (NOTE(review): inferred from the mkfs init code below -- confirm
 * against the kernel-side allocator).
 */
struct scoutfs_radix_block {
	struct scoutfs_block_header hdr;
	__le32 sm_first;
	__le32 lg_first;
	union {
		struct scoutfs_radix_ref {
			__le64 blkno;	/* 0 == empty subtree, U64_MAX == entirely full */
			__le64 seq;
			__le64 sm_total;	/* total free bits in the subtree */
			__le64 lg_total;	/* free bits that lie in fully-free large regions */
		} __packed refs[0];
		__le64 bits[0];
	} __packed;
} __packed;
/*
 * The root of a radix allocator tree: its height (number of block
 * levels) and a ref to the top block.  next_find_bit is presumably a
 * search cursor used by the runtime allocator; mkfs leaves it zero --
 * TODO confirm against the kernel side.
 */
struct scoutfs_radix_root {
	__u8 height;
	__le64 next_find_bit;
	struct scoutfs_radix_ref ref;
} __packed;
#define SCOUTFS_RADIX_REFS \
((SCOUTFS_BLOCK_SIZE - offsetof(struct scoutfs_radix_block, refs[0])) /\
sizeof(struct scoutfs_radix_ref))
/* 8 meg regions with 4k data blocks */
#define SCOUTFS_RADIX_LG_SHIFT 11
#define SCOUTFS_RADIX_LG_BITS (1 << SCOUTFS_RADIX_LG_SHIFT)
#define SCOUTFS_RADIX_LG_MASK (SCOUTFS_RADIX_LG_BITS - 1)
/* round block bits down to a multiple of large ranges */
#define SCOUTFS_RADIX_BITS \
(((SCOUTFS_BLOCK_SIZE - \
offsetof(struct scoutfs_radix_block, bits[0])) * 8) & \
~(__u64)SCOUTFS_RADIX_LG_MASK)
#define SCOUTFS_RADIX_BITS_BYTES (SCOUTFS_RADIX_BITS / 8)
/*
* The btree still uses memcmp() to compare keys. We should fix that
* before too long.
@@ -208,55 +246,6 @@ struct scoutfs_btree_block {
struct scoutfs_btree_item_header item_hdrs[0];
} __packed;
/*
* Free metadata blocks are tracked by block allocator items.
*/
struct scoutfs_balloc_root {
struct scoutfs_btree_root root;
__le64 total_free;
} __packed;
struct scoutfs_balloc_item_key {
__be64 base;
} __packed;
#define SCOUTFS_BALLOC_ITEM_BYTES 256
#define SCOUTFS_BALLOC_ITEM_U64S (SCOUTFS_BALLOC_ITEM_BYTES / \
sizeof(__u64))
#define SCOUTFS_BALLOC_ITEM_BITS (SCOUTFS_BALLOC_ITEM_BYTES * 8)
#define SCOUTFS_BALLOC_ITEM_BASE_SHIFT ilog2(SCOUTFS_BALLOC_ITEM_BITS)
#define SCOUTFS_BALLOC_ITEM_BIT_MASK (SCOUTFS_BALLOC_ITEM_BITS - 1)
struct scoutfs_balloc_item_val {
__le64 bits[SCOUTFS_BALLOC_ITEM_U64S];
} __packed;
/*
* Free data blocks are tracked in bitmaps stored in btree items.
*/
struct scoutfs_block_bitmap_key {
__u8 type;
__be64 base;
} __packed;
#define SCOUTFS_BLOCK_BITMAP_BIG 0
#define SCOUTFS_BLOCK_BITMAP_LITTLE 1
#define SCOUTFS_PACKED_BITMAP_WORDS 32
#define SCOUTFS_PACKED_BITMAP_BITS (SCOUTFS_PACKED_BITMAP_WORDS * 64)
#define SCOUTFS_PACKED_BITMAP_MAX_BYTES \
offsetof(struct scoutfs_packed_bitmap, \
words[SCOUTFS_PACKED_BITMAP_WORDS])
#define SCOUTFS_BLOCK_BITMAP_BITS SCOUTFS_PACKED_BITMAP_BITS
#define SCOUTFS_BLOCK_BITMAP_BIT_MASK (SCOUTFS_PACKED_BITMAP_BITS - 1)
#define SCOUTFS_BLOCK_BITMAP_BASE_SHIFT (ilog2(SCOUTFS_PACKED_BITMAP_BITS))
struct scoutfs_packed_bitmap {
__le64 present;
__le64 set;
__le64 words[0];
};
/*
* The lock server keeps a persistent record of connected clients so that
* server failover knows who to wait for before resuming operations.
@@ -293,12 +282,12 @@ struct scoutfs_mounted_client_btree_val {
* about item logs, it's about clients making changes to trees.
*/
struct scoutfs_log_trees {
struct scoutfs_balloc_root alloc_root;
struct scoutfs_balloc_root free_root;
struct scoutfs_radix_root meta_avail;
struct scoutfs_radix_root meta_freed;
struct scoutfs_btree_root item_root;
struct scoutfs_btree_ref bloom_ref;
struct scoutfs_balloc_root data_alloc;
struct scoutfs_balloc_root data_free;
struct scoutfs_radix_root data_avail;
struct scoutfs_radix_root data_freed;
__le64 rid;
__le64 nr;
} __packed;
@@ -309,12 +298,12 @@ struct scoutfs_log_trees_key {
} __packed;
struct scoutfs_log_trees_val {
struct scoutfs_balloc_root alloc_root;
struct scoutfs_balloc_root free_root;
struct scoutfs_radix_root meta_avail;
struct scoutfs_radix_root meta_freed;
struct scoutfs_btree_root item_root;
struct scoutfs_btree_ref bloom_ref;
struct scoutfs_balloc_root data_alloc;
struct scoutfs_balloc_root data_free;
struct scoutfs_radix_root data_avail;
struct scoutfs_radix_root data_freed;
} __packed;
struct scoutfs_log_item_value {
@@ -489,25 +478,22 @@ struct scoutfs_super_block {
__u8 uuid[SCOUTFS_UUID_BYTES];
__le64 next_ino;
__le64 next_trans_seq;
__le64 total_blocks;
__le64 next_uninit_meta_blkno;
__le64 last_uninit_meta_blkno;
__le64 next_uninit_data_blkno;
__le64 last_uninit_data_blkno;
__le64 core_balloc_cursor;
__le64 core_data_alloc_cursor;
__le64 total_meta_blocks; /* both static and dynamic */
__le64 first_meta_blkno; /* first dynamically allocated */
__le64 last_meta_blkno;
__le64 total_data_blocks;
__le64 first_data_blkno;
__le64 last_data_blkno;
__le64 free_blocks;
__le64 first_fs_blkno;
__le64 last_fs_blkno;
__le64 quorum_fenced_term;
__le64 quorum_server_term;
__le64 unmount_barrier;
__u8 quorum_count;
struct scoutfs_inet_addr server_addr;
struct scoutfs_balloc_root core_balloc_alloc;
struct scoutfs_balloc_root core_balloc_free;
struct scoutfs_balloc_root core_data_alloc;
struct scoutfs_balloc_root core_data_free;
struct scoutfs_radix_root core_meta_avail;
struct scoutfs_radix_root core_meta_freed;
struct scoutfs_radix_root core_data_avail;
struct scoutfs_radix_root core_data_freed;
struct scoutfs_btree_root fs_root;
struct scoutfs_btree_root logs_root;
struct scoutfs_btree_root lock_clients;

View File

@@ -26,6 +26,7 @@
#include "dev.h"
#include "key.h"
#include "bitops.h"
#include "radix.h"
static int write_raw_block(int fd, u64 blkno, void *blk)
{
@@ -84,6 +85,199 @@ static char *size_str(u64 nr, unsigned size)
#define SIZE_FMT "%llu (%.2f %s)"
#define SIZE_ARGS(nr, sz) (nr), size_flt(nr, sz), size_str(nr, sz)
/*
* Update a reference to a block of references that has been modified. We
* walk all the references and rebuild the ref tracking.
*/
static void update_parent_ref(struct scoutfs_radix_ref *ref,
struct scoutfs_radix_block *rdx)
{
int i;
ref->sm_total = cpu_to_le64(0);
ref->lg_total = cpu_to_le64(0);
rdx->sm_first = cpu_to_le32(SCOUTFS_RADIX_REFS);
rdx->lg_first = cpu_to_le32(SCOUTFS_RADIX_REFS);
for (i = 0; i < SCOUTFS_RADIX_REFS; i++) {
if (le32_to_cpu(rdx->sm_first) == SCOUTFS_RADIX_REFS &&
rdx->refs[i].sm_total != 0)
rdx->sm_first = cpu_to_le32(i);
if (le32_to_cpu(rdx->lg_first) == SCOUTFS_RADIX_REFS &&
rdx->refs[i].lg_total != 0)
rdx->lg_first = cpu_to_le32(i);
le64_add_cpu(&ref->sm_total,
le64_to_cpu(rdx->refs[i].sm_total));
le64_add_cpu(&ref->lg_total,
le64_to_cpu(rdx->refs[i].lg_total));
}
}
/*
 * Initialize all the blocks in a path to a leaf with the given blocks
 * set.  We know that we're being called to set all the bits in a region
 * by setting the left and right partial leafs of the region.  We first
 * set the left and set full references down the left path, then we're
 * called on the right and set full to the left and clear full refs past
 * the right.
 *
 * The caller provides an array of block buffers and a starting block
 * number to allocate blocks from and reference blocks within.  It's the
 * world's dumbest block cache.
 */
static void set_radix_path(struct scoutfs_super_block *super, int *inds,
			   struct scoutfs_radix_ref *ref, int level, bool left,
			   void **blocks, u64 blkno_base, u64 *next_blkno,
			   u64 first, u64 last)
{
	struct scoutfs_radix_block *rdx;
	int lg_ind;
	int lg_after;
	u64 bno;
	int ind;
	int end;
	int i;

	/* allocate a real block for this ref the first time we visit it */
	if (ref->blkno == 0) {
		bno = (*next_blkno)++;
		ref->blkno = cpu_to_le64(bno);
		ref->seq = cpu_to_le64(1);
	}

	/* blkno is an index into the caller's contiguous buffer array */
	rdx = blocks[le64_to_cpu(ref->blkno) - blkno_base];

	if (level) {
		ind = inds[level];

		/* initialize empty parent blocks with empty refs */
		if (ref->sm_total == 0) {
			for (i = 0; i < SCOUTFS_RADIX_REFS; i++)
				radix_init_ref(&rdx->refs[i], level - 1, false);
		}

		if (left) {
			/* initialize full refs from left to end */
			for (i = ind + 1; i < SCOUTFS_RADIX_REFS; i++)
				radix_init_ref(&rdx->refs[i], level - 1, true);
		} else {
			/*
			 * initialize full refs from start or left to right;
			 * sm_first was set by the left pass when it touched
			 * this block, so we skip past its partial ref
			 */
			for (i = le32_to_cpu(rdx->sm_first) !=
				SCOUTFS_RADIX_REFS ?
				le32_to_cpu(rdx->sm_first) + 1 : 0;
			     i < ind; i++)
				radix_init_ref(&rdx->refs[i], level - 1, true);
			/* wipe full refs from right (maybe including) to end */
			for (i = le64_to_cpu(rdx->refs[ind].blkno) == U64_MAX ?
				ind : ind + 1; i < SCOUTFS_RADIX_REFS; i++)
				radix_init_ref(&rdx->refs[i], level - 1, false);
		}

		set_radix_path(super, inds, &rdx->refs[ind], level - 1, left,
			       blocks, blkno_base, next_blkno, first, last);
		/* pull the child's updated totals back up into our ref */
		update_parent_ref(ref, rdx);
	} else {
		/* leaf: set the bit range [first, last] relative to the leaf */
		ind = first - radix_calc_leaf_bit(first);
		end = last - radix_calc_leaf_bit(last);
		for (i = ind; i <= end; i++)
			set_bit_le(i, rdx->bits);
		rdx->sm_first = cpu_to_le32(ind);
		ref->sm_total = cpu_to_le64(end - ind + 1);

		/*
		 * account for large regions that are entirely set.
		 * NOTE(review): if the set range contains no aligned
		 * large region, lg_after < lg_ind and the subtraction
		 * below goes negative -- presumably callers always set
		 * ranges that reach an aligned boundary; confirm.
		 */
		lg_ind = round_up(ind, SCOUTFS_RADIX_LG_BITS);
		lg_after = round_down(end + 1, SCOUTFS_RADIX_LG_BITS);
		if (lg_ind < SCOUTFS_RADIX_BITS)
			rdx->lg_first = cpu_to_le32(lg_ind);
		else
			rdx->lg_first = cpu_to_le32(SCOUTFS_RADIX_BITS);
		ref->lg_total = cpu_to_le64(lg_after - lg_ind);
	}
}
/*
 * Initialize a new radix allocator with the region of bits set.  We
 * initialize and write populated blocks down the paths to the two ends
 * of the interval and write full refs in between.
 *
 * Blocks for the tree itself are consumed contiguously starting at
 * blkno.  Returns the number of blocks written (so the caller can
 * advance its allocation cursor) or -errno on failure.
 */
static int write_radix_blocks(struct scoutfs_super_block *super, int fd,
			      struct scoutfs_radix_root *root,
			      u64 blkno, u64 first, u64 last)
{
	struct scoutfs_radix_block *rdx;
	void **blocks;
	u64 next_blkno;
	u64 edge;
	u8 height;
	int alloced;
	int used;
	int *inds;
	int ret;
	int i;

	height = radix_height_from_last(last);
	inds = alloca(sizeof(inds[0]) * height);
	/* two full paths (left and right) is the most we can dirty */
	alloced = height * 2;
	next_blkno = blkno;

	/* allocate all the blocks we might need */
	blocks = calloc(alloced, sizeof(*blocks));
	if (!blocks)
		return -ENOMEM;
	for (i = 0; i < alloced; i++) {
		blocks[i] = calloc(1, SCOUTFS_BLOCK_SIZE);
		if (blocks[i] == NULL) {
			ret = -ENOMEM;
			goto out;
		}
	}

	/* initialize empty root ref */
	memset(root, 0, sizeof(struct scoutfs_radix_root));
	root->height = height;
	radix_init_ref(&root->ref, height - 1, false);

	/* left pass: set from first to the end of its leaf (or last) */
	edge = radix_calc_leaf_bit(first) + SCOUTFS_RADIX_BITS - 1;
	radix_calc_level_inds(inds, height, first);
	set_radix_path(super, inds, &root->ref, root->height - 1, true, blocks,
		       blkno, &next_blkno, first, min(edge, last));

	/* right pass: set from the start of last's leaf (or first) to last */
	edge = radix_calc_leaf_bit(last);
	radix_calc_level_inds(inds, height, last);
	set_radix_path(super, inds, &root->ref, root->height - 1, false, blocks,
		       blkno, &next_blkno, max(first, edge), last);

	used = next_blkno - blkno;

	/* write out all the dirtied blocks */
	for (i = 0; i < used; i++) {
		rdx = blocks[i];
		rdx->hdr.magic = cpu_to_le32(SCOUTFS_BLOCK_MAGIC_RADIX);
		rdx->hdr.fsid = super->hdr.fsid;
		rdx->hdr.seq = cpu_to_le64(1);
		rdx->hdr.blkno = cpu_to_le64(blkno + i);
		rdx->hdr.crc = cpu_to_le32(crc_block(&rdx->hdr));
		ret = write_raw_block(fd, blkno + i, rdx);
		if (ret < 0)
			goto out;
	}

	ret = used;
out:
	/* calloc zeroed the array, so we can stop at the first hole */
	if (blocks) {
		for (i = 0; i < alloced && blocks[i]; i++)
			free(blocks[i]);
		free(blocks);
	}
	return ret;
}
/*
* Make a new file system by writing:
* - super blocks
@@ -97,8 +291,6 @@ static int write_new_fs(char *path, int fd, u8 quorum_count)
struct scoutfs_inode *inode;
struct scoutfs_btree_block *bt;
struct scoutfs_btree_item *btitem;
struct scoutfs_balloc_item_key *bik;
struct scoutfs_balloc_item_val *biv;
struct scoutfs_key key;
struct timeval tv;
char uuid_str[37];
@@ -107,6 +299,7 @@ static int write_new_fs(char *path, int fd, u8 quorum_count)
u64 limit;
u64 size;
u64 total_blocks;
u64 meta_alloc_blocks;
u64 next_meta;
u64 last_meta;
u64 next_data;
@@ -142,6 +335,13 @@ static int write_new_fs(char *path, int fd, u8 quorum_count)
}
total_blocks = size / SCOUTFS_BLOCK_SIZE;
/* metadata blocks start after the quorum blocks */
next_meta = SCOUTFS_QUORUM_BLKNO + SCOUTFS_QUORUM_BLOCKS;
/* data blocks are after metadata, we'll say 1:4 for now */
next_data = round_up(next_meta + ((total_blocks - next_meta) / 5),
SCOUTFS_RADIX_BITS);
last_meta = next_data - 1;
last_data = total_blocks - 1;
/* partially initialize the super so we can use it to init others */
memset(super, 0, SCOUTFS_BLOCK_SIZE);
@@ -152,18 +352,14 @@ static int write_new_fs(char *path, int fd, u8 quorum_count)
uuid_generate(super->uuid);
super->next_ino = cpu_to_le64(SCOUTFS_ROOT_INO + 1);
super->next_trans_seq = cpu_to_le64(1);
super->total_blocks = cpu_to_le64(total_blocks);
super->total_meta_blocks = cpu_to_le64(last_meta + 1);
super->first_meta_blkno = cpu_to_le64(next_meta);
super->last_meta_blkno = cpu_to_le64(last_meta);
super->total_data_blocks = cpu_to_le64(last_data - next_data + 1);
super->first_data_blkno = cpu_to_le64(next_data);
super->last_data_blkno = cpu_to_le64(last_data);
super->quorum_count = quorum_count;
/* metadata blocks start after the quorum blocks */
next_meta = SCOUTFS_QUORUM_BLKNO + SCOUTFS_QUORUM_BLOCKS;
/* data blocks are after metadata, we'll say 1:4 for now */
next_data = round_up(next_meta + ((total_blocks - next_meta) / 5),
SCOUTFS_BLOCK_BITMAP_BITS);
last_meta = next_data - 1;
last_data = total_blocks - 1;
/* fs root starts with root inode and its index items */
blkno = next_meta++;
@@ -224,47 +420,31 @@ static int write_new_fs(char *path, int fd, u8 quorum_count)
if (ret)
goto out;
/* metadata block allocator has single item, server continues init */
blkno = next_meta++;
super->core_balloc_alloc.root.ref.blkno = cpu_to_le64(blkno);
super->core_balloc_alloc.root.ref.seq = cpu_to_le64(1);
super->core_balloc_alloc.root.height = 1;
/* XXX magic */
memset(bt, 0, SCOUTFS_BLOCK_SIZE);
bt->hdr.fsid = super->hdr.fsid;
bt->hdr.blkno = cpu_to_le64(blkno);
bt->hdr.seq = cpu_to_le64(1);
bt->nr_items = cpu_to_le32(1);
/* btree item allocated from the back of the block */
biv = (void *)bt + SCOUTFS_BLOCK_SIZE - sizeof(*biv);
bik = (void *)biv - sizeof(*bik);
btitem = (void *)bik - sizeof(*btitem);
bt->item_hdrs[0].off = cpu_to_le32((long)btitem - (long)bt);
btitem->key_len = cpu_to_le16(sizeof(*bik));
btitem->val_len = cpu_to_le16(sizeof(*biv));
bik->base = cpu_to_be64(0); /* XXX true? */
/* set all the bits past our final used blkno */
super->core_balloc_free.total_free =
cpu_to_le64(SCOUTFS_BALLOC_ITEM_BITS - next_meta);
for (i = next_meta; i < SCOUTFS_BALLOC_ITEM_BITS; i++)
set_bit_le(i, &biv->bits);
next_meta = i;
bt->free_end = bt->item_hdrs[le32_to_cpu(bt->nr_items) - 1].off;
bt->hdr.magic = cpu_to_le32(SCOUTFS_BLOCK_MAGIC_BTREE);
bt->hdr.crc = cpu_to_le32(crc_block(&bt->hdr));
ret = write_raw_block(fd, blkno, bt);
if (ret)
/* write out radix allocator blocks for data */
ret = write_radix_blocks(super, fd, &super->core_data_avail, next_meta,
next_data, last_data);
if (ret < 0)
goto out;
next_meta += ret;
super->core_data_freed.height = super->core_data_avail.height;
radix_init_ref(&super->core_data_freed.ref, 0, false);
meta_alloc_blocks = radix_blocks_needed(next_meta, last_meta);
/*
* Write out radix alloc blocks, knowing that the region we mark
* has to start after the blocks we store the allocator itself in.
*/
ret = write_radix_blocks(super, fd, &super->core_meta_avail,
next_meta, next_meta + meta_alloc_blocks,
last_meta);
if (ret < 0)
goto out;
next_meta += ret;
super->core_meta_freed.height = super->core_meta_avail.height;
radix_init_ref(&super->core_meta_freed.ref, 0, false);
/* zero out quorum blocks */
for (i = 0; i < SCOUTFS_QUORUM_BLOCKS; i++) {
@@ -277,10 +457,6 @@ static int write_new_fs(char *path, int fd, u8 quorum_count)
}
/* fill out allocator fields now that we've written our blocks */
super->next_uninit_meta_blkno = cpu_to_le64(next_meta);
super->last_uninit_meta_blkno = cpu_to_le64(last_meta);
super->next_uninit_data_blkno = cpu_to_le64(next_data);
super->last_uninit_data_blkno = cpu_to_le64(last_data);
super->free_blocks = cpu_to_le64(total_blocks - next_meta);
/* write the super block */
@@ -312,9 +488,9 @@ static int write_new_fs(char *path, int fd, u8 quorum_count)
le64_to_cpu(super->format_hash),
uuid_str,
SIZE_ARGS(total_blocks, SCOUTFS_BLOCK_SIZE),
SIZE_ARGS(last_meta - next_meta + 1,
SIZE_ARGS(le64_to_cpu(super->total_meta_blocks),
SCOUTFS_BLOCK_SIZE),
SIZE_ARGS(last_data - next_data + 1,
SIZE_ARGS(le64_to_cpu(super->total_data_blocks),
SCOUTFS_BLOCK_SIZE),
super->quorum_count);

View File

@@ -20,6 +20,7 @@
#include "cmd.h"
#include "crc.h"
#include "key.h"
#include "radix.h"
static void *read_block(int fd, u64 blkno)
{
@@ -258,6 +259,18 @@ static int print_logs_item(void *key, unsigned key_len, void *val,
return 0;
}
#define RADREF_F \
"blkno %llu seq %llu sm_total %llu lg_total %llu"
#define RADREF_A(ref) \
le64_to_cpu((ref)->blkno), le64_to_cpu((ref)->seq), \
le64_to_cpu((ref)->sm_total), le64_to_cpu((ref)->lg_total)
#define RADROOT_F \
"height %u next_find_bit %llu ref: "RADREF_F
#define RADROOT_A(root) \
(root)->height, le64_to_cpu((root)->next_find_bit), \
RADREF_A(&(root)->ref)
/* same as fs item but with a small header in the value */
static int print_log_trees_item(void *key, unsigned key_len, void *val,
unsigned val_len, void *arg)
@@ -270,33 +283,21 @@ static int print_log_trees_item(void *key, unsigned key_len, void *val,
/* only items in leaf blocks have values */
if (val) {
printf(" alloc_root: total_free %llu root: height %u blkno %llu seq %llu\n"
" free_root: total_free %llu root: height %u blkno %llu seq %llu\n"
printf(" meta_avail: "RADROOT_F"\n"
" meta_freed: "RADROOT_F"\n"
" item_root: height %u blkno %llu seq %llu\n"
" bloom_ref: blkno %llu seq %llu\n"
" data_alloc: total_free %llu root: height %u blkno %llu seq %llu\n"
" data_free: total_free %llu root: height %u blkno %llu seq %llu\n",
le64_to_cpu(ltv->alloc_root.total_free),
ltv->alloc_root.root.height,
le64_to_cpu(ltv->alloc_root.root.ref.blkno),
le64_to_cpu(ltv->alloc_root.root.ref.seq),
le64_to_cpu(ltv->free_root.total_free),
ltv->free_root.root.height,
le64_to_cpu(ltv->free_root.root.ref.blkno),
le64_to_cpu(ltv->free_root.root.ref.seq),
" data_avail: "RADROOT_F"\n"
" data_freed: "RADROOT_F"\n",
RADROOT_A(&ltv->meta_avail),
RADROOT_A(&ltv->meta_freed),
ltv->item_root.height,
le64_to_cpu(ltv->item_root.ref.blkno),
le64_to_cpu(ltv->item_root.ref.seq),
le64_to_cpu(ltv->bloom_ref.blkno),
le64_to_cpu(ltv->bloom_ref.seq),
le64_to_cpu(ltv->data_alloc.total_free),
ltv->data_alloc.root.height,
le64_to_cpu(ltv->data_alloc.root.ref.blkno),
le64_to_cpu(ltv->data_alloc.root.ref.seq),
le64_to_cpu(ltv->data_free.total_free),
ltv->data_free.root.height,
le64_to_cpu(ltv->data_free.root.ref.blkno),
le64_to_cpu(ltv->data_free.root.ref.seq));
RADROOT_A(&ltv->data_avail),
RADROOT_A(&ltv->data_freed));
}
return 0;
@@ -323,31 +324,6 @@ static int print_trans_seqs_entry(void *key, unsigned key_len, void *val,
return 0;
}
static int print_balloc_entry(void *key, unsigned key_len, void *val,
unsigned val_len, void *arg)
{
struct scoutfs_balloc_item_key *bik = key;
// struct scoutfs_balloc_item_val *biv = val;
printf(" base %llu\n",
be64_to_cpu(bik->base));
return 0;
}
static int print_bitmap_entry(void *key, unsigned key_len, void *val,
unsigned val_len, void *arg)
{
struct scoutfs_block_bitmap_key *bbk = key;
struct scoutfs_packed_bitmap *pb = val;
printf(" type %u base %llu present 0x%016llx set 0x%016llx\n",
bbk->type, be64_to_cpu(bbk->base),
le64_to_cpu(pb->present), le64_to_cpu(pb->set));
return 0;
}
static int print_mounted_client_entry(void *key, unsigned key_len, void *val,
unsigned val_len, void *arg)
{
@@ -460,6 +436,71 @@ static int print_btree(int fd, struct scoutfs_super_block *super, char *which,
return ret;
}
/*
 * Recursively print a radix parent block and its descendants, skipping
 * refs that are empty (blkno 0) or entirely full (blkno U64_MAX) since
 * those have no block to read.  Runs of identical empty/full refs are
 * collapsed into a single "[lo - hi]" line.  Returns 0 or the first
 * error encountered while still visiting remaining children.
 */
static int print_radix_block(int fd, struct scoutfs_radix_ref *par, int level)
{
	struct scoutfs_radix_block *rdx;
	u64 blkno;
	int prev;
	int ret;
	int err;
	int i;

	/* XXX not printing bitmap leaf blocks */
	blkno = le64_to_cpu(par->blkno);
	if (blkno == 0 || blkno == U64_MAX || level == 0)
		return 0;

	rdx = read_block(fd, le64_to_cpu(par->blkno));
	if (!rdx) {
		ret = -ENOMEM;
		goto out;
	}

	printf("radix parent block blkno %llu\n", le64_to_cpu(par->blkno));
	print_block_header(&rdx->hdr);
	printf("  sm_first %u lg_first %u\n",
	       le32_to_cpu(rdx->sm_first), le32_to_cpu(rdx->lg_first));

	/* prev counts refs skipped because they matched the next ref */
	prev = 0;
	for (i = 0; i < SCOUTFS_RADIX_REFS; i++) {
		/* only skip if the next ref is identically full/empty */
		if ((le64_to_cpu(rdx->refs[i].blkno) == 0 ||
		     le64_to_cpu(rdx->refs[i].blkno) == U64_MAX) &&
		    (i + 1) < SCOUTFS_RADIX_REFS &&
		    (le64_to_cpu(rdx->refs[i].blkno) ==
		     le64_to_cpu(rdx->refs[i + 1].blkno))) {
			prev++;
			continue;
		}

		if (prev) {
			printf("  [%u - %u]: (%s): ", i - prev, i,
			       (le64_to_cpu(rdx->refs[i].blkno) == 0) ? "empty" :
			       "full");
			prev = 0;
		} else {
			printf("  [%u]: ", i);
		}
		printf(RADREF_F"\n", RADREF_A(&rdx->refs[i]));
	}

	/* descend into real child blocks, remembering the first error */
	ret = 0;
	for (i = 0; i < SCOUTFS_RADIX_REFS; i++) {
		if (le64_to_cpu(rdx->refs[i].blkno) != 0 &&
		    le64_to_cpu(rdx->refs[i].blkno) != U64_MAX) {
			err = print_radix_block(fd, &rdx->refs[i], level - 1);
			if (err < 0 && ret == 0)
				ret = err;
		}
	}

out:
	free(rdx);
	return ret;
}
struct print_recursion_args {
struct scoutfs_super_block *super;
int fd;
@@ -469,52 +510,35 @@ struct print_recursion_args {
static int print_log_trees_roots(void *key, unsigned key_len, void *val,
unsigned val_len, void *arg)
{
struct scoutfs_log_trees_key *ltk = key;
// struct scoutfs_log_trees_key *ltk = key;
struct scoutfs_log_trees_val *ltv = val;
struct print_recursion_args *pa = arg;
struct log_trees_roots {
char *fmt;
struct scoutfs_btree_root *root;
print_item_func func;
} roots[] = {
{ "log_tree_rid:%llu_nr:%llu_alloc",
&ltv->alloc_root.root,
print_balloc_entry,
},
{ "log_tree_rid:%llu_nr:%llu_free",
&ltv->free_root.root,
print_balloc_entry,
},
{ "log_tree_rid:%llu_nr:%llu_data_alloc",
&ltv->data_alloc.root,
print_bitmap_entry,
},
{ "log_tree_rid:%llu_nr:%llu_data_free",
&ltv->data_free.root,
print_bitmap_entry,
},
{ "log_tree_rid:%llu_nr:%llu_item",
&ltv->item_root,
print_logs_item,
},
};
char which[100];
int ret;
int ret = 0;
int err;
int i;
/* XXX doesn't print the bloom block */
ret = 0;
for (i = 0; i < array_size(roots); i++) {
snprintf(which, sizeof(which) - 1, roots[i].fmt,
be64_to_cpu(ltk->rid), be64_to_cpu(ltk->nr));
err = print_radix_block(pa->fd, &ltv->meta_avail.ref,
ltv->meta_avail.height - 1);
if (err && !ret)
ret = err;
err = print_radix_block(pa->fd, &ltv->meta_freed.ref,
ltv->meta_avail.height - 1);
if (err && !ret)
ret = err;
err = print_radix_block(pa->fd, &ltv->data_avail.ref,
ltv->data_avail.height - 1);
if (err && !ret)
ret = err;
err = print_radix_block(pa->fd, &ltv->meta_freed.ref,
ltv->data_avail.height - 1);
if (err && !ret)
ret = err;
err = print_btree(pa->fd, pa->super, which, roots[i].root,
roots[i].func, NULL);
if (err && !ret)
ret = err;
}
err = print_btree(pa->fd, pa->super, "", &ltv->item_root,
print_logs_item, NULL);
if (err && !ret)
ret = err;
return ret;
}
@@ -657,51 +681,37 @@ static void print_super_block(struct scoutfs_super_block *super, u64 blkno)
/* XXX these are all in a crazy order */
printf(" next_ino %llu next_trans_seq %llu\n"
" total_blocks %llu free_blocks %llu\n"
" next_uninit_meta_blkno %llu last_uninit_meta_blkno %llu\n"
" next_uninit_data_blkno %llu last_uninit_data_blkno %llu\n"
" core_balloc_cursor %llu core_data_alloc_cursor %llu\n"
" total_meta_blocks %llu first_meta_blkno %llu last_meta_blkno %llu\n"
" total_data_blocks %llu first_data_blkno %llu last_data_blkno %llu\n"
" free_blocks %llu\n"
" quorum_fenced_term %llu quorum_server_term %llu unmount_barrier %llu\n"
" quorum_count %u server_addr %s\n"
" core_balloc_alloc: total_free %llu root: height %u blkno %llu seq %llu\n"
" core_balloc_free: total_free %llu root: height %u blkno %llu seq %llu\n"
" core_data_alloc: total_free %llu root: height %u blkno %llu seq %llu\n"
" core_data_free: total_free %llu root: height %u blkno %llu seq %llu\n"
" core_meta_avail: "RADROOT_F"\n"
" core_meta_freed: "RADROOT_F"\n"
" core_data_avail: "RADROOT_F"\n"
" core_data_freed: "RADROOT_F"\n"
" lock_clients root: height %u blkno %llu seq %llu\n"
" mounted_clients root: height %u blkno %llu seq %llu\n"
" trans_seqs root: height %u blkno %llu seq %llu\n"
" fs_root btree root: height %u blkno %llu seq %llu\n",
le64_to_cpu(super->next_ino),
le64_to_cpu(super->next_trans_seq),
le64_to_cpu(super->total_blocks),
le64_to_cpu(super->total_meta_blocks),
le64_to_cpu(super->first_meta_blkno),
le64_to_cpu(super->last_meta_blkno),
le64_to_cpu(super->total_data_blocks),
le64_to_cpu(super->first_data_blkno),
le64_to_cpu(super->last_data_blkno),
le64_to_cpu(super->free_blocks),
le64_to_cpu(super->next_uninit_meta_blkno),
le64_to_cpu(super->last_uninit_meta_blkno),
le64_to_cpu(super->next_uninit_data_blkno),
le64_to_cpu(super->last_uninit_data_blkno),
le64_to_cpu(super->core_balloc_cursor),
le64_to_cpu(super->core_data_alloc_cursor),
le64_to_cpu(super->quorum_fenced_term),
le64_to_cpu(super->quorum_server_term),
le64_to_cpu(super->unmount_barrier),
super->quorum_count,
server_addr,
le64_to_cpu(super->core_balloc_alloc.total_free),
super->core_balloc_alloc.root.height,
le64_to_cpu(super->core_balloc_alloc.root.ref.blkno),
le64_to_cpu(super->core_balloc_alloc.root.ref.seq),
le64_to_cpu(super->core_balloc_free.total_free),
super->core_balloc_free.root.height,
le64_to_cpu(super->core_balloc_free.root.ref.blkno),
le64_to_cpu(super->core_balloc_free.root.ref.seq),
le64_to_cpu(super->core_data_alloc.total_free),
super->core_data_alloc.root.height,
le64_to_cpu(super->core_data_alloc.root.ref.blkno),
le64_to_cpu(super->core_data_alloc.root.ref.seq),
le64_to_cpu(super->core_data_free.total_free),
super->core_data_free.root.height,
le64_to_cpu(super->core_data_free.root.ref.blkno),
le64_to_cpu(super->core_data_free.root.ref.seq),
RADROOT_A(&super->core_meta_avail),
RADROOT_A(&super->core_meta_freed),
RADROOT_A(&super->core_data_avail),
RADROOT_A(&super->core_data_freed),
super->lock_clients.height,
le64_to_cpu(super->lock_clients.ref.blkno),
le64_to_cpu(super->lock_clients.ref.seq),
@@ -748,27 +758,20 @@ static int print_volume(int fd)
if (err && !ret)
ret = err;
err = print_btree(fd, super, "core_balloc_alloc",
&super->core_balloc_alloc.root,
print_balloc_entry, NULL);
err = print_radix_block(fd, &super->core_meta_avail.ref,
super->core_meta_avail.height - 1);
if (err && !ret)
ret = err;
err = print_btree(fd, super, "core_balloc_free",
&super->core_balloc_free.root,
print_balloc_entry, NULL);
err = print_radix_block(fd, &super->core_meta_freed.ref,
super->core_meta_freed.height - 1);
if (err && !ret)
ret = err;
err = print_btree(fd, super, "core_data_alloc",
&super->core_data_alloc.root,
print_bitmap_entry, NULL);
err = print_radix_block(fd, &super->core_data_avail.ref,
super->core_data_avail.height - 1);
if (err && !ret)
ret = err;
err = print_btree(fd, super, "core_data_free",
&super->core_data_free.root,
print_bitmap_entry, NULL);
err = print_radix_block(fd, &super->core_data_freed.ref,
super->core_data_freed.height - 1);
if (err && !ret)
ret = err;

106
utils/src/radix.c Normal file
View File

@@ -0,0 +1,106 @@
#include <stdbool.h>
#include "sparse.h"
#include "util.h"
#include "format.h"
#include "radix.h"
/*
 * Return the height of a tree needed to store the last bit.  A height
 * 1 tree is a single leaf covering SCOUTFS_RADIX_BITS bits; each
 * additional level multiplies the coverage by SCOUTFS_RADIX_REFS.
 */
u8 radix_height_from_last(u64 last)
{
	u64 coverage = SCOUTFS_RADIX_BITS;
	u64 highest = coverage - 1;
	int level = 1;

	while (level <= U8_MAX) {
		if (highest >= last)
			return level;
		highest += (u64)(SCOUTFS_RADIX_REFS - 1) * coverage;
		coverage *= SCOUTFS_RADIX_REFS;
		level++;
	}

	return U8_MAX;
}
/*
 * Return the total number of bits tracked by a completely full subtree
 * whose root ref sits at the given level; level 0 is a single leaf.
 */
u64 radix_full_subtree_total(int level)
{
	u64 bits = SCOUTFS_RADIX_BITS;

	while (level-- > 0)
		bits *= SCOUTFS_RADIX_REFS;

	return bits;
}
/*
 * Initialize a reference to a block at the given level.  A full ref
 * uses the U64_MAX blkno sentinel and carries the bit totals of an
 * entirely free subtree; an empty ref is all zeros.
 */
void radix_init_ref(struct scoutfs_radix_ref *ref, int level, bool full)
{
	u64 tot = full ? radix_full_subtree_total(level) : 0;

	ref->blkno = cpu_to_le64(full ? U64_MAX : 0);
	ref->seq = cpu_to_le64(0);
	ref->sm_total = cpu_to_le64(tot);
	ref->lg_total = cpu_to_le64(tot);
}
/*
 * Fill inds[] with the index used at each level of the tree to reach
 * the given bit: inds[0] is the bit offset within its leaf, and each
 * higher level is a ref index within its parent block.
 */
void radix_calc_level_inds(int *inds, u8 height, u64 bit)
{
	int i;

	inds[0] = bit % SCOUTFS_RADIX_BITS;
	bit /= SCOUTFS_RADIX_BITS;

	for (i = 1; i < height; i++) {
		inds[i] = bit % SCOUTFS_RADIX_REFS;
		bit /= SCOUTFS_RADIX_REFS;
	}
}
/* return the first bit of the leaf block that contains the given bit */
u64 radix_calc_leaf_bit(u64 bit)
{
	return bit / SCOUTFS_RADIX_BITS * SCOUTFS_RADIX_BITS;
}
/*
 * The number of blocks needed to initialize a radix with left and right
 * paths.  The first time we find a level where the parent refs are at
 * different indices determines where the paths diverge at lower levels.
 * If the refs never diverge then the two paths traverse the same blocks
 * and we just need blocks for the height of the tree.
 */
int radix_blocks_needed(u64 a, u64 b)
{
	u8 height = radix_height_from_last(b);
	int *a_inds;
	int *b_inds;
	int i;

	/*
	 * Allocate one index per level.  (The multiply must be outside
	 * sizeof(): the old sizeof(a_inds[0] * height) was just
	 * sizeof(int), so radix_calc_level_inds() wrote past the stack
	 * allocation whenever height > 1.)
	 */
	a_inds = alloca(sizeof(a_inds[0]) * height);
	b_inds = alloca(sizeof(b_inds[0]) * height);

	radix_calc_level_inds(a_inds, height, a);
	radix_calc_level_inds(b_inds, height, b);

	/* shared blocks above the divergence, two per level below it */
	for (i = height - 1; i > 0; i--) {
		if (a_inds[i] != b_inds[i])
			return (i * 2) + (height - i);
	}

	return height;
}

13
utils/src/radix.h Normal file
View File

@@ -0,0 +1,13 @@
#ifndef _RADIX_H_
#define _RADIX_H_

#include <stdbool.h>

/*
 * Helpers for the radix allocator blocks written by mkfs and printed
 * by the print command.  Callers include sparse.h/format.h first for
 * u8/u64 and struct scoutfs_radix_ref.
 */

/* return the tree height needed so the last (greatest) bit is reachable */
u8 radix_height_from_last(u64 last);
/* total bits tracked by a completely full subtree rooted at level */
u64 radix_full_subtree_total(int level);
/* init a ref as entirely full (blkno U64_MAX) or empty (blkno 0) */
void radix_init_ref(struct scoutfs_radix_ref *ref, int level, bool full);
/* fill inds[0..height-1] with the per-level index of bit */
void radix_calc_level_inds(int *inds, u8 height, u64 bit);
/* return the first bit of the leaf block containing bit */
u64 radix_calc_leaf_bit(u64 bit);
/* blocks needed to write paths to both ends of the range [a, b] */
int radix_blocks_needed(u64 a, u64 b);

#endif