scoutfs-utils: switch to btree forest

Remove all the lsm code from mkfs and print, replacing
it with the forest of btrees.

Signed-off-by: Zach Brown <zab@versity.com>
Author:    Zach Brown
Date:      2019-09-30 11:26:36 -07:00
Committer: Zach Brown
Commit:    3776c18c66 (parent 70efa2f905)
7 changed files with 414 additions and 631 deletions

View File

@@ -37,12 +37,3 @@ u32 crc_block(struct scoutfs_block_header *hdr)
return crc32c(~0, (char *)hdr + sizeof(hdr->crc),
SCOUTFS_BLOCK_SIZE - sizeof(hdr->crc));
}
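
With crc_segment() gone, crc_block() is the only checksum helper left; a minimal verification sketch (check_block() is hypothetical, not part of this change):

static int check_block(struct scoutfs_block_header *hdr)
{
	/* the stored crc field is excluded from the checksummed bytes */
	if (le32_to_cpu(hdr->crc) != crc_block(hdr))
		return -EIO;
	return 0;
}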
u32 crc_segment(struct scoutfs_segment_block *sblk)
{
u32 off = offsetof(struct scoutfs_segment_block, _padding) +
sizeof(sblk->_padding);
return crc32c(~0, (char *)sblk + off,
le32_to_cpu(sblk->total_bytes) - off);
}

View File

@@ -8,6 +8,5 @@
u32 crc32c(u32 crc, const void *data, unsigned int len);
u64 crc32c_64(u32 crc, const void *data, unsigned int len);
u32 crc_block(struct scoutfs_block_header *hdr);
u32 crc_segment(struct scoutfs_segment_block *seg);
#endif

View File

@@ -7,6 +7,7 @@
/* block header magic values, chosen at random */
#define SCOUTFS_BLOCK_MAGIC_SUPER 0x103c428b
#define SCOUTFS_BLOCK_MAGIC_BTREE 0xe597f96d
#define SCOUTFS_BLOCK_MAGIC_BLOOM 0x31995604
/*
* The super block and btree blocks are fixed 4k.
@@ -19,18 +20,6 @@
#define SCOUTFS_BLOCK_SECTORS (1 << SCOUTFS_BLOCK_SECTOR_SHIFT)
#define SCOUTFS_BLOCK_MAX (U64_MAX >> SCOUTFS_BLOCK_SHIFT)
/*
* FS data is stored in segments, for now they're fixed size. They'll
* be dynamic.
*/
#define SCOUTFS_SEGMENT_SHIFT 20
#define SCOUTFS_SEGMENT_SIZE (1 << SCOUTFS_SEGMENT_SHIFT)
#define SCOUTFS_SEGMENT_MASK (SCOUTFS_SEGMENT_SIZE - 1)
#define SCOUTFS_SEGMENT_PAGES (SCOUTFS_SEGMENT_SIZE / PAGE_SIZE)
#define SCOUTFS_SEGMENT_BLOCKS (SCOUTFS_SEGMENT_SIZE / SCOUTFS_BLOCK_SIZE)
#define SCOUTFS_SEGMENT_BLOCK_SHIFT \
(SCOUTFS_SEGMENT_SHIFT - SCOUTFS_BLOCK_SHIFT)
#define SCOUTFS_PAGES_PER_BLOCK (SCOUTFS_BLOCK_SIZE / PAGE_SIZE)
#define SCOUTFS_BLOCK_PAGE_ORDER (SCOUTFS_BLOCK_SHIFT - PAGE_SHIFT)
@@ -162,7 +151,7 @@ struct scoutfs_key_be {
/* choose reasonable max key and value lengths that have room for some u64s */
#define SCOUTFS_BTREE_MAX_KEY_LEN 40
#define SCOUTFS_BTREE_MAX_VAL_LEN 64
#define SCOUTFS_BTREE_MAX_VAL_LEN 256
/*
* The min number of free bytes we must leave in a parent as we descend
@@ -198,19 +187,14 @@ struct scoutfs_btree_ref {
/*
* A height of X means that the first block read will have level X-1 and
* the leaves will have level 0.
*
* The migration key is used to walk the tree finding old blocks to migrate
* into the current half of the ring.
*/
struct scoutfs_btree_root {
struct scoutfs_btree_ref ref;
__u8 height;
__le16 migration_key_len;
__u8 migration_key[SCOUTFS_BTREE_MAX_KEY_LEN];
} __packed;
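
The height field drives a simple descent loop; a sketch of the walk, assuming print's read_block() helper and a hypothetical pick_child_ref() that selects the matching parent item's scoutfs_btree_ref value:

static struct scoutfs_btree_block *walk_to_leaf(int fd,
					struct scoutfs_btree_root *root)
{
	struct scoutfs_btree_ref ref = root->ref;
	struct scoutfs_btree_block *bt;
	int level;

	if (root->height == 0 || ref.blkno == 0)
		return NULL;			/* empty tree */

	for (level = root->height - 1; ; level--) {
		bt = read_block(fd, le64_to_cpu(ref.blkno));
		if (!bt || level == 0)
			return bt;		/* leaves have level 0 */
		ref = *pick_child_ref(bt);	/* hypothetical helper */
		free(bt);
	}
}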
struct scoutfs_btree_item_header {
__le16 off;
__le32 off;
} __packed;
struct scoutfs_btree_item {
@@ -221,52 +205,32 @@ struct scoutfs_btree_item {
struct scoutfs_btree_block {
struct scoutfs_block_header hdr;
__le16 free_end;
__le16 free_reclaim;
__le16 nr_items;
__le32 free_end;
__le32 nr_items;
__u8 level;
struct scoutfs_btree_item_header item_hdrs[0];
} __packed;
struct scoutfs_btree_ring {
__le64 first_blkno;
__le64 nr_blocks;
__le64 next_block;
__le64 next_seq;
} __packed;
/*
 * This is absurdly huge. If there was only ever 1 item per segment and
 * 2^64 items the tree could get this deep.
 */
/*
 * Free metadata blocks are tracked by block allocator items.
 */
#define SCOUTFS_MANIFEST_MAX_LEVEL 20
#define SCOUTFS_MANIFEST_FANOUT 10
struct scoutfs_manifest {
struct scoutfs_balloc_root {
struct scoutfs_btree_root root;
__le64 level_counts[SCOUTFS_MANIFEST_MAX_LEVEL];
__le64 total_free;
} __packed;
struct scoutfs_balloc_item_key {
__be64 base;
} __packed;
/*
* Manifest entries are split across btree keys and values. Putting
* some entry fields in the value keeps the key smaller and increases
* the fanout of the btree which keeps the tree smaller and reduces
* block IO.
*
* The key is made up of the level, first key, and seq. At level 0
* segments can completely overlap and have identical key ranges but we
* avoid duplicate btree keys by including the unique seq.
*/
struct scoutfs_manifest_btree_key {
__u8 level;
struct scoutfs_key_be first_key;
__be64 seq;
} __packed;
#define SCOUTFS_BALLOC_ITEM_BYTES 256
#define SCOUTFS_BALLOC_ITEM_U64S (SCOUTFS_BALLOC_ITEM_BYTES / \
sizeof(__u64))
#define SCOUTFS_BALLOC_ITEM_BITS (SCOUTFS_BALLOC_ITEM_BYTES * 8)
#define SCOUTFS_BALLOC_ITEM_BASE_SHIFT ilog2(SCOUTFS_BALLOC_ITEM_BITS)
#define SCOUTFS_BALLOC_ITEM_BIT_MASK (SCOUTFS_BALLOC_ITEM_BITS - 1)
struct scoutfs_manifest_btree_val {
__le64 segno;
struct scoutfs_key last_key;
struct scoutfs_balloc_item_val {
__le64 bits[SCOUTFS_BALLOC_ITEM_U64S];
} __packed;
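
The defines above map a block number to an allocator item and a bit within it; a small sketch of that mapping (balloc_locate() is illustrative only, assuming base is the blkno right-shifted by the base shift, which matches how mkfs fills item 0 below):

static void balloc_locate(u64 blkno, __be64 *base, unsigned int *bit)
{
	/* each item covers SCOUTFS_BALLOC_ITEM_BITS (2048) block numbers */
	*base = cpu_to_be64(blkno >> SCOUTFS_BALLOC_ITEM_BASE_SHIFT);
	*bit = blkno & SCOUTFS_BALLOC_ITEM_BIT_MASK;
}

So blkno 5000, say, falls in the item with base 2 (blocks 4096..6143) at bit 904.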
/*
@@ -312,50 +276,61 @@ struct scoutfs_mounted_client_btree_val {
#define SCOUTFS_MOUNTED_CLIENT_VOTER (1 << 0)
/*
* The max number of links defines the max number of entries that we can
index in O(log n) and the static list head storage size in the
* segment block. We always pay the static storage cost, which is tiny,
* and we can look at the number of items to know the greatest number of
* links and skip most of the initial 0 links.
*/
#define SCOUTFS_MAX_SKIP_LINKS 32
struct scoutfs_log_trees {
struct scoutfs_balloc_root alloc_root;
struct scoutfs_balloc_root free_root;
struct scoutfs_btree_root item_root;
struct scoutfs_btree_ref bloom_ref;
__le64 rid;
__le64 nr;
} __packed;
/*
* Items are packed into segments and linked together in a skip list.
* Each item's header, links, key, and value are stored contiguously.
* They're not allowed to cross a block boundary.
*/
struct scoutfs_segment_item {
struct scoutfs_key key;
__le16 val_len;
struct scoutfs_log_trees_key {
__be64 rid;
__be64 nr;
} __packed;
struct scoutfs_log_trees_val {
struct scoutfs_balloc_root alloc_root;
struct scoutfs_balloc_root free_root;
struct scoutfs_btree_root item_root;
struct scoutfs_btree_ref bloom_ref;
} __packed;
struct scoutfs_log_item_value {
__le64 vers;
__u8 flags;
__u8 nr_links;
__le32 skip_links[0];
/* __u8 val_bytes[val_len] */
__u8 data[0];
} __packed;
#define SCOUTFS_ITEM_FLAG_DELETION (1 << 0)
/*
 * Each large segment starts with a segment block that describes the
 * rest of the blocks that make up the segment.
 *
 * The crc covers the initial total_bytes of the segment but starts
 * after the padding.
 */
/*
 * FS items are limited by the max btree value length minus the log item
 * value header.
 */
struct scoutfs_segment_block {
__le32 crc;
__le32 _padding;
__le64 segno;
__le64 seq;
__le32 last_item_off;
__le32 total_bytes;
__le32 nr_items;
__le32 skip_links[SCOUTFS_MAX_SKIP_LINKS];
/* packed items */
#define SCOUTFS_MAX_VAL_SIZE \
(SCOUTFS_BTREE_MAX_VAL_LEN - sizeof(struct scoutfs_log_item_value))
#define SCOUTFS_LOG_ITEM_FLAG_DELETION (1 << 0)
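
A log item's btree value is the fs item value prefixed by this header; a hedged sketch of unpacking one, mirroring what print_logs_item() does below (log_item_fs_val() itself is hypothetical):

static void *log_item_fs_val(void *val, unsigned val_len, unsigned *fs_len)
{
	struct scoutfs_log_item_value *liv = val;

	if (liv->flags & SCOUTFS_LOG_ITEM_FLAG_DELETION) {
		*fs_len = 0;		/* deletion items carry no fs value */
		return NULL;
	}

	*fs_len = val_len - sizeof(*liv);
	return liv->data;
}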
struct scoutfs_bloom_block {
struct scoutfs_block_header hdr;
__le64 total_set;
__le64 bits[0];
} __packed;
/*
 * Log trees include a tree of items that make up a fixed size bloom
 * filter. Just a few megs' worth of items lets us test for the presence
 * of locks that cover billions of files with a 0.1% chance of false
 * positives. The log trees should be finalized and merged long before
 * the bloom filters fill up and start returning excessive false positives.
 */
#define SCOUTFS_FOREST_BLOOM_NRS 7
#define SCOUTFS_FOREST_BLOOM_BITS \
(((SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_bloom_block)) / \
member_sizeof(struct scoutfs_bloom_block, bits[0])) * \
member_sizeof(struct scoutfs_bloom_block, bits[0]) * 8)
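
A membership test sets or checks SCOUTFS_FOREST_BLOOM_NRS bit positions; a sketch assuming a bloom_key_hash() helper (the real hash lives in the kernel module) and a test_bit_le() alongside the set_bit_le() that mkfs uses:

static int bloom_may_contain(struct scoutfs_bloom_block *bb,
			     struct scoutfs_key *key)
{
	unsigned int i;
	u64 nr;

	for (i = 0; i < SCOUTFS_FOREST_BLOOM_NRS; i++) {
		nr = bloom_key_hash(key, i) % SCOUTFS_FOREST_BLOOM_BITS;
		if (!test_bit_le(nr, bb->bits))
			return 0;	/* definitely absent */
	}
	return 1;	/* possibly present; small false positive rate */
}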
/*
* Keys are first sorted by major key zones.
*/
@@ -475,18 +450,21 @@ struct scoutfs_super_block {
__le64 next_ino;
__le64 next_trans_seq;
__le64 total_blocks;
__le64 next_uninit_free_block;
__le64 core_balloc_cursor;
__le64 free_blocks;
__le64 alloc_cursor;
struct scoutfs_btree_ring bring;
__le64 next_seg_seq;
__le64 next_compact_id;
__le64 first_fs_blkno;
__le64 last_fs_blkno;
__le64 quorum_fenced_term;
__le64 quorum_server_term;
__le64 unmount_barrier;
__u8 quorum_count;
struct scoutfs_inet_addr server_addr;
struct scoutfs_balloc_root core_balloc_alloc;
struct scoutfs_balloc_root core_balloc_free;
struct scoutfs_btree_root alloc_root;
struct scoutfs_manifest manifest;
struct scoutfs_btree_root fs_root;
struct scoutfs_btree_root logs_root;
struct scoutfs_btree_root lock_clients;
struct scoutfs_btree_root trans_seqs;
struct scoutfs_btree_root mounted_clients;
@@ -594,8 +572,6 @@ enum {
DIV_ROUND_UP(sizeof(struct scoutfs_xattr) + name_len + val_len, \
SCOUTFS_XATTR_MAX_PART_SIZE);
#define SCOUTFS_MAX_VAL_SIZE SCOUTFS_XATTR_MAX_PART_SIZE
#define SCOUTFS_LOCK_INODE_GROUP_NR 1024
#define SCOUTFS_LOCK_INODE_GROUP_MASK (SCOUTFS_LOCK_INODE_GROUP_NR - 1)
#define SCOUTFS_LOCK_SEQ_GROUP_MASK ((1ULL << 10) - 1)
@@ -678,13 +654,11 @@ enum {
SCOUTFS_NET_CMD_ALLOC_INODES,
SCOUTFS_NET_CMD_ALLOC_EXTENT,
SCOUTFS_NET_CMD_FREE_EXTENTS,
SCOUTFS_NET_CMD_ALLOC_SEGNO,
SCOUTFS_NET_CMD_RECORD_SEGMENT,
SCOUTFS_NET_CMD_GET_LOG_TREES,
SCOUTFS_NET_CMD_COMMIT_LOG_TREES,
SCOUTFS_NET_CMD_ADVANCE_SEQ,
SCOUTFS_NET_CMD_GET_LAST_SEQ,
SCOUTFS_NET_CMD_GET_MANIFEST_ROOT,
SCOUTFS_NET_CMD_STATFS,
SCOUTFS_NET_CMD_COMPACT,
SCOUTFS_NET_CMD_LOCK,
SCOUTFS_NET_CMD_LOCK_RECOVER,
SCOUTFS_NET_CMD_FAREWELL,
@@ -723,20 +697,6 @@ struct scoutfs_net_inode_alloc {
__le64 nr;
} __packed;
struct scoutfs_net_key_range {
__le16 start_len;
__le16 end_len;
__u8 key_bytes[0];
} __packed;
struct scoutfs_net_manifest_entry {
__le64 segno;
__le64 seq;
struct scoutfs_key first;
struct scoutfs_key last;
__u8 level;
} __packed;
struct scoutfs_net_statfs {
__le64 total_blocks; /* total blocks in device */
__le64 next_ino; /* next unused inode number */
@@ -763,52 +723,9 @@ struct scoutfs_net_extent_list {
/* arbitrarily makes a nice ~1k extent list payload */
#define SCOUTFS_NET_EXTENT_LIST_MAX_NR 64
/* one upper segment and fanout lower segments */
#define SCOUTFS_COMPACTION_MAX_INPUT (1 + SCOUTFS_MANIFEST_FANOUT)
/* sticky can split the input and item alignment padding can add a lower */
#define SCOUTFS_COMPACTION_SEGNO_OVERHEAD 2
#define SCOUTFS_COMPACTION_MAX_OUTPUT \
(SCOUTFS_COMPACTION_MAX_INPUT + SCOUTFS_COMPACTION_SEGNO_OVERHEAD)
/*
* A compact request is sent by the server to the client. It provides
* the input segments and enough allocated segnos to write the results.
* The id uniquely identifies this compaction request and is included in
* the response to clean up its allocated resources.
*/
struct scoutfs_net_compact_request {
__le64 id;
__u8 last_level;
__u8 flags;
__le64 segnos[SCOUTFS_COMPACTION_MAX_OUTPUT];
struct scoutfs_net_manifest_entry ents[SCOUTFS_COMPACTION_MAX_INPUT];
} __packed;
/*
* A sticky compaction has more lower level segments that overlap with
* the end of the upper after the last lower level segment included in
* the compaction. Items left in the upper segment after the last lower
* need to be written to the upper level instead of the lower. The
* upper segment "sticks" in place instead of moving down to the lower
* level.
*/
#define SCOUTFS_NET_COMPACT_FLAG_STICKY (1 << 0)
/*
* A compact response is sent by the client to the server. It describes
* the written output segments that need to be added to the manifest.
* The server compares the response to the request to free unused
* allocated segnos and input manifest entries. An empty response is
* valid and can happen if, say, the upper input segment completely
* deleted all the items in a single overlapping lower segment.
*/
struct scoutfs_net_compact_response {
__le64 id;
struct scoutfs_net_manifest_entry ents[SCOUTFS_COMPACTION_MAX_OUTPUT];
} __packed;
struct scoutfs_net_lock {
struct scoutfs_key key;
__le64 write_version;
__u8 old_mode;
__u8 new_mode;
} __packed;

View File

@@ -238,28 +238,6 @@ struct scoutfs_ioctl_stat_more {
struct scoutfs_ioctl_stat_more)
/*
* Fills the buffer with either the keys for the cached items or the
* keys for the cached ranges found starting with the given key. The
* number of keys filled in the buffer is returned. When filling range
* keys the returned number will always be a multiple of two.
*/
struct scoutfs_ioctl_item_cache_keys {
struct scoutfs_ioctl_key ikey;
__u64 buf_ptr;
__u16 buf_nr;
__u8 which;
__u8 _pad[21]; /* padded to align _ioctl_key total size */
};
enum {
SCOUTFS_IOC_ITEM_CACHE_KEYS_ITEMS = 0,
SCOUTFS_IOC_ITEM_CACHE_KEYS_RANGES,
};
#define SCOUTFS_IOC_ITEM_CACHE_KEYS _IOR(SCOUTFS_IOCTL_MAGIC, 6, \
struct scoutfs_ioctl_item_cache_keys)
struct scoutfs_ioctl_data_waiting_entry {
__u64 ino;
__u64 iblock;
@@ -283,7 +261,7 @@ struct scoutfs_ioctl_data_waiting {
#define SCOUTFS_IOC_DATA_WAITING_FLAGS_UNKNOWN (U8_MAX << 0)
#define SCOUTFS_IOC_DATA_WAITING _IOR(SCOUTFS_IOCTL_MAGIC, 7, \
#define SCOUTFS_IOC_DATA_WAITING _IOR(SCOUTFS_IOCTL_MAGIC, 6, \
struct scoutfs_ioctl_data_waiting)
/*
@@ -303,7 +281,7 @@ struct scoutfs_ioctl_setattr_more {
#define SCOUTFS_IOC_SETATTR_MORE_OFFLINE (1 << 0)
#define SCOUTFS_IOC_SETATTR_MORE_UNKNOWN (U8_MAX << 1)
#define SCOUTFS_IOC_SETATTR_MORE _IOW(SCOUTFS_IOCTL_MAGIC, 8, \
#define SCOUTFS_IOC_SETATTR_MORE _IOW(SCOUTFS_IOCTL_MAGIC, 7, \
struct scoutfs_ioctl_setattr_more)
struct scoutfs_ioctl_listxattr_hidden {
@@ -313,7 +291,7 @@ struct scoutfs_ioctl_listxattr_hidden {
__u32 hash_pos;
};
#define SCOUTFS_IOC_LISTXATTR_HIDDEN _IOR(SCOUTFS_IOCTL_MAGIC, 9, \
#define SCOUTFS_IOC_LISTXATTR_HIDDEN _IOR(SCOUTFS_IOCTL_MAGIC, 8, \
struct scoutfs_ioctl_listxattr_hidden)
/*
@@ -344,7 +322,7 @@ struct scoutfs_ioctl_find_xattrs {
__u8 _pad[4];
};
#define SCOUTFS_IOC_FIND_XATTRS _IOR(SCOUTFS_IOCTL_MAGIC, 10, \
#define SCOUTFS_IOC_FIND_XATTRS _IOR(SCOUTFS_IOCTL_MAGIC, 9, \
struct scoutfs_ioctl_find_xattrs)
/*
@@ -365,7 +343,7 @@ struct scoutfs_ioctl_statfs_more {
__u64 rid;
} __packed;
#define SCOUTFS_IOC_STATFS_MORE _IOR(SCOUTFS_IOCTL_MAGIC, 11, \
#define SCOUTFS_IOC_STATFS_MORE _IOR(SCOUTFS_IOCTL_MAGIC, 10, \
struct scoutfs_ioctl_statfs_more)

View File

@@ -1,92 +0,0 @@
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
#include <limits.h>
#include "sparse.h"
#include "util.h"
#include "format.h"
#include "ioctl.h"
#include "cmd.h"
#include "key.h"
static int item_cache_keys(int argc, char **argv, int which)
{
struct scoutfs_ioctl_item_cache_keys ick;
struct scoutfs_ioctl_key ikeys[32];
struct scoutfs_key key;
int ret;
int fd;
int i;
if (argc != 2) {
fprintf(stderr, "too many arguments, only scoutfs path needed");
return -EINVAL;
}
fd = open(argv[1], O_RDONLY);
if (fd < 0) {
ret = -errno;
fprintf(stderr, "failed to open '%s': %s (%d)\n",
argv[1], strerror(errno), errno);
return ret;
}
memset(&ick, 0, sizeof(ick));
ick.buf_ptr = (unsigned long)ikeys;
ick.buf_nr = array_size(ikeys);
ick.which = which;
for (;;) {
ret = ioctl(fd, SCOUTFS_IOC_ITEM_CACHE_KEYS, &ick);
if (ret < 0) {
ret = -errno;
fprintf(stderr, "walk_inodes ioctl failed: %s (%d)\n",
strerror(errno), errno);
break;
} else if (ret == 0) {
break;
}
for (i = 0; i < ret; i++) {
scoutfs_key_copy_types(&key, &ikeys[i]);
printf(SK_FMT, SK_ARG(&key));
if (which == SCOUTFS_IOC_ITEM_CACHE_KEYS_ITEMS ||
(i & 1))
printf("\n");
else
printf(" - ");
}
scoutfs_key_inc(&key);
scoutfs_key_copy_types(&ick.ikey, &key);
}
close(fd);
return ret;
};
static int item_keys(int argc, char **argv)
{
return item_cache_keys(argc, argv, SCOUTFS_IOC_ITEM_CACHE_KEYS_ITEMS);
}
static int range_keys(int argc, char **argv)
{
return item_cache_keys(argc, argv, SCOUTFS_IOC_ITEM_CACHE_KEYS_RANGES);
}
static void __attribute__((constructor)) item_cache_key_ctor(void)
{
cmd_register("item-cache-keys", "<path>",
"print range of indexed inodes", item_keys);
cmd_register("item-cache-range-keys", "<path>",
"print range of indexed inodes", range_keys);
}

View File

@@ -25,6 +25,7 @@
#include "rand.h"
#include "dev.h"
#include "key.h"
#include "bitops.h"
static int write_raw_block(int fd, u64 blkno, void *blk)
{
@@ -54,80 +55,6 @@ static int write_block(int fd, u64 blkno, struct scoutfs_super_block *super,
return write_raw_block(fd, blkno, hdr);
}
/*
* Calculate the greatest number of btree blocks that might be needed to
store the given item population. In the worst case all blocks will be
half full. All keys will be the max size, including parent items,
which determines the fanout.
*
* We will never hit this in practice. But some joker *could* fill a
* filesystem with empty files with enormous file names.
*/
static u64 calc_btree_blocks(u64 nr, u64 max_key, u64 max_val)
{
u64 item_bytes;
u64 fanout;
u64 block_items;
u64 leaf_blocks;
u64 level_blocks;
u64 total_blocks;
/* figure out the parent fanout for these silly huge possible items */
item_bytes = sizeof(struct scoutfs_btree_item_header) +
sizeof(struct scoutfs_btree_item) +
max_key + sizeof(struct scoutfs_btree_ref);
fanout = ((SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_btree_block) -
SCOUTFS_BTREE_PARENT_MIN_FREE_BYTES) / 2) / item_bytes;
/* figure out how many items we have to store */
item_bytes = sizeof(struct scoutfs_btree_item_header) +
sizeof(struct scoutfs_btree_item) +
max_key + max_val;
block_items = ((SCOUTFS_BLOCK_SIZE -
sizeof(struct scoutfs_btree_block)) / 2) / item_bytes;
leaf_blocks = DIV_ROUND_UP(nr, block_items);
/* then calc total blocks as we grow to have enough blocks for items */
level_blocks = 1;
total_blocks = level_blocks;
while (level_blocks < leaf_blocks) {
level_blocks *= fanout;
level_blocks = min(leaf_blocks, level_blocks);
total_blocks += level_blocks;
}
return total_blocks;
}
/*
* Figure out how many btree ring blocks we'll need for all the btree
* items that could be needed to describe this many segments.
*
* We can have either a free extent or manifest ref for every segment in
* the system. Free extent items are smaller than manifest refs, and
* they merge if they're adjacent, so the largest possible tree is a ref
* for every segment.
*/
static u64 calc_btree_ring_blocks(u64 total_segs)
{
u64 blocks;
/* key is smaller for wider parent fanout */
assert(sizeof(struct scoutfs_extent_btree_key) <=
sizeof(struct scoutfs_manifest_btree_key));
/* 2 extent items is smaller than a manifest ref */
assert((2 * sizeof(struct scoutfs_extent_btree_key)) <=
(sizeof(struct scoutfs_manifest_btree_key) +
sizeof(struct scoutfs_manifest_btree_val)));
blocks = calc_btree_blocks(total_segs,
sizeof(struct scoutfs_manifest_btree_key),
sizeof(struct scoutfs_manifest_btree_val));
return round_up(blocks * 4, SCOUTFS_SEGMENT_BLOCKS);
}
static float size_flt(u64 nr, unsigned size)
{
float x = (float)nr * (float)size;
@@ -166,28 +93,22 @@ static char *size_str(u64 nr, unsigned size)
static int write_new_fs(char *path, int fd, u8 quorum_count)
{
struct scoutfs_super_block *super;
struct scoutfs_key *ino_key;
struct scoutfs_key *idx_key;
struct scoutfs_key_be *kbe;
struct scoutfs_inode *inode;
struct scoutfs_segment_block *sblk;
struct scoutfs_manifest_btree_key *mkey;
struct scoutfs_manifest_btree_val *mval;
struct scoutfs_extent_btree_key *ebk;
struct scoutfs_btree_block *bt;
struct scoutfs_btree_item *btitem;
struct scoutfs_segment_item *item;
struct scoutfs_balloc_item_key *bik;
struct scoutfs_balloc_item_val *biv;
struct scoutfs_key key;
__le32 *prev_link;
struct timeval tv;
char uuid_str[37];
void *zeros;
u64 blkno;
u64 limit;
u64 size;
u64 ring_blocks;
u64 total_segs;
u64 total_blocks;
u64 first_segno;
u64 free_blkno;
u64 free_start;
u64 free_len;
int ret;
@@ -197,9 +118,8 @@ static int write_new_fs(char *path, int fd, u8 quorum_count)
super = calloc(1, SCOUTFS_BLOCK_SIZE);
bt = calloc(1, SCOUTFS_BLOCK_SIZE);
sblk = calloc(1, SCOUTFS_SEGMENT_SIZE);
zeros = calloc(1, SCOUTFS_SEGMENT_SIZE);
if (!super || !bt || !sblk || !zeros) {
zeros = calloc(1, SCOUTFS_BLOCK_SIZE);
if (!super || !bt || !zeros) {
ret = -errno;
fprintf(stderr, "failed to allocate block mem: %s (%d)\n",
strerror(errno), errno);
@@ -213,15 +133,14 @@ static int write_new_fs(char *path, int fd, u8 quorum_count)
goto out;
}
/* arbitrarily require space for a handful of segments */
limit = SCOUTFS_SEGMENT_SIZE * 16;
/* arbitrarily require a reasonably large device */
limit = 8ULL * (1024 * 1024 * 1024);
if (size < limit) {
fprintf(stderr, "%llu byte device too small for min %llu byte fs\n",
size, limit);
goto out;
}
total_segs = size / SCOUTFS_SEGMENT_SIZE;
total_blocks = size / SCOUTFS_BLOCK_SIZE;
/* partially initialize the super so we can use it to init others */
@@ -234,25 +153,21 @@ static int write_new_fs(char *path, int fd, u8 quorum_count)
super->next_ino = cpu_to_le64(SCOUTFS_ROOT_INO + 1);
super->next_trans_seq = cpu_to_le64(1);
super->total_blocks = cpu_to_le64(total_blocks);
super->next_seg_seq = cpu_to_le64(2);
super->next_compact_id = cpu_to_le64(1);
super->quorum_count = quorum_count;
/* align the btree ring to the segment after the super */
blkno = round_up(SCOUTFS_SUPER_BLKNO + 1, SCOUTFS_SEGMENT_BLOCKS);
/* first usable segno follows manifest ring */
ring_blocks = calc_btree_ring_blocks(total_segs);
first_segno = (blkno + ring_blocks) / SCOUTFS_SEGMENT_BLOCKS;
free_start = ((first_segno + 1) << SCOUTFS_SEGMENT_BLOCK_SHIFT);
/* metadata blocks start after the quorum blocks */
free_blkno = SCOUTFS_QUORUM_BLKNO + SCOUTFS_QUORUM_BLOCKS;
/* extents start after btree blocks */
free_start = total_blocks - (total_blocks / 4);
free_len = total_blocks - free_start;
/* fill out some alloc boundaries before using */
super->free_blocks = cpu_to_le64(free_len);
super->bring.first_blkno = cpu_to_le64(blkno);
super->bring.nr_blocks = cpu_to_le64(ring_blocks);
super->bring.next_block = cpu_to_le64(2);
super->bring.next_seq = cpu_to_le64(2);
/* allocator btree has item with space after first segno */
/* extent allocator btree indexes free data extent */
blkno = free_blkno++;
super->alloc_root.ref.blkno = cpu_to_le64(blkno);
super->alloc_root.ref.seq = cpu_to_le64(1);
super->alloc_root.height = 1;
@@ -261,14 +176,13 @@ static int write_new_fs(char *path, int fd, u8 quorum_count)
bt->hdr.fsid = super->hdr.fsid;
bt->hdr.blkno = cpu_to_le64(blkno);
bt->hdr.seq = cpu_to_le64(1);
bt->nr_items = cpu_to_le16(2);
bt->nr_items = cpu_to_le32(2);
/* btree item allocated from the back of the block */
ebk = (void *)bt + SCOUTFS_BLOCK_SIZE - sizeof(*ebk);
btitem = (void *)ebk - sizeof(*btitem);
bt->item_hdrs[0].off = cpu_to_le16((long)btitem - (long)bt);
bt->free_end = bt->item_hdrs[0].off;
bt->item_hdrs[0].off = cpu_to_le32((long)btitem - (long)bt);
btitem->key_len = cpu_to_le16(sizeof(*ebk));
btitem->val_len = cpu_to_le16(0);
@@ -279,8 +193,7 @@ static int write_new_fs(char *path, int fd, u8 quorum_count)
ebk = (void *)btitem - sizeof(*ebk);
btitem = (void *)ebk - sizeof(*btitem);
bt->item_hdrs[1].off = cpu_to_le16((long)btitem - (long)bt);
bt->free_end = bt->item_hdrs[1].off;
bt->item_hdrs[1].off = cpu_to_le32((long)btitem - (long)bt);
btitem->key_len = cpu_to_le16(sizeof(*ebk));
btitem->val_len = cpu_to_le16(0);
@@ -288,6 +201,8 @@ static int write_new_fs(char *path, int fd, u8 quorum_count)
ebk->major = cpu_to_be64(free_len);
ebk->minor = cpu_to_be64(free_start + free_len - 1);
bt->free_end = bt->item_hdrs[le32_to_cpu(bt->nr_items) - 1].off;
bt->hdr.magic = cpu_to_le32(SCOUTFS_BLOCK_MAGIC_BTREE);
bt->hdr.crc = cpu_to_le32(crc_block(&bt->hdr));
@@ -296,85 +211,46 @@ static int write_new_fs(char *path, int fd, u8 quorum_count)
goto out;
blkno++;
/* manifest btree has a block with an item for the segment */
super->manifest.root.ref.blkno = cpu_to_le64(blkno);
super->manifest.root.ref.seq = cpu_to_le64(1);
super->manifest.root.height = 1;
super->manifest.level_counts[1] = cpu_to_le64(1);
/* fs root starts with root inode and its index items */
blkno = free_blkno++;
super->fs_root.ref.blkno = cpu_to_le64(blkno);
super->fs_root.ref.seq = cpu_to_le64(1);
super->fs_root.height = 1;
memset(bt, 0, SCOUTFS_BLOCK_SIZE);
bt->hdr.fsid = super->hdr.fsid;
bt->hdr.blkno = cpu_to_le64(blkno);
bt->hdr.seq = cpu_to_le64(1);
bt->nr_items = cpu_to_le16(1);
bt->nr_items = cpu_to_le32(2);
/* btree item allocated from the back of the block */
mval = (void *)bt + SCOUTFS_BLOCK_SIZE - sizeof(*mval);
ino_key = &mval->last_key;
mkey = (void *)mval - sizeof(*mkey);
btitem = (void *)mkey - sizeof(*btitem);
kbe = (void *)bt + SCOUTFS_BLOCK_SIZE - sizeof(*kbe);
btitem = (void *)kbe - sizeof(*btitem);
bt->item_hdrs[0].off = cpu_to_le16((long)btitem - (long)bt);
bt->free_end = bt->item_hdrs[0].off;
bt->item_hdrs[0].off = cpu_to_le32((long)btitem - (long)bt);
btitem->key_len = cpu_to_le16(sizeof(*kbe));
btitem->val_len = cpu_to_le16(0);
btitem->key_len = cpu_to_le16(sizeof(*mkey));
btitem->val_len = cpu_to_le16(sizeof(*mval));
mkey->level = 1;
mkey->seq = cpu_to_be64(1);
memset(&key, 0, sizeof(key));
key.sk_zone = SCOUTFS_INODE_INDEX_ZONE;
key.sk_type = SCOUTFS_INODE_INDEX_META_SEQ_TYPE;
key.skii_ino = cpu_to_le64(SCOUTFS_ROOT_INO);
scoutfs_key_to_be(&mkey->first_key, &key);
scoutfs_key_to_be(kbe, &key);
mval->segno = cpu_to_le64(first_segno);
ino_key->sk_zone = SCOUTFS_FS_ZONE;
ino_key->ski_ino = cpu_to_le64(SCOUTFS_ROOT_INO);
ino_key->sk_type = SCOUTFS_INODE_TYPE;
inode = (void *)btitem - sizeof(*inode);
kbe = (void *)inode - sizeof(*kbe);
btitem = (void *)kbe - sizeof(*btitem);
bt->hdr.magic = cpu_to_le32(SCOUTFS_BLOCK_MAGIC_BTREE);
bt->hdr.crc = cpu_to_le32(crc_block(&bt->hdr));
bt->item_hdrs[1].off = cpu_to_le32((long)btitem - (long)bt);
btitem->key_len = cpu_to_le16(sizeof(*kbe));
btitem->val_len = cpu_to_le16(sizeof(*inode));
ret = write_raw_block(fd, blkno, bt);
if (ret)
goto out;
blkno += ring_blocks;
/* write seg with root inode */
sblk->segno = cpu_to_le64(first_segno);
sblk->seq = cpu_to_le64(1);
prev_link = &sblk->skip_links[0];
item = (void *)(sblk + 1);
*prev_link = cpu_to_le32((long)item -(long)sblk);
prev_link = &item->skip_links[0];
item->val_len = 0;
item->nr_links = 1;
le32_add_cpu(&sblk->nr_items, 1);
idx_key = &item->key;
idx_key->sk_zone = SCOUTFS_INODE_INDEX_ZONE;
idx_key->sk_type = SCOUTFS_INODE_INDEX_META_SEQ_TYPE;
idx_key->skii_ino = cpu_to_le64(SCOUTFS_ROOT_INO);
item = (void *)&item->skip_links[1];
*prev_link = cpu_to_le32((long)item -(long)sblk);
prev_link = &item->skip_links[0];
sblk->last_item_off = cpu_to_le32((long)item - (long)sblk);
ino_key = (void *)&item->key;
inode = (void *)&item->skip_links[1];
item->val_len = cpu_to_le16(sizeof(struct scoutfs_inode));
item->nr_links = 1;
le32_add_cpu(&sblk->nr_items, 1);
ino_key->sk_zone = SCOUTFS_FS_ZONE;
ino_key->ski_ino = cpu_to_le64(SCOUTFS_ROOT_INO);
ino_key->sk_type = SCOUTFS_INODE_TYPE;
memset(&key, 0, sizeof(key));
key.sk_zone = SCOUTFS_FS_ZONE;
key.ski_ino = cpu_to_le64(SCOUTFS_ROOT_INO);
key.sk_type = SCOUTFS_INODE_TYPE;
scoutfs_key_to_be(kbe, &key);
inode->next_readdir_pos = cpu_to_le64(2);
inode->nlink = cpu_to_le32(SCOUTFS_DIRENT_FIRST_POS);
@@ -386,16 +262,55 @@ static int write_new_fs(char *path, int fd, u8 quorum_count)
inode->mtime.sec = inode->atime.sec;
inode->mtime.nsec = inode->atime.nsec;
item = (void *)(inode + 1);
sblk->total_bytes = cpu_to_le32((long)item - (long)sblk);
sblk->crc = cpu_to_le32(crc_segment(sblk));
bt->free_end = bt->item_hdrs[le32_to_cpu(bt->nr_items) - 1].off;
ret = pwrite(fd, sblk, SCOUTFS_SEGMENT_SIZE,
first_segno << SCOUTFS_SEGMENT_SHIFT);
if (ret != SCOUTFS_SEGMENT_SIZE) {
ret = -EIO;
bt->hdr.magic = cpu_to_le32(SCOUTFS_BLOCK_MAGIC_BTREE);
bt->hdr.crc = cpu_to_le32(crc_block(&bt->hdr));
ret = write_raw_block(fd, blkno, bt);
if (ret)
goto out;
/* metadata block allocator has single item, server continues init */
blkno = free_blkno++;
super->core_balloc_alloc.root.ref.blkno = cpu_to_le64(blkno);
super->core_balloc_alloc.root.ref.seq = cpu_to_le64(1);
super->core_balloc_alloc.root.height = 1;
/* XXX magic */
memset(bt, 0, SCOUTFS_BLOCK_SIZE);
bt->hdr.fsid = super->hdr.fsid;
bt->hdr.blkno = cpu_to_le64(blkno);
bt->hdr.seq = cpu_to_le64(1);
bt->nr_items = cpu_to_le32(1);
/* btree item allocated from the back of the block */
biv = (void *)bt + SCOUTFS_BLOCK_SIZE - sizeof(*biv);
bik = (void *)biv - sizeof(*bik);
btitem = (void *)bik - sizeof(*btitem);
bt->item_hdrs[0].off = cpu_to_le32((long)btitem - (long)bt);
btitem->key_len = cpu_to_le16(sizeof(*bik));
btitem->val_len = cpu_to_le16(sizeof(*biv));
bik->base = cpu_to_be64(0); /* XXX true? */
/* set all the bits past our final used blkno */
super->core_balloc_free.total_free =
cpu_to_le64(SCOUTFS_BALLOC_ITEM_BITS - free_blkno);
for (i = free_blkno; i < SCOUTFS_BALLOC_ITEM_BITS; i++)
set_bit_le(i, &biv->bits);
bt->free_end = bt->item_hdrs[le32_to_cpu(bt->nr_items) - 1].off;
bt->hdr.magic = cpu_to_le32(SCOUTFS_BLOCK_MAGIC_BTREE);
bt->hdr.crc = cpu_to_le32(crc_block(&bt->hdr));
ret = write_raw_block(fd, blkno, bt);
if (ret)
goto out;
}
/* zero out quorum blocks */
for (i = 0; i < SCOUTFS_QUORUM_BLOCKS; i++) {
@@ -407,6 +322,8 @@ static int write_new_fs(char *path, int fd, u8 quorum_count)
}
}
super->next_uninit_free_block = cpu_to_le64(SCOUTFS_BALLOC_ITEM_BITS);
/* write the super block */
super->hdr.seq = cpu_to_le64(1);
ret = write_block(fd, SCOUTFS_SUPER_BLKNO, NULL, &super->hdr);
@@ -423,22 +340,21 @@ static int write_new_fs(char *path, int fd, u8 quorum_count)
uuid_unparse(super->uuid, uuid_str);
printf("Created scoutfs filesystem:\n"
" device path: %s\n"
" fsid: %llx\n"
" format hash: %llx\n"
" uuid: %s\n"
" device bytes: "SIZE_FMT"\n"
" device blocks: "SIZE_FMT"\n"
" btree ring blocks: "SIZE_FMT"\n"
" free blocks: "SIZE_FMT"\n"
" quorum count: %u\n",
" device path: %s\n"
" fsid: %llx\n"
" format hash: %llx\n"
" uuid: %s\n"
" device blocks: "SIZE_FMT"\n"
" metadata blocks: "SIZE_FMT"\n"
" file extent blocks: "SIZE_FMT"\n"
" quorum count: %u\n",
path,
le64_to_cpu(super->hdr.fsid),
le64_to_cpu(super->format_hash),
uuid_str,
SIZE_ARGS(size, 1),
SIZE_ARGS(total_blocks, SCOUTFS_BLOCK_SIZE),
SIZE_ARGS(le64_to_cpu(super->bring.nr_blocks),
SIZE_ARGS(le64_to_cpu(super->total_blocks) -
le64_to_cpu(super->free_blocks),
SCOUTFS_BLOCK_SIZE),
SIZE_ARGS(le64_to_cpu(super->free_blocks),
SCOUTFS_BLOCK_SIZE),
@@ -450,8 +366,6 @@ out:
free(super);
if (bt)
free(bt);
if (sblk)
free(sblk);
if (zeros)
free(zeros);
return ret;

View File

@@ -41,27 +41,6 @@ static void *read_block(int fd, u64 blkno)
return buf;
}
static void *read_segment(int fd, u64 segno)
{
ssize_t ret;
void *buf;
buf = malloc(SCOUTFS_SEGMENT_SIZE);
if (!buf)
return NULL;
ret = pread(fd, buf, SCOUTFS_SEGMENT_SIZE,
segno << SCOUTFS_SEGMENT_SHIFT);
if (ret != SCOUTFS_SEGMENT_SIZE) {
fprintf(stderr, "read segno %llu returned %zd: %s (%d)\n",
segno, ret, strerror(errno), errno);
free(buf);
buf = NULL;
}
return buf;
}
static void print_block_header(struct scoutfs_block_header *hdr)
{
u32 crc = crc_block(hdr);
@@ -240,93 +219,92 @@ static print_func_t find_printer(u8 zone, u8 type)
return NULL;
}
static void print_item(struct scoutfs_segment_block *sblk,
struct scoutfs_segment_item *item, u32 which, u32 off)
static int print_fs_item(void *key, unsigned key_len, void *val,
unsigned val_len, void *arg)
{
struct scoutfs_key item_key;
print_func_t printer;
void *val;
int i;
val = (char *)&item->skip_links[item->nr_links];
scoutfs_key_from_be(&item_key, key);
printer = find_printer(item->key.sk_zone, item->key.sk_type);
printf(" "SK_FMT"\n", SK_ARG(&item_key));
printf(" [%u]: key "SK_FMT" off %u val_len %u nr_links %u flags %x%s\n",
which, SK_ARG(&item->key), off, le16_to_cpu(item->val_len),
item->nr_links,
item->flags, printer ? "" : " (unrecognized zone+type)");
printf(" links:");
for (i = 0; i < item->nr_links; i++)
printf(" %u", le32_to_cpu(item->skip_links[i]));
printf("\n");
if (printer)
printer(&item->key, val, le16_to_cpu(item->val_len));
}
static void print_segment_block(struct scoutfs_segment_block *sblk)
{
int i;
printf(" sblk: segno %llu seq %llu last_item_off %u total_bytes %u "
"nr_items %u\n",
le64_to_cpu(sblk->segno), le64_to_cpu(sblk->seq),
le32_to_cpu(sblk->last_item_off), le32_to_cpu(sblk->total_bytes),
le32_to_cpu(sblk->nr_items));
printf(" links:");
for (i = 0; sblk->skip_links[i]; i++)
printf(" %u", le32_to_cpu(sblk->skip_links[i]));
printf("\n");
}
static int print_segments(int fd, unsigned long *seg_map, u64 total)
{
struct scoutfs_segment_block *sblk;
struct scoutfs_segment_item *item;
u32 off;
u64 s;
u64 i;
for (s = 0; (s = find_next_set_bit(seg_map, s, total)) < total; s++) {
sblk = read_segment(fd, s);
if (!sblk)
return -ENOMEM;
printf("segment segno %llu\n", s);
print_segment_block(sblk);
off = le32_to_cpu(sblk->skip_links[0]);
for (i = 0; i < le32_to_cpu(sblk->nr_items); i++) {
item = (void *)sblk + off;
print_item(sblk, item, i, off);
off = le32_to_cpu(item->skip_links[0]);
}
free(sblk);
/* only items in leaf blocks have values */
if (val) {
printer = find_printer(item_key.sk_zone, item_key.sk_type);
if (printer)
printer(&item_key, val, val_len);
else
printf(" (unknown zone %u type %u)\n",
item_key.sk_zone, item_key.sk_type);
}
return 0;
}
static int print_manifest_entry(void *key, unsigned key_len, void *val,
unsigned val_len, void *arg)
/* same as fs item but with a small header in the value */
static int print_logs_item(void *key, unsigned key_len, void *val,
unsigned val_len, void *arg)
{
struct scoutfs_manifest_btree_key *mkey = key;
struct scoutfs_manifest_btree_val *mval = val;
struct scoutfs_key first;
unsigned long *seg_map = arg;
struct scoutfs_key item_key;
struct scoutfs_log_item_value *liv;
print_func_t printer;
scoutfs_key_from_be(&first, &mkey->first_key);
scoutfs_key_from_be(&item_key, key);
printf(" level %u first "SK_FMT" seq %llu\n",
mkey->level, SK_ARG(&first), be64_to_cpu(mkey->seq));
printf(" "SK_FMT"\n", SK_ARG(&item_key));
/* only items in leaf blocks have values */
if (val) {
printf(" segno %llu last "SK_FMT"\n",
le64_to_cpu(mval->segno), SK_ARG(&mval->last_key));
liv = val;
printf(" log_item_value: vers %llu flags %x\n",
le64_to_cpu(liv->vers), liv->flags);
set_bit(seg_map, le64_to_cpu(mval->segno));
/* deletion items don't have values */
if (!(liv->flags & SCOUTFS_LOG_ITEM_FLAG_DELETION)) {
printer = find_printer(item_key.sk_zone,
item_key.sk_type);
if (printer)
printer(&item_key, val + sizeof(*liv),
val_len - sizeof(*liv));
else
printf(" (unknown zone %u type %u)\n",
item_key.sk_zone, item_key.sk_type);
}
}
return 0;
}
/* same as fs item but with a small header in the value */
static int print_log_trees_item(void *key, unsigned key_len, void *val,
unsigned val_len, void *arg)
{
struct scoutfs_log_trees_key *ltk = key;
struct scoutfs_log_trees_val *ltv = val;
printf(" rid %llu nr %llu\n",
be64_to_cpu(ltk->rid), be64_to_cpu(ltk->nr));
/* only items in leaf blocks have values */
if (val) {
printf(" alloc_root: total_free %llu root: height %u blkno %llu seq %llu\n"
" free_root: total_free %llu root: height %u blkno %llu seq %llu\n"
" item_root: height %u blkno %llu seq %llu\n"
" bloom_ref: blkno %llu seq %llu\n",
le64_to_cpu(ltv->alloc_root.total_free),
ltv->alloc_root.root.height,
le64_to_cpu(ltv->alloc_root.root.ref.blkno),
le64_to_cpu(ltv->alloc_root.root.ref.seq),
le64_to_cpu(ltv->free_root.total_free),
ltv->free_root.root.height,
le64_to_cpu(ltv->free_root.root.ref.blkno),
le64_to_cpu(ltv->free_root.root.ref.seq),
ltv->item_root.height,
le64_to_cpu(ltv->item_root.ref.blkno),
le64_to_cpu(ltv->item_root.ref.seq),
le64_to_cpu(ltv->bloom_ref.blkno),
le64_to_cpu(ltv->bloom_ref.seq));
}
return 0;
@@ -375,7 +353,18 @@ static int print_trans_seqs_entry(void *key, unsigned key_len, void *val,
return 0;
}
/* XXX should make sure that the val is null terminated */
static int print_balloc_entry(void *key, unsigned key_len, void *val,
unsigned val_len, void *arg)
{
struct scoutfs_balloc_item_key *bik = key;
// struct scoutfs_balloc_item_val *biv = val;
printf(" base %llu\n",
be64_to_cpu(bik->base));
return 0;
}
static int print_mounted_client_entry(void *key, unsigned key_len, void *val,
unsigned val_len, void *arg)
{
@@ -423,20 +412,19 @@ static int print_btree_block(int fd, struct scoutfs_super_block *super,
if (bt->level == level) {
printf("%s btree blkno %llu\n"
" crc %08x fsid %llx seq %llu blkno %llu \n"
" level %u free_end %u free_reclaim %u nr_items %u\n",
" level %u free_end %u nr_items %u\n",
which, le64_to_cpu(ref->blkno),
le32_to_cpu(bt->hdr.crc),
le64_to_cpu(bt->hdr.fsid),
le64_to_cpu(bt->hdr.seq),
le64_to_cpu(bt->hdr.blkno),
bt->level,
le16_to_cpu(bt->free_end),
le16_to_cpu(bt->free_reclaim),
le16_to_cpu(bt->nr_items));
le32_to_cpu(bt->free_end),
le32_to_cpu(bt->nr_items));
}
for (i = 0; i < le16_to_cpu(bt->nr_items); i++) {
item = (void *)bt + le16_to_cpu(bt->item_hdrs[i].off);
for (i = 0; i < le32_to_cpu(bt->nr_items); i++) {
item = (void *)bt + le32_to_cpu(bt->item_hdrs[i].off);
key_len = le16_to_cpu(item->key_len);
val_len = le16_to_cpu(item->val_len);
key = (void *)(item + 1);
@@ -455,7 +443,7 @@ static int print_btree_block(int fd, struct scoutfs_super_block *super,
}
printf(" item [%u] off %u key_len %u val_len %u\n",
i, le16_to_cpu(bt->item_hdrs[i].off), key_len, val_len);
i, le32_to_cpu(bt->item_hdrs[i].off), key_len, val_len);
if (level)
print_btree_ref(key, key_len, val, val_len, func, arg);
@@ -489,6 +477,98 @@ static int print_btree(int fd, struct scoutfs_super_block *super, char *which,
return ret;
}
struct print_recursion_args {
struct scoutfs_super_block *super;
int fd;
};
/* same as fs item but with a small header in the value */
static int print_log_trees_roots(void *key, unsigned key_len, void *val,
unsigned val_len, void *arg)
{
struct scoutfs_log_trees_key *ltk = key;
struct scoutfs_log_trees_val *ltv = val;
struct print_recursion_args *pa = arg;
struct log_trees_roots {
char *fmt;
struct scoutfs_btree_root *root;
print_item_func func;
} roots[] = {
{ "log_tree_rid:%llu_nr:%llu_alloc",
&ltv->alloc_root.root,
print_balloc_entry,
},
{ "log_tree_rid:%llu_nr:%llu_free",
&ltv->free_root.root,
print_balloc_entry,
},
{ "log_tree_rid:%llu_nr:%llu_item",
&ltv->item_root,
print_logs_item,
},
};
char which[100];
int ret;
int err;
int i;
/* XXX doesn't print the bloom block */
ret = 0;
for (i = 0; i < array_size(roots); i++) {
snprintf(which, sizeof(which) - 1, roots[i].fmt,
be64_to_cpu(ltk->rid), be64_to_cpu(ltk->nr));
err = print_btree(pa->fd, pa->super, which, roots[i].root,
roots[i].func, NULL);
if (err && !ret)
ret = err;
}
return ret;
}
static int print_btree_leaf_items(int fd, struct scoutfs_super_block *super,
struct scoutfs_btree_ref *ref,
print_item_func func, void *arg)
{
struct scoutfs_btree_item *item;
struct scoutfs_btree_block *bt;
unsigned key_len;
unsigned val_len;
void *key;
void *val;
int ret = 0;
int i;
if (ref->blkno == 0)
return 0;
bt = read_block(fd, le64_to_cpu(ref->blkno));
if (!bt)
return -ENOMEM;
for (i = 0; i < le32_to_cpu(bt->nr_items); i++) {
item = (void *)bt + le32_to_cpu(bt->item_hdrs[i].off);
key_len = le16_to_cpu(item->key_len);
val_len = le16_to_cpu(item->val_len);
key = (void *)(item + 1);
val = (void *)key + key_len;
if (bt->level > 0) {
ret = print_btree_leaf_items(fd, super, val, func, arg);
if (ret)
break;
continue;
} else {
func(key, key_len, val, val_len, arg);
}
}
free(bt);
return ret;
}
static char *alloc_addr_str(struct scoutfs_inet_addr *ia)
{
struct in_addr addr;
@@ -572,8 +652,6 @@ static void print_super_block(struct scoutfs_super_block *super, u64 blkno)
{
char uuid_str[37];
char *server_addr;
u64 count;
int i;
uuid_unparse(super->uuid, uuid_str);
@@ -587,62 +665,52 @@ static void print_super_block(struct scoutfs_super_block *super, u64 blkno)
return;
/* XXX these are all in a crazy order */
printf(" next_ino %llu next_trans_seq %llu next_seg_seq %llu\n"
" next_compact_id %llu\n"
" total_blocks %llu free_blocks %llu alloc_cursor %llu\n"
printf(" next_ino %llu next_trans_seq %llu\n"
" total_blocks %llu free_blocks %llu\n"
" next_uninit_free_block %llu core_balloc_blocks %llu\n"
" quorum_fenced_term %llu quorum_server_term %llu unmount_barrier %llu\n"
" quorum_count %u server_addr %s\n"
" btree ring: first_blkno %llu nr_blocks %llu next_block %llu "
"next_seq %llu\n"
" lock_clients root: height %u blkno %llu seq %llu mig_len %u\n"
" mounted_clients root: height %u blkno %llu seq %llu mig_len %u\n"
" trans_seqs root: height %u blkno %llu seq %llu mig_len %u\n"
" alloc btree root: height %u blkno %llu seq %llu mig_len %u\n"
" manifest btree root: height %u blkno %llu seq %llu mig_len %u\n",
" core_balloc_alloc: total_free %llu root: height %u blkno %llu seq %llu\n"
" core_balloc_free: total_free %llu root: height %u blkno %llu seq %llu\n"
" lock_clients root: height %u blkno %llu seq %llu\n"
" mounted_clients root: height %u blkno %llu seq %llu\n"
" trans_seqs root: height %u blkno %llu seq %llu\n"
" alloc btree root: height %u blkno %llu seq %llu\n"
" fs_root btree root: height %u blkno %llu seq %llu\n",
le64_to_cpu(super->next_ino),
le64_to_cpu(super->next_trans_seq),
le64_to_cpu(super->next_seg_seq),
le64_to_cpu(super->next_compact_id),
le64_to_cpu(super->total_blocks),
le64_to_cpu(super->free_blocks),
le64_to_cpu(super->alloc_cursor),
le64_to_cpu(super->next_uninit_free_block),
le64_to_cpu(super->core_balloc_cursor),
le64_to_cpu(super->quorum_fenced_term),
le64_to_cpu(super->quorum_server_term),
le64_to_cpu(super->unmount_barrier),
super->quorum_count,
server_addr,
le64_to_cpu(super->bring.first_blkno),
le64_to_cpu(super->bring.nr_blocks),
le64_to_cpu(super->bring.next_block),
le64_to_cpu(super->bring.next_seq),
le64_to_cpu(super->core_balloc_alloc.total_free),
super->core_balloc_alloc.root.height,
le64_to_cpu(super->core_balloc_alloc.root.ref.blkno),
le64_to_cpu(super->core_balloc_alloc.root.ref.seq),
le64_to_cpu(super->core_balloc_free.total_free),
super->core_balloc_free.root.height,
le64_to_cpu(super->core_balloc_free.root.ref.blkno),
le64_to_cpu(super->core_balloc_free.root.ref.seq),
super->lock_clients.height,
le64_to_cpu(super->lock_clients.ref.blkno),
le64_to_cpu(super->lock_clients.ref.seq),
le16_to_cpu(super->lock_clients.migration_key_len),
super->mounted_clients.height,
le64_to_cpu(super->mounted_clients.ref.blkno),
le64_to_cpu(super->mounted_clients.ref.seq),
le16_to_cpu(super->mounted_clients.migration_key_len),
super->trans_seqs.height,
le64_to_cpu(super->trans_seqs.ref.blkno),
le64_to_cpu(super->trans_seqs.ref.seq),
le16_to_cpu(super->trans_seqs.migration_key_len),
super->alloc_root.height,
le64_to_cpu(super->alloc_root.ref.blkno),
le64_to_cpu(super->alloc_root.ref.seq),
le16_to_cpu(super->alloc_root.migration_key_len),
super->manifest.root.height,
le64_to_cpu(super->manifest.root.ref.blkno),
le64_to_cpu(super->manifest.root.ref.seq),
le16_to_cpu(super->manifest.root.migration_key_len));
printf(" level_counts:");
for (i = 0; i < SCOUTFS_MANIFEST_MAX_LEVEL; i++) {
count = le64_to_cpu(super->manifest.level_counts[i]);
if (count)
printf(" %u: %llu", i, count);
}
printf("\n");
super->fs_root.height,
le64_to_cpu(super->fs_root.ref.blkno),
le64_to_cpu(super->fs_root.ref.seq));
free(server_addr);
}
@@ -650,8 +718,7 @@ static void print_super_block(struct scoutfs_super_block *super, u64 blkno)
static int print_volume(int fd)
{
struct scoutfs_super_block *super = NULL;
unsigned long *seg_map = NULL;
u64 nr_segs;
struct print_recursion_args pa;
int ret = 0;
int err;
@@ -661,15 +728,6 @@ static int print_volume(int fd)
print_super_block(super, SCOUTFS_SUPER_BLKNO);
nr_segs = le64_to_cpu(super->total_blocks) / SCOUTFS_SEGMENT_BLOCKS;
seg_map = alloc_bits(nr_segs);
if (!seg_map) {
ret = -ENOMEM;
fprintf(stderr, "failed to alloc %llu seg map: %s (%d)\n",
nr_segs, strerror(errno), errno);
goto out;
}
ret = print_quorum_blocks(fd, super);
err = print_btree(fd, super, "lock_clients", &super->lock_clients,
@@ -687,23 +745,41 @@ static int print_volume(int fd)
if (err && !ret)
ret = err;
err = print_btree(fd, super, "core_balloc_alloc",
&super->core_balloc_alloc.root,
print_balloc_entry, NULL);
if (err && !ret)
ret = err;
err = print_btree(fd, super, "core_balloc_free",
&super->core_balloc_free.root,
print_balloc_entry, NULL);
if (err && !ret)
ret = err;
err = print_btree(fd, super, "alloc", &super->alloc_root,
print_alloc_item, NULL);
if (err && !ret)
ret = err;
err = print_btree(fd, super, "manifest", &super->manifest.root,
print_manifest_entry, seg_map);
err = print_btree(fd, super, "logs_root", &super->logs_root,
print_log_trees_item, NULL);
if (err && !ret)
ret = err;
err = print_segments(fd, seg_map, nr_segs);
pa.super = super;
pa.fd = fd;
err = print_btree_leaf_items(fd, super, &super->logs_root.ref,
print_log_trees_roots, &pa);
if (err && !ret)
ret = err;
err = print_btree(fd, super, "fs_root", &super->fs_root,
print_fs_item, NULL);
if (err && !ret)
ret = err;
out:
free(super);
free(seg_map);
return ret;
}