Update mkfs and print for lsm writing

Adapt mkfs and print for the format changes made to support writing
segments.

Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
Zach Brown
2016-12-07 10:20:03 -08:00
parent eb4baa88f5
commit 818e149643
5 changed files with 213 additions and 60 deletions

View File

@@ -58,7 +58,8 @@ struct scoutfs_ring_entry_header {
__le16 len;
} __packed;
#define SCOUTFS_RING_ADD_MANIFEST 1
#define SCOUTFS_RING_ADD_MANIFEST 1
#define SCOUTFS_RING_ADD_ALLOC 2
struct scoutfs_ring_add_manifest {
struct scoutfs_ring_entry_header eh;
@@ -70,26 +71,55 @@ struct scoutfs_ring_add_manifest {
/* first and last key bytes */
} __packed;
#define SCOUTFS_ALLOC_REGION_SHIFT 8
#define SCOUTFS_ALLOC_REGION_BITS (1 << SCOUTFS_ALLOC_REGION_SHIFT)
#define SCOUTFS_ALLOC_REGION_MASK (SCOUTFS_ALLOC_REGION_BITS - 1)
/*
* The bits need to be aligned so that the host can use native long
* bitops on the bits in memory.
*/
struct scoutfs_ring_alloc_region {
struct scoutfs_ring_entry_header eh;	/* type SCOUTFS_RING_ADD_ALLOC */
__le64 index;	/* region index; presumably region i covers segnos from i * SCOUTFS_ALLOC_REGION_BITS -- TODO confirm */
__u8 pad[5];	/* aligns bits[] so hosts can use native long bitops (see comment above) */
__le64 bits[SCOUTFS_ALLOC_REGION_BITS / 64];	/* one bit per segment; mkfs sets all then clears bits for allocated segs, so set == free */
} __packed;
/*
 * This is absurdly huge.  If there were only ever one item per segment
 * and a full 2^64 items in total, the tree could get this deep.
 */
#define SCOUTFS_MANIFEST_MAX_LEVEL 20
/*
* The packed entries in the block are terminated by a header with a 0 length.
*/
struct scoutfs_ring_block {
struct scoutfs_block_header hdr;
__le32 nr_entries;
struct scoutfs_ring_entry_header entries[0];
} __packed;
/*
* We really want these to be a power of two size so that they're naturally
* aligned. This ensures that they won't cross page boundaries and we
* can use pointers to them in the page vecs that make up segments without
* funny business.
*
* We limit segment sizes to 8 megs (23 bits) and value lengths to 512 bytes
* (9 bits). The item offsets and lengths then take up 64 bits.
*
* We then operate on the items in on-stack nice native structs.
*/
struct scoutfs_segment_item {
__le64 seq;
__le32 key_off;
__le32 val_off;
__le16 key_len;
__le16 val_len;
__le32 key_off_len;
__le32 val_off_len;
} __packed;
#define SCOUTFS_SEGMENT_ITEM_OFF_SHIFT 9
#define SCOUTFS_SEGMENT_ITEM_LEN_MASK ((1 << SCOUTFS_SEGMENT_ITEM_OFF_SHIFT)-1)
/*
* Each large segment starts with a segment block that describes the
* rest of the blocks that make up the segment.
@@ -100,20 +130,12 @@ struct scoutfs_segment_block {
__le64 segno;
__le64 max_seq;
__le32 nr_items;
/* item array with gaps so they don't cross 4k blocks */
__le32 _moar_pads;
struct scoutfs_segment_item items[0];
/* packed keys */
/* packed vals */
} __packed;
/* the first block in the segment has the header and items */
#define SCOUTFS_SEGMENT_FIRST_BLOCK_ITEMS \
((SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_segment_block)) / \
sizeof(struct scoutfs_segment_item))
/* the rest of the header blocks are full of items */
#define SCOUTFS_SEGMENT_ITEMS_PER_BLOCK \
(SCOUTFS_BLOCK_SIZE / sizeof(struct scoutfs_segment_item))
/*
* Block references include the sequence number so that we can detect
* readers racing with writers and so that we can tell that we don't
@@ -188,18 +210,34 @@ struct scoutfs_key {
#define SCOUTFS_XATTR_NAME_HASH_KEY 3
#define SCOUTFS_XATTR_VAL_HASH_KEY 4
#define SCOUTFS_DIRENT_KEY 5
#define SCOUTFS_LINK_BACKREF_KEY 6
#define SCOUTFS_SYMLINK_KEY 7
#define SCOUTFS_EXTENT_KEY 8
#define SCOUTFS_ORPHAN_KEY 9
#define SCOUTFS_READDIR_KEY 6
#define SCOUTFS_LINK_BACKREF_KEY 7
#define SCOUTFS_SYMLINK_KEY 8
#define SCOUTFS_EXTENT_KEY 9
#define SCOUTFS_ORPHAN_KEY 10
#define SCOUTFS_MAX_ITEM_LEN 512
/* value is struct scoutfs_inode */
struct scoutfs_inode_key {
__u8 type;
__be64 ino;
} __packed;
/* value is struct scoutfs_dirent without the name */
struct scoutfs_dirent_key {
__u8 type;
__be64 ino;
__u8 name[0];
} __packed;
/* value is struct scoutfs_dirent with the name */
struct scoutfs_readdir_key {
__u8 type;	/* presumably SCOUTFS_READDIR_KEY -- confirm against key users */
__be64 ino;	/* directory inode number */
__be64 pos;	/* readdir position; big-endian, presumably so keys sort by memcmp -- verify */
} __packed;
struct scoutfs_btree_root {
u8 height;
struct scoutfs_block_ref ref;
@@ -272,6 +310,8 @@ struct scoutfs_super_block {
__le64 id;
__u8 uuid[SCOUTFS_UUID_BYTES];
__le64 next_ino;
__le64 alloc_uninit;
__le64 total_segs;
__le64 total_blocks;
__le64 free_blocks;
__le64 ring_blkno;

57
utils/src/item.c Normal file
View File

@@ -0,0 +1,57 @@
#include <unistd.h>
#include <stdlib.h>
#include "sparse.h"
#include "util.h"
#include "format.h"
#include "item.h"
/* utils uses big contiguous allocations, so a segment is addressable as one flat buffer */
/* Translate a byte offset within the segment into a pointer. */
static void *off_ptr(struct scoutfs_segment_block *sblk, u32 off)
{
	char *base = (char *)sblk;

	return base + off;
}
/*
 * Return the byte offset of item array slot @pos from the start of the
 * segment block.  @sblk is unused but kept for symmetry with off_ptr().
 *
 * NOTE(review): this treats the item array as fully contiguous; it does
 * not yet skip the per-4k-block gaps the format comments mention --
 * confirm against the kernel's item layout.
 */
static u32 pos_off(struct scoutfs_segment_block *sblk, u32 pos)
{
	u32 base = offsetof(struct scoutfs_segment_block, items);

	return base + pos * sizeof(struct scoutfs_segment_item);
}
/* Return a pointer to the item header stored at array position @pos. */
static void *pos_ptr(struct scoutfs_segment_block *sblk, u32 pos)
{
	u32 off = pos_off(sblk, pos);

	return off_ptr(sblk, off);
}
/*
 * Unpack the persistent item at position @pos in the segment block into
 * the caller's native representation.  Each packed 32-bit word holds an
 * offset in its high bits and a length in its low
 * SCOUTFS_SEGMENT_ITEM_OFF_SHIFT bits.
 */
void load_item(struct scoutfs_segment_block *sblk, u32 pos,
	       struct native_item *item)
{
	struct scoutfs_segment_item *sitem = pos_ptr(sblk, pos);
	u32 key_packed = le32_to_cpu(sitem->key_off_len);
	u32 val_packed = le32_to_cpu(sitem->val_off_len);

	item->seq = le64_to_cpu(sitem->seq);
	item->key_off = key_packed >> SCOUTFS_SEGMENT_ITEM_OFF_SHIFT;
	item->key_len = key_packed & SCOUTFS_SEGMENT_ITEM_LEN_MASK;
	item->val_off = val_packed >> SCOUTFS_SEGMENT_ITEM_OFF_SHIFT;
	item->val_len = val_packed & SCOUTFS_SEGMENT_ITEM_LEN_MASK;
}
/*
 * Pack the caller's native item fields into the persistent item at
 * position @pos in the segment block.  Offset and length share one
 * 32-bit word: the offset occupies the high bits and the length the
 * low SCOUTFS_SEGMENT_ITEM_OFF_SHIFT bits.
 */
void store_item(struct scoutfs_segment_block *sblk, u32 pos,
		struct native_item *item)
{
	struct scoutfs_segment_item *sitem = pos_ptr(sblk, pos);
	u32 key_packed;
	u32 val_packed;

	key_packed = (item->key_off << SCOUTFS_SEGMENT_ITEM_OFF_SHIFT) |
		     (item->key_len & SCOUTFS_SEGMENT_ITEM_LEN_MASK);
	val_packed = (item->val_off << SCOUTFS_SEGMENT_ITEM_OFF_SHIFT) |
		     (item->val_len & SCOUTFS_SEGMENT_ITEM_LEN_MASK);

	sitem->seq = cpu_to_le64(item->seq);
	sitem->key_off_len = cpu_to_le32(key_packed);
	sitem->val_off_len = cpu_to_le32(val_packed);
}

22
utils/src/item.h Normal file
View File

@@ -0,0 +1,22 @@
#ifndef _ITEM_H_
#define _ITEM_H_

/*
 * The persistent item fields that are stored in the segment are packed
 * with funny precision. We translate those to and from a much more
 * natural native representation of the fields.
 */
struct native_item {
u64 seq;	/* item sequence number */
u32 key_off;	/* byte offset of the key from the start of the segment block */
u32 val_off;	/* byte offset of the value from the start of the segment block */
u16 key_len;	/* key length in bytes */
u16 val_len;	/* value length in bytes */
};

/* unpack the on-disk item at array position @pos into @item */
void load_item(struct scoutfs_segment_block *sblk, u32 pos,
struct native_item *item);
/* pack @item into the on-disk item at array position @pos */
void store_item(struct scoutfs_segment_block *sblk, u32 pos,
struct native_item *item);
#endif

View File

@@ -19,6 +19,7 @@
#include "dev.h"
#include "bitops.h"
#include "buddy.h"
#include "item.h"
/*
* Update the block's header and write it out.
@@ -88,8 +89,9 @@ static int write_new_fs(char *path, int fd)
struct scoutfs_inode *inode;
struct scoutfs_segment_block *sblk;
struct scoutfs_ring_block *ring;
struct scoutfs_segment_item *item;
struct scoutfs_ring_add_manifest *am;
struct scoutfs_ring_alloc_region *reg;
struct native_item item;
struct timeval tv;
char uuid_str[37];
unsigned int i;
@@ -97,6 +99,8 @@ static int write_new_fs(char *path, int fd)
u64 size;
u64 total_blocks;
u64 ring_blocks;
u64 total_segs;
u64 first_segno;
int ret;
gettimeofday(&tv, NULL);
@@ -127,6 +131,7 @@ static int write_new_fs(char *path, int fd)
}
total_blocks = size / SCOUTFS_BLOCK_SIZE;
total_segs = size / SCOUTFS_SEGMENT_SIZE;
ring_blocks = calc_ring_blocks(size);
/* first initialize the super so we can use it to build structures */
@@ -137,24 +142,30 @@ static int write_new_fs(char *path, int fd)
uuid_generate(super->uuid);
super->next_ino = cpu_to_le64(SCOUTFS_ROOT_INO + 1);
super->total_blocks = cpu_to_le64(total_blocks);
super->total_segs = cpu_to_le64(total_segs);
super->alloc_uninit = cpu_to_le64(SCOUTFS_ALLOC_REGION_BITS);
super->ring_blkno = cpu_to_le64(SCOUTFS_SUPER_BLKNO + 2);
super->ring_blocks = cpu_to_le64(ring_blocks);
super->ring_head_seq = cpu_to_le64(1);
first_segno = DIV_ROUND_UP(le64_to_cpu(super->ring_blkno) +
le64_to_cpu(super->ring_blocks),
SCOUTFS_SEGMENT_BLOCKS);
/* write seg with root inode */
sblk->segno = cpu_to_le64(1);
sblk->segno = cpu_to_le64(first_segno);
sblk->max_seq = cpu_to_le64(1);
sblk->nr_items = cpu_to_le32(1);
item = (void *)(sblk + 1);
ikey = (void *)(item + 1);
ikey = (void *)&sblk->items[1];
inode = (void *)(ikey + 1);
item->seq = cpu_to_le64(1);
item->key_off = cpu_to_le32((long)ikey - (long)sblk);
item->val_off = cpu_to_le32((long)inode - (long)sblk);
item->key_len = cpu_to_le16(sizeof(struct scoutfs_inode_key));
item->val_len = cpu_to_le16(sizeof(struct scoutfs_inode));
item.seq = 1;
item.key_off = (long)ikey - (long)sblk;
item.val_off = (long)inode - (long)sblk;
item.key_len = sizeof(struct scoutfs_inode_key);
item.val_len = sizeof(struct scoutfs_inode);
store_item(sblk, 0, &item);
ikey->type = SCOUTFS_INODE_KEY;
ikey->ino = cpu_to_be64(SCOUTFS_ROOT_INO);
@@ -169,19 +180,18 @@ static int write_new_fs(char *path, int fd)
inode->mtime.nsec = inode->atime.nsec;
ret = pwrite(fd, sblk, SCOUTFS_SEGMENT_SIZE,
1 << SCOUTFS_SEGMENT_SHIFT);
first_segno << SCOUTFS_SEGMENT_SHIFT);
if (ret != SCOUTFS_SEGMENT_SIZE) {
ret = -EIO;
goto out;
}
/* write the ring block with the manifest entry pointing to seg */
ring->nr_entries = cpu_to_le32(1);
/* a single manifest entry points to the single segment */
am = (void *)ring->entries;
am->eh.type = SCOUTFS_RING_ADD_MANIFEST;
am->eh.len = cpu_to_le16(sizeof(struct scoutfs_ring_add_manifest));
am->segno = cpu_to_le64(1);
am->eh.len = cpu_to_le16(sizeof(struct scoutfs_ring_add_manifest) +
(2 * sizeof(struct scoutfs_inode_key)));
am->segno = sblk->segno;
am->seq = cpu_to_le64(1);
am->first_key_len = cpu_to_le16(sizeof(struct scoutfs_inode_key));
am->last_key_len = cpu_to_le16(sizeof(struct scoutfs_inode_key));
@@ -193,6 +203,17 @@ static int write_new_fs(char *path, int fd)
ikey->type = SCOUTFS_INODE_KEY;
ikey->ino = cpu_to_be64(SCOUTFS_ROOT_INO);
/* a single alloc region records the first two segs as allocated */
reg = (void *)am + le16_to_cpu(am->eh.len);
reg->eh.type = SCOUTFS_RING_ADD_ALLOC;
reg->eh.len = cpu_to_le16(sizeof(struct scoutfs_ring_alloc_region));
/* initial super, ring, and first seg are all allocated */
memset(reg->bits, 0xff, sizeof(reg->bits));
for (i = 0; i <= first_segno; i++)
clear_bit_le(i, reg->bits);
/* block is already zeroed and so contains a 0 len terminating header */
ret = write_block(fd, le64_to_cpu(super->ring_blkno), super,
&ring->hdr);
if (ret)

View File

@@ -17,6 +17,7 @@
#include "crc.h"
#include "buddy.h"
#include "bitops.h"
#include "item.h"
/* XXX maybe these go somewhere */
#define SKF "%llu.%u.%llu"
@@ -169,12 +170,23 @@ static print_func_t printers[] = {
[SCOUTFS_INODE_KEY] = print_inode,
};
static void print_item(struct scoutfs_segment_block *sblk,
struct scoutfs_segment_item *item)
static void print_item(struct scoutfs_segment_block *sblk, u32 pos)
{
void *key = (char *)sblk + le32_to_cpu(item->key_off);
void *val = (char *)sblk + le32_to_cpu(item->val_off);
__u8 type = *(__u8 *)key;
struct native_item item;
void *key;
void *val;
__u8 type;
load_item(sblk, pos, &item);
key = (char *)sblk + item.key_off;
val = (char *)sblk + item.val_off;
type = *(__u8 *)key;
printf(" [%u]: seq %llu key_off %u val_off %u key_len %u "
"val_len %u\n",
pos, item.seq, item.key_off, item.val_off, item.key_len,
item.val_len);
if (type < array_size(printers) && printers[type])
printers[type](key, val);
@@ -185,7 +197,6 @@ static void print_item(struct scoutfs_segment_block *sblk,
static int print_segment(int fd, u64 segno)
{
struct scoutfs_segment_block *sblk;
struct scoutfs_segment_item *item;
int i;
sblk = read_segment(fd, segno);
@@ -195,22 +206,8 @@ static int print_segment(int fd, u64 segno)
printf("segment segno %llu\n", segno);
// print_block_header(&sblk->hdr);
item = (void *)(sblk + 1);
for (i = 0; i < le32_to_cpu(sblk->nr_items); i++) {
printf(" [%u]: seq %llu key_off %u val_off %u key_len %u "
"val_len %u\n",
i,
le64_to_cpu(item->seq),
le32_to_cpu(item->key_off),
le32_to_cpu(item->val_off),
le16_to_cpu(item->key_len),
le16_to_cpu(item->val_len));
print_item(sblk, item);
/* XXX item has to skip holes at the end of blocks */
item = (void *)(item + 1);
}
for (i = 0; i < le32_to_cpu(sblk->nr_items); i++)
print_item(sblk, i);
free(sblk);
@@ -238,6 +235,7 @@ static int print_segments(int fd, unsigned long *seg_map, u64 total_segs)
static int print_ring_block(int fd, unsigned long *seg_map, u64 blkno)
{
struct scoutfs_ring_alloc_region *reg;
struct scoutfs_ring_entry_header *eh;
struct scoutfs_ring_add_manifest *am;
struct scoutfs_ring_block *ring;
@@ -252,12 +250,13 @@ static int print_ring_block(int fd, unsigned long *seg_map, u64 blkno)
print_block_header(&ring->hdr);
eh = ring->entries;
for (i = 0; i < le32_to_cpu(ring->nr_entries); i++) {
while (eh->len) {
off = (char *)eh - (char *)ring;
printf(" [%u]: type %u len %u\n",
off, eh->type, le16_to_cpu(eh->len));
switch(eh->type) {
case SCOUTFS_RING_ADD_MANIFEST:
am = (void *)eh;
printf(" add ment: segno %llu seq %llu "
@@ -271,7 +270,18 @@ static int print_ring_block(int fd, unsigned long *seg_map, u64 blkno)
/* XXX verify, 'int nr' limits segno precision */
set_bit_le(le64_to_cpu(am->segno), seg_map);
break;
case SCOUTFS_RING_ADD_ALLOC:
reg = (void *)eh;
printf(" add alloc: index %llu bits",
le64_to_cpu(reg->index));
for (i = 0; i < array_size(reg->bits); i++)
printf(" %016llx", le64_to_cpu(reg->bits[i]));
printf("\n");
break;
}
eh = (void *)eh + le16_to_cpu(eh->len);
}
free(ring);
@@ -330,16 +340,19 @@ static int print_super_blocks(int fd)
print_block_header(&super->hdr);
printf(" id %llx uuid %s\n",
le64_to_cpu(super->id), uuid_str);
/* XXX these are all in a crazy order */
printf(" next_ino %llu total_blocks %llu free_blocks %llu\n"
" ring_blkno %llu ring_blocks %llu ring_head %llu\n"
" ring_tail %llu\n",
" ring_tail %llu alloc_uninit %llu total_segs %llu\n",
le64_to_cpu(super->next_ino),
le64_to_cpu(super->total_blocks),
le64_to_cpu(super->free_blocks),
le64_to_cpu(super->ring_blkno),
le64_to_cpu(super->ring_blocks),
le64_to_cpu(super->ring_head_index),
le64_to_cpu(super->ring_tail_index));
le64_to_cpu(super->ring_tail_index),
le64_to_cpu(super->alloc_uninit),
le64_to_cpu(super->total_segs));
if (le64_to_cpu(super->hdr.seq) > le64_to_cpu(recent.hdr.seq))
memcpy(&recent, super, sizeof(recent));