mirror of
https://github.com/versity/scoutfs.git
synced 2026-01-08 13:01:23 +00:00
Update mkfs and print for lsm writing
Adapt mkfs and print for the format changes made to support writing segments. Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
@@ -58,7 +58,8 @@ struct scoutfs_ring_entry_header {
|
||||
__le16 len;
|
||||
} __packed;
|
||||
|
||||
#define SCOUTFS_RING_ADD_MANIFEST 1
|
||||
#define SCOUTFS_RING_ADD_MANIFEST 1
|
||||
#define SCOUTFS_RING_ADD_ALLOC 2
|
||||
|
||||
struct scoutfs_ring_add_manifest {
|
||||
struct scoutfs_ring_entry_header eh;
|
||||
@@ -70,26 +71,55 @@ struct scoutfs_ring_add_manifest {
|
||||
/* first and last key bytes */
|
||||
} __packed;
|
||||
|
||||
#define SCOUTFS_ALLOC_REGION_SHIFT 8
|
||||
#define SCOUTFS_ALLOC_REGION_BITS (1 << SCOUTFS_ALLOC_REGION_SHIFT)
|
||||
#define SCOUTFS_ALLOC_REGION_MASK (SCOUTFS_ALLOC_REGION_BITS - 1)
|
||||
|
||||
/*
|
||||
* The bits need to be aligned so that the host can use native long
|
||||
* bitops on the bits in memory.
|
||||
*/
|
||||
struct scoutfs_ring_alloc_region {
|
||||
struct scoutfs_ring_entry_header eh;
|
||||
__le64 index;
|
||||
__u8 pad[5];
|
||||
__le64 bits[SCOUTFS_ALLOC_REGION_BITS / 64];
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* This is absurdly huge. If there was only ever 1 item per segment and
|
||||
* 2^64 items the tree could get this deep.
|
||||
*/
|
||||
#define SCOUTFS_MANIFEST_MAX_LEVEL 20
|
||||
|
||||
/*
|
||||
* The packed entries in the block are terminated by a header with a 0 length.
|
||||
*/
|
||||
struct scoutfs_ring_block {
|
||||
struct scoutfs_block_header hdr;
|
||||
__le32 nr_entries;
|
||||
struct scoutfs_ring_entry_header entries[0];
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* We really want these to be a power of two size so that they're naturally
|
||||
* aligned. This ensures that they won't cross page boundaries and we
|
||||
* can use pointers to them in the page vecs that make up segments without
|
||||
* funny business.
|
||||
*
|
||||
* We limit segment sizes to 8 megs (23 bits) and value lengths to 512 bytes
|
||||
* (9 bits). The item offsets and lengths then take up 64 bits.
|
||||
*
|
||||
* We then operate on the items in nice native structs on the stack.
|
||||
*/
|
||||
struct scoutfs_segment_item {
|
||||
__le64 seq;
|
||||
__le32 key_off;
|
||||
__le32 val_off;
|
||||
__le16 key_len;
|
||||
__le16 val_len;
|
||||
__le32 key_off_len;
|
||||
__le32 val_off_len;
|
||||
} __packed;
|
||||
|
||||
#define SCOUTFS_SEGMENT_ITEM_OFF_SHIFT 9
|
||||
#define SCOUTFS_SEGMENT_ITEM_LEN_MASK ((1 << SCOUTFS_SEGMENT_ITEM_OFF_SHIFT)-1)
|
||||
|
||||
/*
|
||||
* Each large segment starts with a segment block that describes the
|
||||
* rest of the blocks that make up the segment.
|
||||
@@ -100,20 +130,12 @@ struct scoutfs_segment_block {
|
||||
__le64 segno;
|
||||
__le64 max_seq;
|
||||
__le32 nr_items;
|
||||
/* item array with gaps so they don't cross 4k blocks */
|
||||
__le32 _moar_pads;
|
||||
struct scoutfs_segment_item items[0];
|
||||
/* packed keys */
|
||||
/* packed vals */
|
||||
} __packed;
|
||||
|
||||
/* the first block in the segment has the header and items */
|
||||
#define SCOUTFS_SEGMENT_FIRST_BLOCK_ITEMS \
|
||||
((SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_segment_block)) / \
|
||||
sizeof(struct scoutfs_segment_item))
|
||||
|
||||
/* the rest of the header blocks are full of items */
|
||||
#define SCOUTFS_SEGMENT_ITEMS_PER_BLOCK \
|
||||
(SCOUTFS_BLOCK_SIZE / sizeof(struct scoutfs_segment_item))
|
||||
|
||||
/*
|
||||
* Block references include the sequence number so that we can detect
|
||||
* readers racing with writers and so that we can tell that we don't
|
||||
@@ -188,18 +210,34 @@ struct scoutfs_key {
|
||||
#define SCOUTFS_XATTR_NAME_HASH_KEY 3
|
||||
#define SCOUTFS_XATTR_VAL_HASH_KEY 4
|
||||
#define SCOUTFS_DIRENT_KEY 5
|
||||
#define SCOUTFS_LINK_BACKREF_KEY 6
|
||||
#define SCOUTFS_SYMLINK_KEY 7
|
||||
#define SCOUTFS_EXTENT_KEY 8
|
||||
#define SCOUTFS_ORPHAN_KEY 9
|
||||
#define SCOUTFS_READDIR_KEY 6
|
||||
#define SCOUTFS_LINK_BACKREF_KEY 7
|
||||
#define SCOUTFS_SYMLINK_KEY 8
|
||||
#define SCOUTFS_EXTENT_KEY 9
|
||||
#define SCOUTFS_ORPHAN_KEY 10
|
||||
|
||||
#define SCOUTFS_MAX_ITEM_LEN 512
|
||||
|
||||
/* value is struct scoutfs_inode */
|
||||
struct scoutfs_inode_key {
|
||||
__u8 type;
|
||||
__be64 ino;
|
||||
} __packed;
|
||||
|
||||
/* value is struct scoutfs_dirent without the name */
|
||||
struct scoutfs_dirent_key {
|
||||
__u8 type;
|
||||
__be64 ino;
|
||||
__u8 name[0];
|
||||
} __packed;
|
||||
|
||||
/* value is struct scoutfs_dirent with the name */
|
||||
struct scoutfs_readdir_key {
|
||||
__u8 type;
|
||||
__be64 ino;
|
||||
__be64 pos;
|
||||
} __packed;
|
||||
|
||||
struct scoutfs_btree_root {
|
||||
u8 height;
|
||||
struct scoutfs_block_ref ref;
|
||||
@@ -272,6 +310,8 @@ struct scoutfs_super_block {
|
||||
__le64 id;
|
||||
__u8 uuid[SCOUTFS_UUID_BYTES];
|
||||
__le64 next_ino;
|
||||
__le64 alloc_uninit;
|
||||
__le64 total_segs;
|
||||
__le64 total_blocks;
|
||||
__le64 free_blocks;
|
||||
__le64 ring_blkno;
|
||||
|
||||
57
utils/src/item.c
Normal file
57
utils/src/item.c
Normal file
@@ -0,0 +1,57 @@
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "item.h"
|
||||
|
||||
/* utils uses bit contiguous allocations */
|
||||
static void *off_ptr(struct scoutfs_segment_block *sblk, u32 off)
|
||||
{
|
||||
return (char *)sblk + off;
|
||||
}
|
||||
|
||||
/*
 * Byte offset, from the start of the segment block, of the item struct
 * at array position 'pos'.  The sblk argument is not read.
 */
static u32 pos_off(struct scoutfs_segment_block *sblk, u32 pos)
{
	return offsetof(struct scoutfs_segment_block, items) +
	       pos * sizeof(struct scoutfs_segment_item);
}
|
||||
|
||||
/* Pointer to the packed item struct at array position 'pos'. */
static void *pos_ptr(struct scoutfs_segment_block *sblk, u32 pos)
{
	u32 off = pos_off(sblk, pos);

	return off_ptr(sblk, off);
}
|
||||
|
||||
void load_item(struct scoutfs_segment_block *sblk, u32 pos,
|
||||
struct native_item *item)
|
||||
{
|
||||
struct scoutfs_segment_item *sitem = pos_ptr(sblk, pos);
|
||||
u32 packed;
|
||||
|
||||
item->seq = le64_to_cpu(sitem->seq);
|
||||
|
||||
packed = le32_to_cpu(sitem->key_off_len);
|
||||
item->key_off = packed >> SCOUTFS_SEGMENT_ITEM_OFF_SHIFT;
|
||||
item->key_len = packed & SCOUTFS_SEGMENT_ITEM_LEN_MASK;
|
||||
|
||||
packed = le32_to_cpu(sitem->val_off_len);
|
||||
item->val_off = packed >> SCOUTFS_SEGMENT_ITEM_OFF_SHIFT;
|
||||
item->val_len = packed & SCOUTFS_SEGMENT_ITEM_LEN_MASK;
|
||||
}
|
||||
|
||||
void store_item(struct scoutfs_segment_block *sblk, u32 pos,
|
||||
struct native_item *item)
|
||||
{
|
||||
struct scoutfs_segment_item *sitem = pos_ptr(sblk, pos);
|
||||
u32 packed;
|
||||
|
||||
sitem->seq = cpu_to_le64(item->seq);
|
||||
|
||||
packed = (item->key_off << SCOUTFS_SEGMENT_ITEM_OFF_SHIFT) |
|
||||
(item->key_len & SCOUTFS_SEGMENT_ITEM_LEN_MASK);
|
||||
sitem->key_off_len = cpu_to_le32(packed);
|
||||
|
||||
packed = (item->val_off << SCOUTFS_SEGMENT_ITEM_OFF_SHIFT) |
|
||||
(item->val_len & SCOUTFS_SEGMENT_ITEM_LEN_MASK);
|
||||
sitem->val_off_len = cpu_to_le32(packed);
|
||||
}
|
||||
22
utils/src/item.h
Normal file
22
utils/src/item.h
Normal file
@@ -0,0 +1,22 @@
|
||||
#ifndef _ITEM_H_
|
||||
#define _ITEM_H_
|
||||
|
||||
/*
|
||||
* The persistent item fields that are stored in the segment are packed
|
||||
* with funny precision. We translate those to and from a much more
|
||||
* natural native representation of the fields.
|
||||
*/
|
||||
struct native_item {
|
||||
u64 seq;
|
||||
u32 key_off;
|
||||
u32 val_off;
|
||||
u16 key_len;
|
||||
u16 val_len;
|
||||
};
|
||||
|
||||
void load_item(struct scoutfs_segment_block *sblk, u32 pos,
|
||||
struct native_item *item);
|
||||
void store_item(struct scoutfs_segment_block *sblk, u32 pos,
|
||||
struct native_item *item);
|
||||
|
||||
#endif
|
||||
@@ -19,6 +19,7 @@
|
||||
#include "dev.h"
|
||||
#include "bitops.h"
|
||||
#include "buddy.h"
|
||||
#include "item.h"
|
||||
|
||||
/*
|
||||
* Update the block's header and write it out.
|
||||
@@ -88,8 +89,9 @@ static int write_new_fs(char *path, int fd)
|
||||
struct scoutfs_inode *inode;
|
||||
struct scoutfs_segment_block *sblk;
|
||||
struct scoutfs_ring_block *ring;
|
||||
struct scoutfs_segment_item *item;
|
||||
struct scoutfs_ring_add_manifest *am;
|
||||
struct scoutfs_ring_alloc_region *reg;
|
||||
struct native_item item;
|
||||
struct timeval tv;
|
||||
char uuid_str[37];
|
||||
unsigned int i;
|
||||
@@ -97,6 +99,8 @@ static int write_new_fs(char *path, int fd)
|
||||
u64 size;
|
||||
u64 total_blocks;
|
||||
u64 ring_blocks;
|
||||
u64 total_segs;
|
||||
u64 first_segno;
|
||||
int ret;
|
||||
|
||||
gettimeofday(&tv, NULL);
|
||||
@@ -127,6 +131,7 @@ static int write_new_fs(char *path, int fd)
|
||||
}
|
||||
|
||||
total_blocks = size / SCOUTFS_BLOCK_SIZE;
|
||||
total_segs = size / SCOUTFS_SEGMENT_SIZE;
|
||||
ring_blocks = calc_ring_blocks(size);
|
||||
|
||||
/* first initialize the super so we can use it to build structures */
|
||||
@@ -137,24 +142,30 @@ static int write_new_fs(char *path, int fd)
|
||||
uuid_generate(super->uuid);
|
||||
super->next_ino = cpu_to_le64(SCOUTFS_ROOT_INO + 1);
|
||||
super->total_blocks = cpu_to_le64(total_blocks);
|
||||
super->total_segs = cpu_to_le64(total_segs);
|
||||
super->alloc_uninit = cpu_to_le64(SCOUTFS_ALLOC_REGION_BITS);
|
||||
super->ring_blkno = cpu_to_le64(SCOUTFS_SUPER_BLKNO + 2);
|
||||
super->ring_blocks = cpu_to_le64(ring_blocks);
|
||||
super->ring_head_seq = cpu_to_le64(1);
|
||||
|
||||
first_segno = DIV_ROUND_UP(le64_to_cpu(super->ring_blkno) +
|
||||
le64_to_cpu(super->ring_blocks),
|
||||
SCOUTFS_SEGMENT_BLOCKS);
|
||||
|
||||
/* write seg with root inode */
|
||||
sblk->segno = cpu_to_le64(1);
|
||||
sblk->segno = cpu_to_le64(first_segno);
|
||||
sblk->max_seq = cpu_to_le64(1);
|
||||
sblk->nr_items = cpu_to_le32(1);
|
||||
|
||||
item = (void *)(sblk + 1);
|
||||
ikey = (void *)(item + 1);
|
||||
ikey = (void *)&sblk->items[1];
|
||||
inode = (void *)(ikey + 1);
|
||||
|
||||
item->seq = cpu_to_le64(1);
|
||||
item->key_off = cpu_to_le32((long)ikey - (long)sblk);
|
||||
item->val_off = cpu_to_le32((long)inode - (long)sblk);
|
||||
item->key_len = cpu_to_le16(sizeof(struct scoutfs_inode_key));
|
||||
item->val_len = cpu_to_le16(sizeof(struct scoutfs_inode));
|
||||
item.seq = 1;
|
||||
item.key_off = (long)ikey - (long)sblk;
|
||||
item.val_off = (long)inode - (long)sblk;
|
||||
item.key_len = sizeof(struct scoutfs_inode_key);
|
||||
item.val_len = sizeof(struct scoutfs_inode);
|
||||
store_item(sblk, 0, &item);
|
||||
|
||||
ikey->type = SCOUTFS_INODE_KEY;
|
||||
ikey->ino = cpu_to_be64(SCOUTFS_ROOT_INO);
|
||||
@@ -169,19 +180,18 @@ static int write_new_fs(char *path, int fd)
|
||||
inode->mtime.nsec = inode->atime.nsec;
|
||||
|
||||
ret = pwrite(fd, sblk, SCOUTFS_SEGMENT_SIZE,
|
||||
1 << SCOUTFS_SEGMENT_SHIFT);
|
||||
first_segno << SCOUTFS_SEGMENT_SHIFT);
|
||||
if (ret != SCOUTFS_SEGMENT_SIZE) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* write the ring block with the manifest entry pointing to seg */
|
||||
ring->nr_entries = cpu_to_le32(1);
|
||||
|
||||
/* a single manifest entry points to the single segment */
|
||||
am = (void *)ring->entries;
|
||||
am->eh.type = SCOUTFS_RING_ADD_MANIFEST;
|
||||
am->eh.len = cpu_to_le16(sizeof(struct scoutfs_ring_add_manifest));
|
||||
am->segno = cpu_to_le64(1);
|
||||
am->eh.len = cpu_to_le16(sizeof(struct scoutfs_ring_add_manifest) +
|
||||
(2 * sizeof(struct scoutfs_inode_key)));
|
||||
am->segno = sblk->segno;
|
||||
am->seq = cpu_to_le64(1);
|
||||
am->first_key_len = cpu_to_le16(sizeof(struct scoutfs_inode_key));
|
||||
am->last_key_len = cpu_to_le16(sizeof(struct scoutfs_inode_key));
|
||||
@@ -193,6 +203,17 @@ static int write_new_fs(char *path, int fd)
|
||||
ikey->type = SCOUTFS_INODE_KEY;
|
||||
ikey->ino = cpu_to_be64(SCOUTFS_ROOT_INO);
|
||||
|
||||
/* a single alloc region records segs 0 through first_segno as allocated */
|
||||
reg = (void *)am + le16_to_cpu(am->eh.len);
|
||||
reg->eh.type = SCOUTFS_RING_ADD_ALLOC;
|
||||
reg->eh.len = cpu_to_le16(sizeof(struct scoutfs_ring_alloc_region));
|
||||
/* initial super, ring, and first seg are all allocated */
|
||||
memset(reg->bits, 0xff, sizeof(reg->bits));
|
||||
for (i = 0; i <= first_segno; i++)
|
||||
clear_bit_le(i, reg->bits);
|
||||
|
||||
/* block is already zeroed and so contains a 0 len terminating header */
|
||||
|
||||
ret = write_block(fd, le64_to_cpu(super->ring_blkno), super,
|
||||
&ring->hdr);
|
||||
if (ret)
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include "crc.h"
|
||||
#include "buddy.h"
|
||||
#include "bitops.h"
|
||||
#include "item.h"
|
||||
|
||||
/* XXX maybe these go somewhere */
|
||||
#define SKF "%llu.%u.%llu"
|
||||
@@ -169,12 +170,23 @@ static print_func_t printers[] = {
|
||||
[SCOUTFS_INODE_KEY] = print_inode,
|
||||
};
|
||||
|
||||
static void print_item(struct scoutfs_segment_block *sblk,
|
||||
struct scoutfs_segment_item *item)
|
||||
static void print_item(struct scoutfs_segment_block *sblk, u32 pos)
|
||||
{
|
||||
void *key = (char *)sblk + le32_to_cpu(item->key_off);
|
||||
void *val = (char *)sblk + le32_to_cpu(item->val_off);
|
||||
__u8 type = *(__u8 *)key;
|
||||
struct native_item item;
|
||||
void *key;
|
||||
void *val;
|
||||
__u8 type;
|
||||
|
||||
load_item(sblk, pos, &item);
|
||||
|
||||
key = (char *)sblk + item.key_off;
|
||||
val = (char *)sblk + item.val_off;
|
||||
type = *(__u8 *)key;
|
||||
|
||||
printf(" [%u]: seq %llu key_off %u val_off %u key_len %u "
|
||||
"val_len %u\n",
|
||||
pos, item.seq, item.key_off, item.val_off, item.key_len,
|
||||
item.val_len);
|
||||
|
||||
if (type < array_size(printers) && printers[type])
|
||||
printers[type](key, val);
|
||||
@@ -185,7 +197,6 @@ static void print_item(struct scoutfs_segment_block *sblk,
|
||||
static int print_segment(int fd, u64 segno)
|
||||
{
|
||||
struct scoutfs_segment_block *sblk;
|
||||
struct scoutfs_segment_item *item;
|
||||
int i;
|
||||
|
||||
sblk = read_segment(fd, segno);
|
||||
@@ -195,22 +206,8 @@ static int print_segment(int fd, u64 segno)
|
||||
printf("segment segno %llu\n", segno);
|
||||
// print_block_header(&sblk->hdr);
|
||||
|
||||
item = (void *)(sblk + 1);
|
||||
for (i = 0; i < le32_to_cpu(sblk->nr_items); i++) {
|
||||
printf(" [%u]: seq %llu key_off %u val_off %u key_len %u "
|
||||
"val_len %u\n",
|
||||
i,
|
||||
le64_to_cpu(item->seq),
|
||||
le32_to_cpu(item->key_off),
|
||||
le32_to_cpu(item->val_off),
|
||||
le16_to_cpu(item->key_len),
|
||||
le16_to_cpu(item->val_len));
|
||||
|
||||
print_item(sblk, item);
|
||||
|
||||
/* XXX item has to skip holes at the end of blocks */
|
||||
item = (void *)(item + 1);
|
||||
}
|
||||
for (i = 0; i < le32_to_cpu(sblk->nr_items); i++)
|
||||
print_item(sblk, i);
|
||||
|
||||
free(sblk);
|
||||
|
||||
@@ -238,6 +235,7 @@ static int print_segments(int fd, unsigned long *seg_map, u64 total_segs)
|
||||
|
||||
static int print_ring_block(int fd, unsigned long *seg_map, u64 blkno)
|
||||
{
|
||||
struct scoutfs_ring_alloc_region *reg;
|
||||
struct scoutfs_ring_entry_header *eh;
|
||||
struct scoutfs_ring_add_manifest *am;
|
||||
struct scoutfs_ring_block *ring;
|
||||
@@ -252,12 +250,13 @@ static int print_ring_block(int fd, unsigned long *seg_map, u64 blkno)
|
||||
print_block_header(&ring->hdr);
|
||||
|
||||
eh = ring->entries;
|
||||
for (i = 0; i < le32_to_cpu(ring->nr_entries); i++) {
|
||||
while (eh->len) {
|
||||
off = (char *)eh - (char *)ring;
|
||||
printf(" [%u]: type %u len %u\n",
|
||||
off, eh->type, le16_to_cpu(eh->len));
|
||||
|
||||
switch(eh->type) {
|
||||
|
||||
case SCOUTFS_RING_ADD_MANIFEST:
|
||||
am = (void *)eh;
|
||||
printf(" add ment: segno %llu seq %llu "
|
||||
@@ -271,7 +270,18 @@ static int print_ring_block(int fd, unsigned long *seg_map, u64 blkno)
|
||||
/* XXX verify, 'int nr' limits segno precision */
|
||||
set_bit_le(le64_to_cpu(am->segno), seg_map);
|
||||
break;
|
||||
|
||||
case SCOUTFS_RING_ADD_ALLOC:
|
||||
reg = (void *)eh;
|
||||
printf(" add alloc: index %llu bits",
|
||||
le64_to_cpu(reg->index));
|
||||
for (i = 0; i < array_size(reg->bits); i++)
|
||||
printf(" %016llx", le64_to_cpu(reg->bits[i]));
|
||||
printf("\n");
|
||||
break;
|
||||
}
|
||||
|
||||
eh = (void *)eh + le16_to_cpu(eh->len);
|
||||
}
|
||||
|
||||
free(ring);
|
||||
@@ -330,16 +340,19 @@ static int print_super_blocks(int fd)
|
||||
print_block_header(&super->hdr);
|
||||
printf(" id %llx uuid %s\n",
|
||||
le64_to_cpu(super->id), uuid_str);
|
||||
/* XXX these are all in a crazy order */
|
||||
printf(" next_ino %llu total_blocks %llu free_blocks %llu\n"
|
||||
" ring_blkno %llu ring_blocks %llu ring_head %llu\n"
|
||||
" ring_tail %llu\n",
|
||||
" ring_tail %llu alloc_uninit %llu total_segs %llu\n",
|
||||
le64_to_cpu(super->next_ino),
|
||||
le64_to_cpu(super->total_blocks),
|
||||
le64_to_cpu(super->free_blocks),
|
||||
le64_to_cpu(super->ring_blkno),
|
||||
le64_to_cpu(super->ring_blocks),
|
||||
le64_to_cpu(super->ring_head_index),
|
||||
le64_to_cpu(super->ring_tail_index));
|
||||
le64_to_cpu(super->ring_tail_index),
|
||||
le64_to_cpu(super->alloc_uninit),
|
||||
le64_to_cpu(super->total_segs));
|
||||
|
||||
if (le64_to_cpu(super->hdr.seq) > le64_to_cpu(recent.hdr.seq))
|
||||
memcpy(&recent, super, sizeof(recent));
|
||||
|
||||
Reference in New Issue
Block a user