mirror of
https://github.com/versity/scoutfs.git
synced 2026-01-09 13:23:14 +00:00
scoutfs-utils: support btree avl and hash
Update the internal structure of btree blocks to use the avl item index and hash table direct item lookup. Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
40
utils/src/avl.c
Normal file
40
utils/src/avl.c
Normal file
@@ -0,0 +1,40 @@
|
||||
#include "sparse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "avl.h"
|
||||
|
||||
static struct scoutfs_avl_node *node_ptr(struct scoutfs_avl_root *root,
|
||||
|
||||
__le16 off)
|
||||
{
|
||||
return off ? (void *)root + le16_to_cpu(off) : NULL;
|
||||
}
|
||||
|
||||
struct scoutfs_avl_node *avl_first(struct scoutfs_avl_root *root)
|
||||
{
|
||||
struct scoutfs_avl_node *node = node_ptr(root, root->node);
|
||||
|
||||
while (node && node->left)
|
||||
node = node_ptr(root, node->left);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
struct scoutfs_avl_node *avl_next(struct scoutfs_avl_root *root,
|
||||
struct scoutfs_avl_node *node)
|
||||
{
|
||||
struct scoutfs_avl_node *parent;
|
||||
|
||||
if (node->right) {
|
||||
node = node_ptr(root, node->right);
|
||||
while (node->left)
|
||||
node = node_ptr(root, node->left);
|
||||
return node;
|
||||
}
|
||||
|
||||
while ((parent = node_ptr(root, node->parent)) &&
|
||||
node == node_ptr(root, parent->right))
|
||||
node = parent;
|
||||
|
||||
return parent;
|
||||
}
|
||||
8
utils/src/avl.h
Normal file
8
utils/src/avl.h
Normal file
@@ -0,0 +1,8 @@
|
||||
#ifndef _AVL_H_
#define _AVL_H_

/*
 * Declare the struct tags at file scope so that the prototypes below
 * are valid no matter which headers were included before this one.
 * Without this a tag that first appears inside a parameter list is
 * scoped to that single prototype.
 */
struct scoutfs_avl_root;
struct scoutfs_avl_node;

/* Return the leftmost node of the tree, or NULL if the tree is empty. */
struct scoutfs_avl_node *avl_first(struct scoutfs_avl_root *root);

/*
 * Return the node following @node in an in-order walk, or NULL if
 * @node is the last node.  @node must not be NULL.
 */
struct scoutfs_avl_node *avl_next(struct scoutfs_avl_root *root,
				  struct scoutfs_avl_node *node);

#endif
|
||||
@@ -184,26 +184,22 @@ struct scoutfs_radix_root {
|
||||
~(__u64)SCOUTFS_RADIX_LG_MASK)
|
||||
#define SCOUTFS_RADIX_BITS_BYTES (SCOUTFS_RADIX_BITS / 8)
|
||||
|
||||
struct scoutfs_avl_root {
|
||||
__le16 node;
|
||||
} __packed;
|
||||
|
||||
struct scoutfs_avl_node {
|
||||
__le16 parent;
|
||||
__le16 left;
|
||||
__le16 right;
|
||||
__u8 height;
|
||||
} __packed;
|
||||
|
||||
/* when we split we want to have multiple items on each side */
|
||||
#define SCOUTFS_BTREE_MAX_VAL_LEN (SCOUTFS_BLOCK_SIZE / 8)
|
||||
#define SCOUTFS_BTREE_MAX_VAL_LEN 512
|
||||
|
||||
/*
|
||||
* The min number of free bytes we must leave in a parent as we descend
|
||||
* to modify. This guarantees enough free bytes in a parent to insert a
|
||||
* new child reference item as a child block splits.
|
||||
*/
|
||||
#define SCOUTFS_BTREE_PARENT_MIN_FREE_BYTES \
|
||||
(sizeof(struct scoutfs_btree_item_header) + \
|
||||
sizeof(struct scoutfs_btree_item) + \
|
||||
sizeof(struct scoutfs_btree_ref))
|
||||
|
||||
/*
|
||||
* When debugging we can tune the splitting and merging thresholds to
|
||||
* create much larger trees by having blocks with many fewer items. We
|
||||
* implement this by pretending the blocks are tiny. They're still
|
||||
* large enough for a handful of items.
|
||||
*/
|
||||
#define SCOUTFS_BTREE_TINY_BLOCK_SIZE 512
|
||||
/* each value ends with an offset which lets compaction iterate over values */
|
||||
#define SCOUTFS_BTREE_VAL_OWNER_BYTES sizeof(__le16)
|
||||
|
||||
/*
|
||||
* A 4EB test image measured a worst case height of 17. This is plenty
|
||||
@@ -225,24 +221,37 @@ struct scoutfs_btree_root {
|
||||
__u8 height;
|
||||
} __packed;
|
||||
|
||||
struct scoutfs_btree_item_header {
|
||||
__le32 off;
|
||||
} __packed;
|
||||
|
||||
struct scoutfs_btree_item {
|
||||
struct scoutfs_avl_node node;
|
||||
struct scoutfs_key key;
|
||||
__le16 val_off;
|
||||
__le16 val_len;
|
||||
__u8 val[0];
|
||||
} __packed;
|
||||
|
||||
struct scoutfs_btree_block {
|
||||
struct scoutfs_block_header hdr;
|
||||
__le32 free_end;
|
||||
__le32 nr_items;
|
||||
struct scoutfs_avl_root item_root;
|
||||
__le16 nr_items;
|
||||
__le16 total_item_bytes;
|
||||
__le16 mid_free_len;
|
||||
__le16 last_free_off;
|
||||
__le16 last_free_len;
|
||||
__u8 level;
|
||||
struct scoutfs_btree_item_header item_hdrs[0];
|
||||
struct scoutfs_btree_item items[0];
|
||||
/* leaf blocks have a fixed size item offset hash table at the end */
|
||||
} __packed;
|
||||
|
||||
/*
 * Size of the item offset hash table at the end of each leaf block.
 *
 * The bucket count targets a 75% load for a leaf that is completely
 * full of items that have no value.  That case is rare because most
 * items carry values and most blocks aren't full, so real tables are
 * usually emptier than that.
 *
 * The expression is evaluated left to right in integer arithmetic: the
 * number of (item + bucket) pairs that fit after the block header is
 * computed first and is then scaled by 100 / 75.
 */
#define SCOUTFS_BTREE_LEAF_ITEM_HASH_NR					\
	((SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_btree_block)) /	\
	 (sizeof(struct scoutfs_btree_item) + sizeof(__le16)) * 100 / 75)

/* bytes occupied by the table: one __le16 item offset per bucket */
#define SCOUTFS_BTREE_LEAF_ITEM_HASH_BYTES				\
	(SCOUTFS_BTREE_LEAF_ITEM_HASH_NR * sizeof(__le16))
|
||||
|
||||
struct scoutfs_mounted_client_btree_val {
|
||||
__u8 flags;
|
||||
} __packed;
|
||||
|
||||
39
utils/src/leaf_item_hash.c
Normal file
39
utils/src/leaf_item_hash.c
Normal file
@@ -0,0 +1,39 @@
|
||||
#include "sparse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "crc.h"
|
||||
#include "leaf_item_hash.h"
|
||||
|
||||
/*
|
||||
* A minimal extraction of the leaf item hash from the kernel's btree.
|
||||
*/
|
||||
|
||||
int leaf_item_hash_ind(struct scoutfs_key *key)
|
||||
{
|
||||
return crc32c(~0, key, sizeof(struct scoutfs_key)) %
|
||||
SCOUTFS_BTREE_LEAF_ITEM_HASH_NR;
|
||||
}
|
||||
|
||||
__le16 *leaf_item_hash_buckets(struct scoutfs_btree_block *bt)
|
||||
{
|
||||
return (void *)bt + SCOUTFS_BLOCK_SIZE -
|
||||
SCOUTFS_BTREE_LEAF_ITEM_HASH_BYTES;
|
||||
}
|
||||
|
||||
/*
 * Record an item's block offset in the leaf's hash table.
 *
 * Does nothing for blocks above level 0.  Otherwise buckets are probed
 * linearly from the key's hash index towards the end of the table and
 * @off is stored in the first bucket that holds zero.  The probe does
 * not wrap around to the start of the table; if every bucket from the
 * hash index to the end is occupied the offset is not stored anywhere.
 *
 * @off is stored as given, so it must already be little-endian.
 */
void leaf_item_hash_insert(struct scoutfs_btree_block *bt,
			   struct scoutfs_key *key, __le16 off)
{
	__le16 *buckets;
	int ind;

	if (bt->level > 0)
		return;

	buckets = leaf_item_hash_buckets(bt);
	ind = leaf_item_hash_ind(key);

	while (ind < SCOUTFS_BTREE_LEAF_ITEM_HASH_NR) {
		if (buckets[ind] == 0) {
			buckets[ind] = off;
			break;
		}
		ind++;
	}
}
|
||||
9
utils/src/leaf_item_hash.h
Normal file
9
utils/src/leaf_item_hash.h
Normal file
@@ -0,0 +1,9 @@
|
||||
#ifndef _LEAF_ITEM_HASH_H_
|
||||
#define _LEAF_ITEM_HASH_H_
|
||||
|
||||
int leaf_item_hash_ind(struct scoutfs_key *key);
|
||||
__le16 *leaf_item_hash_buckets(struct scoutfs_btree_block *bt);
|
||||
void leaf_item_hash_insert(struct scoutfs_btree_block *bt,
|
||||
struct scoutfs_key *key, __le16 off);
|
||||
|
||||
#endif
|
||||
@@ -27,6 +27,7 @@
|
||||
#include "key.h"
|
||||
#include "bitops.h"
|
||||
#include "radix.h"
|
||||
#include "leaf_item_hash.h"
|
||||
|
||||
static int write_raw_block(int fd, u64 blkno, void *blk)
|
||||
{
|
||||
@@ -290,9 +291,11 @@ static int write_new_fs(char *path, int fd, u8 quorum_count)
|
||||
struct scoutfs_inode *inode;
|
||||
struct scoutfs_btree_block *bt;
|
||||
struct scoutfs_btree_item *btitem;
|
||||
struct scoutfs_avl_node *par;
|
||||
struct scoutfs_key *key;
|
||||
struct timeval tv;
|
||||
char uuid_str[37];
|
||||
__le16 *own;
|
||||
void *zeros;
|
||||
u64 blkno;
|
||||
u64 limit;
|
||||
@@ -370,28 +373,43 @@ static int write_new_fs(char *path, int fd, u8 quorum_count)
|
||||
bt->hdr.fsid = super->hdr.fsid;
|
||||
bt->hdr.blkno = cpu_to_le64(blkno);
|
||||
bt->hdr.seq = cpu_to_le64(1);
|
||||
bt->nr_items = cpu_to_le32(2);
|
||||
|
||||
/* btree item allocated from the back of the block */
|
||||
key = (void *)bt + SCOUTFS_BLOCK_SIZE - sizeof(*key);
|
||||
btitem = (void *)key - sizeof(*btitem);
|
||||
/* meta seq index for the root inode */
|
||||
btitem = &bt->items[le16_to_cpu(bt->nr_items)];
|
||||
le16_add_cpu(&bt->nr_items, 1);
|
||||
key = &btitem->key;
|
||||
|
||||
bt->item_hdrs[0].off = cpu_to_le32((long)btitem - (long)bt);
|
||||
bt->item_root.node = cpu_to_le16((void *)&btitem->node -
|
||||
(void *)&bt->item_root);
|
||||
btitem->node.height = 2;
|
||||
btitem->val_len = cpu_to_le16(0);
|
||||
|
||||
memset(key, 0, sizeof(*key));
|
||||
key->sk_zone = SCOUTFS_INODE_INDEX_ZONE;
|
||||
key->sk_type = SCOUTFS_INODE_INDEX_META_SEQ_TYPE;
|
||||
key->skii_ino = cpu_to_le64(SCOUTFS_ROOT_INO);
|
||||
|
||||
inode = (void *)btitem - sizeof(*inode);
|
||||
key = (void *)inode - sizeof(*key);
|
||||
btitem = (void *)key - sizeof(*btitem);
|
||||
leaf_item_hash_insert(bt, &btitem->key,
|
||||
cpu_to_le16((void *)btitem - (void *)bt));
|
||||
|
||||
bt->item_hdrs[1].off = cpu_to_le32((long)btitem - (long)bt);
|
||||
/* root inode */
|
||||
par = &btitem->node;
|
||||
btitem = &bt->items[le16_to_cpu(bt->nr_items)];
|
||||
le16_add_cpu(&bt->nr_items, 1);
|
||||
key = &btitem->key;
|
||||
own = (void *)bt + SCOUTFS_BLOCK_SIZE -
|
||||
SCOUTFS_BTREE_LEAF_ITEM_HASH_BYTES -
|
||||
SCOUTFS_BTREE_VAL_OWNER_BYTES;
|
||||
inode = (void *)own - sizeof(*inode);
|
||||
|
||||
par->right = cpu_to_le16((void *)&btitem->node -
|
||||
(void *)&bt->item_root);
|
||||
btitem->node.height = 1;
|
||||
btitem->node.parent = cpu_to_le16((void *)par - (void *)&bt->item_root);
|
||||
btitem->val_off = cpu_to_le16((void *)inode - (void *)bt);
|
||||
btitem->val_len = cpu_to_le16(sizeof(*inode));
|
||||
le16_add_cpu(&bt->total_item_bytes, le16_to_cpu(btitem->val_len) +
|
||||
SCOUTFS_BTREE_VAL_OWNER_BYTES);
|
||||
|
||||
memset(key, 0, sizeof(*key));
|
||||
key->sk_zone = SCOUTFS_FS_ZONE;
|
||||
key->ski_ino = cpu_to_le64(SCOUTFS_ROOT_INO);
|
||||
key->sk_type = SCOUTFS_INODE_TYPE;
|
||||
@@ -406,8 +424,14 @@ static int write_new_fs(char *path, int fd, u8 quorum_count)
|
||||
inode->mtime.sec = inode->atime.sec;
|
||||
inode->mtime.nsec = inode->atime.nsec;
|
||||
|
||||
bt->free_end = bt->item_hdrs[le32_to_cpu(bt->nr_items) - 1].off;
|
||||
leaf_item_hash_insert(bt, &btitem->key,
|
||||
cpu_to_le16((void *)btitem - (void *)bt));
|
||||
*own = cpu_to_le16((void *)btitem - (void *)bt);
|
||||
|
||||
le16_add_cpu(&bt->total_item_bytes, le16_to_cpu(bt->nr_items) *
|
||||
sizeof(struct scoutfs_btree_item));
|
||||
bt->mid_free_len = cpu_to_le16((void *)inode -
|
||||
(void *)&bt->items[le16_to_cpu(bt->nr_items)]);
|
||||
bt->hdr.magic = cpu_to_le32(SCOUTFS_BLOCK_MAGIC_BTREE);
|
||||
bt->hdr.crc = cpu_to_le32(crc_block(&bt->hdr));
|
||||
|
||||
|
||||
@@ -21,6 +21,8 @@
|
||||
#include "crc.h"
|
||||
#include "key.h"
|
||||
#include "radix.h"
|
||||
#include "avl.h"
|
||||
#include "leaf_item_hash.h"
|
||||
|
||||
static void *read_block(int fd, u64 blkno)
|
||||
{
|
||||
@@ -395,14 +397,48 @@ static int print_btree_ref(struct scoutfs_key *key, void *val,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void print_leaf_item_hash(struct scoutfs_btree_block *bt)
|
||||
{
|
||||
__le16 *b;
|
||||
int col;
|
||||
int nr;
|
||||
int i;
|
||||
|
||||
/* print the leaf item hash */
|
||||
printf(" item hash: ");
|
||||
col = 13;
|
||||
|
||||
b = leaf_item_hash_buckets(bt);
|
||||
nr = 0;
|
||||
for (i = 0; i < SCOUTFS_BTREE_LEAF_ITEM_HASH_NR; i++) {
|
||||
if (b[i] == 0)
|
||||
continue;
|
||||
|
||||
nr++;
|
||||
col += snprintf(NULL, 0, "%u,%u ", i, le16_to_cpu(b[i]));
|
||||
if (col >= 78) {
|
||||
printf("\n ");
|
||||
col = 3;
|
||||
}
|
||||
printf("%u,%u ", i, le16_to_cpu(b[i]));
|
||||
}
|
||||
if (col != 3)
|
||||
printf("\n");
|
||||
printf(" (%u / %u populated, %u%% load)\n",
|
||||
nr, (int)SCOUTFS_BTREE_LEAF_ITEM_HASH_NR,
|
||||
nr * 100 / (int)SCOUTFS_BTREE_LEAF_ITEM_HASH_NR);
|
||||
}
|
||||
|
||||
static int print_btree_block(int fd, struct scoutfs_super_block *super,
|
||||
char *which, struct scoutfs_btree_ref *ref,
|
||||
print_item_func func, void *arg, u8 level)
|
||||
{
|
||||
struct scoutfs_btree_item *item;
|
||||
struct scoutfs_avl_node *node;
|
||||
struct scoutfs_btree_block *bt;
|
||||
struct scoutfs_key *key;
|
||||
unsigned val_len;
|
||||
unsigned int val_len;
|
||||
unsigned int off;
|
||||
void *val;
|
||||
int ret;
|
||||
int i;
|
||||
@@ -414,22 +450,35 @@ static int print_btree_block(int fd, struct scoutfs_super_block *super,
|
||||
if (bt->level == level) {
|
||||
printf("%s btree blkno %llu\n"
|
||||
" crc %08x fsid %llx seq %llu blkno %llu \n"
|
||||
" level %u free_end %u nr_items %u\n",
|
||||
" total_item_bytes %u mid_free_len %u last_free_off %u "
|
||||
"last_free_len %u\n"
|
||||
" level %u nr_items %u item_root.node %u\n",
|
||||
which, le64_to_cpu(ref->blkno),
|
||||
le32_to_cpu(bt->hdr.crc),
|
||||
le64_to_cpu(bt->hdr.fsid),
|
||||
le64_to_cpu(bt->hdr.seq),
|
||||
le64_to_cpu(bt->hdr.blkno),
|
||||
le16_to_cpu(bt->total_item_bytes),
|
||||
le16_to_cpu(bt->mid_free_len),
|
||||
le16_to_cpu(bt->last_free_off),
|
||||
le16_to_cpu(bt->last_free_len),
|
||||
bt->level,
|
||||
le32_to_cpu(bt->free_end),
|
||||
le32_to_cpu(bt->nr_items));
|
||||
le16_to_cpu(bt->nr_items),
|
||||
le16_to_cpu(bt->item_root.node));
|
||||
|
||||
if (bt->level == 0)
|
||||
print_leaf_item_hash(bt);
|
||||
}
|
||||
|
||||
for (i = 0; i < le32_to_cpu(bt->nr_items); i++) {
|
||||
item = (void *)bt + le32_to_cpu(bt->item_hdrs[i].off);
|
||||
for (i = 0, node = avl_first(&bt->item_root);
|
||||
node;
|
||||
i++, node = avl_next(&bt->item_root, node)) {
|
||||
|
||||
item = container_of(node, struct scoutfs_btree_item, node);
|
||||
off = (void *)item - (void *)bt;
|
||||
val_len = le16_to_cpu(item->val_len);
|
||||
key = &item->key;
|
||||
val = item->val;
|
||||
val = (void *)bt + le16_to_cpu(item->val_off);
|
||||
|
||||
if (level < bt->level) {
|
||||
ref = val;
|
||||
@@ -443,8 +492,12 @@ static int print_btree_block(int fd, struct scoutfs_super_block *super,
|
||||
continue;
|
||||
}
|
||||
|
||||
printf(" item [%u] off %u val_len %u\n",
|
||||
i, le32_to_cpu(bt->item_hdrs[i].off), val_len);
|
||||
printf(" [%u] off %u par %u l %u r %u h %u vo %u vl %u\n",
|
||||
i, off, le16_to_cpu(item->node.parent),
|
||||
le16_to_cpu(item->node.left),
|
||||
le16_to_cpu(item->node.right),
|
||||
item->node.height, le16_to_cpu(item->val_off),
|
||||
val_len);
|
||||
|
||||
if (level)
|
||||
print_btree_ref(key, val, val_len, func, arg);
|
||||
@@ -589,12 +642,12 @@ static int print_btree_leaf_items(int fd, struct scoutfs_super_block *super,
|
||||
print_item_func func, void *arg)
|
||||
{
|
||||
struct scoutfs_btree_item *item;
|
||||
struct scoutfs_avl_node *node;
|
||||
struct scoutfs_btree_block *bt;
|
||||
unsigned val_len;
|
||||
void *key;
|
||||
void *val;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
if (ref->blkno == 0)
|
||||
return 0;
|
||||
@@ -603,11 +656,12 @@ static int print_btree_leaf_items(int fd, struct scoutfs_super_block *super,
|
||||
if (!bt)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < le32_to_cpu(bt->nr_items); i++) {
|
||||
item = (void *)bt + le32_to_cpu(bt->item_hdrs[i].off);
|
||||
node = avl_first(&bt->item_root);
|
||||
while (node) {
|
||||
item = container_of(node, struct scoutfs_btree_item, node);
|
||||
val_len = le16_to_cpu(item->val_len);
|
||||
key = (void *)(item + 1);
|
||||
val = (void *)(key + 1);
|
||||
key = &item->key;
|
||||
val = (void *)bt + le16_to_cpu(item->val_off);
|
||||
|
||||
if (bt->level > 0) {
|
||||
ret = print_btree_leaf_items(fd, super, val, func, arg);
|
||||
@@ -617,6 +671,8 @@ static int print_btree_leaf_items(int fd, struct scoutfs_super_block *super,
|
||||
} else {
|
||||
func(key, val, val_len, arg);
|
||||
}
|
||||
|
||||
node = avl_next(&bt->item_root, node);
|
||||
}
|
||||
|
||||
free(bt);
|
||||
|
||||
Reference in New Issue
Block a user