mirror of
https://github.com/versity/scoutfs.git
synced 2026-01-08 21:03:12 +00:00
Remove dead block, btree, and buddy code
Remove the last bits of the dead code from the old btree design. Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
@@ -1,61 +0,0 @@
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "util.h"
|
||||
#include "buddy.h"
|
||||
|
||||
/*
|
||||
* Figure out how many blocks the radix will need by starting with leaf
|
||||
* blocks and dividing by the slot fanout until we have one block. cow
|
||||
* updates require two copies of every block.
|
||||
*/
|
||||
static u64 calc_blocks(struct buddy_info *binf, u64 bits)
|
||||
{
|
||||
u64 blocks = DIV_ROUND_UP(bits, SCOUTFS_BUDDY_ORDER0_BITS);
|
||||
u64 tot = 0;
|
||||
int level = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < SCOUTFS_BUDDY_MAX_HEIGHT; i++)
|
||||
binf->blknos[i] = SCOUTFS_BUDDY_BLKNO;
|
||||
|
||||
for (;;) {
|
||||
for (i = level - 1; i >= 0; i--)
|
||||
binf->blknos[i] += (blocks * 2);
|
||||
tot += (blocks * 2);
|
||||
|
||||
level++;
|
||||
if (blocks == 1)
|
||||
break;
|
||||
blocks = DIV_ROUND_UP(blocks, SCOUTFS_BUDDY_SLOTS);
|
||||
}
|
||||
|
||||
binf->height = level;
|
||||
|
||||
return tot;
|
||||
}
|
||||
|
||||
/*
|
||||
* Figure out how many buddy blocks we'll need to allocate the rest of
|
||||
* the blocks in the device. The first time through we find the size of
|
||||
* the radix needed to describe the whole device, but that doesn't take
|
||||
* the buddy block overhead into account. We iterate getting a more
|
||||
* precise estimate each time. This only takes a few rounds to
|
||||
* stabilize.
|
||||
*/
|
||||
void buddy_init(struct buddy_info *binf, u64 total_blocks)
|
||||
{
|
||||
u64 blocks = SCOUTFS_BUDDY_BLKNO;
|
||||
u64 was;
|
||||
|
||||
while(1) {
|
||||
was = blocks;
|
||||
blocks = calc_blocks(binf, total_blocks - blocks);
|
||||
if (blocks == was)
|
||||
break;
|
||||
}
|
||||
|
||||
binf->buddy_blocks = blocks;
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
#ifndef _BUDDY_H_
|
||||
#define _BUDDY_H_
|
||||
|
||||
#include "format.h"
|
||||
|
||||
/*
 * Sizing of the buddy allocator's block region, filled in by
 * buddy_init() and its calc_blocks() helper.
 */
struct buddy_info {
|
||||
/* number of radix levels counted by calc_blocks() */
u8 height;
|
||||
/* total block count that buddy_init() converged on */
u64 buddy_blocks;
|
||||
|
||||
/* starting blkno in each level, including mirrors */
|
||||
u64 blknos[SCOUTFS_BUDDY_MAX_HEIGHT];
|
||||
};
|
||||
|
||||
void buddy_init(struct buddy_info *binf, u64 total_blocks);
|
||||
|
||||
#endif
|
||||
@@ -35,9 +35,6 @@
|
||||
*/
|
||||
#define SCOUTFS_SUPER_BLKNO ((64 * 1024) >> SCOUTFS_BLOCK_SHIFT)
|
||||
#define SCOUTFS_SUPER_NR 2
|
||||
#define SCOUTFS_BUDDY_BLKNO (SCOUTFS_SUPER_BLKNO + SCOUTFS_SUPER_NR)
|
||||
|
||||
#define SCOUTFS_MAX_TRANS_BLOCKS (128 * 1024 * 1024 / SCOUTFS_BLOCK_SIZE)
|
||||
|
||||
/*
|
||||
* This header is found at the start of every block so that we can
|
||||
@@ -161,70 +158,6 @@ struct scoutfs_segment_block {
|
||||
/* packed vals */
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* Block references include the sequence number so that we can detect
|
||||
* readers racing with writers and so that we can tell that we don't
|
||||
* need to follow a reference when traversing based on seqs.
|
||||
*/
|
||||
struct scoutfs_block_ref {
|
||||
__le64 blkno;
|
||||
__le64 seq;
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* If the block was full of bits the largest possible order would be
|
||||
* the block size shift + 3 (BITS_PER_BYTE). But the header uses
|
||||
* up some space and then the buddy bits mean two bits per block.
|
||||
* Then +1 for this being the number, not the greatest order.
|
||||
*/
|
||||
#define SCOUTFS_BUDDY_ORDERS (SCOUTFS_BLOCK_SHIFT + 3 - 2 + 1)
|
||||
|
||||
struct scoutfs_buddy_block {
|
||||
struct scoutfs_block_header hdr;
|
||||
__le16 first_set[SCOUTFS_BUDDY_ORDERS];
|
||||
__u8 level;
|
||||
__u8 __pad[3]; /* naturally align bits */
|
||||
union {
|
||||
struct scoutfs_buddy_slot {
|
||||
__le64 seq;
|
||||
__le16 free_orders;
|
||||
/* XXX seems like we could hide a bit somewhere */
|
||||
__u8 blkno_off;
|
||||
} __packed slots[0];
|
||||
__le64 bits[0];
|
||||
} __packed;
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* Each buddy leaf block references order 0 blocks with half of its
|
||||
* bitmap. The other half of the bits are used for the higher order
|
||||
* bits.
|
||||
*/
|
||||
#define SCOUTFS_BUDDY_ORDER0_BITS \
|
||||
(((SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_buddy_block)) * 8) / 2)
|
||||
|
||||
#define SCOUTFS_BUDDY_SLOTS \
|
||||
((SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_buddy_block)) / \
|
||||
sizeof(struct scoutfs_buddy_slot))
|
||||
|
||||
/*
 * Root of the buddy radix, embedded in the super block as buddy_root.
 * It reuses the slot format found in parent buddy blocks.
 * NOTE(review): presumably the slot describes the root buddy block and
 * height is the number of levels beneath it -- confirm with the users.
 */
struct scoutfs_buddy_root {
|
||||
struct scoutfs_buddy_slot slot;
|
||||
__u8 height;
|
||||
} __packed;
|
||||
|
||||
/* ((SCOUTFS_BUDDY_SLOTS^5) * SCOUTFS_BUDDY_ORDER0_BITS) > 2^52 */
|
||||
#define SCOUTFS_BUDDY_MAX_HEIGHT 6
|
||||
|
||||
/*
|
||||
* We should be able to make the offset smaller if neither dirents nor
|
||||
* data items use the full 64 bits.
|
||||
*/
|
||||
struct scoutfs_key {
|
||||
__le64 inode;
|
||||
u8 type;
|
||||
__le64 offset;
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* Currently we sort keys by the numeric value of the types, but that
|
||||
* isn't necessary. We could have an arbitrary sort order. So we don't
|
||||
@@ -241,8 +174,6 @@ struct scoutfs_key {
|
||||
#define SCOUTFS_DATA_KEY 11
|
||||
#define SCOUTFS_MAX_UNUSED_KEY 255
|
||||
|
||||
#define SCOUTFS_MAX_ITEM_LEN 512
|
||||
|
||||
/* value is struct scoutfs_inode */
|
||||
struct scoutfs_inode_key {
|
||||
__u8 type;
|
||||
@@ -307,66 +238,9 @@ struct scoutfs_symlink_key {
|
||||
__be64 ino;
|
||||
} __packed;
|
||||
|
||||
/*
 * Root of a btree, embedded in the super block as btree_root.
 * NOTE(review): presumably ref points at the root block and height is
 * the number of levels in the tree -- confirm with the btree code.
 */
struct scoutfs_btree_root {
|
||||
u8 height;
|
||||
struct scoutfs_block_ref ref;
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* @free_end: records the byte offset of the first byte after the free
|
||||
* space in the block between the header and the first item. New items
|
||||
* are allocated by subtracting the space they need.
|
||||
*
|
||||
* @free_reclaim: records the number of bytes of free space amongst the
|
||||
* items after free_end. If a block is compacted then this much new
|
||||
* free space would be reclaimed.
|
||||
*/
|
||||
struct scoutfs_btree_block {
|
||||
struct scoutfs_block_header hdr;
|
||||
__le16 free_end;
|
||||
__le16 free_reclaim;
|
||||
__le16 nr_items;
|
||||
__le16 item_offs[0];
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* The item sequence number is set to the dirty block's sequence number
|
||||
* when the item is modified. It is not changed by splits or merges.
|
||||
*/
|
||||
struct scoutfs_btree_item {
|
||||
struct scoutfs_key key;
|
||||
__le64 seq;
|
||||
__le16 val_len;
|
||||
char val[0];
|
||||
} __packed;
|
||||
|
||||
/* Blocks are no more than half free. */
|
||||
#define SCOUTFS_BTREE_FREE_LIMIT \
|
||||
((SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_btree_block)) / 2)
|
||||
|
||||
/* XXX does this exist upstream somewhere? */
|
||||
#define member_sizeof(TYPE, MEMBER) (sizeof(((TYPE *)0)->MEMBER))
|
||||
|
||||
#define SCOUTFS_BTREE_MAX_ITEMS \
|
||||
((SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_btree_block)) / \
|
||||
(member_sizeof(struct scoutfs_btree_block, item_offs[0]) + \
|
||||
sizeof(struct scoutfs_btree_item)))
|
||||
|
||||
/*
|
||||
* We can calculate the max tree depth by calculating how many leaf
|
||||
* blocks the tree could reference. The block device can only reference
|
||||
* 2^64 bytes. The tallest parent tree has half full parent blocks.
|
||||
*
|
||||
* So we have the relation:
|
||||
*
|
||||
* ceil(max_items / 2) ^ (max_depth - 1) >= 2^64 / block_size
|
||||
*
|
||||
* and solve for depth:
|
||||
*
|
||||
* max_depth = log(ceil(max_items / 2), 2^64 / block_size) + 1
|
||||
*/
|
||||
#define SCOUTFS_BTREE_MAX_DEPTH 10
|
||||
|
||||
#define SCOUTFS_UUID_BYTES 16
|
||||
|
||||
/*
|
||||
@@ -382,16 +256,11 @@ struct scoutfs_super_block {
|
||||
__le64 alloc_uninit;
|
||||
__le64 total_segs;
|
||||
__le64 free_segs;
|
||||
__le64 total_blocks;
|
||||
__le64 free_blocks;
|
||||
__le64 ring_blkno;
|
||||
__le64 ring_blocks;
|
||||
__le64 ring_tail_block;
|
||||
__le64 ring_gen;
|
||||
__le64 next_seg_seq;
|
||||
__le64 buddy_blocks;
|
||||
struct scoutfs_buddy_root buddy_root;
|
||||
struct scoutfs_btree_root btree_root;
|
||||
struct scoutfs_treap_root alloc_treap_root;
|
||||
struct scoutfs_manifest manifest;
|
||||
} __packed;
|
||||
@@ -418,7 +287,6 @@ struct scoutfs_timespec {
|
||||
struct scoutfs_inode {
|
||||
__le64 size;
|
||||
__le64 blocks;
|
||||
__le64 link_counter;
|
||||
__le64 data_version;
|
||||
__le64 next_readdir_pos;
|
||||
__le32 nlink;
|
||||
@@ -426,7 +294,6 @@ struct scoutfs_inode {
|
||||
__le32 gid;
|
||||
__le32 mode;
|
||||
__le32 rdev;
|
||||
__le32 salt;
|
||||
struct scoutfs_timespec atime;
|
||||
struct scoutfs_timespec ctime;
|
||||
struct scoutfs_timespec mtime;
|
||||
@@ -449,20 +316,6 @@ struct scoutfs_dirent {
|
||||
__u8 name[0];
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* Dirent items are stored at keys with the offset set to the hash of
|
||||
* the name. Creation can find that hash values collide and will
|
||||
* attempt to linearly probe this many following hash values looking for
|
||||
* an unused value.
|
||||
*
|
||||
* In small directories this doesn't really matter because hash values
|
||||
* will so very rarely collide. At around 50k items we start to see our
|
||||
* first collisions. 16 slots is still pretty quick to scan in the
|
||||
* btree and it gets us up into the hundreds of millions of entries
|
||||
* before enospc is returned as we run out of hash values.
|
||||
*/
|
||||
#define SCOUTFS_DIRENT_COLL_NR 16
|
||||
|
||||
#define SCOUTFS_NAME_LEN 255
|
||||
|
||||
/* S32_MAX avoids the (int) sign bit and might avoid sloppy bugs */
|
||||
@@ -475,17 +328,10 @@ struct scoutfs_dirent {
|
||||
#define SCOUTFS_XATTR_MAX_PARTS \
|
||||
DIV_ROUND_UP(SCOUTFS_XATTR_MAX_SIZE, SCOUTFS_XATTR_PART_SIZE)
|
||||
|
||||
/*
|
||||
* We only use 31 bits for readdir positions so that we don't confuse
|
||||
* old signed 32bit f_pos applications or those on the other side of
|
||||
* network protocols that have limited readdir positions.
|
||||
*/
|
||||
|
||||
#define SCOUTFS_DIRENT_OFF_BITS 31
|
||||
#define SCOUTFS_DIRENT_OFF_MASK ((1U << SCOUTFS_DIRENT_OFF_BITS) - 1)
|
||||
/* getdents returns next pos with an entry, no entry at (f_pos)~0 */
|
||||
/* entries begin after . and .. */
|
||||
#define SCOUTFS_DIRENT_FIRST_POS 2
|
||||
#define SCOUTFS_DIRENT_LAST_POS (INT_MAX - 1)
|
||||
/* getdents returns next pos with an entry, no entry at (f_pos)~0 */
|
||||
#define SCOUTFS_DIRENT_LAST_POS (U64_MAX - 1)
|
||||
|
||||
enum {
|
||||
SCOUTFS_DT_FIFO = 0,
|
||||
@@ -498,14 +344,6 @@ enum {
|
||||
SCOUTFS_DT_WHT,
|
||||
};
|
||||
|
||||
/*
 * An extent as stored in an item value.
 * NOTE(review): blkno and len look like the starting block and the
 * length of the extent; the units of len are not visible here --
 * confirm.  flags holds SCOUTFS_EXTENT_FLAG_* bits.
 */
struct scoutfs_extent {
|
||||
__le64 blkno;
|
||||
__le64 len;
|
||||
__u8 flags;
|
||||
} __packed;
|
||||
|
||||
#define SCOUTFS_EXTENT_FLAG_OFFLINE (1 << 0)
|
||||
|
||||
/* ino_path can search for backref items with a null term */
|
||||
#define SCOUTFS_MAX_KEY_SIZE \
|
||||
offsetof(struct scoutfs_link_backref_key, name[SCOUTFS_NAME_LEN + 1])
|
||||
|
||||
@@ -17,7 +17,6 @@
|
||||
#include "crc.h"
|
||||
#include "rand.h"
|
||||
#include "dev.h"
|
||||
#include "buddy.h"
|
||||
|
||||
static int write_raw_block(int fd, u64 blkno, void *blk)
|
||||
{
|
||||
@@ -100,7 +99,6 @@ static int write_new_fs(char *path, int fd)
|
||||
void *ring;
|
||||
u64 limit;
|
||||
u64 size;
|
||||
u64 total_blocks;
|
||||
u64 ring_blocks;
|
||||
u64 total_segs;
|
||||
u64 first_segno;
|
||||
@@ -134,7 +132,6 @@ static int write_new_fs(char *path, int fd)
|
||||
goto out;
|
||||
}
|
||||
|
||||
total_blocks = size / SCOUTFS_BLOCK_SIZE;
|
||||
total_segs = size / SCOUTFS_SEGMENT_SIZE;
|
||||
ring_blocks = calc_ring_blocks(total_segs);
|
||||
|
||||
@@ -145,7 +142,6 @@ static int write_new_fs(char *path, int fd)
|
||||
super->id = cpu_to_le64(SCOUTFS_SUPER_ID);
|
||||
uuid_generate(super->uuid);
|
||||
super->next_ino = cpu_to_le64(SCOUTFS_ROOT_INO + 1);
|
||||
super->total_blocks = cpu_to_le64(total_blocks);
|
||||
super->total_segs = cpu_to_le64(total_segs);
|
||||
super->ring_blkno = cpu_to_le64(SCOUTFS_SUPER_BLKNO + 2);
|
||||
super->ring_blocks = cpu_to_le64(ring_blocks);
|
||||
@@ -246,11 +242,11 @@ static int write_new_fs(char *path, int fd)
|
||||
uuid_unparse(super->uuid, uuid_str);
|
||||
|
||||
printf("Created scoutfs filesystem:\n"
|
||||
" total blocks: %llu\n"
|
||||
" total segments: %llu\n"
|
||||
" ring blocks: %llu\n"
|
||||
" fsid: %llx\n"
|
||||
" uuid: %s\n",
|
||||
total_blocks, ring_blocks, le64_to_cpu(super->hdr.fsid),
|
||||
total_segs, ring_blocks, le64_to_cpu(super->hdr.fsid),
|
||||
uuid_str);
|
||||
|
||||
ret = 0;
|
||||
|
||||
@@ -15,12 +15,6 @@
|
||||
#include "format.h"
|
||||
#include "cmd.h"
|
||||
#include "crc.h"
|
||||
#include "buddy.h"
|
||||
|
||||
/* XXX maybe these go somewhere */
|
||||
#define SKF "%llu.%u.%llu"
|
||||
#define SKA(k) le64_to_cpu((k)->inode), (k)->type, \
|
||||
le64_to_cpu((k)->offset)
|
||||
|
||||
static void *read_block(int fd, u64 blkno)
|
||||
{
|
||||
@@ -83,17 +77,16 @@ static void print_inode(void *key, int key_len, void *val, int val_len)
|
||||
struct scoutfs_inode_key *ikey = key;
|
||||
struct scoutfs_inode *inode = val;
|
||||
|
||||
printf(" inode: ino %llu size %llu blocks %llu lctr %llu nlink %u\n"
|
||||
printf(" inode: ino %llu size %llu blocks %llu nlink %u\n"
|
||||
" uid %u gid %u mode 0%o rdev 0x%x\n"
|
||||
" salt 0x%x next_readdir_pos %llu data_version %llu\n"
|
||||
" next_readdir_pos %llu data_version %llu\n"
|
||||
" atime %llu.%08u ctime %llu.%08u\n"
|
||||
" mtime %llu.%08u\n",
|
||||
be64_to_cpu(ikey->ino),
|
||||
le64_to_cpu(inode->size), le64_to_cpu(inode->blocks),
|
||||
le64_to_cpu(inode->link_counter),
|
||||
le32_to_cpu(inode->nlink), le32_to_cpu(inode->uid),
|
||||
le32_to_cpu(inode->gid), le32_to_cpu(inode->mode),
|
||||
le32_to_cpu(inode->rdev), le32_to_cpu(inode->salt),
|
||||
le32_to_cpu(inode->rdev),
|
||||
le64_to_cpu(inode->next_readdir_pos),
|
||||
le64_to_cpu(inode->data_version),
|
||||
le64_to_cpu(inode->atime.sec),
|
||||
@@ -194,20 +187,6 @@ static void print_symlink(void *key, int key_len, void *val, int val_len)
|
||||
be64_to_cpu(skey->ino), name);
|
||||
}
|
||||
|
||||
#if 0
|
||||
#define EXT_FLAG(f, flags, str) \
|
||||
(flags & f) ? str : "", (flags & (f - 1)) ? "|" : ""
|
||||
|
||||
static void print_extent(struct scoutfs_key *key,
|
||||
struct scoutfs_extent *ext)
|
||||
{
|
||||
printf(" extent: (offest %llu) blkno %llu, len %llu flags %s%s\n",
|
||||
le64_to_cpu(key->offset), le64_to_cpu(ext->blkno),
|
||||
le64_to_cpu(ext->len),
|
||||
EXT_FLAG(SCOUTFS_EXTENT_FLAG_OFFLINE, ext->flags, "OFF"));
|
||||
}
|
||||
#endif
|
||||
|
||||
typedef void (*print_func_t)(void *key, int key_len, void *val, int val_len);
|
||||
|
||||
static print_func_t printers[] = {
|
||||
@@ -410,13 +389,11 @@ static int print_super_blocks(int fd)
|
||||
printf(" id %llx uuid %s\n",
|
||||
le64_to_cpu(super->id), uuid_str);
|
||||
/* XXX these are all in a crazy order */
|
||||
printf(" next_ino %llu total_blocks %llu free_blocks %llu\n"
|
||||
printf(" next_ino %llu\n"
|
||||
" ring_blkno %llu ring_blocks %llu ring_tail_block %llu\n"
|
||||
" ring_gen %llu alloc_uninit %llu total_segs %llu\n"
|
||||
" next_seg_seq %llu free_segs %llu\n",
|
||||
le64_to_cpu(super->next_ino),
|
||||
le64_to_cpu(super->total_blocks),
|
||||
le64_to_cpu(super->free_blocks),
|
||||
le64_to_cpu(super->ring_blkno),
|
||||
le64_to_cpu(super->ring_blocks),
|
||||
le64_to_cpu(super->ring_tail_block),
|
||||
|
||||
Reference in New Issue
Block a user