mirror of
https://github.com/versity/scoutfs.git
synced 2026-02-10 04:30:10 +00:00
scoutfs: use block mapping items
Move to static mapping items instead of unbounded extents. We get more predictable data structures and simpler code but still get reasonably dense metadata. We no longer need all the extent code needed to split and merge extents, test for overlaps, and all that. The functions that use the mappings (get_block, fiemap, truncate) now have a pattern where they decode the mapping item into an allocated native representation, do their work, and encode the result back into the dense item. We do have to grow the largest possible item value to fit the worst case encoding expansion of random block numbers. The local allocators are no longer two extents but are instead simple bitmaps: one for full segments and one for individual blocks. There are helper functions to free and allocate segments and blocks, with careful coordination of, for example, freeing a segment once all of its constituent blocks are free. _fiemap is refactored a bit to make it more clear what's going on. There's one function that either merges the next bit with the currently building extent or fills the current entry and starts recording from a non-mergeable additional block. The old loop worked this way but was implemented with a single squirrely iteration over the extents. This wasn't feasible now that we're also iterating over blocks inside the mapping items. It's a lot clearer to call out to merge or fill the fiemap entry. The dirty item reservation counts for using the mappings are reduced significantly because each modification no longer has to assume that it might merge with two adjacent contiguous neighbours. Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
@@ -206,50 +206,40 @@ static inline const struct scoutfs_item_count SIC_XATTR_SET(unsigned name_len,
|
||||
}
|
||||
|
||||
/*
|
||||
* Both insertion and removal modifications can dirty three extents
|
||||
* at most: insertion can delete two existing neighbours and create a
|
||||
* third new extent and removal can delete an existing extent and create
|
||||
* two new remaining extents.
|
||||
*/
|
||||
static inline void __count_extents(struct scoutfs_item_count *cnt,
|
||||
unsigned nr_mod, unsigned sz)
|
||||
{
|
||||
cnt->items += nr_mod * 3;
|
||||
cnt->keys += (nr_mod * 3) * sz;
|
||||
}
|
||||
|
||||
/*
|
||||
* write_begin can refill local free extents after a bulk alloc rpc,
|
||||
* alloc a block, delete an offline mapping, and insert the newly allocated
|
||||
* mapping.
|
||||
* write_begin can add local free segment items, modify another to
|
||||
* alloc, add a free blkno item, and dirty the mapping.
|
||||
*/
|
||||
static inline const struct scoutfs_item_count SIC_WRITE_BEGIN(void)
|
||||
{
|
||||
struct scoutfs_item_count cnt = {0,};
|
||||
|
||||
BUILD_BUG_ON(sizeof(struct scoutfs_free_extent_blkno_key) !=
|
||||
sizeof(struct scoutfs_free_extent_blocks_key));
|
||||
unsigned nr_free = SCOUTFS_BULK_ALLOC_COUNT + 1 + 1;
|
||||
|
||||
__count_dirty_inode(&cnt);
|
||||
|
||||
__count_extents(&cnt, 2 * (SCOUTFS_BULK_ALLOC_COUNT + 1),
|
||||
sizeof(struct scoutfs_free_extent_blkno_key));
|
||||
__count_extents(&cnt, 2, sizeof(struct scoutfs_file_extent_key));
|
||||
cnt.items += 1 + nr_free;
|
||||
cnt.keys += sizeof(struct scoutfs_block_mapping_key) +
|
||||
(nr_free * sizeof(struct scoutfs_free_bits_key));
|
||||
cnt.vals += SCOUTFS_BLOCK_MAPPING_MAX_BYTES +
|
||||
(nr_free * sizeof(struct scoutfs_free_bits));
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
/*
|
||||
* Truncating a block can free an allocated block, delete an online
|
||||
* mapping, and create an offline mapping.
|
||||
* Truncating a block mapping item's worth of blocks can modify both
|
||||
* free blkno and free segno items per block. Then the largest possible
|
||||
* mapping item is also dirtied.
|
||||
*/
|
||||
static inline const struct scoutfs_item_count SIC_TRUNC_BLOCK(void)
|
||||
{
|
||||
struct scoutfs_item_count cnt = {0,};
|
||||
unsigned nr_free = (2 * SCOUTFS_BLOCK_MAPPING_BLOCKS);
|
||||
|
||||
__count_extents(&cnt, 2 * 1,
|
||||
sizeof(struct scoutfs_free_extent_blkno_key));
|
||||
__count_extents(&cnt, 2, sizeof(struct scoutfs_file_extent_key));
|
||||
cnt.items += 1 + nr_free;
|
||||
cnt.keys += sizeof(struct scoutfs_block_mapping_key) +
|
||||
(nr_free * sizeof(struct scoutfs_free_bits_key));
|
||||
cnt.vals += SCOUTFS_BLOCK_MAPPING_MAX_BYTES +
|
||||
(nr_free * sizeof(struct scoutfs_free_bits));
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
1769
kmod/src/data.c
1769
kmod/src/data.c
File diff suppressed because it is too large
Load Diff
@@ -12,4 +12,6 @@ int scoutfs_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
int scoutfs_data_setup(struct super_block *sb);
|
||||
void scoutfs_data_destroy(struct super_block *sb);
|
||||
|
||||
int __init scoutfs_data_test(void);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -852,7 +852,7 @@ static int symlink_item_ops(struct super_block *sb, int op, u64 ino,
|
||||
for (i = 0; i < nr; i++) {
|
||||
|
||||
init_symlink_key(&key, &skey, ino, i);
|
||||
bytes = min(size, SCOUTFS_MAX_VAL_SIZE);
|
||||
bytes = min_t(u64, size, SCOUTFS_MAX_VAL_SIZE);
|
||||
scoutfs_kvec_init(val, (void *)target, bytes);
|
||||
|
||||
if (op == SYM_CREATE)
|
||||
|
||||
@@ -252,8 +252,8 @@ struct scoutfs_segment_block {
|
||||
(SCOUTFS_INODE_INDEX_DATA_SEQ_TYPE - SCOUTFS_INODE_INDEX_SIZE_TYPE + 1)
|
||||
|
||||
/* node zone */
|
||||
#define SCOUTFS_FREE_EXTENT_BLKNO_TYPE 11
|
||||
#define SCOUTFS_FREE_EXTENT_BLOCKS_TYPE 12
|
||||
#define SCOUTFS_FREE_BITS_SEGNO_TYPE 1
|
||||
#define SCOUTFS_FREE_BITS_BLKNO_TYPE 2
|
||||
|
||||
/* fs zone */
|
||||
#define SCOUTFS_INODE_TYPE 1
|
||||
@@ -262,7 +262,7 @@ struct scoutfs_segment_block {
|
||||
#define SCOUTFS_READDIR_TYPE 4
|
||||
#define SCOUTFS_LINK_BACKREF_TYPE 5
|
||||
#define SCOUTFS_SYMLINK_TYPE 6
|
||||
#define SCOUTFS_FILE_EXTENT_TYPE 7
|
||||
#define SCOUTFS_BLOCK_MAPPING_TYPE 7
|
||||
#define SCOUTFS_ORPHAN_TYPE 8
|
||||
|
||||
#define SCOUTFS_MAX_TYPE 16 /* power of 2 is efficient */
|
||||
@@ -299,38 +299,70 @@ struct scoutfs_link_backref_key {
|
||||
__u8 name[0];
|
||||
} __packed;
|
||||
|
||||
|
||||
/* no value */
|
||||
struct scoutfs_file_extent_key {
|
||||
/* key is bytes of encoded block mapping */
|
||||
struct scoutfs_block_mapping_key {
|
||||
__u8 zone;
|
||||
__be64 ino;
|
||||
__u8 type;
|
||||
__be64 last_blk_off;
|
||||
__be64 last_blkno;
|
||||
__be64 blocks;
|
||||
__u8 flags;
|
||||
__be64 base;
|
||||
} __packed;
|
||||
|
||||
#define SCOUTFS_FILE_EXTENT_OFFLINE (1 << 0)
|
||||
/* each mapping item describes a fixed number of blocks */
|
||||
#define SCOUTFS_BLOCK_MAPPING_SHIFT 6
|
||||
#define SCOUTFS_BLOCK_MAPPING_BLOCKS (1 << SCOUTFS_BLOCK_MAPPING_SHIFT)
|
||||
#define SCOUTFS_BLOCK_MAPPING_MASK (SCOUTFS_BLOCK_MAPPING_BLOCKS - 1)
|
||||
|
||||
/* no value */
|
||||
struct scoutfs_free_extent_blkno_key {
|
||||
/*
|
||||
* The mapping item value is a byte stream that encodes the value of the
|
||||
* mapped blocks. The first byte contains the last index that contains
|
||||
* a mapped block in its low bits. The high bits contain the control
|
||||
* bits for the first (and possibly only) mapped block.
|
||||
*
|
||||
* From then on we consume the control bits in the current control byte
|
||||
* for each mapped block. Each block has two bits that describe the
|
||||
* block: zero, incremental from previous block, delta encoded, and
|
||||
* offline. If we run out of control bits then we consume the next byte
|
||||
* in the stream for additional control bits. If we have a delta
|
||||
* encoded block then we consume its encoded bytes from the byte stream.
|
||||
*/
|
||||
|
||||
#define SCOUTFS_BLOCK_ENC_ZERO 0
|
||||
#define SCOUTFS_BLOCK_ENC_INC 1
|
||||
#define SCOUTFS_BLOCK_ENC_DELTA 2
|
||||
#define SCOUTFS_BLOCK_ENC_OFFLINE 3
|
||||
#define SCOUTFS_BLOCK_ENC_MASK 3
|
||||
|
||||
#define SCOUTFS_ZIGZAG_MAX_BYTES (DIV_ROUND_UP(64, 7))
|
||||
|
||||
/*
|
||||
* the largest block mapping has: nr byte, ctl bytes for all blocks, and
|
||||
* worst case zigzag encodings for all blocks.
|
||||
*/
|
||||
#define SCOUTFS_BLOCK_MAPPING_MAX_BYTES \
|
||||
(1 + (SCOUTFS_BLOCK_MAPPING_BLOCKS / 4) + \
|
||||
(SCOUTFS_BLOCK_MAPPING_BLOCKS * SCOUTFS_ZIGZAG_MAX_BYTES))
|
||||
|
||||
/* free bit bitmaps contain a segment's worth of blocks */
|
||||
#define SCOUTFS_FREE_BITS_SHIFT \
|
||||
SCOUTFS_SEGMENT_BLOCK_SHIFT
|
||||
#define SCOUTFS_FREE_BITS_BITS \
|
||||
(1 << SCOUTFS_FREE_BITS_SHIFT)
|
||||
#define SCOUTFS_FREE_BITS_MASK \
|
||||
(SCOUTFS_FREE_BITS_BITS - 1)
|
||||
#define SCOUTFS_FREE_BITS_U64S \
|
||||
DIV_ROUND_UP(SCOUTFS_FREE_BITS_BITS, 64)
|
||||
|
||||
struct scoutfs_free_bits_key {
|
||||
__u8 zone;
|
||||
__be64 node_id;
|
||||
__u8 type;
|
||||
__be64 last_blkno;
|
||||
__be64 blocks;
|
||||
__be64 base;
|
||||
} __packed;
|
||||
|
||||
struct scoutfs_free_extent_blocks_key {
|
||||
__u8 zone;
|
||||
__be64 node_id;
|
||||
__u8 type;
|
||||
__be64 blocks;
|
||||
__be64 last_blkno;
|
||||
struct scoutfs_free_bits {
|
||||
__le64 bits[SCOUTFS_FREE_BITS_U64S];
|
||||
} __packed;
|
||||
|
||||
/* no value */
|
||||
struct scoutfs_orphan_key {
|
||||
__u8 zone;
|
||||
__be64 node_id;
|
||||
@@ -492,9 +524,7 @@ enum {
|
||||
#define SCOUTFS_MAX_KEY_SIZE \
|
||||
offsetof(struct scoutfs_link_backref_key, name[SCOUTFS_NAME_LEN + 1])
|
||||
|
||||
/* largest single val are dirents, larger broken up into units of this */
|
||||
#define SCOUTFS_MAX_VAL_SIZE \
|
||||
offsetof(struct scoutfs_dirent, name[SCOUTFS_NAME_LEN])
|
||||
#define SCOUTFS_MAX_VAL_SIZE SCOUTFS_BLOCK_MAPPING_MAX_BYTES
|
||||
|
||||
#define SCOUTFS_XATTR_MAX_NAME_LEN 255
|
||||
#define SCOUTFS_XATTR_MAX_SIZE 65536
|
||||
|
||||
@@ -224,22 +224,20 @@ static int pr_ino_idx(char *buf, struct scoutfs_key_buf *key, size_t size)
|
||||
be32_to_cpu(ikey->minor), be64_to_cpu(ikey->ino));
|
||||
}
|
||||
|
||||
static int pr_free_ext(char *buf, struct scoutfs_key_buf *key, size_t size)
|
||||
static int pr_free_bits(char *buf, struct scoutfs_key_buf *key, size_t size)
|
||||
{
|
||||
struct scoutfs_free_extent_blkno_key *fkey = key->data;
|
||||
|
||||
static char *type_strings[] = {
|
||||
[SCOUTFS_FREE_EXTENT_BLKNO_TYPE] = "fno",
|
||||
[SCOUTFS_FREE_EXTENT_BLOCKS_TYPE] = "fks",
|
||||
[SCOUTFS_FREE_BITS_SEGNO_TYPE] = "fsg",
|
||||
[SCOUTFS_FREE_BITS_BLKNO_TYPE] = "fbk",
|
||||
};
|
||||
struct scoutfs_free_bits_key *frk = key->data;
|
||||
|
||||
return snprintf_key(buf, size, key,
|
||||
sizeof(struct scoutfs_free_extent_blkno_key), 0,
|
||||
"nod.%llu.%s.%llu.%llu",
|
||||
be64_to_cpu(fkey->node_id),
|
||||
type_strings[fkey->type],
|
||||
be64_to_cpu(fkey->last_blkno),
|
||||
be64_to_cpu(fkey->blocks));
|
||||
sizeof(struct scoutfs_block_mapping_key), 0,
|
||||
"nod.%llu.%s.%llu",
|
||||
be64_to_cpu(frk->node_id),
|
||||
type_strings[frk->type],
|
||||
be64_to_cpu(frk->base));
|
||||
}
|
||||
|
||||
static int pr_orphan(char *buf, struct scoutfs_key_buf *key, size_t size)
|
||||
@@ -319,18 +317,15 @@ static int pr_symlink(char *buf, struct scoutfs_key_buf *key, size_t size)
|
||||
be64_to_cpu(skey->ino));
|
||||
}
|
||||
|
||||
static int pr_file_ext(char *buf, struct scoutfs_key_buf *key, size_t size)
|
||||
static int pr_block_mapping(char *buf, struct scoutfs_key_buf *key, size_t size)
|
||||
{
|
||||
struct scoutfs_file_extent_key *ekey = key->data;
|
||||
struct scoutfs_block_mapping_key *bmk = key->data;
|
||||
|
||||
return snprintf_key(buf, size, key,
|
||||
sizeof(struct scoutfs_file_extent_key), 0,
|
||||
"fs.%llu.ext.%llu.%llu.%llu.%x",
|
||||
be64_to_cpu(ekey->ino),
|
||||
be64_to_cpu(ekey->last_blk_off),
|
||||
be64_to_cpu(ekey->last_blkno),
|
||||
be64_to_cpu(ekey->blocks),
|
||||
ekey->flags);
|
||||
sizeof(struct scoutfs_block_mapping_key), 0,
|
||||
"fs.%llu.bmp.%llu",
|
||||
be64_to_cpu(bmk->ino),
|
||||
be64_to_cpu(bmk->base));
|
||||
}
|
||||
|
||||
const static key_printer_t key_printers[SCOUTFS_MAX_ZONE][SCOUTFS_MAX_TYPE] = {
|
||||
@@ -340,8 +335,8 @@ const static key_printer_t key_printers[SCOUTFS_MAX_ZONE][SCOUTFS_MAX_TYPE] = {
|
||||
pr_ino_idx,
|
||||
[SCOUTFS_INODE_INDEX_ZONE][SCOUTFS_INODE_INDEX_DATA_SEQ_TYPE] =
|
||||
pr_ino_idx,
|
||||
[SCOUTFS_NODE_ZONE][SCOUTFS_FREE_EXTENT_BLKNO_TYPE] = pr_free_ext,
|
||||
[SCOUTFS_NODE_ZONE][SCOUTFS_FREE_EXTENT_BLOCKS_TYPE] = pr_free_ext,
|
||||
[SCOUTFS_NODE_ZONE][SCOUTFS_FREE_BITS_SEGNO_TYPE] = pr_free_bits,
|
||||
[SCOUTFS_NODE_ZONE][SCOUTFS_FREE_BITS_BLKNO_TYPE] = pr_free_bits,
|
||||
[SCOUTFS_NODE_ZONE][SCOUTFS_ORPHAN_TYPE] = pr_orphan,
|
||||
[SCOUTFS_FS_ZONE][SCOUTFS_INODE_TYPE] = pr_inode,
|
||||
[SCOUTFS_FS_ZONE][SCOUTFS_XATTR_TYPE] = pr_xattr,
|
||||
@@ -349,7 +344,7 @@ const static key_printer_t key_printers[SCOUTFS_MAX_ZONE][SCOUTFS_MAX_TYPE] = {
|
||||
[SCOUTFS_FS_ZONE][SCOUTFS_READDIR_TYPE] = pr_readdir,
|
||||
[SCOUTFS_FS_ZONE][SCOUTFS_LINK_BACKREF_TYPE] = pr_link_backref,
|
||||
[SCOUTFS_FS_ZONE][SCOUTFS_SYMLINK_TYPE] = pr_symlink,
|
||||
[SCOUTFS_FS_ZONE][SCOUTFS_FILE_EXTENT_TYPE] = pr_file_ext,
|
||||
[SCOUTFS_FS_ZONE][SCOUTFS_BLOCK_MAPPING_TYPE] = pr_block_mapping,
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -382,7 +377,7 @@ int scoutfs_key_str_size(char *buf, struct scoutfs_key_buf *key, size_t size)
|
||||
struct scoutfs_inode_index_key *ikey = key->data;
|
||||
type = ikey->type;
|
||||
} else if (zone == SCOUTFS_NODE_ZONE) {
|
||||
struct scoutfs_free_extent_blkno_key *fkey = key->data;
|
||||
struct scoutfs_free_bits_key *fkey = key->data;
|
||||
type = fkey->type;
|
||||
} else if (zone == SCOUTFS_FS_ZONE) {
|
||||
struct scoutfs_inode_key *ikey = key->data;
|
||||
|
||||
@@ -392,6 +392,10 @@ static int __init scoutfs_module_init(void)
|
||||
|
||||
scoutfs_init_counters();
|
||||
|
||||
ret = scoutfs_data_test();
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
scoutfs_kset = kset_create_and_add("scoutfs", NULL, fs_kobj);
|
||||
if (!scoutfs_kset)
|
||||
return -ENOMEM;
|
||||
|
||||
Reference in New Issue
Block a user