scoutfs: use block mapping items

Move to static mapping items instead of unbounded extents.
We get more predictable data structures and simpler code but still get
reasonably dense metadata.

We no longer need the extent code that split and merged extents,
tested for overlaps, and so on.  The functions that use the mappings
(get_block, fiemap, truncate) now have a pattern where they decode the
mapping item into an allocated native representation, do their work, and
encode the result back into the dense item.

We do have to grow the largest possible item value to fit the worst case
encoding expansion of random block numbers.

The local allocators are no longer two extents but are instead simple
bitmaps: one for full segments and one for individual blocks.  There are
helper functions to free and allocate segments and blocks, with careful
coordination of, for example, freeing a segment once all of its
constituent blocks are free.

_fiemap is refactored a bit to make it clearer what's going on.
There's one function that either merges the next bit with the currently
building extent or fills the current and starts recording from a
non-mergeable additional block.  The old loop worked this way but was
implemented with a single squirrely iteration over the extents.  This
wasn't feasible now that we're also iterating over blocks inside the
mapping items.  It's a lot clearer to call out to merge or fill the
fiemap entry.

The dirty item reservation counts for using the mappings are reduced
significantly because each modification no longer has to assume that it
might merge with two adjacent contiguous neighbours.

Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
Zach Brown
2017-09-13 14:41:51 -07:00
committed by Mark Fasheh
parent c4f3c26343
commit 1012ee5e8f
7 changed files with 1079 additions and 865 deletions

View File

@@ -206,50 +206,40 @@ static inline const struct scoutfs_item_count SIC_XATTR_SET(unsigned name_len,
}
/*
* Add the dirty item reservation for nr_mod extent modifications to cnt,
* where sz is the size of one extent key.
*
* Every modification is charged three items and three keys.  An
* insertion can delete two existing neighbours and create a third new
* extent; a removal can delete an existing extent and create the two
* extents that remain.
*/
static inline void __count_extents(struct scoutfs_item_count *cnt,
				   unsigned nr_mod, unsigned sz)
{
	unsigned nr_items = nr_mod * 3;

	cnt->items += nr_items;
	cnt->keys += nr_items * sz;
}
/*
* write_begin can refill local free extents after a bulk alloc rpc,
* alloc a block, delete an offline mapping, and insert the new allocated
* mapping.
* write_begin can add local free segment items, modify another to
* alloc, add a free blkno item, and dirty the mapping.
*
* Returns the item, key byte, and value byte counts to reserve.
*/
static inline const struct scoutfs_item_count SIC_WRITE_BEGIN(void)
{
struct scoutfs_item_count cnt = {0,};
BUILD_BUG_ON(sizeof(struct scoutfs_free_extent_blkno_key) !=
sizeof(struct scoutfs_free_extent_blocks_key));
/*
* Number of free bits items: the bulk alloc count plus two more,
* presumably the item modified to alloc and the added free blkno
* item described above -- TODO confirm.
*/
unsigned nr_free = SCOUTFS_BULK_ALLOC_COUNT + 1 + 1;
__count_dirty_inode(&cnt);
__count_extents(&cnt, 2 * (SCOUTFS_BULK_ALLOC_COUNT + 1),
sizeof(struct scoutfs_free_extent_blkno_key));
__count_extents(&cnt, 2, sizeof(struct scoutfs_file_extent_key));
/* one block mapping item plus nr_free free bits items */
cnt.items += 1 + nr_free;
cnt.keys += sizeof(struct scoutfs_block_mapping_key) +
(nr_free * sizeof(struct scoutfs_free_bits_key));
/* the mapping value is counted at its largest possible size */
cnt.vals += SCOUTFS_BLOCK_MAPPING_MAX_BYTES +
(nr_free * sizeof(struct scoutfs_free_bits));
return cnt;
}
/*
* Truncating a block can free an allocated block, delete an online
* mapping, and create an offline mapping.
* Truncating a block mapping item's worth of blocks can modify both
* free blkno and free segno items per block. Then the largest possible
* mapping item.
*
* Returns the item, key byte, and value byte counts to reserve.
*/
static inline const struct scoutfs_item_count SIC_TRUNC_BLOCK(void)
{
struct scoutfs_item_count cnt = {0,};
/* two free bits items (blkno and segno) for each block in a mapping item */
unsigned nr_free = (2 * SCOUTFS_BLOCK_MAPPING_BLOCKS);
__count_extents(&cnt, 2 * 1,
sizeof(struct scoutfs_free_extent_blkno_key));
__count_extents(&cnt, 2, sizeof(struct scoutfs_file_extent_key));
/* one block mapping item plus nr_free free bits items */
cnt.items += 1 + nr_free;
cnt.keys += sizeof(struct scoutfs_block_mapping_key) +
(nr_free * sizeof(struct scoutfs_free_bits_key));
/* the mapping value is counted at its largest possible size */
cnt.vals += SCOUTFS_BLOCK_MAPPING_MAX_BYTES +
(nr_free * sizeof(struct scoutfs_free_bits));
return cnt;
}

File diff suppressed because it is too large Load Diff

View File

@@ -12,4 +12,6 @@ int scoutfs_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
int scoutfs_data_setup(struct super_block *sb);
void scoutfs_data_destroy(struct super_block *sb);
int __init scoutfs_data_test(void);
#endif

View File

@@ -852,7 +852,7 @@ static int symlink_item_ops(struct super_block *sb, int op, u64 ino,
for (i = 0; i < nr; i++) {
init_symlink_key(&key, &skey, ino, i);
bytes = min(size, SCOUTFS_MAX_VAL_SIZE);
bytes = min_t(u64, size, SCOUTFS_MAX_VAL_SIZE);
scoutfs_kvec_init(val, (void *)target, bytes);
if (op == SYM_CREATE)

View File

@@ -252,8 +252,8 @@ struct scoutfs_segment_block {
(SCOUTFS_INODE_INDEX_DATA_SEQ_TYPE - SCOUTFS_INODE_INDEX_SIZE_TYPE + 1)
/* node zone */
#define SCOUTFS_FREE_EXTENT_BLKNO_TYPE 11
#define SCOUTFS_FREE_EXTENT_BLOCKS_TYPE 12
#define SCOUTFS_FREE_BITS_SEGNO_TYPE 1
#define SCOUTFS_FREE_BITS_BLKNO_TYPE 2
/* fs zone */
#define SCOUTFS_INODE_TYPE 1
@@ -262,7 +262,7 @@ struct scoutfs_segment_block {
#define SCOUTFS_READDIR_TYPE 4
#define SCOUTFS_LINK_BACKREF_TYPE 5
#define SCOUTFS_SYMLINK_TYPE 6
#define SCOUTFS_FILE_EXTENT_TYPE 7
#define SCOUTFS_BLOCK_MAPPING_TYPE 7
#define SCOUTFS_ORPHAN_TYPE 8
#define SCOUTFS_MAX_TYPE 16 /* power of 2 is efficient */
@@ -299,38 +299,70 @@ struct scoutfs_link_backref_key {
__u8 name[0];
} __packed;
/* no value */
struct scoutfs_file_extent_key {
/* value is bytes of encoded block mapping */
struct scoutfs_block_mapping_key {
__u8 zone;
__be64 ino;
__u8 type;
__be64 last_blk_off;
__be64 last_blkno;
__be64 blocks;
__u8 flags;
__be64 base;
} __packed;
#define SCOUTFS_FILE_EXTENT_OFFLINE (1 << 0)
/* each mapping item describes a fixed number of blocks */
#define SCOUTFS_BLOCK_MAPPING_SHIFT 6
#define SCOUTFS_BLOCK_MAPPING_BLOCKS (1 << SCOUTFS_BLOCK_MAPPING_SHIFT)
#define SCOUTFS_BLOCK_MAPPING_MASK (SCOUTFS_BLOCK_MAPPING_BLOCKS - 1)
/* no value */
struct scoutfs_free_extent_blkno_key {
/*
* The mapping item value is a byte stream that encodes the value of the
* mapped blocks. The first byte contains the last index that contains
* a mapped block in its low bits. The high bits contain the control
* bits for the first (and possibly only) mapped block.
*
* From then on we consume the control bits in the current control byte
* for each mapped block. Each block has two bits that describe the
* block: zero, incremental from previous block, delta encoded, and
* offline. If we run out of control bits then we consume the next byte
* in the stream for additional control bits. If we have a delta
* encoded block then we consume its encoded bytes from the byte stream.
*/
#define SCOUTFS_BLOCK_ENC_ZERO 0
#define SCOUTFS_BLOCK_ENC_INC 1
#define SCOUTFS_BLOCK_ENC_DELTA 2
#define SCOUTFS_BLOCK_ENC_OFFLINE 3
#define SCOUTFS_BLOCK_ENC_MASK 3
#define SCOUTFS_ZIGZAG_MAX_BYTES (DIV_ROUND_UP(64, 7))
/*
* the largest block mapping has: nr byte, ctl bytes for all blocks, and
* worst case zigzag encodings for all blocks.
*/
#define SCOUTFS_BLOCK_MAPPING_MAX_BYTES \
(1 + (SCOUTFS_BLOCK_MAPPING_BLOCKS / 4) + \
(SCOUTFS_BLOCK_MAPPING_BLOCKS * SCOUTFS_ZIGZAG_MAX_BYTES))
/* free bit bitmaps contain a segment's worth of blocks */
#define SCOUTFS_FREE_BITS_SHIFT \
SCOUTFS_SEGMENT_BLOCK_SHIFT
#define SCOUTFS_FREE_BITS_BITS \
(1 << SCOUTFS_FREE_BITS_SHIFT)
#define SCOUTFS_FREE_BITS_MASK \
(SCOUTFS_FREE_BITS_BITS - 1)
#define SCOUTFS_FREE_BITS_U64S \
DIV_ROUND_UP(SCOUTFS_FREE_BITS_BITS, 64)
struct scoutfs_free_bits_key {
__u8 zone;
__be64 node_id;
__u8 type;
__be64 last_blkno;
__be64 blocks;
__be64 base;
} __packed;
struct scoutfs_free_extent_blocks_key {
__u8 zone;
__be64 node_id;
__u8 type;
__be64 blocks;
__be64 last_blkno;
struct scoutfs_free_bits {
__le64 bits[SCOUTFS_FREE_BITS_U64S];
} __packed;
/* no value */
struct scoutfs_orphan_key {
__u8 zone;
__be64 node_id;
@@ -492,9 +524,7 @@ enum {
#define SCOUTFS_MAX_KEY_SIZE \
offsetof(struct scoutfs_link_backref_key, name[SCOUTFS_NAME_LEN + 1])
/* largest single val are dirents, larger broken up into units of this */
#define SCOUTFS_MAX_VAL_SIZE \
offsetof(struct scoutfs_dirent, name[SCOUTFS_NAME_LEN])
#define SCOUTFS_MAX_VAL_SIZE SCOUTFS_BLOCK_MAPPING_MAX_BYTES
#define SCOUTFS_XATTR_MAX_NAME_LEN 255
#define SCOUTFS_XATTR_MAX_SIZE 65536

View File

@@ -224,22 +224,20 @@ static int pr_ino_idx(char *buf, struct scoutfs_key_buf *key, size_t size)
be32_to_cpu(ikey->minor), be64_to_cpu(ikey->ino));
}
static int pr_free_ext(char *buf, struct scoutfs_key_buf *key, size_t size)
/*
* Format a free bits key as "nod.<node_id>.<type>.<base>", where <type>
* is "fsg" for segno free bits and "fbk" for blkno free bits.
*
* NOTE(review): the size passed to snprintf_key() below is
* sizeof(struct scoutfs_block_mapping_key) even though the key is read
* as a struct scoutfs_free_bits_key.  The two packed structs appear to
* have the same field layout, so the sizes should match, but this looks
* like a copy and paste slip -- confirm and consider using
* sizeof(struct scoutfs_free_bits_key).
*/
static int pr_free_bits(char *buf, struct scoutfs_key_buf *key, size_t size)
{
struct scoutfs_free_extent_blkno_key *fkey = key->data;
/* indexed by the key's type field */
static char *type_strings[] = {
[SCOUTFS_FREE_EXTENT_BLKNO_TYPE] = "fno",
[SCOUTFS_FREE_EXTENT_BLOCKS_TYPE] = "fks",
[SCOUTFS_FREE_BITS_SEGNO_TYPE] = "fsg",
[SCOUTFS_FREE_BITS_BLKNO_TYPE] = "fbk",
};
struct scoutfs_free_bits_key *frk = key->data;
return snprintf_key(buf, size, key,
sizeof(struct scoutfs_free_extent_blkno_key), 0,
"nod.%llu.%s.%llu.%llu",
be64_to_cpu(fkey->node_id),
type_strings[fkey->type],
be64_to_cpu(fkey->last_blkno),
be64_to_cpu(fkey->blocks));
sizeof(struct scoutfs_block_mapping_key), 0,
"nod.%llu.%s.%llu",
be64_to_cpu(frk->node_id),
type_strings[frk->type],
be64_to_cpu(frk->base));
}
static int pr_orphan(char *buf, struct scoutfs_key_buf *key, size_t size)
@@ -319,18 +317,15 @@ static int pr_symlink(char *buf, struct scoutfs_key_buf *key, size_t size)
be64_to_cpu(skey->ino));
}
static int pr_file_ext(char *buf, struct scoutfs_key_buf *key, size_t size)
/*
* Format a block mapping key as "fs.<ino>.bmp.<base>", converting the
* big-endian ino and base key fields to cpu order for printing.
*/
static int pr_block_mapping(char *buf, struct scoutfs_key_buf *key, size_t size)
{
struct scoutfs_file_extent_key *ekey = key->data;
struct scoutfs_block_mapping_key *bmk = key->data;
return snprintf_key(buf, size, key,
sizeof(struct scoutfs_file_extent_key), 0,
"fs.%llu.ext.%llu.%llu.%llu.%x",
be64_to_cpu(ekey->ino),
be64_to_cpu(ekey->last_blk_off),
be64_to_cpu(ekey->last_blkno),
be64_to_cpu(ekey->blocks),
ekey->flags);
sizeof(struct scoutfs_block_mapping_key), 0,
"fs.%llu.bmp.%llu",
be64_to_cpu(bmk->ino),
be64_to_cpu(bmk->base));
}
const static key_printer_t key_printers[SCOUTFS_MAX_ZONE][SCOUTFS_MAX_TYPE] = {
@@ -340,8 +335,8 @@ const static key_printer_t key_printers[SCOUTFS_MAX_ZONE][SCOUTFS_MAX_TYPE] = {
pr_ino_idx,
[SCOUTFS_INODE_INDEX_ZONE][SCOUTFS_INODE_INDEX_DATA_SEQ_TYPE] =
pr_ino_idx,
[SCOUTFS_NODE_ZONE][SCOUTFS_FREE_EXTENT_BLKNO_TYPE] = pr_free_ext,
[SCOUTFS_NODE_ZONE][SCOUTFS_FREE_EXTENT_BLOCKS_TYPE] = pr_free_ext,
[SCOUTFS_NODE_ZONE][SCOUTFS_FREE_BITS_SEGNO_TYPE] = pr_free_bits,
[SCOUTFS_NODE_ZONE][SCOUTFS_FREE_BITS_BLKNO_TYPE] = pr_free_bits,
[SCOUTFS_NODE_ZONE][SCOUTFS_ORPHAN_TYPE] = pr_orphan,
[SCOUTFS_FS_ZONE][SCOUTFS_INODE_TYPE] = pr_inode,
[SCOUTFS_FS_ZONE][SCOUTFS_XATTR_TYPE] = pr_xattr,
@@ -349,7 +344,7 @@ const static key_printer_t key_printers[SCOUTFS_MAX_ZONE][SCOUTFS_MAX_TYPE] = {
[SCOUTFS_FS_ZONE][SCOUTFS_READDIR_TYPE] = pr_readdir,
[SCOUTFS_FS_ZONE][SCOUTFS_LINK_BACKREF_TYPE] = pr_link_backref,
[SCOUTFS_FS_ZONE][SCOUTFS_SYMLINK_TYPE] = pr_symlink,
[SCOUTFS_FS_ZONE][SCOUTFS_FILE_EXTENT_TYPE] = pr_file_ext,
[SCOUTFS_FS_ZONE][SCOUTFS_BLOCK_MAPPING_TYPE] = pr_block_mapping,
};
/*
@@ -382,7 +377,7 @@ int scoutfs_key_str_size(char *buf, struct scoutfs_key_buf *key, size_t size)
struct scoutfs_inode_index_key *ikey = key->data;
type = ikey->type;
} else if (zone == SCOUTFS_NODE_ZONE) {
struct scoutfs_free_extent_blkno_key *fkey = key->data;
struct scoutfs_free_bits_key *fkey = key->data;
type = fkey->type;
} else if (zone == SCOUTFS_FS_ZONE) {
struct scoutfs_inode_key *ikey = key->data;

View File

@@ -392,6 +392,10 @@ static int __init scoutfs_module_init(void)
scoutfs_init_counters();
ret = scoutfs_data_test();
if (ret)
return ret;
scoutfs_kset = kset_create_and_add("scoutfs", NULL, fs_kobj);
if (!scoutfs_kset)
return -ENOMEM;