mirror of
https://github.com/versity/scoutfs.git
synced 2026-01-06 20:16:25 +00:00
Introduce chunk and segment terminology
The use of 'log' for all the large sizes was pretty confusing. Let's use 'chunk' to describe the large allocation size, since chunks hold other things as well as logs. Then use 'log segment' to describe the larger log structure stored in a chunk that's made up of all the little blocks.
This commit is contained in:
@@ -7,24 +7,24 @@
|
||||
#define SCOUTFS_SUPER_ID 0x2e736674756f6373ULL /* "scoutfs." */
|
||||
|
||||
/*
|
||||
* Structures are stored and referenced in fixed 4k chunks to
|
||||
* simplify block buffer access at run time.
|
||||
* Everything is stored in and addressed as 4k fixed size blocks. This
|
||||
* avoids having to manage contiguous cpu mappings of larger blocks.
|
||||
* Larger structures are read and written as multiple blocks.
|
||||
*/
|
||||
#define SCOUTFS_BLOCK_SHIFT 12
|
||||
#define SCOUTFS_BLOCK_SIZE (1 << SCOUTFS_BLOCK_SHIFT)
|
||||
|
||||
/*
|
||||
* Logs are a logical structure that is made up of a fixed number of
|
||||
* contiguously allocated blocks.
|
||||
* The allocator works on larger chunks. Smaller metadata structures
|
||||
* like the super blocks and the ring are stored in chunks.
|
||||
*
|
||||
* The allocator manages log-sized regions. Smaller metadata blocks
|
||||
* like the ring and super blocks are stored inside large log
|
||||
* allocations.
|
||||
* A log segment is a collection of smaller blocks (bloom filter, item blocks)
|
||||
* stored in a chunk.
|
||||
*/
|
||||
#define SCOUTFS_LOG_SHIFT 22
|
||||
#define SCOUTFS_LOG_SIZE (1 << SCOUTFS_LOG_SHIFT)
|
||||
#define SCOUTFS_LOG_BLOCK_SHIFT (SCOUTFS_LOG_SHIFT - SCOUTFS_BLOCK_SHIFT)
|
||||
#define SCOUTFS_BLOCKS_PER_LOG (1 << SCOUTFS_LOG_BLOCK_SHIFT)
|
||||
#define SCOUTFS_CHUNK_SHIFT 22
|
||||
#define SCOUTFS_CHUNK_SIZE (1 << SCOUTFS_CHUNK_SHIFT)
|
||||
#define SCOUTFS_CHUNK_BLOCK_SHIFT (SCOUTFS_CHUNK_SHIFT - SCOUTFS_BLOCK_SHIFT)
|
||||
#define SCOUTFS_BLOCKS_PER_CHUNK (1 << SCOUTFS_CHUNK_BLOCK_SHIFT)
|
||||
|
||||
/*
|
||||
* The super blocks leave some room at the start of the first block for
|
||||
@@ -50,22 +50,25 @@ struct scoutfs_block_header {
|
||||
#define SCOUTFS_UUID_BYTES 16
|
||||
|
||||
/*
|
||||
* The super is stored in a pair of blocks in log 0 on the device.
|
||||
* The super is stored in a pair of blocks in the first chunk on the
|
||||
* device.
|
||||
*
|
||||
* The ring layout blocks describe the location of the ring blocks. The
|
||||
* ring start and length refers to the logical ring blocks within that
|
||||
* storage which contain live data.
|
||||
* The ring map blocks describe the chunks that make up the ring.
|
||||
*
|
||||
* The rest of the ring fields describe the state of the ring blocks
|
||||
* that are stored in their chunks. The active portion of the ring
|
||||
* describes the current state of the system and is replayed on mount.
|
||||
*/
|
||||
struct scoutfs_super_block {
|
||||
struct scoutfs_block_header hdr;
|
||||
__le64 id;
|
||||
__u8 uuid[SCOUTFS_UUID_BYTES];
|
||||
__le64 total_logs;
|
||||
__le64 ring_layout_blkno;
|
||||
__le64 ring_layout_nr_blocks;
|
||||
__le64 ring_layout_seq;
|
||||
__le64 ring_block;
|
||||
__le64 ring_nr_blocks;
|
||||
__le64 total_chunks;
|
||||
__le64 ring_map_blkno;
|
||||
__le64 ring_map_seq;
|
||||
__le64 ring_first_block;
|
||||
__le64 ring_active_blocks;
|
||||
__le64 ring_total_blocks;
|
||||
__le64 ring_seq;
|
||||
} __packed;
|
||||
|
||||
@@ -84,9 +87,9 @@ struct scoutfs_key {
|
||||
#define SCOUTFS_INODE_KEY 128
|
||||
#define SCOUTFS_DIRENT_KEY 192
|
||||
|
||||
struct scoutfs_layout_block {
|
||||
struct scoutfs_ring_map_block {
|
||||
struct scoutfs_block_header hdr;
|
||||
__le32 nr_blocks;
|
||||
__le32 nr_chunks;
|
||||
__le64 blknos[0];
|
||||
} __packed;
|
||||
|
||||
@@ -96,13 +99,12 @@ struct scoutfs_ring_entry {
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* Ring blocks are 4k blocks stored inside the regions described by the
|
||||
* ring layout block referenced by the super.
|
||||
* Ring blocks are stored in chunks described by the ring map blocks.
|
||||
*
|
||||
* The manifest entries describe the position of a given block in the
|
||||
* manifest. They're keyed by the block number so that we can log
|
||||
* movement of a block in the manifest with one log entry and we can log
|
||||
* deletion with just the block number.
|
||||
* The manifest entries describe the position of a given log segment in
|
||||
* the manifest. They're keyed by the block number so that we can
|
||||
* record movement of a log segment in the manifest with one ring entry
|
||||
* and we can record deletion with just the block number.
|
||||
*/
|
||||
struct scoutfs_ring_block {
|
||||
struct scoutfs_block_header hdr;
|
||||
@@ -115,11 +117,6 @@ enum {
|
||||
SCOUTFS_RING_BITMAP,
|
||||
};
|
||||
|
||||
/*
|
||||
* Manifest entries are logged by their block number. This lets us log
|
||||
* a change with one entry and a removal with a tiny block number
|
||||
* without the key.
|
||||
*/
|
||||
struct scoutfs_ring_remove_manifest {
|
||||
__le64 blkno;
|
||||
} __packed;
|
||||
@@ -145,9 +142,9 @@ struct scoutfs_ring_bitmap {
|
||||
} __packed;
|
||||
|
||||
/*
|
||||
* To start the logs are a trivial single item block. We'll flesh this out
|
||||
* into larger blocks once the rest of the architecture is in
|
||||
* place.
|
||||
* To start the log segments are a trivial single item block. We'll
|
||||
* flesh this out into larger blocks once the rest of the architecture
|
||||
* is in place.
|
||||
*/
|
||||
struct scoutfs_item_block {
|
||||
struct scoutfs_block_header hdr;
|
||||
|
||||
@@ -42,7 +42,7 @@ static int write_new_fs(char *path, int fd)
|
||||
struct scoutfs_super_block *super;
|
||||
struct scoutfs_block_header hdr;
|
||||
struct scoutfs_inode *inode;
|
||||
struct scoutfs_layout_block *lout;
|
||||
struct scoutfs_ring_map_block *map;
|
||||
struct scoutfs_ring_block *ring;
|
||||
struct scoutfs_ring_entry *ent;
|
||||
struct scoutfs_ring_add_manifest *mani;
|
||||
@@ -54,7 +54,7 @@ static int write_new_fs(char *path, int fd)
|
||||
char uuid_str[37];
|
||||
struct stat st;
|
||||
unsigned int i;
|
||||
u64 total_logs;
|
||||
u64 total_chunks;
|
||||
u64 blkno;
|
||||
void *buf;
|
||||
int ret;
|
||||
@@ -80,14 +80,14 @@ static int write_new_fs(char *path, int fd)
|
||||
goto out;
|
||||
}
|
||||
|
||||
total_logs = st.st_size >> SCOUTFS_LOG_SHIFT;
|
||||
total_chunks = st.st_size >> SCOUTFS_CHUNK_SHIFT;
|
||||
|
||||
root_key.inode = cpu_to_le64(SCOUTFS_ROOT_INO);
|
||||
root_key.type = SCOUTFS_INODE_KEY;
|
||||
root_key.offset = 0;
|
||||
|
||||
/* super in chunk 0, first log segment in chunk 1 */
|
||||
blkno = 1 << SCOUTFS_LOG_BLOCK_SHIFT;
|
||||
blkno = 1 << SCOUTFS_CHUNK_BLOCK_SHIFT;
|
||||
|
||||
/* write a single log block with the root inode item */
|
||||
memset(buf, 0, SCOUTFS_BLOCK_SIZE);
|
||||
@@ -136,20 +136,20 @@ static int write_new_fs(char *path, int fd)
|
||||
bm->bits[0] = cpu_to_le64(~7ULL);
|
||||
bm->bits[1] = cpu_to_le64(~0ULL);
|
||||
|
||||
blkno += SCOUTFS_BLOCKS_PER_LOG;
|
||||
blkno += SCOUTFS_BLOCKS_PER_CHUNK;
|
||||
ret = write_block(fd, blkno, &ring->hdr);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/* the ring has a single block for now */
|
||||
/* the ring has a single chunk for now */
|
||||
memset(buf, 0, SCOUTFS_BLOCK_SIZE);
|
||||
lout = buf;
|
||||
lout->hdr = hdr;
|
||||
lout->nr_blocks = cpu_to_le32(1);
|
||||
lout->blknos[0] = cpu_to_le64(blkno);
|
||||
map = buf;
|
||||
map->hdr = hdr;
|
||||
map->nr_chunks = cpu_to_le32(1);
|
||||
map->blknos[0] = cpu_to_le64(blkno);
|
||||
|
||||
blkno += SCOUTFS_BLOCKS_PER_LOG;
|
||||
ret = write_block(fd, blkno, &lout->hdr);
|
||||
blkno += SCOUTFS_BLOCKS_PER_CHUNK;
|
||||
ret = write_block(fd, blkno, &map->hdr);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@@ -159,12 +159,12 @@ static int write_new_fs(char *path, int fd)
|
||||
super->hdr = hdr;
|
||||
super->id = cpu_to_le64(SCOUTFS_SUPER_ID);
|
||||
uuid_generate(super->uuid);
|
||||
super->total_logs = cpu_to_le64(total_logs);
|
||||
super->ring_layout_blkno = cpu_to_le64(blkno);
|
||||
super->ring_layout_nr_blocks = cpu_to_le64(1);
|
||||
super->ring_layout_seq = hdr.seq;
|
||||
super->ring_block = cpu_to_le64(0);
|
||||
super->ring_nr_blocks = cpu_to_le64(1);
|
||||
super->total_chunks = cpu_to_le64(total_chunks);
|
||||
super->ring_map_blkno = cpu_to_le64(blkno);
|
||||
super->ring_map_seq = hdr.seq;
|
||||
super->ring_first_block = cpu_to_le64(0);
|
||||
super->ring_active_blocks = cpu_to_le64(1);
|
||||
super->ring_total_blocks = cpu_to_le64(SCOUTFS_BLOCKS_PER_CHUNK);
|
||||
super->ring_seq = hdr.seq;
|
||||
|
||||
for (i = 0; i < SCOUTFS_SUPER_NR; i++) {
|
||||
@@ -184,10 +184,12 @@ static int write_new_fs(char *path, int fd)
|
||||
uuid_unparse(super->uuid, uuid_str);
|
||||
|
||||
printf("Created scoutfs filesystem:\n"
|
||||
" total logs: %llu\n"
|
||||
" chunk bytes: %u\n"
|
||||
" total chunks: %llu\n"
|
||||
" fsid: %llx\n"
|
||||
" uuid: %s\n",
|
||||
total_logs, le64_to_cpu(super->hdr.fsid), uuid_str);
|
||||
SCOUTFS_CHUNK_SIZE, total_chunks,
|
||||
le64_to_cpu(super->hdr.fsid), uuid_str);
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
|
||||
@@ -134,16 +134,16 @@ static int print_item_block(int fd, u64 nr)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int print_log_blocks(int fd, __le64 *live_logs, u64 total_logs)
|
||||
static int print_log_segments(int fd, __le64 *log_segs, u64 total_chunks)
|
||||
{
|
||||
int ret = 0;
|
||||
int err;
|
||||
s64 nr;
|
||||
|
||||
while ((nr = find_first_le_bit(live_logs, total_logs)) >= 0) {
|
||||
clear_le_bit(live_logs, nr);
|
||||
while ((nr = find_first_le_bit(log_segs, total_chunks)) >= 0) {
|
||||
clear_le_bit(log_segs, nr);
|
||||
|
||||
err = print_item_block(fd, nr << SCOUTFS_LOG_BLOCK_SHIFT);
|
||||
err = print_item_block(fd, nr << SCOUTFS_CHUNK_BLOCK_SHIFT);
|
||||
if (!ret && err)
|
||||
ret = err;
|
||||
}
|
||||
@@ -202,8 +202,8 @@ static void print_ring_entry(int fd, struct scoutfs_ring_entry *ent)
|
||||
}
|
||||
}
|
||||
|
||||
static void update_live_logs(struct scoutfs_ring_entry *ent,
|
||||
__le64 *live_logs)
|
||||
static void update_log_segs(struct scoutfs_ring_entry *ent,
|
||||
__le64 *log_segs)
|
||||
{
|
||||
struct scoutfs_ring_remove_manifest *rem;
|
||||
struct scoutfs_ring_add_manifest *add;
|
||||
@@ -212,18 +212,18 @@ static void update_live_logs(struct scoutfs_ring_entry *ent,
|
||||
switch(ent->type) {
|
||||
case SCOUTFS_RING_REMOVE_MANIFEST:
|
||||
rem = (void *)(ent + 1);
|
||||
bit = le64_to_cpu(rem->blkno) >> SCOUTFS_LOG_BLOCK_SHIFT;
|
||||
clear_le_bit(live_logs, bit);
|
||||
bit = le64_to_cpu(rem->blkno) >> SCOUTFS_CHUNK_BLOCK_SHIFT;
|
||||
clear_le_bit(log_segs, bit);
|
||||
break;
|
||||
case SCOUTFS_RING_ADD_MANIFEST:
|
||||
add = (void *)(ent + 1);
|
||||
bit = le64_to_cpu(add->blkno) >> SCOUTFS_LOG_BLOCK_SHIFT;
|
||||
set_le_bit(live_logs, bit);
|
||||
bit = le64_to_cpu(add->blkno) >> SCOUTFS_CHUNK_BLOCK_SHIFT;
|
||||
set_le_bit(log_segs, bit);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int print_ring_block(int fd, u64 blkno, __le64 *live_logs)
|
||||
static int print_ring_block(int fd, u64 blkno, __le64 *log_segs)
|
||||
{
|
||||
struct scoutfs_ring_block *ring;
|
||||
struct scoutfs_ring_entry *ent;
|
||||
@@ -245,7 +245,7 @@ static int print_ring_block(int fd, u64 blkno, __le64 *live_logs)
|
||||
for (i = 0; i < le16_to_cpu(ring->nr_entries); i++) {
|
||||
ent = (void *)((char *)ring + off);
|
||||
|
||||
update_live_logs(ent, live_logs);
|
||||
update_log_segs(ent, log_segs);
|
||||
print_ring_entry(fd, ent);
|
||||
|
||||
off += sizeof(struct scoutfs_ring_entry) +
|
||||
@@ -256,33 +256,33 @@ static int print_ring_block(int fd, u64 blkno, __le64 *live_logs)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Print the ring map block stored at blkno, then print every ring
 * block whose block number it lists.  log_segs is handed through to
 * print_ring_block() so that it can update the log segment bitmap as
 * ring entries are walked.
 *
 * Returns 0 on success, -ENOMEM if the map block could not be read,
 * or the first error reported while printing the listed ring blocks.
 * All listed blocks are still attempted after an error.
 */
static int print_map_block(int fd, u64 blkno, __le64 *log_segs)
{
	struct scoutfs_ring_map_block *map;
	int ret = 0;
	int err;
	int i;

	map = read_block(fd, blkno);
	if (!map)
		return -ENOMEM;

	printf("map block:\n");
	print_block_header(&map->hdr);
	printf(" nr_chunks: %u\n", le32_to_cpu(map->nr_chunks));

	printf(" blknos: ");
	for (i = 0; i < le32_to_cpu(map->nr_chunks); i++)
		printf(" %llu\n", le64_to_cpu(map->blknos[i]));

	for (i = 0; i < le32_to_cpu(map->nr_chunks); i++) {
		err = print_ring_block(fd, le64_to_cpu(map->blknos[i]),
				       log_segs);
		/* remember only the first failure but keep going */
		if (err && !ret)
			ret = err;
	}

	free(map);
	/* propagate the saved error; this used to return 0 unconditionally */
	return ret;
}
|
||||
|
||||
@@ -290,8 +290,8 @@ static int print_super_brick(int fd)
|
||||
{
|
||||
struct scoutfs_super_block *super;
|
||||
char uuid_str[37];
|
||||
__le64 *live_logs;
|
||||
u64 total_logs;
|
||||
__le64 *log_segs;
|
||||
u64 total_chunks;
|
||||
size_t bytes;
|
||||
int ret = 0;
|
||||
int err;
|
||||
@@ -303,50 +303,49 @@ static int print_super_brick(int fd)
|
||||
|
||||
uuid_unparse(super->uuid, uuid_str);
|
||||
|
||||
total_logs = le64_to_cpu(super->total_logs);
|
||||
total_chunks = le64_to_cpu(super->total_chunks);
|
||||
|
||||
printf("super:\n");
|
||||
print_block_header(&super->hdr);
|
||||
printf(" id: %llx\n"
|
||||
" uuid: %s\n"
|
||||
" total_logs: %llu\n"
|
||||
" ring_layout_blkno: %llu\n"
|
||||
" ring_layout_nr_blocks: %llu\n"
|
||||
" ring_layout_seq: %llu\n"
|
||||
" ring_block: %llu\n"
|
||||
" ring_seq: %llu\n"
|
||||
" ring_nr_blocks: %llu\n",
|
||||
" total_chunks: %llu\n"
|
||||
" ring_map_blkno: %llu\n"
|
||||
" ring_map_seq: %llu\n"
|
||||
" ring_first_block: %llu\n"
|
||||
" ring_active_blocks: %llu\n"
|
||||
" ring_total_blocks: %llu\n"
|
||||
" ring_seq: %llu\n",
|
||||
le64_to_cpu(super->id),
|
||||
uuid_str,
|
||||
total_logs,
|
||||
le64_to_cpu(super->ring_layout_blkno),
|
||||
le64_to_cpu(super->ring_layout_nr_blocks),
|
||||
le64_to_cpu(super->ring_layout_seq),
|
||||
le64_to_cpu(super->ring_block),
|
||||
le64_to_cpu(super->ring_nr_blocks),
|
||||
total_chunks,
|
||||
le64_to_cpu(super->ring_map_blkno),
|
||||
le64_to_cpu(super->ring_map_seq),
|
||||
le64_to_cpu(super->ring_first_block),
|
||||
le64_to_cpu(super->ring_active_blocks),
|
||||
le64_to_cpu(super->ring_total_blocks),
|
||||
le64_to_cpu(super->ring_seq));
|
||||
|
||||
/* XXX by hand? */
|
||||
bytes = (total_logs + 63) / 8;
|
||||
live_logs = malloc(bytes);
|
||||
if (!live_logs) {
|
||||
bytes = (total_chunks + 63) / 8;
|
||||
log_segs = malloc(bytes);
|
||||
if (!log_segs) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
memset(live_logs, 0, bytes);
|
||||
memset(log_segs, 0, bytes);
|
||||
|
||||
err = print_layout_block(fd, le64_to_cpu(super->ring_layout_blkno),
|
||||
live_logs);
|
||||
err = print_map_block(fd, le64_to_cpu(super->ring_map_blkno), log_segs);
|
||||
if (err && !ret)
|
||||
ret = err;
|
||||
|
||||
err = print_log_blocks(fd, live_logs, total_logs);
|
||||
err = print_log_segments(fd, log_segs, total_chunks);
|
||||
if (err && !ret)
|
||||
ret = err;
|
||||
|
||||
out:
|
||||
free(super);
|
||||
free(live_logs);
|
||||
free(log_segs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user