diff --git a/utils/src/format.h b/utils/src/format.h
index dd991c2d..1f994eec 100644
--- a/utils/src/format.h
+++ b/utils/src/format.h
@@ -112,19 +112,29 @@ struct scoutfs_alloc_region {
 } __packed;
 
 /*
- * We really want these to be a power of two size so that they're naturally
- * aligned. This ensures that they won't cross page boundaries and we
- * can use pointers to them in the page vecs that make up segments without
- * funny business.
+ * The max number of links defines the max number of entries that we can
+ * index in O(log n) and the static list head storage size in the
+ * segment block. We always pay the static storage cost, which is tiny,
+ * and we can look at the number of items to know the greatest number of
+ * links and skip most of the initial 0 links.
+ */
+#define SCOUTFS_MAX_SKIP_LINKS 32
+
+/*
+ * Items are packed into segments and linked together in a skip list.
+ * Each item's header, links, key, and value are stored contiguously.
+ * They're not allowed to cross a block boundary.
  */
 struct scoutfs_segment_item {
-	__le64 seq;
-	__le32 key_off;
-	__le32 val_off;
 	__le16 key_len;
 	__le16 val_len;
-	__u8 padding[11];
 	__u8 flags;
+	__u8 nr_links;
+	__le32 skip_links[0];
+	/*
+	 * u8 key_bytes[key_len]
+	 * u8 val_bytes[val_len]
+	 */
 } __packed;
 
 #define SCOUTFS_ITEM_FLAG_DELETION (1 << 0)
@@ -138,11 +148,11 @@ struct scoutfs_segment_block {
 	__le32 _padding;
 	__le64 segno;
 	__le64 seq;
+	__le32 last_item_off;
+	__le32 total_bytes;
 	__le32 nr_items;
-	__le32 _moar_pads;
-	struct scoutfs_segment_item items[0];
-	/* packed keys */
-	/* packed vals */
+	__le32 skip_links[SCOUTFS_MAX_SKIP_LINKS];
+	/* packed items */
 } __packed;
 
 /*
@@ -160,7 +170,7 @@ struct scoutfs_segment_block {
 #define SCOUTFS_ORPHAN_KEY 10
 #define SCOUTFS_FREE_EXTENT_BLKNO_KEY 11
 #define SCOUTFS_FREE_EXTENT_BLOCKS_KEY 12
-#define SCOUTFS_INODE_INDEX_CTIME_KEY 13
+#define SCOUTFS_INODE_INDEX_CTIME_KEY 13 /* don't forget first and last */
 #define SCOUTFS_INODE_INDEX_MTIME_KEY 14
 #define SCOUTFS_INODE_INDEX_SIZE_KEY 15
 #define SCOUTFS_INODE_INDEX_META_SEQ_KEY 16
@@ -170,6 +180,11 @@ struct scoutfs_segment_block {
 #define SCOUTFS_NET_ADDR_KEY 254
 #define SCOUTFS_NET_LISTEN_KEY 255
 
+#define SCOUTFS_INODE_INDEX_FIRST SCOUTFS_INODE_INDEX_CTIME_KEY
+#define SCOUTFS_INODE_INDEX_LAST SCOUTFS_INODE_INDEX_DATA_SEQ_KEY
+#define SCOUTFS_INODE_INDEX_NR \
+	(SCOUTFS_INODE_INDEX_LAST - SCOUTFS_INODE_INDEX_FIRST + 1)
+
 /* value is struct scoutfs_inode */
 struct scoutfs_inode_key {
 	__u8 type;
@@ -388,6 +403,10 @@ enum {
 #define SCOUTFS_MAX_KEY_SIZE \
 	offsetof(struct scoutfs_link_backref_key, name[SCOUTFS_NAME_LEN + 1])
 
+/* largest single val are dirents, larger broken up into units of this */
+#define SCOUTFS_MAX_VAL_SIZE \
+	offsetof(struct scoutfs_dirent, name[SCOUTFS_NAME_LEN])
+
 /*
  * messages over the wire.
  */
@@ -433,11 +452,24 @@ struct scoutfs_net_manifest_entries {
 	struct scoutfs_manifest_entry ments[0];
 } __packed;
 
+/* XXX I dunno, totally made up */
+#define SCOUTFS_BULK_ALLOC_COUNT 32
+
 struct scoutfs_net_segnos {
 	__le16 nr;
 	__le64 segnos[0];
 } __packed;
 
+/* XXX eventually we'll have net compaction and will need agents to agree */
+
+/* one upper segment and fanout lower segments */
+#define SCOUTFS_COMPACTION_MAX_INPUT (1 + SCOUTFS_MANIFEST_FANOUT)
+/* sticky can add one, and so can item page alignment */
+#define SCOUTFS_COMPACTION_SLOP 2
+/* delete all inputs and insert all outputs (same goes for alloc|free segnos) */
+#define SCOUTFS_COMPACTION_MAX_UPDATE \
+	(2 * (SCOUTFS_COMPACTION_MAX_INPUT + SCOUTFS_COMPACTION_SLOP))
+
 enum {
 	SCOUTFS_NET_ALLOC_INODES = 0,
 	SCOUTFS_NET_MANIFEST_RANGE_ENTRIES,
diff --git a/utils/src/mkfs.c b/utils/src/mkfs.c
index 31d719ae..07723de2 100644
--- a/utils/src/mkfs.c
+++ b/utils/src/mkfs.c
@@ -84,6 +84,7 @@ static int write_new_fs(char *path, int fd)
 	struct scoutfs_ring_block *rblk;
 	struct scoutfs_ring_entry *rent;
 	struct scoutfs_segment_item *item;
+	__le32 *prev_link;
 	struct timeval tv;
 	char uuid_str[37];
 	u64 blkno;
@@ -214,17 +215,15 @@ static int write_new_fs(char *path, int fd)
 	sblk->segno = cpu_to_le64(first_segno);
 	sblk->seq = cpu_to_le64(1);
 	sblk->nr_items = cpu_to_le32(5);
+	prev_link = &sblk->skip_links[0];
 
-	item = &sblk->items[0];
-	ikey = (void *)&sblk->items[5];
-	inode = (void *)(ikey + 1) +
-			(4 * sizeof(struct scoutfs_inode_index_key));
+	item = (void *)(sblk + 1);
+	ikey = (void *)&item->skip_links[1];
+	inode = (void *)ikey + sizeof(struct scoutfs_inode_key);
 
-	item->seq = cpu_to_le64(1);
-	item->key_off = cpu_to_le32((long)ikey - (long)sblk);
-	item->val_off = cpu_to_le32((long)inode - (long)sblk);
 	item->key_len = cpu_to_le16(sizeof(struct scoutfs_inode_key));
 	item->val_len = cpu_to_le16(sizeof(struct scoutfs_inode));
+	item->nr_links = 1;
 
 	ikey->type = SCOUTFS_INODE_KEY;
 	ikey->ino = cpu_to_be64(SCOUTFS_ROOT_INO);
@@ -239,18 +238,19 @@ static int write_new_fs(char *path, int fd)
 	inode->mtime.sec = inode->atime.sec;
 	inode->mtime.nsec = inode->atime.nsec;
 
-	item = (void *)(item + 1);
-	idx_key = (void *)(ikey + 1);
+	*prev_link = cpu_to_le32((long)item -(long)sblk);
+	prev_link = &item->skip_links[0];
+
+	item = (void *)inode + sizeof(struct scoutfs_inode);
+	idx_key = (void *)&item->skip_links[1];
 
 	/* write the root inode index keys */
 	for (i = SCOUTFS_INODE_INDEX_CTIME_KEY;
 	     i <= SCOUTFS_INODE_INDEX_META_SEQ_KEY; i++) {
 
-		item->seq = cpu_to_le64(1);
-		item->key_off = cpu_to_le32((long)idx_key - (long)sblk);
-		item->val_off = 0;
 		item->key_len = cpu_to_le16(sizeof(*idx_key));
 		item->val_len = 0;
+		item->nr_links = 1;
 
 		idx_key->type = i;
 		idx_key->ino = cpu_to_be64(SCOUTFS_ROOT_INO);
@@ -267,10 +267,17 @@ static int write_new_fs(char *path, int fd)
 			break;
 		}
 
-		item = (void *)(item + 1);
-		idx_key = (void *)(idx_key + 1);
+		*prev_link = cpu_to_le32((long)item -(long)sblk);
+		prev_link = &item->skip_links[0];
+
+		sblk->last_item_off = cpu_to_le32((long)item - (long)sblk);
+
+		item = (void *)(idx_key + 1);
+		idx_key = (void *)&item->skip_links[1];
 	}
 
+	sblk->total_bytes = cpu_to_le32((long)item - (long)sblk);
+
 	ret = pwrite(fd, sblk, SCOUTFS_SEGMENT_SIZE,
 		     first_segno << SCOUTFS_SEGMENT_SHIFT);
 	if (ret != SCOUTFS_SEGMENT_SIZE) {
diff --git a/utils/src/print.c b/utils/src/print.c
index 7f2b5df6..72eb7f15 100644
--- a/utils/src/print.c
+++ b/utils/src/print.c
@@ -257,45 +257,29 @@ static print_func_t printers[] = {
 	[SCOUTFS_INODE_INDEX_DATA_SEQ_KEY] = print_inode_index,
 };
 
-/* utils uses big contiguous allocations */
-static void *off_ptr(struct scoutfs_segment_block *sblk, u32 off)
-{
-	return (char *)sblk + off;
-}
-
-static u32 pos_off(struct scoutfs_segment_block *sblk, u32 pos)
-{
-	return offsetof(struct scoutfs_segment_block, items[pos]);
-}
-
-static void *pos_ptr(struct scoutfs_segment_block *sblk, u32 pos)
-{
-	return off_ptr(sblk, pos_off(sblk, pos));
-}
-
-static void print_item(struct scoutfs_segment_block *sblk, u32 pos)
+static void print_item(struct scoutfs_segment_block *sblk,
+		       struct scoutfs_segment_item *item, u32 which, u32 off)
 {
 	print_func_t printer;
-	struct scoutfs_segment_item *item;
 	void *key;
 	void *val;
 	__u8 type;
+	int i;
 
-	item = pos_ptr(sblk, pos);
-
-	key = (char *)sblk + le32_to_cpu(item->key_off);
-	val = (char *)sblk + le32_to_cpu(item->val_off);
+	key = (char *)&item->skip_links[item->nr_links];
+	val = (char *)key + le16_to_cpu(item->key_len);
 	type = *(__u8 *)key;
 
 	printer = type < array_size(printers) ? printers[type] : NULL;
 
-	printf(" [%u]: type %u seq %llu key_off %u val_off %u key_len %u "
-	       "val_len %u flags %x%s\n",
-	       pos, type, le64_to_cpu(item->seq), le32_to_cpu(item->key_off),
-	       le32_to_cpu(item->val_off), le16_to_cpu(item->key_len),
-	       le16_to_cpu(item->val_len), item->flags,
-	       printer ? "" : " (unrecognized type)");
-	printf(" key: ");
+	printf(" [%u]: type %u off %u key_len %u val_len %u nr_links %u flags %x%s\n",
+		which, type, off, le16_to_cpu(item->key_len),
+		le16_to_cpu(item->val_len), item->nr_links,
+		item->flags, printer ? "" : " (unrecognized type)");
+	printf(" links:");
+	for (i = 0; i < item->nr_links; i++)
+		printf(" %u", le32_to_cpu(item->skip_links[i]));
+	printf("\n key: ");
 	print_key(key, le16_to_cpu(item->key_len));
 	printf("\n");
 
@@ -306,14 +290,25 @@ static void print_item(struct scoutfs_segment_block *sblk, u32 pos)
 
 static void print_segment_block(struct scoutfs_segment_block *sblk)
 {
-	printf(" sblk: segno %llu seq %llu nr_items %u\n",
+	int i;
+
+	printf(" sblk: segno %llu seq %llu last_item_off %u total_bytes %u "
+	       "nr_items %u\n",
 	       le64_to_cpu(sblk->segno), le64_to_cpu(sblk->seq),
+	       le32_to_cpu(sblk->last_item_off), le32_to_cpu(sblk->total_bytes),
 	       le32_to_cpu(sblk->nr_items));
+	printf(" links:");
+	/* links end at the first 0, but a full array has no terminator */
+	for (i = 0; i < SCOUTFS_MAX_SKIP_LINKS && sblk->skip_links[i]; i++)
+		printf(" %u", le32_to_cpu(sblk->skip_links[i]));
+	printf("\n");
 }
 
 static int print_segments(int fd, unsigned long *seg_map, u64 total)
 {
 	struct scoutfs_segment_block *sblk;
+	struct scoutfs_segment_item *item;
+	u32 off;
 	u64 s;
 	u64 i;
 
@@ -325,8 +320,12 @@ static int print_segments(int fd, unsigned long *seg_map, u64 total)
 		printf("segment segno %llu\n", s);
 		print_segment_block(sblk);
 
-		for (i = 0; i < le32_to_cpu(sblk->nr_items); i++)
-			print_item(sblk, i);
+		off = le32_to_cpu(sblk->skip_links[0]);
+		for (i = 0; i < le32_to_cpu(sblk->nr_items); i++) {
+			item = (void *)sblk + off;
+			print_item(sblk, item, i, off);
+			off = le32_to_cpu(item->skip_links[0]);
+		}
 
 		free(sblk);
 	}