#ifndef _SCOUTFS_FORMAT_H_
#define _SCOUTFS_FORMAT_H_

/* statfs(2) f_type */
#define SCOUTFS_SUPER_MAGIC	0x554f4353		/* "SCOU" */
/* super block id */
#define SCOUTFS_SUPER_ID	0x2e736674756f6373ULL	/* "scoutfs." */

#define SCOUTFS_BLOCK_SHIFT 12
#define SCOUTFS_BLOCK_SIZE (1 << SCOUTFS_BLOCK_SHIFT)
#define SCOUTFS_BLOCK_MASK (SCOUTFS_BLOCK_SIZE - 1)
#define SCOUTFS_PAGES_PER_BLOCK (SCOUTFS_BLOCK_SIZE / PAGE_SIZE)
#define SCOUTFS_BLOCK_PAGE_ORDER (SCOUTFS_BLOCK_SHIFT - PAGE_SHIFT)

/*
 * The super blocks leave some room at the start of the first block for
 * platform structures like boot loaders.
 */
#define SCOUTFS_SUPER_BLKNO ((64 * 1024) >> SCOUTFS_BLOCK_SHIFT)
#define SCOUTFS_SUPER_NR 2
#define SCOUTFS_BUDDY_BM_BLKNO (SCOUTFS_SUPER_BLKNO + SCOUTFS_SUPER_NR)
#define SCOUTFS_BUDDY_BM_NR 2

/*
 * This header is found at the start of every block so that we can
 * verify that it's what we were looking for.  The crc and padding
 * start the block so that the crc calculation operates on a nicely
 * 64bit aligned region.
 */
struct scoutfs_block_header {
	__le32 crc;
	__le32 _pad;
	__le64 fsid;
	__le64 seq;
	__le64 blkno;
} __packed;

/*
 * Block references include the sequence number so that we can detect
 * readers racing with writers and so that we can tell that we don't
 * need to follow a reference when traversing based on seqs.
 */
struct scoutfs_block_ref {
	__le64 blkno;
	__le64 seq;
} __packed;

struct scoutfs_bitmap_block {
	struct scoutfs_block_header hdr;
	__le64 bits[0];
} __packed;

/*
 * Track allocations from BLOCK_SIZE to (BLOCK_SIZE << ..._ORDERS).
 */
#define SCOUTFS_BUDDY_ORDERS 8
struct scoutfs_buddy_block {
	struct scoutfs_block_header hdr;
	__le32 order_counts[SCOUTFS_BUDDY_ORDERS];
	__le64 bits[0];
} __packed;

/*
 * If we had log2(raw bits) orders we'd fully use all of the raw bits in
 * the block.  We're close enough that the amount of space wasted at the
 * end (~1/256th of the block, ~64 bytes) isn't worth worrying about.
 */
#define SCOUTFS_BUDDY_ORDER0_BITS \
	(((SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_buddy_block)) * 8) / 2)

struct scoutfs_buddy_indirect {
	struct scoutfs_block_header hdr;
	__le64 order_totals[SCOUTFS_BUDDY_ORDERS];
	struct scoutfs_buddy_slot {
		__u8 free_orders;
		struct scoutfs_block_ref ref;
	} slots[0];
} __packed;

#define SCOUTFS_BUDDY_SLOTS \
	((SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_buddy_indirect)) / \
	 sizeof(struct scoutfs_buddy_slot))

/*
 * We should be able to make the offset smaller if neither dirents nor
 * data items use the full 64 bits.
 */
struct scoutfs_key {
	__le64 inode;
	__u8 type;
	__le64 offset;
} __packed;

/*
 * Currently we sort keys by the numeric value of the types, but that
 * isn't necessary.  We could have an arbitrary sort order.  So we
 * don't have to stress about cleverly allocating the types.
 */
#define SCOUTFS_INODE_KEY		1
#define SCOUTFS_XATTR_KEY		2
#define SCOUTFS_XATTR_NAME_HASH_KEY	3
#define SCOUTFS_XATTR_VAL_HASH_KEY	4
#define SCOUTFS_DIRENT_KEY		5
#define SCOUTFS_LINK_BACKREF_KEY	6
#define SCOUTFS_SYMLINK_KEY		7
#define SCOUTFS_BMAP_KEY		8

#define SCOUTFS_MAX_ITEM_LEN 512
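/*
 * A minimal comparison helper sketch for the key sort order described
 * above.  It assumes keys compare by inode first, then type, then
 * offset; scoutfs_key_cmp_sketch() is illustrative only and is not
 * part of the on-disk format.
 */
static inline int scoutfs_key_cmp_sketch(const struct scoutfs_key *a,
					 const struct scoutfs_key *b)
{
	if (le64_to_cpu(a->inode) != le64_to_cpu(b->inode))
		return le64_to_cpu(a->inode) < le64_to_cpu(b->inode) ? -1 : 1;
	if (a->type != b->type)
		return a->type < b->type ? -1 : 1;
	if (le64_to_cpu(a->offset) != le64_to_cpu(b->offset))
		return le64_to_cpu(a->offset) < le64_to_cpu(b->offset) ? -1 : 1;
	return 0;
}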
struct scoutfs_btree_root {
	__u8 height;
	struct scoutfs_block_ref ref;
} __packed;

/*
 * @free_end: records the byte offset of the first byte after the free
 * space in the block between the header and the first item.  New items
 * are allocated by subtracting the space they need.
 *
 * @free_reclaim: records the number of bytes of free space amongst the
 * items after free_end.  If a block is compacted then this much new
 * free space would be reclaimed.
 */
struct scoutfs_btree_block {
	struct scoutfs_block_header hdr;
	__le16 free_end;
	__le16 free_reclaim;
	__u8 nr_items;
	__le16 item_offs[0];
} __packed;

/*
 * The item sequence number is set to the dirty block's sequence number
 * when the item is modified.  It is not changed by splits or merges.
 */
struct scoutfs_btree_item {
	struct scoutfs_key key;
	__le64 seq;
	__le16 val_len;
	char val[0];
} __packed;

/* Blocks are no more than half free. */
#define SCOUTFS_BTREE_FREE_LIMIT \
	((SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_btree_block)) / 2)

#define SCOUTFS_UUID_BYTES 16

struct scoutfs_super_block {
	struct scoutfs_block_header hdr;
	__le64 id;
	__u8 uuid[SCOUTFS_UUID_BYTES];
	__le64 next_ino;
	__le64 total_blocks;
	__le32 buddy_blocks;
	struct scoutfs_btree_root btree_root;
	struct scoutfs_block_ref buddy_ind_ref;
	struct scoutfs_block_ref buddy_bm_ref;
} __packed;

#define SCOUTFS_ROOT_INO 1

struct scoutfs_timespec {
	__le64 sec;
	__le32 nsec;
} __packed;

/*
 * XXX
 *	- otime?
 *	- compat flags?
 *	- version?
 *	- generation?
 *	- be more careful with rdev?
 */
struct scoutfs_inode {
	__le64 size;
	__le64 blocks;
	__le64 link_counter;
	__le32 nlink;
	__le32 uid;
	__le32 gid;
	__le32 mode;
	__le32 rdev;
	__le32 salt;
	struct scoutfs_timespec atime;
	struct scoutfs_timespec ctime;
	struct scoutfs_timespec mtime;
} __packed;

/* like the block size, a reasonable min PATH_MAX across platforms */
#define SCOUTFS_SYMLINK_MAX_SIZE 4096

/*
 * Dirents are stored in items with an offset of the hash of their name.
 * Colliding names are packed into the value.
 */
struct scoutfs_dirent {
	__le64 ino;
	__le64 counter;
	__u8 type;
	__u8 name[0];
} __packed;

/*
 * Dirent items are stored at keys with the offset set to the hash of
 * the name.  Creation can find that hash values collide and will
 * attempt to linearly probe this many following hash values looking
 * for an unused value.
 *
 * In small directories this doesn't really matter because hash values
 * will so very rarely collide.  At around 50k items we start to see
 * our first collisions.  16 slots is still pretty quick to scan in the
 * btree and it gets us up into the hundreds of millions of entries
 * before enospc is returned as we run out of hash values.
 */
#define SCOUTFS_DIRENT_COLL_NR 16

#define SCOUTFS_NAME_LEN 255

/*
 * This is arbitrarily limiting the max size of the single buffer
 * that's needed in the inode_paths ioctl to return all the paths
 * that link to an inode.  The structures could easily support much
 * more than this but then we'd need to grow a more thorough interface
 * for iterating over referring paths.  That sounds horrible.
 */
#define SCOUTFS_LINK_MAX 255

/*
 * We only use 31 bits for readdir positions so that we don't confuse
 * old signed 32bit f_pos applications or those on the other side of
 * network protocols that have limited readdir positions.
 */
#define SCOUTFS_DIRENT_OFF_BITS 31
#define SCOUTFS_DIRENT_OFF_MASK ((1U << SCOUTFS_DIRENT_OFF_BITS) - 1)
/* getdents returns next pos with an entry, no entry at (f_pos)~0 */
#define SCOUTFS_DIRENT_LAST_POS (INT_MAX - 1)

enum {
	SCOUTFS_DT_FIFO = 0,
	SCOUTFS_DT_CHR,
	SCOUTFS_DT_DIR,
	SCOUTFS_DT_BLK,
	SCOUTFS_DT_REG,
	SCOUTFS_DT_LNK,
	SCOUTFS_DT_SOCK,
	SCOUTFS_DT_WHT,
};

#define SCOUTFS_MAX_XATTR_LEN 255
#define SCOUTFS_XATTR_NAME_HASH_MASK 7ULL

struct scoutfs_xattr {
	__u8 name_len;
	__u8 value_len;
	__u8 name[0];
} __packed;
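/*
 * An illustrative sketch of walking a serialized xattr, assuming the
 * value bytes immediately follow the name.  These helpers are
 * hypothetical and not part of the format itself.
 */
static inline __u8 *scoutfs_xattr_value_sketch(struct scoutfs_xattr *xat)
{
	/* the value starts right after the name bytes */
	return xat->name + xat->name_len;
}

static inline unsigned int scoutfs_xattr_bytes_sketch(struct scoutfs_xattr *xat)
{
	/* total serialized size: header, then name, then value */
	return sizeof(struct scoutfs_xattr) + xat->name_len + xat->value_len;
}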
/*
 * We use simple block map items to map an aligned, fixed group of
 * logical block offsets to physical blocks.  We make them a decent
 * size to reduce the item storage overhead per block referenced, but
 * we don't want them so large that they start to take up an
 * extraordinary amount of space for small files.  Items that map 8
 * blocks range from around 3% to 0.3% overhead for files that use
 * only one or all of the blocks in the mapping item.
 */
#define SCOUTFS_BLOCK_MAP_SHIFT 3
#define SCOUTFS_BLOCK_MAP_COUNT (1 << SCOUTFS_BLOCK_MAP_SHIFT)
#define SCOUTFS_BLOCK_MAP_MASK (SCOUTFS_BLOCK_MAP_COUNT - 1)

struct scoutfs_block_map {
	__le32 crc[SCOUTFS_BLOCK_MAP_COUNT];
	__le64 blkno[SCOUTFS_BLOCK_MAP_COUNT];
} __packed;

/*
 * link backrefs give us a way to find all the hard links that refer
 * to a target inode.  They're stored at an offset determined by an
 * advancing counter in their inode.
 */
struct scoutfs_link_backref {
	__le64 ino;
	__le64 offset;
} __packed;
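/*
 * An illustrative sketch of splitting a logical block number into the
 * mapping item that would cover it and the slot within that item.  It
 * assumes mapping items are keyed by the aligned group of logical
 * blocks they cover; the helpers are hypothetical and not part of the
 * format itself.
 */
static inline __u64 scoutfs_block_map_group_sketch(__u64 iblock)
{
	/* aligned group of logical blocks that keys the mapping item */
	return iblock >> SCOUTFS_BLOCK_MAP_SHIFT;
}

static inline unsigned int scoutfs_block_map_slot_sketch(__u64 iblock)
{
	/* index into the crc[] and blkno[] arrays within the item */
	return iblock & SCOUTFS_BLOCK_MAP_MASK;
}

#endif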