Remove format.h and ioctl.h from utils

Now that we're in one repo utils can get its format and ioctl headers
from the authoritative kmod files.  When we're building a dist tarball
we copy the files over so that the build from the dist tarball can use
them.

Signed-off-by: Zach Brown <zab@versity.com>
Zach Brown
2020-12-04 15:28:10 -08:00
parent aa6e210ac7
commit 86cf3ec4ab
4 changed files with 31 additions and 1365 deletions

utils/.gitignore

@@ -2,6 +2,8 @@
 *.d
 *.swp
 src/scoutfs
+src/format.h
+src/ioctl.h
 .sparse*
 .mock.build*
 cscope.*

utils/Makefile

@@ -1,11 +1,32 @@
-SCOUTFS_FORMAT_HASH := \
-	$(shell cat src/format.h src/ioctl.h | md5sum | cut -b1-16)
+#
+# The userspace utils and kernel module share definitions of physical
+# structures and ioctls. If we're in the repo we include the kmod
+# headers directly, and hash them directly to calculate the format hash.
+#
+# If we're creating a standalone tarball for distribution we copy the
+# headers out of the kmod dir into the tarball. And then when we're
+# building in that tarball we use the headers in src/ directly.
+#
+FMTIOC_H := format.h ioctl.h
+FMTIOC_DIST := $(addprefix src/,$(FMTIOC_H))
+FMTIOC_KMOD := $(addprefix ../kmod/src/,$(FMTIOC_H))
+ifneq ($(wildcard $(firstword $(FMTIOC_KMOD))),)
+HASH_FILES := $(FMTIOC_KMOD)
+else
+HASH_FILES := $(FMTIOC_DIST)
+endif
+SCOUTFS_FORMAT_HASH := $(shell cat $(HASH_FILES) | md5sum | cut -b1-16)
 
 CFLAGS := -Wall -O2 -Werror -D_FILE_OFFSET_BITS=64 -g -msse4.2 \
 	-Wpadded \
 	-fno-strict-aliasing \
 	-DSCOUTFS_FORMAT_HASH=0x$(SCOUTFS_FORMAT_HASH)LLU
+ifneq ($(wildcard $(firstword $(FMTIOC_KMOD))),)
+CFLAGS += -I../kmod/src
+endif
 
 BIN := src/scoutfs
 OBJ := $(patsubst %.c,%.o,$(wildcard src/*.c))
 DEPS := $(wildcard */*.d)
@@ -47,9 +68,14 @@ RPM_GITHASH := $(shell git rev-parse --short HEAD)
 TARFILE = scoutfs-utils-$(RPM_VERSION).tar
 
+#
+# make a stand alone buildable tarball for packaging, arguably this
+# shouldn't be included in the dist Makefile :)
+#
 dist: $(RPM_DIR) scoutfs-utils.spec
 	git archive --format=tar --prefix scoutfs-utils-$(RPM_VERSION)/ HEAD^{tree} > $(TARFILE)
-	@ tar rf $(TARFILE) --transform="s@\(.*\)@scoutfs-utils-$(RPM_VERSION)/\1@" scoutfs-utils.spec
+	tar rf $(TARFILE) --transform="s@\(.*\)@scoutfs-utils-$(RPM_VERSION)/\1@" scoutfs-utils.spec
+	tar rf $(TARFILE) --transform="s@.*\(src/.*\)@scoutfs-utils-$(RPM_VERSION)/\1@" $(FMTIOC_KMOD)
 
 clean:
 	@rm -f $(BIN) $(OBJ) $(DEPS) .sparse.*
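
The hash produced here reaches the utils as the SCOUTFS_FORMAT_HASH define, which has to agree with the hash the kernel module computed from the same headers. A minimal sketch of how a utility might compare the built-in value against the hash recorded in the super block follows; the helper name, the le64_to_cpu() conversion, and the errno use are assumptions for illustration, not code from this commit.

/* illustrative only: compare the built-in hash with the on-disk one */
static int check_format_hash(const struct scoutfs_super_block *super)
{
	if (le64_to_cpu(super->format_hash) != SCOUTFS_FORMAT_HASH)
		return -EINVAL;	/* utils and on-disk format disagree */

	return 0;
}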

utils/src/format.h

@@ -1,946 +0,0 @@
#ifndef _SCOUTFS_FORMAT_H_
#define _SCOUTFS_FORMAT_H_
/* statfs(2) f_type */
#define SCOUTFS_SUPER_MAGIC 0x554f4353 /* "SCOU" */
/* block header magic values, chosen at random */
#define SCOUTFS_BLOCK_MAGIC_SUPER 0x103c428b
#define SCOUTFS_BLOCK_MAGIC_BTREE 0xe597f96d
#define SCOUTFS_BLOCK_MAGIC_BLOOM 0x31995604
#define SCOUTFS_BLOCK_MAGIC_SRCH_BLOCK 0x897e4a7d
#define SCOUTFS_BLOCK_MAGIC_SRCH_PARENT 0xb23a2a05
#define SCOUTFS_BLOCK_MAGIC_ALLOC_LIST 0x8a93ac83
/*
* The super block, quorum block, and file data allocation granularity
* use the smaller 4KB block.
*/
#define SCOUTFS_BLOCK_SM_SHIFT 12
#define SCOUTFS_BLOCK_SM_SIZE (1 << SCOUTFS_BLOCK_SM_SHIFT)
#define SCOUTFS_BLOCK_SM_MASK (SCOUTFS_BLOCK_SM_SIZE - 1)
#define SCOUTFS_BLOCK_SM_PER_PAGE (PAGE_SIZE / SCOUTFS_BLOCK_SM_SIZE)
#define SCOUTFS_BLOCK_SM_SECTOR_SHIFT (SCOUTFS_BLOCK_SM_SHIFT - 9)
#define SCOUTFS_BLOCK_SM_SECTORS (1 << SCOUTFS_BLOCK_SM_SECTOR_SHIFT)
#define SCOUTFS_BLOCK_SM_MAX (U64_MAX >> SCOUTFS_BLOCK_SM_SHIFT)
#define SCOUTFS_BLOCK_SM_PAGES_PER (SCOUTFS_BLOCK_SM_SIZE / PAGE_SIZE)
#define SCOUTFS_BLOCK_SM_PAGE_ORDER (SCOUTFS_BLOCK_SM_SHIFT - PAGE_SHIFT)
/*
* The radix and btree structures, and the forest bloom block, use the
* larger 64KB metadata block size.
*/
#define SCOUTFS_BLOCK_LG_SHIFT 16
#define SCOUTFS_BLOCK_LG_SIZE (1 << SCOUTFS_BLOCK_LG_SHIFT)
#define SCOUTFS_BLOCK_LG_MASK (SCOUTFS_BLOCK_LG_SIZE - 1)
#define SCOUTFS_BLOCK_LG_PER_PAGE (PAGE_SIZE / SCOUTFS_BLOCK_LG_SIZE)
#define SCOUTFS_BLOCK_LG_SECTOR_SHIFT (SCOUTFS_BLOCK_LG_SHIFT - 9)
#define SCOUTFS_BLOCK_LG_SECTORS (1 << SCOUTFS_BLOCK_LG_SECTOR_SHIFT)
#define SCOUTFS_BLOCK_LG_MAX (U64_MAX >> SCOUTFS_BLOCK_LG_SHIFT)
#define SCOUTFS_BLOCK_LG_PAGES_PER (SCOUTFS_BLOCK_LG_SIZE / PAGE_SIZE)
#define SCOUTFS_BLOCK_LG_PAGE_ORDER (SCOUTFS_BLOCK_LG_SHIFT - PAGE_SHIFT)
#define SCOUTFS_BLOCK_SM_LG_SHIFT (SCOUTFS_BLOCK_LG_SHIFT - \
SCOUTFS_BLOCK_SM_SHIFT)
/*
* The super block leaves some room before the first block for platform
* structures like boot loaders.
*/
#define SCOUTFS_SUPER_BLKNO ((64ULL * 1024) >> SCOUTFS_BLOCK_SM_SHIFT)
/*
* A reasonably large region of aligned quorum blocks follow the super
* block. Each voting cycle reads the entire region so we don't want it
* to be too enormous. 256K seems like a reasonably chunky single IO.
* The number of blocks in the region also determines the number of
* mounts that have a reasonable probability of not overwriting each
* other's random block locations.
*/
#define SCOUTFS_QUORUM_BLKNO ((256ULL * 1024) >> SCOUTFS_BLOCK_SM_SHIFT)
#define SCOUTFS_QUORUM_BLOCKS ((256ULL * 1024) >> SCOUTFS_BLOCK_SM_SHIFT)
/*
* Start data on the data device aligned as well.
*/
#define SCOUTFS_DATA_DEV_START_BLKNO ((256ULL * 1024) >> SCOUTFS_BLOCK_SM_SHIFT)
#define SCOUTFS_UNIQUE_NAME_MAX_BYTES 64 /* includes null */
/*
* Base types used by other structures.
*/
struct scoutfs_timespec {
__le64 sec;
__le32 nsec;
__u8 __pad[4];
};
/* XXX ipv6 */
struct scoutfs_inet_addr {
__le32 addr;
__le16 port;
__u8 __pad[2];
};
/*
* This header is stored at the start of btree blocks and the super
* block for verification. The crc field is not included in the
* calculation of the crc.
*/
struct scoutfs_block_header {
__le32 crc;
__le32 magic;
__le64 fsid;
__le64 seq;
__le64 blkno;
};
/*
* scoutfs identifies all file system metadata items by a small key
* struct.
*
* Each item type maps their logical structures to the fixed fields in
* sort order. This lets us print keys without needing per-type
* formats.
*
* The keys are compared by considering the fields in struct order from
* most to least significant. They are considered a multi precision
* value when navigating the keys in ordered key space. We can
* increment them, subtract them from each other, etc.
*/
struct scoutfs_key {
__le64 _sk_first;
__le64 _sk_second;
__le64 _sk_third;
__u8 _sk_fourth;
__u8 sk_zone;
__u8 sk_type;
__u8 __pad[5];
};
/* inode index */
#define skii_major _sk_second
#define skii_ino _sk_third
/* node orphan inode */
#define sko_rid _sk_first
#define sko_ino _sk_second
/* inode */
#define ski_ino _sk_first
/* xattr parts */
#define skx_ino _sk_first
#define skx_name_hash _sk_second
#define skx_id _sk_third
#define skx_part _sk_fourth
/* directory entries */
#define skd_ino _sk_first
#define skd_major _sk_second
#define skd_minor _sk_third
/* symlink target */
#define sks_ino _sk_first
#define sks_nr _sk_second
/* data extents */
#define skdx_ino _sk_first
#define skdx_end _sk_second
#define skdx_len _sk_third
/* log trees */
#define sklt_rid _sk_first
#define sklt_nr _sk_second
/* lock clients */
#define sklc_rid _sk_first
/* seqs */
#define skts_trans_seq _sk_first
#define skts_rid _sk_second
/* mounted clients */
#define skmc_rid _sk_first
/* free extents by blkno */
#define skfb_end _sk_second
#define skfb_len _sk_third
/* free extents by len */
#define skfl_neglen _sk_second
#define skfl_blkno _sk_third
struct scoutfs_radix_block {
struct scoutfs_block_header hdr;
union {
struct scoutfs_radix_ref {
__le64 blkno;
__le64 seq;
__le64 sm_total;
__le64 lg_total;
} refs[0];
__le64 bits[0];
};
};
struct scoutfs_avl_root {
__le16 node;
};
struct scoutfs_avl_node {
__le16 parent;
__le16 left;
__le16 right;
__u8 height;
__u8 __pad[1];
};
/* when we split we want to have multiple items on each side */
#define SCOUTFS_BTREE_MAX_VAL_LEN 896
/*
* A 4EB test image measured a worst case height of 17. This is plenty
* generous.
*/
#define SCOUTFS_BTREE_MAX_HEIGHT 20
struct scoutfs_btree_ref {
__le64 blkno;
__le64 seq;
};
/*
* A height of X means that the first block read will have level X-1 and
* the leaves will have level 0.
*/
struct scoutfs_btree_root {
struct scoutfs_btree_ref ref;
__u8 height;
__u8 __pad[7];
};
struct scoutfs_btree_item {
struct scoutfs_avl_node node;
struct scoutfs_key key;
__le16 val_off;
__le16 val_len;
__u8 __pad[4];
};
struct scoutfs_btree_block {
struct scoutfs_block_header hdr;
struct scoutfs_avl_root item_root;
__le16 nr_items;
__le16 total_item_bytes;
__le16 mid_free_len;
__u8 level;
__u8 __pad[7];
struct scoutfs_btree_item items[0];
/* leaf blocks have a fixed size item offset hash table at the end */
};
#define SCOUTFS_BTREE_VALUE_ALIGN 8
/*
* Try to aim for a 75% load in a leaf full of items with no value.
* We'll almost never see this because most items have values and most
* blocks aren't full.
*/
#define SCOUTFS_BTREE_LEAF_ITEM_HASH_NR_UNALIGNED \
((SCOUTFS_BLOCK_LG_SIZE - sizeof(struct scoutfs_btree_block)) / \
(sizeof(struct scoutfs_btree_item) + (sizeof(__le16))) * 100 / 75)
#define SCOUTFS_BTREE_LEAF_ITEM_HASH_NR \
(round_up(SCOUTFS_BTREE_LEAF_ITEM_HASH_NR_UNALIGNED, \
SCOUTFS_BTREE_VALUE_ALIGN))
#define SCOUTFS_BTREE_LEAF_ITEM_HASH_BYTES \
(SCOUTFS_BTREE_LEAF_ITEM_HASH_NR * sizeof(__le16))
struct scoutfs_alloc_list_ref {
__le64 blkno;
__le64 seq;
};
/*
* first_nr tracks the nr of the first block in the list and is used for
* allocation sizing. total_nr is the sum of the nr of all the blocks in
* the list and is used for calculating total free block counts.
*/
struct scoutfs_alloc_list_head {
struct scoutfs_alloc_list_ref ref;
__le64 total_nr;
__le32 first_nr;
__u8 __pad[4];
};
/*
* While the main allocator uses extent items in btree blocks, metadata
* allocations for a single transaction are recorded in arrays in
* blocks. This limits the number of allocations and frees needed to
* cow and modify the structure. The blocks can be stored in a list
* which lets us create a persistent log of pending frees that are
* generated as we cow btree blocks to insert freed extents.
*
* The array floats in the block so that both adding and removing blknos
* only modifies an index.
*/
struct scoutfs_alloc_list_block {
struct scoutfs_block_header hdr;
struct scoutfs_alloc_list_ref next;
__le32 start;
__le32 nr;
__le64 blknos[0]; /* naturally aligned for sorting */
};
#define SCOUTFS_ALLOC_LIST_MAX_BLOCKS \
((SCOUTFS_BLOCK_LG_SIZE - sizeof(struct scoutfs_alloc_list_block)) / \
(member_sizeof(struct scoutfs_alloc_list_block, blknos[0])))
/*
* These can safely be initialized to all-zeros.
*/
struct scoutfs_alloc_root {
__le64 total_len;
struct scoutfs_btree_root root;
};
/* types of allocators, exposed to alloc_detail ioctl */
#define SCOUTFS_ALLOC_OWNER_NONE 0
#define SCOUTFS_ALLOC_OWNER_SERVER 1
#define SCOUTFS_ALLOC_OWNER_MOUNT 2
#define SCOUTFS_ALLOC_OWNER_SRCH 3
struct scoutfs_mounted_client_btree_val {
__u8 flags;
};
#define SCOUTFS_MOUNTED_CLIENT_VOTER (1 << 0)
/*
* srch files are a contiguous run of blocks with compressed entries
* described by a dense parent radix. The files can be stored in
* log_tree items when the files contain unsorted entries written by
* mounts during their transactions. Sorted files of increasing size
* are kept in a btree off the super for searching and further
* compacting.
*/
struct scoutfs_srch_entry {
__le64 hash;
__le64 ino;
__le64 id;
};
#define SCOUTFS_SRCH_ENTRY_MAX_BYTES (2 + (sizeof(__u64) * 3))
struct scoutfs_srch_ref {
__le64 blkno;
__le64 seq;
};
struct scoutfs_srch_file {
struct scoutfs_srch_entry first;
struct scoutfs_srch_entry last;
struct scoutfs_srch_ref ref;
__le64 blocks;
__le64 entries;
__u8 height;
__u8 __pad[7];
};
struct scoutfs_srch_parent {
struct scoutfs_block_header hdr;
struct scoutfs_srch_ref refs[0];
};
#define SCOUTFS_SRCH_PARENT_REFS \
((SCOUTFS_BLOCK_LG_SIZE - \
offsetof(struct scoutfs_srch_parent, refs)) / \
sizeof(struct scoutfs_srch_ref))
struct scoutfs_srch_block {
struct scoutfs_block_header hdr;
struct scoutfs_srch_entry first;
struct scoutfs_srch_entry last;
struct scoutfs_srch_entry tail;
__le32 entry_nr;
__le32 entry_bytes;
__u8 entries[0];
};
/*
* Decoding loads final small deltas with full __u64 loads. Rather than
* check the size before each load we stop coding entries past the point
* where a full size entry could overflow the block. A final entry can
* start at this byte count and consume the rest of the block, though
it's unlikely.
*/
#define SCOUTFS_SRCH_BLOCK_SAFE_BYTES \
(SCOUTFS_BLOCK_LG_SIZE - sizeof(struct scoutfs_srch_block) - \
SCOUTFS_SRCH_ENTRY_MAX_BYTES)
#define SCOUTFS_SRCH_LOG_BLOCK_LIMIT (1024 * 1024 / SCOUTFS_BLOCK_LG_SIZE)
#define SCOUTFS_SRCH_COMPACT_ORDER 2
#define SCOUTFS_SRCH_COMPACT_NR (1 << SCOUTFS_SRCH_COMPACT_ORDER)
/*
* A persistent record of a srch file compaction operation in progress.
*
* When compacting log files blk and pos aren't used. When compacting
* sorted files blk is the logical block number and pos is the byte
* offset of the next entry. When deleting files pos is the height of
* the level that we're deleting, and blk is the logical block offset of
* the next parent ref array index to descend through.
*/
struct scoutfs_srch_compact {
struct scoutfs_alloc_list_head meta_avail;
struct scoutfs_alloc_list_head meta_freed;
__le64 id;
__u8 nr;
__u8 flags;
__u8 __pad[6];
struct scoutfs_srch_file out;
struct scoutfs_srch_compact_input {
struct scoutfs_srch_file sfl;
__le64 blk;
__le64 pos;
} in[SCOUTFS_SRCH_COMPACT_NR];
};
/* server -> client: combine input log file entries into output file */
#define SCOUTFS_SRCH_COMPACT_FLAG_LOG (1 << 0)
/* server -> client: combine input sorted file entries into output file */
#define SCOUTFS_SRCH_COMPACT_FLAG_SORTED (1 << 1)
/* server -> client: delete input files */
#define SCOUTFS_SRCH_COMPACT_FLAG_DELETE (1 << 2)
/* client -> server: compaction phase (LOG,SORTED,DELETE) done */
#define SCOUTFS_SRCH_COMPACT_FLAG_DONE (1 << 4)
/* client -> server: compaction failed */
#define SCOUTFS_SRCH_COMPACT_FLAG_ERROR (1 << 5)
/*
* XXX I imagine we should rename these now that they've evolved to track
* all the btrees that clients use during a transaction. It's not just
* about item logs, it's about clients making changes to trees.
*/
struct scoutfs_log_trees {
struct scoutfs_alloc_list_head meta_avail;
struct scoutfs_alloc_list_head meta_freed;
struct scoutfs_btree_root item_root;
struct scoutfs_btree_ref bloom_ref;
struct scoutfs_alloc_root data_avail;
struct scoutfs_alloc_root data_freed;
struct scoutfs_srch_file srch_file;
__le64 max_item_vers;
__le64 rid;
__le64 nr;
};
struct scoutfs_log_item_value {
__le64 vers;
__u8 flags;
__u8 __pad[7];
__u8 data[0];
};
/*
* FS items are limited by the max btree value length with the log item
* value header.
*/
#define SCOUTFS_MAX_VAL_SIZE \
(SCOUTFS_BTREE_MAX_VAL_LEN - sizeof(struct scoutfs_log_item_value))
#define SCOUTFS_LOG_ITEM_FLAG_DELETION (1 << 0)
struct scoutfs_bloom_block {
struct scoutfs_block_header hdr;
__le64 total_set;
__le64 bits[0];
};
/*
* Item log trees are accompanied by a block of bits that make up a
* bloom filter which indicate if the item log trees may contain items
* covered by a lock. The log trees should be finalized and merged long
* before the bloom filters fill up and start returning excessive false
* positives.
*/
#define SCOUTFS_FOREST_BLOOM_NRS 3
#define SCOUTFS_FOREST_BLOOM_BITS \
(((SCOUTFS_BLOCK_LG_SIZE - sizeof(struct scoutfs_bloom_block)) / \
member_sizeof(struct scoutfs_bloom_block, bits[0])) * \
member_sizeof(struct scoutfs_bloom_block, bits[0]) * 8)
#define SCOUTFS_FOREST_BLOOM_FUNC_BITS (SCOUTFS_BLOCK_LG_SHIFT + 3)
/*
* Keys are first sorted by major key zones.
*/
#define SCOUTFS_INODE_INDEX_ZONE 1
#define SCOUTFS_RID_ZONE 2
#define SCOUTFS_FS_ZONE 3
#define SCOUTFS_LOCK_ZONE 4
/* Items only stored in server btrees */
#define SCOUTFS_LOG_TREES_ZONE 6
#define SCOUTFS_LOCK_CLIENTS_ZONE 7
#define SCOUTFS_TRANS_SEQ_ZONE 8
#define SCOUTFS_MOUNTED_CLIENT_ZONE 9
#define SCOUTFS_SRCH_ZONE 10
#define SCOUTFS_FREE_EXTENT_ZONE 11
/* inode index zone */
#define SCOUTFS_INODE_INDEX_META_SEQ_TYPE 1
#define SCOUTFS_INODE_INDEX_DATA_SEQ_TYPE 2
#define SCOUTFS_INODE_INDEX_NR 3 /* don't forget to update */
/* rid zone (also used in server alloc btree) */
#define SCOUTFS_ORPHAN_TYPE 1
/* fs zone */
#define SCOUTFS_INODE_TYPE 1
#define SCOUTFS_XATTR_TYPE 2
#define SCOUTFS_DIRENT_TYPE 3
#define SCOUTFS_READDIR_TYPE 4
#define SCOUTFS_LINK_BACKREF_TYPE 5
#define SCOUTFS_SYMLINK_TYPE 6
#define SCOUTFS_DATA_EXTENT_TYPE 7
/* lock zone, only ever found in lock ranges, never in persistent items */
#define SCOUTFS_RENAME_TYPE 1
/* srch zone, only in server btrees */
#define SCOUTFS_SRCH_LOG_TYPE 1
#define SCOUTFS_SRCH_BLOCKS_TYPE 2
#define SCOUTFS_SRCH_PENDING_TYPE 3
#define SCOUTFS_SRCH_BUSY_TYPE 4
/* free extents in allocator btrees in client and server, by blkno or len */
#define SCOUTFS_FREE_EXTENT_BLKNO_TYPE 1
#define SCOUTFS_FREE_EXTENT_LEN_TYPE 2
/* file data extents have start and len in key */
struct scoutfs_data_extent_val {
__le64 blkno;
__u8 flags;
__u8 __pad[7];
};
#define SEF_OFFLINE (1 << 0)
#define SEF_UNWRITTEN (1 << 1)
#define SEF_UNKNOWN (U8_MAX << 2)
/*
* The first xattr part item has a header that describes the xattr. The
* name and value are then packed into the following bytes in the first
* part item and overflow into the values of the rest of the part items.
*/
struct scoutfs_xattr {
__le16 val_len;
__u8 name_len;
__u8 __pad[5];
__u8 name[0];
};
/* XXX does this exist upstream somewhere? */
#define member_sizeof(TYPE, MEMBER) (sizeof(((TYPE *)0)->MEMBER))
#define SCOUTFS_UUID_BYTES 16
/*
* Mounts read all the quorum blocks and write to one random quorum
* block during a cycle. The min cycle time limits the per-mount iop
* load during elections. The random cycle delay makes it less likely
* that mounts will read and write at the same time and miss each
* other's writes. An election only completes if a quorum of mounts
* vote for a leader before any of their elections timeout. This is
* made less likely by the probability that mounts will overwrite each
* others random block locations. The max quorum count limits that
* probability. 9 mounts only have a 55% chance of writing to unique 4k
* blocks in a 256k region. The election timeout is set to include
* enough cycles to usually complete the election. Once a leader is
* elected it spends a number of cycles writing out blocks with itself
* logged as a leader. This reduces the possibility that servers
* will have their log entries overwritten and not be fenced.
*/
#define SCOUTFS_QUORUM_MAX_COUNT 9
#define SCOUTFS_QUORUM_CYCLE_LO_MS 10
#define SCOUTFS_QUORUM_CYCLE_HI_MS 20
#define SCOUTFS_QUORUM_TERM_LO_MS 250
#define SCOUTFS_QUORUM_TERM_HI_MS 500
#define SCOUTFS_QUORUM_ELECTED_LOG_CYCLES 10
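/*
 * Illustrative arithmetic, not from the original header: a 256KB
 * region holds 64 4KB quorum block locations, so the chance that 9
 * mounts all pick distinct random blocks is
 *   (64 * 63 * ... * 56) / 64^9 ~= 0.55
 * which is where the 55% figure above comes from.
 */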
struct scoutfs_quorum_block {
__le64 fsid;
__le64 blkno;
__le64 term;
__le64 write_nr;
__le64 voter_rid;
__le64 vote_for_rid;
__le32 crc;
__u8 log_nr;
__u8 __pad[3];
struct scoutfs_quorum_log {
__le64 term;
__le64 rid;
struct scoutfs_inet_addr addr;
} log[0];
};
#define SCOUTFS_QUORUM_LOG_MAX \
((SCOUTFS_BLOCK_SM_SIZE - sizeof(struct scoutfs_quorum_block)) / \
sizeof(struct scoutfs_quorum_log))
#define SCOUTFS_FLAG_IS_META_BDEV 0x01
struct scoutfs_super_block {
struct scoutfs_block_header hdr;
__le64 id;
__le64 format_hash;
__le64 flags;
__u8 uuid[SCOUTFS_UUID_BYTES];
__le64 next_ino;
__le64 next_trans_seq;
__le64 total_meta_blocks; /* both static and dynamic */
__le64 first_meta_blkno; /* first dynamically allocated */
__le64 last_meta_blkno;
__le64 total_data_blocks;
__le64 first_data_blkno;
__le64 last_data_blkno;
__le64 quorum_fenced_term;
__le64 quorum_server_term;
__le64 unmount_barrier;
__u8 quorum_count;
__u8 __pad[7];
struct scoutfs_inet_addr server_addr;
struct scoutfs_alloc_root meta_alloc[2];
struct scoutfs_alloc_root data_alloc;
struct scoutfs_alloc_list_head server_meta_avail[2];
struct scoutfs_alloc_list_head server_meta_freed[2];
struct scoutfs_btree_root fs_root;
struct scoutfs_btree_root logs_root;
struct scoutfs_btree_root lock_clients;
struct scoutfs_btree_root trans_seqs;
struct scoutfs_btree_root mounted_clients;
struct scoutfs_btree_root srch_root;
};
#define SCOUTFS_ROOT_INO 1
/*
* @meta_seq: advanced the first time an inode is updated in a given
* transaction. It can only advance again after the inode is written
* and a new transaction opens.
*
* @data_seq: advanced the first time a file's data (or size) is
* modified in a given transaction. It can only advance again after the
* file is written and a new transaction opens.
*
* @data_version: incremented every time the contents of a file could
* have changed. It is exposed via an ioctl and is then provided as an
* argument to data functions to protect racing modification.
*
* @online_blocks: The number of fixed 4k blocks currently allocated and
* storing data in the volume.
*
* @offline_blocks: The number of fixed 4k blocks that could be made
* online by staging.
*
* XXX
* - otime?
* - compat flags?
* - version?
* - generation?
* - be more careful with rdev?
*/
struct scoutfs_inode {
__le64 size;
__le64 meta_seq;
__le64 data_seq;
__le64 data_version;
__le64 online_blocks;
__le64 offline_blocks;
__le64 next_readdir_pos;
__le64 next_xattr_id;
__le32 nlink;
__le32 uid;
__le32 gid;
__le32 mode;
__le32 rdev;
__le32 flags;
struct scoutfs_timespec atime;
struct scoutfs_timespec ctime;
struct scoutfs_timespec mtime;
};
#define SCOUTFS_INO_FLAG_TRUNCATE 0x1
#define SCOUTFS_ROOT_INO 1
/* like the block size, a reasonable min PATH_MAX across platforms */
#define SCOUTFS_SYMLINK_MAX_SIZE 4096
/*
* Dirents are stored in multiple places to isolate contention when
* performing different operations: hashed by name for creation and
* lookup, at incrementing positions for readdir and resolving inodes to
* paths. Each entry has all the metadata needed to reference all the
* items (so an entry cached by lookup can be used to unlink all the
* items).
*/
struct scoutfs_dirent {
__le64 ino;
__le64 hash;
__le64 pos;
__u8 type;
__u8 __pad[7];
__u8 name[0];
};
#define SCOUTFS_NAME_LEN 255
/* S32_MAX avoids the (int) sign bit and might avoid sloppy bugs */
#define SCOUTFS_LINK_MAX S32_MAX
/* entries begin after . and .. */
#define SCOUTFS_DIRENT_FIRST_POS 2
/* getdents returns next pos with an entry, no entry at (f_pos)~0 */
#define SCOUTFS_DIRENT_LAST_POS (U64_MAX - 1)
enum scoutfs_dentry_type {
SCOUTFS_DT_FIFO = 0,
SCOUTFS_DT_CHR,
SCOUTFS_DT_DIR,
SCOUTFS_DT_BLK,
SCOUTFS_DT_REG,
SCOUTFS_DT_LNK,
SCOUTFS_DT_SOCK,
SCOUTFS_DT_WHT,
};
#define SCOUTFS_XATTR_MAX_NAME_LEN 255
#define SCOUTFS_XATTR_MAX_VAL_LEN 65535
#define SCOUTFS_XATTR_MAX_PART_SIZE SCOUTFS_MAX_VAL_SIZE
#define SCOUTFS_XATTR_NR_PARTS(name_len, val_len) \
DIV_ROUND_UP(sizeof(struct scoutfs_xattr) + name_len + val_len, \
(unsigned int)SCOUTFS_XATTR_MAX_PART_SIZE)
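/*
 * Worked example, not from the original header: SCOUTFS_MAX_VAL_SIZE
 * works out to 880 bytes (the 896 byte btree value limit minus the 16
 * byte log item value header), and struct scoutfs_xattr is 8 bytes, so
 * an xattr with a 10 byte name and a 5000 byte value packs into
 * DIV_ROUND_UP(8 + 10 + 5000, 880) == 6 part items.
 */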
#define SCOUTFS_LOCK_INODE_GROUP_NR 1024
#define SCOUTFS_LOCK_INODE_GROUP_MASK (SCOUTFS_LOCK_INODE_GROUP_NR - 1)
#define SCOUTFS_LOCK_SEQ_GROUP_MASK ((1ULL << 10) - 1)
/*
* messages over the wire.
*/
/*
* Greetings verify identity of communicating nodes. The sender sends
* their credentials and the receiver verifies them.
*
* @server_term: The raft term that elected the server. Initially 0
* from the client, sent by the server, then sent by the client as it
* tries to reconnect. Used to identify a client reconnecting to both
the same server after receiving a greeting response and to a new
* server after failover.
*
* @unmount_barrier: Incremented every time the remaining majority of
* quorum members all agree to leave. The server tells a quorum member
* the value that it's connecting under so that if the client sees the
* value increase in the super block then it knows that the server has
* processed its farewell and can safely unmount.
*
* @rid: The client's random id that was generated once as the mount
* started up. This identifies a specific remote mount across
* connections and servers. It's set to the client's rid in both the
* request and response for consistency.
*/
struct scoutfs_net_greeting {
__le64 fsid;
__le64 format_hash;
__le64 server_term;
__le64 unmount_barrier;
__le64 rid;
__le64 flags;
};
#define SCOUTFS_NET_GREETING_FLAG_FAREWELL (1 << 0)
#define SCOUTFS_NET_GREETING_FLAG_VOTER (1 << 1)
#define SCOUTFS_NET_GREETING_FLAG_INVALID (~(__u64)0 << 2)
/*
* This header precedes and describes all network messages sent over
* sockets.
*
* @seq: A sequence number that is increased for each message queued for
* send on the sender. The sender will never reorder messages in the
* send queue so this will always increase in recv on the receiver. The
* receiver can use this to drop messages that arrived twice after being
* resent across a newly connected socket for a given connection.
*
* @recv_seq: The sequence number of the last received message. The
* receiver is sending this to the sender in every message. The sender
* uses them to drop responses which have been delivered.
*
* @id: An increasing identifier that is set in each request. Responses
* specify the request that they're responding to.
*
* Error is only set to a translated errno and will only be found in
* response messages.
*/
struct scoutfs_net_header {
__le64 clock_sync_id;
__le64 seq;
__le64 recv_seq;
__le64 id;
__le16 data_len;
__u8 cmd;
__u8 flags;
__u8 error;
__u8 __pad[3];
__u8 data[0];
};
#define SCOUTFS_NET_FLAG_RESPONSE (1 << 0)
#define SCOUTFS_NET_FLAGS_UNKNOWN (U8_MAX << 1)
enum scoutfs_net_cmd {
SCOUTFS_NET_CMD_GREETING = 0,
SCOUTFS_NET_CMD_ALLOC_INODES,
SCOUTFS_NET_CMD_GET_LOG_TREES,
SCOUTFS_NET_CMD_COMMIT_LOG_TREES,
SCOUTFS_NET_CMD_GET_ROOTS,
SCOUTFS_NET_CMD_ADVANCE_SEQ,
SCOUTFS_NET_CMD_GET_LAST_SEQ,
SCOUTFS_NET_CMD_LOCK,
SCOUTFS_NET_CMD_LOCK_RECOVER,
SCOUTFS_NET_CMD_SRCH_GET_COMPACT,
SCOUTFS_NET_CMD_SRCH_COMMIT_COMPACT,
SCOUTFS_NET_CMD_FAREWELL,
SCOUTFS_NET_CMD_UNKNOWN,
};
/*
* Define a macro to evaluate another macro for each of the errnos we
* translate over the wire. This lets us keep our enum in sync with the
* mapping arrays to and from host errnos.
*/
#define EXPAND_EACH_NET_ERRNO \
EXPAND_NET_ERRNO(ENOENT) \
EXPAND_NET_ERRNO(ENOMEM) \
EXPAND_NET_ERRNO(EIO) \
EXPAND_NET_ERRNO(ENOSPC) \
EXPAND_NET_ERRNO(EINVAL)
#undef EXPAND_NET_ERRNO
#define EXPAND_NET_ERRNO(which) SCOUTFS_NET_ERR_##which,
enum scoutfs_net_errors {
SCOUTFS_NET_ERR_NONE = 0,
EXPAND_EACH_NET_ERRNO
SCOUTFS_NET_ERR_UNKNOWN,
};
/* arbitrarily chosen to be safely less than mss and allow 1k with header */
#define SCOUTFS_NET_MAX_DATA_LEN 1100
/*
* When there's no more free inodes this will be sent with ino = ~0 and
* nr = 0.
*/
struct scoutfs_net_inode_alloc {
__le64 ino;
__le64 nr;
};
struct scoutfs_net_roots {
struct scoutfs_btree_root fs_root;
struct scoutfs_btree_root logs_root;
struct scoutfs_btree_root srch_root;
};
struct scoutfs_net_lock {
struct scoutfs_key key;
__le64 write_version;
__u8 old_mode;
__u8 new_mode;
__u8 __pad[6];
};
struct scoutfs_net_lock_grant_response {
struct scoutfs_net_lock nl;
struct scoutfs_net_roots roots;
};
struct scoutfs_net_lock_recover {
__le16 nr;
__u8 __pad[6];
struct scoutfs_net_lock locks[0];
};
#define SCOUTFS_NET_LOCK_MAX_RECOVER_NR \
((SCOUTFS_NET_MAX_DATA_LEN - sizeof(struct scoutfs_net_lock_recover)) /\
sizeof(struct scoutfs_net_lock))
/* some enums for tracing */
enum scoutfs_lock_trace {
SLT_CLIENT,
SLT_SERVER,
SLT_GRANT,
SLT_INVALIDATE,
SLT_REQUEST,
SLT_RESPONSE,
};
/*
* Read and write locks operate as you'd expect. Multiple readers can
* hold read locks while writers are excluded. A single writer can hold
* a write lock which excludes other readers and writers. Writers can
* read while holding a write lock.
*
* Multiple writers can hold write only locks but they can not read,
* they can only generate dirty items. It's used when the system has
* other means of knowing that it's safe to overwrite items.
*
* The null mode provides no access and is used to destroy locks.
*/
enum scoutfs_lock_mode {
SCOUTFS_LOCK_NULL = 0,
SCOUTFS_LOCK_READ,
SCOUTFS_LOCK_WRITE,
SCOUTFS_LOCK_WRITE_ONLY,
SCOUTFS_LOCK_INVALID,
};
/*
* Scoutfs file handle structure - this can be copied out to userspace
* via open by handle or put on the wire from NFS.
*/
struct scoutfs_fid {
__le64 ino;
__le64 parent_ino;
};
#define FILEID_SCOUTFS 0x81
#define FILEID_SCOUTFS_WITH_PARENT 0x82
/*
* Identifiers for sources of corruption that can generate messages.
*/
enum scoutfs_corruption_sources {
SC_DIRENT_NAME_LEN = 0,
SC_DIRENT_BACKREF_NAME_LEN,
SC_DIRENT_READDIR_NAME_LEN,
SC_SYMLINK_INODE_SIZE,
SC_SYMLINK_MISSING_ITEM,
SC_SYMLINK_NOT_NULL_TERM,
SC_BTREE_BLOCK_LEVEL,
SC_BTREE_NO_CHILD_REF,
SC_INODE_BLOCK_COUNTS,
SC_NR_SOURCES,
};
#define SC_NR_LONGS DIV_ROUND_UP(SC_NR_SOURCES, BITS_PER_LONG)
#endif

utils/src/ioctl.h

@@ -1,416 +0,0 @@
#ifndef _SCOUTFS_IOCTL_H_
#define _SCOUTFS_IOCTL_H_
/*
* We naturally align explicit width fields in the ioctl structs so that
* userspace doesn't need to deal with padding or unaligned packing and
* we don't have to deal with 32/64 compat. It makes it a little
* awkward to communicate persistent packed structs through the ioctls
but that happens very rarely. An interesting special case is
0-length arrays that follow the structs. We make those start at the
* next aligned offset of the struct to be safe.
*
* This is enforced by pahole scripting in external build environments.
*/
/* XXX I have no idea how these are chosen. */
#define SCOUTFS_IOCTL_MAGIC 's'
/*
* Packed scoutfs keys rarely cross the ioctl boundary so we have a
* translation struct.
*/
struct scoutfs_ioctl_key {
__le64 _sk_first;
__le64 _sk_second;
__le64 _sk_third;
__u8 _sk_fourth;
__u8 sk_type;
__u8 sk_zone;
__u8 _pad[5];
};
struct scoutfs_ioctl_walk_inodes_entry {
__u64 major;
__u64 ino;
__u32 minor;
__u8 _pad[4];
};
/*
* Walk inodes in an index that is sorted by one of their fields.
*
* Each index is built from generic index items that have major and
* minor values that are set to the field being indexed. In time
* indices, for example, major is seconds and minor is nanoseconds.
*
* @first The first index entry that can be returned.
* @last The last index entry that can be returned.
@entries_ptr Pointer to memory containing the buffer for entry results.
* @nr_entries The number of entries that can fit in the buffer.
* @index Which index to walk, enumerated in _WALK_INODES_ constants.
*
* To start iterating first can be memset to 0 and last to 0xff. Then
* after each set of results first can be set to the last entry returned
* and then the fields can be incremented in reverse sort order (ino <
* minor < major) as each increasingly significant value wraps around to
* 0.
*
* These indexes are not strictly consistent. The items that back these
* index entries aren't updated with cluster locks so they're not
* guaranteed to be visible the moment you read after writing. They're
* only visible when the transaction that updated them is synced.
*
* In addition, the seq indexes will only allow walking through sequence
* space that has been consistent. This prevents old dirty entries from
* becoming visible after newer stable entries are displayed.
*
* If first is greater than last then the walk will return 0 entries.
*
* XXX invalidate before reading.
*/
struct scoutfs_ioctl_walk_inodes {
struct scoutfs_ioctl_walk_inodes_entry first;
struct scoutfs_ioctl_walk_inodes_entry last;
__u64 entries_ptr;
__u32 nr_entries;
__u8 index;
__u8 _pad[11]; /* padded to align walk_inodes_entry total size */
};
enum scoutfs_ino_walk_seq_type {
SCOUTFS_IOC_WALK_INODES_META_SEQ = 0,
SCOUTFS_IOC_WALK_INODES_DATA_SEQ,
SCOUTFS_IOC_WALK_INODES_UNKNOWN,
};
/*
* Adds entries to the user's buffer for each inode that is found in the
* given index between the first and last positions.
*/
#define SCOUTFS_IOC_WALK_INODES _IOR(SCOUTFS_IOCTL_MAGIC, 1, \
struct scoutfs_ioctl_walk_inodes)
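/*
 * Hypothetical usage sketch, not part of the original header: iterate
 * an index as described above.  first starts zeroed and last starts at
 * all 0xff; after each batch first is set to the last entry returned
 * and the fields are incremented in reverse sort order as they wrap.
 * Assumes the ioctl returns the number of entries copied out and that
 * <string.h>, <errno.h>, and <sys/ioctl.h> are included.
 */
static inline int walk_meta_seq_example(int fd)
{
	struct scoutfs_ioctl_walk_inodes_entry ents[128];
	struct scoutfs_ioctl_walk_inodes walk;
	int nr;

	memset(&walk, 0, sizeof(walk));
	memset(&walk.last, 0xff, sizeof(walk.last));
	walk.entries_ptr = (unsigned long)ents;
	walk.nr_entries = 128;
	walk.index = SCOUTFS_IOC_WALK_INODES_META_SEQ;

	while ((nr = ioctl(fd, SCOUTFS_IOC_WALK_INODES, &walk)) > 0) {
		/* ... consume ents[0 .. nr - 1] ... */
		walk.first = ents[nr - 1];
		if (++walk.first.ino == 0 && ++walk.first.minor == 0)
			walk.first.major++;
	}

	return nr < 0 ? -errno : 0;
}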
/*
* Fill the result buffer with the next absolute path to the target
* inode searching from a given position in a parent directory.
*
* @ino: The target ino that we're finding paths to. Constant across
* all the calls that make up an iteration over all the inode's paths.
*
* @dir_ino: The inode number of the directory containing the entry to
* our inode to search from. If this parent directory contains no more
* entries to our inode then we'll search through other parent directory
* inodes in inode order.
*
* @dir_pos: The position in the dir_ino parent directory of the entry
* to our inode to search from. If there is no entry at this position
* then we'll search through other entry positions in increasing order.
* If we exhaust the parent directory then we'll search through
* additional parent directories in inode order.
*
* @result_ptr: A pointer to the buffer where the result struct and
* absolute path will be stored.
*
* @result_bytes: The size of the buffer that will contain the result
* struct and the null terminated absolute path name.
*
* To start iterating set the desired target inode, dir_ino to 0,
* dir_pos to 0, and set result_ptr and _bytes to a sufficiently large
buffer (sizeof(result) + PATH_MAX is a solid choice).
*
* After each returned result set the next search dir_ino and dir_pos to
* the returned dir_ino and dir_pos. Then increment the search dir_pos,
* and if it wrapped to 0, increment dir_ino.
*
* This only walks back through full hard links. None of the returned
* paths will reflect symlinks to components in the path.
*
* This doesn't ensure that the caller has permissions to traverse the
* returned paths to the inode. It requires CAP_DAC_READ_SEARCH which
* bypasses permissions checking.
*
* This call is not serialized with any modification (create, rename,
* unlink) of the path components. It will return all the paths that
* were stable both before and after the call. It may or may not return
* paths which are created or unlinked during the call.
*
* On success 0 is returned and result struct is filled with the next
* absolute path. The path_bytes length of the path includes a null
* terminating byte. dir_ino and dir_pos refer to the position of the
* final component in its parent directory and can be advanced to search
* for the next terminal entry whose path is then built by walking up
* parent directories.
*
* ENOENT is returned when no paths are found.
*
* ENAMETOOLONG is returned when the result struct and path found
* doesn't fit in the result buffer.
*
* Many other errnos indicate hard failure to find the next path.
*/
struct scoutfs_ioctl_ino_path {
__u64 ino;
__u64 dir_ino;
__u64 dir_pos;
__u64 result_ptr;
__u16 result_bytes;
__u8 _pad[6];
};
struct scoutfs_ioctl_ino_path_result {
__u64 dir_ino;
__u64 dir_pos;
__u16 path_bytes;
__u8 _pad[6];
__u8 path[0];
};
/* Get a single path from the root to the given inode number */
#define SCOUTFS_IOC_INO_PATH _IOR(SCOUTFS_IOCTL_MAGIC, 2, \
struct scoutfs_ioctl_ino_path)
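/*
 * Hypothetical iteration sketch, not part of the original header:
 * print every path to an inode as described above.  dir_ino and
 * dir_pos start at 0, and after each result the search resumes from
 * the returned position with dir_pos incremented (carrying into
 * dir_ino on wrap).  ENOENT ends the walk.  Assumes <stdio.h>,
 * <limits.h>, <errno.h>, and <sys/ioctl.h> are included.
 */
static inline int print_ino_paths_example(int fd, __u64 ino)
{
	char buf[sizeof(struct scoutfs_ioctl_ino_path_result) + PATH_MAX];
	struct scoutfs_ioctl_ino_path_result *res = (void *)buf;
	struct scoutfs_ioctl_ino_path args = {
		.ino = ino,
		.result_ptr = (unsigned long)buf,
		.result_bytes = sizeof(buf),
	};

	for (;;) {
		if (ioctl(fd, SCOUTFS_IOC_INO_PATH, &args) < 0)
			return errno == ENOENT ? 0 : -errno;

		printf("%s\n", res->path);

		args.dir_ino = res->dir_ino;
		args.dir_pos = res->dir_pos;
		if (++args.dir_pos == 0)
			args.dir_ino++;
	}
}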
/*
* "Release" a contiguous range of logical blocks of file data.
* Released blocks are removed from the file system like truncation, but
* an offline record is left behind to trigger demand staging if the
* file is read.
*
* The starting block offset and number of blocks to release are in
* units 4KB blocks.
*
* The specified range can extend past i_size and can straddle sparse
* regions or blocks that are already offline. The only change it makes
* is to free and mark offline any existing blocks that intersect with
* the region.
*
* Returns 0 if the operation succeeds. If an error is returned then
* some partial region of the blocks in the region may have been marked
* offline.
*
* If the operation succeeds then inode metadata that reflects file data
* contents are not updated. This is intended to be transparent to the
* presentation of the data in the file.
*/
struct scoutfs_ioctl_release {
__u64 block;
__u64 count;
__u64 data_version;
};
#define SCOUTFS_IOC_RELEASE _IOW(SCOUTFS_IOCTL_MAGIC, 3, \
struct scoutfs_ioctl_release)
struct scoutfs_ioctl_stage {
__u64 data_version;
__u64 buf_ptr;
__u64 offset;
__s32 count;
__u32 _pad;
};
#define SCOUTFS_IOC_STAGE _IOW(SCOUTFS_IOCTL_MAGIC, 4, \
struct scoutfs_ioctl_stage)
/*
* Give the user inode fields that are not otherwise visible. statx()
* isn't always available and xattrs are relatively expensive.
*
* @valid_bytes stores the number of bytes that are valid in the
* structure. The caller sets this to the size of the struct that they
* understand. The kernel then fills and copies back the min of the
* size they and the user caller understand. The user can tell if a
* field is set if all of its bytes are within the valid_bytes that the
* kernel set on return.
*
* New fields are only added to the end of the struct.
*/
struct scoutfs_ioctl_stat_more {
__u64 valid_bytes;
__u64 meta_seq;
__u64 data_seq;
__u64 data_version;
__u64 online_blocks;
__u64 offline_blocks;
};
#define SCOUTFS_IOC_STAT_MORE _IOR(SCOUTFS_IOCTL_MAGIC, 5, \
struct scoutfs_ioctl_stat_more)
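/*
 * Hypothetical example, not part of the original header, of the
 * valid_bytes handshake described above: advertise the struct size we
 * understand, then only trust a field if it falls entirely within the
 * valid_bytes the kernel filled in.  Assumes <stddef.h>, <errno.h>,
 * and <sys/ioctl.h> are included.
 */
static inline int get_data_version_example(int fd, __u64 *data_version)
{
	struct scoutfs_ioctl_stat_more stm = {
		.valid_bytes = sizeof(stm),
	};

	if (ioctl(fd, SCOUTFS_IOC_STAT_MORE, &stm) < 0)
		return -errno;

	if (stm.valid_bytes < offsetof(struct scoutfs_ioctl_stat_more,
				       data_version) +
			      sizeof(stm.data_version))
		return -EOPNOTSUPP;	/* kernel doesn't report this field */

	*data_version = stm.data_version;
	return 0;
}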
struct scoutfs_ioctl_data_waiting_entry {
__u64 ino;
__u64 iblock;
__u8 op;
__u8 _pad[7];
};
#define SCOUTFS_IOC_DWO_READ (1 << 0)
#define SCOUTFS_IOC_DWO_WRITE (1 << 1)
#define SCOUTFS_IOC_DWO_CHANGE_SIZE (1 << 2)
#define SCOUTFS_IOC_DWO_UNKNOWN (U8_MAX << 3)
struct scoutfs_ioctl_data_waiting {
__u64 flags;
__u64 after_ino;
__u64 after_iblock;
__u64 ents_ptr;
__u16 ents_nr;
__u8 _pad[6];
};
#define SCOUTFS_IOC_DATA_WAITING_FLAGS_UNKNOWN (U8_MAX << 0)
#define SCOUTFS_IOC_DATA_WAITING _IOR(SCOUTFS_IOCTL_MAGIC, 6, \
struct scoutfs_ioctl_data_waiting)
/*
* If i_size is set then data_version must be non-zero. If the offline
flag is set then i_size must be set and an offline extent will be
* created from offset 0 to i_size.
*/
struct scoutfs_ioctl_setattr_more {
__u64 data_version;
__u64 i_size;
__u64 flags;
__u64 ctime_sec;
__u32 ctime_nsec;
__u8 _pad[4];
};
#define SCOUTFS_IOC_SETATTR_MORE_OFFLINE (1 << 0)
#define SCOUTFS_IOC_SETATTR_MORE_UNKNOWN (U8_MAX << 1)
#define SCOUTFS_IOC_SETATTR_MORE _IOW(SCOUTFS_IOCTL_MAGIC, 7, \
struct scoutfs_ioctl_setattr_more)
struct scoutfs_ioctl_listxattr_hidden {
__u64 id_pos;
__u64 buf_ptr;
__u32 buf_bytes;
__u32 hash_pos;
};
#define SCOUTFS_IOC_LISTXATTR_HIDDEN _IOR(SCOUTFS_IOCTL_MAGIC, 8, \
struct scoutfs_ioctl_listxattr_hidden)
/*
* Return the inode numbers of inodes which might contain the given
* xattr. The inode may not have a set xattr with that name, the caller
* must check the returned inodes to see if they match.
*
* @next_ino: The next inode number that could be returned. Initialized
* to 0 when first searching and set to one past the last inode number
* returned to continue searching.
* @last_ino: The last inode number that could be returned. U64_MAX to
* find all inodes.
* @name_ptr: The address of the name of the xattr to search for. It is
* not null terminated.
* @inodes_ptr: The address of the array of uint64_t inode numbers in
* which to store inode numbers that may contain the xattr. EFAULT may
* be returned if this address is not naturally aligned.
* @output_flags: Set as success is returned. If an error is returned
* then this field is undefined and should not be read.
* @nr_inodes: The number of elements in the array found at inodes_ptr.
* @name_bytes: The number of non-null bytes found in the name at
* name_ptr.
*
* This requires the CAP_SYS_ADMIN capability and will return -EPERM if
* it's not granted.
*
* The number of inode numbers stored in the inodes_ptr array is
* returned. If nr_inodes is 0 or last_ino is less than next_ino then 0
* will be immediately returned.
*
* Partial progress can be returned if an error is hit or if nr_inodes
* was larger than the internal limit on the number of inodes returned
* in a search pass. The _END output flag is set if all the results
* including last_ino were searched in this pass.
*
* It's valuable to provide a large inodes array so that all the results
* can be found in one search pass and _END can be set. There are
* significant constant costs for performing each search pass.
*/
struct scoutfs_ioctl_search_xattrs {
__u64 next_ino;
__u64 last_ino;
__u64 name_ptr;
__u64 inodes_ptr;
__u64 output_flags;
__u64 nr_inodes;
__u16 name_bytes;
__u8 _pad[6];
};
/* set in output_flags if returned inodes reached last_ino */
#define SCOUTFS_SEARCH_XATTRS_OFLAG_END (1ULL << 0)
#define SCOUTFS_IOC_SEARCH_XATTRS _IOR(SCOUTFS_IOCTL_MAGIC, 9, \
struct scoutfs_ioctl_search_xattrs)
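/*
 * Hypothetical usage sketch, not part of the original header: count
 * the inodes that may carry a given xattr, resuming one past the last
 * inode returned until the _END output flag reports that the pass
 * reached last_ino.  Assumes the ioctl returns the number of inode
 * numbers stored and that <string.h>, <errno.h>, and <sys/ioctl.h>
 * are included.
 */
static inline long count_xattr_inodes_example(int fd, const char *name)
{
	__u64 inos[1024];
	struct scoutfs_ioctl_search_xattrs sx = {
		.last_ino = ~0ULL,
		.name_ptr = (unsigned long)name,
		.inodes_ptr = (unsigned long)inos,
		.nr_inodes = 1024,
		.name_bytes = strlen(name),
	};
	long total = 0;
	long nr;

	do {
		nr = ioctl(fd, SCOUTFS_IOC_SEARCH_XATTRS, &sx);
		if (nr < 0)
			return -errno;

		total += nr;
		if (nr > 0)
			sx.next_ino = inos[nr - 1] + 1;
	} while (!(sx.output_flags & SCOUTFS_SEARCH_XATTRS_OFLAG_END));

	return total;
}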
/*
* Give the user information about the filesystem.
*
* @valid_bytes stores the number of bytes that are valid in the
* structure. The caller sets this to the size of the struct that they
* understand. The kernel then fills and copies back the min of the
* size they and the user caller understand. The user can tell if a
* field is set if all of its bytes are within the valid_bytes that the
* kernel set on return.
*
* @committed_seq: All seqs up to and including this seq have been
* committed. Can be compared with meta_seq and data_seq from inodes in
* stat_more to discover if changes have been committed to disk.
*
* New fields are only added to the end of the struct.
*/
struct scoutfs_ioctl_statfs_more {
__u64 valid_bytes;
__u64 fsid;
__u64 rid;
__u64 committed_seq;
__u64 total_meta_blocks;
__u64 total_data_blocks;
};
#define SCOUTFS_IOC_STATFS_MORE _IOR(SCOUTFS_IOCTL_MAGIC, 10, \
struct scoutfs_ioctl_statfs_more)
/*
* Cause matching waiters to return an error.
*
* Find current waiters that match the inode, op, and block range to wake
* up and return an error.
*/
struct scoutfs_ioctl_data_wait_err {
__u64 ino;
__u64 data_version;
__u64 offset;
__u64 count;
__u64 op;
__s64 err;
};
#define SCOUTFS_IOC_DATA_WAIT_ERR _IOR(SCOUTFS_IOCTL_MAGIC, 11, \
struct scoutfs_ioctl_data_wait_err)
#define SCOUTFS_IOC_ALLOC_DETAIL _IOR(SCOUTFS_IOCTL_MAGIC, 12, \
struct scoutfs_ioctl_alloc_detail)
struct scoutfs_ioctl_alloc_detail {
__u64 entries_ptr;
__u64 entries_nr;
};
struct scoutfs_ioctl_alloc_detail_entry {
__u64 id;
__u64 blocks;
__u8 type;
__u8 meta:1,
avail:1;
__u8 __bit_pad:6;
__u8 __pad[6];
};
#endif