From 86cf3ec4abb9690c87f2108ce72c555181d5665b Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Fri, 4 Dec 2020 15:28:10 -0800 Subject: [PATCH] Remove format.h and ioctl.h from utils Now that we're in one repo utils can get its format and ioctl headers from the authoritative kmod files. When we're building a dist tarball we copy the files over so that the build from the dist tarball can use them. Signed-off-by: Zach Brown --- utils/.gitignore | 2 + utils/Makefile | 32 +- utils/src/format.h | 946 --------------------------------------------- utils/src/ioctl.h | 416 -------------------- 4 files changed, 31 insertions(+), 1365 deletions(-) delete mode 100644 utils/src/format.h delete mode 100644 utils/src/ioctl.h diff --git a/utils/.gitignore b/utils/.gitignore index 3d9129b6..829a296c 100644 --- a/utils/.gitignore +++ b/utils/.gitignore @@ -2,6 +2,8 @@ *.d *.swp src/scoutfs +src/format.h +src/ioctl.h .sparse* .mock.build* cscope.* diff --git a/utils/Makefile b/utils/Makefile index 81386c83..ad146e5b 100644 --- a/utils/Makefile +++ b/utils/Makefile @@ -1,11 +1,32 @@ -SCOUTFS_FORMAT_HASH := \ - $(shell cat src/format.h src/ioctl.h | md5sum | cut -b1-16) +# +# The userspace utils and kernel module share definitions of physical +# structures and ioctls. If we're in the repo we include the kmod +# headers directly, and hash them directly to calculate the format hash. +# +# If we're creating a standalone tarball for distribution we copy the +# headers out of the kmod dir into the tarball. And then when we're +# building in that tarball we use the headers in src/ directly. 
+# +FMTIOC_H := format.h ioctl.h +FMTIOC_DIST := $(addprefix src/,$(FMTIOC_H)) +FMTIOC_KMOD := $(addprefix ../kmod/src/,$(FMTIOC_H)) + +ifneq ($(wildcard $(firstword $(FMTIOC_KMOD))),) +HASH_FILES := $(FMTIOC_KMOD) +else +HASH_FILES := $(FMTIOC_DIST) +endif +SCOUTFS_FORMAT_HASH := $(shell cat $(HASH_FILES) | md5sum | cut -b1-16) CFLAGS := -Wall -O2 -Werror -D_FILE_OFFSET_BITS=64 -g -msse4.2 \ -Wpadded \ -fno-strict-aliasing \ -DSCOUTFS_FORMAT_HASH=0x$(SCOUTFS_FORMAT_HASH)LLU +ifneq ($(wildcard $(firstword $(FMTIOC_KMOD))),) +CFLAGS += -I../kmod/src +endif + BIN := src/scoutfs OBJ := $(patsubst %.c,%.o,$(wildcard src/*.c)) DEPS := $(wildcard */*.d) @@ -47,9 +68,14 @@ RPM_GITHASH := $(shell git rev-parse --short HEAD) TARFILE = scoutfs-utils-$(RPM_VERSION).tar +# +# make a stand alone buildable tarball for packaging, arguably this +# shouldn't be included in the dist Makefile :) +# dist: $(RPM_DIR) scoutfs-utils.spec git archive --format=tar --prefix scoutfs-utils-$(RPM_VERSION)/ HEAD^{tree} > $(TARFILE) - @ tar rf $(TARFILE) --transform="s@\(.*\)@scoutfs-utils-$(RPM_VERSION)/\1@" scoutfs-utils.spec + tar rf $(TARFILE) --transform="s@\(.*\)@scoutfs-utils-$(RPM_VERSION)/\1@" scoutfs-utils.spec + tar rf $(TARFILE) --transform="s@.*\(src/.*\)@scoutfs-utils-$(RPM_VERSION)/\1@" $(FMTIOC_KMOD) clean: @rm -f $(BIN) $(OBJ) $(DEPS) .sparse.* diff --git a/utils/src/format.h b/utils/src/format.h deleted file mode 100644 index 033552bf..00000000 --- a/utils/src/format.h +++ /dev/null @@ -1,946 +0,0 @@ -#ifndef _SCOUTFS_FORMAT_H_ -#define _SCOUTFS_FORMAT_H_ - -/* statfs(2) f_type */ -#define SCOUTFS_SUPER_MAGIC 0x554f4353 /* "SCOU" */ - -/* block header magic values, chosen at random */ -#define SCOUTFS_BLOCK_MAGIC_SUPER 0x103c428b -#define SCOUTFS_BLOCK_MAGIC_BTREE 0xe597f96d -#define SCOUTFS_BLOCK_MAGIC_BLOOM 0x31995604 -#define SCOUTFS_BLOCK_MAGIC_SRCH_BLOCK 0x897e4a7d -#define SCOUTFS_BLOCK_MAGIC_SRCH_PARENT 0xb23a2a05 -#define SCOUTFS_BLOCK_MAGIC_ALLOC_LIST 0x8a93ac83 - -/* 
- * The super block, quorum block, and file data allocation granularity - * use the smaller 4KB block. - */ -#define SCOUTFS_BLOCK_SM_SHIFT 12 -#define SCOUTFS_BLOCK_SM_SIZE (1 << SCOUTFS_BLOCK_SM_SHIFT) -#define SCOUTFS_BLOCK_SM_MASK (SCOUTFS_BLOCK_SM_SIZE - 1) -#define SCOUTFS_BLOCK_SM_PER_PAGE (PAGE_SIZE / SCOUTFS_BLOCK_SM_SIZE) -#define SCOUTFS_BLOCK_SM_SECTOR_SHIFT (SCOUTFS_BLOCK_SM_SHIFT - 9) -#define SCOUTFS_BLOCK_SM_SECTORS (1 << SCOUTFS_BLOCK_SM_SECTOR_SHIFT) -#define SCOUTFS_BLOCK_SM_MAX (U64_MAX >> SCOUTFS_BLOCK_SM_SHIFT) -#define SCOUTFS_BLOCK_SM_PAGES_PER (SCOUTFS_BLOCK_SM_SIZE / PAGE_SIZE) -#define SCOUTFS_BLOCK_SM_PAGE_ORDER (SCOUTFS_BLOCK_SM_SHIFT - PAGE_SHIFT) - -/* - * The radix and btree structures, and the forest bloom block, use the - * larger 64KB metadata block size. - */ -#define SCOUTFS_BLOCK_LG_SHIFT 16 -#define SCOUTFS_BLOCK_LG_SIZE (1 << SCOUTFS_BLOCK_LG_SHIFT) -#define SCOUTFS_BLOCK_LG_MASK (SCOUTFS_BLOCK_LG_SIZE - 1) -#define SCOUTFS_BLOCK_LG_PER_PAGE (PAGE_SIZE / SCOUTFS_BLOCK_LG_SIZE) -#define SCOUTFS_BLOCK_LG_SECTOR_SHIFT (SCOUTFS_BLOCK_LG_SHIFT - 9) -#define SCOUTFS_BLOCK_LG_SECTORS (1 << SCOUTFS_BLOCK_LG_SECTOR_SHIFT) -#define SCOUTFS_BLOCK_LG_MAX (U64_MAX >> SCOUTFS_BLOCK_LG_SHIFT) -#define SCOUTFS_BLOCK_LG_PAGES_PER (SCOUTFS_BLOCK_LG_SIZE / PAGE_SIZE) -#define SCOUTFS_BLOCK_LG_PAGE_ORDER (SCOUTFS_BLOCK_LG_SHIFT - PAGE_SHIFT) - -#define SCOUTFS_BLOCK_SM_LG_SHIFT (SCOUTFS_BLOCK_LG_SHIFT - \ - SCOUTFS_BLOCK_SM_SHIFT) - - -/* - * The super block leaves some room before the first block for platform - * structures like boot loaders. - */ -#define SCOUTFS_SUPER_BLKNO ((64ULL * 1024) >> SCOUTFS_BLOCK_SM_SHIFT) - -/* - * A reasonably large region of aligned quorum blocks follow the super - * block. Each voting cycle reads the entire region so we don't want it - * to be too enormous. 256K seems like a reasonably chunky single IO. 
- * The number of blocks in the region also determines the number of - * mounts that have a reasonable probability of not overwriting each - * other's random block locations. - */ -#define SCOUTFS_QUORUM_BLKNO ((256ULL * 1024) >> SCOUTFS_BLOCK_SM_SHIFT) -#define SCOUTFS_QUORUM_BLOCKS ((256ULL * 1024) >> SCOUTFS_BLOCK_SM_SHIFT) - -/* - * Start data on the data device aligned as well. - */ -#define SCOUTFS_DATA_DEV_START_BLKNO ((256ULL * 1024) >> SCOUTFS_BLOCK_SM_SHIFT) - - -#define SCOUTFS_UNIQUE_NAME_MAX_BYTES 64 /* includes null */ - -/* - * Base types used by other structures. - */ -struct scoutfs_timespec { - __le64 sec; - __le32 nsec; - __u8 __pad[4]; -}; - -/* XXX ipv6 */ -struct scoutfs_inet_addr { - __le32 addr; - __le16 port; - __u8 __pad[2]; -}; - -/* - * This header is stored at the start of btree blocks and the super - * block for verification. The crc field is not included in the - * calculation of the crc. - */ -struct scoutfs_block_header { - __le32 crc; - __le32 magic; - __le64 fsid; - __le64 seq; - __le64 blkno; -}; - -/* - * scoutfs identifies all file system metadata items by a small key - * struct. - * - * Each item type maps their logical structures to the fixed fields in - * sort order. This lets us print keys without needing per-type - * formats. - * - * The keys are compared by considering the fields in struct order from - * most to least significant. They are considered a multi precision - * value when navigating the keys in ordered key space. We can - * increment them, subtract them from each other, etc. 
- */ -struct scoutfs_key { - __le64 _sk_first; - __le64 _sk_second; - __le64 _sk_third; - __u8 _sk_fourth; - __u8 sk_zone; - __u8 sk_type; - __u8 __pad[5]; -}; - -/* inode index */ -#define skii_major _sk_second -#define skii_ino _sk_third - -/* node orphan inode */ -#define sko_rid _sk_first -#define sko_ino _sk_second - -/* inode */ -#define ski_ino _sk_first - -/* xattr parts */ -#define skx_ino _sk_first -#define skx_name_hash _sk_second -#define skx_id _sk_third -#define skx_part _sk_fourth - -/* directory entries */ -#define skd_ino _sk_first -#define skd_major _sk_second -#define skd_minor _sk_third - -/* symlink target */ -#define sks_ino _sk_first -#define sks_nr _sk_second - -/* data extents */ -#define skdx_ino _sk_first -#define skdx_end _sk_second -#define skdx_len _sk_third - -/* log trees */ -#define sklt_rid _sk_first -#define sklt_nr _sk_second - -/* lock clients */ -#define sklc_rid _sk_first - -/* seqs */ -#define skts_trans_seq _sk_first -#define skts_rid _sk_second - -/* mounted clients */ -#define skmc_rid _sk_first - -/* free extents by blkno */ -#define skfb_end _sk_second -#define skfb_len _sk_third -/* free extents by len */ -#define skfl_neglen _sk_second -#define skfl_blkno _sk_third - -struct scoutfs_radix_block { - struct scoutfs_block_header hdr; - union { - struct scoutfs_radix_ref { - __le64 blkno; - __le64 seq; - __le64 sm_total; - __le64 lg_total; - } refs[0]; - __le64 bits[0]; - }; -}; - -struct scoutfs_avl_root { - __le16 node; -}; - -struct scoutfs_avl_node { - __le16 parent; - __le16 left; - __le16 right; - __u8 height; - __u8 __pad[1]; -}; - -/* when we split we want to have multiple items on each side */ -#define SCOUTFS_BTREE_MAX_VAL_LEN 896 - -/* - * A 4EB test image measured a worst case height of 17. This is plenty - * generous. 
- */ -#define SCOUTFS_BTREE_MAX_HEIGHT 20 - -struct scoutfs_btree_ref { - __le64 blkno; - __le64 seq; -}; - -/* - * A height of X means that the first block read will have level X-1 and - * the leaves will have level 0. - */ -struct scoutfs_btree_root { - struct scoutfs_btree_ref ref; - __u8 height; - __u8 __pad[7]; -}; - -struct scoutfs_btree_item { - struct scoutfs_avl_node node; - struct scoutfs_key key; - __le16 val_off; - __le16 val_len; - __u8 __pad[4]; -}; - -struct scoutfs_btree_block { - struct scoutfs_block_header hdr; - struct scoutfs_avl_root item_root; - __le16 nr_items; - __le16 total_item_bytes; - __le16 mid_free_len; - __u8 level; - __u8 __pad[7]; - struct scoutfs_btree_item items[0]; - /* leaf blocks have a fixed size item offset hash table at the end */ -}; - -#define SCOUTFS_BTREE_VALUE_ALIGN 8 - -/* - * Try to aim for a 75% load in a leaf full of items with no value. - * We'll almost never see this because most items have values and most - * blocks aren't full. - */ -#define SCOUTFS_BTREE_LEAF_ITEM_HASH_NR_UNALIGNED \ - ((SCOUTFS_BLOCK_LG_SIZE - sizeof(struct scoutfs_btree_block)) / \ - (sizeof(struct scoutfs_btree_item) + (sizeof(__le16))) * 100 / 75) -#define SCOUTFS_BTREE_LEAF_ITEM_HASH_NR \ - (round_up(SCOUTFS_BTREE_LEAF_ITEM_HASH_NR_UNALIGNED, \ - SCOUTFS_BTREE_VALUE_ALIGN)) -#define SCOUTFS_BTREE_LEAF_ITEM_HASH_BYTES \ - (SCOUTFS_BTREE_LEAF_ITEM_HASH_NR * sizeof(__le16)) - -struct scoutfs_alloc_list_ref { - __le64 blkno; - __le64 seq; -}; - -/* - * first_nr tracks the nr of the first block in the list and is used for - * allocation sizing. total_nr is the sum of the nr of all the blocks in - * the list and is used for calculating total free block counts. 
- */ -struct scoutfs_alloc_list_head { - struct scoutfs_alloc_list_ref ref; - __le64 total_nr; - __le32 first_nr; - __u8 __pad[4]; -}; - -/* - * While the main allocator uses extent items in btree blocks, metadata - * allocations for a single transaction are recorded in arrays in - * blocks. This limits the number of allocations and frees needed to - * cow and modify the structure. The blocks can be stored in a list - * which lets us create a persistent log of pending frees that are - * generated as we cow btree blocks to insert freed extents. - * - * The array floats in the block so that both adding and removing blknos - * only modifies an index. - */ -struct scoutfs_alloc_list_block { - struct scoutfs_block_header hdr; - struct scoutfs_alloc_list_ref next; - __le32 start; - __le32 nr; - __le64 blknos[0]; /* naturally aligned for sorting */ -}; - -#define SCOUTFS_ALLOC_LIST_MAX_BLOCKS \ - ((SCOUTFS_BLOCK_LG_SIZE - sizeof(struct scoutfs_alloc_list_block)) / \ - (member_sizeof(struct scoutfs_alloc_list_block, blknos[0]))) - -/* - * These can safely be initialized to all-zeros. - */ -struct scoutfs_alloc_root { - __le64 total_len; - struct scoutfs_btree_root root; -}; - -/* types of allocators, exposed to alloc_detail ioctl */ -#define SCOUTFS_ALLOC_OWNER_NONE 0 -#define SCOUTFS_ALLOC_OWNER_SERVER 1 -#define SCOUTFS_ALLOC_OWNER_MOUNT 2 -#define SCOUTFS_ALLOC_OWNER_SRCH 3 - -struct scoutfs_mounted_client_btree_val { - __u8 flags; -}; - -#define SCOUTFS_MOUNTED_CLIENT_VOTER (1 << 0) - -/* - * srch files are a contiguous run of blocks with compressed entries - * described by a dense parent radix. The files can be stored in - * log_tree items when the files contain unsorted entries written by - * mounts during their transactions. Sorted files of increasing size - * are kept in a btree off the super for searching and further - * compacting. 
- */ -struct scoutfs_srch_entry { - __le64 hash; - __le64 ino; - __le64 id; -}; - -#define SCOUTFS_SRCH_ENTRY_MAX_BYTES (2 + (sizeof(__u64) * 3)) - -struct scoutfs_srch_ref { - __le64 blkno; - __le64 seq; -}; - -struct scoutfs_srch_file { - struct scoutfs_srch_entry first; - struct scoutfs_srch_entry last; - struct scoutfs_srch_ref ref; - __le64 blocks; - __le64 entries; - __u8 height; - __u8 __pad[7]; -}; - -struct scoutfs_srch_parent { - struct scoutfs_block_header hdr; - struct scoutfs_srch_ref refs[0]; -}; - -#define SCOUTFS_SRCH_PARENT_REFS \ - ((SCOUTFS_BLOCK_LG_SIZE - \ - offsetof(struct scoutfs_srch_parent, refs)) / \ - sizeof(struct scoutfs_srch_ref)) - -struct scoutfs_srch_block { - struct scoutfs_block_header hdr; - struct scoutfs_srch_entry first; - struct scoutfs_srch_entry last; - struct scoutfs_srch_entry tail; - __le32 entry_nr; - __le32 entry_bytes; - __u8 entries[0]; -}; - -/* - * Decoding loads final small deltas with full __u64 loads. Rather than - * check the size before each load we stop coding entries past the point - * where a full size entry could overflow the block. A final entry can - * start at this byte count and consume the rest of the block, though - * its unlikely. - */ -#define SCOUTFS_SRCH_BLOCK_SAFE_BYTES \ - (SCOUTFS_BLOCK_LG_SIZE - sizeof(struct scoutfs_srch_block) - \ - SCOUTFS_SRCH_ENTRY_MAX_BYTES) - -#define SCOUTFS_SRCH_LOG_BLOCK_LIMIT (1024 * 1024 / SCOUTFS_BLOCK_LG_SIZE) -#define SCOUTFS_SRCH_COMPACT_ORDER 2 -#define SCOUTFS_SRCH_COMPACT_NR (1 << SCOUTFS_SRCH_COMPACT_ORDER) - -/* - * A persistent record of a srch file compaction operation in progress. - * - * When compacting log files blk and pos aren't used. When compacting - * sorted files blk is the logical block number and pos is the byte - * offset of the next entry. When deleting files pos is the height of - * the level that we're deleting, and blk is the logical block offset of - * the next parent ref array index to descend through. 
- */ -struct scoutfs_srch_compact { - struct scoutfs_alloc_list_head meta_avail; - struct scoutfs_alloc_list_head meta_freed; - __le64 id; - __u8 nr; - __u8 flags; - __u8 __pad[6]; - struct scoutfs_srch_file out; - struct scoutfs_srch_compact_input { - struct scoutfs_srch_file sfl; - __le64 blk; - __le64 pos; - } in[SCOUTFS_SRCH_COMPACT_NR]; -}; - -/* server -> client: combine input log file entries into output file */ -#define SCOUTFS_SRCH_COMPACT_FLAG_LOG (1 << 0) -/* server -> client: combine input sorted file entries into output file */ -#define SCOUTFS_SRCH_COMPACT_FLAG_SORTED (1 << 1) -/* server -> client: delete input files */ -#define SCOUTFS_SRCH_COMPACT_FLAG_DELETE (1 << 2) -/* client -> server: compaction phase (LOG,SORTED,DELETE) done */ -#define SCOUTFS_SRCH_COMPACT_FLAG_DONE (1 << 4) -/* client -> server: compaction failed */ -#define SCOUTFS_SRCH_COMPACT_FLAG_ERROR (1 << 5) - -/* - * XXX I imagine we should rename these now that they've evolved to track - * all the btrees that clients use during a transaction. It's not just - * about item logs, it's about clients making changes to trees. - */ -struct scoutfs_log_trees { - struct scoutfs_alloc_list_head meta_avail; - struct scoutfs_alloc_list_head meta_freed; - struct scoutfs_btree_root item_root; - struct scoutfs_btree_ref bloom_ref; - struct scoutfs_alloc_root data_avail; - struct scoutfs_alloc_root data_freed; - struct scoutfs_srch_file srch_file; - __le64 max_item_vers; - __le64 rid; - __le64 nr; -}; - -struct scoutfs_log_item_value { - __le64 vers; - __u8 flags; - __u8 __pad[7]; - __u8 data[0]; -}; - -/* - * FS items are limited by the max btree value length with the log item - * value header. 
- */ -#define SCOUTFS_MAX_VAL_SIZE \ - (SCOUTFS_BTREE_MAX_VAL_LEN - sizeof(struct scoutfs_log_item_value)) - -#define SCOUTFS_LOG_ITEM_FLAG_DELETION (1 << 0) - -struct scoutfs_bloom_block { - struct scoutfs_block_header hdr; - __le64 total_set; - __le64 bits[0]; -}; - -/* - * Item log trees are accompanied by a block of bits that make up a - * bloom filter which indicate if the item log trees may contain items - * covered by a lock. The log trees should be finalized and merged long - * before the bloom filters fill up and start returning excessive false - * positives. - */ -#define SCOUTFS_FOREST_BLOOM_NRS 3 -#define SCOUTFS_FOREST_BLOOM_BITS \ - (((SCOUTFS_BLOCK_LG_SIZE - sizeof(struct scoutfs_bloom_block)) / \ - member_sizeof(struct scoutfs_bloom_block, bits[0])) * \ - member_sizeof(struct scoutfs_bloom_block, bits[0]) * 8) -#define SCOUTFS_FOREST_BLOOM_FUNC_BITS (SCOUTFS_BLOCK_LG_SHIFT + 3) - -/* - * Keys are first sorted by major key zones. - */ -#define SCOUTFS_INODE_INDEX_ZONE 1 -#define SCOUTFS_RID_ZONE 2 -#define SCOUTFS_FS_ZONE 3 -#define SCOUTFS_LOCK_ZONE 4 -/* Items only stored in server btrees */ -#define SCOUTFS_LOG_TREES_ZONE 6 -#define SCOUTFS_LOCK_CLIENTS_ZONE 7 -#define SCOUTFS_TRANS_SEQ_ZONE 8 -#define SCOUTFS_MOUNTED_CLIENT_ZONE 9 -#define SCOUTFS_SRCH_ZONE 10 -#define SCOUTFS_FREE_EXTENT_ZONE 11 - -/* inode index zone */ -#define SCOUTFS_INODE_INDEX_META_SEQ_TYPE 1 -#define SCOUTFS_INODE_INDEX_DATA_SEQ_TYPE 2 -#define SCOUTFS_INODE_INDEX_NR 3 /* don't forget to update */ - -/* rid zone (also used in server alloc btree) */ -#define SCOUTFS_ORPHAN_TYPE 1 - -/* fs zone */ -#define SCOUTFS_INODE_TYPE 1 -#define SCOUTFS_XATTR_TYPE 2 -#define SCOUTFS_DIRENT_TYPE 3 -#define SCOUTFS_READDIR_TYPE 4 -#define SCOUTFS_LINK_BACKREF_TYPE 5 -#define SCOUTFS_SYMLINK_TYPE 6 -#define SCOUTFS_DATA_EXTENT_TYPE 7 - -/* lock zone, only ever found in lock ranges, never in persistent items */ -#define SCOUTFS_RENAME_TYPE 1 - -/* srch zone, only in server btrees */ 
-#define SCOUTFS_SRCH_LOG_TYPE 1 -#define SCOUTFS_SRCH_BLOCKS_TYPE 2 -#define SCOUTFS_SRCH_PENDING_TYPE 3 -#define SCOUTFS_SRCH_BUSY_TYPE 4 - -/* free extents in allocator btrees in client and server, by blkno or len */ -#define SCOUTFS_FREE_EXTENT_BLKNO_TYPE 1 -#define SCOUTFS_FREE_EXTENT_LEN_TYPE 2 - -/* file data extents have start and len in key */ -struct scoutfs_data_extent_val { - __le64 blkno; - __u8 flags; - __u8 __pad[7]; -}; - -#define SEF_OFFLINE (1 << 0) -#define SEF_UNWRITTEN (1 << 1) -#define SEF_UNKNOWN (U8_MAX << 2) - -/* - * The first xattr part item has a header that describes the xattr. The - * name and value are then packed into the following bytes in the first - * part item and overflow into the values of the rest of the part items. - */ -struct scoutfs_xattr { - __le16 val_len; - __u8 name_len; - __u8 __pad[5]; - __u8 name[0]; -}; - - -/* XXX does this exist upstream somewhere? */ -#define member_sizeof(TYPE, MEMBER) (sizeof(((TYPE *)0)->MEMBER)) - -#define SCOUTFS_UUID_BYTES 16 - -/* - * Mounts read all the quorum blocks and write to one random quorum - * block during a cycle. The min cycle time limits the per-mount iop - * load during elections. The random cycle delay makes it less likely - * that mounts will read and write at the same time and miss each - * other's writes. An election only completes if a quorum of mounts - * vote for a leader before any of their elections timeout. This is - * made less likely by the probability that mounts will overwrite each - * others random block locations. The max quorum count limits that - * probability. 9 mounts only have a 55% chance of writing to unique 4k - * blocks in a 256k region. The election timeout is set to include - * enough cycles to usually complete the election. Once a leader is - * elected it spends a number of cycles writing out blocks with itself - * logged as a leader. This reduces the possibility that servers - * will have their log entries overwritten and not be fenced. 
- */ -#define SCOUTFS_QUORUM_MAX_COUNT 9 -#define SCOUTFS_QUORUM_CYCLE_LO_MS 10 -#define SCOUTFS_QUORUM_CYCLE_HI_MS 20 -#define SCOUTFS_QUORUM_TERM_LO_MS 250 -#define SCOUTFS_QUORUM_TERM_HI_MS 500 -#define SCOUTFS_QUORUM_ELECTED_LOG_CYCLES 10 - -struct scoutfs_quorum_block { - __le64 fsid; - __le64 blkno; - __le64 term; - __le64 write_nr; - __le64 voter_rid; - __le64 vote_for_rid; - __le32 crc; - __u8 log_nr; - __u8 __pad[3]; - struct scoutfs_quorum_log { - __le64 term; - __le64 rid; - struct scoutfs_inet_addr addr; - } log[0]; -}; - -#define SCOUTFS_QUORUM_LOG_MAX \ - ((SCOUTFS_BLOCK_SM_SIZE - sizeof(struct scoutfs_quorum_block)) / \ - sizeof(struct scoutfs_quorum_log)) - -#define SCOUTFS_FLAG_IS_META_BDEV 0x01 - -struct scoutfs_super_block { - struct scoutfs_block_header hdr; - __le64 id; - __le64 format_hash; - __le64 flags; - __u8 uuid[SCOUTFS_UUID_BYTES]; - __le64 next_ino; - __le64 next_trans_seq; - __le64 total_meta_blocks; /* both static and dynamic */ - __le64 first_meta_blkno; /* first dynamically allocated */ - __le64 last_meta_blkno; - __le64 total_data_blocks; - __le64 first_data_blkno; - __le64 last_data_blkno; - __le64 quorum_fenced_term; - __le64 quorum_server_term; - __le64 unmount_barrier; - __u8 quorum_count; - __u8 __pad[7]; - struct scoutfs_inet_addr server_addr; - struct scoutfs_alloc_root meta_alloc[2]; - struct scoutfs_alloc_root data_alloc; - struct scoutfs_alloc_list_head server_meta_avail[2]; - struct scoutfs_alloc_list_head server_meta_freed[2]; - struct scoutfs_btree_root fs_root; - struct scoutfs_btree_root logs_root; - struct scoutfs_btree_root lock_clients; - struct scoutfs_btree_root trans_seqs; - struct scoutfs_btree_root mounted_clients; - struct scoutfs_btree_root srch_root; -}; - -#define SCOUTFS_ROOT_INO 1 - - -/* - * @meta_seq: advanced the first time an inode is updated in a given - * transaction. It can only advance again after the inode is written - * and a new transaction opens. 
- * - * @data_seq: advanced the first time a file's data (or size) is - * modified in a given transaction. It can only advance again after the - * file is written and a new transaction opens. - * - * @data_version: incremented every time the contents of a file could - * have changed. It is exposed via an ioctl and is then provided as an - * argument to data functions to protect racing modification. - * - * @online_blocks: The number of fixed 4k blocks currently allocated and - * storing data in the volume. - * - * @offline_blocks: The number of fixed 4k blocks that could be made - * online by staging. - * - * XXX - * - otime? - * - compat flags? - * - version? - * - generation? - * - be more careful with rdev? - */ -struct scoutfs_inode { - __le64 size; - __le64 meta_seq; - __le64 data_seq; - __le64 data_version; - __le64 online_blocks; - __le64 offline_blocks; - __le64 next_readdir_pos; - __le64 next_xattr_id; - __le32 nlink; - __le32 uid; - __le32 gid; - __le32 mode; - __le32 rdev; - __le32 flags; - struct scoutfs_timespec atime; - struct scoutfs_timespec ctime; - struct scoutfs_timespec mtime; -}; - -#define SCOUTFS_INO_FLAG_TRUNCATE 0x1 - -#define SCOUTFS_ROOT_INO 1 - -/* like the block size, a reasonable min PATH_MAX across platforms */ -#define SCOUTFS_SYMLINK_MAX_SIZE 4096 - -/* - * Dirents are stored in multiple places to isolate contention when - * performing different operations: hashed by name for creation and - * lookup, at incrementing positions for readdir and resolving inodes to - * paths. Each entry has all the metadata needed to reference all the - * items (so an entry cached by lookup can be used to unlink all the - * items). - */ -struct scoutfs_dirent { - __le64 ino; - __le64 hash; - __le64 pos; - __u8 type; - __u8 __pad[7]; - __u8 name[0]; -}; - -#define SCOUTFS_NAME_LEN 255 - -/* S32_MAX avoids the (int) sign bit and might avoid sloppy bugs */ -#define SCOUTFS_LINK_MAX S32_MAX - -/* entries begin after . and .. 
*/ -#define SCOUTFS_DIRENT_FIRST_POS 2 -/* getdents returns next pos with an entry, no entry at (f_pos)~0 */ -#define SCOUTFS_DIRENT_LAST_POS (U64_MAX - 1) - -enum scoutfs_dentry_type { - SCOUTFS_DT_FIFO = 0, - SCOUTFS_DT_CHR, - SCOUTFS_DT_DIR, - SCOUTFS_DT_BLK, - SCOUTFS_DT_REG, - SCOUTFS_DT_LNK, - SCOUTFS_DT_SOCK, - SCOUTFS_DT_WHT, -}; - - -#define SCOUTFS_XATTR_MAX_NAME_LEN 255 -#define SCOUTFS_XATTR_MAX_VAL_LEN 65535 -#define SCOUTFS_XATTR_MAX_PART_SIZE SCOUTFS_MAX_VAL_SIZE - -#define SCOUTFS_XATTR_NR_PARTS(name_len, val_len) \ - DIV_ROUND_UP(sizeof(struct scoutfs_xattr) + name_len + val_len, \ - (unsigned int)SCOUTFS_XATTR_MAX_PART_SIZE) - -#define SCOUTFS_LOCK_INODE_GROUP_NR 1024 -#define SCOUTFS_LOCK_INODE_GROUP_MASK (SCOUTFS_LOCK_INODE_GROUP_NR - 1) -#define SCOUTFS_LOCK_SEQ_GROUP_MASK ((1ULL << 10) - 1) - -/* - * messages over the wire. - */ - -/* - * Greetings verify identity of communicating nodes. The sender sends - * their credentials and the receiver verifies them. - * - * @server_term: The raft term that elected the server. Initially 0 - * from the client, sent by the server, then sent by the client as it - * tries to reconnect. Used to identify a client reconnecting to both - * the same serer after receiving a greeting response and to a new - * server after failover. - * - * @unmount_barrier: Incremented every time the remaining majority of - * quorum members all agree to leave. The server tells a quorum member - * the value that it's connecting under so that if the client sees the - * value increase in the super block then it knows that the server has - * processed its farewell and can safely unmount. - * - * @rid: The client's random id that was generated once as the mount - * started up. This identifies a specific remote mount across - * connections and servers. It's set to the client's rid in both the - * request and response for consistency. 
- */ -struct scoutfs_net_greeting { - __le64 fsid; - __le64 format_hash; - __le64 server_term; - __le64 unmount_barrier; - __le64 rid; - __le64 flags; -}; - -#define SCOUTFS_NET_GREETING_FLAG_FAREWELL (1 << 0) -#define SCOUTFS_NET_GREETING_FLAG_VOTER (1 << 1) -#define SCOUTFS_NET_GREETING_FLAG_INVALID (~(__u64)0 << 2) - -/* - * This header precedes and describes all network messages sent over - * sockets. - * - * @seq: A sequence number that is increased for each message queued for - * send on the sender. The sender will never reorder messages in the - * send queue so this will always increase in recv on the receiver. The - * receiver can use this to drop messages that arrived twice after being - * resent across a newly connected socket for a given connection. - * - * @recv_seq: The sequence number of the last received message. The - * receiver is sending this to the sender in every message. The sender - * uses them to drop responses which have been delivered. - * - * @id: An increasing identifier that is set in each request. Responses - * specify the request that they're responding to. - * - * Error is only set to a translated errno and will only be found in - * response messages. 
- */ -struct scoutfs_net_header { - __le64 clock_sync_id; - __le64 seq; - __le64 recv_seq; - __le64 id; - __le16 data_len; - __u8 cmd; - __u8 flags; - __u8 error; - __u8 __pad[3]; - __u8 data[0]; -}; - -#define SCOUTFS_NET_FLAG_RESPONSE (1 << 0) -#define SCOUTFS_NET_FLAGS_UNKNOWN (U8_MAX << 1) - -enum scoutfs_net_cmd { - SCOUTFS_NET_CMD_GREETING = 0, - SCOUTFS_NET_CMD_ALLOC_INODES, - SCOUTFS_NET_CMD_GET_LOG_TREES, - SCOUTFS_NET_CMD_COMMIT_LOG_TREES, - SCOUTFS_NET_CMD_GET_ROOTS, - SCOUTFS_NET_CMD_ADVANCE_SEQ, - SCOUTFS_NET_CMD_GET_LAST_SEQ, - SCOUTFS_NET_CMD_LOCK, - SCOUTFS_NET_CMD_LOCK_RECOVER, - SCOUTFS_NET_CMD_SRCH_GET_COMPACT, - SCOUTFS_NET_CMD_SRCH_COMMIT_COMPACT, - SCOUTFS_NET_CMD_FAREWELL, - SCOUTFS_NET_CMD_UNKNOWN, -}; - -/* - * Define a macro to evaluate another macro for each of the errnos we - * translate over the wire. This lets us keep our enum in sync with the - * mapping arrays to and from host errnos. - */ -#define EXPAND_EACH_NET_ERRNO \ - EXPAND_NET_ERRNO(ENOENT) \ - EXPAND_NET_ERRNO(ENOMEM) \ - EXPAND_NET_ERRNO(EIO) \ - EXPAND_NET_ERRNO(ENOSPC) \ - EXPAND_NET_ERRNO(EINVAL) - -#undef EXPAND_NET_ERRNO -#define EXPAND_NET_ERRNO(which) SCOUTFS_NET_ERR_##which, -enum scoutfs_net_errors { - SCOUTFS_NET_ERR_NONE = 0, - EXPAND_EACH_NET_ERRNO - SCOUTFS_NET_ERR_UNKNOWN, -}; - -/* arbitrarily chosen to be safely less than mss and allow 1k with header */ -#define SCOUTFS_NET_MAX_DATA_LEN 1100 - -/* - * When there's no more free inodes this will be sent with ino = ~0 and - * nr = 0. 
- */ -struct scoutfs_net_inode_alloc { - __le64 ino; - __le64 nr; -}; - -struct scoutfs_net_roots { - struct scoutfs_btree_root fs_root; - struct scoutfs_btree_root logs_root; - struct scoutfs_btree_root srch_root; -}; - -struct scoutfs_net_lock { - struct scoutfs_key key; - __le64 write_version; - __u8 old_mode; - __u8 new_mode; - __u8 __pad[6]; -}; - -struct scoutfs_net_lock_grant_response { - struct scoutfs_net_lock nl; - struct scoutfs_net_roots roots; -}; - -struct scoutfs_net_lock_recover { - __le16 nr; - __u8 __pad[6]; - struct scoutfs_net_lock locks[0]; -}; - -#define SCOUTFS_NET_LOCK_MAX_RECOVER_NR \ - ((SCOUTFS_NET_MAX_DATA_LEN - sizeof(struct scoutfs_net_lock_recover)) /\ - sizeof(struct scoutfs_net_lock)) - -/* some enums for tracing */ -enum scoutfs_lock_trace { - SLT_CLIENT, - SLT_SERVER, - SLT_GRANT, - SLT_INVALIDATE, - SLT_REQUEST, - SLT_RESPONSE, -}; - -/* - * Read and write locks operate as you'd expect. Multiple readers can - * hold read locks while writers are excluded. A single writer can hold - * a write lock which excludes other readers and writers. Writers can - * read while holding a write lock. - * - * Multiple writers can hold write only locks but they can not read, - * they can only generate dirty items. It's used when the system has - * other means of knowing that it's safe to overwrite items. - * - * The null mode provides no access and is used to destroy locks. - */ -enum scoutfs_lock_mode { - SCOUTFS_LOCK_NULL = 0, - SCOUTFS_LOCK_READ, - SCOUTFS_LOCK_WRITE, - SCOUTFS_LOCK_WRITE_ONLY, - SCOUTFS_LOCK_INVALID, -}; - -/* - * Scoutfs file handle structure - this can be copied out to userspace - * via open by handle or put on the wire from NFS. - */ -struct scoutfs_fid { - __le64 ino; - __le64 parent_ino; -}; - -#define FILEID_SCOUTFS 0x81 -#define FILEID_SCOUTFS_WITH_PARENT 0x82 - -/* - * Identifiers for sources of corruption that can generate messages. 
- */ -enum scoutfs_corruption_sources { - SC_DIRENT_NAME_LEN = 0, - SC_DIRENT_BACKREF_NAME_LEN, - SC_DIRENT_READDIR_NAME_LEN, - SC_SYMLINK_INODE_SIZE, - SC_SYMLINK_MISSING_ITEM, - SC_SYMLINK_NOT_NULL_TERM, - SC_BTREE_BLOCK_LEVEL, - SC_BTREE_NO_CHILD_REF, - SC_INODE_BLOCK_COUNTS, - SC_NR_SOURCES, -}; - -#define SC_NR_LONGS DIV_ROUND_UP(SC_NR_SOURCES, BITS_PER_LONG) - -#endif diff --git a/utils/src/ioctl.h b/utils/src/ioctl.h deleted file mode 100644 index a53626a0..00000000 --- a/utils/src/ioctl.h +++ /dev/null @@ -1,416 +0,0 @@ -#ifndef _SCOUTFS_IOCTL_H_ -#define _SCOUTFS_IOCTL_H_ - -/* - * We naturally align explicit width fields in the ioctl structs so that - * userspace doesn't need to deal with padding or unaligned packing and - * we don't have to deal with 32/64 compat. It makes it a little - * awkward to communicate persistent packed structs through the ioctls - * but that happens very rarely. An interesting special case are - * 0length arrays that follow the structs. We make those start at the - * next aligned offset of the struct to be safe. - * - * This is enforced by pahole scripting in external build environments. - */ - -/* XXX I have no idea how these are chosen. */ -#define SCOUTFS_IOCTL_MAGIC 's' - -/* - * Packed scoutfs keys rarely cross the ioctl boundary so we have a - * translation struct. - */ -struct scoutfs_ioctl_key { - __le64 _sk_first; - __le64 _sk_second; - __le64 _sk_third; - __u8 _sk_fourth; - __u8 sk_type; - __u8 sk_zone; - __u8 _pad[5]; -}; - -struct scoutfs_ioctl_walk_inodes_entry { - __u64 major; - __u64 ino; - __u32 minor; - __u8 _pad[4]; -}; - -/* - * Walk inodes in an index that is sorted by one of their fields. - * - * Each index is built from generic index items that have major and - * minor values that are set to the field being indexed. In time - * indices, for example, major is seconds and minor is nanoseconds. - * - * @first The first index entry that can be returned. - * @last The last index entry that can be returned. 
- * @entries_ptr Pointer to emory containing buffer for entry results. - * @nr_entries The number of entries that can fit in the buffer. - * @index Which index to walk, enumerated in _WALK_INODES_ constants. - * - * To start iterating first can be memset to 0 and last to 0xff. Then - * after each set of results first can be set to the last entry returned - * and then the fields can be incremented in reverse sort order (ino < - * minor < major) as each increasingly significant value wraps around to - * 0. - * - * These indexes are not strictly consistent. The items that back these - * index entries aren't updated with cluster locks so they're not - * guaranteed to be visible the moment you read after writing. They're - * only visible when the transaction that updated them is synced. - * - * In addition, the seq indexes will only allow walking through sequence - * space that has been consistent. This prevents old dirty entries from - * becoming visible after newer stable entries are displayed. - * - * If first is greater than last then the walk will return 0 entries. - * - * XXX invalidate before reading. - */ -struct scoutfs_ioctl_walk_inodes { - struct scoutfs_ioctl_walk_inodes_entry first; - struct scoutfs_ioctl_walk_inodes_entry last; - __u64 entries_ptr; - __u32 nr_entries; - __u8 index; - __u8 _pad[11]; /* padded to align walk_inodes_entry total size */ -}; - -enum scoutfs_ino_walk_seq_type { - SCOUTFS_IOC_WALK_INODES_META_SEQ = 0, - SCOUTFS_IOC_WALK_INODES_DATA_SEQ, - SCOUTFS_IOC_WALK_INODES_UNKNOWN, -}; - -/* - * Adds entries to the user's buffer for each inode that is found in the - * given index between the first and last positions. - */ -#define SCOUTFS_IOC_WALK_INODES _IOR(SCOUTFS_IOCTL_MAGIC, 1, \ - struct scoutfs_ioctl_walk_inodes) - -/* - * Fill the result buffer with the next absolute path to the target - * inode searching from a given position in a parent directory. - * - * @ino: The target ino that we're finding paths to. 
Constant across - * all the calls that make up an iteration over all the inode's paths. - * - * @dir_ino: The inode number of the directory containing the entry to - * our inode to search from. If this parent directory contains no more - * entries to our inode then we'll search through other parent directory - * inodes in inode order. - * - * @dir_pos: The position in the dir_ino parent directory of the entry - * to our inode to search from. If there is no entry at this position - * then we'll search through other entry positions in increasing order. - * If we exhaust the parent directory then we'll search through - * additional parent directories in inode order. - * - * @result_ptr: A pointer to the buffer where the result struct and - * absolute path will be stored. - * - * @result_bytes: The size of the buffer that will contain the result - * struct and the null terminated absolute path name. - * - * To start iterating set the desired target inode, dir_ino to 0, - * dir_pos to 0, and set result_ptr and _bytes to a sufficiently large - * buffeer (sizeof(result) + PATH_MAX is a solid choice). - * - * After each returned result set the next search dir_ino and dir_pos to - * the returned dir_ino and dir_pos. Then increment the search dir_pos, - * and if it wrapped to 0, increment dir_ino. - * - * This only walks back through full hard links. None of the returned - * paths will reflect symlinks to components in the path. - * - * This doesn't ensure that the caller has permissions to traverse the - * returned paths to the inode. It requires CAP_DAC_READ_SEARCH which - * bypasses permissions checking. - * - * This call is not serialized with any modification (create, rename, - * unlink) of the path components. It will return all the paths that - * were stable both before and after the call. It may or may not return - * paths which are created or unlinked during the call. - * - * On success 0 is returned and result struct is filled with the next - * absolute path. 
The path_bytes length of the path includes a null - * terminating byte. dir_ino and dir_pos refer to the position of the - * final component in its parent directory and can be advanced to search - * for the next terminal entry whose path is then built by walking up - * parent directories. - * - * ENOENT is returned when no paths are found. - * - * ENAMETOOLONG is returned when the result struct and path found - * doesn't fit in the result buffer. - * - * Many other errnos indicate hard failure to find the next path. - */ -struct scoutfs_ioctl_ino_path { - __u64 ino; - __u64 dir_ino; - __u64 dir_pos; - __u64 result_ptr; - __u16 result_bytes; - __u8 _pad[6]; -}; - -struct scoutfs_ioctl_ino_path_result { - __u64 dir_ino; - __u64 dir_pos; - __u16 path_bytes; - __u8 _pad[6]; - __u8 path[0]; -}; - -/* Get a single path from the root to the given inode number */ -#define SCOUTFS_IOC_INO_PATH _IOR(SCOUTFS_IOCTL_MAGIC, 2, \ - struct scoutfs_ioctl_ino_path) - -/* - * "Release" a contiguous range of logical blocks of file data. - * Released blocks are removed from the file system like truncation, but - * an offline record is left behind to trigger demand staging if the - * file is read. - * - * The starting block offset and number of blocks to release are in - * units 4KB blocks. - * - * The specified range can extend past i_size and can straddle sparse - * regions or blocks that are already offline. The only change it makes - * is to free and mark offline any existing blocks that intersect with - * the region. - * - * Returns 0 if the operation succeeds. If an error is returned then - * some partial region of the blocks in the region may have been marked - * offline. - * - * If the operation succeeds then inode metadata that reflects file data - * contents are not updated. This is intended to be transparent to the - * presentation of the data in the file. 
- */ -struct scoutfs_ioctl_release { - __u64 block; - __u64 count; - __u64 data_version; -}; - -#define SCOUTFS_IOC_RELEASE _IOW(SCOUTFS_IOCTL_MAGIC, 3, \ - struct scoutfs_ioctl_release) - -struct scoutfs_ioctl_stage { - __u64 data_version; - __u64 buf_ptr; - __u64 offset; - __s32 count; - __u32 _pad; -}; - -#define SCOUTFS_IOC_STAGE _IOW(SCOUTFS_IOCTL_MAGIC, 4, \ - struct scoutfs_ioctl_stage) - -/* - * Give the user inode fields that are not otherwise visible. statx() - * isn't always available and xattrs are relatively expensive. - * - * @valid_bytes stores the number of bytes that are valid in the - * structure. The caller sets this to the size of the struct that they - * understand. The kernel then fills and copies back the min of the - * size they and the user caller understand. The user can tell if a - * field is set if all of its bytes are within the valid_bytes that the - * kernel set on return. - * - * New fields are only added to the end of the struct. - */ -struct scoutfs_ioctl_stat_more { - __u64 valid_bytes; - __u64 meta_seq; - __u64 data_seq; - __u64 data_version; - __u64 online_blocks; - __u64 offline_blocks; -}; - -#define SCOUTFS_IOC_STAT_MORE _IOR(SCOUTFS_IOCTL_MAGIC, 5, \ - struct scoutfs_ioctl_stat_more) - - -struct scoutfs_ioctl_data_waiting_entry { - __u64 ino; - __u64 iblock; - __u8 op; - __u8 _pad[7]; -}; - -#define SCOUTFS_IOC_DWO_READ (1 << 0) -#define SCOUTFS_IOC_DWO_WRITE (1 << 1) -#define SCOUTFS_IOC_DWO_CHANGE_SIZE (1 << 2) -#define SCOUTFS_IOC_DWO_UNKNOWN (U8_MAX << 3) - -struct scoutfs_ioctl_data_waiting { - __u64 flags; - __u64 after_ino; - __u64 after_iblock; - __u64 ents_ptr; - __u16 ents_nr; - __u8 _pad[6]; -}; - -#define SCOUTFS_IOC_DATA_WAITING_FLAGS_UNKNOWN (U8_MAX << 0) - -#define SCOUTFS_IOC_DATA_WAITING _IOR(SCOUTFS_IOCTL_MAGIC, 6, \ - struct scoutfs_ioctl_data_waiting) - -/* - * If i_size is set then data_version must be non-zero. 
If the offline - * flag is set then i_size must be set and a offline extent will be - * created from offset 0 to i_size. - */ -struct scoutfs_ioctl_setattr_more { - __u64 data_version; - __u64 i_size; - __u64 flags; - __u64 ctime_sec; - __u32 ctime_nsec; - __u8 _pad[4]; -}; - -#define SCOUTFS_IOC_SETATTR_MORE_OFFLINE (1 << 0) -#define SCOUTFS_IOC_SETATTR_MORE_UNKNOWN (U8_MAX << 1) - -#define SCOUTFS_IOC_SETATTR_MORE _IOW(SCOUTFS_IOCTL_MAGIC, 7, \ - struct scoutfs_ioctl_setattr_more) - -struct scoutfs_ioctl_listxattr_hidden { - __u64 id_pos; - __u64 buf_ptr; - __u32 buf_bytes; - __u32 hash_pos; -}; - -#define SCOUTFS_IOC_LISTXATTR_HIDDEN _IOR(SCOUTFS_IOCTL_MAGIC, 8, \ - struct scoutfs_ioctl_listxattr_hidden) - -/* - * Return the inode numbers of inodes which might contain the given - * xattr. The inode may not have a set xattr with that name, the caller - * must check the returned inodes to see if they match. - * - * @next_ino: The next inode number that could be returned. Initialized - * to 0 when first searching and set to one past the last inode number - * returned to continue searching. - * @last_ino: The last inode number that could be returned. U64_MAX to - * find all inodes. - * @name_ptr: The address of the name of the xattr to search for. It is - * not null terminated. - * @inodes_ptr: The address of the array of uint64_t inode numbers in - * which to store inode numbers that may contain the xattr. EFAULT may - * be returned if this address is not naturally aligned. - * @output_flags: Set as success is returned. If an error is returned - * then this field is undefined and should not be read. - * @nr_inodes: The number of elements in the array found at inodes_ptr. - * @name_bytes: The number of non-null bytes found in the name at - * name_ptr. - * - * This requires the CAP_SYS_ADMIN capability and will return -EPERM if - * it's not granted. - * - * The number of inode numbers stored in the inodes_ptr array is - * returned. 
If nr_inodes is 0 or last_ino is less than next_ino then 0 - * will be immediately returned. - * - * Partial progress can be returned if an error is hit or if nr_inodes - * was larger than the internal limit on the number of inodes returned - * in a search pass. The _END output flag is set if all the results - * including last_ino were searched in this pass. - * - * It's valuable to provide a large inodes array so that all the results - * can be found in one search pass and _END can be set. There are - * significant constant costs for performing each search pass. - */ -struct scoutfs_ioctl_search_xattrs { - __u64 next_ino; - __u64 last_ino; - __u64 name_ptr; - __u64 inodes_ptr; - __u64 output_flags; - __u64 nr_inodes; - __u16 name_bytes; - __u8 _pad[6]; -}; - -/* set in output_flags if returned inodes reached last_ino */ -#define SCOUTFS_SEARCH_XATTRS_OFLAG_END (1ULL << 0) - -#define SCOUTFS_IOC_SEARCH_XATTRS _IOR(SCOUTFS_IOCTL_MAGIC, 9, \ - struct scoutfs_ioctl_search_xattrs) - -/* - * Give the user information about the filesystem. - * - * @valid_bytes stores the number of bytes that are valid in the - * structure. The caller sets this to the size of the struct that they - * understand. The kernel then fills and copies back the min of the - * size they and the user caller understand. The user can tell if a - * field is set if all of its bytes are within the valid_bytes that the - * kernel set on return. - * - * @committed_seq: All seqs up to and including this seq have been - * committed. Can be compared with meta_seq and data_seq from inodes in - * stat_more to discover if changes have been committed to disk. - * - * New fields are only added to the end of the struct. 
- */ -struct scoutfs_ioctl_statfs_more { - __u64 valid_bytes; - __u64 fsid; - __u64 rid; - __u64 committed_seq; - __u64 total_meta_blocks; - __u64 total_data_blocks; -}; - -#define SCOUTFS_IOC_STATFS_MORE _IOR(SCOUTFS_IOCTL_MAGIC, 10, \ - struct scoutfs_ioctl_statfs_more) - -/* - * Cause matching waiters to return an error. - * - * Find current waiters that match the inode, op, and block range to wake - * up and return an error. - */ -struct scoutfs_ioctl_data_wait_err { - __u64 ino; - __u64 data_version; - __u64 offset; - __u64 count; - __u64 op; - __s64 err; -}; - -#define SCOUTFS_IOC_DATA_WAIT_ERR _IOR(SCOUTFS_IOCTL_MAGIC, 11, \ - struct scoutfs_ioctl_data_wait_err) - - -#define SCOUTFS_IOC_ALLOC_DETAIL _IOR(SCOUTFS_IOCTL_MAGIC, 12, \ - struct scoutfs_ioctl_alloc_detail) - -struct scoutfs_ioctl_alloc_detail { - __u64 entries_ptr; - __u64 entries_nr; -}; - -struct scoutfs_ioctl_alloc_detail_entry { - __u64 id; - __u64 blocks; - __u8 type; - __u8 meta:1, - avail:1; - __u8 __bit_pad:6; - __u8 __pad[6]; -}; - -#endif