commit 2c2f090168124b5b302bc72abcfb2c5b174ed57d Author: Zach Brown Date: Fri Feb 12 14:35:27 2016 -0800 Initial commit This initial commit has enough to make a new file system and print out it's structures. Signed-off-by: Zach Brown diff --git a/utils/.gitignore b/utils/.gitignore new file mode 100644 index 00000000..0a68a563 --- /dev/null +++ b/utils/.gitignore @@ -0,0 +1,7 @@ +*.o +*.d +*.swp +src/scoutfs +.sparse* +.mock.build* +cscope.* diff --git a/utils/Makefile b/utils/Makefile new file mode 100644 index 00000000..6a15bbdd --- /dev/null +++ b/utils/Makefile @@ -0,0 +1,33 @@ +CFLAGS := -Wall -O2 -Werror -D_FILE_OFFSET_BITS=64 -g -mrdrnd -msse4.2 + +BIN := src/scoutfs +OBJ := $(patsubst %.c,%.o,$(wildcard src/*.c)) +DEPS := $(wildcard */*.d) + +all: $(BIN) + +ifneq ($(DEPS),) +-include $(DEPS) +endif + +ifeq ($(V), ) +QU = @echo +VE = @ +else +QU = @: +VE = +endif + +$(BIN): $(OBJ) + $(QU) [BIN $@] + $(VE)gcc -o $@ $^ -luuid + +%.o %.d: %.c Makefile sparse.sh + $(QU) [CC $<] + $(VE)gcc $(CFLAGS) -MD -MP -MF $*.d -c $< -o $*.o + $(QU) [SP $<] + $(VE)./sparse.sh -Wbitwise -D__CHECKER__ $(CFLAGS) $< + +.PHONY: clean +clean: + @rm -f $(BIN) $(OBJ) $(DEPS) .sparse.* diff --git a/utils/sparse.sh b/utils/sparse.sh new file mode 100755 index 00000000..61c7bd33 --- /dev/null +++ b/utils/sparse.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# can we find sparse? If not, we're done. +which sparse > /dev/null 2>&1 || exit 0 + +# +# one of the problems with using sparse in userspace is that it picks up +# things in system headers that we don't care about. We're willing to +# take on the burden of filtering them out so that we can have it tell +# us about problems in our code. 
+# +# system headers using __transparent_union__ +RE="^/.*error: ignoring attribute __transparent_union__" + +# we don't care if system headers have gcc attributes sparse doesn't +# know about +RE="$RE|error: attribute '__leaf__': unknown attribute" + +# yes, sparse, that's the size of memseting a 4 meg buffer all right +RE="$RE|warning: memset with byte count of 4194304" + +# +# don't filter out 'too many errors' here, it can signify that +# sparse doesn't understand something and is throwing a *ton* +# of useless errors before giving up and exiting. Check +# unfiltered sparse output. +# + +# +# I'm not sure this is needed. +# +search=$(gcc -print-search-dirs | awk '($1 == "install:"){print "-I" $2}') + +# +# We're trying to use sparse against glibc headers which go wild trying to +# use internal compiler macros to test features. We copy gcc's and give +# them to sparse. But not __SIZE_TYPE__ 'cause sparse defines that one. +# +defines=".sparse.gcc-defines.h" +gcc -dM -E -x c - < /dev/null | grep -v __SIZE_TYPE__ > $defines +include="-include $defines" + +# +# sparse doesn't seem to notice when it's on a 64bit host. It warns that +# 64bit values don't fit in 'unsigned long' without this. 
+# +if grep -q "__LP64__ 1" $defines; then + m64="-m64" +else + m64="" +fi + +sparse $m64 $include $search/include "$@" 2>&1 | egrep -v "($RE)" | tee .sparse.output +if [ -s .sparse.output ]; then + exit 1 +else + exit 0 +fi diff --git a/utils/src/cmd.c b/utils/src/cmd.c new file mode 100644 index 00000000..2dc0cbd7 --- /dev/null +++ b/utils/src/cmd.c @@ -0,0 +1,82 @@ +#include +#include +#include +#include +#include +#include + +#include "cmd.h" +#include "util.h" + +static struct command { + char *name; + char *opts; + char *summary; + int (*func)(int argc, char **argv); +} cmds[100], *next_cmd = cmds; + +#define cmd_for_each(com) for (com = cmds; com->func; com++) + +void cmd_register(char *name, char *opts, char *summary, + int (*func)(int argc, char **argv)) +{ + struct command *com = next_cmd++; + + assert((com - cmds) < array_size(cmds)); + + com->name = name; + com->opts = opts; + com->summary = summary; + com->func = func; +} + +static struct command *find_command(char *name) +{ + struct command *com; + + cmd_for_each(com) { + if (!strcmp(name, com->name)) + return com; + } + + return NULL; +} + +static void usage(void) +{ + struct command *com; + + fprintf(stderr, "usage: scoutfs []\n" + "Commands:\n"); + + cmd_for_each(com) { + fprintf(stderr, " %8s %12s - %s\n", + com->name, com->opts, com->summary); + } +} + +int cmd_execute(int argc, char **argv) +{ + struct command *com = NULL; + int ret; + + if (argc > 1) { + com = find_command(argv[1]); + if (!com) + fprintf(stderr, "scoutfs: unrecognized command: '%s'\n", + argv[1]); + } + if (!com) { + usage(); + return 1; + } + + ret = com->func(argc - 2, argv + 2); + if (ret < 0) { + fprintf(stderr, "scoutfs: %s failed: %s (%d)\n", + com->name, strerror(-ret), -ret); + return 1; + } + + return 0; +} diff --git a/utils/src/cmd.h b/utils/src/cmd.h new file mode 100644 index 00000000..53515b36 --- /dev/null +++ b/utils/src/cmd.h @@ -0,0 +1,9 @@ +#ifndef _CMD_H_ +#define _CMD_H_ + +void cmd_register(char *name, 
char *opts, char *summary, + int (*func)(int argc, char **argv)); + +int cmd_execute(int argc, char **argv); + +#endif diff --git a/utils/src/crc.c b/utils/src/crc.c new file mode 100644 index 00000000..6f40350a --- /dev/null +++ b/utils/src/crc.c @@ -0,0 +1,39 @@ +#include "crc.h" +#include "util.h" +#include "format.h" + +u32 crc32c(u32 crc, const void *data, unsigned int len) +{ + while (len >= 8) { + crc = __builtin_ia32_crc32di(crc, *(u64 *)data); + len -= 8; + data += 8; + } + if (len & 4) { + crc = __builtin_ia32_crc32si(crc, *(u32 *)data); + data += 4; + } + if (len & 2) { + crc = __builtin_ia32_crc32hi(crc, *(u16 *)data); + data += 2; + } + if (len & 1) + crc = __builtin_ia32_crc32qi(crc, *(u8 *)data); + + return crc; +} + +/* A simple hack to get reasonably solid 64bit hash values */ +u64 crc32c_64(u32 crc, const void *data, unsigned int len) +{ + unsigned int half = (len + 1) / 2; + + return ((u64)crc32c(crc, data, half) << 32) | + crc32c(~crc, data + len - half, half); +} + +u32 crc_header(struct scoutfs_header *hdr, size_t size) +{ + return crc32c(~0, (char *)hdr + sizeof(hdr->crc), + size - sizeof(hdr->crc)); +} diff --git a/utils/src/crc.h b/utils/src/crc.h new file mode 100644 index 00000000..d9e370e7 --- /dev/null +++ b/utils/src/crc.h @@ -0,0 +1,12 @@ +#ifndef _CRC_H_ +#define _CRC_H_ + +#include "sparse.h" +#include "util.h" +#include "format.h" + +u32 crc32c(u32 crc, const void *data, unsigned int len); +u64 crc32c_64(u32 crc, const void *data, unsigned int len); +u32 crc_header(struct scoutfs_header *hdr, size_t size); + +#endif diff --git a/utils/src/format.h b/utils/src/format.h new file mode 100644 index 00000000..bff0b7cb --- /dev/null +++ b/utils/src/format.h @@ -0,0 +1,234 @@ +#ifndef _SCOUTFS_FORMAT_H_ +#define _SCOUTFS_FORMAT_H_ + +/* statfs(2) f_type */ +#define SCOUTFS_SUPER_MAGIC 0x554f4353 /* "SCOU" */ +/* super block id */ +#define SCOUTFS_SUPER_ID 0x2e736674756f6373ULL /* "scoutfs." 
/*
 * The super is stored in a pair of bricks in the first block.
 *
 * All multi-byte fields are little endian on disk.
 */
struct scoutfs_super {
	/* common crc/fsid/seq/nr header found at the start of every brick */
	struct scoutfs_header hdr;
	/* mkfs stores SCOUTFS_SUPER_ID here */
	__le64 id;
	/* mkfs fills this with uuid_generate() */
	__u8 uuid[SCOUTFS_UUID_BYTES];
	/* device size in SCOUTFS_BLOCK_SIZE units, as computed by mkfs */
	__le64 total_blocks;
	/* block number of the ring layout block; print reads it from here */
	__le64 ring_layout_block;
	/* mkfs copies this into the ring layout block's hdr.seq */
	__le64 ring_layout_seq;
	/* presumably the most recently written ring brick -- TODO confirm */
	__le64 last_ring_brick;
	/* mkfs copies this into the ring block's hdr.seq */
	__le64 last_ring_seq;
	/* mkfs copies this into the lsm block's hdr.seq and manifest entry */
	__le64 last_block_seq;
} __packed;
They're keyed by the block number so that we can log + * movement of a block in the manifest with one log entry and we can log + * deletion with just the block number. + */ +struct scoutfs_ring_brick { + struct scoutfs_header hdr; + __le16 nr_entries; +} __packed; + +enum { + SCOUTFS_RING_REMOVE_MANIFEST = 0, + SCOUTFS_RING_ADD_MANIFEST, + SCOUTFS_RING_BITMAP, +}; + +/* + * Manifest entries are logged by their block number. This lets us log + * a change with one entry and a removal with a tiny block number + * without the key. + */ +struct scoutfs_ring_remove_manifest { + __le64 block; +} __packed; + +/* + * Including both keys might make the manifest too large. It might be + * better to only include one key and infer a block's range from the + * neighbour's key. The downside of that is that we assume that there + * isn't unused key space between blocks in a level. We might search + * blocks when we didn't need to. + */ +struct scoutfs_ring_add_manifest { + __le64 block; + __le64 seq; + __u8 level; + struct scoutfs_key first; + struct scoutfs_key last; +} __packed; + +struct scoutfs_ring_bitmap { + __le32 offset; + __le64 bits[2]; +} __packed; + +/* + * This bloom size is chosen to have a roughly 1% false positive rate + * for ~90k items which is roughly the worst case for a block full of + * dirents with reasonably small names. Pathologically smaller items + * could be even more dense. + */ +#define SCOUTFS_BLOOM_FILTER_BYTES (128 * 1024) +#define SCOUTFS_BLOOM_FILTER_BITS (SCOUTFS_BLOOM_FILTER_BYTES * 8) +#define SCOUTFS_BLOOM_INDEX_BITS (ilog2(SCOUTFS_BLOOM_FILTER_BITS)) +#define SCOUTFS_BLOOM_INDEX_MASK ((1 << SCOUTFS_BLOOM_INDEX_BITS) - 1) +#define SCOUTFS_BLOOM_INDEX_NR 7 + +struct scoutfs_lsm_block { + struct scoutfs_header hdr; + struct scoutfs_key first; + struct scoutfs_key last; + __le32 nr_items; + /* u8 bloom[SCOUTFS_BLOOM_BYTES]; */ + /* struct scoutfs_item_header items[0] .. 
*/ +} __packed; + +struct scoutfs_item_header { + struct scoutfs_key key; + __le16 len; +} __packed; + +struct scoutfs_timespec { + __le64 sec; + __le32 nsec; +} __packed; + +/* + * XXX + * - otime? + * - compat flags? + * - version? + * - generation? + * - be more careful with rdev? + */ +struct scoutfs_inode { + __le64 size; + __le64 blocks; + __le32 nlink; + __le32 uid; + __le32 gid; + __le32 mode; + __le32 rdev; + __le32 salt; + struct scoutfs_timespec atime; + struct scoutfs_timespec ctime; + struct scoutfs_timespec mtime; +} __packed; + +#define SCOUTFS_ROOT_INO 1 + +/* + * Dirents are stored in items with an offset of the hash of their name. + * Colliding names are packed into the value. + */ +struct scoutfs_dirent { + __le64 ino; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 type:4, + coll_nr:4; +#else + __u8 coll_nr:4, + type:4; +#endif + __u8 name_len; + __u8 name[0]; +} __packed; + +#define SCOUTFS_NAME_LEN 255 + +/* + * We only use 31 bits for readdir positions so that we don't confuse + * old signed 32bit f_pos applications or those on the other side of + * network protocols that have limited readir positions. + */ + +#define SCOUTFS_DIRENT_OFF_BITS 27 +#define SCOUTFS_DIRENT_OFF_MASK ((1 << SCOUTFS_DIRENT_OFF_BITS) - 1) +#define SCOUTFS_DIRENT_COLL_BITS 4 +#define SCOUTFS_DIRENT_COLL_MASK ((1 << SCOUTFS_DIRENT_COLL_BITS) - 1) + +/* getdents returns the *next* pos with each entry. 
/*
 * Entry point: hand the arguments to the command dispatcher and turn
 * its result into the process exit status.
 */
int main(int argc, char **argv)
{
	int ret;

	/*
	 * XXX parse global options, env, configs, etc.
	 */

	/*
	 * cmd_execute() reports failure by returning a positive 1, not a
	 * negative errno, so any non-zero result has to become a failing
	 * exit status.  Testing for "< 0" made every failure exit with 0.
	 */
	ret = cmd_execute(argc, argv);
	if (ret != 0)
		return 1;

	return 0;
}
+ * + * - super blocks that describe ring log + * - ring log with free bitmap entries + * - ring log with manifest entries + * - single item block with root dir + */ +static int write_new_fs(char *path, int fd) +{ + struct scoutfs_super *super; + struct scoutfs_inode *inode; + struct scoutfs_ring_layout *rlo; + struct scoutfs_ring_brick *ring; + struct scoutfs_ring_entry *ent; + struct scoutfs_ring_add_manifest *mani; + struct scoutfs_ring_bitmap *bm; + struct scoutfs_lsm_block *lblk; + struct scoutfs_item_header *ihdr; + struct scoutfs_key root_key; + struct timeval tv; + char uuid_str[37]; + struct stat st; + unsigned int i; + u64 total_blocks; + void *buf; + int ret; + + gettimeofday(&tv, NULL); + + super = malloc(SCOUTFS_BRICK_SIZE); + buf = malloc(SCOUTFS_BLOCK_SIZE); + if (!super || !buf) { + ret = -errno; + fprintf(stderr, "failed to allocate a block: %s (%d)\n", + strerror(errno), errno); + goto out; + } + + if (fstat(fd, &st)) { + ret = -errno; + fprintf(stderr, "failed to stat '%s': %s (%d)\n", + path, strerror(errno), errno); + goto out; + } + + total_blocks = st.st_size >> SCOUTFS_BLOCK_SHIFT; + + root_key.inode = cpu_to_le64(SCOUTFS_ROOT_INO); + root_key.type = SCOUTFS_INODE_KEY; + root_key.offset = 0; + + /* initialize the super */ + memset(super, 0, sizeof(struct scoutfs_super)); + pseudo_random_bytes(&super->hdr.fsid, sizeof(super->hdr.fsid)); + super->hdr.seq = cpu_to_le64(1); + super->id = cpu_to_le64(SCOUTFS_SUPER_ID); + uuid_generate(super->uuid); + super->total_blocks = cpu_to_le64(total_blocks); + super->ring_layout_block = cpu_to_le64(1); + super->ring_layout_seq = cpu_to_le64(1); + super->last_ring_brick = cpu_to_le64(1); + super->last_ring_seq = cpu_to_le64(1); + super->last_block_seq = cpu_to_le64(1); + + /* the ring has a single block for now */ + memset(buf, 0, SCOUTFS_BLOCK_SIZE); + rlo = buf; + rlo->hdr.fsid = super->hdr.fsid; + rlo->hdr.seq = super->ring_layout_seq; + rlo->nr_blocks = cpu_to_le32(1); + rlo->blocks[0] = cpu_to_le64(2); 
+ + ret = write_block(fd, 1, &rlo->hdr); + if (ret) + goto out; + + /* log the root inode block manifest and free bitmap */ + memset(buf, 0, SCOUTFS_BLOCK_SIZE); + ring = buf; + ring->hdr.fsid = super->hdr.fsid; + ring->hdr.seq = super->last_ring_seq; + ring->nr_entries = cpu_to_le16(2); + ent = (void *)(ring + 1); + ent->type = SCOUTFS_RING_ADD_MANIFEST; + ent->len = cpu_to_le16(sizeof(*mani)); + mani = (void *)(ent + 1); + mani->block = cpu_to_le64(3); + mani->seq = super->last_block_seq; + mani->level = 0; + mani->first = root_key; + mani->last = root_key; + ent = (void *)(mani + 1); + ent->type = SCOUTFS_RING_BITMAP; + ent->len = cpu_to_le16(sizeof(*bm)); + bm = (void *)(ent + 1); + memset(bm->bits, 0xff, sizeof(bm->bits)); + /* the first three blocks are allocated */ + bm->bits[0] = cpu_to_le64(~7ULL); + bm->bits[1] = cpu_to_le64(~0ULL); + + ret = write_block(fd, 2, &ring->hdr); + if (ret) + goto out; + + /* write a single lsm block with the root inode item */ + memset(buf, 0, SCOUTFS_BLOCK_SIZE); + lblk = buf; + lblk->hdr.fsid = super->hdr.fsid; + lblk->hdr.seq = super->last_block_seq; + lblk->first = root_key; + lblk->last = root_key; + lblk->nr_items = cpu_to_le32(1); + /* XXX set bloom */ + ihdr = (void *)((char *)(lblk + 1) + SCOUTFS_BLOOM_FILTER_BYTES); + ihdr->key = root_key; + ihdr->len = cpu_to_le16(sizeof(struct scoutfs_inode)); + inode = (void *)(ihdr + 1); + inode->nlink = cpu_to_le32(2); + inode->mode = cpu_to_le32(0755 | 0040000); + inode->atime.sec = cpu_to_le64(tv.tv_sec); + inode->atime.nsec = cpu_to_le32(tv.tv_usec * 1000); + inode->ctime.sec = inode->atime.sec; + inode->ctime.nsec = inode->atime.nsec; + inode->mtime.sec = inode->atime.sec; + inode->mtime.nsec = inode->atime.nsec; + + ret = write_block(fd, 3, &ring->hdr); + if (ret) + goto out; + + /* write the two super bricks */ + for (i = 0; i < 2; i++) { + super->hdr.seq = cpu_to_le64(i); + ret = write_brick(fd, SCOUTFS_SUPER_BRICK + i, &super->hdr); + if (ret) + goto out; + } + + if 
(fsync(fd)) { + ret = -errno; + fprintf(stderr, "failed to fsync '%s': %s (%d)\n", + path, strerror(errno), errno); + goto out; + } + + uuid_unparse(super->uuid, uuid_str); + + printf("Created scoutfs filesystem:\n" + " total blocks: %llu\n" + " fsid: %llx\n" + " uuid: %s\n", + total_blocks, le64_to_cpu(super->hdr.fsid), uuid_str); + + ret = 0; +out: + free(super); + free(buf); + return ret; +} + +static int mkfs_func(int argc, char *argv[]) +{ + char *path = argv[0]; + int ret; + int fd; + + if (argc != 1) { + printf("scoutfs: mkfs: a single path argument is required\n"); + return -EINVAL; + } + + fd = open(path, O_RDWR | O_EXCL); + if (fd < 0) { + ret = -errno; + fprintf(stderr, "failed to open '%s': %s (%d)\n", + path, strerror(errno), errno); + return ret; + } + + ret = write_new_fs(path, fd); + close(fd); + + return ret; +} + +static void __attribute__((constructor)) mkfs_ctor(void) +{ + cmd_register("mkfs", "", "write a new file system", mkfs_func); + + /* for lack of some other place to put these.. 
*/ + build_assert(sizeof(uuid_t) == SCOUTFS_UUID_BYTES); +} diff --git a/utils/src/print.c b/utils/src/print.c new file mode 100644 index 00000000..df74bc67 --- /dev/null +++ b/utils/src/print.c @@ -0,0 +1,402 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sparse.h" +#include "util.h" +#include "format.h" +#include "cmd.h" +#include "crc.h" +#include "lebitmap.h" + +/* XXX maybe these go somewhere */ +#define SKF "%llu.%u.%llu" +#define SKA(k) le64_to_cpu((k)->inode), (k)->type, \ + le64_to_cpu((k)->offset) + +static void *read_buf(int fd, u64 nr, size_t size) +{ + off_t off = nr * size; + ssize_t ret; + void *buf; + + buf = malloc(size); + if (!buf) + return NULL; + + ret = pread(fd, buf, size, off); + if (ret != size) { + fprintf(stderr, "read at blkno %llu (offset %llu) returned %zd: %s (%d)\n", + nr, (long long)off, ret, strerror(errno), errno); + free(buf); + buf = NULL; + } + + return buf; +} + +static void *read_brick(int fd, u64 nr) +{ + return read_buf(fd, nr, SCOUTFS_BRICK_SIZE); +} + +static void *read_block(int fd, u64 nr) +{ + return read_buf(fd, nr, SCOUTFS_BLOCK_SIZE); +} + +static void print_header(struct scoutfs_header *hdr, size_t size) +{ + u32 crc = crc_header(hdr, size); + char valid_str[40]; + + if (crc != le32_to_cpu(hdr->crc)) + sprintf(valid_str, "# != %08x", crc); + else + valid_str[0] = '\0'; + + printf(" header:\n" + " crc: %08x %s\n" + " fsid: %llx\n" + " seq: %llu\n" + " nr: %llu\n", + le32_to_cpu(hdr->crc), valid_str, le64_to_cpu(hdr->fsid), + le64_to_cpu(hdr->seq), le64_to_cpu(hdr->nr)); +} + +static void print_brick_header(struct scoutfs_header *hdr) +{ + return print_header(hdr, SCOUTFS_BRICK_SIZE); +} + +static void print_block_header(struct scoutfs_header *hdr) +{ + return print_header(hdr, SCOUTFS_BLOCK_SIZE); +} + +static void print_inode(struct scoutfs_inode *inode) +{ + printf(" inode:\n" + " size: %llu\n" + " blocks: %llu\n" + " nlink: %u\n" + " 
uid: %u\n" + " gid: %u\n" + " mode: 0%o\n" + " rdev: 0x%x\n" + " salt: 0x%x\n" + " atime: %llu.%08u\n" + " ctime: %llu.%08u\n" + " mtime: %llu.%08u\n", + le64_to_cpu(inode->size), le64_to_cpu(inode->blocks), + le32_to_cpu(inode->nlink), le32_to_cpu(inode->uid), + le32_to_cpu(inode->gid), le32_to_cpu(inode->mode), + le32_to_cpu(inode->rdev), le32_to_cpu(inode->salt), + le64_to_cpu(inode->atime.sec), + le32_to_cpu(inode->atime.nsec), + le64_to_cpu(inode->ctime.sec), + le32_to_cpu(inode->ctime.nsec), + le64_to_cpu(inode->mtime.sec), + le32_to_cpu(inode->mtime.nsec)); +} + +static void print_item(struct scoutfs_item_header *ihdr, size_t off) +{ + printf(" item: &%zu\n" + " key: "SKF"\n" + " len: %u\n", + off, SKA(&ihdr->key), le16_to_cpu(ihdr->len)); + + switch(ihdr->key.type) { + case SCOUTFS_INODE_KEY: + print_inode((void *)(ihdr + 1)); + break; + } +} + +static int print_block(int fd, u64 nr) +{ + struct scoutfs_item_header *ihdr; + struct scoutfs_lsm_block *lblk; + size_t off; + int i; + + lblk = read_block(fd, nr); + if (!lblk) + return -ENOMEM; + + printf("block: &%llu\n", le64_to_cpu(lblk->hdr.nr)); + print_block_header(&lblk->hdr); + printf(" first: "SKF"\n" + " last: "SKF"\n" + " nr_items: %u\n", + SKA(&lblk->first), SKA(&lblk->last), + le32_to_cpu(lblk->nr_items)); + off = (char *)(lblk + 1) - (char *)lblk + SCOUTFS_BLOOM_FILTER_BYTES; + + for (i = 0; i < le32_to_cpu(lblk->nr_items); i++) { + ihdr = (void *)((char *)lblk + off); + print_item(ihdr, off); + + off += sizeof(struct scoutfs_item_header) + + le16_to_cpu(ihdr->len); + } + + free(lblk); + + return 0; +} + +static int print_blocks(int fd, __le64 *live_blocks, u64 total_blocks) +{ + int ret = 0; + int err; + s64 nr; + + while ((nr = find_first_le_bit(live_blocks, total_blocks)) >= 0) { + clear_le_bit(live_blocks, nr); + + err = print_block(fd, nr); + if (!ret && err) + ret = err; + } + + return ret; +} + +static char *ent_type_str(u8 type) +{ + switch (type) { + case SCOUTFS_RING_REMOVE_MANIFEST: + 
return "REMOVE_MANIFEST"; + case SCOUTFS_RING_ADD_MANIFEST: + return "ADD_MANIFEST"; + case SCOUTFS_RING_BITMAP: + return "BITMAP"; + default: + return "(unknown)"; + } +} + +static void print_ring_entry(int fd, struct scoutfs_ring_entry *ent, + size_t off) +{ + struct scoutfs_ring_remove_manifest *rem; + struct scoutfs_ring_add_manifest *add; + struct scoutfs_ring_bitmap *bm; + + printf(" entry: &%zu\n" + " type: %u # %s\n" + " len: %u\n", + off, ent->type, ent_type_str(ent->type), le16_to_cpu(ent->len)); + + switch(ent->type) { + case SCOUTFS_RING_REMOVE_MANIFEST: + rem = (void *)(ent + 1); + printf(" block: %llu\n", + le64_to_cpu(rem->block)); + break; + case SCOUTFS_RING_ADD_MANIFEST: + add = (void *)(ent + 1); + printf(" block: %llu\n" + " seq: %llu\n" + " level: %u\n" + " first: "SKF"\n" + " last: "SKF"\n", + le64_to_cpu(add->block), le64_to_cpu(add->seq), + add->level, SKA(&add->first), SKA(&add->last)); + break; + case SCOUTFS_RING_BITMAP: + bm = (void *)(ent + 1); + printf(" offset: %u\n" + " bits: 0x%llx%llx\n", + le32_to_cpu(bm->offset), + le64_to_cpu(bm->bits[1]), le64_to_cpu(bm->bits[0])); + break; + } +} + +static void update_live_blocks(struct scoutfs_ring_entry *ent, + __le64 *live_blocks) +{ + struct scoutfs_ring_remove_manifest *rem; + struct scoutfs_ring_add_manifest *add; + + switch(ent->type) { + case SCOUTFS_RING_REMOVE_MANIFEST: + rem = (void *)(ent + 1); + clear_le_bit(live_blocks, le64_to_cpu(rem->block)); + break; + case SCOUTFS_RING_ADD_MANIFEST: + add = (void *)(ent + 1); + set_le_bit(live_blocks, le64_to_cpu(add->block)); + break; + } +} + +static int print_ring_block(int fd, u64 block_nr, __le64 *live_blocks) +{ + struct scoutfs_ring_brick *ring; + struct scoutfs_ring_entry *ent; + size_t off; + int ret = 0; + u64 nr; + int i; + + /* XXX just printing the first brick for now */ + + nr = block_nr << SCOUTFS_BLOCK_BRICK; + ring = read_brick(fd, nr); + if (!ring) + return -ENOMEM; + + printf("ring brick: &%llu\n", nr); + 
print_brick_header(&ring->hdr); + printf(" nr_entries: %u\n", le16_to_cpu(ring->nr_entries)); + + off = sizeof(struct scoutfs_ring_brick); + for (i = 0; i < le16_to_cpu(ring->nr_entries); i++) { + ent = (void *)((char *)ring + off); + + update_live_blocks(ent, live_blocks); + + print_ring_entry(fd, ent, off); + + off += sizeof(struct scoutfs_ring_entry) + + le16_to_cpu(ent->len); + } + + free(ring); + return ret; +} + +static int print_ring_layout(int fd, u64 blkno, __le64 *live_blocks) +{ + struct scoutfs_ring_layout *rlo; + int ret = 0; + int err; + int i; + + rlo = read_block(fd, blkno); + if (!rlo) + return -ENOMEM; + + printf("ring layout: &%llu\n", blkno); + print_block_header(&rlo->hdr); + printf(" nr_blocks: %u\n", le32_to_cpu(rlo->nr_blocks)); + + printf(" blocks: "); + for (i = 0; i < le32_to_cpu(rlo->nr_blocks); i++) + printf(" %llu\n", le64_to_cpu(rlo->blocks[i])); + + for (i = 0; i < le32_to_cpu(rlo->nr_blocks); i++) { + err = print_ring_block(fd, le64_to_cpu(rlo->blocks[i]), + live_blocks); + if (err && !ret) + ret = err; + } + + free(rlo); + return 0; +} + +static int print_super_brick(int fd) +{ + struct scoutfs_super *super; + char uuid_str[37]; + __le64 *live_blocks; + u64 total_blocks; + size_t bytes; + int ret = 0; + int err; + + /* XXX print both */ + super = read_brick(fd, SCOUTFS_SUPER_BRICK); + if (!super) + return -ENOMEM; + + uuid_unparse(super->uuid, uuid_str); + + total_blocks = le64_to_cpu(super->total_blocks); + + printf("super: &%llu\n", le64_to_cpu(super->hdr.nr)); + print_brick_header(&super->hdr); + printf(" id: %llx\n" + " uuid: %s\n" + " total_blocks: %llu\n" + " ring_layout_block: %llu\n" + " ring_layout_seq: %llu\n" + " last_ring_brick: %llu\n" + " last_ring_seq: %llu\n" + " last_block_seq: %llu\n", + le64_to_cpu(super->id), + uuid_str, + total_blocks, + le64_to_cpu(super->ring_layout_block), + le64_to_cpu(super->ring_layout_seq), + le64_to_cpu(super->last_ring_brick), + le64_to_cpu(super->last_ring_seq), + 
/* how many times a failing rdrand step is retried before giving up */
enum { RDRAND_RETRIES = 10 };

/*
 * Ask the hardware for 64 random bits.  The rdrand step reports
 * failure through its return value, so it is retried a bounded number
 * of times.  If it never succeeds we return whatever the last attempt
 * left behind, which is no worse than not checking at all.
 */
static unsigned long long rdrand64(void)
{
	unsigned long long val = 0;
	int tries;

	for (tries = 0; tries < RDRAND_RETRIES; tries++) {
		if (__builtin_ia32_rdrand64_step(&val))
			break;
	}

	return val;
}

/*
 * Fill every byte of the 'len' byte buffer at 'data' with output from
 * the rdrand instruction.
 *
 * Each random word is staged in a local and copied into place so the
 * destination can have any alignment and any length.  The final
 * partial word, if there is one, is written at the end of the buffer.
 */
void pseudo_random_bytes(void *data, unsigned int len)
{
	unsigned char *dst = data;
	unsigned long long word;
	unsigned int chunk;

	while (len) {
		word = rdrand64();

		chunk = len < sizeof(word) ? len : sizeof(word);
		memcpy(dst, &word, chunk);

		dst += chunk;
		len -= chunk;
	}
}
/*
 * __force and __bitwise only mean something to sparse, which defines
 * __CHECKER__.  For a regular compile they expand to nothing.
 */
#ifdef __CHECKER__
# undef __force
# define __force	__attribute__((force))
# undef __bitwise
# define __bitwise	__attribute__((bitwise))
/*
 * sparse seems to get confused by some builtins so we declare them for
 * it.  Every declaration spells out its return type; a declaration
 * without one relies on implicit int, which C99 removed.
 */
extern int __builtin_ia32_rdrand64_step(unsigned long long *);
extern unsigned int __builtin_ia32_crc32di(unsigned int, unsigned long long);
extern unsigned int __builtin_ia32_crc32si(unsigned int, unsigned int);
extern unsigned int __builtin_ia32_crc32hi(unsigned int, unsigned short);
extern unsigned int __builtin_ia32_crc32qi(unsigned int, unsigned char);

#else
# define __force
# define __bitwise
#endif
/*
 * Evaluate to the smaller of the two arguments.  Each argument is
 * copied into a local of its own type first so it is only evaluated
 * once, which keeps arguments with side effects safe.
 */
#define min(a, b)					\
({							\
	__typeof__(a) _min_lhs = (a);			\
	__typeof__(b) _min_rhs = (b);			\
							\
	_min_rhs > _min_lhs ? _min_lhs : _min_rhs;	\
})
/*
 * Return -1, 0, or +1 based on the memcmp() comparison of the minimum
 * of the two lengths.  If the shared bytes are equal but the lengths
 * are not then the buffer with the larger length is considered greater.
 *
 * The result really is normalized to -1, 0, or +1: the raw memcmp()
 * value and the raw length difference are never returned, so callers
 * may compare against those constants and the subtraction of two ints
 * can't overflow.  A negative length is treated as sharing no bytes
 * rather than being converted to an enormous unsigned count.
 */
static inline int memcmp_lens(const void *a, int a_len,
			      const void *b, int b_len)
{
	int shared = a_len < b_len ? a_len : b_len;
	int cmp = 0;

	if (shared > 0)
		cmp = memcmp(a, b, (size_t)shared);

	/* the shared prefix matched, fall back to comparing the lengths */
	if (cmp == 0)
		return (a_len > b_len) - (a_len < b_len);

	return cmp < 0 ? -1 : 1;
}