diff --git a/utils/man/scoutfs.8 b/utils/man/scoutfs.8
index e05c42e0..09062fb0 100644
--- a/utils/man/scoutfs.8
+++ b/utils/man/scoutfs.8
@@ -32,7 +32,7 @@ A path within a ScoutFS filesystem.
 .PD
 
 .TP
-.BI "mkfs META-DEVICE DATA-DEVICE {-Q|--quorum-slot} NR,ADDR,PORT [-m|--max-meta-size SIZE] [-d|--max-data-size SIZE] [-f|--force]"
+.BI "mkfs META-DEVICE DATA-DEVICE {-Q|--quorum-slot} NR,ADDR,PORT [-m|--max-meta-size SIZE] [-d|--max-data-size SIZE] [-z|--data-alloc-zone-blocks BLOCKS] [-f|--force]"
 .sp
 Initialize a new ScoutFS filesystem on the target devices. Since ScoutFS uses
 separate block devices for its metadata and data storage, two are required.
@@ -81,6 +81,10 @@ kibibytes, mebibytes, etc.
 .B "-d, --max-data-size SIZE"
 Same as previous, but for limiting the size of the data device.
 .TP
+.B "-z, --data-alloc-zone-blocks BLOCKS"
+Set the data_alloc_zone_blocks volume option, as described in
+.BR scoutfs (5).
+.TP
 .B "-f, --force"
 Ignore presence of existing data on the data and metadata devices.
 .RE
diff --git a/utils/src/mkfs.c b/utils/src/mkfs.c
index 8b293996..92dc0b50 100644
--- a/utils/src/mkfs.c
+++ b/utils/src/mkfs.c
@@ -97,11 +97,59 @@ static int write_alloc_root(int fd, __le64 fsid,
 				 SCOUTFS_BLOCK_LG_SHIFT, &bt->hdr);
 }
 
+/*
+ * Smallest data_alloc_zone_blocks value that mkfs accepts: 4GiB
+ * expressed in blocks of (1 << SCOUTFS_BLOCK_SM_SHIFT) bytes.
+ */
+#define SCOUTFS_SERVER_DATA_FILL_TARGET \
+	((4ULL * 1024 * 1024 * 1024) >> SCOUTFS_BLOCK_SM_SHIFT)
+
+/*
+ * Validate a requested data_alloc_zone_blocks value against the number
+ * of blocks on the data device.  Zero is always accepted; do_mkfs()
+ * leaves the volume option unset in that case.  A non-zero value must
+ * be at least SCOUTFS_SERVER_DATA_FILL_TARGET, at least
+ * total_data_blocks / SCOUTFS_DATA_ALLOC_MAX_ZONES, and at most
+ * total_data_blocks.
+ *
+ * Returns true after printing the reason to stderr when the value is
+ * invalid, false when it is acceptable.
+ */
+static bool invalid_data_alloc_zone_blocks(u64 total_data_blocks, u64 zone_blocks)
+{
+	u64 nr;
+
+	if (zone_blocks == 0)
+		return false;
+
+	if (zone_blocks < SCOUTFS_SERVER_DATA_FILL_TARGET) {
+		fprintf(stderr, "setting data_alloc_zone_blocks to '%llu' failed, must be at least %llu mount data allocation target blocks\n",
+			zone_blocks, SCOUTFS_SERVER_DATA_FILL_TARGET);
+		return true;
+	}
+
+	nr = total_data_blocks / SCOUTFS_DATA_ALLOC_MAX_ZONES;
+	if (zone_blocks < nr) {
+		fprintf(stderr, "setting data_alloc_zone_blocks to '%llu' failed, must be at least %llu blocks which results in max %u zones\n",
+			zone_blocks, nr, SCOUTFS_DATA_ALLOC_MAX_ZONES);
+		return true;
+	}
+
+	if (zone_blocks > total_data_blocks) {
+		fprintf(stderr, "setting data_alloc_zone_blocks to '%llu' failed, must be at most %llu total data device blocks\n",
+			zone_blocks, total_data_blocks);
+		return true;
+	}
+
+	return false;
+}
+
 struct mkfs_args {
 	char *meta_device;
 	char *data_device;
 	unsigned long long max_meta_size;
 	unsigned long long max_data_size;
+	u64 data_alloc_zone_blocks;
 	bool force;
 	int nr_slots;
 	struct scoutfs_quorum_slot slots[SCOUTFS_QUORUM_MAX_SLOTS];
@@ -216,6 +264,17 @@ static int do_mkfs(struct mkfs_args *args)
 			member_sizeof(struct scoutfs_super_block, qconf.slots));
 	memcpy(super->qconf.slots, args->slots, sizeof(args->slots));
 
+	if (invalid_data_alloc_zone_blocks(le64_to_cpu(super->total_data_blocks),
+					   args->data_alloc_zone_blocks)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (args->data_alloc_zone_blocks) {
+		super->volopt.set_bits |= cpu_to_le64(SCOUTFS_VOLOPT_DATA_ALLOC_ZONE_BLOCKS_BIT);
+		super->volopt.data_alloc_zone_blocks = cpu_to_le64(args->data_alloc_zone_blocks);
+	}
+
 	/* fs root starts with root inode and its index items */
 	blkno = next_meta++;
 	btree_init_root_single(&super->fs_root, bt, 1, blkno);
@@ -477,6 +536,17 @@ static int parse_opt(int key, char *arg, struct argp_state *state)
 				   prev_val, args->max_data_size);
 		break;
 	}
+	case 'z': /* data-alloc-zone-blocks */
+	{
+		ret = parse_u64(arg, &args->data_alloc_zone_blocks);
+		if (ret)
+			return ret;
+
+		if (args->data_alloc_zone_blocks == 0)
+			argp_error(state, "must provide non-zero data-alloc-zone-blocks");
+
+		break;
+	}
 	case ARGP_KEY_ARG:
 		if (!args->meta_device)
 			args->meta_device = strdup_or_error(state, arg);
@@ -507,6 +577,7 @@ static struct argp_option options[] = {
 	{ "force", 'f', NULL, 0, "Overwrite existing data on block devices"},
 	{ "max-meta-size", 'm', "SIZE", 0, "Use a size less than the base metadata device size (bytes or KMGTP units)"},
 	{ "max-data-size", 'd', "SIZE", 0, "Use a size less than the base data device size (bytes or KMGTP units)"},
+	{ "data-alloc-zone-blocks", 'z', "BLOCKS", 0, "Divide data device into block zones so each mount writes to a zone (4KB blocks)"},
 	{ NULL }
 };
 