From ef2daf8857703f9ca8b644f7f1e160be22f85a95 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Wed, 5 Oct 2022 15:19:56 -0700 Subject: [PATCH 1/5] Make data preallocation tunable Make mount options for the size of preallocation and whether or not it should be restricted to extending writes. Disabling the default restriction to streaming writes lets it preallocate in aligned regions of the preallocation size when they contain no extents. Signed-off-by: Zach Brown --- kmod/src/alloc.h | 11 ++-- kmod/src/data.c | 133 +++++++++++++++++++++++++++++--------------- kmod/src/options.c | 117 ++++++++++++++++++++++++++++++++++++++ kmod/src/options.h | 2 + utils/man/scoutfs.5 | 34 +++++++++++ 5 files changed, 246 insertions(+), 51 deletions(-) diff --git a/kmod/src/alloc.h b/kmod/src/alloc.h index 4496802c..70d39c5e 100644 --- a/kmod/src/alloc.h +++ b/kmod/src/alloc.h @@ -19,14 +19,11 @@ (128ULL * 1024 * 1024 >> SCOUTFS_BLOCK_SM_SHIFT) /* - * The largest aligned region that we'll try to allocate at the end of - * the file as it's extended. This is also limited to the current file - * size so we can only waste at most twice the total file size when - * files are less than this. We try to keep this around the point of - * diminishing returns in streaming performance of common data devices - * to limit waste. + * The default size that we'll try to preallocate. This is trying to + * hit the limit of large efficient device writes while minimizing + * wasted preallocation that is never used. */ -#define SCOUTFS_DATA_EXTEND_PREALLOC_LIMIT \ +#define SCOUTFS_DATA_PREALLOC_DEFAULT_BLOCKS \ (8ULL * 1024 * 1024 >> SCOUTFS_BLOCK_SM_SHIFT) /* diff --git a/kmod/src/data.c b/kmod/src/data.c index 2c0822db..402e50b6 100644 --- a/kmod/src/data.c +++ b/kmod/src/data.c @@ -366,27 +366,27 @@ static inline u64 ext_last(struct scoutfs_extent *ext) /* * The caller is writing to a logical iblock that doesn't have an - * allocated extent. + * allocated extent. 
The caller has searched for an extent containing + * iblock. If it already existed then it must be unallocated and + * offline. * - * We always allocate an extent starting at the logical iblock. The - * caller has searched for an extent containing iblock. If it already - * existed then it must be unallocated and offline. + * We implement two preallocation strategies. Typically we only + * preallocate for simple streaming writes and limit preallocation while + * the file is small. The largest efficient allocation size is + * typically large enough that it would be unreasonable to allocate that + * much for all small files. * - * Preallocation is used if we're strictly contiguously extending - * writes. That is, if the logical block offset equals the number of - * online blocks. We try to preallocate the number of blocks existing - * so that small files don't waste inordinate amounts of space and large - * files will eventually see large extents. This only works for - * contiguous single stream writes or stages of files from the first - * block. It doesn't work for concurrent stages, releasing behind - * staging, sparse files, multi-node writes, etc. fallocate() is always - * a better tool to use. + * Optionally, we can simply preallocate large empty aligned regions. + * This can waste a lot of space for small or sparse files but is + * reasonable when a file population is known to be large and dense but + * known to be written with non-streaming write patterns. 
*/ static int alloc_block(struct super_block *sb, struct inode *inode, struct scoutfs_extent *ext, u64 iblock, struct scoutfs_lock *lock) { DECLARE_DATA_INFO(sb, datinf); + struct scoutfs_mount_options opts; const u64 ino = scoutfs_ino(inode); struct data_ext_args args = { .ino = ino, @@ -394,17 +394,22 @@ static int alloc_block(struct super_block *sb, struct inode *inode, .lock = lock, }; struct scoutfs_extent found; - struct scoutfs_extent pre; + struct scoutfs_extent pre = {0,}; + bool undo_pre = false; u64 blkno = 0; u64 online; u64 offline; u8 flags; + u64 start; u64 count; + u64 rem; int ret; int err; trace_scoutfs_data_alloc_block_enter(sb, ino, iblock, ext); + scoutfs_options_read(sb, &opts); + /* can only allocate over existing unallocated offline extent */ if (WARN_ON_ONCE(ext->len && !(iblock >= ext->start && iblock <= ext_last(ext) && @@ -413,66 +418,106 @@ static int alloc_block(struct super_block *sb, struct inode *inode, mutex_lock(&datinf->mutex); - scoutfs_inode_get_onoff(inode, &online, &offline); + /* default to single allocation at the written block */ + start = iblock; + count = 1; + /* copy existing flags for preallocated regions */ + flags = ext->len ? ext->flags : 0; if (ext->len) { - /* limit preallocation to remaining existing (offline) extent */ + /* + * Assume that offline writers are going to be writing + * all the offline extents and try to preallocate the + * rest of the unwritten extent. + */ count = ext->len - (iblock - ext->start); - flags = ext->flags; + + } else if (opts.data_prealloc_contig_only) { + /* + * Only preallocate when a quick test of the online + * block counts looks like we're a simple streaming + * write. Try to write until the next extent but limit + * the preallocation size to the number of online + * blocks. 
+ */ + scoutfs_inode_get_onoff(inode, &online, &offline); + if (iblock > 1 && iblock == online) { + ret = scoutfs_ext_next(sb, &data_ext_ops, &args, + iblock, 1, &found); + if (ret < 0 && ret != -ENOENT) + goto out; + if (found.len && found.start > iblock) + count = found.start - iblock; + else + count = opts.data_prealloc_blocks; + + count = min(iblock, count); + } + } else { - /* otherwise alloc to next extent */ - ret = scoutfs_ext_next(sb, &data_ext_ops, &args, - iblock, 1, &found); + /* + * Preallocation of aligned regions only preallocates if + * the aligned region contains no extents at all. This + * could be fooled by offline sparse extents but we + * don't want to iterate over all offline extents in the + * aligned region. + */ + div64_u64_rem(iblock, opts.data_prealloc_blocks, &rem); + start = iblock - rem; + count = opts.data_prealloc_blocks; + ret = scoutfs_ext_next(sb, &data_ext_ops, &args, start, 1, &found); if (ret < 0 && ret != -ENOENT) goto out; - if (found.len && found.start > iblock) - count = found.start - iblock; - else - count = SCOUTFS_DATA_EXTEND_PREALLOC_LIMIT; - flags = 0; + if (found.len && found.start < start + count) + count = 1; } /* overall prealloc limit */ - count = min_t(u64, count, SCOUTFS_DATA_EXTEND_PREALLOC_LIMIT); - - /* only strictly contiguous extending writes will try to preallocate */ - if (iblock > 1 && iblock == online) - count = min(iblock, count); - else - count = 1; + count = min_t(u64, count, opts.data_prealloc_blocks); ret = scoutfs_alloc_data(sb, datinf->alloc, datinf->wri, &datinf->dalloc, count, &blkno, &count); if (ret < 0) goto out; - ret = scoutfs_ext_set(sb, &data_ext_ops, &args, iblock, 1, blkno, 0); - if (ret < 0) - goto out; + /* + * An aligned prealloc attempt that gets a smaller extent can + * fail to cover iblock, make sure that it does. This is a + * pathological case so we don't try to move the window past + * iblock. Just enough to cover it, which we know is safe. 
+ */ + if (start + count <= iblock) + start += (iblock - (start + count) + 1); if (count > 1) { - pre.start = iblock + 1; - pre.len = count - 1; - pre.map = blkno + 1; + pre.start = start; + pre.len = count; + pre.map = blkno; pre.flags = flags | SEF_UNWRITTEN; ret = scoutfs_ext_set(sb, &data_ext_ops, &args, pre.start, pre.len, pre.map, pre.flags); - if (ret < 0) { - err = scoutfs_ext_set(sb, &data_ext_ops, &args, iblock, - 1, 0, flags); - BUG_ON(err); /* couldn't restore original */ + if (ret < 0) goto out; - } + undo_pre = true; } + ret = scoutfs_ext_set(sb, &data_ext_ops, &args, iblock, 1, blkno + (iblock - start), 0); + if (ret < 0) + goto out; + /* tell the caller we have a single block, could check next? */ ext->start = iblock; ext->len = 1; - ext->map = blkno; + ext->map = blkno + (iblock - start); ext->flags = 0; ret = 0; out: if (ret < 0 && blkno > 0) { + if (undo_pre) { + err = scoutfs_ext_set(sb, &data_ext_ops, &args, + pre.start, pre.len, 0, flags); + BUG_ON(err); /* leaked preallocated extent */ + } err = scoutfs_free_data(sb, datinf->alloc, datinf->wri, &datinf->data_freed, blkno, count); BUG_ON(err); /* leaked free blocks */ diff --git a/kmod/src/options.c b/kmod/src/options.c index 474d2b0d..e28bf2a6 100644 --- a/kmod/src/options.c +++ b/kmod/src/options.c @@ -27,9 +27,12 @@ #include "options.h" #include "super.h" #include "inode.h" +#include "alloc.h" enum { Opt_acl, + Opt_data_prealloc_blocks, + Opt_data_prealloc_contig_only, Opt_metadev_path, Opt_noacl, Opt_orphan_scan_delay_ms, @@ -39,6 +42,8 @@ enum { static const match_table_t tokens = { {Opt_acl, "acl"}, + {Opt_data_prealloc_blocks, "data_prealloc_blocks=%s"}, + {Opt_data_prealloc_contig_only, "data_prealloc_contig_only=%s"}, {Opt_metadev_path, "metadev_path=%s"}, {Opt_noacl, "noacl"}, {Opt_orphan_scan_delay_ms, "orphan_scan_delay_ms=%s"}, @@ -110,9 +115,15 @@ static void free_options(struct scoutfs_mount_options *opts) #define DEFAULT_ORPHAN_SCAN_DELAY_MS (10 * MSEC_PER_SEC) #define 
MAX_ORPHAN_SCAN_DELAY_MS (60 * MSEC_PER_SEC) +#define MIN_DATA_PREALLOC_BLOCKS 1ULL +#define MAX_DATA_PREALLOC_BLOCKS ((unsigned long long)SCOUTFS_BLOCK_SM_MAX) + static void init_default_options(struct scoutfs_mount_options *opts) { memset(opts, 0, sizeof(*opts)); + + opts->data_prealloc_blocks = SCOUTFS_DATA_PREALLOC_DEFAULT_BLOCKS; + opts->data_prealloc_contig_only = 1; opts->quorum_slot_nr = -1; opts->orphan_scan_delay_ms = -1; } @@ -126,6 +137,7 @@ static void init_default_options(struct scoutfs_mount_options *opts) static int parse_options(struct super_block *sb, char *options, struct scoutfs_mount_options *opts) { substring_t args[MAX_OPT_ARGS]; + u64 nr64; int nr; int token; char *p; @@ -142,6 +154,30 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m sb->s_flags |= MS_POSIXACL; break; + case Opt_data_prealloc_blocks: + ret = match_u64(args, &nr64); + if (ret < 0 || + nr64 < MIN_DATA_PREALLOC_BLOCKS || nr64 > MAX_DATA_PREALLOC_BLOCKS) { + scoutfs_err(sb, "invalid data_prealloc_blocks option, must be between %llu and %llu", + MIN_DATA_PREALLOC_BLOCKS, MAX_DATA_PREALLOC_BLOCKS); + if (ret == 0) + ret = -EINVAL; + return ret; + } + opts->data_prealloc_blocks = nr64; + break; + + case Opt_data_prealloc_contig_only: + ret = match_int(args, &nr); + if (ret < 0 || nr < 0 || nr > 1) { + scoutfs_err(sb, "invalid data_prealloc_contig_only option, bool must only be 0 or 1"); + if (ret == 0) + ret = -EINVAL; + return ret; + } + opts->data_prealloc_contig_only = nr; + break; + case Opt_metadev_path: ret = parse_bdev_path(sb, &args[0], &opts->metadev_path); if (ret < 0) @@ -271,6 +307,8 @@ int scoutfs_options_show(struct seq_file *seq, struct dentry *root) if (is_acl) seq_puts(seq, ",acl"); + seq_printf(seq, ",data_prealloc_blocks=%llu", opts.data_prealloc_blocks); + seq_printf(seq, ",data_prealloc_contig_only=%u", opts.data_prealloc_contig_only); seq_printf(seq, ",metadev_path=%s", opts.metadev_path); if (!is_acl) seq_puts(seq, 
",noacl"); @@ -281,6 +319,83 @@ int scoutfs_options_show(struct seq_file *seq, struct dentry *root) return 0; } +static ssize_t data_prealloc_blocks_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj); + struct scoutfs_mount_options opts; + + scoutfs_options_read(sb, &opts); + + return snprintf(buf, PAGE_SIZE, "%llu", opts.data_prealloc_blocks); +} +static ssize_t data_prealloc_blocks_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj); + DECLARE_OPTIONS_INFO(sb, optinf); + char nullterm[30]; /* more than enough for octal -U64_MAX */ + u64 val; + int len; + int ret; + + len = min(count, sizeof(nullterm) - 1); + memcpy(nullterm, buf, len); + nullterm[len] = '\0'; + + ret = kstrtou64(nullterm, 0, &val); + if (ret < 0 || val < MIN_DATA_PREALLOC_BLOCKS || val > MAX_DATA_PREALLOC_BLOCKS) { + scoutfs_err(sb, "invalid data_prealloc_blocks option, must be between %llu and %llu", + MIN_DATA_PREALLOC_BLOCKS, MAX_DATA_PREALLOC_BLOCKS); + return -EINVAL; + } + + write_seqlock(&optinf->seqlock); + optinf->opts.data_prealloc_blocks = val; + write_sequnlock(&optinf->seqlock); + + return count; +} +SCOUTFS_ATTR_RW(data_prealloc_blocks); + +static ssize_t data_prealloc_contig_only_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj); + struct scoutfs_mount_options opts; + + scoutfs_options_read(sb, &opts); + + return snprintf(buf, PAGE_SIZE, "%u", opts.data_prealloc_contig_only); +} +static ssize_t data_prealloc_contig_only_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj); + DECLARE_OPTIONS_INFO(sb, optinf); + char nullterm[20]; /* more than enough for octal -U32_MAX */ + long val; + int len; + int ret; + + len = min(count, sizeof(nullterm) 
- 1); + memcpy(nullterm, buf, len); + nullterm[len] = '\0'; + + ret = kstrtol(nullterm, 0, &val); + if (ret < 0 || val < 0 || val > 1) { + scoutfs_err(sb, "invalid data_prealloc_contig_only option, bool must be 0 or 1"); + return -EINVAL; + } + + write_seqlock(&optinf->seqlock); + optinf->opts.data_prealloc_contig_only = val; + write_sequnlock(&optinf->seqlock); + + return count; +} +SCOUTFS_ATTR_RW(data_prealloc_contig_only); + static ssize_t metadev_path_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj); @@ -345,6 +460,8 @@ static ssize_t quorum_slot_nr_show(struct kobject *kobj, struct kobj_attribute * SCOUTFS_ATTR_RO(quorum_slot_nr); static struct attribute *options_attrs[] = { + SCOUTFS_ATTR_PTR(data_prealloc_blocks), + SCOUTFS_ATTR_PTR(data_prealloc_contig_only), SCOUTFS_ATTR_PTR(metadev_path), SCOUTFS_ATTR_PTR(orphan_scan_delay_ms), SCOUTFS_ATTR_PTR(quorum_slot_nr), diff --git a/kmod/src/options.h b/kmod/src/options.h index 26d1eb1e..d2e4ad74 100644 --- a/kmod/src/options.h +++ b/kmod/src/options.h @@ -6,6 +6,8 @@ #include "format.h" struct scoutfs_mount_options { + u64 data_prealloc_blocks; + bool data_prealloc_contig_only; char *metadev_path; unsigned int orphan_scan_delay_ms; int quorum_slot_nr; diff --git a/utils/man/scoutfs.5 b/utils/man/scoutfs.5 index 8b20483b..8f619e4b 100644 --- a/utils/man/scoutfs.5 +++ b/utils/man/scoutfs.5 @@ -21,6 +21,40 @@ as detailed in .BR acl (5) . Support for POSIX ACLs is the default. .TP +.B data_prealloc_blocks= +Set the size of preallocation regions of data files, in 4KiB blocks. +Writes to these regions that contain no extents will attempt to +preallocate the size of the full region. This can waste a lot of space +with small files, files with sparse regions, and files whose final +length isn't a multiple of the preallocation size. The following +data_prealloc_contig_only option, which is the default, restricts this +behaviour to waste less space. 
+.sp +All the preallocation options can be changed in an active mount by +writing to their respective files in the mount_options directory in the +mount's sysfs directory. +.sp +It is worth noting that it is always more efficient in every way to use +.BR fallocate (2) +to precisely allocate large extents for the resulting size of the file. +Always attempt to enable it in software that supports it. +.TP +.B data_prealloc_contig_only=<0|1> +This option, currently the default, limits file data preallocation in +two ways. First, it will only preallocate when extending a fully +allocated file. Second, it will limit the size of preallocation to the +existing length of the file. These limits reduce the amount of +preallocation wasted per file at the cost of multiple initial extents in +all files. It only supports simple streaming writes; any other write +pattern will not be recognized and could result in many fragmented +extent allocations. +.sp +This option can be disabled to encourage large allocated extents +regardless of write patterns. This can be helpful if files are written +with initial sparse regions (perhaps by multiple threads writing to +different regions) and wasted space isn't an issue (perhaps because the +file population contains few small files). +.TP .B metadev_path= The metadev_path option specifies the path to the block device that contains the filesystem's metadata. 
From 3847c4fe6398fc52ece23986f5106637ae9db729 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Thu, 6 Oct 2022 13:30:17 -0700 Subject: [PATCH 2/5] Add data-prealloc test Signed-off-by: Zach Brown --- tests/golden/data-prealloc | 26 +++++++ tests/sequence | 1 + tests/tests/data-prealloc.sh | 136 +++++++++++++++++++++++++++++++++++ 3 files changed, 163 insertions(+) create mode 100644 tests/golden/data-prealloc create mode 100644 tests/tests/data-prealloc.sh diff --git a/tests/golden/data-prealloc b/tests/golden/data-prealloc new file mode 100644 index 00000000..93854034 --- /dev/null +++ b/tests/golden/data-prealloc @@ -0,0 +1,26 @@ +== initial writes smaller than prealloc grow to prealloc size +/mnt/test/test/data-prealloc/file-1: 7 extents found +/mnt/test/test/data-prealloc/file-2: 7 extents found +== larger files get full prealloc extents +/mnt/test/test/data-prealloc/file-1: 9 extents found +/mnt/test/test/data-prealloc/file-2: 9 extents found +== non-streaming writes with contig have per-block extents +/mnt/test/test/data-prealloc/file-1: 32 extents found +/mnt/test/test/data-prealloc/file-2: 32 extents found +== any writes to region prealloc get full extents +/mnt/test/test/data-prealloc/file-1: 4 extents found +/mnt/test/test/data-prealloc/file-2: 4 extents found +/mnt/test/test/data-prealloc/file-1: 4 extents found +/mnt/test/test/data-prealloc/file-2: 4 extents found +== streaming offline writes get full extents either way +/mnt/test/test/data-prealloc/file-1: 4 extents found +/mnt/test/test/data-prealloc/file-2: 4 extents found +/mnt/test/test/data-prealloc/file-1: 4 extents found +/mnt/test/test/data-prealloc/file-2: 4 extents found +== goofy preallocation amounts work +/mnt/test/test/data-prealloc/file-1: 5 extents found +/mnt/test/test/data-prealloc/file-2: 5 extents found +/mnt/test/test/data-prealloc/file-1: 5 extents found +/mnt/test/test/data-prealloc/file-2: 5 extents found +/mnt/test/test/data-prealloc/file-1: 3 extents found 
+/mnt/test/test/data-prealloc/file-2: 3 extents found diff --git a/tests/sequence b/tests/sequence index db6e9ba0..43a599b0 100644 --- a/tests/sequence +++ b/tests/sequence @@ -6,6 +6,7 @@ simple-inode-index.sh simple-staging.sh simple-release-extents.sh fallocate.sh +data-prealloc.sh setattr_more.sh offline-extent-waiting.sh move-blocks.sh diff --git a/tests/tests/data-prealloc.sh b/tests/tests/data-prealloc.sh new file mode 100644 index 00000000..b0bb81d2 --- /dev/null +++ b/tests/tests/data-prealloc.sh @@ -0,0 +1,136 @@ +# +# test that the data prealloc options behave as expected. We write to +# two files a block at a time so that a single file doesn't naturally +# merge adjacent consecutive allocations. (we don't have multiple +# allocation cursors) +# +t_require_commands scoutfs stat filefrag dd touch truncate + +write_forwards() +{ + local prefix="$1" + local nr="$2" + local blk + + touch "$prefix"-{1,2} + truncate -s 0 "$prefix"-{1,2} + + for blk in $(seq 0 1 $((nr - 1))); do + dd if=/dev/zero of="$prefix"-1 bs=4096 seek=$blk count=1 conv=notrunc status=none + dd if=/dev/zero of="$prefix"-2 bs=4096 seek=$blk count=1 conv=notrunc status=none + done +} + +write_backwards() +{ + local prefix="$1" + local nr="$2" + local blk + + touch "$prefix"-{1,2} + truncate -s 0 "$prefix"-{1,2} + + for blk in $(seq $((nr - 1)) -1 0); do + dd if=/dev/zero of="$prefix"-1 bs=4096 seek=$blk count=1 conv=notrunc status=none + dd if=/dev/zero of="$prefix"-2 bs=4096 seek=$blk count=1 conv=notrunc status=none + done +} + +release_files() { + local prefix="$1" + local size=$(($2 * 4096)) + local vers + local f + + for f in "$prefix"*; do + size=$(stat -c "%s" "$f") + vers=$(scoutfs stat -s data_version "$f") + scoutfs release "$f" -V "$vers" -o 0 -l $size + done +} + +stage_files() { + local prefix="$1" + local nr="$2" + local vers + local f + + for blk in $(seq 0 1 $((nr - 1))); do + for f in "$prefix"*; do + vers=$(scoutfs stat -s data_version "$f") + scoutfs stage /dev/zero "$f" 
-V "$vers" -o $((blk * 4096)) -l 4096 + done + done +} + +print_extents_found() +{ + local prefix="$1" + + filefrag "$prefix"* 2>&1 | grep "extent.*found" | t_filter_fs +} + +t_save_all_sysfs_mount_options data_prealloc_blocks +t_save_all_sysfs_mount_options data_prealloc_contig_only +restore_options() +{ + t_restore_all_sysfs_mount_options data_prealloc_blocks + t_restore_all_sysfs_mount_options data_prealloc_contig_only +} +trap restore_options EXIT + +prefix="$T_D0/file" + +echo "== initial writes smaller than prealloc grow to prealloc size" +t_set_sysfs_mount_option 0 data_prealloc_blocks 32 +t_set_sysfs_mount_option 0 data_prealloc_contig_only 1 +write_forwards $prefix 64 +print_extents_found $prefix + +echo "== larger files get full prealloc extents" +t_set_sysfs_mount_option 0 data_prealloc_blocks 32 +t_set_sysfs_mount_option 0 data_prealloc_contig_only 1 +write_forwards $prefix 128 +print_extents_found $prefix + +echo "== non-streaming writes with contig have per-block extents" +t_set_sysfs_mount_option 0 data_prealloc_blocks 32 +t_set_sysfs_mount_option 0 data_prealloc_contig_only 1 +write_backwards $prefix 32 +print_extents_found $prefix + +echo "== any writes to region prealloc get full extents" +t_set_sysfs_mount_option 0 data_prealloc_blocks 16 +t_set_sysfs_mount_option 0 data_prealloc_contig_only 0 +write_forwards $prefix 64 +print_extents_found $prefix +write_backwards $prefix 64 +print_extents_found $prefix + +echo "== streaming offline writes get full extents either way" +t_set_sysfs_mount_option 0 data_prealloc_blocks 16 +t_set_sysfs_mount_option 0 data_prealloc_contig_only 1 +write_forwards $prefix 64 +release_files $prefix 64 +stage_files $prefix 64 +print_extents_found $prefix +t_set_sysfs_mount_option 0 data_prealloc_contig_only 0 +release_files $prefix 64 +stage_files $prefix 64 +print_extents_found $prefix + +echo "== goofy preallocation amounts work" +t_set_sysfs_mount_option 0 data_prealloc_blocks 7 +t_set_sysfs_mount_option 0 
data_prealloc_contig_only 1 +write_forwards $prefix 14 +print_extents_found $prefix +t_set_sysfs_mount_option 0 data_prealloc_blocks 13 +t_set_sysfs_mount_option 0 data_prealloc_contig_only 0 +write_forwards $prefix 53 +print_extents_found $prefix +t_set_sysfs_mount_option 0 data_prealloc_blocks 1 +t_set_sysfs_mount_option 0 data_prealloc_contig_only 0 +write_forwards $prefix 3 +print_extents_found $prefix + +t_pass From 51fe5a4ceb515a913dba7200366cbaeb22b27a5e Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Thu, 13 Oct 2022 12:42:46 -0700 Subject: [PATCH 3/5] Add -o mount option argument to run-tests Add a run-tests option that lets us append an option string to all mounts performed during the tests. Signed-off-by: Zach Brown --- tests/run-tests.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/run-tests.sh b/tests/run-tests.sh index a8aa02af..836273d9 100755 --- a/tests/run-tests.sh +++ b/tests/run-tests.sh @@ -58,6 +58,7 @@ $(basename $0) options: -m | Run mkfs on the device before mounting and running | tests. Implies unmounting existing mounts first. -n | The number of devices and mounts to test. + -o | Add option string to all mounts during all tests. -P | Enable trace_printk. -p | Exit script after preparing mounts only, don't run tests. -q | The first mounts will be quorum members. 
Must be @@ -136,6 +137,12 @@ while true; do T_NR_MOUNTS="$2" shift ;; + -o) + test -n "$2" || die "-o must have option string argument" + # always appending to existing options + T_MNT_OPTIONS+=",$2" + shift + ;; -P) T_TRACE_PRINTK="1" ;; @@ -430,6 +437,7 @@ for i in $(seq 0 $((T_NR_MOUNTS - 1))); do if [ "$i" -lt "$T_QUORUM" ]; then opts="$opts,quorum_slot_nr=$i" fi + opts="${opts}${T_MNT_OPTIONS}" msg "mounting $meta_dev|$data_dev on $dir" cmd mount -t scoutfs $opts "$data_dev" "$dir" & From e27ea22fe4ded1fa4616d4924acc6ac17f87cfd7 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Fri, 14 Oct 2022 09:43:36 -0700 Subject: [PATCH 4/5] Add run-tests -T option to increase trace size Add an option to increase the trace buffer size during the run. Signed-off-by: Zach Brown --- tests/run-tests.sh | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/run-tests.sh b/tests/run-tests.sh index 836273d9..a9613687 100755 --- a/tests/run-tests.sh +++ b/tests/run-tests.sh @@ -69,6 +69,7 @@ $(basename $0) options: -s | Skip git repo checkouts. -t | Enabled trace events that match the given glob argument. | Multiple options enable multiple globbed events. + -T | Multiply the original trace buffer size by nr during the run. -X | xfstests git repo. Used by tests/xfstests.sh. -x | xfstests git branch to checkout and track. 
-y | xfstests ./check additional args @@ -167,6 +168,11 @@ while true; do T_TRACE_GLOB+=("$2") shift ;; + -T) + test -n "$2" || die "-T must have trace buffer size multiplier argument" + T_TRACE_MULT="$2" + shift + ;; -X) test -n "$2" || die "-X requires xfstests git repo dir argument" T_XFSTESTS_REPO="$2" @@ -352,6 +358,13 @@ if [ -n "$T_INSMOD" ]; then cmd insmod "$T_KMOD/src/scoutfs.ko" fi +if [ -n "$T_TRACE_MULT" ]; then + orig_trace_size=$(cat /sys/kernel/debug/tracing/buffer_size_kb) + mult_trace_size=$((orig_trace_size * T_TRACE_MULT)) + msg "increasing trace buffer size from $orig_trace_size KiB to $mult_trace_size KiB" + echo $mult_trace_size > /sys/kernel/debug/tracing/buffer_size_kb +fi + nr_globs=${#T_TRACE_GLOB[@]} if [ $nr_globs -gt 0 ]; then echo 0 > /sys/kernel/debug/tracing/events/scoutfs/enable @@ -381,6 +394,7 @@ fi # always describe tracing in the logs cmd cat /sys/kernel/debug/tracing/set_event cmd grep . /sys/kernel/debug/tracing/options/trace_printk \ + /sys/kernel/debug/tracing/buffer_size_kb \ /proc/sys/kernel/ftrace_dump_on_oops # @@ -612,6 +626,9 @@ if [ -n "$T_TRACE_GLOB" -o -n "$T_TRACE_PRINTK" ]; then echo 0 > /sys/kernel/debug/tracing/events/scoutfs/enable echo 0 > /sys/kernel/debug/tracing/options/trace_printk cat /sys/kernel/debug/tracing/trace > "$T_RESULTS/traces" + if [ -n "$orig_trace_size" ]; then + echo $orig_trace_size > /sys/kernel/debug/tracing/buffer_size_kb + fi fi if [ "$skipped" == 0 -a "$failed" == 0 ]; then From d5ddf1ecaca8e48b94e2703d2f02464db729baa6 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Fri, 14 Oct 2022 11:11:00 -0700 Subject: [PATCH 5/5] Fix option save/restore test helpers The test shell helpers for saving and restoring mount options were trying to put each mount's option value in an array. It meant to build the array key by concatenating the option name and the mount number. 
But it didn't isolate the option "name" variable when evaluating it, instead always evaluating "name_" to nothing and building keys for all options that only contained the mount index. This then broke when tests attempted to save and restore multiple options. Signed-off-by: Zach Brown --- tests/funcs/fs.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/funcs/fs.sh b/tests/funcs/fs.sh index 9a2c2db7..34c9c5ca 100644 --- a/tests/funcs/fs.sh +++ b/tests/funcs/fs.sh @@ -405,7 +405,7 @@ t_save_all_sysfs_mount_options() { for i in $(t_fs_nrs); do opt="$(t_sysfs_path $i)/mount_options/$name" - ind="$name_$i" + ind="${name}_${i}" _saved_opts[$ind]="$(cat $opt)" done @@ -417,7 +417,7 @@ t_restore_all_sysfs_mount_options() { local i for i in $(t_fs_nrs); do - ind="$name_$i" + ind="${name}_${i}" t_set_sysfs_mount_option $i $name "${_saved_opts[$ind]}" done