Merge pull request #101 from versity/zab/data_prealloc_options

Zab/data prealloc options
This commit is contained in:
Zach Brown
2022-10-17 12:18:51 -07:00
committed by GitHub
10 changed files with 436 additions and 53 deletions

View File

@@ -19,14 +19,11 @@
(128ULL * 1024 * 1024 >> SCOUTFS_BLOCK_SM_SHIFT)
/*
* The largest aligned region that we'll try to allocate at the end of
* the file as it's extended. This is also limited to the current file
* size so we can only waste at most twice the total file size when
* files are less than this. We try to keep this around the point of
* diminishing returns in streaming performance of common data devices
* to limit waste.
* The default size that we'll try to preallocate. This is trying to
* hit the limit of large efficient device writes while minimizing
* wasted preallocation that is never used.
*/
#define SCOUTFS_DATA_EXTEND_PREALLOC_LIMIT \
#define SCOUTFS_DATA_PREALLOC_DEFAULT_BLOCKS \
(8ULL * 1024 * 1024 >> SCOUTFS_BLOCK_SM_SHIFT)
/*

View File

@@ -366,27 +366,27 @@ static inline u64 ext_last(struct scoutfs_extent *ext)
/*
* The caller is writing to a logical iblock that doesn't have an
* allocated extent.
* allocated extent. The caller has searched for an extent containing
* iblock. If it already existed then it must be unallocated and
* offline.
*
* We always allocate an extent starting at the logical iblock. The
* caller has searched for an extent containing iblock. If it already
* existed then it must be unallocated and offline.
* We implement two preallocation strategies. Typically we only
* preallocate for simple streaming writes and limit preallocation while
* the file is small. The largest efficient allocation size is
* typically large enough that it would be unreasonable to allocate that
* much for all small files.
*
* Preallocation is used if we're strictly contiguously extending
* writes. That is, if the logical block offset equals the number of
* online blocks. We try to preallocate the number of blocks existing
* so that small files don't waste inordinate amounts of space and large
* files will eventually see large extents. This only works for
* contiguous single stream writes or stages of files from the first
* block. It doesn't work for concurrent stages, releasing behind
* staging, sparse files, multi-node writes, etc. fallocate() is always
* a better tool to use.
* Optionally, we can simply preallocate large empty aligned regions.
* This can waste a lot of space for small or sparse files but is
* reasonable when a file population is known to be large and dense but
* known to be written with non-streaming write patterns.
*/
static int alloc_block(struct super_block *sb, struct inode *inode,
struct scoutfs_extent *ext, u64 iblock,
struct scoutfs_lock *lock)
{
DECLARE_DATA_INFO(sb, datinf);
struct scoutfs_mount_options opts;
const u64 ino = scoutfs_ino(inode);
struct data_ext_args args = {
.ino = ino,
@@ -394,17 +394,22 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
.lock = lock,
};
struct scoutfs_extent found;
struct scoutfs_extent pre;
struct scoutfs_extent pre = {0,};
bool undo_pre = false;
u64 blkno = 0;
u64 online;
u64 offline;
u8 flags;
u64 start;
u64 count;
u64 rem;
int ret;
int err;
trace_scoutfs_data_alloc_block_enter(sb, ino, iblock, ext);
scoutfs_options_read(sb, &opts);
/* can only allocate over existing unallocated offline extent */
if (WARN_ON_ONCE(ext->len &&
!(iblock >= ext->start && iblock <= ext_last(ext) &&
@@ -413,66 +418,106 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
mutex_lock(&datinf->mutex);
scoutfs_inode_get_onoff(inode, &online, &offline);
/* default to single allocation at the written block */
start = iblock;
count = 1;
/* copy existing flags for preallocated regions */
flags = ext->len ? ext->flags : 0;
if (ext->len) {
/* limit preallocation to remaining existing (offline) extent */
/*
* Assume that offline writers are going to be writing
* all the offline extents and try to preallocate the
* rest of the unwritten extent.
*/
count = ext->len - (iblock - ext->start);
flags = ext->flags;
} else if (opts.data_prealloc_contig_only) {
/*
* Only preallocate when a quick test of the online
* block counts looks like we're a simple streaming
* write. Try to write until the next extent but limit
* the preallocation size to the number of online
* blocks.
*/
scoutfs_inode_get_onoff(inode, &online, &offline);
if (iblock > 1 && iblock == online) {
ret = scoutfs_ext_next(sb, &data_ext_ops, &args,
iblock, 1, &found);
if (ret < 0 && ret != -ENOENT)
goto out;
if (found.len && found.start > iblock)
count = found.start - iblock;
else
count = opts.data_prealloc_blocks;
count = min(iblock, count);
}
} else {
/* otherwise alloc to next extent */
ret = scoutfs_ext_next(sb, &data_ext_ops, &args,
iblock, 1, &found);
/*
* Preallocation of aligned regions only preallocates if
* the aligned region contains no extents at all. This
* could be fooled by offline sparse extents but we
* don't want to iterate over all offline extents in the
* aligned region.
*/
div64_u64_rem(iblock, opts.data_prealloc_blocks, &rem);
start = iblock - rem;
count = opts.data_prealloc_blocks;
ret = scoutfs_ext_next(sb, &data_ext_ops, &args, start, 1, &found);
if (ret < 0 && ret != -ENOENT)
goto out;
if (found.len && found.start > iblock)
count = found.start - iblock;
else
count = SCOUTFS_DATA_EXTEND_PREALLOC_LIMIT;
flags = 0;
if (found.len && found.start < start + count)
count = 1;
}
/* overall prealloc limit */
count = min_t(u64, count, SCOUTFS_DATA_EXTEND_PREALLOC_LIMIT);
/* only strictly contiguous extending writes will try to preallocate */
if (iblock > 1 && iblock == online)
count = min(iblock, count);
else
count = 1;
count = min_t(u64, count, opts.data_prealloc_blocks);
ret = scoutfs_alloc_data(sb, datinf->alloc, datinf->wri,
&datinf->dalloc, count, &blkno, &count);
if (ret < 0)
goto out;
ret = scoutfs_ext_set(sb, &data_ext_ops, &args, iblock, 1, blkno, 0);
if (ret < 0)
goto out;
/*
* An aligned prealloc attempt that gets a smaller extent can
* fail to cover iblock, make sure that it does. This is a
* pathological case so we don't try to move the window past
* iblock. Just enough to cover it, which we know is safe.
*/
if (start + count <= iblock)
start += (iblock - (start + count) + 1);
if (count > 1) {
pre.start = iblock + 1;
pre.len = count - 1;
pre.map = blkno + 1;
pre.start = start;
pre.len = count;
pre.map = blkno;
pre.flags = flags | SEF_UNWRITTEN;
ret = scoutfs_ext_set(sb, &data_ext_ops, &args, pre.start,
pre.len, pre.map, pre.flags);
if (ret < 0) {
err = scoutfs_ext_set(sb, &data_ext_ops, &args, iblock,
1, 0, flags);
BUG_ON(err); /* couldn't restore original */
if (ret < 0)
goto out;
}
undo_pre = true;
}
ret = scoutfs_ext_set(sb, &data_ext_ops, &args, iblock, 1, blkno + (iblock - start), 0);
if (ret < 0)
goto out;
/* tell the caller we have a single block, could check next? */
ext->start = iblock;
ext->len = 1;
ext->map = blkno;
ext->map = blkno + (iblock - start);
ext->flags = 0;
ret = 0;
out:
if (ret < 0 && blkno > 0) {
if (undo_pre) {
err = scoutfs_ext_set(sb, &data_ext_ops, &args,
pre.start, pre.len, 0, flags);
BUG_ON(err); /* leaked preallocated extent */
}
err = scoutfs_free_data(sb, datinf->alloc, datinf->wri,
&datinf->data_freed, blkno, count);
BUG_ON(err); /* leaked free blocks */

View File

@@ -27,9 +27,12 @@
#include "options.h"
#include "super.h"
#include "inode.h"
#include "alloc.h"
enum {
Opt_acl,
Opt_data_prealloc_blocks,
Opt_data_prealloc_contig_only,
Opt_metadev_path,
Opt_noacl,
Opt_orphan_scan_delay_ms,
@@ -39,6 +42,8 @@ enum {
static const match_table_t tokens = {
{Opt_acl, "acl"},
{Opt_data_prealloc_blocks, "data_prealloc_blocks=%s"},
{Opt_data_prealloc_contig_only, "data_prealloc_contig_only=%s"},
{Opt_metadev_path, "metadev_path=%s"},
{Opt_noacl, "noacl"},
{Opt_orphan_scan_delay_ms, "orphan_scan_delay_ms=%s"},
@@ -110,9 +115,15 @@ static void free_options(struct scoutfs_mount_options *opts)
#define DEFAULT_ORPHAN_SCAN_DELAY_MS (10 * MSEC_PER_SEC)
#define MAX_ORPHAN_SCAN_DELAY_MS (60 * MSEC_PER_SEC)
#define MIN_DATA_PREALLOC_BLOCKS 1ULL
#define MAX_DATA_PREALLOC_BLOCKS ((unsigned long long)SCOUTFS_BLOCK_SM_MAX)
static void init_default_options(struct scoutfs_mount_options *opts)
{
memset(opts, 0, sizeof(*opts));
opts->data_prealloc_blocks = SCOUTFS_DATA_PREALLOC_DEFAULT_BLOCKS;
opts->data_prealloc_contig_only = 1;
opts->quorum_slot_nr = -1;
opts->orphan_scan_delay_ms = -1;
}
@@ -126,6 +137,7 @@ static void init_default_options(struct scoutfs_mount_options *opts)
static int parse_options(struct super_block *sb, char *options, struct scoutfs_mount_options *opts)
{
substring_t args[MAX_OPT_ARGS];
u64 nr64;
int nr;
int token;
char *p;
@@ -142,6 +154,30 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m
sb->s_flags |= MS_POSIXACL;
break;
case Opt_data_prealloc_blocks:
ret = match_u64(args, &nr64);
if (ret < 0 ||
nr64 < MIN_DATA_PREALLOC_BLOCKS || nr64 > MAX_DATA_PREALLOC_BLOCKS) {
scoutfs_err(sb, "invalid data_prealloc_blocks option, must be between %llu and %llu",
MIN_DATA_PREALLOC_BLOCKS, MAX_DATA_PREALLOC_BLOCKS);
if (ret == 0)
ret = -EINVAL;
return ret;
}
opts->data_prealloc_blocks = nr64;
break;
case Opt_data_prealloc_contig_only:
ret = match_int(args, &nr);
if (ret < 0 || nr < 0 || nr > 1) {
scoutfs_err(sb, "invalid data_prealloc_contig_only option, bool must only be 0 or 1");
if (ret == 0)
ret = -EINVAL;
return ret;
}
opts->data_prealloc_contig_only = nr;
break;
case Opt_metadev_path:
ret = parse_bdev_path(sb, &args[0], &opts->metadev_path);
if (ret < 0)
@@ -271,6 +307,8 @@ int scoutfs_options_show(struct seq_file *seq, struct dentry *root)
if (is_acl)
seq_puts(seq, ",acl");
seq_printf(seq, ",data_prealloc_blocks=%llu", opts.data_prealloc_blocks);
seq_printf(seq, ",data_prealloc_contig_only=%u", opts.data_prealloc_contig_only);
seq_printf(seq, ",metadev_path=%s", opts.metadev_path);
if (!is_acl)
seq_puts(seq, ",noacl");
@@ -281,6 +319,83 @@ int scoutfs_options_show(struct seq_file *seq, struct dentry *root)
return 0;
}
static ssize_t data_prealloc_blocks_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
struct scoutfs_mount_options opts;
scoutfs_options_read(sb, &opts);
return snprintf(buf, PAGE_SIZE, "%llu", opts.data_prealloc_blocks);
}
static ssize_t data_prealloc_blocks_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
DECLARE_OPTIONS_INFO(sb, optinf);
char nullterm[30]; /* more than enough for octal -U64_MAX */
u64 val;
int len;
int ret;
len = min(count, sizeof(nullterm) - 1);
memcpy(nullterm, buf, len);
nullterm[len] = '\0';
ret = kstrtoll(nullterm, 0, &val);
if (ret < 0 || val < MIN_DATA_PREALLOC_BLOCKS || val > MAX_DATA_PREALLOC_BLOCKS) {
scoutfs_err(sb, "invalid data_prealloc_blocks option, must be between %llu and %llu",
MIN_DATA_PREALLOC_BLOCKS, MAX_DATA_PREALLOC_BLOCKS);
return -EINVAL;
}
write_seqlock(&optinf->seqlock);
optinf->opts.data_prealloc_blocks = val;
write_sequnlock(&optinf->seqlock);
return count;
}
SCOUTFS_ATTR_RW(data_prealloc_blocks);
static ssize_t data_prealloc_contig_only_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
struct scoutfs_mount_options opts;
scoutfs_options_read(sb, &opts);
return snprintf(buf, PAGE_SIZE, "%u", opts.data_prealloc_contig_only);
}
static ssize_t data_prealloc_contig_only_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
DECLARE_OPTIONS_INFO(sb, optinf);
char nullterm[20]; /* more than enough for octal -U32_MAX */
long val;
int len;
int ret;
len = min(count, sizeof(nullterm) - 1);
memcpy(nullterm, buf, len);
nullterm[len] = '\0';
ret = kstrtol(nullterm, 0, &val);
if (ret < 0 || val < 0 || val > 1) {
scoutfs_err(sb, "invalid data_prealloc_contig_only option, bool must be 0 or 1");
return -EINVAL;
}
write_seqlock(&optinf->seqlock);
optinf->opts.data_prealloc_contig_only = val;
write_sequnlock(&optinf->seqlock);
return count;
}
SCOUTFS_ATTR_RW(data_prealloc_contig_only);
static ssize_t metadev_path_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
@@ -345,6 +460,8 @@ static ssize_t quorum_slot_nr_show(struct kobject *kobj, struct kobj_attribute *
SCOUTFS_ATTR_RO(quorum_slot_nr);
static struct attribute *options_attrs[] = {
SCOUTFS_ATTR_PTR(data_prealloc_blocks),
SCOUTFS_ATTR_PTR(data_prealloc_contig_only),
SCOUTFS_ATTR_PTR(metadev_path),
SCOUTFS_ATTR_PTR(orphan_scan_delay_ms),
SCOUTFS_ATTR_PTR(quorum_slot_nr),

View File

@@ -6,6 +6,8 @@
#include "format.h"
struct scoutfs_mount_options {
u64 data_prealloc_blocks;
bool data_prealloc_contig_only;
char *metadev_path;
unsigned int orphan_scan_delay_ms;
int quorum_slot_nr;

View File

@@ -405,7 +405,7 @@ t_save_all_sysfs_mount_options() {
for i in $(t_fs_nrs); do
opt="$(t_sysfs_path $i)/mount_options/$name"
ind="$name_$i"
ind="${name}_${i}"
_saved_opts[$ind]="$(cat $opt)"
done
@@ -417,7 +417,7 @@ t_restore_all_sysfs_mount_options() {
local i
for i in $(t_fs_nrs); do
ind="$name_$i"
ind="${name}_${i}"
t_set_sysfs_mount_option $i $name "${_saved_opts[$ind]}"
done

View File

@@ -0,0 +1,26 @@
== initial writes smaller than prealloc grow to prealloc size
/mnt/test/test/data-prealloc/file-1: 7 extents found
/mnt/test/test/data-prealloc/file-2: 7 extents found
== larger files get full prealloc extents
/mnt/test/test/data-prealloc/file-1: 9 extents found
/mnt/test/test/data-prealloc/file-2: 9 extents found
== non-streaming writes with contig have per-block extents
/mnt/test/test/data-prealloc/file-1: 32 extents found
/mnt/test/test/data-prealloc/file-2: 32 extents found
== any writes to region prealloc get full extents
/mnt/test/test/data-prealloc/file-1: 4 extents found
/mnt/test/test/data-prealloc/file-2: 4 extents found
/mnt/test/test/data-prealloc/file-1: 4 extents found
/mnt/test/test/data-prealloc/file-2: 4 extents found
== streaming offline writes get full extents either way
/mnt/test/test/data-prealloc/file-1: 4 extents found
/mnt/test/test/data-prealloc/file-2: 4 extents found
/mnt/test/test/data-prealloc/file-1: 4 extents found
/mnt/test/test/data-prealloc/file-2: 4 extents found
== goofy preallocation amounts work
/mnt/test/test/data-prealloc/file-1: 5 extents found
/mnt/test/test/data-prealloc/file-2: 5 extents found
/mnt/test/test/data-prealloc/file-1: 5 extents found
/mnt/test/test/data-prealloc/file-2: 5 extents found
/mnt/test/test/data-prealloc/file-1: 3 extents found
/mnt/test/test/data-prealloc/file-2: 3 extents found

View File

@@ -58,6 +58,7 @@ $(basename $0) options:
-m | Run mkfs on the device before mounting and running
| tests. Implies unmounting existing mounts first.
-n <nr> | The number of devices and mounts to test.
-o <opts> | Add option string to all mounts during all tests.
-P | Enable trace_printk.
-p | Exit script after preparing mounts only, don't run tests.
-q <nr> | The first <nr> mounts will be quorum members. Must be
@@ -68,6 +69,7 @@ $(basename $0) options:
-s | Skip git repo checkouts.
-t | Enabled trace events that match the given glob argument.
| Multiple options enable multiple globbed events.
-T <nr> | Multiply the original trace buffer size by nr during the run.
-X | xfstests git repo. Used by tests/xfstests.sh.
-x | xfstests git branch to checkout and track.
-y | xfstests ./check additional args
@@ -136,6 +138,12 @@ while true; do
T_NR_MOUNTS="$2"
shift
;;
-o)
test -n "$2" || die "-o must have option string argument"
# always appending to existing options
T_MNT_OPTIONS+=",$2"
shift
;;
-P)
T_TRACE_PRINTK="1"
;;
@@ -160,6 +168,11 @@ while true; do
T_TRACE_GLOB+=("$2")
shift
;;
-T)
test -n "$2" || die "-T must have trace buffer size multiplier argument"
T_TRACE_MULT="$2"
shift
;;
-X)
test -n "$2" || die "-X requires xfstests git repo dir argument"
T_XFSTESTS_REPO="$2"
@@ -345,6 +358,13 @@ if [ -n "$T_INSMOD" ]; then
cmd insmod "$T_KMOD/src/scoutfs.ko"
fi
if [ -n "$T_TRACE_MULT" ]; then
orig_trace_size=$(cat /sys/kernel/debug/tracing/buffer_size_kb)
mult_trace_size=$((orig_trace_size * T_TRACE_MULT))
msg "increasing trace buffer size from $orig_trace_size KiB to $mult_trace_size KiB"
echo $mult_trace_size > /sys/kernel/debug/tracing/buffer_size_kb
fi
nr_globs=${#T_TRACE_GLOB[@]}
if [ $nr_globs -gt 0 ]; then
echo 0 > /sys/kernel/debug/tracing/events/scoutfs/enable
@@ -374,6 +394,7 @@ fi
# always describe tracing in the logs
cmd cat /sys/kernel/debug/tracing/set_event
cmd grep . /sys/kernel/debug/tracing/options/trace_printk \
/sys/kernel/debug/tracing/buffer_size_kb \
/proc/sys/kernel/ftrace_dump_on_oops
#
@@ -430,6 +451,7 @@ for i in $(seq 0 $((T_NR_MOUNTS - 1))); do
if [ "$i" -lt "$T_QUORUM" ]; then
opts="$opts,quorum_slot_nr=$i"
fi
opts="${opts}${T_MNT_OPTIONS}"
msg "mounting $meta_dev|$data_dev on $dir"
cmd mount -t scoutfs $opts "$data_dev" "$dir" &
@@ -604,6 +626,9 @@ if [ -n "$T_TRACE_GLOB" -o -n "$T_TRACE_PRINTK" ]; then
echo 0 > /sys/kernel/debug/tracing/events/scoutfs/enable
echo 0 > /sys/kernel/debug/tracing/options/trace_printk
cat /sys/kernel/debug/tracing/trace > "$T_RESULTS/traces"
if [ -n "$orig_trace_size" ]; then
echo $orig_trace_size > /sys/kernel/debug/tracing/buffer_size_kb
fi
fi
if [ "$skipped" == 0 -a "$failed" == 0 ]; then

View File

@@ -6,6 +6,7 @@ simple-inode-index.sh
simple-staging.sh
simple-release-extents.sh
fallocate.sh
data-prealloc.sh
setattr_more.sh
offline-extent-waiting.sh
move-blocks.sh

View File

@@ -0,0 +1,136 @@
#
# test that the data prealloc options behave as expected. We write to
# two files a block at a time so that a single file doesn't naturally
# merge adjacent consecutive allocations. (we don't have multiple
# allocation cursors)
#
t_require_commands scoutfs stat filefrag dd touch truncate
write_forwards()
{
local prefix="$1"
local nr="$2"
local blk
touch "$prefix"-{1,2}
truncate -s 0 "$prefix"-{1,2}
for blk in $(seq 0 1 $((nr - 1))); do
dd if=/dev/zero of="$prefix"-1 bs=4096 seek=$blk count=1 conv=notrunc status=none
dd if=/dev/zero of="$prefix"-2 bs=4096 seek=$blk count=1 conv=notrunc status=none
done
}
write_backwards()
{
local prefix="$1"
local nr="$2"
local blk
touch "$prefix"-{1,2}
truncate -s 0 "$prefix"-{1,2}
for blk in $(seq $((nr - 1)) -1 0); do
dd if=/dev/zero of="$prefix"-1 bs=4096 seek=$blk count=1 conv=notrunc status=none
dd if=/dev/zero of="$prefix"-2 bs=4096 seek=$blk count=1 conv=notrunc status=none
done
}
release_files() {
local prefix="$1"
local size=$(($2 * 4096))
local vers
local f
for f in "$prefix"*; do
size=$(stat -c "%s" "$f")
vers=$(scoutfs stat -s data_version "$f")
scoutfs release "$f" -V "$vers" -o 0 -l $size
done
}
stage_files() {
local prefix="$1"
local nr="$2"
local vers
local f
for blk in $(seq 0 1 $((nr - 1))); do
for f in "$prefix"*; do
vers=$(scoutfs stat -s data_version "$f")
scoutfs stage /dev/zero "$f" -V "$vers" -o $((blk * 4096)) -l 4096
done
done
}
print_extents_found()
{
local prefix="$1"
filefrag "$prefix"* 2>&1 | grep "extent.*found" | t_filter_fs
}
t_save_all_sysfs_mount_options data_prealloc_blocks
t_save_all_sysfs_mount_options data_prealloc_contig_only
restore_options()
{
t_restore_all_sysfs_mount_options data_prealloc_blocks
t_restore_all_sysfs_mount_options data_prealloc_contig_only
}
trap restore_options EXIT
prefix="$T_D0/file"
echo "== initial writes smaller than prealloc grow to prealloc size"
t_set_sysfs_mount_option 0 data_prealloc_blocks 32
t_set_sysfs_mount_option 0 data_prealloc_contig_only 1
write_forwards $prefix 64
print_extents_found $prefix
echo "== larger files get full prealloc extents"
t_set_sysfs_mount_option 0 data_prealloc_blocks 32
t_set_sysfs_mount_option 0 data_prealloc_contig_only 1
write_forwards $prefix 128
print_extents_found $prefix
echo "== non-streaming writes with contig have per-block extents"
t_set_sysfs_mount_option 0 data_prealloc_blocks 32
t_set_sysfs_mount_option 0 data_prealloc_contig_only 1
write_backwards $prefix 32
print_extents_found $prefix
echo "== any writes to region prealloc get full extents"
t_set_sysfs_mount_option 0 data_prealloc_blocks 16
t_set_sysfs_mount_option 0 data_prealloc_contig_only 0
write_forwards $prefix 64
print_extents_found $prefix
write_backwards $prefix 64
print_extents_found $prefix
echo "== streaming offline writes get full extents either way"
t_set_sysfs_mount_option 0 data_prealloc_blocks 16
t_set_sysfs_mount_option 0 data_prealloc_contig_only 1
write_forwards $prefix 64
release_files $prefix 64
stage_files $prefix 64
print_extents_found $prefix
t_set_sysfs_mount_option 0 data_prealloc_contig_only 0
release_files $prefix 64
stage_files $prefix 64
print_extents_found $prefix
echo "== goofy preallocation amounts work"
t_set_sysfs_mount_option 0 data_prealloc_blocks 7
t_set_sysfs_mount_option 0 data_prealloc_contig_only 1
write_forwards $prefix 14
print_extents_found $prefix
t_set_sysfs_mount_option 0 data_prealloc_blocks 13
t_set_sysfs_mount_option 0 data_prealloc_contig_only 0
write_forwards $prefix 53
print_extents_found $prefix
t_set_sysfs_mount_option 0 data_prealloc_blocks 1
t_set_sysfs_mount_option 0 data_prealloc_contig_only 0
write_forwards $prefix 3
print_extents_found $prefix
t_pass

View File

@@ -21,6 +21,40 @@ as detailed in
.BR acl (5) .
Support for POSIX ACLs is the default.
.TP
.B data_prealloc_blocks=<blocks>
Set the size of preallocation regions of data files, in 4KiB blocks.
Writes to these regions that contain no extents will attempt to
preallocate the size of the full region. This can waste a lot of space
with small files, files with sparse regions, and files whose final
length isn't a multiple of the preallocation size. The following
data_prealloc_contig_only option, which is the default, restricts this
behaviour to waste less space.
.sp
All the preallocation options can be changed in an active mount by
writing to their respective files in the options directory in the
mount's sysfs directory.
.sp
It is worth noting that it is always more efficient in every way to use
.BR fallocate (2)
to precisely allocate large extents for the resulting size of the file.
Always attempt to enable it in software that supports it.
.TP
.B data_prealloc_contig_only=<0|1>
This option, currently the default, limits file data preallocation in
two ways. First, it will only preallocate when extending a fully
allocated file. Second, it will limit the size of preallocation to the
existing length of the file. These limits reduce the amount of
preallocation wasted per file at the cost of multiple initial extents in
all files. It only supports simple streaming writes, any other write
pattern will not be recognized and could result in many fragmented
extent allocations.
.sp
This option can be disabled to encourage large allocated extents
regardless of write patterns. This can be helpful if files are written
with initial sparse regions (perhaps by multiple threads writing to
different regions) and wasted space isn't an issue (perhaps because the
file population contains few small files).
.TP
.B metadev_path=<device>
The metadev_path option specifies the path to the block device that
contains the filesystem's metadata.