Compare commits

...

1 Commits

Author SHA1 Message Date
Auke Kok
72dc5695a6 Introduce meta_reserve_blocks mount option, default value.
This adds a meta_reserve_blocks mount option, with a default value of
16384, that reserves an additional number of blocks on the meta device.

The default value is 16384, which corresponds to 1GB of space, and just
about doubles the internal reserve value that is calculated dynamically
from the number of clients/mounts in fairly standard configurations. It
also comprises less than 2% of the meta device size for the smallest
supported meta device size.

A suggested value for larger deployments is somewhere around 256 blocks
per GB of meta device size, i.e. 1/64 of the meta device space, or about
1.6%.

Customers who are running into issues can adjust their mount options to
increase the value to have a larger safety buffer, or decrease it to
potentially have a way to get out of low space conditions temporarily.
Obviously one would want to increase the value of this option after
resolving the low space condition issues as soon as possible.

Our test suite will run with meta_reserve_blocks=0, so that the behavior
of our tests is functionally unaffected by this change, and so that it
won't interfere with investigating and resolving underlying ENOSPC issues.
The addition of this option, however, allows us to artificially create
ENOSPC conditions at will, and we may want to add tests that specifically
do so.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2025-04-17 16:06:33 -04:00
4 changed files with 51 additions and 1 deletions

View File

@@ -39,6 +39,7 @@ enum {
Opt_orphan_scan_delay_ms,
Opt_quorum_heartbeat_timeout_ms,
Opt_quorum_slot_nr,
Opt_meta_reserve_blocks,
Opt_err,
};
@@ -52,6 +53,7 @@ static const match_table_t tokens = {
{Opt_orphan_scan_delay_ms, "orphan_scan_delay_ms=%s"},
{Opt_quorum_heartbeat_timeout_ms, "quorum_heartbeat_timeout_ms=%s"},
{Opt_quorum_slot_nr, "quorum_slot_nr=%s"},
{Opt_meta_reserve_blocks, "meta_reserve_blocks=%s"},
{Opt_err, NULL}
};
@@ -126,6 +128,9 @@ static void free_options(struct scoutfs_mount_options *opts)
#define MIN_DATA_PREALLOC_BLOCKS 1ULL
#define MAX_DATA_PREALLOC_BLOCKS ((unsigned long long)SCOUTFS_BLOCK_SM_MAX)
#define SCOUTFS_META_RESERVE_DEFAULT_BLOCKS 16384
static void init_default_options(struct scoutfs_mount_options *opts)
{
memset(opts, 0, sizeof(*opts));
@@ -136,6 +141,7 @@ static void init_default_options(struct scoutfs_mount_options *opts)
opts->orphan_scan_delay_ms = -1;
opts->quorum_heartbeat_timeout_ms = SCOUTFS_QUORUM_DEF_HB_TIMEO_MS;
opts->quorum_slot_nr = -1;
opts->meta_reserve_blocks = SCOUTFS_META_RESERVE_DEFAULT_BLOCKS;
}
static int verify_log_merge_wait_timeout_ms(struct super_block *sb, int ret, int val)
@@ -167,6 +173,24 @@ static int verify_quorum_heartbeat_timeout_ms(struct super_block *sb, int ret, u
return 0;
}
/*
 * Validate the parsed meta_reserve_blocks mount option.
 *
 * @ret is the result of parsing the option string and @val is the
 * parsed value.  Returns 0 when the value is acceptable, or -EINVAL
 * after logging an error when parsing failed or the value is negative.
 *
 * Ideally we'd set an upper limit to something reasonable like 1/2 the
 * actual total_meta_blocks, but we can't yet get this info when mount is
 * called.  The only upper bound is INT_MAX, which the int argument
 * already guarantees, so only the lower bound is tested.
 */
static int verify_meta_reserve_blocks(struct super_block *sb, int ret, int val)
{
	if (ret < 0) {
		scoutfs_err(sb, "failed to parse meta_reserve_blocks value");
		return -EINVAL;
	}

	/* an int can never exceed INT_MAX, so comparing against it is dead code */
	if (val < 0) {
		scoutfs_err(sb, "invalid meta_reserve_blocks value %d, must be between 0 and %d",
			    val, INT_MAX);
		return -EINVAL;
	}

	return 0;
}
/*
* Parse the option string into our options struct. This can allocate
@@ -279,6 +303,14 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m
opts->quorum_slot_nr = nr;
break;
case Opt_meta_reserve_blocks:
ret = match_int(args, &nr);
ret = verify_meta_reserve_blocks(sb, ret, nr);
if (ret < 0)
return ret;
opts->meta_reserve_blocks = nr;
break;
default:
scoutfs_err(sb, "Unknown or malformed option, \"%s\"", p);
return -EINVAL;
@@ -371,6 +403,7 @@ int scoutfs_options_show(struct seq_file *seq, struct dentry *root)
seq_printf(seq, ",orphan_scan_delay_ms=%u", opts.orphan_scan_delay_ms);
if (opts.quorum_slot_nr >= 0)
seq_printf(seq, ",quorum_slot_nr=%d", opts.quorum_slot_nr);
/* options are comma separated; use the same ',' prefix as the options printed before this one */
seq_printf(seq, ",meta_reserve_blocks=%llu", opts.meta_reserve_blocks);
return 0;
}
@@ -589,6 +622,17 @@ static ssize_t quorum_slot_nr_show(struct kobject *kobj, struct kobj_attribute *
}
SCOUTFS_ATTR_RO(quorum_slot_nr);
/*
 * sysfs show handler for the meta_reserve_blocks option.
 *
 * Reads the current mount options for the super block behind @kobj and
 * writes the meta_reserve_blocks value to @buf as a decimal number
 * followed by a newline.  Returns the snprintf() result.
 */
static ssize_t meta_reserve_blocks_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
	struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
	struct scoutfs_mount_options opts;

	scoutfs_options_read(sb, &opts);

	/* the field is a u64, so it must be formatted as unsigned */
	return snprintf(buf, PAGE_SIZE, "%llu\n", opts.meta_reserve_blocks);
}
SCOUTFS_ATTR_RO(meta_reserve_blocks);
static struct attribute *options_attrs[] = {
SCOUTFS_ATTR_PTR(data_prealloc_blocks),
SCOUTFS_ATTR_PTR(data_prealloc_contig_only),
@@ -597,6 +641,7 @@ static struct attribute *options_attrs[] = {
SCOUTFS_ATTR_PTR(orphan_scan_delay_ms),
SCOUTFS_ATTR_PTR(quorum_heartbeat_timeout_ms),
SCOUTFS_ATTR_PTR(quorum_slot_nr),
SCOUTFS_ATTR_PTR(meta_reserve_blocks),
NULL,
};

View File

@@ -13,6 +13,7 @@ struct scoutfs_mount_options {
unsigned int orphan_scan_delay_ms;
int quorum_slot_nr;
u64 quorum_heartbeat_timeout_ms;
u64 meta_reserve_blocks;
};
void scoutfs_options_read(struct super_block *sb, struct scoutfs_mount_options *opts);

View File

@@ -772,11 +772,14 @@ static int alloc_move_empty(struct super_block *sb,
u64 scoutfs_server_reserved_meta_blocks(struct super_block *sb)
{
DECLARE_SERVER_INFO(sb, server);
struct scoutfs_mount_options opts;
u64 server_blocks;
u64 client_blocks;
u64 log_blocks;
u64 nr_clients;
scoutfs_options_read(sb, &opts);
/* server has two meta_avail lists it swaps between */
server_blocks = SCOUTFS_SERVER_META_FILL_TARGET * 2;
@@ -801,7 +804,7 @@ u64 scoutfs_server_reserved_meta_blocks(struct super_block *sb)
nr_clients = server->nr_clients;
spin_unlock(&server->lock);
return server_blocks + (max(1ULL, nr_clients) * client_blocks);
return server_blocks + (max(1ULL, nr_clients) * client_blocks) + opts.meta_reserve_blocks;
}
/*

View File

@@ -464,6 +464,7 @@ for i in $(seq 0 $((T_NR_MOUNTS - 1))); do
if [ "$i" -lt "$T_QUORUM" ]; then
opts="$opts,quorum_slot_nr=$i"
fi
opts="$opts,meta_reserve_blocks=0"
opts="${opts}${T_MNT_OPTIONS}"
msg "mounting $meta_dev|$data_dev on $dir"