mirror of
https://github.com/versity/scoutfs.git
synced 2026-01-03 02:45:20 +00:00
Make log merge wait timeout tunable
Add a mount option for the amount of time that log merge creation can wait before giving up. We add some counters so we can see how often the timeout is being hit and what the average successful wait time is. Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
@@ -145,6 +145,7 @@
|
||||
EXPAND_COUNTER(lock_shrink_work) \
|
||||
EXPAND_COUNTER(lock_unlock) \
|
||||
EXPAND_COUNTER(lock_wait) \
|
||||
EXPAND_COUNTER(log_merge_wait_timeout) \
|
||||
EXPAND_COUNTER(net_dropped_response) \
|
||||
EXPAND_COUNTER(net_send_bytes) \
|
||||
EXPAND_COUNTER(net_send_error) \
|
||||
|
||||
@@ -33,6 +33,7 @@ enum {
|
||||
Opt_acl,
|
||||
Opt_data_prealloc_blocks,
|
||||
Opt_data_prealloc_contig_only,
|
||||
Opt_log_merge_wait_timeout_ms,
|
||||
Opt_metadev_path,
|
||||
Opt_noacl,
|
||||
Opt_orphan_scan_delay_ms,
|
||||
@@ -45,6 +46,7 @@ static const match_table_t tokens = {
|
||||
{Opt_acl, "acl"},
|
||||
{Opt_data_prealloc_blocks, "data_prealloc_blocks=%s"},
|
||||
{Opt_data_prealloc_contig_only, "data_prealloc_contig_only=%s"},
|
||||
{Opt_log_merge_wait_timeout_ms, "log_merge_wait_timeout_ms=%s"},
|
||||
{Opt_metadev_path, "metadev_path=%s"},
|
||||
{Opt_noacl, "noacl"},
|
||||
{Opt_orphan_scan_delay_ms, "orphan_scan_delay_ms=%s"},
|
||||
@@ -113,6 +115,10 @@ static void free_options(struct scoutfs_mount_options *opts)
|
||||
kfree(opts->metadev_path);
|
||||
}
|
||||
|
||||
#define MIN_LOG_MERGE_WAIT_TIMEOUT_MS 100UL
|
||||
#define DEFAULT_LOG_MERGE_WAIT_TIMEOUT_MS 500
|
||||
#define MAX_LOG_MERGE_WAIT_TIMEOUT_MS (60 * MSEC_PER_SEC)
|
||||
|
||||
#define MIN_ORPHAN_SCAN_DELAY_MS 100UL
|
||||
#define DEFAULT_ORPHAN_SCAN_DELAY_MS (10 * MSEC_PER_SEC)
|
||||
#define MAX_ORPHAN_SCAN_DELAY_MS (60 * MSEC_PER_SEC)
|
||||
@@ -126,11 +132,27 @@ static void init_default_options(struct scoutfs_mount_options *opts)
|
||||
|
||||
opts->data_prealloc_blocks = SCOUTFS_DATA_PREALLOC_DEFAULT_BLOCKS;
|
||||
opts->data_prealloc_contig_only = 1;
|
||||
opts->log_merge_wait_timeout_ms = DEFAULT_LOG_MERGE_WAIT_TIMEOUT_MS;
|
||||
opts->orphan_scan_delay_ms = -1;
|
||||
opts->quorum_heartbeat_timeout_ms = SCOUTFS_QUORUM_DEF_HB_TIMEO_MS;
|
||||
opts->quorum_slot_nr = -1;
|
||||
}
|
||||
|
||||
static int verify_log_merge_wait_timeout_ms(struct super_block *sb, int ret, int val)
|
||||
{
|
||||
if (ret < 0) {
|
||||
scoutfs_err(sb, "failed to parse log_merge_wait_timeout_ms value");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (val < MIN_LOG_MERGE_WAIT_TIMEOUT_MS || val > MAX_LOG_MERGE_WAIT_TIMEOUT_MS) {
|
||||
scoutfs_err(sb, "invalid log_merge_wait_timeout_ms value %d, must be between %lu and %lu",
|
||||
val, MIN_LOG_MERGE_WAIT_TIMEOUT_MS, MAX_LOG_MERGE_WAIT_TIMEOUT_MS);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_quorum_heartbeat_timeout_ms(struct super_block *sb, int ret, u64 val)
|
||||
{
|
||||
if (ret < 0) {
|
||||
@@ -196,6 +218,14 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m
|
||||
opts->data_prealloc_contig_only = nr;
|
||||
break;
|
||||
|
||||
case Opt_log_merge_wait_timeout_ms:
|
||||
ret = match_int(args, &nr);
|
||||
ret = verify_log_merge_wait_timeout_ms(sb, ret, nr);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
opts->log_merge_wait_timeout_ms = nr64;
|
||||
break;
|
||||
|
||||
case Opt_metadev_path:
|
||||
ret = parse_bdev_path(sb, &args[0], &opts->metadev_path);
|
||||
if (ret < 0)
|
||||
@@ -422,6 +452,43 @@ static ssize_t data_prealloc_contig_only_store(struct kobject *kobj, struct kobj
|
||||
}
|
||||
SCOUTFS_ATTR_RW(data_prealloc_contig_only);
|
||||
|
||||
static ssize_t log_merge_wait_timeout_ms_show(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
|
||||
struct scoutfs_mount_options opts;
|
||||
|
||||
scoutfs_options_read(sb, &opts);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%u", opts.log_merge_wait_timeout_ms);
|
||||
}
|
||||
/*
 * sysfs store handler: parse the written text as an int, validate it
 * with verify_log_merge_wait_timeout_ms(), and on success publish it to
 * the mount's options under the options seqlock.
 *
 * Returns count when the value was accepted, otherwise the error from
 * the verifier.
 */
static ssize_t log_merge_wait_timeout_ms_store(struct kobject *kobj, struct kobj_attribute *attr,
					       const char *buf, size_t count)
{
	struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
	DECLARE_OPTIONS_INFO(sb, optinf);
	char str[30]; /* more than enough for octal -U64_MAX */
	int timeout_ms;
	int copy_len;
	int ret;

	/* the input isn't copied with a terminator here, so build a bounded one */
	copy_len = min(count, sizeof(str) - 1);
	memcpy(str, buf, copy_len);
	str[copy_len] = '\0';

	/* the verifier looks at the parse result before it looks at the value */
	ret = kstrtoint(str, 0, &timeout_ms);
	ret = verify_log_merge_wait_timeout_ms(sb, ret, timeout_ms);
	if (ret != 0)
		return ret;

	write_seqlock(&optinf->seqlock);
	optinf->opts.log_merge_wait_timeout_ms = timeout_ms;
	write_sequnlock(&optinf->seqlock);

	ret = count;
	return ret;
}
|
||||
SCOUTFS_ATTR_RW(log_merge_wait_timeout_ms);
|
||||
|
||||
static ssize_t metadev_path_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
|
||||
@@ -525,6 +592,7 @@ SCOUTFS_ATTR_RO(quorum_slot_nr);
|
||||
static struct attribute *options_attrs[] = {
|
||||
SCOUTFS_ATTR_PTR(data_prealloc_blocks),
|
||||
SCOUTFS_ATTR_PTR(data_prealloc_contig_only),
|
||||
SCOUTFS_ATTR_PTR(log_merge_wait_timeout_ms),
|
||||
SCOUTFS_ATTR_PTR(metadev_path),
|
||||
SCOUTFS_ATTR_PTR(orphan_scan_delay_ms),
|
||||
SCOUTFS_ATTR_PTR(quorum_heartbeat_timeout_ms),
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
struct scoutfs_mount_options {
|
||||
u64 data_prealloc_blocks;
|
||||
bool data_prealloc_contig_only;
|
||||
unsigned int log_merge_wait_timeout_ms;
|
||||
char *metadev_path;
|
||||
unsigned int orphan_scan_delay_ms;
|
||||
int quorum_slot_nr;
|
||||
|
||||
@@ -1052,21 +1052,13 @@ static int next_log_merge_item(struct super_block *sb,
|
||||
* abandoned log btree finalized. If it takes too long each client has
|
||||
* a chance to make forward progress before being asked to commit again.
|
||||
*
|
||||
* We're waiting on heavy state that is protected by mutexes and
|
||||
* transaction machinery. It's tricky to recreate that state for
|
||||
* lightweight condition tests that don't change task state. Instead of
|
||||
* trying to get that right, particularly as we unwind after success or
|
||||
* after timeouts, waiters use an unsatisfying poll. Short enough to
|
||||
* not add terrible latency, given how heavy and infrequent this already
|
||||
* is, and long enough to not melt the cpu. This could be tuned if it
|
||||
* becomes a problem.
|
||||
*
|
||||
* This can end up finalizing a new empty log btree if a new mount
|
||||
* happens to arrive at just the right time. That's fine, merging will
|
||||
* ignore and tear down the empty input.
|
||||
*/
|
||||
#define FINALIZE_POLL_MS (11)
|
||||
#define FINALIZE_TIMEOUT_MS (MSEC_PER_SEC / 2)
|
||||
#define FINALIZE_POLL_MIN_DELAY_MS 5U
|
||||
#define FINALIZE_POLL_MAX_DELAY_MS 100U
|
||||
#define FINALIZE_POLL_DELAY_GROWTH_PCT 150U
|
||||
static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_log_trees *lt,
|
||||
u64 rid, struct commit_hold *hold)
|
||||
{
|
||||
@@ -1074,8 +1066,10 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_log_merge_status stat;
|
||||
struct scoutfs_log_merge_range rng;
|
||||
struct scoutfs_mount_options opts;
|
||||
struct scoutfs_log_trees each_lt;
|
||||
struct scoutfs_log_trees fin;
|
||||
unsigned int delay_ms;
|
||||
unsigned long timeo;
|
||||
bool saw_finalized;
|
||||
bool others_active;
|
||||
@@ -1083,10 +1077,14 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
|
||||
bool ours_visible;
|
||||
struct scoutfs_key key;
|
||||
char *err_str = NULL;
|
||||
ktime_t start;
|
||||
int ret;
|
||||
int err;
|
||||
|
||||
timeo = jiffies + msecs_to_jiffies(FINALIZE_TIMEOUT_MS);
|
||||
scoutfs_options_read(sb, &opts);
|
||||
timeo = jiffies + msecs_to_jiffies(opts.log_merge_wait_timeout_ms);
|
||||
delay_ms = FINALIZE_POLL_MIN_DELAY_MS;
|
||||
start = ktime_get_raw();
|
||||
|
||||
for (;;) {
|
||||
/* nothing to do if there's already a merge in flight */
|
||||
@@ -1201,13 +1199,16 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
|
||||
if (ret < 0)
|
||||
err_str = "applying commit before waiting for finalized";
|
||||
|
||||
msleep(FINALIZE_POLL_MS);
|
||||
msleep(delay_ms);
|
||||
delay_ms = min(delay_ms * FINALIZE_POLL_DELAY_GROWTH_PCT / 100,
|
||||
FINALIZE_POLL_MAX_DELAY_MS);
|
||||
|
||||
server_hold_commit(sb, hold);
|
||||
mutex_lock(&server->logs_mutex);
|
||||
|
||||
/* done if we timed out */
|
||||
if (time_after(jiffies, timeo)) {
|
||||
scoutfs_inc_counter(sb, log_merge_wait_timeout);
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -55,6 +55,19 @@ with initial sparse regions (perhaps by multiple threads writing to
|
||||
different regions) and wasted space isn't an issue (perhaps because the
|
||||
file population contains few small files).
|
||||
.TP
|
||||
.B log_merge_wait_timeout_ms=<number>
|
||||
This option sets the amount of time, in milliseconds, that log merge
|
||||
creation can wait before timing out. This setting is per-mount, only
|
||||
changes the behavior of that mount, and only affects the server when it
|
||||
is running in that mount.
|
||||
.sp
|
||||
This determines how long it may take for mounts to synchronize
|
||||
committing their log trees to create a log merge operation. Setting it
|
||||
too high can create long latencies in the event that a mount takes a
|
||||
long time to commit its log. Setting it too low can result in the
|
||||
creation of excessive numbers of log trees that are never merged. The
|
||||
default is 500 and it cannot be less than 100 nor greater than 60000.
|
||||
.TP
|
||||
.B metadev_path=<device>
|
||||
The metadev_path option specifies the path to the block device that
|
||||
contains the filesystem's metadata.
|
||||
|
||||
Reference in New Issue
Block a user