From a67ea30bb7ba06323792a407adcd7fc608588c9c Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Fri, 11 Feb 2022 12:55:25 -0800 Subject: [PATCH] Add orphan_scan_delay_ms mount option Add a mount option to set the delay between scans of the orphan list. The sysfs file for the option is writable so this option can be set at run time. Signed-off-by: Zach Brown --- kmod/src/inode.c | 28 +++++++++------ kmod/src/inode.h | 1 + kmod/src/options.c | 87 ++++++++++++++++++++++++++++++++++++++++----- kmod/src/options.h | 4 ++- utils/man/scoutfs.5 | 15 ++++++++ 5 files changed, 114 insertions(+), 21 deletions(-) diff --git a/kmod/src/inode.c b/kmod/src/inode.c index 4329905c..b71dffdd 100644 --- a/kmod/src/inode.c +++ b/kmod/src/inode.c @@ -1745,18 +1745,26 @@ void scoutfs_inode_queue_iput(struct inode *inode) /* * All mounts are performing this work concurrently. We introduce * significant jitter between them to try and keep them from all - * bunching up and working on the same inodes. + * bunching up and working on the same inodes. We always try to delay + * for at least one jiffy if precision tricks us into calculating no + * delay. 
*/ -static void schedule_orphan_dwork(struct inode_sb_info *inf) +void scoutfs_inode_schedule_orphan_dwork(struct super_block *sb) { -#define ORPHAN_SCAN_MIN_MS (10 * MSEC_PER_SEC) -#define ORPHAN_SCAN_JITTER_MS (40 * MSEC_PER_SEC) + DECLARE_INODE_SB_INFO(sb, inf); + struct scoutfs_mount_options opts; + unsigned long low; + unsigned long high; unsigned long delay; if (!inf->stopped) { - delay = msecs_to_jiffies(ORPHAN_SCAN_MIN_MS + - prandom_u32_max(ORPHAN_SCAN_JITTER_MS)); - schedule_delayed_work(&inf->orphan_scan_dwork, delay); + scoutfs_options_read(sb, &opts); + + low = (opts.orphan_scan_delay_ms * 80) / 100; + high = (opts.orphan_scan_delay_ms * 120) / 100; + delay = msecs_to_jiffies(low + prandom_u32_max(high - low)) ?: 1; + + mod_delayed_work(system_wq, &inf->orphan_scan_dwork, delay); } } @@ -1885,7 +1893,7 @@ out: if (ret < 0) scoutfs_inc_counter(sb, orphan_scan_error); - schedule_orphan_dwork(inf); + scoutfs_inode_schedule_orphan_dwork(sb); } /* @@ -2010,9 +2018,7 @@ int scoutfs_inode_setup(struct super_block *sb) */ void scoutfs_inode_start(struct super_block *sb) { - DECLARE_INODE_SB_INFO(sb, inf); - - schedule_orphan_dwork(inf); + scoutfs_inode_schedule_orphan_dwork(sb); } /* diff --git a/kmod/src/inode.h b/kmod/src/inode.h index eab303ee..e855e8f1 100644 --- a/kmod/src/inode.h +++ b/kmod/src/inode.h @@ -126,6 +126,7 @@ int scoutfs_setattr(struct dentry *dentry, struct iattr *attr); int scoutfs_inode_orphan_create(struct super_block *sb, u64 ino, struct scoutfs_lock *lock); int scoutfs_inode_orphan_delete(struct super_block *sb, u64 ino, struct scoutfs_lock *lock); +void scoutfs_inode_schedule_orphan_dwork(struct super_block *sb); void scoutfs_inode_queue_writeback(struct inode *inode); int scoutfs_inode_walk_writeback(struct super_block *sb, bool write); diff --git a/kmod/src/options.c b/kmod/src/options.c index ce731384..a447a931 100644 --- a/kmod/src/options.c +++ b/kmod/src/options.c @@ -26,16 +26,19 @@ #include "msg.h" #include "options.h" 
#include "super.h" +#include "inode.h" enum { - Opt_quorum_slot_nr, Opt_metadev_path, + Opt_orphan_scan_delay_ms, + Opt_quorum_slot_nr, Opt_err, }; static const match_table_t tokens = { - {Opt_quorum_slot_nr, "quorum_slot_nr=%s"}, {Opt_metadev_path, "metadev_path=%s"}, + {Opt_orphan_scan_delay_ms, "orphan_scan_delay_ms=%s"}, + {Opt_quorum_slot_nr, "quorum_slot_nr=%s"}, {Opt_err, NULL} }; @@ -99,10 +102,15 @@ static void free_options(struct scoutfs_mount_options *opts) kfree(opts->metadev_path); } +#define MIN_ORPHAN_SCAN_DELAY_MS 100L /* signed, so negative input fails the range checks */ +#define DEFAULT_ORPHAN_SCAN_DELAY_MS (10 * MSEC_PER_SEC) +#define MAX_ORPHAN_SCAN_DELAY_MS (60 * MSEC_PER_SEC) + static void init_default_options(struct scoutfs_mount_options *opts) { memset(opts, 0, sizeof(*opts)); opts->quorum_slot_nr = -1; + opts->orphan_scan_delay_ms = DEFAULT_ORPHAN_SCAN_DELAY_MS; } /* @@ -126,6 +134,30 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m token = match_token(p, tokens, args); switch (token) { + case Opt_metadev_path: + ret = parse_bdev_path(sb, &args[0], &opts->metadev_path); + if (ret < 0) + return ret; + break; + + case Opt_orphan_scan_delay_ms: + if (opts->orphan_scan_delay_ms != DEFAULT_ORPHAN_SCAN_DELAY_MS) { /* init seeds the default, not -1; any other value means an earlier option set it */ + scoutfs_err(sb, "multiple orphan_scan_delay_ms options provided, only provide one."); + return -EINVAL; + } + + ret = match_int(args, &nr); + if (ret < 0 || + nr < MIN_ORPHAN_SCAN_DELAY_MS || nr > MAX_ORPHAN_SCAN_DELAY_MS) { + scoutfs_err(sb, "invalid orphan_scan_delay_ms option, must be between %lu and %lu", + MIN_ORPHAN_SCAN_DELAY_MS, MAX_ORPHAN_SCAN_DELAY_MS); + if (ret == 0) + ret = -EINVAL; + return ret; + } + opts->orphan_scan_delay_ms = nr; + break; + case Opt_quorum_slot_nr: if (opts->quorum_slot_nr != -1) { scoutfs_err(sb, "multiple quorum_slot_nr options provided, only provide one."); @@ -143,12 +175,6 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m opts->quorum_slot_nr = nr; break; - case Opt_metadev_path: - ret = 
parse_bdev_path(sb, &args[0], &opts->metadev_path); - if (ret < 0) - return ret; - break; - default: scoutfs_err(sb, "Unknown or malformed option, \"%s\"", p); return -EINVAL; @@ -227,9 +253,10 @@ int scoutfs_options_show(struct seq_file *seq, struct dentry *root) scoutfs_options_read(sb, &opts); + seq_printf(seq, ",metadev_path=%s", opts.metadev_path); + seq_printf(seq, ",orphan_scan_delay_ms=%u", opts.orphan_scan_delay_ms); if (opts.quorum_slot_nr >= 0) seq_printf(seq, ",quorum_slot_nr=%d", opts.quorum_slot_nr); - seq_printf(seq, ",metadev_path=%s", opts.metadev_path); return 0; } @@ -245,6 +272,47 @@ static ssize_t metadev_path_show(struct kobject *kobj, struct kobj_attribute *at } SCOUTFS_ATTR_RO(metadev_path); +static ssize_t orphan_scan_delay_ms_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj); + struct scoutfs_mount_options opts; + + scoutfs_options_read(sb, &opts); + + return snprintf(buf, PAGE_SIZE, "%u", opts.orphan_scan_delay_ms); +} +static ssize_t orphan_scan_delay_ms_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj); + DECLARE_OPTIONS_INFO(sb, optinf); + char nullterm[20]; /* more than enough for octal -U32_MAX */ + long val; + int len; + int ret; + + len = min(count, sizeof(nullterm) - 1); + memcpy(nullterm, buf, len); + nullterm[len] = '\0'; + + ret = kstrtol(nullterm, 0, &val); + if (ret < 0 || val < MIN_ORPHAN_SCAN_DELAY_MS || val > MAX_ORPHAN_SCAN_DELAY_MS) { + scoutfs_err(sb, "invalid orphan_scan_delay_ms value written to options sysfs file, must be between %lu and %lu", + MIN_ORPHAN_SCAN_DELAY_MS, MAX_ORPHAN_SCAN_DELAY_MS); + return -EINVAL; + } + + write_seqlock(&optinf->seqlock); + optinf->opts.orphan_scan_delay_ms = val; + write_sequnlock(&optinf->seqlock); + + scoutfs_inode_schedule_orphan_dwork(sb); + + return count; +} +SCOUTFS_ATTR_RW(orphan_scan_delay_ms); + 
static ssize_t quorum_slot_nr_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj); @@ -258,6 +326,7 @@ SCOUTFS_ATTR_RO(quorum_slot_nr); static struct attribute *options_attrs[] = { SCOUTFS_ATTR_PTR(metadev_path), + SCOUTFS_ATTR_PTR(orphan_scan_delay_ms), SCOUTFS_ATTR_PTR(quorum_slot_nr), NULL, }; diff --git a/kmod/src/options.h b/kmod/src/options.h index 993f3894..26d1eb1e 100644 --- a/kmod/src/options.h +++ b/kmod/src/options.h @@ -6,8 +6,10 @@ #include "format.h" struct scoutfs_mount_options { - int quorum_slot_nr; char *metadev_path; + unsigned int orphan_scan_delay_ms; + int quorum_slot_nr; + }; void scoutfs_options_read(struct super_block *sb, struct scoutfs_mount_options *opts); diff --git a/utils/man/scoutfs.5 b/utils/man/scoutfs.5 index a9303c9e..f6cbe193 100644 --- a/utils/man/scoutfs.5 +++ b/utils/man/scoutfs.5 @@ -21,6 +21,21 @@ contains the filesystem's metadata. .sp This option is required. .TP +.B orphan_scan_delay_ms= +This option sets the average expected delay, in milliseconds, between +each mount's scan of the global orphaned inode list. Jitter is added to +avoid contention so each individual delay between scans is a random +value up to 20% less than or greater than this average expected delay. +.sp +The minimum value for this option is 100ms which is very short and is +only reasonable for testing or experiments. The default is 10000ms (10 +seconds) and the maximum is 60000ms (1 minute). +.sp +This option can be changed in an active mount by writing to its file in +the options directory in the mount's sysfs directory. Writing a new +value will cause the next pending orphan scan to be rescheduled +with the newly written delay time. +.TP .B quorum_slot_nr= The quorum_slot_nr option assigns a quorum member slot to the mount. The mount will use the slot assignment to claim exclusive ownership of