From 7ef62894bd98293f3191e9871f75f9101159a767 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Mon, 10 Nov 2025 11:39:22 -0800 Subject: [PATCH] Add ino_alloc_per_lock option Add an option that can limit the number of inode numbers that are allocated per lock group. Signed-off-by: Zach Brown --- kmod/src/inode.c | 25 ++++++++++++++------ kmod/src/options.c | 56 +++++++++++++++++++++++++++++++++++++++++++++ kmod/src/options.h | 1 + utils/man/scoutfs.5 | 8 +++++++ 4 files changed, 83 insertions(+), 7 deletions(-) diff --git a/kmod/src/inode.c b/kmod/src/inode.c index 3553c520..15d29d64 100644 --- a/kmod/src/inode.c +++ b/kmod/src/inode.c @@ -1482,12 +1482,6 @@ static int remove_index_items(struct super_block *sb, u64 ino, * Return an allocated and unused inode number. Returns -ENOSPC if * we're out of inode. * - * Each parent directory has its own pool of free inode numbers. Items - * are sorted by their inode numbers as they're stored in segments. - * This will tend to group together files that are created in a - * directory at the same time in segments. Concurrent creation across - * different directories will be stored in their own regions. - * * Inode numbers are never reclaimed. If the inode is evicted or we're * unmounted the pending inode numbers will be lost. Asking for a * relatively small number from the server each time will tend to @@ -1497,12 +1491,18 @@ static int remove_index_items(struct super_block *sb, u64 ino, int scoutfs_alloc_ino(struct super_block *sb, bool is_dir, u64 *ino_ret) { DECLARE_INODE_SB_INFO(sb, inf); + struct scoutfs_mount_options opts; struct inode_allocator *ia; u64 ino; u64 nr; int ret; - ia = is_dir ? 
&inf->dir_ino_alloc : &inf->ino_alloc; + scoutfs_options_read(sb, &opts); + + if (is_dir && opts.ino_alloc_per_lock == SCOUTFS_LOCK_INODE_GROUP_NR) + ia = &inf->dir_ino_alloc; /* dirs use their own pool only at the default per-lock maximum */ + else + ia = &inf->ino_alloc; spin_lock(&ia->lock); @@ -1523,6 +1523,17 @@ int scoutfs_alloc_ino(struct super_block *sb, bool is_dir, u64 *ino_ret) *ino_ret = ia->ino++; ia->nr--; + if (opts.ino_alloc_per_lock != SCOUTFS_LOCK_INODE_GROUP_NR) { + nr = ia->ino & SCOUTFS_LOCK_INODE_GROUP_MASK; + if (nr >= opts.ino_alloc_per_lock) { + nr = SCOUTFS_LOCK_INODE_GROUP_NR - nr; + if (nr > ia->nr) + nr = ia->nr; /* cannot skip more than the pool holds */ + ia->ino += nr; /* skip ahead without handing these numbers out */ + ia->nr -= nr; + } + } + spin_unlock(&ia->lock); ret = 0; out: diff --git a/kmod/src/options.c b/kmod/src/options.c index b72344a1..0632e39f 100644 --- a/kmod/src/options.c +++ b/kmod/src/options.c @@ -33,6 +33,7 @@ enum { Opt_acl, Opt_data_prealloc_blocks, Opt_data_prealloc_contig_only, + Opt_ino_alloc_per_lock, Opt_log_merge_wait_timeout_ms, Opt_metadev_path, Opt_noacl, @@ -47,6 +48,7 @@ static const match_table_t tokens = { {Opt_acl, "acl"}, {Opt_data_prealloc_blocks, "data_prealloc_blocks=%s"}, {Opt_data_prealloc_contig_only, "data_prealloc_contig_only=%s"}, + {Opt_ino_alloc_per_lock, "ino_alloc_per_lock=%s"}, {Opt_log_merge_wait_timeout_ms, "log_merge_wait_timeout_ms=%s"}, {Opt_metadev_path, "metadev_path=%s"}, {Opt_noacl, "noacl"}, @@ -136,6 +138,7 @@ static void init_default_options(struct scoutfs_mount_options *opts) opts->data_prealloc_blocks = SCOUTFS_DATA_PREALLOC_DEFAULT_BLOCKS; opts->data_prealloc_contig_only = 1; + opts->ino_alloc_per_lock = SCOUTFS_LOCK_INODE_GROUP_NR; opts->log_merge_wait_timeout_ms = DEFAULT_LOG_MERGE_WAIT_TIMEOUT_MS; opts->orphan_scan_delay_ms = -1; opts->quorum_heartbeat_timeout_ms = SCOUTFS_QUORUM_DEF_HB_TIMEO_MS; @@ -238,6 +241,18 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m opts->data_prealloc_contig_only = nr; break; + case Opt_ino_alloc_per_lock: + ret = match_int(args, &nr); + if (ret < 0 || nr < 
1 || nr > SCOUTFS_LOCK_INODE_GROUP_NR) { + scoutfs_err(sb, "invalid ino_alloc_per_lock option, must be between 1 and %u", + SCOUTFS_LOCK_INODE_GROUP_NR); + if (ret == 0) + ret = -EINVAL; /* parsed, but outside the allowed range */ + return ret; + } + opts->ino_alloc_per_lock = nr; + break; + case Opt_tcp_keepalive_timeout_ms: ret = match_int(args, &nr); ret = verify_tcp_keepalive_timeout_ms(sb, ret, nr); @@ -393,6 +408,7 @@ int scoutfs_options_show(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",acl"); seq_printf(seq, ",data_prealloc_blocks=%llu", opts.data_prealloc_blocks); seq_printf(seq, ",data_prealloc_contig_only=%u", opts.data_prealloc_contig_only); + seq_printf(seq, ",ino_alloc_per_lock=%u", opts.ino_alloc_per_lock); seq_printf(seq, ",metadev_path=%s", opts.metadev_path); if (!is_acl) seq_puts(seq, ",noacl"); @@ -481,6 +497,45 @@ static ssize_t data_prealloc_contig_only_store(struct kobject *kobj, struct kobj } SCOUTFS_ATTR_RW(data_prealloc_contig_only); +static ssize_t ino_alloc_per_lock_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj); + struct scoutfs_mount_options opts; + + scoutfs_options_read(sb, &opts); + + return snprintf(buf, PAGE_SIZE, "%u", opts.ino_alloc_per_lock); +} +static ssize_t ino_alloc_per_lock_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj); + DECLARE_OPTIONS_INFO(sb, optinf); + char nullterm[20]; /* more than enough for octal -U32_MAX */ + long val; + int len; + int ret; + + len = min(count, sizeof(nullterm) - 1); + memcpy(nullterm, buf, len); + nullterm[len] = '\0'; + + ret = kstrtol(nullterm, 0, &val); + if (ret < 0 || val < 1 || val > SCOUTFS_LOCK_INODE_GROUP_NR) { + scoutfs_err(sb, "invalid ino_alloc_per_lock option, must be between 1 and %u", + SCOUTFS_LOCK_INODE_GROUP_NR); + return -EINVAL; + } + + write_seqlock(&optinf->seqlock); + optinf->opts.ino_alloc_per_lock = val; /* stored inside the seqlock write section */ + 
write_sequnlock(&optinf->seqlock); + + return count; +} +SCOUTFS_ATTR_RW(ino_alloc_per_lock); + static ssize_t log_merge_wait_timeout_ms_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -621,6 +676,7 @@ SCOUTFS_ATTR_RO(quorum_slot_nr); static struct attribute *options_attrs[] = { SCOUTFS_ATTR_PTR(data_prealloc_blocks), SCOUTFS_ATTR_PTR(data_prealloc_contig_only), + SCOUTFS_ATTR_PTR(ino_alloc_per_lock), SCOUTFS_ATTR_PTR(log_merge_wait_timeout_ms), SCOUTFS_ATTR_PTR(metadev_path), SCOUTFS_ATTR_PTR(orphan_scan_delay_ms), diff --git a/kmod/src/options.h b/kmod/src/options.h index 540c82b8..60a58239 100644 --- a/kmod/src/options.h +++ b/kmod/src/options.h @@ -8,6 +8,7 @@ struct scoutfs_mount_options { u64 data_prealloc_blocks; bool data_prealloc_contig_only; + unsigned int ino_alloc_per_lock; unsigned int log_merge_wait_timeout_ms; char *metadev_path; unsigned int orphan_scan_delay_ms; diff --git a/utils/man/scoutfs.5 b/utils/man/scoutfs.5 index d8cca732..6b855fbc 100644 --- a/utils/man/scoutfs.5 +++ b/utils/man/scoutfs.5 @@ -55,6 +55,14 @@ with initial sparse regions (perhaps by multiple threads writing to different regions) and wasted space isn't an issue (perhaps because the file population contains few small files). .TP +.B ino_alloc_per_lock= +This option determines how many inode numbers are allocated in the same +cluster lock. The default, and maximum, is 1024. The minimum is 1. +Allocating fewer inodes per lock can allow more parallelism between +mounts because there are more locks that cover the same number of +created files. This can be helpful when working with smaller numbers of +large files. +.TP .B log_merge_wait_timeout_ms= This option sets the amount of time, in milliseconds, that log merge creation can wait before timing out. This setting is per-mount, only