Add ino_alloc_per_lock option

Add an option that can limit the number of inode numbers that are
allocated per lock group.

Signed-off-by: Zach Brown <zab@versity.com>
Zach Brown
2025-11-10 11:39:22 -08:00
parent 1f363a1ead
commit 7ef62894bd
4 changed files with 83 additions and 7 deletions


@@ -1482,12 +1482,6 @@ static int remove_index_items(struct super_block *sb, u64 ino,
* Return an allocated and unused inode number. Returns -ENOSPC if
* we're out of inodes.
*
* Each parent directory has its own pool of free inode numbers. Items
* are sorted by their inode numbers as they're stored in segments.
* This will tend to group together files that are created in a
* directory at the same time in segments. Concurrent creation across
* different directories will be stored in their own regions.
*
* Inode numbers are never reclaimed. If the inode is evicted or we're
* unmounted the pending inode numbers will be lost. Asking for a
* relatively small number from the server each time will tend to
@@ -1497,12 +1491,18 @@ static int remove_index_items(struct super_block *sb, u64 ino,
int scoutfs_alloc_ino(struct super_block *sb, bool is_dir, u64 *ino_ret)
{
DECLARE_INODE_SB_INFO(sb, inf);
struct scoutfs_mount_options opts;
struct inode_allocator *ia;
u64 ino;
u64 nr;
int ret;
ia = is_dir ? &inf->dir_ino_alloc : &inf->ino_alloc;
scoutfs_options_read(sb, &opts);
if (is_dir && opts.ino_alloc_per_lock == SCOUTFS_LOCK_INODE_GROUP_NR)
ia = &inf->dir_ino_alloc;
else
ia = &inf->ino_alloc;
spin_lock(&ia->lock);
@@ -1523,6 +1523,17 @@ int scoutfs_alloc_ino(struct super_block *sb, bool is_dir, u64 *ino_ret)
*ino_ret = ia->ino++;
ia->nr--;
if (opts.ino_alloc_per_lock != SCOUTFS_LOCK_INODE_GROUP_NR) {
nr = ia->ino & SCOUTFS_LOCK_INODE_GROUP_MASK;
if (nr >= opts.ino_alloc_per_lock) {
nr = SCOUTFS_LOCK_INODE_GROUP_NR - nr;
if (nr > ia->nr)
nr = ia->nr;
ia->ino += nr;
ia->nr -= nr;
}
}
spin_unlock(&ia->lock);
ret = 0;
out:
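
To make the skip-ahead arithmetic in that hunk concrete, here is a minimal
user-space sketch (not part of the patch). It assumes SCOUTFS_LOCK_INODE_GROUP_NR
is 1024, the documented default and maximum, with a mask of GROUP_NR - 1; the
struct ino_pool and next_ino names are illustrative only. When per_lock is
smaller than the group size, only the first per_lock numbers of each
1024-number group are handed out and the rest of the group is skipped,
bounded by what remains in the pool.

/*
 * Minimal user-space model of the per-lock clamp, assuming a lock
 * group size of 1024.  next_ino() hands out the first "per_lock"
 * numbers of each group and skips the remainder of the group,
 * never skipping past the end of the pool.
 */
#include <stdint.h>
#include <stdio.h>

#define GROUP_NR	1024ULL
#define GROUP_MASK	(GROUP_NR - 1)

struct ino_pool {
	uint64_t ino;	/* next number to hand out */
	uint64_t nr;	/* numbers remaining in the pool */
};

static int next_ino(struct ino_pool *ia, uint64_t per_lock, uint64_t *ino_ret)
{
	uint64_t nr;

	if (ia->nr == 0)
		return -1;	/* stand-in for -ENOSPC */

	*ino_ret = ia->ino++;
	ia->nr--;

	if (per_lock != GROUP_NR) {
		nr = ia->ino & GROUP_MASK;
		if (nr >= per_lock) {
			/* jump to the start of the next lock group */
			nr = GROUP_NR - nr;
			if (nr > ia->nr)
				nr = ia->nr;
			ia->ino += nr;
			ia->nr -= nr;
		}
	}

	return 0;
}

int main(void)
{
	struct ino_pool ia = { .ino = 4096, .nr = 3000 };
	uint64_t ino;

	/* with per_lock = 2 this prints: 4096 4097 5120 5121 6144 6145 */
	while (next_ino(&ia, 2, &ino) == 0)
		printf("%llu ", (unsigned long long)ino);
	printf("\n");
	return 0;
}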


@@ -33,6 +33,7 @@ enum {
Opt_acl,
Opt_data_prealloc_blocks,
Opt_data_prealloc_contig_only,
Opt_ino_alloc_per_lock,
Opt_log_merge_wait_timeout_ms,
Opt_metadev_path,
Opt_noacl,
@@ -47,6 +48,7 @@ static const match_table_t tokens = {
{Opt_acl, "acl"},
{Opt_data_prealloc_blocks, "data_prealloc_blocks=%s"},
{Opt_data_prealloc_contig_only, "data_prealloc_contig_only=%s"},
{Opt_ino_alloc_per_lock, "ino_alloc_per_lock=%s"},
{Opt_log_merge_wait_timeout_ms, "log_merge_wait_timeout_ms=%s"},
{Opt_metadev_path, "metadev_path=%s"},
{Opt_noacl, "noacl"},
@@ -136,6 +138,7 @@ static void init_default_options(struct scoutfs_mount_options *opts)
opts->data_prealloc_blocks = SCOUTFS_DATA_PREALLOC_DEFAULT_BLOCKS;
opts->data_prealloc_contig_only = 1;
opts->ino_alloc_per_lock = SCOUTFS_LOCK_INODE_GROUP_NR;
opts->log_merge_wait_timeout_ms = DEFAULT_LOG_MERGE_WAIT_TIMEOUT_MS;
opts->orphan_scan_delay_ms = -1;
opts->quorum_heartbeat_timeout_ms = SCOUTFS_QUORUM_DEF_HB_TIMEO_MS;
@@ -238,6 +241,18 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m
opts->data_prealloc_contig_only = nr;
break;
case Opt_ino_alloc_per_lock:
ret = match_int(args, &nr);
if (ret < 0 || nr < 1 || nr > SCOUTFS_LOCK_INODE_GROUP_NR) {
scoutfs_err(sb, "invalid ino_alloc_per_lock option, must be between 1 and %u",
SCOUTFS_LOCK_INODE_GROUP_NR);
if (ret == 0)
ret = -EINVAL;
return ret;
}
opts->ino_alloc_per_lock = nr;
break;
case Opt_tcp_keepalive_timeout_ms:
ret = match_int(args, &nr);
ret = verify_tcp_keepalive_timeout_ms(sb, ret, nr);
@@ -393,6 +408,7 @@ int scoutfs_options_show(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",acl");
seq_printf(seq, ",data_prealloc_blocks=%llu", opts.data_prealloc_blocks);
seq_printf(seq, ",data_prealloc_contig_only=%u", opts.data_prealloc_contig_only);
seq_printf(seq, ",ino_alloc_per_lock=%u", opts.ino_alloc_per_lock);
seq_printf(seq, ",metadev_path=%s", opts.metadev_path);
if (!is_acl)
seq_puts(seq, ",noacl");
@@ -481,6 +497,45 @@ static ssize_t data_prealloc_contig_only_store(struct kobject *kobj, struct kobj
}
SCOUTFS_ATTR_RW(data_prealloc_contig_only);
static ssize_t ino_alloc_per_lock_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
struct scoutfs_mount_options opts;
scoutfs_options_read(sb, &opts);
return snprintf(buf, PAGE_SIZE, "%u", opts.ino_alloc_per_lock);
}
static ssize_t ino_alloc_per_lock_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
DECLARE_OPTIONS_INFO(sb, optinf);
char nullterm[20]; /* more than enough for octal -U32_MAX */
long val;
int len;
int ret;
len = min(count, sizeof(nullterm) - 1);
memcpy(nullterm, buf, len);
nullterm[len] = '\0';
ret = kstrtol(nullterm, 0, &val);
if (ret < 0 || val < 1 || val > SCOUTFS_LOCK_INODE_GROUP_NR) {
scoutfs_err(sb, "invalid ino_alloc_per_lock option, must be between 1 and %u",
SCOUTFS_LOCK_INODE_GROUP_NR);
return -EINVAL;
}
write_seqlock(&optinf->seqlock);
optinf->opts.ino_alloc_per_lock = val;
write_sequnlock(&optinf->seqlock);
return count;
}
SCOUTFS_ATTR_RW(ino_alloc_per_lock);
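
With the attribute registered here, the limit can presumably also be changed
at runtime by writing a value between 1 and SCOUTFS_LOCK_INODE_GROUP_NR to the
new ino_alloc_per_lock file under the mount's scoutfs options sysfs directory
(the exact path isn't shown in this diff); subsequent allocations pick up the
new value through scoutfs_options_read().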
static ssize_t log_merge_wait_timeout_ms_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
@@ -621,6 +676,7 @@ SCOUTFS_ATTR_RO(quorum_slot_nr);
static struct attribute *options_attrs[] = {
SCOUTFS_ATTR_PTR(data_prealloc_blocks),
SCOUTFS_ATTR_PTR(data_prealloc_contig_only),
SCOUTFS_ATTR_PTR(ino_alloc_per_lock),
SCOUTFS_ATTR_PTR(log_merge_wait_timeout_ms),
SCOUTFS_ATTR_PTR(metadev_path),
SCOUTFS_ATTR_PTR(orphan_scan_delay_ms),


@@ -8,6 +8,7 @@
struct scoutfs_mount_options {
u64 data_prealloc_blocks;
bool data_prealloc_contig_only;
unsigned int ino_alloc_per_lock;
unsigned int log_merge_wait_timeout_ms;
char *metadev_path;
unsigned int orphan_scan_delay_ms;


@@ -55,6 +55,14 @@ with initial sparse regions (perhaps by multiple threads writing to
different regions) and wasted space isn't an issue (perhaps because the
file population contains few small files).
.TP
.B ino_alloc_per_lock=<number>
This option limits how many inode numbers are allocated under each
cluster lock. The default, and maximum, is 1024. The minimum is 1.
Allocating fewer inode numbers per lock can allow more parallelism
between mounts because more locks cover the same number of created
files. This can be helpful when working with smaller numbers of large
files.
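For example, mounting with ino_alloc_per_lock=64 tends to restrict
allocation to the first 64 inode numbers of each lock group; the value
can also be adjusted on a mounted filesystem through the matching sysfs
attribute.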
.TP
.B log_merge_wait_timeout_ms=<number>
This option sets the amount of time, in milliseconds, that log merge
creation can wait before timing out. This setting is per-mount, only