Compare commits

...

6 Commits

Author SHA1 Message Date
Auke Kok 81ea586581 Avoid try_shrink_lock spinlock contention
Every lock_key_range call results in a spinlock on linfo->lock to do
housekeeping and reclaim idle locks. This housekeeping is non-critical
and can be deferred, but more importantly, while a reclaim pass is
ongoing, there's no need to attempt reclaim at the same time.

I ran into this while doing cross-node spam and it caused a near hard
lockup with my VM spinning on the lock for 7+ minutes, without making
any forward progress.

We can just trylock and give up. If the lock is held, another task
is already reclaiming and we're safe to skip. The force flag will
still cause a spin_lock to ensure shutdown waits for other reclaims,
which needs to happen before shutdown can progress anyway.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-06-02 14:15:56 -07:00
Zach Brown 07e90422ee Merge pull request #293 from versity/auke/data_prealloc_min
scaling prealloc.
2026-05-28 09:40:31 -07:00
Zach Brown 634ca720c9 Merge pull request #318 from versity/auke/timer_container_of
Use timer_container_of with fallback for from_timer -> el9.8 support
2026-05-28 09:38:19 -07:00
Auke Kok 1f1e3e9c6a Use timer_container_of with fallback for from_timer
El9.8 backported the upstream v6.15.* rename of from_timer to
timer_container_of.  Switch the two callers in fence.c and recov.c
to the new style and add a simple kcompat define for older kernels.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-05-26 17:45:53 -04:00
Auke Kok 5a6523ecf4 Ramping up data preallocation
Ramps up data preallocation based on the number of online
blocks. This results in a simple 1<<n block allocation pattern
until n=11 (2048) - the default value of data_prealloc_blocks.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-05-19 19:18:53 -07:00
Zach Brown fece0a9372 Merge pull request #310 from versity/zab/v1.31
v1.31 Release
2026-05-06 10:37:07 -07:00
7 changed files with 36 additions and 10 deletions
+9
View File
@@ -496,3 +496,12 @@ endif
ifneq (,$(shell grep 'struct posix_acl.*get_inode_acl' include/linux/fs.h))
ccflags-y += -DKC_GET_INODE_ACL
endif
#
# v6.15-13744-g41cb08555c41
#
# from_timer renamed to timer_container_of.
#
ifneq (,$(shell grep 'define timer_container_of' include/linux/timer.h))
ccflags-y += -DKC_TIMER_CONTAINER_OF
endif
+9 -1
View File
@@ -422,6 +422,8 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
mutex_lock(&datinf->mutex);
scoutfs_inode_get_onoff(inode, &online, &offline);
/* default to single allocation at the written block */
start = iblock;
count = 1;
@@ -444,7 +446,6 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
* the preallocation size to the number of online
* blocks.
*/
scoutfs_inode_get_onoff(inode, &online, &offline);
if (iblock > 1 && iblock == online) {
ret = scoutfs_ext_next(sb, &data_ext_ops, &args,
iblock, 1, &found);
@@ -486,6 +487,13 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
/* trim count by next extent after iblock */
if (found.len && found.start > start && found.start < start + count)
count = (found.start - start);
/*
* Ramp the aligned region size up proportionally with
* the file's online block count rather than jumping to
* the full prealloc size.
*/
count = max_t(u64, 1, min(count, online));
}
/* overall prealloc limit */
+1 -1
View File
@@ -222,7 +222,7 @@ static struct attribute *fence_attrs[] = {
static void fence_timeout(struct timer_list *timer)
{
struct pending_fence *fence = from_timer(fence, timer, timer);
struct pending_fence *fence = timer_container_of(fence, timer, timer);
struct super_block *sb = fence->sb;
DECLARE_FENCE_INFO(sb, fi);
+5
View File
@@ -489,4 +489,9 @@ static inline void stack_trace_print(unsigned long *entries, unsigned int nr_ent
}
#endif
/*
 * from_timer() was renamed to timer_container_of() upstream.  The build
 * defines KC_TIMER_CONTAINER_OF when the kernel's timer.h already provides
 * timer_container_of; otherwise supply it here as a thin wrapper that
 * forwards its three arguments unchanged to from_timer, so callers can use
 * the new name on every kernel.
 */
#ifndef KC_TIMER_CONTAINER_OF
#define timer_container_of(var, callback_timer, timer_fieldname) \
from_timer(var, callback_timer, timer_fieldname)
#endif
#endif
+5 -1
View File
@@ -906,7 +906,11 @@ static bool try_shrink_lock(struct super_block *sb, struct lock_info *linfo, boo
READ_ONCE(linfo->nr_locks) <= opts.lock_idle_count))
return false;
spin_lock(&linfo->lock);
/* Shrinking is best-effort housekeeping unless forced. */
if (force)
spin_lock(&linfo->lock);
else if (!spin_trylock(&linfo->lock))
return false;
lock = list_first_entry_or_null(&linfo->lru_list, struct scoutfs_lock, lru_head);
if (lock && (force || (linfo->nr_locks > opts.lock_idle_count))) {
+1 -1
View File
@@ -134,7 +134,7 @@ static int recov_finished(struct recov_info *recinf)
static void timer_callback(struct timer_list *timer)
{
struct recov_info *recinf = from_timer(recinf, timer, timer);
struct recov_info *recinf = timer_container_of(recinf, timer, timer);
recinf->timeout_fn(recinf->sb);
}
+6 -6
View File
@@ -8,10 +8,10 @@
/mnt/test/test/data-prealloc/file-1: extents: 32
/mnt/test/test/data-prealloc/file-2: extents: 32
== any writes to region prealloc get full extents
/mnt/test/test/data-prealloc/file-1: extents: 4
/mnt/test/test/data-prealloc/file-2: extents: 4
/mnt/test/test/data-prealloc/file-1: extents: 4
/mnt/test/test/data-prealloc/file-2: extents: 4
/mnt/test/test/data-prealloc/file-1: extents: 8
/mnt/test/test/data-prealloc/file-2: extents: 8
/mnt/test/test/data-prealloc/file-1: extents: 8
/mnt/test/test/data-prealloc/file-2: extents: 8
== streaming offline writes get full extents either way
/mnt/test/test/data-prealloc/file-1: extents: 4
/mnt/test/test/data-prealloc/file-2: extents: 4
@@ -20,8 +20,8 @@
== goofy preallocation amounts work
/mnt/test/test/data-prealloc/file-1: extents: 6
/mnt/test/test/data-prealloc/file-2: extents: 6
/mnt/test/test/data-prealloc/file-1: extents: 6
/mnt/test/test/data-prealloc/file-2: extents: 6
/mnt/test/test/data-prealloc/file-1: extents: 10
/mnt/test/test/data-prealloc/file-2: extents: 10
/mnt/test/test/data-prealloc/file-1: extents: 3
/mnt/test/test/data-prealloc/file-2: extents: 3
== block writes into region allocs hole