Run kmemleak during tests.

Enable collection of possible leaks by kmemleak during each test. Suspected or real leaks *fail* the test; only a clean scan passes. This requires that the kernel is compiled with kmemleak enabled in the config (`CONFIG_DEBUG_KMEMLEAK`) and that kmemleak isn't disabled by default at boot time (`CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF`). Default distro kernels do have it disabled by default; in that case, the easiest fix is to add `kmemleak=on` to the kernel boot cmdline. When the option is given, kmemleak's task stack scanning and automatic scan thread are turned off (`stack=off`, `scan=off`), and before each test the existing kmemleak results are cleared. After each test finishes, the `scan` command is given to kmemleak and the results are collected. If nothing is found, the kmemleak output is empty. If there is any output from kmemleak, it will throw a dmesg error that leaks were found, and the (suspected) leaks are dumped with the stack trace of each allocation, its size, and a dump of its first 32 bytes. If kmemleak is present in the kernel but (irreversibly) disabled, the test will fail to run. The same applies if it is entirely missing from the kernel. Signed-off-by: Auke Kok <auke.kok@versity.com>
POSIX ACL changes.
2026-05-03 19:35:43 +00:00 · 2025-07-15 15:14:26 -07:00 · 2025-07-15 14:51:36 -07:00 · 2025-07-15 14:51:36 -07:00
10 changed files with 124 additions and 83 deletions
--- a/kmod/src/Makefile.kernelcompat
+++ b/kmod/src/Makefile.kernelcompat
@@ -287,6 +287,14 @@ ifneq (,$(shell grep 'int ..mknod. .struct user_namespace' include/linux/fs.h))
 ccflags-y += -DKC_VFS_METHOD_USER_NAMESPACE_ARG
 endif

+#
+# v6.2-rc1-2-gabf08576afe3
+#
+# fs: vfs methods use struct mnt_idmap instead of struct user_namespace
+ifneq (,$(shell grep 'int vfs_mknod.struct mnt_idmap' include/linux/fs.h))
+ccflags-y += -DKC_VFS_METHOD_MNT_IDMAP_ARG
+endif
+
 #
 # v5.17-rc2-21-g07888c665b40
 #
@@ -434,3 +442,12 @@ endif
 ifneq (,$(shell grep 'int ..remap_pages..struct vm_area_struct' include/linux/mm.h))
 ccflags-y += -DKC_MM_REMAP_PAGES
 endif
+
+#
+# v6.1-rc1-4-g7420332a6ff4
+#
+# .get_acl() method now has dentry arg (and mnt_idmap). The old get_acl has been renamed
+# to get_inode_acl() and is still available as well, but has an extra rcu param.
+ifneq (,$(shell grep 'struct posix_acl ...get_acl..struct mnt_idmap ., struct dentry' include/linux/fs.h))
+ccflags-y += -DKC_GET_ACL_DENTRY
+endif
--- a/kmod/src/acl.c
+++ b/kmod/src/acl.c
@@ -107,8 +107,15 @@ struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct s
 	return acl;
 }

+#ifdef KC_GET_ACL_DENTRY
+struct posix_acl *scoutfs_get_acl(KC_VFS_NS_DEF
+				  struct dentry *dentry, int type)
+{
+	struct inode *inode = dentry->d_inode;
+#else
 struct posix_acl *scoutfs_get_acl(struct inode *inode, int type)
 {
+#endif
 	struct super_block *sb = inode->i_sb;
 	struct scoutfs_lock *lock = NULL;
 	struct posix_acl *acl;
@@ -201,8 +208,15 @@ out:
 	return ret;
 }

+#ifdef KC_GET_ACL_DENTRY
+int scoutfs_set_acl(KC_VFS_NS_DEF
+		    struct dentry *dentry, struct posix_acl *acl, int type)
+{
+	struct inode *inode = dentry->d_inode;
+#else
 int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 {
+#endif
 	struct super_block *sb = inode->i_sb;
 	struct scoutfs_lock *lock = NULL;
 	LIST_HEAD(ind_locks);
@@ -240,7 +254,12 @@ int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value,
 	if (!IS_POSIXACL(dentry->d_inode))
 		return -EOPNOTSUPP;

+#ifdef KC_GET_ACL_DENTRY
+	acl = scoutfs_get_acl(KC_VFS_INIT_NS
+			      dentry, type);
+#else
 	acl = scoutfs_get_acl(dentry->d_inode, type);
+#endif
 	if (IS_ERR(acl))
 		return PTR_ERR(acl);
 	if (acl == NULL)
@@ -286,7 +305,11 @@ int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *v
 		}
 	}

+#ifdef KC_GET_ACL_DENTRY
+	ret = scoutfs_set_acl(KC_VFS_INIT_NS dentry, acl, type);
+#else
 	ret = scoutfs_set_acl(dentry->d_inode, acl, type);
+#endif
 out:
 	posix_acl_release(acl);

--- a/kmod/src/acl.h
+++ b/kmod/src/acl.h
@@ -1,9 +1,14 @@
 #ifndef _SCOUTFS_ACL_H_
 #define _SCOUTFS_ACL_H_

+#ifdef KC_GET_ACL_DENTRY
+struct posix_acl *scoutfs_get_acl(KC_VFS_NS_DEF struct dentry *dentry, int type);
+int scoutfs_set_acl(KC_VFS_NS_DEF struct dentry *dentry, struct posix_acl *acl, int type);
+#else
 struct posix_acl *scoutfs_get_acl(struct inode *inode, int type);
-struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct scoutfs_lock *lock);
 int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+#endif
+struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct scoutfs_lock *lock);
 int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type,
 			   struct scoutfs_lock *lock, struct list_head *ind_locks);
 #ifdef KC_XATTR_STRUCT_XATTR_HANDLER
--- a/kmod/src/alloc.c
+++ b/kmod/src/alloc.c
@@ -86,47 +86,18 @@ static u64 smallest_order_length(u64 len)
 }

 /*
- * Moving an extent between trees can dirty blocks in several ways. This
- * function calculates worst case number of blocks across these scenarions.
- * We treat the alloc and free counts independently, so the values below are
- * max(allocated, freed), not the sum.
- *
- * We track extents with two separate btree items: by block number and by size.
- *
- * If we're removing an extent from the btree (allocating), we can dirty
- * two blocks if the keys are in different leaves. If we wind up merging
- * leaves because we fall below the low water mark, we can wind up freeing
- * three leaves.
- *
- * That sequence is as follows, assuming the original keys are removed from
- * blocks A and B:
- *
- * Allocate new dirty A' and B'
- * Free old stable A and B
- * B' has fallen below the low water mark, so copy B' into A'
- * Free B'
- *
- * An extent insertion (freeing an extent) can dirty up to five distinct items
- * in the btree as it adds and removes the blkno and size sorted items for the
- * old and new lengths of the extent:
- *
- * In the by-blkno portion of the btree, we can dirty (allocate for COW) up
- * to two blocks- either by merging adjacent extents, which can cause us to
- * join leaf blocks; or by an insertion that causes a split.
- *
- * In the by-size portion, we never merge extents, so normally we just dirty
- * a single item with a size insertion. But if we merged adjacent extents in
- * the by-blkno portion of the tree, we might be working with three by-sizex
- * items: removing the two old ones that were combined in the merge; and
- * adding the new one for the larger, merged size.
- *
- * Finally, dirtying the paths to these leaves can grow the tree and grow/shrink
- * neighbours at each level, so we multiply by the height of the tree after
- * accounting for a possible new level.
+ * An extent modification dirties three distinct leaves of an allocator
+ * btree as it adds and removes the blkno and size sorted items for the
+ * old and new lengths of the extent.  Dirtying the paths to these
+ * leaves can grow the tree and grow/shrink neighbours at each level.
+ * We over-estimate the number of blocks allocated and freed (the paths
+ * share a root, growth doesn't free) to err on the simpler and safer
+ * side.  The overhead is minimal given the relatively large list blocks
+ * and relatively short allocator trees.
 */
 static u32 extent_mod_blocks(u32 height)
 {
-	return ((1 + height) * 3) * 5;
+	return ((1 + height) * 2) * 3;
 }

 /*
--- a/kmod/src/dir.c
+++ b/kmod/src/dir.c
@@ -2053,6 +2053,9 @@ const struct inode_operations scoutfs_dir_iops = {
 #endif
 	.listxattr	= scoutfs_listxattr,
 	.get_acl	= scoutfs_get_acl,
+#ifdef KC_GET_ACL_DENTRY
+	.set_acl	= scoutfs_set_acl,
+#endif
 	.symlink	= scoutfs_symlink,
 	.permission	= scoutfs_permission,
 #ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
--- a/kmod/src/inode.c
+++ b/kmod/src/inode.c
@@ -150,6 +150,9 @@ static const struct inode_operations scoutfs_file_iops = {
 #endif
 	.listxattr	= scoutfs_listxattr,
 	.get_acl	= scoutfs_get_acl,
+#ifdef KC_GET_ACL_DENTRY
+	.set_acl	= scoutfs_set_acl,
+#endif
 	.fiemap		= scoutfs_data_fiemap,
 };

@@ -163,6 +166,9 @@ static const struct inode_operations scoutfs_special_iops = {
 #endif
 	.listxattr	= scoutfs_listxattr,
 	.get_acl	= scoutfs_get_acl,
+#ifdef KC_GET_ACL_DENTRY
+	.set_acl	= scoutfs_set_acl,
+#endif
 };

 /*
--- a/kmod/src/kernelcompat.h
+++ b/kmod/src/kernelcompat.h
@@ -263,6 +263,11 @@ typedef unsigned int blk_opf_t;
 #define kc__vmalloc __vmalloc
 #endif

+#ifdef KC_VFS_METHOD_MNT_IDMAP_ARG
+#define KC_VFS_NS_DEF struct mnt_idmap *mnt_idmap,
+#define KC_VFS_NS mnt_idmap,
+#define KC_VFS_INIT_NS &nop_mnt_idmap,
+#else
 #ifdef KC_VFS_METHOD_USER_NAMESPACE_ARG
 #define KC_VFS_NS_DEF struct user_namespace *mnt_user_ns,
 #define KC_VFS_NS mnt_user_ns,
@@ -272,6 +277,7 @@ typedef unsigned int blk_opf_t;
 #define KC_VFS_NS
 #define KC_VFS_INIT_NS
 #endif
+#endif /* KC_VFS_METHOD_MNT_IDMAP_ARG */

 #ifdef KC_BIO_ALLOC_DEV_OPF_ARGS
 #define kc_bio_alloc bio_alloc
--- a/kmod/src/scoutfs_trace.h
+++ b/kmod/src/scoutfs_trace.h
@@ -1966,17 +1966,15 @@ DEFINE_EVENT(scoutfs_server_client_count_class, scoutfs_server_client_down,
 );

 DECLARE_EVENT_CLASS(scoutfs_server_commit_users_class,
-        TP_PROTO(struct super_block *sb, int holding, int applying,
-		 int nr_holders, u32 budget,
-		 u32 avail_before, u32 freed_before,
-		 int committing, int exceeded),
-        TP_ARGS(sb, holding, applying, nr_holders, budget, avail_before, freed_before, committing, exceeded),
+        TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
+		 u32 avail_before, u32 freed_before, int committing, int exceeded),
+        TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, committing,
+		exceeded),
        TP_STRUCT__entry(
 		SCSB_TRACE_FIELDS
 		__field(int, holding)
 		__field(int, applying)
 		__field(int, nr_holders)
-		__field(u32, budget)
 		__field(__u32, avail_before)
 		__field(__u32, freed_before)
 		__field(int, committing)
@@ -1987,45 +1985,35 @@ DECLARE_EVENT_CLASS(scoutfs_server_commit_users_class,
 		__entry->holding = !!holding;
 		__entry->applying = !!applying;
 		__entry->nr_holders = nr_holders;
-		__entry->budget = budget;
 		__entry->avail_before = avail_before;
 		__entry->freed_before = freed_before;
 		__entry->committing = !!committing;
 		__entry->exceeded = !!exceeded;
        ),
-	TP_printk(SCSBF" holding %u applying %u nr %u budget %u avail_before %u freed_before %u committing %u exceeded %u",
-		  SCSB_TRACE_ARGS, __entry->holding, __entry->applying,
-		  __entry->nr_holders, __entry->budget,
-		  __entry->avail_before, __entry->freed_before,
-		  __entry->committing, __entry->exceeded)
+	TP_printk(SCSBF" holding %u applying %u nr %u avail_before %u freed_before %u committing %u exceeded %u",
+		  SCSB_TRACE_ARGS, __entry->holding, __entry->applying, __entry->nr_holders,
+		  __entry->avail_before, __entry->freed_before, __entry->committing,
+		  __entry->exceeded)
 );
 DEFINE_EVENT(scoutfs_server_commit_users_class, scoutfs_server_commit_hold,
-        TP_PROTO(struct super_block *sb, int holding, int applying,
-		 int nr_holders, u32 budget,
-		 u32 avail_before, u32 freed_before,
-		 int committing, int exceeded),
-        TP_ARGS(sb, holding, applying, nr_holders, budget, avail_before, freed_before, committing, exceeded)
+        TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
+		 u32 avail_before, u32 freed_before, int committing, int exceeded),
+        TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, committing, exceeded)
 );
 DEFINE_EVENT(scoutfs_server_commit_users_class, scoutfs_server_commit_apply,
-        TP_PROTO(struct super_block *sb, int holding, int applying,
-		 int nr_holders, u32 budget,
-		 u32 avail_before, u32 freed_before,
-		 int committing, int exceeded),
-        TP_ARGS(sb, holding, applying, nr_holders, budget, avail_before, freed_before, committing, exceeded)
+        TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
+		 u32 avail_before, u32 freed_before, int committing, int exceeded),
+        TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, committing, exceeded)
 );
 DEFINE_EVENT(scoutfs_server_commit_users_class, scoutfs_server_commit_start,
-        TP_PROTO(struct super_block *sb, int holding, int applying,
-		 int nr_holders, u32 budget,
-		 u32 avail_before, u32 freed_before,
-		 int committing, int exceeded),
-        TP_ARGS(sb, holding, applying, nr_holders, budget, avail_before, freed_before, committing, exceeded)
+        TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
+		 u32 avail_before, u32 freed_before, int committing, int exceeded),
+        TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, committing, exceeded)
 );
 DEFINE_EVENT(scoutfs_server_commit_users_class, scoutfs_server_commit_end,
-        TP_PROTO(struct super_block *sb, int holding, int applying,
-		 int nr_holders, u32 budget,
-		 u32 avail_before, u32 freed_before,
-		 int committing, int exceeded),
-        TP_ARGS(sb, holding, applying, nr_holders, budget, avail_before, freed_before, committing, exceeded)
+        TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
+		 u32 avail_before, u32 freed_before, int committing, int exceeded),
+        TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, committing, exceeded)
 );

 #define slt_symbolic(mode)						\
--- a/kmod/src/server.c
+++ b/kmod/src/server.c
@@ -65,7 +65,6 @@ struct commit_users {
 	struct list_head holding;
 	struct list_head applying;
 	unsigned int nr_holders;
-	u32 budget;
 	u32 avail_before;
 	u32 freed_before;
 	bool committing;
@@ -85,9 +84,8 @@ static void init_commit_users(struct commit_users *cusers)
 do {												\
 	__typeof__(cusers) _cusers = (cusers);							\
 	trace_scoutfs_server_commit_##which(sb, !list_empty(&_cusers->holding),			\
-		!list_empty(&_cusers->applying), _cusers->nr_holders, _cusers->budget,		\
-		_cusers->avail_before, _cusers->freed_before, _cusers->committing,		\
-		_cusers->exceeded);								\
+		!list_empty(&_cusers->applying), _cusers->nr_holders, _cusers->avail_before,	\
+		_cusers->freed_before, _cusers->committing, _cusers->exceeded);			\
 } while (0)

 struct server_info {
@@ -305,6 +303,7 @@ static void check_holder_budget(struct super_block *sb, struct server_info *serv
 	u32 freed_used;
 	u32 avail_now;
 	u32 freed_now;
+	u32 budget;

 	assert_spin_locked(&cusers->lock);

@@ -319,14 +318,15 @@ static void check_holder_budget(struct super_block *sb, struct server_info *serv
 	else
 		freed_used = SCOUTFS_ALLOC_LIST_MAX_BLOCKS - freed_now;

-	if (avail_used <= cusers->budget && freed_used <= cusers->budget)
+	budget = cusers->nr_holders * COMMIT_HOLD_ALLOC_BUDGET;
+	if (avail_used <= budget && freed_used <= budget)
 		return;

 	exceeded_once = true;
 	cusers->exceeded = cusers->nr_holders;

-	scoutfs_err(sb, "holders exceeded alloc budget %u av: bef %u now %u, fr: bef %u now %u",
-		    cusers->budget, cusers->avail_before, avail_now,
+	scoutfs_err(sb, "%u holders exceeded alloc budget av: bef %u now %u, fr: bef %u now %u",
+		    cusers->nr_holders, cusers->avail_before, avail_now,
 		    cusers->freed_before, freed_now);

 	list_for_each_entry(hold, &cusers->holding, entry) {
@@ -349,7 +349,7 @@ static bool hold_commit(struct super_block *sb, struct server_info *server,
 {
 	bool has_room;
 	bool held;
-	u32 new_budget;
+	u32 budget;
 	u32 av;
 	u32 fr;

@@ -367,8 +367,8 @@ static bool hold_commit(struct super_block *sb, struct server_info *server,
 	}

 	/* +2 for our additional hold and then for the final commit work the server does */
-	new_budget = max(cusers->budget, (cusers->nr_holders + 2) * COMMIT_HOLD_ALLOC_BUDGET);
-	has_room = av >= new_budget && fr >= new_budget;
+	budget = (cusers->nr_holders + 2) * COMMIT_HOLD_ALLOC_BUDGET;
+	has_room = av >= budget && fr >= budget;
 	/* checking applying so holders drain once an apply caller starts waiting */
 	held = !cusers->committing && has_room && list_empty(&cusers->applying);

@@ -388,7 +388,6 @@ static bool hold_commit(struct super_block *sb, struct server_info *server,
 		list_add_tail(&hold->entry, &cusers->holding);

 		cusers->nr_holders++;
-		cusers->budget = new_budget;

 	} else if (!has_room && cusers->nr_holders == 0 && !cusers->committing) {
 		cusers->committing = true;
@@ -517,7 +516,6 @@ static void commit_end(struct super_block *sb, struct commit_users *cusers, int
 	list_for_each_entry_safe(hold, tmp, &cusers->applying, entry)
 		list_del_init(&hold->entry);
 	cusers->committing = false;
-	cusers->budget = 0;
 	spin_unlock(&cusers->lock);

 	wake_up(&cusers->waitq);
--- a/tests/run-tests.sh
+++ b/tests/run-tests.sh
@@ -60,6 +60,8 @@ $(basename $0) options:
              | the file system to be tested.  Will be clobbered by -m mkfs.
    -m        | Run mkfs on the device before mounting and running
              | tests.  Implies unmounting existing mounts first.
+    -l        | Enable kmemleak scan during each test. Requires "kmemleak=on" in
+              | kernel cmdline boot args.
    -n <nr>   | The number of devices and mounts to test.
    -o <opts> | Add option string to all mounts during all tests.
    -P        | Enable trace_printk.
@@ -129,6 +131,12 @@ while true; do
 	-i)
 		T_INSMOD="1"
 		;;
+	-l)
+		echo "stack=off" > /sys/kernel/debug/kmemleak &&
+		echo "scan=off" > /sys/kernel/debug/kmemleak ||
+		die "kmemleak disabled or missing"
+		T_KMEMLEAK="1"
+		;;
 	-M)
 	        test -n "$2" || die "-z must have meta device file argument"
 	        T_META_DEVICE="$2"
@@ -569,6 +577,11 @@ for t in $tests; do
 	# mark in dmesg as to what test we are running
 	echo "run scoutfs test $test_name" > /dev/kmsg

+	# clean kmemleak scan
+	if [ -n "$T_KMEMLEAK" ]; then
+		echo "clear" > /sys/kernel/debug/kmemleak
+	fi
+
 	# record dmesg before
 	dmesg | t_filter_dmesg > "$T_TMPDIR/dmesg.before"

@@ -616,6 +629,17 @@ for t in $tests; do
 		fi
 	fi

+	# record kmemleak scan
+	if [ -n "$T_KMEMLEAK" ]; then
+		echo scan > /sys/kernel/debug/kmemleak
+		cp /sys/kernel/debug/kmemleak "$T_TMPDIR/kmemleak.scan"
+		if [ -s "$T_TMPDIR/kmemleak.scan" ]; then
+			message="kmemleak detected memory leak"
+			sts=$T_FAIL_STATUS
+			cat "$T_TMPDIR/kmemleak.scan" >> "$T_RESULTS/fail.log"
+		fi
+	fi
+
 	# record unknown exit status
 	if [ "$sts" -lt "$T_FIRST_STATUS" -o "$sts" -gt "$T_LAST_STATUS" ]; then
 		message="unknown status: $sts"