Compare commits

..

3 Commits

Author SHA1 Message Date
Auke Kok
bddc170cf2 Run kmemleak during tests.
Enable kmemleak possible leak collection during each test. Suspected or
real leaks *fail* the test. Only a clean scan is passing.

This requires that the kernel is compiled with kmemleak enabled in
the config (`CONFIG_DEBUG_KMEMLEAK`) and that kmemleak isn't disabled
by default (`CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF`) at boot time, which
is the case for the default distro kernels. In that case, the easiest
is to add `kmemleak=on` to the kernel boot cmdline.

During each test, the initial kmemleak results are wiped and the auto
stack and scan workers are disabled. After each test is finished the
`scan` command is given to kmemleak and the results are collected. If
nothing is found, the kmemleak output is empty. If there is any output
from kmemleak, it will throw a dmesg error that leaks were found, and
the (suspected) leaks are dumped with stack traces of each allocation,
size, and the first 32b are dumped.

If kmemleak is present in the kernel, but (irreversably) disabled, the
test will fail to run. Same if it is entirely missing from the kernel.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2025-07-15 15:14:26 -07:00
Auke Kok
aeb24433d0 POSIX ACL changes.
The .get_acl() method now gets passed a mnt_idmap arg, and we can now
choose to implement either .get_acl() or .get_inode_acl(). Technically
.get_acl() is a new implementation, and .get_inode_acl() is the old.
That second method now also gets an rcu flag passed, but we should be
fine either way.

Deeper under the covers however we do need to hook up the .set_acl()
method for inodes, otherwise setfacl will just fail with -ENOTSUPP. To
make this not super messy (it already is) we tack on the get_acl()
changes here.

This is all roughly ca. v6.1-rc1-4-g7420332a6ff4.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2025-07-15 14:51:36 -07:00
Auke Kok
95be3f8889 All vfs methods now take a mnt_idmap instead of user_namespace arg.
Similar to before when namespaces were added, they are now translated to
a mnt_idmap, since v6.2-rc1-2-gabf08576afe3.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2025-07-15 14:51:36 -07:00
10 changed files with 124 additions and 83 deletions

View File

@@ -287,6 +287,14 @@ ifneq (,$(shell grep 'int ..mknod. .struct user_namespace' include/linux/fs.h))
ccflags-y += -DKC_VFS_METHOD_USER_NAMESPACE_ARG
endif
#
# v6.2-rc1-2-gabf08576afe3
#
# fs: vfs methods use struct mnt_idmap instead of struct user_namespace
ifneq (,$(shell grep 'int vfs_mknod.struct mnt_idmap' include/linux/fs.h))
ccflags-y += -DKC_VFS_METHOD_MNT_IDMAP_ARG
endif
#
# v5.17-rc2-21-g07888c665b40
#
@@ -434,3 +442,12 @@ endif
ifneq (,$(shell grep 'int ..remap_pages..struct vm_area_struct' include/linux/mm.h))
ccflags-y += -DKC_MM_REMAP_PAGES
endif
#
# v6.1-rc1-4-g7420332a6ff4
#
# .get_acl() method now has dentry arg (and mnt_idmap). The old get_acl has been renamed
# to get_inode_acl() and is still available as well, but has an extra rcu param.
ifneq (,$(shell grep 'struct posix_acl ...get_acl..struct mnt_idmap ., struct dentry' include/linux/fs.h))
ccflags-y += -DKC_GET_ACL_DENTRY
endif

View File

@@ -107,8 +107,15 @@ struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct s
return acl;
}
#ifdef KC_GET_ACL_DENTRY
struct posix_acl *scoutfs_get_acl(KC_VFS_NS_DEF
struct dentry *dentry, int type)
{
struct inode *inode = dentry->d_inode;
#else
struct posix_acl *scoutfs_get_acl(struct inode *inode, int type)
{
#endif
struct super_block *sb = inode->i_sb;
struct scoutfs_lock *lock = NULL;
struct posix_acl *acl;
@@ -201,8 +208,15 @@ out:
return ret;
}
#ifdef KC_GET_ACL_DENTRY
int scoutfs_set_acl(KC_VFS_NS_DEF
struct dentry *dentry, struct posix_acl *acl, int type)
{
struct inode *inode = dentry->d_inode;
#else
int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
#endif
struct super_block *sb = inode->i_sb;
struct scoutfs_lock *lock = NULL;
LIST_HEAD(ind_locks);
@@ -240,7 +254,12 @@ int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value,
if (!IS_POSIXACL(dentry->d_inode))
return -EOPNOTSUPP;
#ifdef KC_GET_ACL_DENTRY
acl = scoutfs_get_acl(KC_VFS_INIT_NS
dentry, type);
#else
acl = scoutfs_get_acl(dentry->d_inode, type);
#endif
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl == NULL)
@@ -286,7 +305,11 @@ int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *v
}
}
#ifdef KC_GET_ACL_DENTRY
ret = scoutfs_set_acl(KC_VFS_INIT_NS dentry, acl, type);
#else
ret = scoutfs_set_acl(dentry->d_inode, acl, type);
#endif
out:
posix_acl_release(acl);

View File

@@ -1,9 +1,14 @@
#ifndef _SCOUTFS_ACL_H_
#define _SCOUTFS_ACL_H_
#ifdef KC_GET_ACL_DENTRY
struct posix_acl *scoutfs_get_acl(KC_VFS_NS_DEF struct dentry *dentry, int type);
int scoutfs_set_acl(KC_VFS_NS_DEF struct dentry *dentry, struct posix_acl *acl, int type);
#else
struct posix_acl *scoutfs_get_acl(struct inode *inode, int type);
struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct scoutfs_lock *lock);
int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
#endif
struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct scoutfs_lock *lock);
int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type,
struct scoutfs_lock *lock, struct list_head *ind_locks);
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER

View File

@@ -86,47 +86,18 @@ static u64 smallest_order_length(u64 len)
}
/*
* Moving an extent between trees can dirty blocks in several ways. This
* function calculates worst case number of blocks across these scenarions.
* We treat the alloc and free counts independently, so the values below are
* max(allocated, freed), not the sum.
*
* We track extents with two separate btree items: by block number and by size.
*
* If we're removing an extent from the btree (allocating), we can dirty
* two blocks if the keys are in different leaves. If we wind up merging
* leaves because we fall below the low water mark, we can wind up freeing
* three leaves.
*
* That sequence is as follows, assuming the original keys are removed from
* blocks A and B:
*
* Allocate new dirty A' and B'
* Free old stable A and B
* B' has fallen below the low water mark, so copy B' into A'
* Free B'
*
* An extent insertion (freeing an extent) can dirty up to five distinct items
* in the btree as it adds and removes the blkno and size sorted items for the
* old and new lengths of the extent:
*
* In the by-blkno portion of the btree, we can dirty (allocate for COW) up
* to two blocks- either by merging adjacent extents, which can cause us to
* join leaf blocks; or by an insertion that causes a split.
*
* In the by-size portion, we never merge extents, so normally we just dirty
* a single item with a size insertion. But if we merged adjacent extents in
* the by-blkno portion of the tree, we might be working with three by-sizex
* items: removing the two old ones that were combined in the merge; and
* adding the new one for the larger, merged size.
*
* Finally, dirtying the paths to these leaves can grow the tree and grow/shrink
* neighbours at each level, so we multiply by the height of the tree after
* accounting for a possible new level.
* An extent modification dirties three distinct leaves of an allocator
* btree as it adds and removes the blkno and size sorted items for the
* old and new lengths of the extent. Dirtying the paths to these
* leaves can grow the tree and grow/shrink neighbours at each level.
* We over-estimate the number of blocks allocated and freed (the paths
* share a root, growth doesn't free) to err on the simpler and safer
* side. The overhead is minimal given the relatively large list blocks
* and relatively short allocator trees.
*/
static u32 extent_mod_blocks(u32 height)
{
return ((1 + height) * 3) * 5;
return ((1 + height) * 2) * 3;
}
/*

View File

@@ -2053,6 +2053,9 @@ const struct inode_operations scoutfs_dir_iops = {
#endif
.listxattr = scoutfs_listxattr,
.get_acl = scoutfs_get_acl,
#ifdef KC_GET_ACL_DENTRY
.set_acl = scoutfs_set_acl,
#endif
.symlink = scoutfs_symlink,
.permission = scoutfs_permission,
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER

View File

@@ -150,6 +150,9 @@ static const struct inode_operations scoutfs_file_iops = {
#endif
.listxattr = scoutfs_listxattr,
.get_acl = scoutfs_get_acl,
#ifdef KC_GET_ACL_DENTRY
.set_acl = scoutfs_set_acl,
#endif
.fiemap = scoutfs_data_fiemap,
};
@@ -163,6 +166,9 @@ static const struct inode_operations scoutfs_special_iops = {
#endif
.listxattr = scoutfs_listxattr,
.get_acl = scoutfs_get_acl,
#ifdef KC_GET_ACL_DENTRY
.set_acl = scoutfs_set_acl,
#endif
};
/*

View File

@@ -263,6 +263,11 @@ typedef unsigned int blk_opf_t;
#define kc__vmalloc __vmalloc
#endif
#ifdef KC_VFS_METHOD_MNT_IDMAP_ARG
#define KC_VFS_NS_DEF struct mnt_idmap *mnt_idmap,
#define KC_VFS_NS mnt_idmap,
#define KC_VFS_INIT_NS &nop_mnt_idmap,
#else
#ifdef KC_VFS_METHOD_USER_NAMESPACE_ARG
#define KC_VFS_NS_DEF struct user_namespace *mnt_user_ns,
#define KC_VFS_NS mnt_user_ns,
@@ -272,6 +277,7 @@ typedef unsigned int blk_opf_t;
#define KC_VFS_NS
#define KC_VFS_INIT_NS
#endif
#endif /* KC_VFS_METHOD_MNT_IDMAP_ARG */
#ifdef KC_BIO_ALLOC_DEV_OPF_ARGS
#define kc_bio_alloc bio_alloc

View File

@@ -1966,17 +1966,15 @@ DEFINE_EVENT(scoutfs_server_client_count_class, scoutfs_server_client_down,
);
DECLARE_EVENT_CLASS(scoutfs_server_commit_users_class,
TP_PROTO(struct super_block *sb, int holding, int applying,
int nr_holders, u32 budget,
u32 avail_before, u32 freed_before,
int committing, int exceeded),
TP_ARGS(sb, holding, applying, nr_holders, budget, avail_before, freed_before, committing, exceeded),
TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
u32 avail_before, u32 freed_before, int committing, int exceeded),
TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, committing,
exceeded),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(int, holding)
__field(int, applying)
__field(int, nr_holders)
__field(u32, budget)
__field(__u32, avail_before)
__field(__u32, freed_before)
__field(int, committing)
@@ -1987,45 +1985,35 @@ DECLARE_EVENT_CLASS(scoutfs_server_commit_users_class,
__entry->holding = !!holding;
__entry->applying = !!applying;
__entry->nr_holders = nr_holders;
__entry->budget = budget;
__entry->avail_before = avail_before;
__entry->freed_before = freed_before;
__entry->committing = !!committing;
__entry->exceeded = !!exceeded;
),
TP_printk(SCSBF" holding %u applying %u nr %u budget %u avail_before %u freed_before %u committing %u exceeded %u",
SCSB_TRACE_ARGS, __entry->holding, __entry->applying,
__entry->nr_holders, __entry->budget,
__entry->avail_before, __entry->freed_before,
__entry->committing, __entry->exceeded)
TP_printk(SCSBF" holding %u applying %u nr %u avail_before %u freed_before %u committing %u exceeded %u",
SCSB_TRACE_ARGS, __entry->holding, __entry->applying, __entry->nr_holders,
__entry->avail_before, __entry->freed_before, __entry->committing,
__entry->exceeded)
);
DEFINE_EVENT(scoutfs_server_commit_users_class, scoutfs_server_commit_hold,
TP_PROTO(struct super_block *sb, int holding, int applying,
int nr_holders, u32 budget,
u32 avail_before, u32 freed_before,
int committing, int exceeded),
TP_ARGS(sb, holding, applying, nr_holders, budget, avail_before, freed_before, committing, exceeded)
TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
u32 avail_before, u32 freed_before, int committing, int exceeded),
TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, committing, exceeded)
);
DEFINE_EVENT(scoutfs_server_commit_users_class, scoutfs_server_commit_apply,
TP_PROTO(struct super_block *sb, int holding, int applying,
int nr_holders, u32 budget,
u32 avail_before, u32 freed_before,
int committing, int exceeded),
TP_ARGS(sb, holding, applying, nr_holders, budget, avail_before, freed_before, committing, exceeded)
TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
u32 avail_before, u32 freed_before, int committing, int exceeded),
TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, committing, exceeded)
);
DEFINE_EVENT(scoutfs_server_commit_users_class, scoutfs_server_commit_start,
TP_PROTO(struct super_block *sb, int holding, int applying,
int nr_holders, u32 budget,
u32 avail_before, u32 freed_before,
int committing, int exceeded),
TP_ARGS(sb, holding, applying, nr_holders, budget, avail_before, freed_before, committing, exceeded)
TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
u32 avail_before, u32 freed_before, int committing, int exceeded),
TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, committing, exceeded)
);
DEFINE_EVENT(scoutfs_server_commit_users_class, scoutfs_server_commit_end,
TP_PROTO(struct super_block *sb, int holding, int applying,
int nr_holders, u32 budget,
u32 avail_before, u32 freed_before,
int committing, int exceeded),
TP_ARGS(sb, holding, applying, nr_holders, budget, avail_before, freed_before, committing, exceeded)
TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
u32 avail_before, u32 freed_before, int committing, int exceeded),
TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, committing, exceeded)
);
#define slt_symbolic(mode) \

View File

@@ -65,7 +65,6 @@ struct commit_users {
struct list_head holding;
struct list_head applying;
unsigned int nr_holders;
u32 budget;
u32 avail_before;
u32 freed_before;
bool committing;
@@ -85,9 +84,8 @@ static void init_commit_users(struct commit_users *cusers)
do { \
__typeof__(cusers) _cusers = (cusers); \
trace_scoutfs_server_commit_##which(sb, !list_empty(&_cusers->holding), \
!list_empty(&_cusers->applying), _cusers->nr_holders, _cusers->budget, \
_cusers->avail_before, _cusers->freed_before, _cusers->committing, \
_cusers->exceeded); \
!list_empty(&_cusers->applying), _cusers->nr_holders, _cusers->avail_before, \
_cusers->freed_before, _cusers->committing, _cusers->exceeded); \
} while (0)
struct server_info {
@@ -305,6 +303,7 @@ static void check_holder_budget(struct super_block *sb, struct server_info *serv
u32 freed_used;
u32 avail_now;
u32 freed_now;
u32 budget;
assert_spin_locked(&cusers->lock);
@@ -319,14 +318,15 @@ static void check_holder_budget(struct super_block *sb, struct server_info *serv
else
freed_used = SCOUTFS_ALLOC_LIST_MAX_BLOCKS - freed_now;
if (avail_used <= cusers->budget && freed_used <= cusers->budget)
budget = cusers->nr_holders * COMMIT_HOLD_ALLOC_BUDGET;
if (avail_used <= budget && freed_used <= budget)
return;
exceeded_once = true;
cusers->exceeded = cusers->nr_holders;
scoutfs_err(sb, "holders exceeded alloc budget %u av: bef %u now %u, fr: bef %u now %u",
cusers->budget, cusers->avail_before, avail_now,
scoutfs_err(sb, "%u holders exceeded alloc budget av: bef %u now %u, fr: bef %u now %u",
cusers->nr_holders, cusers->avail_before, avail_now,
cusers->freed_before, freed_now);
list_for_each_entry(hold, &cusers->holding, entry) {
@@ -349,7 +349,7 @@ static bool hold_commit(struct super_block *sb, struct server_info *server,
{
bool has_room;
bool held;
u32 new_budget;
u32 budget;
u32 av;
u32 fr;
@@ -367,8 +367,8 @@ static bool hold_commit(struct super_block *sb, struct server_info *server,
}
/* +2 for our additional hold and then for the final commit work the server does */
new_budget = max(cusers->budget, (cusers->nr_holders + 2) * COMMIT_HOLD_ALLOC_BUDGET);
has_room = av >= new_budget && fr >= new_budget;
budget = (cusers->nr_holders + 2) * COMMIT_HOLD_ALLOC_BUDGET;
has_room = av >= budget && fr >= budget;
/* checking applying so holders drain once an apply caller starts waiting */
held = !cusers->committing && has_room && list_empty(&cusers->applying);
@@ -388,7 +388,6 @@ static bool hold_commit(struct super_block *sb, struct server_info *server,
list_add_tail(&hold->entry, &cusers->holding);
cusers->nr_holders++;
cusers->budget = new_budget;
} else if (!has_room && cusers->nr_holders == 0 && !cusers->committing) {
cusers->committing = true;
@@ -517,7 +516,6 @@ static void commit_end(struct super_block *sb, struct commit_users *cusers, int
list_for_each_entry_safe(hold, tmp, &cusers->applying, entry)
list_del_init(&hold->entry);
cusers->committing = false;
cusers->budget = 0;
spin_unlock(&cusers->lock);
wake_up(&cusers->waitq);

View File

@@ -60,6 +60,8 @@ $(basename $0) options:
| the file system to be tested. Will be clobbered by -m mkfs.
-m | Run mkfs on the device before mounting and running
| tests. Implies unmounting existing mounts first.
-l | Enable kmemleak scan during each test. Requires "kmemleak=on" in
| kernel cmdline boot args.
-n <nr> | The number of devices and mounts to test.
-o <opts> | Add option string to all mounts during all tests.
-P | Enable trace_printk.
@@ -129,6 +131,12 @@ while true; do
-i)
T_INSMOD="1"
;;
-l)
echo "stack=off" > /sys/kernel/debug/kmemleak &&
echo "scan=off" > /sys/kernel/debug/kmemleak ||
die "kmemleak disabled or missing"
T_KMEMLEAK="1"
;;
-M)
test -n "$2" || die "-z must have meta device file argument"
T_META_DEVICE="$2"
@@ -569,6 +577,11 @@ for t in $tests; do
# mark in dmesg as to what test we are running
echo "run scoutfs test $test_name" > /dev/kmsg
# clean kmemleak scan
if [ -n "$T_KMEMLEAK" ]; then
echo "clear" > /sys/kernel/debug/kmemleak
fi
# record dmesg before
dmesg | t_filter_dmesg > "$T_TMPDIR/dmesg.before"
@@ -616,6 +629,17 @@ for t in $tests; do
fi
fi
# record kmemleak scan
if [ -n "$T_KMEMLEAK" ]; then
echo scan > /sys/kernel/debug/kmemleak
cp /sys/kernel/debug/kmemleak "$T_TMPDIR/kmemleak.scan"
if [ -s "$T_TMPDIR/kmemleak.scan" ]; then
message="kmemleak detected memory leak"
sts=$T_FAIL_STATUS
cat "$T_TMPDIR/kmemleak.scan" >> "$T_RESULTS/fail.log"
fi
fi
# record unknown exit status
if [ "$sts" -lt "$T_FIRST_STATUS" -o "$sts" -gt "$T_LAST_STATUS" ]; then
message="unknown status: $sts"