Compare commits

..

1 Commits

Author SHA1 Message Date
Auke Kok f604bb4b77 Show last holder PID, inode in client_locks
Add last_user_pid[mode] and last_user_ino[mode] arrays to scoutfs_lock,
filled in at the granted-mode path alongside the existing counts.
The inode is passed by callers for all per-inode cases, and set to 0
for others. PID is from the current task.

The client_locks line is expanded with "ino: rd I wr I wo I pid: rd P
wr P wo P".  Existing users:/waiters: field positions are unchanged.

A simple test case demonstrates the functionality for the two simple
cases (inode and non-inode), and for a contended lock case (multiple
rd/wr lock holders).

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-05-08 15:08:23 -07:00
19 changed files with 110 additions and 199 deletions
-14
View File
@@ -1,20 +1,6 @@
Versity ScoutFS Release Notes
=============================
---
v1.32
\
*June 2, 2026*
Fix writing POSIX ACLs over NFS mounts that export the scoutfs
filesystem.
Add support for kernels in the RHEL 9.8 minor release.
Reduce unneeded block allocation when data\_prealloc\_contig\_only is
set to 0. This helps achieve more efficient data space usage when
writing small files.
---
v1.31
\
-19
View File
@@ -479,16 +479,6 @@ ifneq (,$(shell grep '^unsigned int stack_trace_save' include/linux/stacktrace.h
ccflags-y += -DKC_STACK_TRACE_SAVE
endif
#
# v3.14-rc1-7-g4e34e719e457
#
# .set_acl callback added to struct inode_operations. Most kernels
# we target have it, but el7 (3.10 base) does not, so detect.
#
ifneq (,$(shell grep 'int ..set_acl..struct' include/linux/fs.h))
ccflags-y += -DKC_HAS_SET_ACL
endif
#
# v6.1-rc1-2-g138060ba92b3
#
@@ -506,12 +496,3 @@ endif
ifneq (,$(shell grep 'struct posix_acl.*get_inode_acl' include/linux/fs.h))
ccflags-y += -DKC_GET_INODE_ACL
endif
#
# v6.15-13744-g41cb08555c41
#
# from_timer renamed to timer_container_of.
#
ifneq (,$(shell grep 'define timer_container_of' include/linux/timer.h))
ccflags-y += -DKC_TIMER_CONTAINER_OF
endif
+2 -3
View File
@@ -216,8 +216,7 @@ int scoutfs_set_acl(KC_VFS_NS_DEF
{
struct inode *inode = dentry->d_inode;
#else
int scoutfs_set_acl(KC_VFS_NS_DEF
struct inode *inode, struct posix_acl *acl, int type)
int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
#endif
struct super_block *sb = inode->i_sb;
@@ -310,7 +309,7 @@ int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *v
#ifdef KC_SET_ACL_DENTRY
ret = scoutfs_set_acl(KC_VFS_INIT_NS dentry, acl, type);
#else
ret = scoutfs_set_acl(KC_VFS_INIT_NS dentry->d_inode, acl, type);
ret = scoutfs_set_acl(dentry->d_inode, acl, type);
#endif
out:
posix_acl_release(acl);
+1 -2
View File
@@ -5,8 +5,7 @@
int scoutfs_set_acl(KC_VFS_NS_DEF
struct dentry *dentry, struct posix_acl *acl, int type);
#else
int scoutfs_set_acl(KC_VFS_NS_DEF
struct inode *inode, struct posix_acl *acl, int type);
int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
#endif
#ifdef KC_GET_INODE_ACL
struct posix_acl *scoutfs_get_acl(struct inode *inode, int type, bool rcu);
+1 -9
View File
@@ -422,8 +422,6 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
mutex_lock(&datinf->mutex);
scoutfs_inode_get_onoff(inode, &online, &offline);
/* default to single allocation at the written block */
start = iblock;
count = 1;
@@ -446,6 +444,7 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
* the preallocation size to the number of online
* blocks.
*/
scoutfs_inode_get_onoff(inode, &online, &offline);
if (iblock > 1 && iblock == online) {
ret = scoutfs_ext_next(sb, &data_ext_ops, &args,
iblock, 1, &found);
@@ -487,13 +486,6 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
/* trim count by next extent after iblock */
if (found.len && found.start > start && found.start < start + count)
count = (found.start - start);
/*
* Ramp the aligned region size up proportionally with
* the file's online block count rather than jumping to
* the full prealloc size.
*/
count = max_t(u64, 1, min(count, online));
}
/* overall prealloc limit */
+1 -1
View File
@@ -2063,7 +2063,7 @@ const struct inode_operations scoutfs_dir_iops = {
#else
.get_acl = scoutfs_get_acl,
#endif
#ifdef KC_HAS_SET_ACL
#ifdef KC_SET_ACL_DENTRY
.set_acl = scoutfs_set_acl,
#endif
.symlink = scoutfs_symlink,
+1 -1
View File
@@ -222,7 +222,7 @@ static struct attribute *fence_attrs[] = {
static void fence_timeout(struct timer_list *timer)
{
struct pending_fence *fence = timer_container_of(fence, timer, timer);
struct pending_fence *fence = from_timer(fence, timer, timer);
struct super_block *sb = fence->sb;
DECLARE_FENCE_INFO(sb, fi);
+2 -2
View File
@@ -154,7 +154,7 @@ static const struct inode_operations scoutfs_file_iops = {
#else
.get_acl = scoutfs_get_acl,
#endif
#ifdef KC_HAS_SET_ACL
#ifdef KC_SET_ACL_DENTRY
.set_acl = scoutfs_set_acl,
#endif
.fiemap = scoutfs_data_fiemap,
@@ -174,7 +174,7 @@ static const struct inode_operations scoutfs_special_iops = {
#else
.get_acl = scoutfs_get_acl,
#endif
#ifdef KC_HAS_SET_ACL
#ifdef KC_SET_ACL_DENTRY
.set_acl = scoutfs_set_acl,
#endif
};
-5
View File
@@ -489,9 +489,4 @@ static inline void stack_trace_print(unsigned long *entries, unsigned int nr_ent
}
#endif
#ifndef KC_TIMER_CONTAINER_OF
#define timer_container_of(var, callback_timer, timer_fieldname) \
from_timer(var, callback_timer, timer_fieldname)
#endif
#endif
+18 -10
View File
@@ -980,7 +980,7 @@ static bool lock_flags_invalid(int flags)
*/
static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
struct scoutfs_key *start, struct scoutfs_key *end,
struct scoutfs_lock **ret_lock)
u64 ino, struct scoutfs_lock **ret_lock)
{
DECLARE_LOCK_INFO(sb, linfo);
struct scoutfs_lock *lock;
@@ -1028,6 +1028,8 @@ static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, i
/* the fast path where we can use the granted mode */
if (lock_modes_match(lock->mode, mode)) {
lock_inc_count(lock->users, mode);
lock->last_user_pid[mode] = task_pid_nr(current);
lock->last_user_ino[mode] = ino;
*ret_lock = lock;
ret = 0;
break;
@@ -1108,7 +1110,7 @@ int scoutfs_lock_ino(struct super_block *sb, enum scoutfs_lock_mode mode, int fl
end.sk_zone = SCOUTFS_FS_ZONE;
end.ski_ino = cpu_to_le64(ino | SCOUTFS_LOCK_INODE_GROUP_MASK);
return lock_key_range(sb, mode, flags, &start, &end, ret_lock);
return lock_key_range(sb, mode, flags, &start, &end, ino, ret_lock);
}
/*
@@ -1238,7 +1240,7 @@ int scoutfs_lock_rename(struct super_block *sb, enum scoutfs_lock_mode mode, int
.sk_type = SCOUTFS_RENAME_TYPE,
};
return lock_key_range(sb, mode, flags, &key, &key, lock);
return lock_key_range(sb, mode, flags, &key, &key, 0, lock);
}
/*
@@ -1286,7 +1288,7 @@ int scoutfs_lock_inode_index(struct super_block *sb, enum scoutfs_lock_mode mode
scoutfs_lock_get_index_item_range(type, major, ino, &start, &end);
return lock_key_range(sb, mode, 0, &start, &end, ret_lock);
return lock_key_range(sb, mode, 0, &start, &end, ino, ret_lock);
}
/*
@@ -1313,7 +1315,7 @@ int scoutfs_lock_orphan(struct super_block *sb, enum scoutfs_lock_mode mode, int
end.sko_ino = cpu_to_le64(U64_MAX);
end.sk_type = SCOUTFS_ORPHAN_TYPE;
return lock_key_range(sb, mode, flags, &start, &end, lock);
return lock_key_range(sb, mode, flags, &start, &end, ino, lock);
}
int scoutfs_lock_xattr_totl(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
@@ -1324,7 +1326,7 @@ int scoutfs_lock_xattr_totl(struct super_block *sb, enum scoutfs_lock_mode mode,
scoutfs_totl_set_range(&start, &end);
return lock_key_range(sb, mode, flags, &start, &end, lock);
return lock_key_range(sb, mode, flags, &start, &end, 0, lock);
}
int scoutfs_lock_xattr_indx(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
@@ -1335,7 +1337,7 @@ int scoutfs_lock_xattr_indx(struct super_block *sb, enum scoutfs_lock_mode mode,
scoutfs_xattr_indx_get_range(&start, &end);
return lock_key_range(sb, mode, flags, &start, &end, lock);
return lock_key_range(sb, mode, flags, &start, &end, 0, lock);
}
int scoutfs_lock_quota(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
@@ -1346,7 +1348,7 @@ int scoutfs_lock_quota(struct super_block *sb, enum scoutfs_lock_mode mode, int
scoutfs_quota_get_lock_range(&start, &end);
return lock_key_range(sb, mode, flags, &start, &end, lock);
return lock_key_range(sb, mode, flags, &start, &end, 0, lock);
}
void scoutfs_unlock(struct super_block *sb, struct scoutfs_lock *lock, enum scoutfs_lock_mode mode)
@@ -1463,7 +1465,7 @@ static void lock_tseq_show(struct seq_file *m, struct scoutfs_tseq_entry *ent)
struct scoutfs_lock *lock =
container_of(ent, struct scoutfs_lock, tseq_entry);
seq_printf(m, "start "SK_FMT" end "SK_FMT" refresh_gen %llu mode %d waiters: rd %u wr %u wo %u users: rd %u wr %u wo %u\n",
seq_printf(m, "start "SK_FMT" end "SK_FMT" refresh_gen %llu mode %d waiters: rd %u wr %u wo %u users: rd %u wr %u wo %u ino: rd %llu wr %llu wo %llu pid: rd %d wr %d wo %d\n",
SK_ARG(&lock->start), SK_ARG(&lock->end),
lock->refresh_gen, lock->mode,
lock->waiters[SCOUTFS_LOCK_READ],
@@ -1471,7 +1473,13 @@ static void lock_tseq_show(struct seq_file *m, struct scoutfs_tseq_entry *ent)
lock->waiters[SCOUTFS_LOCK_WRITE_ONLY],
lock->users[SCOUTFS_LOCK_READ],
lock->users[SCOUTFS_LOCK_WRITE],
lock->users[SCOUTFS_LOCK_WRITE_ONLY]);
lock->users[SCOUTFS_LOCK_WRITE_ONLY],
lock->last_user_ino[SCOUTFS_LOCK_READ],
lock->last_user_ino[SCOUTFS_LOCK_WRITE],
lock->last_user_ino[SCOUTFS_LOCK_WRITE_ONLY],
lock->last_user_pid[SCOUTFS_LOCK_READ],
lock->last_user_pid[SCOUTFS_LOCK_WRITE],
lock->last_user_pid[SCOUTFS_LOCK_WRITE_ONLY]);
}
/*
+2
View File
@@ -42,6 +42,8 @@ struct scoutfs_lock {
enum scoutfs_lock_mode invalidating_mode;
unsigned int waiters[SCOUTFS_LOCK_NR_MODES];
unsigned int users[SCOUTFS_LOCK_NR_MODES];
pid_t last_user_pid[SCOUTFS_LOCK_NR_MODES];
u64 last_user_ino[SCOUTFS_LOCK_NR_MODES];
struct scoutfs_tseq_entry tseq_entry;
+1 -1
View File
@@ -134,7 +134,7 @@ static int recov_finished(struct recov_info *recinf)
static void timer_callback(struct timer_list *timer)
{
struct recov_info *recinf = timer_container_of(recinf, timer, timer);
struct recov_info *recinf = from_timer(recinf, timer, timer);
recinf->timeout_fn(recinf->sb);
}
-7
View File
@@ -171,13 +171,6 @@ t_filter_dmesg()
# orphan log trees reclaim is handled, not an error
re="$re|scoutfs .* reclaiming orphan log trees"
# nfs can emit a whole range of messages we can ignore
re="$re|Installing knfsd .*"
re="$re|nfsd: .*"
re="$re|NFSD: .*"
re="$re|RPC: .*"
re="$re|FS-Cache: .*"
# fencing tests force unmounts and trigger timeouts
re="$re|scoutfs .* forcing unmount"
re="$re|scoutfs .* reconnect timed out"
-32
View File
@@ -1,32 +0,0 @@
== write via NFS, read both sides
== POSIX ACL set via NFS, read both sides
user::rw-
user:22222:rw-
group::r--
mask::rw-
other::r--
user::rw-
user:22222:rw-
group::r--
mask::rw-
other::r--
== POSIX ACL set on scoutfs, read via NFS
user::rw-
user:22222:rw-
group::r--
group:44444:r--
mask::rw-
other::r--
== default ACL inheritance via NFS
user::rw-
user:22222:rwx #effective:rw-
group::r-x #effective:r--
mask::rw-
other::r--
== NFS read demand-stages a released file
1
== cleanup
+6 -6
View File
@@ -8,10 +8,10 @@
/mnt/test/test/data-prealloc/file-1: extents: 32
/mnt/test/test/data-prealloc/file-2: extents: 32
== any writes to region prealloc get full extents
/mnt/test/test/data-prealloc/file-1: extents: 8
/mnt/test/test/data-prealloc/file-2: extents: 8
/mnt/test/test/data-prealloc/file-1: extents: 8
/mnt/test/test/data-prealloc/file-2: extents: 8
/mnt/test/test/data-prealloc/file-1: extents: 4
/mnt/test/test/data-prealloc/file-2: extents: 4
/mnt/test/test/data-prealloc/file-1: extents: 4
/mnt/test/test/data-prealloc/file-2: extents: 4
== streaming offline writes get full extents either way
/mnt/test/test/data-prealloc/file-1: extents: 4
/mnt/test/test/data-prealloc/file-2: extents: 4
@@ -20,8 +20,8 @@
== goofy preallocation amounts work
/mnt/test/test/data-prealloc/file-1: extents: 6
/mnt/test/test/data-prealloc/file-2: extents: 6
/mnt/test/test/data-prealloc/file-1: extents: 10
/mnt/test/test/data-prealloc/file-2: extents: 10
/mnt/test/test/data-prealloc/file-1: extents: 6
/mnt/test/test/data-prealloc/file-2: extents: 6
/mnt/test/test/data-prealloc/file-1: extents: 3
/mnt/test/test/data-prealloc/file-2: extents: 3
== block writes into region allocs hole
+6
View File
@@ -0,0 +1,6 @@
== set up file
== exercise read, write, and write-only modes
== verify FS-zone lock recorded read and write ino+pid
== verify orphan-zone lock recorded write-only ino+pid
== contend on a single inode with concurrent read and write loops
== verify both rd and wr slots populated by concurrent contention
+1 -1
View File
@@ -3,7 +3,6 @@ basic-block-counts.sh
basic-bad-mounts.sh
basic-posix-acl.sh
basic-acl-consistency.sh
basic-nfs.sh
inode-items-updated.sh
simple-inode-index.sh
simple-staging.sh
@@ -33,6 +32,7 @@ totl-merge-read.sh
quota-invalidate-race.sh
totl-delta-inject.sh
lock-refleak.sh
lock-pid-ino.sh
lock-shrink-consistency.sh
lock-shrink-read-race.sh
lock-pr-cw-conflict.sh
-86
View File
@@ -1,86 +0,0 @@
#
# Test basic scoutfs-nfs interactions:
# - read/write
# - stage/release and data wait
# - nfs setacl/getacl mapping
#
# The scoutfs mount at $T_M0 is exported to 127.0.0.1 and mounted back
# over NFSv3 at $NFS_MNT, so every check can compare the NFS view
# ($NFS_DIR) against the direct view ($T_D0).
# NOTE(review): assumes $T_D0 and $NFS_DIR name the same directory --
# confirm against the test harness.
#
t_require_commands scoutfs setfacl getfacl exportfs mount.nfs umount \
stat dd cmp systemctl
# Skip rather than fail when the NFS server cannot be started.
systemctl start nfs-server >> "$T_TMPDIR/nfs.log" 2>&1 || \
t_skip "nfs-server not available"
# Keep file creation modes deterministic for the ACL golden output.
umask 022
EXPORT_OPTS="rw,async,no_root_squash,no_subtree_check,fsid=42"
NFS_MNT="$T_TMP.nfs"
NFS_DIR="$NFS_MNT/test/basic-nfs"
# Replace the per-run temp dir path with a fixed token, then apply the
# shared t_filter_fs filter.
filter() { sed "s@$T_TMPDIR@T_TMPDIR@g" | t_filter_fs; }
# Print ACL entries with numeric ids and no header; stderr is discarded.
gf() { getfacl -n --omit-header "$@" 2>/dev/null; }
# Undo the NFS mount and export, stop the server and remove the mount
# point.  Runs from the EXIT trap; all output is appended to nfs.log and
# individual failures are not checked.
teardown_nfs()
{
(
umount "$NFS_MNT"
exportfs -u "127.0.0.1:$T_M0"
exportfs -f
systemctl stop nfs-server
rmdir "$NFS_MNT"
) >> "$T_TMPDIR/nfs.log" 2>&1
}
trap teardown_nfs EXIT
# Remove any export that may already exist for this path; failure here
# is ignored.
exportfs -u "127.0.0.1:$T_M0" >> "$T_TMPDIR/nfs.log" 2>&1 || true
t_quiet mkdir -p "$NFS_MNT"
exportfs -o "$EXPORT_OPTS" "127.0.0.1:$T_M0" >> "$T_TMPDIR/nfs.log" 2>&1
# NFSv3 loopback mount with attribute caching turned off (noac,
# actimeo=0).
mount.nfs -o vers=3,noac,actimeo=0 "127.0.0.1:$T_M0" "$NFS_MNT" >> "$T_TMPDIR/nfs.log" 2>&1
test -d "$NFS_DIR" || t_fail "test dir $NFS_DIR not visible over NFS"
echo "== write via NFS, read both sides"
dd if=/dev/urandom bs=4096 count=1 of="$T_TMP.data" status=none
cp "$T_TMP.data" "$NFS_DIR/file"
# cmp prints only on a mismatch, so matching data adds nothing to the
# test output.
cmp "$T_TMP.data" "$T_D0/file"
cmp "$T_TMP.data" "$NFS_DIR/file"
echo "== POSIX ACL set via NFS, read both sides"
setfacl -m u:22222:rw "$NFS_DIR/file" 2>&1 | filter
gf "$NFS_DIR/file"
gf "$T_D0/file"
echo "== POSIX ACL set on scoutfs, read via NFS"
setfacl -m g:44444:r "$T_D0/file" 2>&1 | filter
gf "$NFS_DIR/file"
echo "== default ACL inheritance via NFS"
mkdir "$NFS_DIR/d"
setfacl -d -m u:22222:rwx "$NFS_DIR/d" 2>&1 | filter
touch "$NFS_DIR/d/child"
gf "$NFS_DIR/d/child"
echo "== NFS read demand-stages a released file"
dd if=/dev/urandom bs=4096 count=1 of="$T_TMP.big" status=none
cp "$T_TMP.big" "$T_D0/big"
sync
# Release the file's single 4K block so that it is offline; the data
# version must be passed to both release and stage.
vers=$(scoutfs stat -s data_version "$T_D0/big")
t_quiet scoutfs release "$T_D0/big" -V "$vers" -o 0 -l 4K
# NFS read against the offline file blocks in scoutfs_read waiting
# for the data to come back online.
cat "$NFS_DIR/big" > "$T_TMP.read" &
read_pid=$!
# Give the background reader time to start waiting before counting.
sleep 1
# Prints the number of data-waiting entries into the test output.
scoutfs data-waiting -B 0 -I 0 -p "$T_D0" | wc -l
# Stage the original contents back, which lets the blocked read finish.
t_quiet scoutfs stage "$T_TMP.big" "$T_D0/big" -V "$vers" -o 0 -l 4096
wait "$read_pid"
cmp "$T_TMP.big" "$T_TMP.read"
echo "== cleanup"
rm -f "$T_D0/file" "$T_D0/big"
rm -rf "$T_D0/d"
t_pass
+68
View File
@@ -0,0 +1,68 @@
#
# Verify debugfs client_locks reports per-mode last-user PID and inode.
#
# The awk checks below index whitespace-separated fields of each
# client_locks line.  With the line layout
#   start K end K refresh_gen N mode N waiters: rd N wr N wo N
#   users: rd N wr N wo N ino: rd I wr I wo I pid: rd P wr P wo P
# the start key is $2, the rd/wr/wo inode numbers are $25/$27/$29 and
# the rd/wr/wo pids are $32/$34/$36.
# NOTE(review): this assumes each key prints as a single token with no
# embedded spaces -- confirm against SK_FMT.
#
t_require_commands stat touch awk rm
FILE="$T_D0/file"
echo "== set up file"
touch "$FILE"
INO=$(stat -c %i "$FILE")
# Clear the low 10 bits of the inode number; the FS-zone checks expect
# this value inside the lock's start key.
# NOTE(review): 1023 presumably mirrors SCOUTFS_LOCK_INODE_GROUP_MASK --
# confirm.
GROUP_START=$(( INO & ~1023 ))
echo "== exercise read, write, and write-only modes"
# A stat, a data write and an unlink.  The checks below expect these to
# leave read and write entries on the inode's FS-zone lock and a
# write-only entry on the orphan-zone lock.
t_quiet stat "$FILE"
echo data > "$FILE"
rm -f "$FILE"
echo "== verify FS-zone lock recorded read and write ino+pid"
# awk prints one line per mismatch, or one line if no lock with the
# expected start key is listed; ERR stays empty on success.
ERR=$(awk -v group="$GROUP_START" -v ino="$INO" '
$2 == "16." group ".0.0.0.0" {
if ($25 != ino || $32 <= 0)
print "read mode: ino=" $25 " pid=" $32 " want ino=" ino " pid>0"
if ($27 != ino || $34 <= 0)
print "write mode: ino=" $27 " pid=" $34 " want ino=" ino " pid>0"
found = 1
}
END { if (!found) print "no FS-zone client_locks line for group " group }
' < "$(t_debugfs_path)/client_locks")
# Any collected message fails the test.
[ -n "$ERR" ] && t_fail "$ERR"
echo "== verify orphan-zone lock recorded write-only ino+pid"
# Same scheme for the orphan-zone lock, which is matched by a fixed
# start key and checked only in its write-only fields.
ERR=$(awk -v ino="$INO" '
$2 == "8.0.4.0.0.0" {
if ($29 != ino || $36 <= 0)
print "write-only mode: ino=" $29 " pid=" $36 " want ino=" ino " pid>0"
found = 1
}
END { if (!found) print "no orphan-zone client_locks line" }
' < "$(t_debugfs_path)/client_locks")
[ -n "$ERR" ] && t_fail "$ERR"
echo "== contend on a single inode with concurrent read and write loops"
FILE2="$T_D0/file2"
touch "$FILE2"
INO2=$(stat -c %i "$FILE2")
GROUP2=$(( INO2 & ~1023 ))
# Five stats and five writes of the same file run as two background
# jobs; wait for both before inspecting client_locks.
for i in $(seq 1 5); do t_quiet stat "$FILE2"; done &
RPID=$!
for i in $(seq 1 5); do echo $i > "$FILE2"; done &
WPID=$!
wait $RPID $WPID
echo "== verify both rd and wr slots populated by concurrent contention"
# Same field checks as the first FS-zone verification, now against the
# group that contains file2's inode.
ERR=$(awk -v group="$GROUP2" -v ino="$INO2" '
$2 == "16." group ".0.0.0.0" {
if ($25 != ino || $32 <= 0)
print "concurrent read: ino=" $25 " pid=" $32 " want ino=" ino " pid>0"
if ($27 != ino || $34 <= 0)
print "concurrent write: ino=" $27 " pid=" $34 " want ino=" ino " pid>0"
found = 1
}
END { if (!found) print "no FS-zone client_locks line for group " group }
' < "$(t_debugfs_path)/client_locks")
[ -n "$ERR" ] && t_fail "$ERR"
t_pass