Compare commits

...

2 Commits

Author SHA1 Message Date
Zach Brown
8efb30afbc No i_mutex in aio_read for data_wait_check
When a regular file reader first acquires a cluster lock, it checks the
file's extents to see if the region it is reading contains offline
extents.

When this was written the extent operations didn't have their own
internal locking.  It was up to callers to use vfs mechanisms to
serialize readers and writers.  The aio_read data_wait_check extent
caller tried to use dio_count and an i_mutex acquisition to ensure that
our ioctls wouldn't modify extents.

This creates a bad inversion between the vfs i_mutex and our cluster
inode lock.  There are lots of fs methods that the vfs calls with
i_mutex held and which then acquire a cluster lock.  This read case was
holding a cluster lock and then acquiring i_mutex.

Since the data waiting was written the file data extent operations have
added their own extent_sem to protect the extent items from concurrent
callers.  We can rely on that internal locking and drop the bad i_mutex
use which causes the inversion.

Not surprisingly, this was trivial to deadlock by racing simple
utilities like cat and touch.

Signed-off-by: Zach Brown <zab@versity.com>
2021-04-27 11:20:40 -07:00
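
The inversion described above is the classic ABBA pattern.  As a rough
userspace analogy (not scoutfs code: the two pthread mutexes merely
stand in for the vfs i_mutex and the cluster inode lock), the sketch
below hangs for the same reason the old read path could deadlock
against vfs callers:

/*
 * ABBA lock ordering analogy.  lock_a stands in for the vfs i_mutex,
 * lock_b for the cluster inode lock.  The program intentionally
 * deadlocks once both threads hold their first lock.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER; /* "i_mutex" */
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER; /* "cluster lock" */
static pthread_barrier_t barrier;

/* like a vfs method called with i_mutex held that then takes a cluster lock */
static void *vfs_style_path(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock_a);
	pthread_barrier_wait(&barrier);
	fprintf(stderr, "vfs path: holds i_mutex, waits for cluster lock\n");
	pthread_mutex_lock(&lock_b);		/* never returns */
	pthread_mutex_unlock(&lock_b);
	pthread_mutex_unlock(&lock_a);
	return NULL;
}

/* like the old aio_read path: cluster lock first, then i_mutex */
static void *old_read_path(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock_b);
	pthread_barrier_wait(&barrier);
	fprintf(stderr, "read path: holds cluster lock, waits for i_mutex\n");
	pthread_mutex_lock(&lock_a);		/* never returns */
	pthread_mutex_unlock(&lock_a);
	pthread_mutex_unlock(&lock_b);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_barrier_init(&barrier, NULL, 2);
	pthread_create(&a, NULL, vfs_style_path, NULL);
	pthread_create(&b, NULL, old_read_path, NULL);
	pthread_join(a, NULL);			/* never completes */
	pthread_join(b, NULL);
	return 0;
}

The fix removes the second ordering: the extent items are protected by
their own extent_sem inside the extent code, so the read path no longer
needs i_mutex for the data wait check.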
Zach Brown
df90b3eb90 Add export-lookup-evict-race test
Add a test that creates races between fh_to_dentry and eviction
triggered by lock invalidation.

Signed-off-by: Zach Brown <zab@versity.com>
2021-04-27 11:20:40 -07:00
4 changed files with 48 additions and 8 deletions

View File

@@ -29,7 +29,12 @@
#include "per_task.h"
#include "omap.h"
/* TODO: Direct I/O, AIO */
/*
 * Start a high level file read. We check for offline extents in the
 * read region here so that we only check the extents once. We use the
 * dio count to prevent releasing while we're reading after we've
 * checked the extents.
 */
ssize_t scoutfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
			      unsigned long nr_segs, loff_t pos)
{
@@ -43,30 +48,32 @@ ssize_t scoutfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
	int ret;

retry:
	/* protect checked extents from release */
	mutex_lock(&inode->i_mutex);
	atomic_inc(&inode->i_dio_count);
	mutex_unlock(&inode->i_mutex);

	ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
				 SCOUTFS_LKF_REFRESH_INODE, inode, &inode_lock);
	if (ret)
		goto out;

	if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock)) {
		/* protect checked extents from stage/release */
		mutex_lock(&inode->i_mutex);
		atomic_inc(&inode->i_dio_count);
		mutex_unlock(&inode->i_mutex);

		ret = scoutfs_data_wait_check_iov(inode, iov, nr_segs, pos,
						  SEF_OFFLINE,
						  SCOUTFS_IOC_DWO_READ,
						  &dw, inode_lock);
		if (ret != 0)
			goto out;
	} else {
		WARN_ON_ONCE(true);
	}

	ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
out:
	if (scoutfs_per_task_del(&si->pt_data_lock, &pt_ent))
		inode_dio_done(inode);
	inode_dio_done(inode);
	scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
	scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);

	if (scoutfs_data_wait_found(&dw)) {

View File

View File

@@ -13,6 +13,7 @@ lock-refleak.sh
lock-shrink-consistency.sh
lock-pr-cw-conflict.sh
lock-revoke-getcwd.sh
export-lookup-evict-race.sh
createmany-parallel.sh
createmany-large-names.sh
createmany-rename-large-dir.sh

View File

@@ -0,0 +1,32 @@
#
# test racing fh_to_dentry with evict from lock invalidation. We've
# had deadlocks between the ordering of iget and evict when they acquire
# cluster locks.
#

t_require_commands touch stat handle_cat
t_require_mounts 2

CPUS=$(getconf _NPROCESSORS_ONLN)
NR=$((CPUS * 4))
END=$((SECONDS + 30))

touch "$T_D0/file"
ino=$(stat -c "%i" "$T_D0/file")

while test $SECONDS -lt $END; do
	for i in $(seq 1 $NR); do
		fs=$((RANDOM % T_NR_MOUNTS))
		eval dir="\$T_D${fs}"
		write=$((RANDOM & 1))
		if [ "$write" == 1 ]; then
			touch "$dir/file" &
		else
			handle_cat "$dir" "$ino" &
		fi
	done

	wait
done

t_pass
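
The test above requires a handle_cat command.  A minimal sketch of what
such a helper might look like follows; this is an assumption, not the
actual test binary, and the handle encoding (a bare 64-bit inode number
with handle type 1) is a guess that a real filesystem may not accept.
The point is that opening by handle drives the fh_to_dentry export path
that the test races against eviction:

/*
 * Hypothetical handle_cat-style helper: build a file handle from an
 * inode number and read the file through open_by_handle_at().  The
 * handle layout here is a guess; a real tool would normally copy a
 * handle produced by name_to_handle_at().
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	struct file_handle *fh;
	char buf[4096];
	uint64_t ino;
	ssize_t bytes;
	int mount_fd;
	int fd;

	if (argc != 3) {
		fprintf(stderr, "usage: %s <mount-dir> <ino>\n", argv[0]);
		return 1;
	}

	mount_fd = open(argv[1], O_RDONLY | O_DIRECTORY);
	if (mount_fd < 0) {
		perror("open mount dir");
		return 1;
	}

	ino = strtoull(argv[2], NULL, 0);

	fh = calloc(1, sizeof(*fh) + sizeof(ino));
	if (!fh)
		return 1;
	fh->handle_bytes = sizeof(ino);
	fh->handle_type = 1;			/* guessed fid type */
	memcpy(fh->f_handle, &ino, sizeof(ino));

	/* the by-handle lookup is what races with eviction */
	fd = open_by_handle_at(mount_fd, fh, O_RDONLY);
	if (fd < 0) {
		perror("open_by_handle_at");
		return 1;
	}

	while ((bytes = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, bytes, stdout);

	close(fd);
	close(mount_fd);
	free(fh);
	return 0;
}

Opening by handle needs CAP_DAC_READ_SEARCH, so a helper like this has
to run with enough privilege for open_by_handle_at() to succeed.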