Merge pull request #38 from versity/zab/read_xattr_deadlocks

Zab/read xattr deadlocks
This commit is contained in:
Zach Brown
2021-05-03 09:44:57 -07:00
committed by GitHub
7 changed files with 64 additions and 36 deletions

View File

@@ -29,7 +29,12 @@
#include "per_task.h"
#include "omap.h"
/* TODO: Direct I/O, AIO */
/*
* Start a high level file read. We check for offline extents in the
* read region here so that we only check the extents once. We use the
* dio count to prevent releasing while we're reading after we've
* checked the extents.
*/
ssize_t scoutfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
@@ -43,30 +48,32 @@ ssize_t scoutfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
int ret;
retry:
/* protect checked extents from release */
mutex_lock(&inode->i_mutex);
atomic_inc(&inode->i_dio_count);
mutex_unlock(&inode->i_mutex);
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
SCOUTFS_LKF_REFRESH_INODE, inode, &inode_lock);
if (ret)
goto out;
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock)) {
/* protect checked extents from stage/release */
mutex_lock(&inode->i_mutex);
atomic_inc(&inode->i_dio_count);
mutex_unlock(&inode->i_mutex);
ret = scoutfs_data_wait_check_iov(inode, iov, nr_segs, pos,
SEF_OFFLINE,
SCOUTFS_IOC_DWO_READ,
&dw, inode_lock);
if (ret != 0)
goto out;
} else {
WARN_ON_ONCE(true);
}
ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
out:
if (scoutfs_per_task_del(&si->pt_data_lock, &pt_ent))
inode_dio_done(inode);
inode_dio_done(inode);
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
if (scoutfs_data_wait_found(&dw)) {

View File

@@ -648,12 +648,22 @@ void scoutfs_inode_get_onoff(struct inode *inode, s64 *on, s64 *off)
} while (read_seqcount_retry(&si->seqcount, seq));
}
/*
* We have inversions between getting cluster locks while performing
* final deletion on a freeing inode and waiting on a freeing inode
* while holding a cluster lock.
*
* We can avoid these deadlocks by hiding freeing inodes in our hash
* lookup function. We're fine with either returning null or populating
* a new inode overlapping with eviction freeing a previous instance of
* the inode.
*/
static int scoutfs_iget_test(struct inode *inode, void *arg)
{
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
u64 *ino = arg;
return si->ino == *ino;
return (si->ino == *ino) && !(inode->i_state & I_FREEING);
}
static int scoutfs_iget_set(struct inode *inode, void *arg)
@@ -672,28 +682,6 @@ struct inode *scoutfs_ilookup(struct super_block *sb, u64 ino)
return ilookup5(sb, ino, scoutfs_iget_test, &ino);
}
static int iget_test_nofreeing(struct inode *inode, void *arg)
{
return !(inode->i_state & I_FREEING) && scoutfs_iget_test(inode, arg);
}
/*
* There's a natural risk of a deadlock between lock invalidation and
* eviction. Invalidation blocks locks while looking up inodes and
* invalidating local caches. Inode eviction gets a lock to check final
* inode deletion while the inode is marked FREEING which blocks
* lookups.
*
* We have a lookup variant which doesn't return I_FREEING inodes
* instead of waiting on them. If an inode has made it to I_FREEING
* then it doesn't have any local caches that are reachable and the lock
* invalidation promise is kept.
*/
struct inode *scoutfs_ilookup_nofreeing(struct super_block *sb, u64 ino)
{
return ilookup5(sb, ino, iget_test_nofreeing, &ino);
}
struct inode *scoutfs_iget(struct super_block *sb, u64 ino)
{
struct scoutfs_lock *lock = NULL;

View File

@@ -79,7 +79,6 @@ int scoutfs_orphan_inode(struct inode *inode);
struct inode *scoutfs_iget(struct super_block *sb, u64 ino);
struct inode *scoutfs_ilookup(struct super_block *sb, u64 ino);
struct inode *scoutfs_ilookup_nofreeing(struct super_block *sb, u64 ino);
void scoutfs_inode_init_index_key(struct scoutfs_key *key, u8 type, u64 major,
u32 minor, u64 ino);

View File

@@ -156,9 +156,7 @@ static void lock_inv_iput_worker(struct work_struct *work)
/*
* Invalidate cached data associated with an inode whose lock is going
* away. We ignore indoes with I_FREEING instead of waiting on them to
* avoid a deadlock, if they're freeing then they won't be visible to
* future lock users and we don't need to invalidate them.
* away.
*
* We try to drop cached dentries and inodes covered by the lock if they
* aren't referenced. This removes them from the mount's open map and
@@ -178,7 +176,7 @@ static void invalidate_inode(struct super_block *sb, u64 ino)
struct scoutfs_inode_info *si;
struct inode *inode;
inode = scoutfs_ilookup_nofreeing(sb, ino);
inode = scoutfs_ilookup(sb, ino);
if (inode) {
si = SCOUTFS_I(inode);
@@ -896,6 +894,9 @@ static void lock_invalidate_worker(struct work_struct *work)
list_del_init(&lock->inv_head);
lock->invalidate_pending = 0;
wake_up(&lock->waitq);
} else {
/* another request filled nl/net_id, put it back on the list */
list_move_tail(&lock->inv_head, &linfo->inv_list);
}
put_lock(linfo, lock);
}

View File

View File

@@ -13,6 +13,7 @@ lock-refleak.sh
lock-shrink-consistency.sh
lock-pr-cw-conflict.sh
lock-revoke-getcwd.sh
export-lookup-evict-race.sh
createmany-parallel.sh
createmany-large-names.sh
createmany-rename-large-dir.sh

View File

@@ -0,0 +1,32 @@
#
# test racing fh_to_dentry with evict from lock invalidation. We've
# had deadlocks between the ordering of iget and evict when they acquire
# cluster locks.
#
t_require_commands touch stat handle_cat
t_require_mounts 2
CPUS=$(getconf _NPROCESSORS_ONLN)
NR=$((CPUS * 4))
END=$((SECONDS + 30))
touch "$T_D0/file"
ino=$(stat -c "%i" "$T_D0/file")
while test $SECONDS -lt $END; do
for i in $(seq 1 $NR); do
fs=$((RANDOM % T_NR_MOUNTS))
eval dir="\$T_D${fs}"
write=$((RANDOM & 1))
if [ "$write" == 1 ]; then
touch "$dir/file" &
else
handle_cat "$dir" "$ino" &
fi
done
wait
done
t_pass