From ca320d02cb1666b5fd40e9c01c6e9a8f0f5b3eca Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Mon, 26 Apr 2021 16:09:13 -0700 Subject: [PATCH] Get i_mutex before cluster lock in file aio_read The vfs often calls filesystem methods with i_mutex held. This creates a natural ordering of i_mutex outside of cluster locks. The file aio_read method acquired i_mutex after its cluster lock, creating a deadlock with other vfs methods like setattr. The acquisition of i_mutex after the cluster lock was due to using the pattern where we use the per-task lock to discover if we're the first user of the lock in a call chain. Readpage has to do this, but file aio_read doesn't. It should never be called recursively. So we can acquire the i_mutex outside of the cluster lock and warn if we ever are called recursively. Signed-off-by: Zach Brown --- kmod/src/file.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/kmod/src/file.c b/kmod/src/file.c index b1555b08..586d77fd 100644 --- a/kmod/src/file.c +++ b/kmod/src/file.c @@ -29,7 +29,12 @@ #include "per_task.h" #include "omap.h" -/* TODO: Direct I/O, AIO */ +/* + * Start a high level file read. We check for offline extents in the + * read region here so that we only check the extents once. We use the + * dio count to prevent releasing while we're reading after we've + * checked the extents. + */ ssize_t scoutfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { @@ -43,30 +48,32 @@ ssize_t scoutfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov, int ret; retry: + /* protect checked extents from release */ + mutex_lock(&inode->i_mutex); + atomic_inc(&inode->i_dio_count); + mutex_unlock(&inode->i_mutex); + ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, SCOUTFS_LKF_REFRESH_INODE, inode, &inode_lock); if (ret) goto out; if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock)) { - /* protect checked extents from stage/release */ - mutex_lock(&inode->i_mutex); - atomic_inc(&inode->i_dio_count); - mutex_unlock(&inode->i_mutex); - ret = scoutfs_data_wait_check_iov(inode, iov, nr_segs, pos, SEF_OFFLINE, SCOUTFS_IOC_DWO_READ, &dw, inode_lock); if (ret != 0) goto out; + } else { + WARN_ON_ONCE(true); } ret = generic_file_aio_read(iocb, iov, nr_segs, pos); out: - if (scoutfs_per_task_del(&si->pt_data_lock, &pt_ent)) - inode_dio_done(inode); + inode_dio_done(inode); + scoutfs_per_task_del(&si->pt_data_lock, &pt_ent); scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ); if (scoutfs_data_wait_found(&dw)) {