From 603af327acf0f0ecd730283a556da76a2fa6d07f Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Tue, 27 Apr 2021 14:36:03 -0700 Subject: [PATCH] Ignore I_FREEING in all inode hash lookups Previously we added a ilookup variant that ignored I_FREEING inodes to avoid a deadlock between lock invalidation (lock->I_FREEING) and eviction (I_FREEING->lock); Now we're seeing similar deadlocks between eviction (I_FREEING->lock) and fh_to_dentry's iget (lock->I_FREEING). I think it's reasonable to ignore all inodes with I_FREEING set when we're using our _test callback in ilookup or iget. We can remove the _nofreeing ilookup variant and move its I_FREEING test into the iget_test callback provided to both ilookup and iget. Callers will get the same result, it will just happen without waiting for a previously I_FREEING inode to leave. They'll get NULL instead of waiting from ilookup. They'll allocate and start to initialize a newer instance of the inode and insert it along side the previous instance. We don't have inode number re-use so we don't have the problem where a newly allocated inode number is relying on inode cache serialization to not find a previously allocated inode that is being evicted. This change does allow for concurrent iget of an inode number that is being deleted on a local node. This could happen in fh_to_dentry with a raw inode number. But this was already a problem between mounts because they don't have a shared inode cache to serialize them. Once we fix that between nodes, we fix it on a single node as well. Signed-off-by: Zach Brown --- kmod/src/inode.c | 34 +++++++++++----------------------- kmod/src/inode.h | 1 - kmod/src/lock.c | 6 ++---- 3 files changed, 13 insertions(+), 28 deletions(-) diff --git a/kmod/src/inode.c b/kmod/src/inode.c index cdade67d..3c78dc21 100644 --- a/kmod/src/inode.c +++ b/kmod/src/inode.c @@ -648,12 +648,22 @@ void scoutfs_inode_get_onoff(struct inode *inode, s64 *on, s64 *off) } while (read_seqcount_retry(&si->seqcount, seq)); } +/* + * We have inversions between getting cluster locks while performing + * final deletion on a freeing inode and waiting on a freeing inode + * while holding a cluster lock. + * + * We can avoid these deadlocks by hiding freeing inodes in our hash + * lookup function. We're fine with either returning null or populating + * a new inode overlapping with eviction freeing a previous instance of + * the inode. + */ static int scoutfs_iget_test(struct inode *inode, void *arg) { struct scoutfs_inode_info *si = SCOUTFS_I(inode); u64 *ino = arg; - return si->ino == *ino; + return (si->ino == *ino) && !(inode->i_state & I_FREEING); } static int scoutfs_iget_set(struct inode *inode, void *arg) @@ -672,28 +682,6 @@ struct inode *scoutfs_ilookup(struct super_block *sb, u64 ino) return ilookup5(sb, ino, scoutfs_iget_test, &ino); } -static int iget_test_nofreeing(struct inode *inode, void *arg) -{ - return !(inode->i_state & I_FREEING) && scoutfs_iget_test(inode, arg); -} - -/* - * There's a natural risk of a deadlock between lock invalidation and - * eviction. Invalidation blocks locks while looking up inodes and - * invalidating local caches. Inode eviction gets a lock to check final - * inode deletion while the inode is marked FREEING which blocks - * lookups. - * - * We have a lookup variant which doesn't return I_FREEING inodes - * instead of waiting on them. If an inode has made it to I_FREEING - * then it doesn't have any local caches that are reachable and the lock - * invalidation promise is kept. - */ -struct inode *scoutfs_ilookup_nofreeing(struct super_block *sb, u64 ino) -{ - return ilookup5(sb, ino, iget_test_nofreeing, &ino); -} - struct inode *scoutfs_iget(struct super_block *sb, u64 ino) { struct scoutfs_lock *lock = NULL; diff --git a/kmod/src/inode.h b/kmod/src/inode.h index f1884ad6..070d6492 100644 --- a/kmod/src/inode.h +++ b/kmod/src/inode.h @@ -79,7 +79,6 @@ int scoutfs_orphan_inode(struct inode *inode); struct inode *scoutfs_iget(struct super_block *sb, u64 ino); struct inode *scoutfs_ilookup(struct super_block *sb, u64 ino); -struct inode *scoutfs_ilookup_nofreeing(struct super_block *sb, u64 ino); void scoutfs_inode_init_index_key(struct scoutfs_key *key, u8 type, u64 major, u32 minor, u64 ino); diff --git a/kmod/src/lock.c b/kmod/src/lock.c index f149bb13..a091ca68 100644 --- a/kmod/src/lock.c +++ b/kmod/src/lock.c @@ -156,9 +156,7 @@ static void lock_inv_iput_worker(struct work_struct *work) /* * Invalidate cached data associated with an inode whose lock is going - * away. We ignore indoes with I_FREEING instead of waiting on them to - * avoid a deadlock, if they're freeing then they won't be visible to - * future lock users and we don't need to invalidate them. + * away. * * We try to drop cached dentries and inodes covered by the lock if they * aren't referenced. This removes them from the mount's open map and @@ -178,7 +176,7 @@ static void invalidate_inode(struct super_block *sb, u64 ino) struct scoutfs_inode_info *si; struct inode *inode; - inode = scoutfs_ilookup_nofreeing(sb, ino); + inode = scoutfs_ilookup(sb, ino); if (inode) { si = SCOUTFS_I(inode);