Ignore I_FREEING in all inode hash lookups

Previously we added a ilookup variant that ignored I_FREEING inodes
to avoid a deadlock between lock invalidation (lock->I_FREEING) and
eviction (I_FREEING->lock);

Now we're seeing similar deadlocks between eviction (I_FREEING->lock)
and fh_to_dentry's iget (lock->I_FREEING).

I think it's reasonable to ignore all inodes with I_FREEING set when
we're using our _test callback in ilookup or iget.  We can remove the
_nofreeing ilookup variant and move its I_FREEING test into the
iget_test callback provided to both ilookup and iget.

Callers will get the same result, it will just happen without waiting
for a previously I_FREEING inode to leave.  They'll get NULL instead of
waiting from ilookup.  They'll allocate and start to initialize a newer
instance of the inode and insert it along side the previous instance.

We don't have inode number re-use so we don't have the problem where a
newly allocated inode number is relying on inode cache serialization to
not find a previously allocated inode that is being evicted.

This change does allow for concurrent iget of an inode number that is
being deleted on a local node.  This could happen in fh_to_dentry with a
raw inode number.  But this was already a problem between mounts because
they don't have a shared inode cache to serialize them.  Once we fix
that between nodes, we fix it on a single node as well.

Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
Zach Brown
2021-04-27 14:36:03 -07:00
parent ca320d02cb
commit 603af327ac
3 changed files with 13 additions and 28 deletions

View File

@@ -648,12 +648,22 @@ void scoutfs_inode_get_onoff(struct inode *inode, s64 *on, s64 *off)
} while (read_seqcount_retry(&si->seqcount, seq));
}
/*
* We have inversions between getting cluster locks while performing
* final deletion on a freeing inode and waiting on a freeing inode
* while holding a cluster lock.
*
* We can avoid these deadlocks by hiding freeing inodes in our hash
* lookup function. We're fine with either returning null or populating
* a new inode overlapping with eviction freeing a previous instance of
* the inode.
*/
static int scoutfs_iget_test(struct inode *inode, void *arg)
{
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
u64 *ino = arg;
return si->ino == *ino;
return (si->ino == *ino) && !(inode->i_state & I_FREEING);
}
static int scoutfs_iget_set(struct inode *inode, void *arg)
@@ -672,28 +682,6 @@ struct inode *scoutfs_ilookup(struct super_block *sb, u64 ino)
return ilookup5(sb, ino, scoutfs_iget_test, &ino);
}
static int iget_test_nofreeing(struct inode *inode, void *arg)
{
return !(inode->i_state & I_FREEING) && scoutfs_iget_test(inode, arg);
}
/*
* There's a natural risk of a deadlock between lock invalidation and
* eviction. Invalidation blocks locks while looking up inodes and
* invalidating local caches. Inode eviction gets a lock to check final
* inode deletion while the inode is marked FREEING which blocks
* lookups.
*
* We have a lookup variant which doesn't return I_FREEING inodes
* instead of waiting on them. If an inode has made it to I_FREEING
* then it doesn't have any local caches that are reachable and the lock
* invalidation promise is kept.
*/
struct inode *scoutfs_ilookup_nofreeing(struct super_block *sb, u64 ino)
{
return ilookup5(sb, ino, iget_test_nofreeing, &ino);
}
struct inode *scoutfs_iget(struct super_block *sb, u64 ino)
{
struct scoutfs_lock *lock = NULL;

View File

@@ -79,7 +79,6 @@ int scoutfs_orphan_inode(struct inode *inode);
struct inode *scoutfs_iget(struct super_block *sb, u64 ino);
struct inode *scoutfs_ilookup(struct super_block *sb, u64 ino);
struct inode *scoutfs_ilookup_nofreeing(struct super_block *sb, u64 ino);
void scoutfs_inode_init_index_key(struct scoutfs_key *key, u8 type, u64 major,
u32 minor, u64 ino);

View File

@@ -156,9 +156,7 @@ static void lock_inv_iput_worker(struct work_struct *work)
/*
* Invalidate cached data associated with an inode whose lock is going
* away. We ignore indoes with I_FREEING instead of waiting on them to
* avoid a deadlock, if they're freeing then they won't be visible to
* future lock users and we don't need to invalidate them.
* away.
*
* We try to drop cached dentries and inodes covered by the lock if they
* aren't referenced. This removes them from the mount's open map and
@@ -178,7 +176,7 @@ static void invalidate_inode(struct super_block *sb, u64 ino)
struct scoutfs_inode_info *si;
struct inode *inode;
inode = scoutfs_ilookup_nofreeing(sb, ino);
inode = scoutfs_ilookup(sb, ino);
if (inode) {
si = SCOUTFS_I(inode);