mirror of
https://github.com/versity/scoutfs.git
synced 2026-06-02 09:46:21 +00:00
Proactively drop dentry/inode caches outside locks
Previously we wouldn't try to remove cached dentries and inodes as lock revocation removed cluster lock coverage. The next time we tried to use the cached dentries or inodes we'd acquire a lock and refresh them. But now cached inodes prevent final inode deletion. If they linger outside cluster locking then any final deletion will need to be deferred until all its cached inodes are naturally dropped at some point in the future across the cluster. It might take refreshing the dentries or for memory pressure to push out the old cached inodes. This tries to proactively drop cached dentries and inodes as we lose cluster lock coverage if they're not actively referenced. We need to be careful not to perform final inode deletion during lock invalidation because it will deadlock, so we defer an iput which could delete during evict out to async work. Now deletion can be done synchronously in the task that is performing the unlink because previous use of the inode on remote mounts hasn't left unused cached inodes sitting around. Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
@@ -84,6 +84,7 @@ static void scoutfs_inode_ctor(void *obj)
|
||||
init_rwsem(&si->xattr_rwsem);
|
||||
RB_CLEAR_NODE(&si->writeback_node);
|
||||
scoutfs_lock_init_coverage(&si->ino_lock_cov);
|
||||
atomic_set(&si->inv_iput_count, 0);
|
||||
|
||||
inode_init_once(&si->inode);
|
||||
}
|
||||
@@ -313,6 +314,7 @@ int scoutfs_inode_refresh(struct inode *inode, struct scoutfs_lock *lock,
|
||||
load_inode(inode, &sinode);
|
||||
atomic64_set(&si->last_refreshed, refresh_gen);
|
||||
scoutfs_lock_add_coverage(sb, lock, &si->ino_lock_cov);
|
||||
si->drop_invalidated = false;
|
||||
}
|
||||
} else {
|
||||
ret = 0;
|
||||
@@ -1393,6 +1395,7 @@ struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir,
|
||||
si->have_item = false;
|
||||
atomic64_set(&si->last_refreshed, lock->refresh_gen);
|
||||
scoutfs_lock_add_coverage(sb, lock, &si->ino_lock_cov);
|
||||
si->drop_invalidated = false;
|
||||
si->flags = 0;
|
||||
|
||||
scoutfs_inode_set_meta_seq(inode);
|
||||
@@ -1586,13 +1589,30 @@ clear:
|
||||
clear_inode(inode);
|
||||
}
|
||||
|
||||
/*
|
||||
* We want to remove inodes from the cache as their count goes to 0 if
|
||||
* they're no longer covered by a cluster lock or if while locked they
|
||||
* were unlinked.
|
||||
*
|
||||
* We don't want unused cached inodes to linger outside of cluster
|
||||
* locking so that they don't prevent final inode deletion on other
|
||||
* nodes. We don't have specific per-inode or per-dentry locks which
|
||||
* would otherwise remove the stale caches as they're invalidated.
|
||||
* Stale cached inodes provide little value because they're going to be
|
||||
* refreshed the next time they're locked. Populating the item cache
|
||||
* and loading the inode item is a lot more expensive than initializing
|
||||
* and inserting a newly allocated vfs inode.
|
||||
*/
|
||||
int scoutfs_drop_inode(struct inode *inode)
|
||||
{
|
||||
int ret = generic_drop_inode(inode);
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
|
||||
trace_scoutfs_drop_inode(inode->i_sb, scoutfs_ino(inode),
|
||||
inode->i_nlink, inode_unhashed(inode));
|
||||
return ret;
|
||||
trace_scoutfs_drop_inode(sb, scoutfs_ino(inode), inode->i_nlink, inode_unhashed(inode),
|
||||
si->drop_invalidated);
|
||||
|
||||
return si->drop_invalidated || !scoutfs_lock_is_covered(sb, &si->ino_lock_cov) ||
|
||||
generic_drop_inode(inode);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -53,6 +53,11 @@ struct scoutfs_inode_info {
|
||||
|
||||
struct scoutfs_lock_coverage ino_lock_cov;
|
||||
|
||||
/* drop if i_count hits 0, allows drop while invalidate holds coverage */
|
||||
bool drop_invalidated;
|
||||
struct llist_node inv_iput_llnode;
|
||||
atomic_t inv_iput_count;
|
||||
|
||||
struct inode inode;
|
||||
};
|
||||
|
||||
|
||||
@@ -75,6 +75,7 @@ struct lock_info {
|
||||
struct super_block *sb;
|
||||
spinlock_t lock;
|
||||
bool shutdown;
|
||||
bool unmounting;
|
||||
struct rb_root lock_tree;
|
||||
struct rb_root lock_range_tree;
|
||||
struct shrinker shrinker;
|
||||
@@ -88,6 +89,9 @@ struct lock_info {
|
||||
struct work_struct shrink_work;
|
||||
struct list_head shrink_list;
|
||||
atomic64_t next_refresh_gen;
|
||||
struct work_struct inv_iput_work;
|
||||
struct llist_head inv_iput_llist;
|
||||
|
||||
struct dentry *tseq_dentry;
|
||||
struct scoutfs_tseq_tree tseq_tree;
|
||||
};
|
||||
@@ -122,12 +126,53 @@ static bool lock_modes_match(int granted, int requested)
|
||||
requested == SCOUTFS_LOCK_READ);
|
||||
}
|
||||
|
||||
/*
|
||||
* Final iput can get into evict and perform final inode deletion which
|
||||
* can delete a lot of items under locks and transactions. We really
|
||||
* don't want to be doing all that in an iput during invalidation. When
|
||||
* invalidation sees that iput might perform final deletion it puts them
|
||||
* on a list and queues this work.
|
||||
*
|
||||
* Nothing stops multiple puts for multiple invalidations of an inode
|
||||
* before the work runs so we can track multiple puts in flight.
|
||||
*/
|
||||
static void lock_inv_iput_worker(struct work_struct *work)
|
||||
{
|
||||
/* the work struct is embedded in lock_info as inv_iput_work */
struct lock_info *linfo = container_of(work, struct lock_info, inv_iput_work);
|
||||
struct scoutfs_inode_info *si;
|
||||
/* cursor for the _safe iterator, which needs the next entry held separately */
struct scoutfs_inode_info *tmp;
|
||||
struct llist_node *inodes;
|
||||
bool more;
|
||||
|
||||
/* detach everything currently queued on inv_iput_llist and walk that chain below */
inodes = llist_del_all(&linfo->inv_iput_llist);
|
||||
|
||||
llist_for_each_entry_safe(si, tmp, inodes, inv_iput_llnode) {
|
||||
/*
 * invalidate_inode() bumps inv_iput_count once per deferred iput,
 * so drop one inode reference per count until it reaches zero.
 */
do {
|
||||
/*
 * The count is sampled before the iput and si is not touched after it.
 * NOTE(review): presumably because the last iput can evict and free
 * the inode that embeds si -- confirm.
 */
more = atomic_dec_return(&si->inv_iput_count) > 0;
|
||||
iput(&si->inode);
|
||||
} while (more);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* invalidate cached data associated with an inode whose lock is going
|
||||
* away.
|
||||
*
|
||||
* We try to drop cached dentries and inodes covered by the lock if they
|
||||
* aren't referenced. This removes them from the mount's open map and
|
||||
* allows deletions to be performed by unlink without having to wait for
|
||||
* remote cached inodes to be dropped.
|
||||
*
|
||||
* If the cached inode was already deferring final inode deletion then
|
||||
* we can't perform that inline in invalidation. The locking alone would
|
||||
* deadlock, and it might also take multiple transactions to fully
|
||||
* delete an inode with significant metadata. We only perform the iput
|
||||
* inline if we know that possible eviction can't perform the final
|
||||
* deletion, otherwise we kick it off to async work.
|
||||
*/
|
||||
static void invalidate_inode(struct super_block *sb, u64 ino)
|
||||
{
|
||||
DECLARE_LOCK_INFO(sb, linfo);
|
||||
struct scoutfs_inode_info *si;
|
||||
struct inode *inode;
|
||||
|
||||
@@ -141,7 +186,20 @@ static void invalidate_inode(struct super_block *sb, u64 ino)
|
||||
scoutfs_data_wait_changed(inode);
|
||||
}
|
||||
|
||||
iput(inode);
|
||||
/* can't touch during unmount, dcache destroys w/o locks */
|
||||
if (!linfo->unmounting)
|
||||
d_prune_aliases(inode);
|
||||
|
||||
si->drop_invalidated = true;
|
||||
if (scoutfs_lock_is_covered(sb, &si->ino_lock_cov) && inode->i_nlink > 0) {
|
||||
iput(inode);
|
||||
} else {
|
||||
/* defer iput to work context so we don't evict inodes from invalidation */
|
||||
if (atomic_inc_return(&si->inv_iput_count) == 1)
|
||||
llist_add(&si->inv_iput_llnode, &linfo->inv_iput_llist);
|
||||
smp_wmb(); /* count and list visible before work executes */
|
||||
queue_work(linfo->workq, &linfo->inv_iput_work);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1536,11 +1594,21 @@ static void lock_tseq_show(struct seq_file *m, struct scoutfs_tseq_entry *ent)
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller is going to be calling _destroy soon and, critically, is
|
||||
* about to shutdown networking before calling us so that we don't get
|
||||
* any callbacks while we're destroying. We have to ensure that we
|
||||
* won't call networking after this returns.
|
||||
*
|
||||
* shrink_dcache_for_umount() tears down dentries with no locking. We
|
||||
* need to make sure that our invalidation won't touch dentries before
|
||||
* we return and the caller calls the generic vfs unmount path.
|
||||
*/
|
||||
void scoutfs_lock_unmount_begin(struct super_block *sb)
|
||||
{
|
||||
DECLARE_LOCK_INFO(sb, linfo);
|
||||
|
||||
/*
 * linfo can be NULL here, in which case there is nothing to do.
 * NOTE(review): presumably when lock setup never completed -- confirm.
 */
if (linfo) {
|
||||
/* invalidate_inode() checks this flag and skips d_prune_aliases() once it is set */
linfo->unmounting = true;
|
||||
/*
 * NOTE(review): inv_dwork is presumably the invalidation work that
 * reaches invalidate_inode(); flushing it after setting the flag would
 * mean no invalidation that missed the flag is still running when we
 * return -- confirm against the inv_dwork worker.
 */
flush_delayed_work(&linfo->inv_dwork);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Internal fs threads can be using locking, and locking can have async
|
||||
* work pending. We use ->shutdown to force callers to return
|
||||
* -ESHUTDOWN and to prevent the future queueing of work that could call
|
||||
@@ -1682,6 +1750,8 @@ int scoutfs_lock_setup(struct super_block *sb)
|
||||
INIT_WORK(&linfo->shrink_work, lock_shrink_worker);
|
||||
INIT_LIST_HEAD(&linfo->shrink_list);
|
||||
atomic64_set(&linfo->next_refresh_gen, 0);
|
||||
INIT_WORK(&linfo->inv_iput_work, lock_inv_iput_worker);
|
||||
init_llist_head(&linfo->inv_iput_llist);
|
||||
scoutfs_tseq_tree_init(&linfo->tseq_tree, lock_tseq_show);
|
||||
|
||||
sbi->lock_info = linfo;
|
||||
|
||||
@@ -104,6 +104,7 @@ bool scoutfs_lock_protected(struct scoutfs_lock *lock, struct scoutfs_key *key,
|
||||
void scoutfs_free_unused_locks(struct super_block *sb, unsigned long nr);
|
||||
|
||||
int scoutfs_lock_setup(struct super_block *sb);
|
||||
void scoutfs_lock_unmount_begin(struct super_block *sb);
|
||||
void scoutfs_lock_shutdown(struct super_block *sb);
|
||||
void scoutfs_lock_destroy(struct super_block *sb);
|
||||
|
||||
|
||||
@@ -690,15 +690,16 @@ TRACE_EVENT(scoutfs_evict_inode,
|
||||
|
||||
TRACE_EVENT(scoutfs_drop_inode,
|
||||
TP_PROTO(struct super_block *sb, __u64 ino, unsigned int nlink,
|
||||
unsigned int unhashed),
|
||||
unsigned int unhashed, bool drop_invalidated),
|
||||
|
||||
TP_ARGS(sb, ino, nlink, unhashed),
|
||||
TP_ARGS(sb, ino, nlink, unhashed, drop_invalidated),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(__u64, ino)
|
||||
__field(unsigned int, nlink)
|
||||
__field(unsigned int, unhashed)
|
||||
__field(unsigned int, drop_invalidated)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
@@ -706,10 +707,12 @@ TRACE_EVENT(scoutfs_drop_inode,
|
||||
__entry->ino = ino;
|
||||
__entry->nlink = nlink;
|
||||
__entry->unhashed = unhashed;
|
||||
__entry->drop_invalidated = !!drop_invalidated;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" ino %llu nlink %u unhashed %d", SCSB_TRACE_ARGS,
|
||||
__entry->ino, __entry->nlink, __entry->unhashed)
|
||||
TP_printk(SCSBF" ino %llu nlink %u unhashed %d drop_invalidated %u", SCSB_TRACE_ARGS,
|
||||
__entry->ino, __entry->nlink, __entry->unhashed,
|
||||
__entry->drop_invalidated)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_inode_walk_writeback,
|
||||
|
||||
@@ -649,6 +649,9 @@ static void scoutfs_kill_sb(struct super_block *sb)
|
||||
{
|
||||
trace_scoutfs_kill_sb(sb);
|
||||
|
||||
if (SCOUTFS_HAS_SBI(sb))
|
||||
scoutfs_lock_unmount_begin(sb);
|
||||
|
||||
kill_block_super(sb);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user