diff --git a/kmod/src/inode.c b/kmod/src/inode.c
index 15e4014f..cbbf6d70 100644
--- a/kmod/src/inode.c
+++ b/kmod/src/inode.c
@@ -68,6 +68,9 @@ struct inode_sb_info {
 	/* serialize multiple inode ->evict trying to delete same ino's items */
 	spinlock_t deleting_items_lock;
 	struct list_head deleting_items_list;
+
+	struct work_struct iput_work;
+	struct llist_head iput_llist;
 };
 
 #define DECLARE_INODE_SB_INFO(sb, name) \
@@ -94,7 +97,7 @@ static void scoutfs_inode_ctor(void *obj)
 	init_rwsem(&si->xattr_rwsem);
 	RB_CLEAR_NODE(&si->writeback_node);
 	scoutfs_lock_init_coverage(&si->ino_lock_cov);
-	atomic_set(&si->inv_iput_count, 0);
+	atomic_set(&si->iput_count, 0);
 
 	inode_init_once(&si->inode);
 }
@@ -1699,6 +1702,49 @@ int scoutfs_drop_inode(struct inode *inode)
 	generic_drop_inode(inode);
 }
 
+static void iput_worker(struct work_struct *work)
+{
+	struct inode_sb_info *inf = container_of(work, struct inode_sb_info, iput_work);
+	struct scoutfs_inode_info *si;
+	struct scoutfs_inode_info *tmp;
+	struct llist_node *inodes;
+	bool more;
+
+	inodes = llist_del_all(&inf->iput_llist);
+
+	llist_for_each_entry_safe(si, tmp, inodes, iput_llnode) {
+		do {
+			more = atomic_dec_return(&si->iput_count) > 0;
+			iput(&si->inode);
+		} while (more);
+	}
+}
+
+/*
+ * Final iput can get into evict and perform final inode deletion, which
+ * can delete a lot of items spanning multiple cluster locks and
+ * transactions.  It should be understood as a heavy, high-level
+ * operation, more like file writing and less like dropping a refcount.
+ *
+ * Unfortunately we also have incentives to use igrab/iput from internal
+ * contexts that have no business doing that work, like lock
+ * invalidation or dirty inode writeback during transaction commit.
+ *
+ * In those cases we can kick iput off to a background work context.
+ * Nothing stops multiple puts of an inode before the work runs, so we
+ * track multiple puts in flight.
+ */
+void scoutfs_inode_queue_iput(struct inode *inode)
+{
+	DECLARE_INODE_SB_INFO(inode->i_sb, inf);
+	struct scoutfs_inode_info *si = SCOUTFS_I(inode);
+
+	if (atomic_inc_return(&si->iput_count) == 1)
+		llist_add(&si->iput_llnode, &inf->iput_llist);
+	smp_wmb(); /* count and list visible before work executes */
+	schedule_work(&inf->iput_work);
+}
+
 /*
  * All mounts are performing this work concurrently.  We introduce
  * significant jitter between them to try and keep them from all
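The queueing handshake above is compact enough to miss: only the 0 -> 1 transition of iput_count publishes the inode on the llist, and the worker detaches the whole list before draining each count, so the final decrement can hand a node straight back to a racing scoutfs_inode_queue_iput(). Below is a compilable userspace model of that pattern, with C11 atomics standing in for the kernel's atomic_t and llist primitives; every name in it (struct obj, queue_put, drain, pending) is invented for the sketch and is not scoutfs code.

	/* Build: cc -std=c11 model.c */
	#include <stdatomic.h>
	#include <stdio.h>

	struct obj {
		atomic_int puts;	/* plays the role of si->iput_count */
		struct obj *next;	/* plays the role of si->iput_llnode */
	};

	static struct obj *_Atomic pending;	/* plays the role of inf->iput_llist */

	/* like scoutfs_inode_queue_iput(): only the 0 -> 1 transition publishes */
	static void queue_put(struct obj *o)
	{
		if (atomic_fetch_add(&o->puts, 1) == 0) {
			struct obj *head = atomic_load(&pending);
			do {
				o->next = head;
			} while (!atomic_compare_exchange_weak(&pending, &head, o));
		}
	}

	/* like iput_worker(): detach everything, then issue one put per count */
	static void drain(void)
	{
		struct obj *o = atomic_exchange(&pending, NULL);
		struct obj *next;
		int more;

		while (o) {
			next = o->next;	/* save before the final decrement */
			do {
				more = atomic_fetch_sub(&o->puts, 1) > 1;
				printf("put %p\n", (void *)o);	/* stands in for iput() */
			} while (more);
			o = next;
		}
	}

	int main(void)
	{
		struct obj a = { 0 }, b = { 0 };

		queue_put(&a);
		queue_put(&b);
		queue_put(&b);	/* second put only bumps the count */
		drain();	/* puts a once, b twice */
		return 0;
	}

The single-threaded main() only exercises the counting behavior; the reason drain() saves ->next before draining, mirroring llist_for_each_entry_safe() in the patch, is that under concurrency the final decrement re-arms the object for immediate re-publication on the list.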
@@ -1951,6 +1997,8 @@ int scoutfs_inode_setup(struct super_block *sb)
 	INIT_DELAYED_WORK(&inf->orphan_scan_dwork, inode_orphan_scan_worker);
 	spin_lock_init(&inf->deleting_items_lock);
 	INIT_LIST_HEAD(&inf->deleting_items_list);
+	INIT_WORK(&inf->iput_work, iput_worker);
+	init_llist_head(&inf->iput_llist);
 
 	sbi->inode_sb_info = inf;
 
diff --git a/kmod/src/inode.h b/kmod/src/inode.h
index 7cb61b57..050cd097 100644
--- a/kmod/src/inode.h
+++ b/kmod/src/inode.h
@@ -55,8 +55,8 @@ struct scoutfs_inode_info {
 	/* drop if i_count hits 0, allows drop while invalidate holds coverage */
 	bool drop_invalidated;
 
-	struct llist_node inv_iput_llnode;
-	atomic_t inv_iput_count;
+	struct llist_node iput_llnode;
+	atomic_t iput_count;
 
 	struct inode inode;
 };
@@ -75,6 +75,7 @@ struct inode *scoutfs_alloc_inode(struct super_block *sb);
 void scoutfs_destroy_inode(struct inode *inode);
 int scoutfs_drop_inode(struct inode *inode);
 void scoutfs_evict_inode(struct inode *inode);
+void scoutfs_inode_queue_iput(struct inode *inode);
 
 struct inode *scoutfs_iget(struct super_block *sb, u64 ino);
 struct inode *scoutfs_ilookup(struct super_block *sb, u64 ino);
diff --git a/kmod/src/lock.c b/kmod/src/lock.c
index 2278ee71..b7dfee7d 100644
--- a/kmod/src/lock.c
+++ b/kmod/src/lock.c
@@ -89,8 +89,6 @@ struct lock_info {
 	struct work_struct shrink_work;
 	struct list_head shrink_list;
 	atomic64_t next_refresh_gen;
-	struct work_struct inv_iput_work;
-	struct llist_head inv_iput_llist;
 
 	struct dentry *tseq_dentry;
 	struct scoutfs_tseq_tree tseq_tree;
@@ -126,34 +124,6 @@ static bool lock_modes_match(int granted, int requested)
 		requested == SCOUTFS_LOCK_READ);
 }
 
-/*
- * Final iput can get into evict and perform final inode deletion which
- * can delete a lot of items under locks and transactions.  We really
- * don't want to be doing all that in an iput during invalidation.  When
- * invalidation sees that iput might perform final deletion it puts them
- * on a list and queues this work.
- *
- * Nothing stops multiple puts for multiple invalidations of an inode
- * before the work runs so we can track multiple puts in flight.
- */
-static void lock_inv_iput_worker(struct work_struct *work)
-{
-	struct lock_info *linfo = container_of(work, struct lock_info, inv_iput_work);
-	struct scoutfs_inode_info *si;
-	struct scoutfs_inode_info *tmp;
-	struct llist_node *inodes;
-	bool more;
-
-	inodes = llist_del_all(&linfo->inv_iput_llist);
-
-	llist_for_each_entry_safe(si, tmp, inodes, inv_iput_llnode) {
-		do {
-			more = atomic_dec_return(&si->inv_iput_count) > 0;
-			iput(&si->inode);
-		} while (more);
-	}
-}
-
 /*
  * Invalidate cached data associated with an inode whose lock is going
  * away.
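One lifecycle detail these hunks don't show is teardown: once iput_work can be scheduled, whatever tears down inode_sb_info has to quiesce callers and flush the work before the structure is freed, or a still-pending worker would touch freed memory. A minimal sketch of that ordering, assuming a hypothetical helper called from scoutfs's existing inode teardown path (neither the function nor its call site is part of this diff):

	/*
	 * Hypothetical teardown helper, for illustration only: by the time
	 * this runs, no new scoutfs_inode_queue_iput() callers may be active.
	 */
	static void iput_work_shutdown(struct inode_sb_info *inf)
	{
		/* wait for any queued or running worker to drain its list */
		flush_work(&inf->iput_work);

		/* with callers quiesced and the work flushed, nothing remains */
		WARN_ON_ONCE(!llist_empty(&inf->iput_llist));
	}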
@@ -194,11 +164,8 @@
 		if (scoutfs_lock_is_covered(sb, &si->ino_lock_cov) && inode->i_nlink > 0) {
 			iput(inode);
 		} else {
-			/* defer iput to work context so we don't evict inodes from invalidation */
-			if (atomic_inc_return(&si->inv_iput_count) == 1)
-				llist_add(&si->inv_iput_llnode, &linfo->inv_iput_llist);
-			smp_wmb(); /* count and list visible before work executes */
-			queue_work(linfo->workq, &linfo->inv_iput_work);
+			/* defer iput to work context so we don't evict inodes from invalidation */
+			scoutfs_inode_queue_iput(inode);
 		}
 	}
 }
@@ -1789,8 +1756,6 @@ int scoutfs_lock_setup(struct super_block *sb)
 	INIT_WORK(&linfo->shrink_work, lock_shrink_worker);
 	INIT_LIST_HEAD(&linfo->shrink_list);
 	atomic64_set(&linfo->next_refresh_gen, 0);
-	INIT_WORK(&linfo->inv_iput_work, lock_inv_iput_worker);
-	init_llist_head(&linfo->inv_iput_llist);
 	scoutfs_tseq_tree_init(&linfo->tseq_tree, lock_tseq_show);
 
 	sbi->lock_info = linfo;
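invalidate_inode() is the only call site this diff converts, but the new comment in inode.c also names dirty inode writeback during transaction commit as a context that has no business doing a final iput inline. A conversion there would presumably take the same shape as the hunk above; sketched below with a hypothetical function that is not part of this series:

	/* Hypothetical future caller, for illustration only */
	static void writeback_done_put(struct inode *inode)
	{
		/*
		 * was: iput(inode) -- a final put here could recurse into
		 * eviction and bulk item deletion from inside commit
		 */
		scoutfs_inode_queue_iput(inode);
	}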