Fix item cache page memory corruption
The item cache page life cycle is tricky. There are no proper page reference counts; everything is done by nesting the page rwlock inside the item_cache_info rwlock. The intent is that you can only reference pages while you hold the rwlocks appropriately. The per-cpu page references are outside that locking regime, so they add a reference count. Now there are reference counts for the main cache index reference and for each per-cpu reference. The end result of all this is that you can only reference pages outside of locks if you're protected by references.

Lock invalidation messed this up by trying to add its right split page to the lru after it was unlocked. Its page reference wasn't protected at that point. Shrinking could be freeing that page, so invalidation could be putting a freed page's memory back on the lru.

Shrinking had a little bug in that it was using list_move to move an initialized lru_head list_head. It turns out to be harmless (list_del will just follow pointers to itself and set itself as next and prev all over again), but boy does it catch one's eye. Let's remove all confusion and drop the reference while holding the cinf->rwlock instead of trying to optimize freeing outside locks.

Finally, the big one: inserting a read item after compacting the page to make room was inserting through stale parent pointers into the old pre-compacted page, rather than the new page that was swapped in by compaction. This left references to a freed page in the page rbtree and hilarity ensued.

Signed-off-by: Zach Brown <zab@versity.com>
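As a rough illustration of the reference rule described above (pages may only be touched outside the rwlocks while a count protects them), here is a minimal sketch in kernel-style C. Only cached_page and put_pg() appear in the diff below; the struct layout, get_pg(), and the free_pg() final-free helper are assumptions for illustration.

#include <linux/atomic.h>
#include <linux/fs.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>

/* a sketch of the page life cycle rule, not the real layout */
struct cached_page {
	atomic_t refcount;		/* index ref + one per per-cpu ref */
	rwlock_t rwlock;		/* nests inside item_cache_info's rwlock */
	struct rb_node node;		/* cache index (pg_root) linkage */
	struct list_head lru_head;	/* lru linkage */
};

/* hypothetical final-free helper, defined elsewhere */
static void free_pg(struct super_block *sb, struct cached_page *pg);

static void get_pg(struct cached_page *pg)
{
	atomic_inc(&pg->refcount);
}

/* the final put frees the page, so it must not still be under a lock */
static void put_pg(struct super_block *sb, struct cached_page *pg)
{
	if (atomic_dec_and_test(&pg->refcount))
		free_pg(sb, pg);
}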
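The "big one" is easiest to see in terms of the rbtree insertion idiom: a walk records the parent and link slot for a key, and a later insert links the new node through those pointers. Compaction swaps in a new page and rebuilds its item tree, so pointers captured before compact_page_items() reference freed memory. Here is a sketch of the fix's shape using the kernel rbtree API, with a hypothetical item type and a walk() helper standing in for the real item_rbtree_walk()/rbtree_insert():

#include <linux/rbtree.h>
#include <linux/types.h>

struct item {			/* hypothetical cached item */
	struct rb_node node;
	u64 key;
};

/* find the parent and link slot for key, as a walk is assumed to do */
static struct rb_node **walk(struct rb_root *root, u64 key,
			     struct rb_node **par)
{
	struct rb_node **pnode = &root->rb_node;

	*par = NULL;
	while (*pnode) {
		struct item *it = rb_entry(*pnode, struct item, node);

		*par = *pnode;
		pnode = key < it->key ? &(*pnode)->rb_left :
					&(*pnode)->rb_right;
	}
	return pnode;
}

static void insert_item(struct rb_root *root, struct item *ins)
{
	struct rb_node *par;
	struct rb_node **pnode;

	/*
	 * Any par/pnode captured before the tree was rebuilt would
	 * point into the old tree, so walk again immediately before
	 * linking, as the first hunk below does after compaction.
	 */
	pnode = walk(root, ins->key, &par);
	rb_link_node(&ins->node, par, pnode);
	rb_insert_color(&ins->node, root);
}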
@@ -1339,7 +1339,10 @@ static int read_page_item(struct super_block *sb, struct scoutfs_key *key,
 		/* split needs multiple items, sparse may not have enough */
 		if (!left)
 			return -ENOMEM;
+
 		compact_page_items(sb, pg, left);
+		found = item_rbtree_walk(&pg->item_root, key, NULL, &par,
+					 &pnode);
 	}
 
 	item = alloc_item(pg, key, liv, val, val_len);
@@ -2364,9 +2367,9 @@ retry:
 			/* inv was entirely inside page, done after bisect */
 			write_trylock_will_succeed(&right->rwlock);
 			rbtree_insert(&right->node, par, pnode, &cinf->pg_root);
+			lru_accessed(sb, cinf, right);
 			write_unlock(&right->rwlock);
 			write_unlock(&pg->rwlock);
-			lru_accessed(sb, cinf, right);
 			right = NULL;
 			break;
 		}
@@ -2396,7 +2399,6 @@ static int item_lru_shrink(struct shrinker *shrink,
 	struct active_reader *active;
 	struct cached_page *tmp;
 	struct cached_page *pg;
-	LIST_HEAD(list);
 	int nr;
 
 	if (sc->nr_to_scan == 0)
@@ -2433,21 +2435,17 @@ static int item_lru_shrink(struct shrinker *shrink,
 		__lru_remove(sb, cinf, pg);
 		rbtree_erase(&pg->node, &cinf->pg_root);
-		list_move_tail(&pg->lru_head, &list);
+		invalidate_pcpu_page(pg);
 		write_unlock(&pg->rwlock);
 
 		put_pg(sb, pg);
 
 		if (--nr == 0)
 			break;
 	}
 
 	write_unlock(&cinf->rwlock);
 	spin_unlock(&cinf->lru_lock);
 
-	list_for_each_entry_safe(pg, tmp, &list, lru_head) {
-		list_del_init(&pg->lru_head);
-		put_pg(sb, pg);
-	}
 out:
 	return min_t(unsigned long, cinf->lru_pages, INT_MAX);
 }
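On the "harmless but eye-catching" list_move in the shrinker: the unlink half of list_move_tail() stores a node's neighbours into each other, so a node whose prev and next already point at itself just writes its own address back into its own fields, which is the "follows pointers to itself" behaviour the message describes. A minimal sketch, assuming only <linux/list.h>:

#include <linux/list.h>

/*
 * list_move_tail() begins with the unlink:
 *
 *	next->prev = prev;
 *	prev->next = next;
 *
 * For an initialized node, prev == next == the node itself, so both
 * stores rewrite the node's own address into its own fields: a no-op.
 */
static void move_initialized_node(struct list_head *head)
{
	LIST_HEAD(node);		/* node.prev == node.next == &node */

	list_move_tail(&node, head);	/* unlink is harmless, then re-linked */
}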