From ebbb2e842e40c3612f9a2a8bf3d745d7119cd335 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 18 Oct 2016 11:59:18 -0700 Subject: [PATCH] scoutfs: implement inode orphaning This is pretty straightforward - we define a new item type, SCOUTFS_ORPHAN_KEY. We don't need to store any value with this; the inode and type fields are enough for us to find what inode has been orphaned. Otherwise this works as one would expect. Unlink sets the item, and ->evict_inode removes it. On mount, we scan for orphan items and remove any corresponding inodes. Signed-off-by: Mark Fasheh Signed-off-by: Zach Brown --- kmod/src/dir.c | 30 +++++-- kmod/src/format.h | 1 + kmod/src/inode.c | 170 +++++++++++++++++++++++++++++++++------ kmod/src/inode.h | 3 + kmod/src/scoutfs_trace.h | 56 +++++++++++++ kmod/src/super.c | 2 + 6 files changed, 229 insertions(+), 33 deletions(-) diff --git a/kmod/src/dir.c b/kmod/src/dir.c index e1c90f31..d45fd05e 100644 --- a/kmod/src/dir.c +++ b/kmod/src/dir.c @@ -568,19 +568,33 @@ static int scoutfs_unlink(struct inode *dir, struct dentry *dentry) return ret; set_lref_key(&lref_key, scoutfs_ino(inode), di->lref_counter); - - ret = scoutfs_dirty_inode_item(dir) ?: - scoutfs_dirty_inode_item(inode) ?: - scoutfs_btree_dirty(sb, meta, &lref_key); - if (ret) - goto out; - scoutfs_set_key(&key, scoutfs_ino(dir), SCOUTFS_DIRENT_KEY, di->hash); - ret = scoutfs_btree_delete(sb, meta, &key); + /* + * Dirty most of the metadata up front so that later btree + * operations can't fail. + */ + ret = scoutfs_dirty_inode_item(dir) ?: + scoutfs_dirty_inode_item(inode) ?: + scoutfs_btree_dirty(sb, meta, &lref_key) ?: + scoutfs_btree_dirty(sb, meta, &key); if (ret) goto out; + if ((inode->i_nlink == 1) || + (S_ISDIR(inode->i_mode) && inode->i_nlink == 2)) { + /* + * Insert the orphan item before we modify any inode + * metadata so we can gracefully exit should it + * fail. 
+ */ + ret = scoutfs_orphan_inode(inode); + if (ret) + goto out; + } + + /* XXX: In thoery this can't fail but we should trap errors anyway */ + scoutfs_btree_delete(sb, meta, &key); scoutfs_btree_delete(sb, meta, &lref_key); dir->i_ctime = ts; diff --git a/kmod/src/format.h b/kmod/src/format.h index fd0cbd62..7a02c26a 100644 --- a/kmod/src/format.h +++ b/kmod/src/format.h @@ -108,6 +108,7 @@ struct scoutfs_key { #define SCOUTFS_LINK_BACKREF_KEY 6 #define SCOUTFS_SYMLINK_KEY 7 #define SCOUTFS_BMAP_KEY 8 +#define SCOUTFS_ORPHAN_KEY 9 #define SCOUTFS_MAX_ITEM_LEN 512 diff --git a/kmod/src/inode.c b/kmod/src/inode.c index e60d92c6..f8e6b6f8 100644 --- a/kmod/src/inode.c +++ b/kmod/src/inode.c @@ -27,6 +27,8 @@ #include "scoutfs_trace.h" #include "xattr.h" #include "trans.h" +#include "btree.h" +#include "msg.h" /* * XXX @@ -359,34 +361,29 @@ struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir, return inode; } -/* - * Remove all the items associated with a given inode. 
- */ -static void drop_inode_items(struct super_block *sb, u64 ino) +static int remove_orphan_item(struct super_block *sb, u64 ino) { - struct scoutfs_btree_root *meta = SCOUTFS_META(sb); - struct scoutfs_btree_val val; - struct scoutfs_inode sinode; struct scoutfs_key key; - bool release = false; - umode_t mode; + struct scoutfs_btree_root *meta = SCOUTFS_META(sb); int ret; - /* sample the inode mode, XXX don't need to copy whole thing here */ - scoutfs_set_key(&key, ino, SCOUTFS_INODE_KEY, 0); - scoutfs_btree_init_val(&val, &sinode, sizeof(sinode)); + scoutfs_set_key(&key, ino, SCOUTFS_ORPHAN_KEY, 0); - ret = scoutfs_btree_lookup(sb, meta, &key, &val); - if (ret < 0) - goto out; + ret = scoutfs_btree_delete(sb, meta, &key); + if (ret == -ENOENT) + ret = 0; - /* XXX corruption */ - if (ret != sizeof(sinode)) { - ret = -EIO; - goto out; - } + return ret; +} - mode = le32_to_cpu(sinode.mode); +static int __delete_inode(struct super_block *sb, struct scoutfs_key *key, + u64 ino, umode_t mode) +{ + int ret; + bool release = false; + struct scoutfs_btree_root *meta = SCOUTFS_META(sb); + + trace_delete_inode(sb, ino, mode); ret = scoutfs_hold_trans(sb); if (ret) @@ -404,12 +401,48 @@ static void drop_inode_items(struct super_block *sb, u64 ino) if (ret) goto out; - ret = scoutfs_btree_delete(sb, meta, &key); + ret = scoutfs_btree_delete(sb, meta, key); + if (ret) + goto out; + + ret = remove_orphan_item(sb, ino); +out: + if (release) + scoutfs_release_trans(sb); + return ret; +} + +/* + * Remove all the items associated with a given inode. 
+ */ +static void delete_inode(struct super_block *sb, u64 ino) +{ + struct scoutfs_btree_root *meta = SCOUTFS_META(sb); + struct scoutfs_btree_val val; + struct scoutfs_inode sinode; + struct scoutfs_key key; + umode_t mode; + int ret; + + /* sample the inode mode, XXX don't need to copy whole thing here */ + scoutfs_set_key(&key, ino, SCOUTFS_INODE_KEY, 0); + scoutfs_btree_init_val(&val, &sinode, sizeof(sinode)); + + ret = scoutfs_btree_lookup(sb, meta, &key, &val); + if (ret < 0) + goto out; + + /* XXX corruption */ + if (ret != sizeof(sinode)) { + ret = -EIO; + goto out; + } + mode = le32_to_cpu(sinode.mode); + + ret = __delete_inode(sb, &key, ino, mode); out: if (ret) trace_printk("drop items failed ret %d ino %llu\n", ret, ino); - if (release) - scoutfs_release_trans(sb); } /* @@ -429,7 +462,7 @@ void scoutfs_evict_inode(struct inode *inode) truncate_inode_pages_final(&inode->i_data); if (inode->i_nlink == 0) - drop_inode_items(inode->i_sb, scoutfs_ino(inode)); + delete_inode(inode->i_sb, scoutfs_ino(inode)); clear: clear_inode(inode); } @@ -443,6 +476,93 @@ int scoutfs_drop_inode(struct inode *inode) return ret; } +static int process_orphaned_inode(struct super_block *sb, u64 ino) +{ + int ret; + struct scoutfs_btree_root *meta = SCOUTFS_META(sb); + struct scoutfs_btree_val val; + struct scoutfs_inode sinode; + struct scoutfs_key key; + + scoutfs_set_key(&key, ino, SCOUTFS_INODE_KEY, 0); + scoutfs_btree_init_val(&val, &sinode, sizeof(sinode)); + + ret = scoutfs_btree_lookup(sb, meta, &key, &val); + if (ret < 0) { + if (ret == -ENOENT) + ret = 0; + return ret; + } + + /* XXX corruption */ + if (ret != sizeof(sinode)) { + ret = -EIO; + goto out; + } + + if (le32_to_cpu(sinode.nlink) == 0) + __delete_inode(sb, &key, ino, le32_to_cpu(sinode.mode)); + else + scoutfs_warn(sb, "Dangling orphan item for inode %llu.", ino); + +out: + return ret; +} + +/* + * Scan the metadata tree for orphan items and process each one. 
+ * + * Runtime of this will be bounded by the number of orphans, which could + * theoretically be very large. If that becomes a problem we might want to push + * this work off to a thread. + */ +int scoutfs_scan_orphans(struct super_block *sb) +{ + int ret, err = 0; + struct scoutfs_key first, last, found; + struct scoutfs_btree_root *meta = SCOUTFS_META(sb); + + trace_scoutfs_scan_orphans(sb); + + scoutfs_set_key(&first, 0, SCOUTFS_ORPHAN_KEY, 0); + scoutfs_set_key(&last, ~0ULL, SCOUTFS_ORPHAN_KEY, 0); + + while (1) { + ret = scoutfs_btree_next(sb, meta, &first, &last, &found, NULL); + if (ret == -ENOENT) /* No more orphan items */ + break; + if (ret < 0) + goto out; + + ret = process_orphaned_inode(sb, le64_to_cpu(found.inode)); + if (ret && ret != -ENOENT && !err) + err = ret; + + first = found; + scoutfs_inc_key(&first); + } + + ret = 0; +out: + return err ? err : ret; +} + +int scoutfs_orphan_inode(struct inode *inode) +{ + int ret; + struct super_block *sb = inode->i_sb; + struct scoutfs_key key; + struct scoutfs_btree_root *meta = SCOUTFS_META(sb); + + trace_scoutfs_orphan_inode(sb, inode); + + scoutfs_set_key(&key, scoutfs_ino(inode), SCOUTFS_ORPHAN_KEY, 0); + + ret = scoutfs_btree_insert(sb, meta, &key, NULL); + + return ret; +} + void scoutfs_inode_exit(void) { if (scoutfs_inode_cachep) { diff --git a/kmod/src/inode.h b/kmod/src/inode.h index e02acf27..f303ba97 100644 --- a/kmod/src/inode.h +++ b/kmod/src/inode.h @@ -25,6 +25,7 @@ struct inode *scoutfs_alloc_inode(struct super_block *sb); void scoutfs_destroy_inode(struct inode *inode); int scoutfs_drop_inode(struct inode *inode); void scoutfs_evict_inode(struct inode *inode); +int scoutfs_orphan_inode(struct inode *inode); struct inode *scoutfs_iget(struct super_block *sb, u64 ino); int scoutfs_dirty_inode_item(struct inode *inode); @@ -32,6 +33,8 @@ void scoutfs_update_inode_item(struct inode *inode); struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir, umode_t mode, dev_t rdev); 
+int scoutfs_scan_orphans(struct super_block *sb); + u64 scoutfs_last_ino(struct super_block *sb); void scoutfs_inode_exit(void); diff --git a/kmod/src/scoutfs_trace.h b/kmod/src/scoutfs_trace.h index 81cef012..1157c26a 100644 --- a/kmod/src/scoutfs_trace.h +++ b/kmod/src/scoutfs_trace.h @@ -125,6 +125,62 @@ TRACE_EVENT(scoutfs_update_inode, __entry->ino, __entry->size) ); +TRACE_EVENT(scoutfs_orphan_inode, + TP_PROTO(struct super_block *sb, struct inode *inode), + + TP_ARGS(sb, inode), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(__u64, ino) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->ino = scoutfs_ino(inode); + ), + + TP_printk("dev %d,%d ino %llu", MAJOR(__entry->dev), + MINOR(__entry->dev), __entry->ino) +); + +TRACE_EVENT(delete_inode, + TP_PROTO(struct super_block *sb, u64 ino, umode_t mode), + + TP_ARGS(sb, ino, mode), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(__u64, ino) + __field(umode_t, mode) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->ino = ino; + __entry->mode = mode; + ), + + TP_printk("dev %d,%d ino %llu, mode 0x%x", MAJOR(__entry->dev), + MINOR(__entry->dev), __entry->ino, __entry->mode) +); + +TRACE_EVENT(scoutfs_scan_orphans, + TP_PROTO(struct super_block *sb), + + TP_ARGS(sb), + + TP_STRUCT__entry( + __field(dev_t, dev) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + ), + + TP_printk("dev %d,%d", MAJOR(__entry->dev), MINOR(__entry->dev)) +); + TRACE_EVENT(scoutfs_buddy_alloc, TP_PROTO(u64 blkno, int order, int region, int ret), diff --git a/kmod/src/super.c b/kmod/src/super.c index 6c4a4d51..f7f0c81f 100644 --- a/kmod/src/super.c +++ b/kmod/src/super.c @@ -233,6 +233,8 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent) if (!sb->s_root) return -ENOMEM; + scoutfs_scan_orphans(sb); + return 0; }