diff --git a/kmod/src/dir.c b/kmod/src/dir.c
index e1c90f31..d45fd05e 100644
--- a/kmod/src/dir.c
+++ b/kmod/src/dir.c
@@ -568,19 +568,33 @@ static int scoutfs_unlink(struct inode *dir, struct dentry *dentry)
 		return ret;
 
 	set_lref_key(&lref_key, scoutfs_ino(inode), di->lref_counter);
-
-	ret = scoutfs_dirty_inode_item(dir) ?:
-	      scoutfs_dirty_inode_item(inode) ?:
-	      scoutfs_btree_dirty(sb, meta, &lref_key);
-	if (ret)
-		goto out;
-
 	scoutfs_set_key(&key, scoutfs_ino(dir), SCOUTFS_DIRENT_KEY, di->hash);
 
-	ret = scoutfs_btree_delete(sb, meta, &key);
+	/*
+	 * Dirty most of the metadata up front so that later btree
+	 * operations can't fail.
+	 */
+	ret = scoutfs_dirty_inode_item(dir) ?:
+	      scoutfs_dirty_inode_item(inode) ?:
+	      scoutfs_btree_dirty(sb, meta, &lref_key) ?:
+	      scoutfs_btree_dirty(sb, meta, &key);
 	if (ret)
 		goto out;
 
+	if ((inode->i_nlink == 1) ||
+	    (S_ISDIR(inode->i_mode) && inode->i_nlink == 2)) {
+		/*
+		 * Insert the orphan item before we modify any inode
+		 * metadata so we can gracefully exit should it
+		 * fail.
+		 */
+		ret = scoutfs_orphan_inode(inode);
+		if (ret)
+			goto out;
+	}
+
+	/* XXX: In theory this can't fail but we should trap errors anyway */
+	scoutfs_btree_delete(sb, meta, &key);
 	scoutfs_btree_delete(sb, meta, &lref_key);
 
 	dir->i_ctime = ts;
diff --git a/kmod/src/format.h b/kmod/src/format.h
index fd0cbd62..7a02c26a 100644
--- a/kmod/src/format.h
+++ b/kmod/src/format.h
@@ -108,6 +108,7 @@ struct scoutfs_key {
 #define SCOUTFS_LINK_BACKREF_KEY	6
 #define SCOUTFS_SYMLINK_KEY	7
 #define SCOUTFS_BMAP_KEY	8
+#define SCOUTFS_ORPHAN_KEY	9
 
 #define SCOUTFS_MAX_ITEM_LEN 512
 
diff --git a/kmod/src/inode.c b/kmod/src/inode.c
index e60d92c6..f8e6b6f8 100644
--- a/kmod/src/inode.c
+++ b/kmod/src/inode.c
@@ -27,6 +27,8 @@
 #include "scoutfs_trace.h"
 #include "xattr.h"
 #include "trans.h"
+#include "btree.h"
+#include "msg.h"
 
 /*
  * XXX
@@ -359,34 +361,37 @@ struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir,
 	return inode;
 }
 
-/*
- * Remove all the items associated with a given inode.
- */
-static void drop_inode_items(struct super_block *sb, u64 ino)
+/*
+ * Delete the orphan item for the given inode number.  A missing item
+ * is not treated as an error.
+ */
+static int remove_orphan_item(struct super_block *sb, u64 ino)
 {
-	struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
-	struct scoutfs_btree_val val;
-	struct scoutfs_inode sinode;
 	struct scoutfs_key key;
-	bool release = false;
-	umode_t mode;
+	struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
 	int ret;
 
-	/* sample the inode mode, XXX don't need to copy whole thing here */
-	scoutfs_set_key(&key, ino, SCOUTFS_INODE_KEY, 0);
-	scoutfs_btree_init_val(&val, &sinode, sizeof(sinode));
+	scoutfs_set_key(&key, ino, SCOUTFS_ORPHAN_KEY, 0);
 
-	ret = scoutfs_btree_lookup(sb, meta, &key, &val);
-	if (ret < 0)
-		goto out;
+	ret = scoutfs_btree_delete(sb, meta, &key);
+	if (ret == -ENOENT)
+		ret = 0;
 
-	/* XXX corruption */
-	if (ret != sizeof(sinode)) {
-		ret = -EIO;
-		goto out;
-	}
+	return ret;
+}
 
-	mode = le32_to_cpu(sinode.mode);
+/*
+ * The final steps delete the inode item at @key and then the inode's
+ * orphan item; the first failure is returned.
+ */
+static int __delete_inode(struct super_block *sb, struct scoutfs_key *key,
+			  u64 ino, umode_t mode)
+{
+	int ret;
+	bool release = false;
+	struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
+
+	trace_scoutfs_delete_inode(sb, ino, mode);
 
 	ret = scoutfs_hold_trans(sb);
 	if (ret)
@@ -404,12 +409,48 @@ static void drop_inode_items(struct super_block *sb, u64 ino)
 	if (ret)
 		goto out;
 
-	ret = scoutfs_btree_delete(sb, meta, &key);
+	ret = scoutfs_btree_delete(sb, meta, key);
+	if (ret)
+		goto out;
+
+	ret = remove_orphan_item(sb, ino);
+out:
+	if (release)
+		scoutfs_release_trans(sb);
+	return ret;
+}
+
+/*
+ * Remove all the items associated with a given inode.
+ */
+static void delete_inode(struct super_block *sb, u64 ino)
+{
+	struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
+	struct scoutfs_btree_val val;
+	struct scoutfs_inode sinode;
+	struct scoutfs_key key;
+	umode_t mode;
+	int ret;
+
+	/* sample the inode mode, XXX don't need to copy whole thing here */
+	scoutfs_set_key(&key, ino, SCOUTFS_INODE_KEY, 0);
+	scoutfs_btree_init_val(&val, &sinode, sizeof(sinode));
+
+	ret = scoutfs_btree_lookup(sb, meta, &key, &val);
+	if (ret < 0)
+		goto out;
+
+	/* XXX corruption */
+	if (ret != sizeof(sinode)) {
+		ret = -EIO;
+		goto out;
+	}
+	mode = le32_to_cpu(sinode.mode);
+
+	ret = __delete_inode(sb, &key, ino, mode);
 out:
 	if (ret)
 		trace_printk("drop items failed ret %d ino %llu\n", ret, ino);
-	if (release)
-		scoutfs_release_trans(sb);
 }
 
 /*
@@ -429,7 +470,7 @@ void scoutfs_evict_inode(struct inode *inode)
 	truncate_inode_pages_final(&inode->i_data);
 
 	if (inode->i_nlink == 0)
-		drop_inode_items(inode->i_sb, scoutfs_ino(inode));
+		delete_inode(inode->i_sb, scoutfs_ino(inode));
 clear:
 	clear_inode(inode);
 }
@@ -443,6 +484,111 @@ int scoutfs_drop_inode(struct inode *inode)
 	return ret;
 }
 
+/*
+ * Process one orphan item.  The inode item is looked up and the inode
+ * is deleted if its link count is zero; an orphan item whose inode
+ * still has links is only warned about.  A missing inode item is not
+ * an error.
+ *
+ * Returns 0 or a negative errno.
+ */
+static int process_orphaned_inode(struct super_block *sb, u64 ino)
+{
+	int ret;
+	struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
+	struct scoutfs_btree_val val;
+	struct scoutfs_inode sinode;
+	struct scoutfs_key key;
+
+	scoutfs_set_key(&key, ino, SCOUTFS_INODE_KEY, 0);
+	scoutfs_btree_init_val(&val, &sinode, sizeof(sinode));
+
+	ret = scoutfs_btree_lookup(sb, meta, &key, &val);
+	if (ret < 0) {
+		if (ret == -ENOENT)
+			ret = 0;
+		return ret;
+	}
+
+	/* XXX corruption */
+	if (ret != sizeof(sinode)) {
+		ret = -EIO;
+		goto out;
+	}
+
+	/*
+	 * A successful lookup returned the item length; reset ret so
+	 * the length isn't mistaken for an error by the caller.
+	 */
+	ret = 0;
+
+	if (le32_to_cpu(sinode.nlink) == 0)
+		ret = __delete_inode(sb, &key, ino, le32_to_cpu(sinode.mode));
+	else
+		scoutfs_warn(sb, "Dangling orphan item for inode %llu.", ino);
+
+out:
+	return ret;
+}
+
+/*
+ * Scan the metadata tree for orphan items and process each one.
+ *
+ * Runtime of this will be bounded by the number of orphans, which could
+ * theoretically be very large. If that becomes a problem we might want to push
+ * this work off to a thread.
+ */
+int scoutfs_scan_orphans(struct super_block *sb)
+{
+	int ret, err = 0;
+	struct scoutfs_key first, last, found;
+	struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
+
+	trace_scoutfs_scan_orphans(sb);
+
+	scoutfs_set_key(&first, 0, SCOUTFS_ORPHAN_KEY, 0);
+	scoutfs_set_key(&last, ~0ULL, SCOUTFS_ORPHAN_KEY, 0);
+
+	while (1) {
+		ret = scoutfs_btree_next(sb, meta, &first, &last, &found, NULL);
+		if (ret == -ENOENT) /* No more orphan items */
+			break;
+		if (ret < 0)
+			goto out;
+
+		ret = process_orphaned_inode(sb, le64_to_cpu(found.inode));
+		if (ret && ret != -ENOENT && !err)
+			err = ret;
+
+		first = found;
+		scoutfs_inc_key(&first);
+	}
+
+	ret = 0;
+out:
+	return err ? err : ret;
+}
+
+/*
+ * Insert an orphan item for the inode.  scoutfs_scan_orphans() finds
+ * these items and deletes inodes whose link count has reached zero.
+ */
+int scoutfs_orphan_inode(struct inode *inode)
+{
+	int ret;
+	struct super_block *sb = inode->i_sb;
+	struct scoutfs_key key;
+	struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
+
+	trace_scoutfs_orphan_inode(sb, inode);
+
+	scoutfs_set_key(&key, scoutfs_ino(inode), SCOUTFS_ORPHAN_KEY, 0);
+
+	ret = scoutfs_btree_insert(sb, meta, &key, NULL);
+
+	return ret;
+}
+
 void scoutfs_inode_exit(void)
 {
 	if (scoutfs_inode_cachep) {
diff --git a/kmod/src/inode.h b/kmod/src/inode.h
index e02acf27..f303ba97 100644
--- a/kmod/src/inode.h
+++ b/kmod/src/inode.h
@@ -25,6 +25,7 @@ struct inode *scoutfs_alloc_inode(struct super_block *sb);
 void scoutfs_destroy_inode(struct inode *inode);
 int scoutfs_drop_inode(struct inode *inode);
 void scoutfs_evict_inode(struct inode *inode);
+int scoutfs_orphan_inode(struct inode *inode);
 
 struct inode *scoutfs_iget(struct super_block *sb, u64 ino);
 int scoutfs_dirty_inode_item(struct inode *inode);
@@ -32,6 +33,8 @@ void scoutfs_update_inode_item(struct inode *inode);
 struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir,
 				umode_t mode, dev_t rdev);
 
+int scoutfs_scan_orphans(struct super_block *sb);
+
 u64 scoutfs_last_ino(struct super_block *sb);
 
 void scoutfs_inode_exit(void);
diff --git a/kmod/src/scoutfs_trace.h b/kmod/src/scoutfs_trace.h
index 81cef012..1157c26a 100644
--- a/kmod/src/scoutfs_trace.h
+++ b/kmod/src/scoutfs_trace.h
@@ -125,6 +125,62 @@ TRACE_EVENT(scoutfs_update_inode,
 		__entry->ino, __entry->size)
 );
 
+TRACE_EVENT(scoutfs_orphan_inode,
+	TP_PROTO(struct super_block *sb, struct inode *inode),
+
+	TP_ARGS(sb, inode),
+
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(__u64, ino)
+	),
+
+	TP_fast_assign(
+		__entry->dev = sb->s_dev;
+		__entry->ino = scoutfs_ino(inode);
+	),
+
+	TP_printk("dev %d,%d ino %llu", MAJOR(__entry->dev),
+		MINOR(__entry->dev), __entry->ino)
+);
+
+TRACE_EVENT(scoutfs_delete_inode,
+	TP_PROTO(struct super_block *sb, u64 ino, umode_t mode),
+
+	TP_ARGS(sb, ino, mode),
+
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(__u64, ino)
+		__field(umode_t, mode)
+	),
+
+	TP_fast_assign(
+		__entry->dev = sb->s_dev;
+		__entry->ino = ino;
+		__entry->mode = mode;
+	),
+
+	TP_printk("dev %d,%d ino %llu, mode 0x%x", MAJOR(__entry->dev),
+		MINOR(__entry->dev), __entry->ino, __entry->mode)
+);
+
+TRACE_EVENT(scoutfs_scan_orphans,
+	TP_PROTO(struct super_block *sb),
+
+	TP_ARGS(sb),
+
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+	),
+
+	TP_fast_assign(
+		__entry->dev = sb->s_dev;
+	),
+
+	TP_printk("dev %d,%d", MAJOR(__entry->dev), MINOR(__entry->dev))
+);
+
 TRACE_EVENT(scoutfs_buddy_alloc,
 	TP_PROTO(u64 blkno, int order, int region, int ret),
 
diff --git a/kmod/src/super.c b/kmod/src/super.c
index 6c4a4d51..f7f0c81f 100644
--- a/kmod/src/super.c
+++ b/kmod/src/super.c
@@ -233,6 +233,9 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
 	if (!sb->s_root)
 		return -ENOMEM;
 
+	/* errors from the orphan scan are ignored and don't fail the mount */
+	scoutfs_scan_orphans(sb);
+
 	return 0;
 }
 