From 41c29c48ddb1ebd8014025a6db456b2366bb07e7 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Tue, 22 May 2018 11:14:14 -0700 Subject: [PATCH] scoutfs: add extent corruption cases The extent code was originally written to panic if it hit errors during cleanup that resulted in inconsistent metadata. The more reasonble strategy is to warn about the corruption and act accordingly and leave it to corrective measures to resolve the corruption. In this case we continue returning the error that caused us to try and clean up. Signed-off-by: Zach Brown --- kmod/src/counters.h | 3 +++ kmod/src/data.c | 54 ++++++++++++++++++++++----------------------- kmod/src/extents.c | 52 ++++++++++--------------------------------- kmod/src/extents.h | 25 +++++++++++++++++++++ kmod/src/format.h | 3 +++ kmod/src/server.c | 9 ++++++-- 6 files changed, 76 insertions(+), 70 deletions(-) diff --git a/kmod/src/counters.h b/kmod/src/counters.h index 621d72c0..9d5db004 100644 --- a/kmod/src/counters.h +++ b/kmod/src/counters.h @@ -25,12 +25,15 @@ EXPAND_COUNTER(compact_sticky_written) \ EXPAND_COUNTER(corrupt_btree_block_level) \ EXPAND_COUNTER(corrupt_btree_no_child_ref) \ + EXPAND_COUNTER(corrupt_data_extent_trunc_cleanup) \ + EXPAND_COUNTER(corrupt_data_extent_alloc_cleanup) \ EXPAND_COUNTER(corrupt_dirent_backref_name_len) \ EXPAND_COUNTER(corrupt_dirent_name_len) \ EXPAND_COUNTER(corrupt_dirent_readdir_name_len) \ EXPAND_COUNTER(corrupt_inode_block_counts) \ EXPAND_COUNTER(corrupt_extent_add_cleanup) \ EXPAND_COUNTER(corrupt_extent_rem_cleanup) \ + EXPAND_COUNTER(corrupt_server_extent_cleanup) \ EXPAND_COUNTER(corrupt_symlink_inode_size) \ EXPAND_COUNTER(corrupt_symlink_missing_item) \ EXPAND_COUNTER(corrupt_symlink_not_null_term) \ diff --git a/kmod/src/data.c b/kmod/src/data.c index d3f89b3c..478746a6 100644 --- a/kmod/src/data.c +++ b/kmod/src/data.c @@ -35,6 +35,7 @@ #include "lock.h" #include "file.h" #include "extents.h" +#include "msg.h" /* * scoutfs uses extent items to track file data block mappings and free @@ -236,7 +237,6 @@ static s64 truncate_one_extent(struct super_block *sb, struct inode *inode, s64 offline_delta = 0; s64 online_delta = 0; s64 ret; - int err; scoutfs_extent_init(&next, SCOUTFS_FILE_EXTENT_TYPE, ino, iblock, 1, 0, 0); @@ -302,19 +302,17 @@ static s64 truncate_one_extent(struct super_block *sb, struct inode *inode, ret = 1; out: - if (ret < 0) { - err = 0; - if (add_rem) - err |= scoutfs_extent_add(sb, data_extent_io, &rem, - lock); - if (rem_fr) - err |= scoutfs_extent_remove(sb, data_extent_io, &fr, - sbi->node_id_lock); - BUG_ON(err); /* inconsistency, could save/restore */ + scoutfs_extent_cleanup(ret < 0 && add_rem, scoutfs_extent_add, sb, + data_extent_io, &rem, lock, + SC_DATA_EXTENT_TRUNC_CLEANUP, + corrupt_data_extent_trunc_cleanup, &rem); + scoutfs_extent_cleanup(ret < 0 && rem_fr, scoutfs_extent_remove, sb, + data_extent_io, &fr, sbi->node_id_lock, + SC_DATA_EXTENT_TRUNC_CLEANUP, + corrupt_data_extent_trunc_cleanup, &rem); - } else if (ret > 0) { + if (ret > 0) ret = rem.start + rem.len; - } return ret; } @@ -435,7 +433,6 @@ static int alloc_block(struct super_block *sb, struct inode *inode, u64 iblock, u64 offline; u64 online; u64 len; - int err; int ret; down_write(&datinf->alloc_rwsem); @@ -471,6 +468,10 @@ static int alloc_block(struct super_block *sb, struct inode *inode, u64 iblock, trace_scoutfs_data_alloc_block_next(sb, &ext); + /* initialize the new mapped block extent, referenced by cleanup */ + scoutfs_extent_init(&blk, SCOUTFS_FILE_EXTENT_TYPE, ino, + iblock, 1, ext.start, 0); + /* remove the free extent we're using */ scoutfs_extent_init(&fr, SCOUTFS_FREE_EXTENT_BLKNO_TYPE, sbi->node_id, ext.start, len, 0, 0); @@ -490,8 +491,6 @@ static int alloc_block(struct super_block *sb, struct inode *inode, u64 iblock, } /* add the block that the caller is writing */ - scoutfs_extent_init(&blk, SCOUTFS_FILE_EXTENT_TYPE, ino, - iblock, 1, ext.start, 0); ret = scoutfs_extent_add(sb, data_extent_io, &blk, lock); if (ret) goto out; @@ -510,19 +509,18 @@ static int alloc_block(struct super_block *sb, struct inode *inode, u64 iblock, scoutfs_inode_add_onoff(inode, 1, was_offline ? -1ULL : 0); ret = 0; out: - if (ret) { - err = 0; - if (rem_blk) - err |= scoutfs_extent_remove(sb, data_extent_io, &blk, - lock); - if (add_ofl) - err |= scoutfs_extent_add(sb, data_extent_io, &ofl, - lock); - if (add_fr) - err |= scoutfs_extent_add(sb, data_extent_io, &fr, - sbi->node_id_lock); - BUG_ON(err); /* inconsistency */ - } + scoutfs_extent_cleanup(ret < 0 && rem_blk, scoutfs_extent_remove, sb, + data_extent_io, &blk, lock, + SC_DATA_EXTENT_ALLOC_CLEANUP, + corrupt_data_extent_alloc_cleanup, &blk); + scoutfs_extent_cleanup(ret < 0 && add_ofl, scoutfs_extent_add, sb, + data_extent_io, &ofl, lock, + SC_DATA_EXTENT_ALLOC_CLEANUP, + corrupt_data_extent_alloc_cleanup, &blk); + scoutfs_extent_cleanup(ret < 0 && add_fr, scoutfs_extent_add, sb, + data_extent_io, &fr, sbi->node_id_lock, + SC_DATA_EXTENT_ALLOC_CLEANUP, + corrupt_data_extent_alloc_cleanup, &blk); up_write(&datinf->alloc_rwsem); diff --git a/kmod/src/extents.c b/kmod/src/extents.c index 1a461805..bbb18c71 100644 --- a/kmod/src/extents.c +++ b/kmod/src/extents.c @@ -198,30 +198,6 @@ out: return ret; } -/* - * The process of modifying an extent creates and deletes many - * intermediate extents. If we hit an error we need to undo the - * process. If we then hit an error we can be left with inconsistent - * extent items. - * - * We could fix this for extents that are stored in the item cache - * because it has tools for ensuring that operations can't fail. - * Extents that are stored in the btree currently can't avoid errors. - * We'd have to predirty blocks, allow deletion to fall below thresholds - * if merging saw an error, and preallocate blocks to be used for - * splitting/growth. It'd probably be worth it. - */ -#define extent_cleanup(cond, ext_func, sb, iof, clean, data, which, ctr, ext) \ -do { \ - __typeof__(sb) _sb = (sb); \ - int _ret; \ - \ - if ((cond) && (_ret = ext_func(_sb, iof, clean, data)) < 0) \ - scoutfs_corruption(_sb, which, ctr, \ - "ext "SE_FMT" clean "SE_FMT" ret %d", \ - SE_ARG(ext), SE_ARG(clean), _ret); \ -} while (0) - /* * Add a new extent. It can not overlap with any existing extents. It * may be merged with neighbouring extents. @@ -265,15 +241,12 @@ int scoutfs_extent_add(struct super_block *sb, scoutfs_extent_io_t iof, /* finally insert our new (possibly merged) extent */ ret = extent_insert(sb, iof, &ext, data); out: - extent_cleanup(ret < 0 && ins_right, - extent_insert, sb, iof, &right, data, - SC_EXTENT_ADD_CLEANUP, corrupt_extent_add_cleanup, - add); - extent_cleanup(ret < 0 && ins_left, - extent_insert, sb, iof, &left, data, - SC_EXTENT_ADD_CLEANUP, corrupt_extent_add_cleanup, - add); - + scoutfs_extent_cleanup(ret < 0 && ins_right, extent_insert, sb, iof, + &right, data, SC_EXTENT_ADD_CLEANUP, + corrupt_extent_add_cleanup, add); + scoutfs_extent_cleanup(ret < 0 && ins_left, extent_insert, sb, iof, + &left, data, SC_EXTENT_ADD_CLEANUP, + corrupt_extent_add_cleanup, add); return ret; } @@ -332,12 +305,11 @@ int scoutfs_extent_remove(struct super_block *sb, scoutfs_extent_io_t iof, } out: - extent_cleanup(ret < 0 && del_left, - extent_delete, sb, iof, &left, data, - SC_EXTENT_REM_CLEANUP, corrupt_extent_rem_cleanup, rem); - extent_cleanup(ret < 0 && ins_ext, - extent_insert, sb, iof, &ext, data, - SC_EXTENT_REM_CLEANUP, corrupt_extent_rem_cleanup, rem); - + scoutfs_extent_cleanup(ret < 0 && del_left, extent_delete, sb, iof, + &left, data, SC_EXTENT_REM_CLEANUP, + corrupt_extent_rem_cleanup, rem); + scoutfs_extent_cleanup(ret < 0 && ins_ext, extent_insert, sb, iof, + &ext, data, SC_EXTENT_REM_CLEANUP, + corrupt_extent_rem_cleanup, rem); return ret; } diff --git a/kmod/src/extents.h b/kmod/src/extents.h index ab25c93f..d97f1f32 100644 --- a/kmod/src/extents.h +++ b/kmod/src/extents.h @@ -38,4 +38,29 @@ int scoutfs_extent_add(struct super_block *sb, scoutfs_extent_io_t iof, int scoutfs_extent_remove(struct super_block *sb, scoutfs_extent_io_t iof, struct scoutfs_extent *rem, void *data); +/* + * The process of modifying an extent creates and deletes many + * intermediate extents. If we hit an error we need to undo the + * process. If we then hit an error we can be left with inconsistent + * extent items. + * + * We could fix this for extents that are stored in the item cache + * because it has tools for ensuring that operations can't fail. + * Extents that are stored in the btree currently can't avoid errors. + * We'd have to predirty blocks, allow deletion to fall below thresholds + * if merging saw an error, and preallocate blocks to be used for + * splitting/growth. It'd probably be worth it. + */ +#define scoutfs_extent_cleanup(cond, ext_func, sb, iof, clean, data, \ + which, ctr, ext) \ +do { \ + __typeof__(sb) _sb = (sb); \ + int _ret; \ + \ + if ((cond) && (_ret = ext_func(_sb, iof, clean, data)) < 0) \ + scoutfs_corruption(_sb, which, ctr, \ + "ext "SE_FMT" clean "SE_FMT" ret %d", \ + SE_ARG(ext), SE_ARG(clean), _ret); \ +} while (0) + #endif diff --git a/kmod/src/format.h b/kmod/src/format.h index cecd5c8b..f9a33b83 100644 --- a/kmod/src/format.h +++ b/kmod/src/format.h @@ -619,6 +619,9 @@ enum { SC_INODE_BLOCK_COUNTS, SC_EXTENT_ADD_CLEANUP, SC_EXTENT_REM_CLEANUP, + SC_DATA_EXTENT_TRUNC_CLEANUP, + SC_DATA_EXTENT_ALLOC_CLEANUP, + SC_SERVER_EXTENT_CLEANUP, SC_NR_SOURCES, }; diff --git a/kmod/src/server.c b/kmod/src/server.c index b9538f87..e7c0c3dd 100644 --- a/kmod/src/server.c +++ b/kmod/src/server.c @@ -190,9 +190,14 @@ static int server_extent_io(struct super_block *sb, int op, swap(ext->type, mirror_type); ret = server_extent_io(sb, op, ext, data); swap(ext->type, mirror_type); - if (ret) { + if (ret < 0) { err = server_extent_io(sb, mirror_op, ext, data); - BUG_ON(err); + if (err) + scoutfs_corruption(sb, + SC_SERVER_EXTENT_CLEANUP, + corrupt_server_extent_cleanup, + "op %u ext "SE_FMT" ret %d", + op, SE_ARG(ext), err); } }