diff --git a/kmod/src/server.c b/kmod/src/server.c index 65b5d116..f4e09031 100644 --- a/kmod/src/server.c +++ b/kmod/src/server.c @@ -1299,12 +1299,10 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l * is nested inside holding commits so we recheck the persistent item * each time we commit to make sure it's still what we think. The * caller is still going to send the item to the client so we update the - * caller's each time we make progress. This is a best-effort attempt - * to clean up and it's valid to leave extents in data_freed we don't - * return errors to the caller. The client will continue the work later - * in get_log_trees or as the rid is reclaimed. + * caller's each time we make progress. If we hit an error applying the + * changes we make then we can't send the log_trees to the client. */ -static void try_drain_data_freed(struct super_block *sb, struct scoutfs_log_trees *lt) +static int try_drain_data_freed(struct super_block *sb, struct scoutfs_log_trees *lt) { DECLARE_SERVER_INFO(sb, server); struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); @@ -1313,6 +1311,7 @@ static void try_drain_data_freed(struct super_block *sb, struct scoutfs_log_tree struct scoutfs_log_trees drain; struct scoutfs_key key; COMMIT_HOLD(hold); + bool apply = false; int ret = 0; int err; @@ -1321,22 +1320,27 @@ static void try_drain_data_freed(struct super_block *sb, struct scoutfs_log_tree while (lt->data_freed.total_len != 0) { server_hold_commit(sb, &hold); mutex_lock(&server->logs_mutex); + apply = true; ret = find_log_trees_item(sb, &super->logs_root, false, rid, U64_MAX, &drain); - if (ret < 0) + if (ret < 0) { + ret = 0; break; + } /* careful to only keep draining the caller's specific open trans */ if (drain.nr != lt->nr || drain.get_trans_seq != lt->get_trans_seq || drain.commit_trans_seq != lt->commit_trans_seq || drain.flags != lt->flags) { - ret = -ENOENT; + ret = 0; break; } ret = scoutfs_btree_dirty(sb, &server->alloc, &server->wri, &super->logs_root, &key); - if (ret < 0) + if (ret < 0) { + ret = 0; break; + } /* moving can modify and return errors, always update caller and item */ mutex_lock(&server->alloc_mutex); @@ -1352,19 +1356,19 @@ static void try_drain_data_freed(struct super_block *sb, struct scoutfs_log_tree BUG_ON(err < 0); /* dirtying must guarantee success */ mutex_unlock(&server->logs_mutex); - ret = server_apply_commit(sb, &hold, ret); - if (ret < 0) { - ret = 0; /* don't try to abort, ignoring ret */ + apply = false; + + if (ret < 0) break; - } } - /* try to cleanly abort and write any partial dirty btree blocks, but ignore result */ - if (ret < 0) { + if (apply) { mutex_unlock(&server->logs_mutex); - server_apply_commit(sb, &hold, 0); + server_apply_commit(sb, &hold, ret); } + + return ret; } /* @@ -1572,9 +1576,9 @@ out: scoutfs_err(sb, "error %d getting log trees for rid %016llx: %s", ret, rid, err_str); - /* try to drain excessive data_freed with additional commits, if needed, ignoring err */ + /* try to drain excessive data_freed with additional commits, if needed */ if (ret == 0) - try_drain_data_freed(sb, <); + ret = try_drain_data_freed(sb, <); return scoutfs_net_response(sb, conn, cmd, id, ret, <, sizeof(lt)); } diff --git a/kmod/src/trans.c b/kmod/src/trans.c index bc07071b..10494c43 100644 --- a/kmod/src/trans.c +++ b/kmod/src/trans.c @@ -159,6 +159,58 @@ static bool drained_holders(struct trans_info *tri) return holders == 0; } +static int commit_current_log_trees(struct super_block *sb, char **str) +{ + DECLARE_TRANS_INFO(sb, tri); + + return (*str = "data submit", scoutfs_inode_walk_writeback(sb, true)) ?: + (*str = "item dirty", scoutfs_item_write_dirty(sb)) ?: + (*str = "data prepare", scoutfs_data_prepare_commit(sb)) ?: + (*str = "alloc prepare", scoutfs_alloc_prepare_commit(sb, &tri->alloc, &tri->wri)) ?: + (*str = "meta write", scoutfs_block_writer_write(sb, &tri->wri)) ?: + (*str = "data wait", scoutfs_inode_walk_writeback(sb, false)) ?: + (*str = "commit log trees", commit_btrees(sb)) ?: + scoutfs_item_write_done(sb); +} + +static int get_next_log_trees(struct super_block *sb, char **str) +{ + return (*str = "get log trees", scoutfs_trans_get_log_trees(sb)); +} + +static int retry_forever(struct super_block *sb, int (*func)(struct super_block *sb, char **str)) +{ + bool retrying = false; + char *str; + int ret; + + do { + str = NULL; + + ret = func(sb, &str); + if (ret < 0) { + if (!retrying) { + scoutfs_warn(sb, "critical transaction commit failure: %s = %d, retrying", + str, ret); + retrying = true; + } + + if (scoutfs_forcing_unmount(sb)) { + ret = -EIO; + break; + } + + msleep(2 * MSEC_PER_SEC); + + } else if (retrying) { + scoutfs_info(sb, "retried transaction commit succeeded"); + } + + } while (ret < 0); + + return ret; +} + /* * This work func is responsible for writing out all the dirty blocks * that make up the current dirty transaction. It prevents writers from @@ -184,8 +236,6 @@ void scoutfs_trans_write_func(struct work_struct *work) struct trans_info *tri = container_of(work, struct trans_info, write_work.work); struct super_block *sb = tri->sb; struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - bool retrying = false; - char *s = NULL; int ret = 0; tri->task = current; @@ -214,37 +264,9 @@ void scoutfs_trans_write_func(struct work_struct *work) scoutfs_inc_counter(sb, trans_commit_written); - do { - ret = (s = "data submit", scoutfs_inode_walk_writeback(sb, true)) ?: - (s = "item dirty", scoutfs_item_write_dirty(sb)) ?: - (s = "data prepare", scoutfs_data_prepare_commit(sb)) ?: - (s = "alloc prepare", scoutfs_alloc_prepare_commit(sb, &tri->alloc, - &tri->wri)) ?: - (s = "meta write", scoutfs_block_writer_write(sb, &tri->wri)) ?: - (s = "data wait", scoutfs_inode_walk_writeback(sb, false)) ?: - (s = "commit log trees", commit_btrees(sb)) ?: - scoutfs_item_write_done(sb) ?: - (s = "get log trees", scoutfs_trans_get_log_trees(sb)); - if (ret < 0) { - if (!retrying) { - scoutfs_warn(sb, "critical transaction commit failure: %s = %d, retrying", - s, ret); - retrying = true; - } - - if (scoutfs_forcing_unmount(sb)) { - ret = -EIO; - break; - } - - msleep(2 * MSEC_PER_SEC); - - } else if (retrying) { - scoutfs_info(sb, "retried transaction commit succeeded"); - } - - } while (ret < 0); - + /* retry {commit,get}_log_trees until they succeeed, can only fail when forcing unmount */ + ret = retry_forever(sb, commit_current_log_trees) ?: + retry_forever(sb, get_next_log_trees); out: spin_lock(&tri->write_lock); tri->write_count++;