diff --git a/kmod/src/server.c b/kmod/src/server.c index 6ecd8e85..b25b2019 100644 --- a/kmod/src/server.c +++ b/kmod/src/server.c @@ -38,6 +38,7 @@ #include "srch.h" #include "alloc.h" #include "forest.h" +#include "recov.h" /* * Every active mount can act as the server that listens on a net @@ -96,6 +97,9 @@ struct server_info { /* stable versions stored from commits, given in locks and rpcs */ seqcount_t roots_seqcount; struct scoutfs_net_roots roots; + + /* recovery timeout fences from work */ + struct work_struct fence_pending_recov_work; }; #define DECLARE_SERVER_INFO(sb, name) \ @@ -1198,8 +1202,13 @@ static int server_greeting(struct super_block *sb, ret = scoutfs_server_apply_commit(sb, ret); queue_work(server->wq, &server->farewell_work); + if (ret < 0) + goto send_err; } + scoutfs_server_recov_finish(sb, le64_to_cpu(gr->rid), SCOUTFS_RECOV_GREETING); + ret = 0; + send_err: err = ret; @@ -1259,6 +1268,24 @@ static bool invalid_mounted_client_item(struct scoutfs_btree_item_ref *iref) sizeof(struct scoutfs_mounted_client_btree_val)); } +static int reclaim_rid(struct super_block *sb, u64 rid) +{ + int ret; + + ret = scoutfs_server_hold_commit(sb); + if (ret < 0) + return ret; + + /* delete mounted client last, client reconnect looks for it */ + ret = scoutfs_lock_server_farewell(sb, rid) ?: + remove_trans_seq(sb, rid) ?: + reclaim_log_trees(sb, rid) ?: + cancel_srch_compact(sb, rid) ?: + delete_mounted_client(sb, rid); + + return scoutfs_server_apply_commit(sb, ret); +} + /* * This work processes farewell requests asynchronously. Requests from * quorum members can be held until only the final majority remains and @@ -1386,18 +1413,7 @@ static void farewell_worker(struct work_struct *work) /* process and send farewell responses */ list_for_each_entry_safe(fw, tmp, &send, entry) { - ret = scoutfs_server_hold_commit(sb); - if (ret) - goto out; - - /* delete mounted client last, client reconnect looks for it */ - ret = scoutfs_lock_server_farewell(sb, fw->rid) ?: - remove_trans_seq(sb, fw->rid) ?: - reclaim_log_trees(sb, fw->rid) ?: - cancel_srch_compact(sb, fw->rid) ?: - delete_mounted_client(sb, fw->rid); - - ret = scoutfs_server_apply_commit(sb, ret); + ret = reclaim_rid(sb, fw->rid); if (ret) goto out; } @@ -1540,6 +1556,141 @@ static void server_notify_down(struct super_block *sb, } } +/* + * All clients have recovered all state. Now we can kick all the work + * that was waiting on recovery. + * + * It's a bit of a false dependency to have all work wait for completion + * before any work can make progress, but recovery is naturally + * concerned about in-memory state. It should all be quick to recover + * once a client arrives. + */ +static void finished_recovery(struct super_block *sb) +{ + DECLARE_SERVER_INFO(sb, server); + int ret = 0; + + scoutfs_info(sb, "all clients recovered"); + + if (ret < 0) { + scoutfs_err(sb, "error %d resuming after recovery finished, shutting down", ret); + stop_server(server); + } +} + +void scoutfs_server_recov_finish(struct super_block *sb, u64 rid, int which) +{ + if (scoutfs_recov_finish(sb, rid, which) > 0) + finished_recovery(sb); +} + +/* + * If the recovery timeout is too short we'll prematurely evict mounts + * that would have recovered. They need time to have their sockets + * timeout, reconnect to the current server, and fully recover their + * state. + * + * If it's too long we'll needlessly delay resuming operations after + * clients crash and will never recover. + */ +#define SERVER_RECOV_TIMEOUT_MS (30 * MSEC_PER_SEC) + +/* + * Not all clients recovered in time. We fence them and reclaim + * whatever resources they were using. If we see a rid here then we're + * going to fence it, regardless of if it manages to finish recovery + * while we're fencing it. + */ +static void fence_pending_recov_worker(struct work_struct *work) +{ + struct server_info *server = container_of(work, struct server_info, + fence_pending_recov_work); + struct super_block *sb = server->sb; + u64 rid; + int ret; + + while ((rid = scoutfs_recov_next_pending(sb, SCOUTFS_RECOV_ALL)) > 0) { + scoutfs_err(sb, "%lu ms recovery timeout expired for client rid %016llx, fencing", + SERVER_RECOV_TIMEOUT_MS, rid); + + ret = reclaim_rid(sb, rid); + if (ret < 0) { + scoutfs_err(sb, "error %d reclaiming rid %016llx, shutting down", ret, rid); + stop_server(server); + break; + } + + scoutfs_server_recov_finish(sb, rid, SCOUTFS_RECOV_ALL); + } +} + +static void recovery_timeout(struct super_block *sb) +{ + DECLARE_SERVER_INFO(sb, server); + + if (!server->shutting_down) + queue_work(server->wq, &server->fence_pending_recov_work); +} + +/* + * As the server starts up it needs to start waiting for recovery from + * any clients which were previously still mounted in the last running + * server. This is done before networking is started so we won't + * receive any messages from clients until we've prepared them all. If + * the clients don't recover in time then they'll be fenced. + */ +static int start_recovery(struct super_block *sb) +{ + DECLARE_SERVER_INFO(sb, server); + struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + SCOUTFS_BTREE_ITEM_REF(iref); + struct scoutfs_key key; + unsigned int nr = 0; + u64 rid; + int ret; + + for (rid = 0; ; rid++) { + init_mounted_client_key(&key, rid); + ret = scoutfs_btree_next(sb, &super->mounted_clients, &key, &iref); + if (ret == -ENOENT) { + ret = 0; + break; + } + if (ret == 0) { + rid = le64_to_cpu(iref.key->skmc_rid); + scoutfs_btree_put_iref(&iref); + } + if (ret < 0) + goto out; + + ret = scoutfs_recov_prepare(sb, rid, SCOUTFS_RECOV_ALL); + if (ret < 0) { + scoutfs_err(sb, "error %d preparing recovery for client rid %016llx, shutting down", + ret, rid); + goto out; + } + + nr++; + } + + if (nr > 0) { + scoutfs_info(sb, "waiting for %u clients to recover", nr); + + ret = scoutfs_recov_begin(sb, recovery_timeout, SERVER_RECOV_TIMEOUT_MS); + if (ret > 0) { + finished_recovery(sb); + ret = 0; + } + } + +out: + if (ret < 0) { + scoutfs_err(sb, "error %d starting recovery, shutting down", ret); + stop_server(server); + } + return ret; +} + static void scoutfs_server_worker(struct work_struct *work) { struct server_info *server = container_of(work, struct server_info, @@ -1610,8 +1761,8 @@ static void scoutfs_server_worker(struct work_struct *work) goto shutdown; } - ret = scoutfs_lock_server_setup(sb, &server->alloc, &server->wri, - max_vers); + ret = scoutfs_lock_server_setup(sb, &server->alloc, &server->wri, max_vers) ?: + start_recovery(sb); if (ret) goto shutdown; @@ -1635,6 +1786,10 @@ shutdown: scoutfs_net_shutdown(sb, conn); server->conn = NULL; + /* stop tracking recovery, cancel timer, flush any fencing */ + scoutfs_recov_shutdown(sb); + flush_work(&server->fence_pending_recov_work); + /* wait for extra queues by requests, won't find waiters */ flush_work(&server->commit_work); @@ -1724,6 +1879,7 @@ int scoutfs_server_setup(struct super_block *sb) mutex_init(&server->srch_mutex); mutex_init(&server->mounted_clients_mutex); seqcount_init(&server->roots_seqcount); + INIT_WORK(&server->fence_pending_recov_work, fence_pending_recov_worker); server->wq = alloc_workqueue("scoutfs_server", WQ_UNBOUND | WQ_NON_REENTRANT, 0); diff --git a/kmod/src/server.h b/kmod/src/server.h index 84e25ddb..e06c0818 100644 --- a/kmod/src/server.h +++ b/kmod/src/server.h @@ -64,6 +64,7 @@ int scoutfs_server_lock_recover_request(struct super_block *sb, u64 rid, struct scoutfs_key *key); int scoutfs_server_hold_commit(struct super_block *sb); int scoutfs_server_apply_commit(struct super_block *sb, int err); +void scoutfs_server_recov_finish(struct super_block *sb, u64 rid, int which); struct sockaddr_in; struct scoutfs_quorum_elected_info; diff --git a/kmod/src/super.c b/kmod/src/super.c index 1a795a69..e66160ed 100644 --- a/kmod/src/super.c +++ b/kmod/src/super.c @@ -44,6 +44,7 @@ #include "srch.h" #include "item.h" #include "alloc.h" +#include "recov.h" #include "scoutfs_trace.h" static struct dentry *scoutfs_debugfs_root; @@ -260,6 +261,7 @@ static void scoutfs_put_super(struct super_block *sb) scoutfs_quorum_destroy(sb); scoutfs_lock_shutdown(sb); scoutfs_server_destroy(sb); + scoutfs_recov_destroy(sb); scoutfs_net_destroy(sb); scoutfs_lock_destroy(sb); @@ -593,6 +595,7 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent) scoutfs_setup_trans(sb) ?: scoutfs_lock_setup(sb) ?: scoutfs_net_setup(sb) ?: + scoutfs_recov_setup(sb) ?: scoutfs_server_setup(sb) ?: scoutfs_quorum_setup(sb) ?: scoutfs_client_setup(sb) ?: