From c061ada671ae9dfa3282e044b128d84d249aa612 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Thu, 30 May 2019 14:28:11 -0700 Subject: [PATCH] scoutfs: mounts connect once server is listening An elected leader writes a quorum block showing that it's elected before it assumes exclusive access to the device and starts bringing up the server. This lets another later elected leader find and fence it if something happens. Other mounts were trying to connect to the server once this elected quorum block was written and before the server was listening. They'd get connection refused, decide to elect a new leader, and try to fence the server that's still running. Now, they should have tried much harder to connect to the elected leader instead of taking a single failed attempt as fatal. But that's a problem for another day that involves more work in balancing timeouts and retries. But mounts should not have tried to connect to the server until it's listening. That's easy to signal by adding a simple listening flag to the quorum block. Now mounts will only try to connect once they see the listening flag and don't see these racy refused connections. 
Signed-off-by: Zach Brown --- kmod/src/format.h | 3 ++- kmod/src/quorum.c | 34 +++++++++++++++++++++++++++++++--- kmod/src/quorum.h | 2 ++ kmod/src/server.c | 8 ++++++-- 4 files changed, 41 insertions(+), 6 deletions(-) diff --git a/kmod/src/format.h b/kmod/src/format.h index 9fcbc082..79317691 100644 --- a/kmod/src/format.h +++ b/kmod/src/format.h @@ -448,7 +448,8 @@ struct scoutfs_quorum_block { } __packed; #define SCOUTFS_QUORUM_BLOCK_FLAG_ELECTED (1 << 0) -#define SCOUTFS_QUORUM_BLOCK_FLAGS_UNKNOWN (U8_MAX << 1) +#define SCOUTFS_QUORUM_BLOCK_FLAG_LISTENING (1 << 1) +#define SCOUTFS_QUORUM_BLOCK_FLAGS_UNKNOWN (U8_MAX << 2) #define SCOUTFS_QUORUM_MAX_SLOTS SCOUTFS_QUORUM_BLOCKS diff --git a/kmod/src/quorum.c b/kmod/src/quorum.c index 5804a3a9..7664aef6 100644 --- a/kmod/src/quorum.c +++ b/kmod/src/quorum.c @@ -58,6 +58,13 @@ * server. This ensures that racing elected leaders will always result * in fencing all but the most recent. * + * Once the elected leader verifies its written elected block it tries + * to start up the server. Once it's listening it writes another quorum + * block that indicates that it's listening. Once mounts see that + * they'll try to connect. If the server takes too long to write its + * listening flag the mounts may decide that the leader has died and try + * to elect a new leader. 
+ * * XXX: * - actually fence * - add temporary priority for choosing a specific mount as a leader @@ -667,9 +674,10 @@ int scoutfs_quorum_election(struct super_block *sb, char *our_name, vote_streak = 0; } - /* return if we found a new leader or ran out of time */ - if (qei->elected_nr > old_elected_nr || - ktime_after(now, timeout_abs)) { + /* return if we found a new listening leader or timed out */ + if (((qei->elected_nr > old_elected_nr) && + (qei->flags & SCOUTFS_QUORUM_BLOCK_FLAG_LISTENING)) || + ktime_after(now, timeout_abs)) { if (qei->elected_nr > 0) { scoutfs_inc_counter(sb, quorum_found_leader); ret = 0; @@ -749,6 +757,26 @@ out: return ret; } +/* + * The calling server has successfully started and is listening for + * connections. It writes a new block to communicate to the other + * mounts that they should now try to connect. We do increase the write_nr + * here to still indicate that we're alive. + */ +int scoutfs_quorum_set_listening(struct super_block *sb, + struct scoutfs_quorum_elected_info *qei) +{ + struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + + qei->flags |= SCOUTFS_QUORUM_BLOCK_FLAG_LISTENING; + le64_add_cpu(&qei->write_nr, 1); + + return write_quorum_block(sb, super->hdr.fsid, qei->config_gen, + qei->config_slot, qei->write_nr, + qei->elected_nr, qei->unmount_barrier, + qei->config_slot, qei->flags); +} + /* * The calling server is shutting down and has finished modifying * persistent state. 
We clear the elected flag from our quorum block so diff --git a/kmod/src/quorum.h b/kmod/src/quorum.h index e558d326..cea55525 100644 --- a/kmod/src/quorum.h +++ b/kmod/src/quorum.h @@ -16,6 +16,8 @@ int scoutfs_quorum_election(struct super_block *sb, char *our_name, u64 old_elected_nr, ktime_t timeout_abs, bool unmounting, u64 our_umb, struct scoutfs_quorum_elected_info *qei); +int scoutfs_quorum_set_listening(struct super_block *sb, + struct scoutfs_quorum_elected_info *qei); int scoutfs_quorum_clear_elected(struct super_block *sb, struct scoutfs_quorum_elected_info *qei); int scoutfs_quorum_update_barrier(struct super_block *sb, diff --git a/kmod/src/server.c b/kmod/src/server.c index fd7cda3d..fc07a1a5 100644 --- a/kmod/src/server.c +++ b/kmod/src/server.c @@ -2334,8 +2334,12 @@ static void scoutfs_server_worker(struct work_struct *work) server->conn = conn; scoutfs_net_listen(sb, conn); - /* wait_event/wake_up provide barriers */ - wait_event_interruptible(server->waitq, server->shutting_down); + ret = scoutfs_quorum_set_listening(sb, &server->qei); + + if (ret == 0) { + /* wait_event/wake_up provide barriers */ + wait_event_interruptible(server->waitq, server->shutting_down); + } scoutfs_info(sb, "server shutting down on "SIN_FMT, SIN_ARG(&sin));