Merge pull request #83 from versity/zab/heartbeat_during_fencing

Send quorum heartbeats while fencing
This commit is contained in:
Zach Brown
2022-04-01 09:12:41 -07:00
committed by GitHub
7 changed files with 174 additions and 159 deletions

View File

@@ -157,6 +157,7 @@
EXPAND_COUNTER(orphan_scan_error) \
EXPAND_COUNTER(orphan_scan_item) \
EXPAND_COUNTER(orphan_scan_omap_set) \
EXPAND_COUNTER(quorum_candidate_server_stopping) \
EXPAND_COUNTER(quorum_elected) \
EXPAND_COUNTER(quorum_fence_error) \
EXPAND_COUNTER(quorum_fence_leader) \

View File

@@ -749,7 +749,7 @@ out:
if (ret < 0) {
scoutfs_err(sb, "lock server err %d during client rid %016llx farewell, shutting down",
ret, rid);
scoutfs_server_abort(sb);
scoutfs_server_stop(sb);
}
return ret;

View File

@@ -1292,7 +1292,7 @@ restart:
if (ret) {
scoutfs_err(sb, "client fence returned err %d, shutting down server",
ret);
scoutfs_server_abort(sb);
scoutfs_server_stop(sb);
}
}
destroy_conn(acc);

View File

@@ -105,6 +105,8 @@ enum quorum_role { FOLLOWER, CANDIDATE, LEADER };
struct quorum_status {
enum quorum_role role;
u64 term;
u64 server_start_term;
int server_event;
int vote_for;
unsigned long vote_bits;
ktime_t timeout;
@@ -117,7 +119,6 @@ struct quorum_info {
bool shutdown;
int our_quorum_slot_nr;
unsigned long flags;
int votes_needed;
spinlock_t show_lock;
@@ -128,8 +129,6 @@ struct quorum_info {
struct scoutfs_sysfs_attrs ssa;
};
#define QINF_FLAG_SERVER 0
#define DECLARE_QUORUM_INFO(sb, name) \
struct quorum_info *name = SCOUTFS_SB(sb)->quorum_info
#define DECLARE_QUORUM_INFO_KOBJ(kobj, name) \
@@ -494,16 +493,6 @@ static int update_quorum_block(struct super_block *sb, int event, u64 term, bool
return ret;
}
/*
* The calling server has fenced previous leaders and reclaimed their
* resources. We can now update our fence event with a greater term to
* stop future leaders from doing the same.
*/
int scoutfs_quorum_fence_complete(struct super_block *sb, u64 term)
{
return update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_FENCE, term, true);
}
/*
* The calling server has been elected and has started running but can't
* yet assume that it has exclusive access to the metadata device. We
@@ -593,15 +582,9 @@ int scoutfs_quorum_fence_leaders(struct super_block *sb, u64 term)
}
out:
if (fence_started) {
err = scoutfs_fence_wait_fenced(sb, msecs_to_jiffies(SCOUTFS_QUORUM_FENCE_TO_MS));
if (ret == 0)
ret = err;
} else {
err = scoutfs_quorum_fence_complete(sb, term);
if (ret == 0)
ret = err;
}
err = scoutfs_fence_wait_fenced(sb, msecs_to_jiffies(SCOUTFS_QUORUM_FENCE_TO_MS));
if (ret == 0)
ret = err;
if (ret < 0)
scoutfs_inc_counter(sb, quorum_fence_error);
@@ -627,9 +610,8 @@ static void update_show_status(struct quorum_info *qinf, struct quorum_status *q
/*
* The quorum work always runs in the background of quorum member
* mounts. It's responsible for starting and stopping the server if
* it's elected leader, and the server can call back into it to let it
* know that it has shut itself down (perhaps due to error) so that the
* work should stop sending heartbeats.
* it's elected leader. While it's leader it sends heartbeats to
* suppress other quorum work from standing for election.
*/
static void scoutfs_quorum_worker(struct work_struct *work)
{
@@ -637,7 +619,7 @@ static void scoutfs_quorum_worker(struct work_struct *work)
struct super_block *sb = qinf->sb;
struct sockaddr_in unused;
struct quorum_host_msg msg;
struct quorum_status qst;
struct quorum_status qst = {0,};
int ret;
int err;
@@ -646,9 +628,7 @@ static void scoutfs_quorum_worker(struct work_struct *work)
/* start out as a follower */
qst.role = FOLLOWER;
qst.term = 0;
qst.vote_for = -1;
qst.vote_bits = 0;
/* read our starting term from greatest in all events in all slots */
read_greatest_term(sb, &qst.term);
@@ -684,20 +664,6 @@ static void scoutfs_quorum_worker(struct work_struct *work)
msg.term < qst.term)
msg.type = SCOUTFS_QUORUM_MSG_INVALID;
/* if the server has shutdown we become follower */
if (!test_bit(QINF_FLAG_SERVER, &qinf->flags) &&
qst.role == LEADER) {
qst.role = FOLLOWER;
qst.vote_for = -1;
qst.vote_bits = 0;
qst.timeout = election_timeout();
scoutfs_inc_counter(sb, quorum_server_shutdown);
send_msg_others(sb, SCOUTFS_QUORUM_MSG_RESIGNATION,
qst.term);
scoutfs_inc_counter(sb, quorum_send_resignation);
}
trace_scoutfs_quorum_loop(sb, qst.role, qst.term, qst.vote_for,
qst.vote_bits,
ktime_to_timespec64(qst.timeout));
@@ -708,8 +674,6 @@ static void scoutfs_quorum_worker(struct work_struct *work)
if (qst.role == LEADER) {
scoutfs_warn(sb, "saw msg type %u from %u for term %llu while leader in term %llu, shutting down server.",
msg.type, msg.from, msg.term, qst.term);
update_show_status(qinf, &qst);
scoutfs_server_stop(sb);
}
qst.role = FOLLOWER;
qst.term = msg.term;
@@ -731,6 +695,13 @@ static void scoutfs_quorum_worker(struct work_struct *work)
/* followers and candidates start new election on timeout */
if (qst.role != LEADER &&
ktime_after(ktime_get(), qst.timeout)) {
/* .. but only if their server has stopped */
if (!scoutfs_server_is_down(sb)) {
qst.timeout = election_timeout();
scoutfs_inc_counter(sb, quorum_candidate_server_stopping);
continue;
}
qst.role = CANDIDATE;
qst.term++;
qst.vote_for = -1;
@@ -779,24 +750,62 @@ static void scoutfs_quorum_worker(struct work_struct *work)
if (ret < 0)
goto out;
/* make very sure server is fully shut down */
scoutfs_server_stop(sb);
/* set server bit before server shutdown could clear */
set_bit(QINF_FLAG_SERVER, &qinf->flags);
qst.server_start_term = qst.term;
qst.server_event = SCOUTFS_QUORUM_EVENT_ELECT;
scoutfs_server_start(sb, qst.term);
}
ret = scoutfs_server_start(sb, qst.term);
if (ret < 0) {
clear_bit(QINF_FLAG_SERVER, &qinf->flags);
/* store our increased term */
err = update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_STOP, qst.term,
true);
if (err < 0) {
ret = err;
goto out;
}
ret = 0;
continue;
/*
* This leader's server is up, having finished fencing
* previous leaders. We update the fence event with the
* current term to let future leaders know that previous
* servers have been fenced.
*/
if (qst.role == LEADER && qst.server_event != SCOUTFS_QUORUM_EVENT_FENCE &&
scoutfs_server_is_up(sb)) {
ret = update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_FENCE, qst.term, true);
if (ret < 0)
goto out;
qst.server_event = SCOUTFS_QUORUM_EVENT_FENCE;
}
/*
* Stop a running server if we're no longer leader in
* its term.
*/
if (!(qst.role == LEADER && qst.term == qst.server_start_term) &&
scoutfs_server_is_running(sb)) {
scoutfs_server_stop(sb);
}
/*
* A previously running server has stopped. The quorum
* protocol might have shut it down by changing roles or
* it might have stopped on its own, perhaps on errors.
* If we're still a leader then we become a follower and
* send resignations to encourage the next election.
* Always update the _STOP event to stop connections and
* fencing.
*/
if (qst.server_start_term > 0 && scoutfs_server_is_down(sb)) {
if (qst.role == LEADER) {
qst.role = FOLLOWER;
qst.vote_for = -1;
qst.vote_bits = 0;
qst.timeout = election_timeout();
scoutfs_inc_counter(sb, quorum_server_shutdown);
send_msg_others(sb, SCOUTFS_QUORUM_MSG_RESIGNATION,
qst.server_start_term);
scoutfs_inc_counter(sb, quorum_send_resignation);
}
ret = update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_STOP,
qst.server_start_term, true);
if (ret < 0)
goto out;
qst.server_start_term = 0;
}
/* leaders regularly send heartbeats to delay elections */
@@ -836,11 +845,16 @@ static void scoutfs_quorum_worker(struct work_struct *work)
update_show_status(qinf, &qst);
/* always try to stop a running server as we stop */
if (test_bit(QINF_FLAG_SERVER, &qinf->flags)) {
scoutfs_server_stop(sb);
scoutfs_fence_stop(sb);
send_msg_others(sb, SCOUTFS_QUORUM_MSG_RESIGNATION,
qst.term);
if (scoutfs_server_is_running(sb)) {
scoutfs_server_stop_wait(sb);
send_msg_others(sb, SCOUTFS_QUORUM_MSG_RESIGNATION, qst.term);
if (qst.server_start_term > 0) {
err = update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_STOP,
qst.server_start_term, true);
if (err < 0 && ret == 0)
ret = err;
}
}
/* record that this slot no longer has an active quorum */
@@ -852,21 +866,6 @@ out:
}
}
/*
* The calling server has shutdown and is no longer using shared
* resources. Clear the bit so that we stop sending heartbeats and
* allow the next server to be elected. Update the stop event so that
* it won't be considered available by clients or fenced by the next
* leader.
*/
void scoutfs_quorum_server_shutdown(struct super_block *sb, u64 term)
{
DECLARE_QUORUM_INFO(sb, qinf);
clear_bit(QINF_FLAG_SERVER, &qinf->flags);
update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_STOP, term, true);
}
/*
* Clients read quorum blocks looking for the leader with a server whose
* address it can try and connect to.
@@ -988,6 +987,8 @@ static ssize_t status_show(struct kobject *kobj, struct kobj_attribute *attr,
qinf->our_quorum_slot_nr);
snprintf_ret(buf, size, &ret, "term %llu\n",
qst.term);
snprintf_ret(buf, size, &ret, "server_start_term %llu\n", qst.server_start_term);
snprintf_ret(buf, size, &ret, "server_event %d\n", qst.server_event);
snprintf_ret(buf, size, &ret, "role %d (%s)\n",
qst.role, role_str(qst.role));
snprintf_ret(buf, size, &ret, "vote_for %d\n",

View File

@@ -2,14 +2,12 @@
#define _SCOUTFS_QUORUM_H_
int scoutfs_quorum_server_sin(struct super_block *sb, struct sockaddr_in *sin);
void scoutfs_quorum_server_shutdown(struct super_block *sb, u64 term);
u8 scoutfs_quorum_votes_needed(struct super_block *sb);
void scoutfs_quorum_slot_sin(struct scoutfs_super_block *super, int i,
struct sockaddr_in *sin);
int scoutfs_quorum_fence_leaders(struct super_block *sb, u64 term);
int scoutfs_quorum_fence_complete(struct super_block *sb, u64 term);
int scoutfs_quorum_setup(struct super_block *sb);
void scoutfs_quorum_shutdown(struct super_block *sb);

View File

@@ -59,9 +59,7 @@ struct server_info {
struct workqueue_struct *wq;
struct work_struct work;
int err;
bool shutting_down;
struct completion start_comp;
int status;
u64 term;
struct scoutfs_net_connection *conn;
@@ -155,6 +153,62 @@ static bool get_volopt_val(struct server_info *server, int nr, u64 *val)
return is_set;
}
/*
 * Values stored in server_info's status field to track the lifecycle
 * of the server work.
 */
enum {
/*
 * Passed as both the old and new value to cmpxchg() by the status
 * helpers so that they can sample the current status without
 * changing it.  No visible transition stores this value.
 */
SERVER_NOP = 0,
/* set by scoutfs_server_start() just before it queues the server work */
SERVER_STARTING,
/* set by the server work once it is listening for connections */
SERVER_UP,
/* set by stop_server() to ask a starting or up server to shut down */
SERVER_STOPPING,
/* initial status from setup, and set again as the server work exits */
SERVER_DOWN,
};
bool scoutfs_server_is_running(struct super_block *sb)
{
DECLARE_SERVER_INFO(sb, server);
long was = cmpxchg(&server->status, SERVER_NOP, SERVER_NOP);
return was == SERVER_STARTING || was == SERVER_UP;
}
/*
 * Report whether the server has finished starting, i.e. its status is
 * exactly SERVER_UP.
 */
bool scoutfs_server_is_up(struct super_block *sb)
{
	DECLARE_SERVER_INFO(sb, server);
	long cur = cmpxchg(&server->status, SERVER_NOP, SERVER_NOP);

	return cur == SERVER_UP;
}
/*
 * Report whether the server is fully stopped, i.e. its status is
 * exactly SERVER_DOWN.
 */
bool scoutfs_server_is_down(struct super_block *sb)
{
	DECLARE_SERVER_INFO(sb, server);
	long cur = cmpxchg(&server->status, SERVER_NOP, SERVER_NOP);

	return cur == SERVER_DOWN;
}
/*
 * True only while a stop has been requested and the server work has
 * not yet marked itself down.
 */
static bool server_is_stopping(struct server_info *server)
{
	long cur = cmpxchg(&server->status, SERVER_NOP, SERVER_NOP);

	return cur == SERVER_STOPPING;
}
/*
 * Ask a starting or up server to shut down.  The status is moved to
 * SERVER_STOPPING and waiters on the server waitq are woken, but only
 * if the status didn't change between sampling it and swapping it.
 * Any other status is left alone.
 */
static void stop_server(struct server_info *server)
{
	long cur = cmpxchg(&server->status, SERVER_NOP, SERVER_NOP);

	/* nothing to do unless the server is starting or up */
	if (cur != SERVER_STARTING && cur != SERVER_UP)
		return;

	if (cmpxchg(&server->status, cur, SERVER_STOPPING) == cur)
		wake_up(&server->waitq);
}
/*
 * Mark a starting server as up.  This has no effect if the status is
 * no longer SERVER_STARTING, for example because a stop was already
 * requested.
 */
static void server_up(struct server_info *server)
{
	/* the result is deliberately unused, losing the swap is fine */
	(void)cmpxchg(&server->status, SERVER_STARTING, SERVER_UP);
}
/*
 * Mark the server as down from whatever status it was last seen in.
 *
 * NOTE(review): the swap is attempted once.  If the status changes
 * between the sample and the swap (e.g. a concurrent stop_server())
 * the swap fails and the status isn't set to SERVER_DOWN -- confirm
 * whether callers can race here.
 */
static void server_down(struct server_info *server)
{
	long cur = cmpxchg(&server->status, SERVER_NOP, SERVER_NOP);

	if (cur == SERVER_DOWN)
		return;

	cmpxchg(&server->status, cur, SERVER_DOWN);
}
struct commit_waiter {
struct completion comp;
@@ -162,24 +216,6 @@ struct commit_waiter {
int ret;
};
static bool test_shutting_down(struct server_info *server)
{
smp_rmb();
return server->shutting_down;
}
static void set_shutting_down(struct server_info *server, bool val)
{
server->shutting_down = val;
smp_wmb();
}
static void stop_server(struct server_info *server)
{
set_shutting_down(server, true);
wake_up(&server->waitq);
}
/*
* Hold the shared rwsem that lets multiple holders modify blocks in the
* current commit and prevents the commit worker from acquiring the
@@ -2051,8 +2087,8 @@ static void server_log_merge_free_work(struct work_struct *work)
bool commit = false;
int ret = 0;
/* shutdown waits for us, we'll eventually load set shutting_down */
while (!server->shutting_down) {
while (!server_is_stopping(server)) {
scoutfs_server_hold_commit(sb);
mutex_lock(&server->logs_mutex);
commit = true;
@@ -3180,7 +3216,7 @@ out:
*/
static void queue_farewell_work(struct server_info *server)
{
if (!test_shutting_down(server))
if (!server_is_stopping(server))
queue_work(server->wq, &server->farewell_work);
}
@@ -3693,14 +3729,14 @@ static void fence_pending_recov_worker(struct work_struct *work)
}
if (ret < 0)
scoutfs_server_abort(sb);
stop_server(server);
}
static void recovery_timeout(struct super_block *sb)
{
DECLARE_SERVER_INFO(sb, server);
if (!test_shutting_down(server))
if (!server_is_stopping(server))
queue_work(server->wq, &server->fence_pending_recov_work);
}
@@ -3765,7 +3801,7 @@ out:
static void queue_reclaim_work(struct server_info *server, unsigned long delay)
{
if (!test_shutting_down(server))
if (!server_is_stopping(server))
queue_delayed_work(server->wq, &server->reclaim_dwork, delay);
}
@@ -3800,7 +3836,7 @@ static void reclaim_worker(struct work_struct *work)
if (error == true) {
scoutfs_err(sb, "saw error indicator on fence request for rid %016llx, shutting down server",
rid);
scoutfs_server_abort(sb);
stop_server(server);
ret = -ESHUTDOWN;
goto out;
}
@@ -3809,7 +3845,7 @@ static void reclaim_worker(struct work_struct *work)
if (ret < 0) {
scoutfs_err(sb, "failure to reclaim fenced rid %016llx: err %d, shutting down server",
rid, ret);
scoutfs_server_abort(sb);
stop_server(server);
goto out;
}
@@ -3817,16 +3853,7 @@ static void reclaim_worker(struct work_struct *work)
scoutfs_fence_free(sb, rid);
scoutfs_server_recov_finish(sb, rid, SCOUTFS_RECOV_ALL);
/* tell quorum we've finished fencing all previous leaders */
if (reason == SCOUTFS_FENCE_QUORUM_BLOCK_LEADER &&
!scoutfs_fence_reason_pending(sb, reason)) {
ret = scoutfs_quorum_fence_complete(sb, server->term);
if (ret < 0)
goto out;
}
ret = 0;
out:
/* queue next reclaim immediately if we're making progress */
if (ret == 0)
@@ -3942,12 +3969,12 @@ static void scoutfs_server_worker(struct work_struct *work)
scoutfs_net_listen(sb, conn);
scoutfs_info(sb, "server ready at "SIN_FMT, SIN_ARG(&sin));
complete(&server->start_comp);
server_up(server);
queue_reclaim_work(server, 0);
/* interruptible mostly to avoid stuck messages */
wait_event_interruptible(server->waitq, test_shutting_down(server));
wait_event_interruptible(server->waitq, server_is_stopping(server));
shutdown:
scoutfs_info(sb, "server shutting down at "SIN_FMT, SIN_ARG(&sin));
@@ -3981,60 +4008,44 @@ out:
scoutfs_fence_stop(sb);
scoutfs_net_free_conn(sb, conn);
/* let quorum know that we've shutdown */
scoutfs_quorum_server_shutdown(sb, server->term);
server_down(server);
scoutfs_info(sb, "server stopped at "SIN_FMT, SIN_ARG(&sin));
trace_scoutfs_server_work_exit(sb, 0, ret);
server->err = ret;
complete(&server->start_comp);
}
/*
* Wait for the server to successfully start. If this returns error then
* the super block's fence_term has been set to the new server's term so
* that it won't be fenced.
* Start the server but don't wait for it to complete.
*/
int scoutfs_server_start(struct super_block *sb, u64 term)
void scoutfs_server_start(struct super_block *sb, u64 term)
{
DECLARE_SERVER_INFO(sb, server);
server->err = 0;
set_shutting_down(server, false);
server->term = term;
init_completion(&server->start_comp);
queue_work(server->wq, &server->work);
wait_for_completion(&server->start_comp);
return server->err;
if (cmpxchg(&server->status, SERVER_DOWN, SERVER_STARTING) == SERVER_DOWN) {
server->term = term;
queue_work(server->wq, &server->work);
}
}
/*
* Start shutdown on the server but don't wait for it to finish.
*/
void scoutfs_server_abort(struct super_block *sb)
{
DECLARE_SERVER_INFO(sb, server);
stop_server(server);
}
/*
* Once the server is stopped we give the caller our election info
* which might have been modified while we were running.
*/
void scoutfs_server_stop(struct super_block *sb)
{
DECLARE_SERVER_INFO(sb, server);
stop_server(server);
}
cancel_work_sync(&server->work);
cancel_work_sync(&server->farewell_work);
cancel_work_sync(&server->commit_work);
cancel_work_sync(&server->log_merge_free_work);
/*
 * Start shutdown on the server and wait for it to finish.
 *
 * stop_server() moves a starting or up server to stopping and wakes
 * the server work.  Flushing the work then blocks until the work
 * function has returned.
 */
void scoutfs_server_stop_wait(struct super_block *sb)
{
	DECLARE_SERVER_INFO(sb, server);

	stop_server(server);
	/* flush_work_sync() is deprecated; flush_work() is the supported equivalent */
	flush_work(&server->work);
}
int scoutfs_server_setup(struct super_block *sb)
@@ -4050,6 +4061,7 @@ int scoutfs_server_setup(struct super_block *sb)
spin_lock_init(&server->lock);
init_waitqueue_head(&server->waitq);
INIT_WORK(&server->work, scoutfs_server_worker);
server->status = SERVER_DOWN;
init_rwsem(&server->commit_rwsem);
init_llist_head(&server->commit_waiters);
INIT_WORK(&server->commit_work, scoutfs_server_commit_func);

View File

@@ -77,9 +77,12 @@ u64 scoutfs_server_seq(struct super_block *sb);
u64 scoutfs_server_next_seq(struct super_block *sb);
void scoutfs_server_set_seq_if_greater(struct super_block *sb, u64 seq);
int scoutfs_server_start(struct super_block *sb, u64 term);
void scoutfs_server_abort(struct super_block *sb);
void scoutfs_server_start(struct super_block *sb, u64 term);
void scoutfs_server_stop(struct super_block *sb);
void scoutfs_server_stop_wait(struct super_block *sb);
bool scoutfs_server_is_running(struct super_block *sb);
bool scoutfs_server_is_up(struct super_block *sb);
bool scoutfs_server_is_down(struct super_block *sb);
int scoutfs_server_setup(struct super_block *sb);
void scoutfs_server_destroy(struct super_block *sb);