mirror of
https://github.com/versity/scoutfs.git
synced 2026-01-10 05:37:25 +00:00
Send quorum heartbeats while fencing
Quorum members will try to elect a new leader when they don't receive heartbeats from the currently elected leader. The heartbeat timeout is short to encourage restoring service promptly. Heartbeats are sent from the quorum worker thread and are delayed while it synchronously starts up the server, which includes fencing previous servers. If fence requests take too long then heartbeats will be delayed long enough for the remaining quorum members to elect a new leader while the recently elected server is still busy fencing. To fix this we decouple server startup from the quorum main thread. Server starting and stopping become asynchronous so the quorum thread is able to send heartbeats while the server work is off starting up and fencing. The server used to call into quorum to clear a flag as it exited. We remove that mechanism and have the server maintain a running status that quorum can query. We add some state to the quorum work to track the asynchronous state of the server. This lets the quorum protocol change roles immediately as needed while remembering that there is a server running that needs to be acted on. The server also used to call into quorum to update quorum blocks. This is a read-modify-write operation that has to be serialized. Now that the server startup and the quorum work run concurrently, they can't both perform these read-modify-write cycles. Instead we have the quorum work own all the block updates and it queries the server status to determine when it should update the quorum block to indicate that the server has fenced or shut down. Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
@@ -157,6 +157,7 @@
|
||||
EXPAND_COUNTER(orphan_scan_error) \
|
||||
EXPAND_COUNTER(orphan_scan_item) \
|
||||
EXPAND_COUNTER(orphan_scan_omap_set) \
|
||||
EXPAND_COUNTER(quorum_candidate_server_stopping) \
|
||||
EXPAND_COUNTER(quorum_elected) \
|
||||
EXPAND_COUNTER(quorum_fence_error) \
|
||||
EXPAND_COUNTER(quorum_fence_leader) \
|
||||
|
||||
@@ -749,7 +749,7 @@ out:
|
||||
if (ret < 0) {
|
||||
scoutfs_err(sb, "lock server err %d during client rid %016llx farewell, shutting down",
|
||||
ret, rid);
|
||||
scoutfs_server_abort(sb);
|
||||
scoutfs_server_stop(sb);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
||||
@@ -1292,7 +1292,7 @@ restart:
|
||||
if (ret) {
|
||||
scoutfs_err(sb, "client fence returned err %d, shutting down server",
|
||||
ret);
|
||||
scoutfs_server_abort(sb);
|
||||
scoutfs_server_stop(sb);
|
||||
}
|
||||
}
|
||||
destroy_conn(acc);
|
||||
|
||||
@@ -105,6 +105,8 @@ enum quorum_role { FOLLOWER, CANDIDATE, LEADER };
|
||||
struct quorum_status {
|
||||
enum quorum_role role;
|
||||
u64 term;
|
||||
u64 server_start_term;
|
||||
int server_event;
|
||||
int vote_for;
|
||||
unsigned long vote_bits;
|
||||
ktime_t timeout;
|
||||
@@ -117,7 +119,6 @@ struct quorum_info {
|
||||
bool shutdown;
|
||||
|
||||
int our_quorum_slot_nr;
|
||||
unsigned long flags;
|
||||
int votes_needed;
|
||||
|
||||
spinlock_t show_lock;
|
||||
@@ -128,8 +129,6 @@ struct quorum_info {
|
||||
struct scoutfs_sysfs_attrs ssa;
|
||||
};
|
||||
|
||||
#define QINF_FLAG_SERVER 0
|
||||
|
||||
#define DECLARE_QUORUM_INFO(sb, name) \
|
||||
struct quorum_info *name = SCOUTFS_SB(sb)->quorum_info
|
||||
#define DECLARE_QUORUM_INFO_KOBJ(kobj, name) \
|
||||
@@ -494,16 +493,6 @@ static int update_quorum_block(struct super_block *sb, int event, u64 term, bool
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* The calling server has fenced previous leaders and reclaimed their
|
||||
* resources. We can now update our fence event with a greater term to
|
||||
* stop future leaders from doing the same.
|
||||
*/
|
||||
int scoutfs_quorum_fence_complete(struct super_block *sb, u64 term)
|
||||
{
|
||||
return update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_FENCE, term, true);
|
||||
}
|
||||
|
||||
/*
|
||||
* The calling server has been elected and has started running but can't
|
||||
* yet assume that it has exclusive access to the metadata device. We
|
||||
@@ -593,15 +582,9 @@ int scoutfs_quorum_fence_leaders(struct super_block *sb, u64 term)
|
||||
}
|
||||
|
||||
out:
|
||||
if (fence_started) {
|
||||
err = scoutfs_fence_wait_fenced(sb, msecs_to_jiffies(SCOUTFS_QUORUM_FENCE_TO_MS));
|
||||
if (ret == 0)
|
||||
ret = err;
|
||||
} else {
|
||||
err = scoutfs_quorum_fence_complete(sb, term);
|
||||
if (ret == 0)
|
||||
ret = err;
|
||||
}
|
||||
err = scoutfs_fence_wait_fenced(sb, msecs_to_jiffies(SCOUTFS_QUORUM_FENCE_TO_MS));
|
||||
if (ret == 0)
|
||||
ret = err;
|
||||
|
||||
if (ret < 0)
|
||||
scoutfs_inc_counter(sb, quorum_fence_error);
|
||||
@@ -627,9 +610,8 @@ static void update_show_status(struct quorum_info *qinf, struct quorum_status *q
|
||||
/*
|
||||
* The quorum work always runs in the background of quorum member
|
||||
* mounts. It's responsible for starting and stopping the server if
|
||||
* it's elected leader, and the server can call back into it to let it
|
||||
* know that it has shut itself down (perhaps due to error) so that the
|
||||
* work should stop sending heartbeats.
|
||||
* it's elected leader. While it's leader it sends heartbeats to
|
||||
* suppress other quorum work from standing for election.
|
||||
*/
|
||||
static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
{
|
||||
@@ -637,7 +619,7 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
struct super_block *sb = qinf->sb;
|
||||
struct sockaddr_in unused;
|
||||
struct quorum_host_msg msg;
|
||||
struct quorum_status qst;
|
||||
struct quorum_status qst = {0,};
|
||||
int ret;
|
||||
int err;
|
||||
|
||||
@@ -646,9 +628,7 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
|
||||
/* start out as a follower */
|
||||
qst.role = FOLLOWER;
|
||||
qst.term = 0;
|
||||
qst.vote_for = -1;
|
||||
qst.vote_bits = 0;
|
||||
|
||||
/* read our starting term from greatest in all events in all slots */
|
||||
read_greatest_term(sb, &qst.term);
|
||||
@@ -684,20 +664,6 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
msg.term < qst.term)
|
||||
msg.type = SCOUTFS_QUORUM_MSG_INVALID;
|
||||
|
||||
/* if the server has shutdown we become follower */
|
||||
if (!test_bit(QINF_FLAG_SERVER, &qinf->flags) &&
|
||||
qst.role == LEADER) {
|
||||
qst.role = FOLLOWER;
|
||||
qst.vote_for = -1;
|
||||
qst.vote_bits = 0;
|
||||
qst.timeout = election_timeout();
|
||||
scoutfs_inc_counter(sb, quorum_server_shutdown);
|
||||
|
||||
send_msg_others(sb, SCOUTFS_QUORUM_MSG_RESIGNATION,
|
||||
qst.term);
|
||||
scoutfs_inc_counter(sb, quorum_send_resignation);
|
||||
}
|
||||
|
||||
trace_scoutfs_quorum_loop(sb, qst.role, qst.term, qst.vote_for,
|
||||
qst.vote_bits,
|
||||
ktime_to_timespec64(qst.timeout));
|
||||
@@ -708,8 +674,6 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
if (qst.role == LEADER) {
|
||||
scoutfs_warn(sb, "saw msg type %u from %u for term %llu while leader in term %llu, shutting down server.",
|
||||
msg.type, msg.from, msg.term, qst.term);
|
||||
update_show_status(qinf, &qst);
|
||||
scoutfs_server_stop(sb);
|
||||
}
|
||||
qst.role = FOLLOWER;
|
||||
qst.term = msg.term;
|
||||
@@ -731,6 +695,13 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
/* followers and candidates start new election on timeout */
|
||||
if (qst.role != LEADER &&
|
||||
ktime_after(ktime_get(), qst.timeout)) {
|
||||
/* .. but only if their server has stopped */
|
||||
if (!scoutfs_server_is_down(sb)) {
|
||||
qst.timeout = election_timeout();
|
||||
scoutfs_inc_counter(sb, quorum_candidate_server_stopping);
|
||||
continue;
|
||||
}
|
||||
|
||||
qst.role = CANDIDATE;
|
||||
qst.term++;
|
||||
qst.vote_for = -1;
|
||||
@@ -779,24 +750,62 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/* make very sure server is fully shut down */
|
||||
scoutfs_server_stop(sb);
|
||||
/* set server bit before server shutdown could clear */
|
||||
set_bit(QINF_FLAG_SERVER, &qinf->flags);
|
||||
qst.server_start_term = qst.term;
|
||||
qst.server_event = SCOUTFS_QUORUM_EVENT_ELECT;
|
||||
scoutfs_server_start(sb, qst.term);
|
||||
}
|
||||
|
||||
ret = scoutfs_server_start(sb, qst.term);
|
||||
if (ret < 0) {
|
||||
clear_bit(QINF_FLAG_SERVER, &qinf->flags);
|
||||
/* store our increased term */
|
||||
err = update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_STOP, qst.term,
|
||||
true);
|
||||
if (err < 0) {
|
||||
ret = err;
|
||||
goto out;
|
||||
}
|
||||
ret = 0;
|
||||
continue;
|
||||
/*
|
||||
* This leader's server is up, having finished fencing
|
||||
* previous leaders. We update the fence event with the
|
||||
* current term to let future leaders know that previous
|
||||
* servers have been fenced.
|
||||
*/
|
||||
if (qst.role == LEADER && qst.server_event != SCOUTFS_QUORUM_EVENT_FENCE &&
|
||||
scoutfs_server_is_up(sb)) {
|
||||
ret = update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_FENCE, qst.term, true);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
qst.server_event = SCOUTFS_QUORUM_EVENT_FENCE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Stop a running server if we're no longer leader in
|
||||
* its term.
|
||||
*/
|
||||
if (!(qst.role == LEADER && qst.term == qst.server_start_term) &&
|
||||
scoutfs_server_is_running(sb)) {
|
||||
scoutfs_server_stop(sb);
|
||||
}
|
||||
|
||||
/*
|
||||
* A previously running server has stopped. The quorum
|
||||
* protocol might have shut it down by changing roles or
|
||||
* it might have stopped on its own, perhaps on errors.
|
||||
* If we're still a leader then we become a follower and
|
||||
* send resignations to encourage the next election.
|
||||
* Always update the _STOP event to stop connections and
|
||||
* fencing.
|
||||
*/
|
||||
if (qst.server_start_term > 0 && scoutfs_server_is_down(sb)) {
|
||||
if (qst.role == LEADER) {
|
||||
qst.role = FOLLOWER;
|
||||
qst.vote_for = -1;
|
||||
qst.vote_bits = 0;
|
||||
qst.timeout = election_timeout();
|
||||
scoutfs_inc_counter(sb, quorum_server_shutdown);
|
||||
|
||||
send_msg_others(sb, SCOUTFS_QUORUM_MSG_RESIGNATION,
|
||||
qst.server_start_term);
|
||||
scoutfs_inc_counter(sb, quorum_send_resignation);
|
||||
}
|
||||
|
||||
ret = update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_STOP,
|
||||
qst.server_start_term, true);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
qst.server_start_term = 0;
|
||||
}
|
||||
|
||||
/* leaders regularly send heartbeats to delay elections */
|
||||
@@ -836,11 +845,16 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
update_show_status(qinf, &qst);
|
||||
|
||||
/* always try to stop a running server as we stop */
|
||||
if (test_bit(QINF_FLAG_SERVER, &qinf->flags)) {
|
||||
scoutfs_server_stop(sb);
|
||||
scoutfs_fence_stop(sb);
|
||||
send_msg_others(sb, SCOUTFS_QUORUM_MSG_RESIGNATION,
|
||||
qst.term);
|
||||
if (scoutfs_server_is_running(sb)) {
|
||||
scoutfs_server_stop_wait(sb);
|
||||
send_msg_others(sb, SCOUTFS_QUORUM_MSG_RESIGNATION, qst.term);
|
||||
|
||||
if (qst.server_start_term > 0) {
|
||||
err = update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_STOP,
|
||||
qst.server_start_term, true);
|
||||
if (err < 0 && ret == 0)
|
||||
ret = err;
|
||||
}
|
||||
}
|
||||
|
||||
/* record that this slot no longer has an active quorum */
|
||||
@@ -852,21 +866,6 @@ out:
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The calling server has shutdown and is no longer using shared
|
||||
* resources. Clear the bit so that we stop sending heartbeats and
|
||||
* allow the next server to be elected. Update the stop event so that
|
||||
* it won't be considered available by clients or fenced by the next
|
||||
* leader.
|
||||
*/
|
||||
void scoutfs_quorum_server_shutdown(struct super_block *sb, u64 term)
|
||||
{
|
||||
DECLARE_QUORUM_INFO(sb, qinf);
|
||||
|
||||
clear_bit(QINF_FLAG_SERVER, &qinf->flags);
|
||||
update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_STOP, term, true);
|
||||
}
|
||||
|
||||
/*
|
||||
* Clients read quorum blocks looking for the leader with a server whose
|
||||
* address it can try and connect to.
|
||||
@@ -988,6 +987,8 @@ static ssize_t status_show(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
qinf->our_quorum_slot_nr);
|
||||
snprintf_ret(buf, size, &ret, "term %llu\n",
|
||||
qst.term);
|
||||
snprintf_ret(buf, size, &ret, "server_start_term %llu\n", qst.server_start_term);
|
||||
snprintf_ret(buf, size, &ret, "server_event %d\n", qst.server_event);
|
||||
snprintf_ret(buf, size, &ret, "role %d (%s)\n",
|
||||
qst.role, role_str(qst.role));
|
||||
snprintf_ret(buf, size, &ret, "vote_for %d\n",
|
||||
|
||||
@@ -2,14 +2,12 @@
|
||||
#define _SCOUTFS_QUORUM_H_
|
||||
|
||||
int scoutfs_quorum_server_sin(struct super_block *sb, struct sockaddr_in *sin);
|
||||
void scoutfs_quorum_server_shutdown(struct super_block *sb, u64 term);
|
||||
|
||||
u8 scoutfs_quorum_votes_needed(struct super_block *sb);
|
||||
void scoutfs_quorum_slot_sin(struct scoutfs_super_block *super, int i,
|
||||
struct sockaddr_in *sin);
|
||||
|
||||
int scoutfs_quorum_fence_leaders(struct super_block *sb, u64 term);
|
||||
int scoutfs_quorum_fence_complete(struct super_block *sb, u64 term);
|
||||
|
||||
int scoutfs_quorum_setup(struct super_block *sb);
|
||||
void scoutfs_quorum_shutdown(struct super_block *sb);
|
||||
|
||||
@@ -59,9 +59,7 @@ struct server_info {
|
||||
|
||||
struct workqueue_struct *wq;
|
||||
struct work_struct work;
|
||||
int err;
|
||||
bool shutting_down;
|
||||
struct completion start_comp;
|
||||
int status;
|
||||
u64 term;
|
||||
struct scoutfs_net_connection *conn;
|
||||
|
||||
@@ -155,6 +153,62 @@ static bool get_volopt_val(struct server_info *server, int nr, u64 *val)
|
||||
return is_set;
|
||||
}
|
||||
|
||||
/*
 * Values stored in server_info.status, all accessed with cmpxchg().
 */
enum {
	/* passed as both cmpxchg() arguments to read status without changing it */
	SERVER_NOP	= 0,
	/* set by scoutfs_server_start() when it queues the server work */
	SERVER_STARTING	= 1,
	/* set by server_up() */
	SERVER_UP	= 2,
	/* set by stop_server() to ask a starting or up server to stop */
	SERVER_STOPPING	= 3,
	/* set by server_down(), also the status assigned at setup */
	SERVER_DOWN	= 4,
};
|
||||
|
||||
bool scoutfs_server_is_running(struct super_block *sb)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
long was = cmpxchg(&server->status, SERVER_NOP, SERVER_NOP);
|
||||
|
||||
return was == SERVER_STARTING || was == SERVER_UP;
|
||||
}
|
||||
|
||||
bool scoutfs_server_is_up(struct super_block *sb)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
|
||||
return cmpxchg(&server->status, SERVER_NOP, SERVER_NOP) == SERVER_UP;
|
||||
}
|
||||
|
||||
bool scoutfs_server_is_down(struct super_block *sb)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
|
||||
return cmpxchg(&server->status, SERVER_NOP, SERVER_NOP) == SERVER_DOWN;
|
||||
}
|
||||
|
||||
static bool server_is_stopping(struct server_info *server)
|
||||
{
|
||||
return cmpxchg(&server->status, SERVER_NOP, SERVER_NOP) == SERVER_STOPPING;
|
||||
}
|
||||
|
||||
static void stop_server(struct server_info *server)
|
||||
{
|
||||
long was = cmpxchg(&server->status, SERVER_NOP, SERVER_NOP);
|
||||
|
||||
if ((was == SERVER_STARTING || was == SERVER_UP) &&
|
||||
cmpxchg(&server->status, was, SERVER_STOPPING) == was)
|
||||
wake_up(&server->waitq);
|
||||
}
|
||||
|
||||
static void server_up(struct server_info *server)
|
||||
{
|
||||
cmpxchg(&server->status, SERVER_STARTING, SERVER_UP);
|
||||
}
|
||||
|
||||
static void server_down(struct server_info *server)
|
||||
{
|
||||
long was = cmpxchg(&server->status, SERVER_NOP, SERVER_NOP);
|
||||
|
||||
if (was != SERVER_DOWN)
|
||||
cmpxchg(&server->status, was, SERVER_DOWN);
|
||||
}
|
||||
|
||||
struct commit_waiter {
|
||||
struct completion comp;
|
||||
@@ -162,24 +216,6 @@ struct commit_waiter {
|
||||
int ret;
|
||||
};
|
||||
|
||||
static bool test_shutting_down(struct server_info *server)
|
||||
{
|
||||
smp_rmb();
|
||||
return server->shutting_down;
|
||||
}
|
||||
|
||||
static void set_shutting_down(struct server_info *server, bool val)
|
||||
{
|
||||
server->shutting_down = val;
|
||||
smp_wmb();
|
||||
}
|
||||
|
||||
static void stop_server(struct server_info *server)
|
||||
{
|
||||
set_shutting_down(server, true);
|
||||
wake_up(&server->waitq);
|
||||
}
|
||||
|
||||
/*
|
||||
* Hold the shared rwsem that lets multiple holders modify blocks in the
|
||||
* current commit and prevents the commit worker from acquiring the
|
||||
@@ -2051,8 +2087,8 @@ static void server_log_merge_free_work(struct work_struct *work)
|
||||
bool commit = false;
|
||||
int ret = 0;
|
||||
|
||||
/* shutdown waits for us, we'll eventually load set shutting_down */
|
||||
while (!server->shutting_down) {
|
||||
while (!server_is_stopping(server)) {
|
||||
|
||||
scoutfs_server_hold_commit(sb);
|
||||
mutex_lock(&server->logs_mutex);
|
||||
commit = true;
|
||||
@@ -3180,7 +3216,7 @@ out:
|
||||
*/
|
||||
static void queue_farewell_work(struct server_info *server)
|
||||
{
|
||||
if (!test_shutting_down(server))
|
||||
if (!server_is_stopping(server))
|
||||
queue_work(server->wq, &server->farewell_work);
|
||||
}
|
||||
|
||||
@@ -3693,14 +3729,14 @@ static void fence_pending_recov_worker(struct work_struct *work)
|
||||
}
|
||||
|
||||
if (ret < 0)
|
||||
scoutfs_server_abort(sb);
|
||||
stop_server(server);
|
||||
}
|
||||
|
||||
static void recovery_timeout(struct super_block *sb)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
|
||||
if (!test_shutting_down(server))
|
||||
if (!server_is_stopping(server))
|
||||
queue_work(server->wq, &server->fence_pending_recov_work);
|
||||
}
|
||||
|
||||
@@ -3765,7 +3801,7 @@ out:
|
||||
|
||||
static void queue_reclaim_work(struct server_info *server, unsigned long delay)
|
||||
{
|
||||
if (!test_shutting_down(server))
|
||||
if (!server_is_stopping(server))
|
||||
queue_delayed_work(server->wq, &server->reclaim_dwork, delay);
|
||||
}
|
||||
|
||||
@@ -3800,7 +3836,7 @@ static void reclaim_worker(struct work_struct *work)
|
||||
if (error == true) {
|
||||
scoutfs_err(sb, "saw error indicator on fence request for rid %016llx, shutting down server",
|
||||
rid);
|
||||
scoutfs_server_abort(sb);
|
||||
stop_server(server);
|
||||
ret = -ESHUTDOWN;
|
||||
goto out;
|
||||
}
|
||||
@@ -3809,7 +3845,7 @@ static void reclaim_worker(struct work_struct *work)
|
||||
if (ret < 0) {
|
||||
scoutfs_err(sb, "failure to reclaim fenced rid %016llx: err %d, shutting down server",
|
||||
rid, ret);
|
||||
scoutfs_server_abort(sb);
|
||||
stop_server(server);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -3817,16 +3853,7 @@ static void reclaim_worker(struct work_struct *work)
|
||||
scoutfs_fence_free(sb, rid);
|
||||
scoutfs_server_recov_finish(sb, rid, SCOUTFS_RECOV_ALL);
|
||||
|
||||
/* tell quorum we've finished fencing all previous leaders */
|
||||
if (reason == SCOUTFS_FENCE_QUORUM_BLOCK_LEADER &&
|
||||
!scoutfs_fence_reason_pending(sb, reason)) {
|
||||
ret = scoutfs_quorum_fence_complete(sb, server->term);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
/* queue next reclaim immediately if we're making progress */
|
||||
if (ret == 0)
|
||||
@@ -3942,12 +3969,12 @@ static void scoutfs_server_worker(struct work_struct *work)
|
||||
scoutfs_net_listen(sb, conn);
|
||||
|
||||
scoutfs_info(sb, "server ready at "SIN_FMT, SIN_ARG(&sin));
|
||||
complete(&server->start_comp);
|
||||
server_up(server);
|
||||
|
||||
queue_reclaim_work(server, 0);
|
||||
|
||||
/* interruptible mostly to avoid stuck messages */
|
||||
wait_event_interruptible(server->waitq, test_shutting_down(server));
|
||||
wait_event_interruptible(server->waitq, server_is_stopping(server));
|
||||
|
||||
shutdown:
|
||||
scoutfs_info(sb, "server shutting down at "SIN_FMT, SIN_ARG(&sin));
|
||||
@@ -3981,60 +4008,44 @@ out:
|
||||
scoutfs_fence_stop(sb);
|
||||
scoutfs_net_free_conn(sb, conn);
|
||||
|
||||
/* let quorum know that we've shutdown */
|
||||
scoutfs_quorum_server_shutdown(sb, server->term);
|
||||
server_down(server);
|
||||
|
||||
scoutfs_info(sb, "server stopped at "SIN_FMT, SIN_ARG(&sin));
|
||||
trace_scoutfs_server_work_exit(sb, 0, ret);
|
||||
|
||||
server->err = ret;
|
||||
complete(&server->start_comp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for the server to successfully start. If this returns error then
|
||||
* the super block's fence_term has been set to the new server's term so
|
||||
* that it won't be fenced.
|
||||
* Start the server but don't wait for it to complete.
|
||||
*/
|
||||
int scoutfs_server_start(struct super_block *sb, u64 term)
|
||||
void scoutfs_server_start(struct super_block *sb, u64 term)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
|
||||
server->err = 0;
|
||||
set_shutting_down(server, false);
|
||||
server->term = term;
|
||||
init_completion(&server->start_comp);
|
||||
|
||||
queue_work(server->wq, &server->work);
|
||||
|
||||
wait_for_completion(&server->start_comp);
|
||||
return server->err;
|
||||
if (cmpxchg(&server->status, SERVER_DOWN, SERVER_STARTING) == SERVER_DOWN) {
|
||||
server->term = term;
|
||||
queue_work(server->wq, &server->work);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Start shutdown on the server but don't wait for it to finish.
|
||||
*/
|
||||
void scoutfs_server_abort(struct super_block *sb)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
|
||||
stop_server(server);
|
||||
}
|
||||
|
||||
/*
|
||||
* Once the server is stopped we give the caller our election info
|
||||
* which might have been modified while we were running.
|
||||
*/
|
||||
void scoutfs_server_stop(struct super_block *sb)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
|
||||
stop_server(server);
|
||||
}
|
||||
|
||||
cancel_work_sync(&server->work);
|
||||
cancel_work_sync(&server->farewell_work);
|
||||
cancel_work_sync(&server->commit_work);
|
||||
cancel_work_sync(&server->log_merge_free_work);
|
||||
/*
|
||||
* Start shutdown on the server and wait for it to finish.
|
||||
*/
|
||||
void scoutfs_server_stop_wait(struct super_block *sb)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
|
||||
stop_server(server);
|
||||
flush_work_sync(&server->work);
|
||||
}
|
||||
|
||||
int scoutfs_server_setup(struct super_block *sb)
|
||||
@@ -4050,6 +4061,7 @@ int scoutfs_server_setup(struct super_block *sb)
|
||||
spin_lock_init(&server->lock);
|
||||
init_waitqueue_head(&server->waitq);
|
||||
INIT_WORK(&server->work, scoutfs_server_worker);
|
||||
server->status = SERVER_DOWN;
|
||||
init_rwsem(&server->commit_rwsem);
|
||||
init_llist_head(&server->commit_waiters);
|
||||
INIT_WORK(&server->commit_work, scoutfs_server_commit_func);
|
||||
|
||||
@@ -77,9 +77,12 @@ u64 scoutfs_server_seq(struct super_block *sb);
|
||||
u64 scoutfs_server_next_seq(struct super_block *sb);
|
||||
void scoutfs_server_set_seq_if_greater(struct super_block *sb, u64 seq);
|
||||
|
||||
int scoutfs_server_start(struct super_block *sb, u64 term);
|
||||
void scoutfs_server_abort(struct super_block *sb);
|
||||
void scoutfs_server_start(struct super_block *sb, u64 term);
|
||||
void scoutfs_server_stop(struct super_block *sb);
|
||||
void scoutfs_server_stop_wait(struct super_block *sb);
|
||||
bool scoutfs_server_is_running(struct super_block *sb);
|
||||
bool scoutfs_server_is_up(struct super_block *sb);
|
||||
bool scoutfs_server_is_down(struct super_block *sb);
|
||||
|
||||
int scoutfs_server_setup(struct super_block *sb);
|
||||
void scoutfs_server_destroy(struct super_block *sb);
|
||||
|
||||
Reference in New Issue
Block a user