scoutfs: add elected flag to quorum block

It was a mistake to use a non-zero elected_nr as the indication that a
slot is actively elected.  Zeroing elected_nr as the server shuts down
wipes the value, so it doesn't keep advancing as each new server is
elected.  A client connecting to a new server can then be mistaken for a
client reconnecting to a server that had already timed it out and
destroyed its state.  This caused reconnection after shutting down a
server to fail, with clients looping on reconnection indefinitely.

Instead, this adds a flags field to the quorum block and a flag that
indicates the slot is actively elected.  The flag is cleared by fencing
and by the client as its server shuts down.

Signed-off-by: Zach Brown <zab@versity.com>
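
For illustration, here is a minimal userspace sketch (not the kernel code
itself) of what the flag buys: deciding "elected" from an explicit flag
instead of from a non-zero elected_nr, so the counter can survive a clean
shutdown.  The struct and helper names below are hypothetical; only the flag
name and its (1 << 0) bit come from this patch.

    /*
     * Hypothetical userspace sketch, not scoutfs code: only the flag name
     * and its bit value are taken from the patch.
     */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define SCOUTFS_QUORUM_BLOCK_FLAG_ELECTED (1 << 0)

    struct quorum_block {
            uint64_t elected_nr;    /* keeps advancing; no longer zeroed on shutdown */
            uint8_t flags;          /* SCOUTFS_QUORUM_BLOCK_FLAG_* */
    };

    /* old test: infer "elected" from a non-zero counter */
    static bool slot_elected_old(const struct quorum_block *blk)
    {
            return blk->elected_nr > 0;
    }

    /* new test: read an explicit flag, independent of the counter */
    static bool slot_elected_new(const struct quorum_block *blk)
    {
            return blk->flags & SCOUTFS_QUORUM_BLOCK_FLAG_ELECTED;
    }

    int main(void)
    {
            /* a slot whose server shut down cleanly: flag cleared, counter kept */
            struct quorum_block blk = { .elected_nr = 3, .flags = 0 };

            printf("old test: %d\n", slot_elected_old(&blk)); /* 1: looks elected */
            printf("new test: %d\n", slot_elected_new(&blk)); /* 0: correctly idle */
            return 0;
    }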

@@ -443,8 +443,12 @@ struct scoutfs_quorum_block {
 	__le64 unmount_barrier;
 	__le32 crc;
 	__u8 vote_slot;
+	__u8 flags;
 } __packed;
 
+#define SCOUTFS_QUORUM_BLOCK_FLAG_ELECTED	(1 << 0)
+#define SCOUTFS_QUORUM_BLOCK_FLAGS_UNKNOWN	(U8_MAX << 1)
+
 #define SCOUTFS_QUORUM_MAX_SLOTS SCOUTFS_QUORUM_BLOCKS
 
 /*


@@ -297,7 +297,8 @@ static bool invalid_quorum_block(struct scoutfs_super_block *super,
 	return quorum_block_crc(blk) != blk->crc ||
 	       blk->fsid != super->hdr.fsid ||
 	       le64_to_cpu(blk->blkno) != bh->b_blocknr ||
-	       blk->vote_slot >= SCOUTFS_QUORUM_MAX_SLOTS;
+	       blk->vote_slot >= SCOUTFS_QUORUM_MAX_SLOTS ||
+	       (blk->flags & SCOUTFS_QUORUM_BLOCK_FLAGS_UNKNOWN);
 }
 
 /*
@@ -393,7 +394,7 @@ static inline int first_slot_flags(struct scoutfs_quorum_config *conf,
 static int write_quorum_block(struct super_block *sb, __le64 fsid,
 			      __le64 config_gen, u8 our_slot, __le64 write_nr,
 			      u64 elected_nr, u64 unmount_barrier,
-			      u8 vote_slot)
+			      u8 vote_slot, u8 flags)
 {
 	struct scoutfs_quorum_block *blk;
 	struct buffer_head *bh;
@@ -419,6 +420,7 @@ static int write_quorum_block(struct super_block *sb, __le64 fsid,
 	blk->elected_nr = cpu_to_le64(elected_nr);
 	blk->unmount_barrier = cpu_to_le64(unmount_barrier);
 	blk->vote_slot = vote_slot;
+	blk->flags = flags;
 
 	blk->crc = quorum_block_crc(blk);
@@ -464,19 +466,24 @@ static int fence_other_elected(struct super_block *sb,
 {
 	struct scoutfs_quorum_config *conf = &super->quorum_config;
 	struct scoutfs_quorum_block blk;
+	u8 flags;
 	int ret;
 	int i;
 
 	for_each_block(sb, super, i, &blk) {
 		if (i != our_slot &&
-		    le64_to_cpu(blk.elected_nr) > 0 &&
+		    (blk.flags & SCOUTFS_QUORUM_BLOCK_FLAG_ELECTED) &&
 		    le64_to_cpu(blk.elected_nr) <= elected_nr) {
 			scoutfs_err(sb, "would have fenced");
 			scoutfs_inc_counter(sb, quorum_fenced);
 
+			flags = blk.flags & ~SCOUTFS_QUORUM_BLOCK_FLAG_ELECTED;
 			ret = write_quorum_block(sb, super->hdr.fsid,
-						 conf->gen, i, blk.write_nr, 0,
-						 le64_to_cpu(blk.unmount_barrier), i);
+						 conf->gen, i, blk.write_nr,
+						 le64_to_cpu(blk.elected_nr),
+						 le64_to_cpu(blk.unmount_barrier), i,
+						 flags);
 			if (ret)
 				break;
 		}
@@ -531,6 +538,7 @@ int scoutfs_quorum_election(struct super_block *sb, char *our_name,
 	__le64 write_nr = 0;
 	u64 elected_nr = 0;
 	u64 unmount_barrier = 0;
+	u8 flags = 0;
 	int vote_streak = 0;
 	int vote_slot;
 	int our_slot;
@@ -586,6 +594,7 @@ int scoutfs_quorum_election(struct super_block *sb, char *our_name,
 		/* find the most recently elected leader */
 		if ((blk.config_gen == conf->gen) &&
+		    (blk.flags & SCOUTFS_QUORUM_BLOCK_FLAG_ELECTED) &&
 		    (le64_to_cpu(blk.elected_nr) > qei->elected_nr)) {
 			addr_to_sin(&qei->sin, &slot->addr);
 			qei->config_gen = blk.config_gen;
@@ -594,6 +603,7 @@ int scoutfs_quorum_election(struct super_block *sb, char *our_name,
 			qei->unmount_barrier =
 				le64_to_cpu(blk.unmount_barrier);
 			qei->config_slot = i;
+			qei->flags = blk.flags;
 		}
 	}
@@ -605,7 +615,7 @@ int scoutfs_quorum_election(struct super_block *sb, char *our_name,
 	 * most recent, or we couldn't fence, then we fall back
 	 * to participating in the election.
 	 */
-	if (elected_nr != 0) {
+	if (flags & SCOUTFS_QUORUM_BLOCK_FLAG_ELECTED) {
 		if (qei->write_nr == write_nr &&
 		    qei->elected_nr == elected_nr &&
 		    qei->config_slot == our_slot) {
@@ -643,6 +653,7 @@ int scoutfs_quorum_election(struct super_block *sb, char *our_name,
 	write_nr = cpu_to_le64(1);
 	elected_nr = 0;
 	unmount_barrier = 0;
+	flags = 0;
 
 	for_each_active_block(sb, super, conf, hist, hi, &blk, slot, i) {
 		/* count our votes (maybe including from us) */
@@ -677,14 +688,14 @@ int scoutfs_quorum_election(struct super_block *sb, char *our_name,
 		else
 			vote_streak = 0;
 
-		if (vote_streak >= 2)
+		if (vote_streak >= 2) {
+			flags |= SCOUTFS_QUORUM_BLOCK_FLAG_ELECTED;
 			elected_nr++;
-		else
-			elected_nr = 0;
+		}
 
 		write_quorum_block(sb, super->hdr.fsid, conf->gen, our_slot,
 				   write_nr, elected_nr, unmount_barrier,
-				   vote_slot);
+				   vote_slot, flags);
 
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
@@ -705,14 +716,14 @@ out:
 /*
  * The calling server is shutting down and has finished modifying
- * persistent state.  We clear elected_nr from our quorum block so that
- * mounts won't try to connect and so that the next next leader won't
- * try to fence.
+ * persistent state.  We clear the elected flag from our quorum block so
+ * that mounts won't try to connect and so that the next next leader
+ * won't try to fence.
  *
  * By definition nothing has written to the slot since we wrote our
- * elected_nr and the slot could not have been reclaimed.  To reclaim
- * the slot would have required proving that we were gone or fencing
- * us.
+ * elected quorum block and the slot could not have been reclaimed.  To
+ * reclaim the slot would have required proving that we were gone or
+ * fencing us.
  *
  * If this fails then the mount is in trouble because it'll probably be
  * fenced by the next elected leader.
@@ -729,9 +740,12 @@ int scoutfs_quorum_clear_elected(struct super_block *sb,
 {
 	struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
 
+	qei->flags &= ~SCOUTFS_QUORUM_BLOCK_FLAG_ELECTED;
+
 	return write_quorum_block(sb, super->hdr.fsid, qei->config_gen,
-				  qei->config_slot, qei->write_nr, 0,
-				  qei->unmount_barrier, qei->config_slot);
+				  qei->config_slot, qei->write_nr,
+				  qei->elected_nr, qei->unmount_barrier,
+				  qei->config_slot, qei->flags);
 }
 
 int scoutfs_quorum_update_barrier(struct super_block *sb,
@@ -745,7 +759,7 @@ int scoutfs_quorum_update_barrier(struct super_block *sb,
 	return write_quorum_block(sb, super->hdr.fsid, qei->config_gen,
 				  qei->config_slot, qei->write_nr,
 				  qei->elected_nr, qei->unmount_barrier,
-				  qei->config_slot);
+				  qei->config_slot, qei->flags);
 }
 
 /*


@@ -9,6 +9,7 @@ struct scoutfs_quorum_elected_info {
 	u64 unmount_barrier;
 	unsigned int config_slot;
 	bool run_server;
+	u8 flags;
 };
 
 int scoutfs_quorum_election(struct super_block *sb, char *our_name,


@@ -2485,6 +2485,7 @@ DECLARE_EVENT_CLASS(scoutfs_quorum_block_class,
 		__field(__u64, unmount_barrier)
 		__field(__u32, crc)
 		__field(__u8, vote_slot)
+		__field(__u8, flags)
 	),
 
 	TP_fast_assign(
@@ -2497,12 +2498,14 @@ DECLARE_EVENT_CLASS(scoutfs_quorum_block_class,
 		__entry->unmount_barrier = le64_to_cpu(blk->unmount_barrier);
 		__entry->crc = le32_to_cpu(blk->crc);
 		__entry->vote_slot = blk->vote_slot;
+		__entry->flags = blk->flags;
 	),
 
-	TP_printk("fsid "FSID_FMT" io_blkno %llu hdr_blkno %llu config_gen %llu write_nr %llu elected_nr %llu umb %llu crc 0x%08x vote_slot %u",
+	TP_printk("fsid "FSID_FMT" io_blkno %llu hdr_blkno %llu config_gen %llu write_nr %llu elected_nr %llu umb %llu crc 0x%08x vote_slot %u flags %02x",
 		  __entry->fsid, __entry->io_blkno, __entry->hdr_blkno,
 		  __entry->config_gen, __entry->write_nr, __entry->elected_nr,
-		  __entry->unmount_barrier, __entry->crc, __entry->vote_slot)
+		  __entry->unmount_barrier, __entry->crc, __entry->vote_slot,
+		  __entry->flags)
 );
 
 DEFINE_EVENT(scoutfs_quorum_block_class, scoutfs_quorum_read_block,
 	TP_PROTO(struct super_block *sb, u64 io_blkno,