From b5133bfc986840102f166bad87f18f7803cdc1bc Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Wed, 10 Apr 2019 14:51:23 -0700 Subject: [PATCH] scoutfs: add elected flag to quorum block It was a mistake to use a non-zero elected_nr as the indication that a slot is considered actively elected. Zeroing it as the server shuts down wipes the elected_nr and means that it doesn't advance as each server is elected. This then causes a client connecting to a new server to be confused for a client reconnecting to a server after the server has timed it out and destroyed its state. This caused reconnection after shutting down a server to fail and clients to loop reconnecting indefinitely. This instead adds flags to the quorum block and assigns a flag to indicate that the slot should be considered active. It's cleared by fencing and by the client as the server shuts down. Signed-off-by: Zach Brown --- kmod/src/format.h | 4 ++++ kmod/src/quorum.c | 52 +++++++++++++++++++++++++--------------- kmod/src/quorum.h | 1 + kmod/src/scoutfs_trace.h | 7 ++++-- 4 files changed, 43 insertions(+), 21 deletions(-) diff --git a/kmod/src/format.h b/kmod/src/format.h index b5ced5d4..936b0717 100644 --- a/kmod/src/format.h +++ b/kmod/src/format.h @@ -443,8 +443,12 @@ struct scoutfs_quorum_block { __le64 unmount_barrier; __le32 crc; __u8 vote_slot; + __u8 flags; } __packed; +#define SCOUTFS_QUORUM_BLOCK_FLAG_ELECTED (1 << 0) +#define SCOUTFS_QUORUM_BLOCK_FLAGS_UNKNOWN (U8_MAX << 1) + #define SCOUTFS_QUORUM_MAX_SLOTS SCOUTFS_QUORUM_BLOCKS /* diff --git a/kmod/src/quorum.c b/kmod/src/quorum.c index 7f90e407..87cdd186 100644 --- a/kmod/src/quorum.c +++ b/kmod/src/quorum.c @@ -297,7 +297,8 @@ static bool invalid_quorum_block(struct scoutfs_super_block *super, return quorum_block_crc(blk) != blk->crc || blk->fsid != super->hdr.fsid || le64_to_cpu(blk->blkno) != bh->b_blocknr || - blk->vote_slot >= SCOUTFS_QUORUM_MAX_SLOTS; + blk->vote_slot >= SCOUTFS_QUORUM_MAX_SLOTS || + (blk->flags & 
SCOUTFS_QUORUM_BLOCK_FLAGS_UNKNOWN); } /* @@ -393,7 +394,7 @@ static inline int first_slot_flags(struct scoutfs_quorum_config *conf, static int write_quorum_block(struct super_block *sb, __le64 fsid, __le64 config_gen, u8 our_slot, __le64 write_nr, u64 elected_nr, u64 unmount_barrier, - u8 vote_slot) + u8 vote_slot, u8 flags) { struct scoutfs_quorum_block *blk; struct buffer_head *bh; @@ -419,6 +420,7 @@ static int write_quorum_block(struct super_block *sb, __le64 fsid, blk->elected_nr = cpu_to_le64(elected_nr); blk->unmount_barrier = cpu_to_le64(unmount_barrier); blk->vote_slot = vote_slot; + blk->flags = flags; blk->crc = quorum_block_crc(blk); @@ -464,19 +466,24 @@ static int fence_other_elected(struct super_block *sb, { struct scoutfs_quorum_config *conf = &super->quorum_config; struct scoutfs_quorum_block blk; + u8 flags; int ret; int i; for_each_block(sb, super, i, &blk) { if (i != our_slot && - le64_to_cpu(blk.elected_nr) > 0 && + (blk.flags & SCOUTFS_QUORUM_BLOCK_FLAG_ELECTED) && le64_to_cpu(blk.elected_nr) <= elected_nr) { scoutfs_err(sb, "would have fenced"); scoutfs_inc_counter(sb, quorum_fenced); + flags = blk.flags & ~SCOUTFS_QUORUM_BLOCK_FLAG_ELECTED; + ret = write_quorum_block(sb, super->hdr.fsid, - conf->gen, i, blk.write_nr, 0, - le64_to_cpu(blk.unmount_barrier), i); + conf->gen, i, blk.write_nr, + le64_to_cpu(blk.elected_nr), + le64_to_cpu(blk.unmount_barrier), i, + flags); if (ret) break; } @@ -531,6 +538,7 @@ int scoutfs_quorum_election(struct super_block *sb, char *our_name, __le64 write_nr = 0; u64 elected_nr = 0; u64 unmount_barrier = 0; + u8 flags = 0; int vote_streak = 0; int vote_slot; int our_slot; @@ -586,6 +594,7 @@ int scoutfs_quorum_election(struct super_block *sb, char *our_name, /* find the most recently elected leader */ if ((blk.config_gen == conf->gen) && + (blk.flags & SCOUTFS_QUORUM_BLOCK_FLAG_ELECTED) && (le64_to_cpu(blk.elected_nr) > qei->elected_nr)){ addr_to_sin(&qei->sin, &slot->addr); qei->config_gen = blk.config_gen; @@ 
-594,6 +603,7 @@ int scoutfs_quorum_election(struct super_block *sb, char *our_name, qei->unmount_barrier = le64_to_cpu(blk.unmount_barrier); qei->config_slot = i; + qei->flags = blk.flags; } } @@ -605,7 +615,7 @@ int scoutfs_quorum_election(struct super_block *sb, char *our_name, * most recent, or we couldn't fence, then we fall back * to participating in the election. */ - if (elected_nr != 0) { + if (flags & SCOUTFS_QUORUM_BLOCK_FLAG_ELECTED) { if (qei->write_nr == write_nr && qei->elected_nr == elected_nr && qei->config_slot == our_slot) { @@ -643,6 +653,7 @@ int scoutfs_quorum_election(struct super_block *sb, char *our_name, write_nr = cpu_to_le64(1); elected_nr = 0; unmount_barrier = 0; + flags = 0; for_each_active_block(sb, super, conf, hist, hi, &blk, slot, i){ /* count our votes (maybe including from us) */ @@ -677,14 +688,14 @@ int scoutfs_quorum_election(struct super_block *sb, char *our_name, else vote_streak = 0; - if (vote_streak >= 2) + if (vote_streak >= 2) { + flags |= SCOUTFS_QUORUM_BLOCK_FLAG_ELECTED; elected_nr++; - else - elected_nr = 0; + } write_quorum_block(sb, super->hdr.fsid, conf->gen, our_slot, write_nr, elected_nr, unmount_barrier, - vote_slot); + vote_slot, flags); set_current_state(TASK_UNINTERRUPTIBLE); schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); @@ -705,14 +716,14 @@ out: /* * The calling server is shutting down and has finished modifying - * persistent state. We clear elected_nr from our quorum block so that - * mounts won't try to connect and so that the next next leader won't - * try to fence. + * persistent state. We clear the elected flag from our quorum block so + * that mounts won't try to connect and so that the next leader + * won't try to fence. * * By definition nothing has written to the slot since we wrote our - * elected_nr and the slot could not have been reclaimed. To reclaim - * the slot would have required proving that we were gone or fencing - * us. 
+ * elected quorum block and the slot could not have been reclaimed. To + * reclaim the slot would have required proving that we were gone or + * fencing us. * * If this fails then the mount is in trouble because it'll probably be * fenced by the next elected leader. @@ -729,9 +740,12 @@ int scoutfs_quorum_clear_elected(struct super_block *sb, { struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + qei->flags &= ~SCOUTFS_QUORUM_BLOCK_FLAG_ELECTED; + return write_quorum_block(sb, super->hdr.fsid, qei->config_gen, - qei->config_slot, qei->write_nr, 0, - qei->unmount_barrier, qei->config_slot); + qei->config_slot, qei->write_nr, + qei->elected_nr, qei->unmount_barrier, + qei->config_slot, qei->flags); } int scoutfs_quorum_update_barrier(struct super_block *sb, @@ -745,7 +759,7 @@ int scoutfs_quorum_update_barrier(struct super_block *sb, return write_quorum_block(sb, super->hdr.fsid, qei->config_gen, qei->config_slot, qei->write_nr, qei->elected_nr, qei->unmount_barrier, - qei->config_slot); + qei->config_slot, qei->flags); } /* diff --git a/kmod/src/quorum.h b/kmod/src/quorum.h index c40e9d48..26a45b5b 100644 --- a/kmod/src/quorum.h +++ b/kmod/src/quorum.h @@ -9,6 +9,7 @@ struct scoutfs_quorum_elected_info { u64 unmount_barrier; unsigned int config_slot; bool run_server; + u8 flags; }; int scoutfs_quorum_election(struct super_block *sb, char *our_name, diff --git a/kmod/src/scoutfs_trace.h b/kmod/src/scoutfs_trace.h index 48040606..5e85305e 100644 --- a/kmod/src/scoutfs_trace.h +++ b/kmod/src/scoutfs_trace.h @@ -2485,6 +2485,7 @@ DECLARE_EVENT_CLASS(scoutfs_quorum_block_class, __field(__u64, unmount_barrier) __field(__u32, crc) __field(__u8, vote_slot) + __field(__u8, flags) ), TP_fast_assign( @@ -2497,12 +2498,14 @@ DECLARE_EVENT_CLASS(scoutfs_quorum_block_class, __entry->unmount_barrier = le64_to_cpu(blk->unmount_barrier); __entry->crc = le32_to_cpu(blk->crc); __entry->vote_slot = blk->vote_slot; + __entry->flags = blk->flags; ), - TP_printk("fsid "FSID_FMT" 
io_blkno %llu hdr_blkno %llu config_gen %llu write_nr %llu elected_nr %llu umb %llu crc 0x%08x vote_slot %u", + TP_printk("fsid "FSID_FMT" io_blkno %llu hdr_blkno %llu config_gen %llu write_nr %llu elected_nr %llu umb %llu crc 0x%08x vote_slot %u flags %02x", __entry->fsid, __entry->io_blkno, __entry->hdr_blkno, __entry->config_gen, __entry->write_nr, __entry->elected_nr, - __entry->unmount_barrier, __entry->crc, __entry->vote_slot) + __entry->unmount_barrier, __entry->crc, __entry->vote_slot, + __entry->flags) ); DEFINE_EVENT(scoutfs_quorum_block_class, scoutfs_quorum_read_block, TP_PROTO(struct super_block *sb, u64 io_blkno,