mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-24 10:30:38 +00:00
For a follower to forward requests to a leader, the leader must be known. But there may be a situation where a follower does not learn about a leader for a while. This may happen when a node becomes a follower while its log is up-to-date and there are no new entries submitted to raft. In such a case the leader will send nothing to the follower, and the only way to learn about the current leader is to get a message from it. Until a new entry is added to the raft log, a follower that does not know who the leader is will not be able to add entries, which is effectively a deadlock. Note that the problem is specific to our implementation, where failure detection is done by an outside module. In vanilla raft a leader sends messages to all followers periodically, so essentially it is never idle. The patch solves this by broadcasting a specially crafted append reject to all nodes in the cluster on a tick in case a leader is not known. The leader responds to this message with an empty append request, which causes the node to learn about the leader. As an optimisation, the patch sends the broadcast only when there is actually an operation waiting for the leader to be known. Fixes #10379
2307 lines
95 KiB
C++
2307 lines
95 KiB
C++
/*
|
|
* Copyright (C) 2020-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
|
*/
|
|
|
|
#define BOOST_TEST_MODULE raft
|
|
|
|
#include "test/raft/helpers.hh"
|
|
|
|
using namespace raft;
|
|
|
|
// Checks how raft::votes tallies registered votes for simple and joint
// configurations, including configurations with non-voting members.
BOOST_AUTO_TEST_CASE(test_votes) {
    const auto unknown = raft::vote_result::UNKNOWN;
    const auto won = raft::vote_result::WON;
    const auto lost = raft::vote_result::LOST;

    auto n1 = id();

    // --- Single-member configuration ---
    raft::votes ballots(raft::configuration({n1}));
    BOOST_CHECK_EQUAL(ballots.tally_votes(), unknown);
    BOOST_CHECK_EQUAL(ballots.voters().size(), 1);
    // A vote coming from a server that is not in the configuration
    // should be ignored.
    ballots.register_vote(id(), true);
    ballots.register_vote(n1, false);
    // The quorum voted against the decision.
    BOOST_CHECK_EQUAL(ballots.tally_votes(), lost);
    // Further votes from the same server are ignored.
    ballots.register_vote(n1, true);
    ballots.register_vote(n1, true);
    BOOST_CHECK_EQUAL(ballots.tally_votes(), lost);

    // --- Two-member configuration ---
    auto n2 = id();
    ballots = raft::votes(raft::configuration({n1, n2}));
    BOOST_CHECK_EQUAL(ballots.voters().size(), 2);
    ballots.register_vote(n1, true);
    // One vote out of two is not a quorum yet.
    BOOST_CHECK_EQUAL(ballots.tally_votes(), unknown);
    ballots.register_vote(n2, false);
    // Now it is clear that there are not enough votes.
    BOOST_CHECK_EQUAL(ballots.tally_votes(), lost);

    // --- Joint configuration ---
    auto n3 = id();
    ballots = raft::votes(raft::configuration(address_set({n1}), address_set({n2, n3})));
    BOOST_CHECK_EQUAL(ballots.voters().size(), 3);
    BOOST_CHECK_EQUAL(ballots.tally_votes(), unknown);
    ballots.register_vote(n2, true);
    ballots.register_vote(n3, true);
    BOOST_CHECK_EQUAL(ballots.tally_votes(), unknown);
    ballots.register_vote(n1, false);
    BOOST_CHECK_EQUAL(ballots.tally_votes(), lost);

    // Same joint configuration, this time everybody votes in favour.
    ballots = raft::votes(raft::configuration(address_set({n1}), address_set({n2, n3})));
    ballots.register_vote(n2, true);
    ballots.register_vote(n3, true);
    ballots.register_vote(n1, true);
    BOOST_CHECK_EQUAL(ballots.tally_votes(), won);

    ballots = raft::votes(raft::configuration(address_set({n1, n2, n3}), address_set({n1})));
    BOOST_CHECK_EQUAL(ballots.voters().size(), 3);
    ballots.register_vote(n1, true);
    BOOST_CHECK_EQUAL(ballots.tally_votes(), unknown);
    // The second vote yields a majority in both the new and the old
    // configurations.
    ballots.register_vote(n2, true);
    BOOST_CHECK_EQUAL(ballots.tally_votes(), won);

    // --- Four-member configuration ---
    auto n4 = id();
    ballots = raft::votes(raft::configuration({n1, n2, n3, n4}));
    ballots.register_vote(n1, true);
    ballots.register_vote(n2, true);
    BOOST_CHECK_EQUAL(ballots.tally_votes(), unknown);
    ballots.register_vote(n3, false);
    BOOST_CHECK_EQUAL(ballots.tally_votes(), unknown);
    ballots.register_vote(n4, false);
    BOOST_CHECK_EQUAL(ballots.tally_votes(), lost);

    // --- Joint configuration over a five-node set and a three-node set ---
    auto n5 = id();
    ballots = raft::votes(raft::configuration(address_set({n1, n2, n3, n4, n5}),
            address_set({n1, n2, n3})));
    ballots.register_vote(n1, false);
    ballots.register_vote(n2, false);
    BOOST_CHECK_EQUAL(ballots.tally_votes(), lost);
    // Later favourable votes do not change the outcome.
    ballots.register_vote(n3, true);
    ballots.register_vote(n4, true);
    ballots.register_vote(n5, true);
    BOOST_CHECK_EQUAL(ballots.tally_votes(), lost);

    // --- Three voters and one non-voter ---
    ballots = raft::votes(raft::configuration({{.id = n1},
            {.id = n2}, {.id = n3}, {n4, false}}));
    ballots.register_vote(n1, true);
    ballots.register_vote(n2, true);
    BOOST_CHECK_EQUAL(ballots.tally_votes(), won);

    // --- Votes cast by a non-voting member are ignored ---
    ballots = raft::votes(raft::configuration({{.id = n1},
            {.id = n2}, {.id = n3}, {n4, false}}));
    ballots.register_vote(n1, true);
    ballots.register_vote(n4, true);
    BOOST_CHECK_EQUAL(ballots.tally_votes(), unknown);
    ballots.register_vote(n3, true);
    BOOST_CHECK_EQUAL(ballots.tally_votes(), won);

    // --- Joint configuration with a non-voting member ---
    ballots = raft::votes(raft::configuration({{.id = n1}},
            {{.id = n2}, {.id = n3}, {n4, false}}));
    BOOST_CHECK_EQUAL(ballots.voters().size(), 3);
    BOOST_CHECK_EQUAL(ballots.tally_votes(), unknown);
    ballots.register_vote(n2, true);
    ballots.register_vote(n3, true);
    ballots.register_vote(n4, true);
    BOOST_CHECK_EQUAL(ballots.tally_votes(), unknown);
    ballots.register_vote(n1, true);
    BOOST_CHECK_EQUAL(ballots.tally_votes(), won);

    // --- The same node votes in one configuration and is a non-voter
    //     in the other ---
    ballots = raft::votes(raft::configuration({{.id = n1}, {.id = n4}},
            {{.id = n2}, {.id = n3}, {n4, false}}));
    ballots.register_vote(n2, true);
    ballots.register_vote(n1, true);
    ballots.register_vote(n4, true);
    BOOST_CHECK_EQUAL(ballots.tally_votes(), unknown);
    ballots.register_vote(n3, true);
    BOOST_CHECK_EQUAL(ballots.tally_votes(), won);
}
|
|
|
|
// Exercises raft::tracker: follower progress bookkeeping (match_idx/next_idx)
// and the commit index reported by tracker.committed() while the configuration
// moves through simple and joint states, with and without non-voting members.
BOOST_AUTO_TEST_CASE(test_tracker) {
    auto id1 = id();
    raft::tracker tracker;
    raft::configuration cfg({id1});
    tracker.set_configuration(cfg, index_t{1});
    BOOST_CHECK_NE(tracker.find(id1), nullptr);
    // The node with id set during construction is assumed to be
    // the leader, since otherwise we wouldn't create a tracker
    // in the first place.
    BOOST_CHECK_EQUAL(tracker.committed(index_t{0}), index_t{0});
    // Avoid keeping a reference, follower_progress address may
    // change with configuration change
    auto pr = [&tracker](raft::server_id id) -> raft::follower_progress* {
        return tracker.find(id);
    };
    BOOST_CHECK_EQUAL(pr(id1)->match_idx, index_t{0});
    BOOST_CHECK_EQUAL(pr(id1)->next_idx, index_t{1});

    pr(id1)->accepted(index_t{1});
    BOOST_CHECK_EQUAL(pr(id1)->match_idx, index_t{1});
    BOOST_CHECK_EQUAL(pr(id1)->next_idx, index_t{2});
    BOOST_CHECK_EQUAL(tracker.committed(index_t{0}), index_t{1});

    pr(id1)->accepted(index_t{10});
    BOOST_CHECK_EQUAL(pr(id1)->match_idx, index_t{10});
    BOOST_CHECK_EQUAL(pr(id1)->next_idx, index_t{11});
    BOOST_CHECK_EQUAL(tracker.committed(index_t{0}), index_t{10});

    // Out of order confirmation is OK: an older index does not move
    // match_idx/next_idx backwards.
    pr(id1)->accepted(index_t{5});
    BOOST_CHECK_EQUAL(pr(id1)->match_idx, index_t{10});
    BOOST_CHECK_EQUAL(pr(id1)->next_idx, index_t{11});
    BOOST_CHECK_EQUAL(tracker.committed(index_t{5}), index_t{10});

    // Enter joint configuration {A,B,C}
    auto id2 = id(), id3 = id();
    cfg.enter_joint(address_set({id1, id2, id3}));
    tracker.set_configuration(cfg, index_t{1});
    BOOST_CHECK_EQUAL(tracker.committed(index_t{10}), index_t{10});
    pr(id2)->accepted(index_t{11});
    BOOST_CHECK_EQUAL(tracker.committed(index_t{10}), index_t{10});
    pr(id3)->accepted(index_t{12});
    BOOST_CHECK_EQUAL(tracker.committed(index_t{10}), index_t{10});
    pr(id1)->accepted(index_t{13});
    BOOST_CHECK_EQUAL(tracker.committed(index_t{10}), index_t{12});
    pr(id1)->accepted(index_t{14});
    BOOST_CHECK_EQUAL(tracker.committed(index_t{13}), index_t{13});

    // Leave joint configuration, final configuration is {A,B,C}
    cfg.leave_joint();
    tracker.set_configuration(cfg, index_t{1});
    BOOST_CHECK_EQUAL(tracker.committed(index_t{13}), index_t{13});

    auto id4 = id(), id5 = id();
    cfg.enter_joint(address_set({id3, id4, id5}));
    tracker.set_configuration(cfg, index_t{1});
    BOOST_CHECK_EQUAL(tracker.committed(index_t{13}), index_t{13});
    pr(id1)->accepted(index_t{15});
    BOOST_CHECK_EQUAL(tracker.committed(index_t{13}), index_t{13});
    pr(id5)->accepted(index_t{15});
    BOOST_CHECK_EQUAL(tracker.committed(index_t{13}), index_t{13});
    pr(id3)->accepted(index_t{15});
    BOOST_CHECK_EQUAL(tracker.committed(index_t{13}), index_t{15});
    // This does not advance the joint quorum
    pr(id1)->accepted(index_t{16});
    pr(id4)->accepted(index_t{17});
    pr(id5)->accepted(index_t{18});
    BOOST_CHECK_EQUAL(tracker.committed(index_t{15}), index_t{15});

    cfg.leave_joint();
    tracker.set_configuration(cfg, index_t{1});
    // Leaving joint configuration commits more entries
    BOOST_CHECK_EQUAL(tracker.committed(index_t{15}), index_t{17});

    cfg.enter_joint(address_set({id1}));
    cfg.leave_joint();
    cfg.enter_joint(address_set({id2}));
    tracker.set_configuration(cfg, index_t{1});
    // Sic: we're in a weird state. The joint commit index
    // is actually 1, since id2 is at position 1. But in
    // unwinding back the commit index would be weird,
    // so we report back the hint (prev_commit_idx).
    // As soon as the cluster enters joint configuration,
    // and old quorum is insufficient, the leader won't be able to
    // commit new entries until the new members catch up.
    BOOST_CHECK_EQUAL(tracker.committed(index_t{17}), index_t{17});
    pr(id1)->accepted(index_t{18});
    BOOST_CHECK_EQUAL(tracker.committed(index_t{17}), index_t{17});
    pr(id2)->accepted(index_t{19});
    BOOST_CHECK_EQUAL(tracker.committed(index_t{17}), index_t{18});
    pr(id1)->accepted(index_t{20});
    BOOST_CHECK_EQUAL(tracker.committed(index_t{18}), index_t{19});

    // Check that non voting member is not counted for the quorum in simple config
    cfg.enter_joint({{.id = id1}, {.id = id2}, {id3, false}});
    cfg.leave_joint();
    tracker.set_configuration(cfg, index_t{1});
    pr(id1)->accepted(index_t{30});
    pr(id2)->accepted(index_t{25});
    pr(id3)->accepted(index_t{30});
    BOOST_CHECK_EQUAL(tracker.committed(index_t{0}), index_t{25});

    // Check that non voting member is not counted for the quorum in joint config
    cfg.enter_joint({{.id = id4}, {.id = id5}});
    tracker.set_configuration(cfg, index_t{1});
    pr(id4)->accepted(index_t{30});
    pr(id5)->accepted(index_t{30});
    BOOST_CHECK_EQUAL(tracker.committed(index_t{0}), index_t{25});

    // Check the case where the same node is in both config but different voting rights
    cfg.leave_joint();
    cfg.enter_joint({{.id = id1}, {.id = id2}, {id5, false}});
    // Hand the updated configuration to the tracker; without this call the
    // check below would only re-examine the previous configuration.
    tracker.set_configuration(cfg, index_t{1});
    // id2 is still at 25, and id5 must not be counted as a voter in the
    // configuration where it is a non-voter, so the commit index stays at 25.
    BOOST_CHECK_EQUAL(tracker.committed(index_t{0}), index_t{25});
}
|
|
|
|
// Verifies that raft::log keeps last_conf_idx(), last_term() and the
// in-memory size consistent while entries are appended and snapshots are
// applied with various "trailing" values.
BOOST_AUTO_TEST_CASE(test_log_last_conf_idx) {
    server_id self = id();
    raft::configuration conf({self});
    raft::log lg{raft::snapshot_descriptor{.config = conf}};

    // Nothing but the initial snapshot so far.
    BOOST_CHECK_EQUAL(lg.last_conf_idx(), 0);

    add_entry(lg, conf);
    BOOST_CHECK_EQUAL(lg.last_conf_idx(), 1);

    add_entry(lg, log_entry::dummy{});
    add_entry(lg, conf);
    BOOST_CHECK_EQUAL(lg.last_conf_idx(), 3);

    // Applying a snapshot truncates the log and resets last_conf_idx().
    lg.apply_snapshot(log_snapshot(lg, lg.last_idx()), 0);
    BOOST_CHECK_EQUAL(lg.last_conf_idx(), lg.get_snapshot().idx);
    // last_term() must stay correct across snapshotting as well.
    BOOST_CHECK_EQUAL(lg.last_term(), lg.get_snapshot().term);
    BOOST_CHECK(lg.term_for(lg.get_snapshot().idx));
    BOOST_CHECK_EQUAL(lg.term_for(lg.get_snapshot().idx).value(), lg.get_snapshot().term);
    BOOST_CHECK(! lg.term_for(lg.last_idx() - index_t{1}));

    add_entry(lg, log_entry::dummy{});
    BOOST_CHECK(lg.term_for(lg.last_idx()));
    add_entry(lg, log_entry::dummy{});

    // A snapshot placed beyond the end of the log leaves a gap between the
    // old entries and the snapshot. Keeping the old entries would violate
    // log continuity, so all of them must be dropped even though a trailing
    // value is supplied.
    const size_t gap = 10;
    lg.apply_snapshot(log_snapshot(lg, lg.last_idx() + index_t{gap}), gap * 2);
    BOOST_CHECK(lg.empty());
    BOOST_CHECK_EQUAL(lg.next_idx(), lg.get_snapshot().idx + index_t{1});

    add_entry(lg, log_entry::dummy{});
    BOOST_CHECK_EQUAL(lg.in_memory_size(), 1);
    add_entry(lg, log_entry::dummy{});
    BOOST_CHECK_EQUAL(lg.in_memory_size(), 2);

    // Trailing longer than the log: nothing is dropped.
    lg.apply_snapshot(log_snapshot(lg, lg.last_idx()), 3);
    BOOST_CHECK_EQUAL(lg.in_memory_size(), 2);

    // Trailing exactly as long as the log: nothing is dropped either.
    add_entry(lg, log_entry::dummy{});
    BOOST_CHECK_EQUAL(lg.in_memory_size(), 3);
    lg.apply_snapshot(log_snapshot(lg, lg.last_idx()), 3);
    BOOST_CHECK_EQUAL(lg.in_memory_size(), 3);
    BOOST_CHECK_EQUAL(lg.last_conf_idx(), lg.get_snapshot().idx);

    // Trailing shorter than the log: only the trailing part is kept.
    add_entry(lg, log_entry::dummy{});
    lg.apply_snapshot(log_snapshot(lg, lg.last_idx()), 1);
    BOOST_CHECK_EQUAL(lg.in_memory_size(), 1);

    // The configuration of the snapshot wins over configuration entries
    // that remain in the trailing part of the log.
    add_entry(lg, conf);
    add_entry(lg, conf);
    add_entry(lg, log_entry::dummy{});
    auto snapshot_at = lg.last_idx();
    lg.apply_snapshot(log_snapshot(lg, snapshot_at), 10);
    BOOST_CHECK_EQUAL(lg.last_conf_idx(), snapshot_at);

    // A configuration entry with an index above the snapshot index is the
    // one reported by last_conf_idx().
    add_entry(lg, log_entry::dummy{});
    snapshot_at = lg.last_idx();
    add_entry(lg, conf);
    add_entry(lg, conf);
    lg.apply_snapshot(log_snapshot(lg, snapshot_at), 10);
    BOOST_CHECK_EQUAL(lg.last_conf_idx(), lg.last_idx());
}
|
|
|
|
// Shared body of the single-node election tests.
//
// Builds a one-member cluster using the given fsm configuration, lets the
// election timeout elapse and verifies that the node is a leader, emits no
// messages, appends a single dummy entry and commits it on the next round.
void test_election_single_node_helper(raft::fsm_config fcfg) {
    server_id self = id();
    raft::configuration conf({self});
    raft::log lg{raft::snapshot_descriptor{.config = conf}};
    raft::fsm fsm(self, term_t{}, server_id{}, std::move(lg), trivial_failure_detector, fcfg);

    election_timeout(fsm);

    // With a single member the node is a leader right after the timeout.
    BOOST_CHECK(fsm.is_leader());

    auto out = fsm.get_output();

    BOOST_CHECK(out.term_and_vote);
    BOOST_CHECK(out.term_and_vote->first);
    BOOST_CHECK(out.term_and_vote->second);
    // There is nobody to talk to.
    BOOST_CHECK(out.messages.empty());
    // A freshly elected leader appends exactly one dummy entry.
    BOOST_CHECK_EQUAL(out.log_entries.size(), 1);
    if (!out.log_entries.empty()) {
        BOOST_CHECK(std::holds_alternative<raft::log_entry::dummy>(out.log_entries[0]->data));
    }
    BOOST_CHECK(out.committed.empty());

    // Another elapsed timeout must not trigger a spurious election:
    // the leader stays a leader.
    election_timeout(fsm);
    BOOST_CHECK(fsm.is_leader());

    out = fsm.get_output();
    BOOST_CHECK(!out.term_and_vote);
    BOOST_CHECK(out.messages.empty());
    BOOST_CHECK(out.log_entries.empty());
    // By now the dummy entry is reported as committed.
    BOOST_CHECK_EQUAL(out.committed.size(), 1);
    if (!out.committed.empty()) {
        BOOST_CHECK(std::holds_alternative<raft::log_entry::dummy>(out.committed[0]->data));
    }
}
|
|
|
|
// Single-node election using the default fsm configuration (fsm_cfg).
BOOST_AUTO_TEST_CASE(test_election_single_node) {
    const auto& default_cfg = fsm_cfg;
    test_election_single_node_helper(default_cfg);
}
|
|
// Test that adding an entry to a single-node cluster
|
|
// does not lead to RPC
|
|
BOOST_AUTO_TEST_CASE(test_single_node_is_quiet) {
|
|
|
|
server_id id1 = id();
|
|
raft::configuration cfg({id1});
|
|
raft::log log{raft::snapshot_descriptor{.config = cfg}};
|
|
|
|
auto fsm = create_follower(id1, std::move(log));
|
|
|
|
election_timeout(fsm);
|
|
|
|
// Immediately converts from leader to follower if quorum=1
|
|
BOOST_CHECK(fsm.is_leader());
|
|
|
|
(void) fsm.get_output();
|
|
|
|
fsm.add_entry(raft::command{});
|
|
|
|
BOOST_CHECK(fsm.get_output().messages.empty());
|
|
|
|
fsm.tick();
|
|
|
|
BOOST_CHECK(fsm.get_output().messages.empty());
|
|
}
|
|
|
|
// Once a follower's progress is in SNAPSHOT state, the leader must not
// send it anything on ticks.
BOOST_AUTO_TEST_CASE(test_snapshot_follower_is_quiet) {
    server_id self = id(), peer = id();

    raft::configuration conf({self, peer});
    raft::log lg(raft::snapshot_descriptor{.idx = index_t{999}, .config = conf});

    // One stable entry right after the snapshot.
    lg.emplace_back(seastar::make_lw_shared<raft::log_entry>(raft::log_entry{term_t{10}, index_t{1000}}));
    lg.stable_to(lg.last_idx());

    fsm_debug fsm(self, term_t{10}, server_id{}, std::move(lg), trivial_failure_detector, fsm_cfg);

    // Win the election with the peer's vote.
    election_timeout(fsm);
    fsm.step(peer, raft::vote_reply{fsm.get_current_term(), true});
    BOOST_CHECK(fsm.is_leader());

    // Drop the output produced so far.
    (void) fsm.get_output();

    // The peer rejects with indexes that point into the snapshot.
    fsm.step(peer, raft::append_reply{fsm.get_current_term(), raft::index_t{1}, raft::append_reply::rejected{raft::index_t{1000}, raft::index_t{1}}});

    BOOST_CHECK(fsm.get_progress(peer).state == raft::follower_progress::state::SNAPSHOT);

    // Drop the output once more.
    (void) fsm.get_output();

    // No tick may generate traffic from now on.
    int remaining = 100;
    while (remaining-- > 0) {
        fsm.tick();
        BOOST_CHECK(fsm.get_output().messages.empty());
    }
}
|
|
|
|
// Walks a node of a two-member cluster through follower -> candidate ->
// leader -> follower transitions and checks that it refuses to vote for a
// peer after having voted for itself.
BOOST_AUTO_TEST_CASE(test_election_two_nodes) {
    discrete_failure_detector fd;

    server_id self = id(), peer = id();

    raft::configuration conf({self, peer});
    raft::log lg{raft::snapshot_descriptor{.config = conf}};

    auto fsm = create_follower(self, std::move(lg), fd);

    // Every node starts out as a follower.
    BOOST_CHECK(fsm.is_follower());

    // The election timeout turns the follower into a candidate...
    election_timeout(fsm);
    BOOST_CHECK(fsm.is_candidate());

    // ...and it remains one while nobody answers.
    election_timeout(fsm);
    BOOST_CHECK(fsm.is_candidate());

    auto out = fsm.get_output();
    BOOST_CHECK(out.term_and_vote);
    // A granted vote completes the quorum of two: we are the leader.
    fsm.step(peer, raft::vote_reply{out.term_and_vote->first, true});
    BOOST_CHECK(fsm.is_leader());
    // A late reply belonging to an earlier term changes nothing.
    fsm.step(peer, raft::vote_reply{out.term_and_vote->first - term_t{1}, false});
    BOOST_CHECK(fsm.is_leader());

    // After the election threshold has passed with all peers marked dead,
    // a message with a newer term converts the node to a follower.
    fd.mark_all_dead();
    election_threshold(fsm);
    // current_term + 2 is used so that the switch happens even if the
    // fsm has meanwhile become a candidate by itself.
    fsm.step(peer, raft::vote_request{out.term_and_vote->first + term_t{2}});
    BOOST_CHECK(fsm.is_follower());

    // A candidate steps down on a newer term as well.
    election_timeout(fsm);
    BOOST_CHECK(fsm.is_candidate());
    out = fsm.get_output();
    BOOST_CHECK(out.term_and_vote);
    fsm.step(peer, raft::vote_request{out.term_and_vote->first + term_t{1}});
    BOOST_CHECK(fsm.is_follower());

    // A node that has already voted for itself must not grant its vote
    // to somebody else.
    (void) fsm.get_output();
    for (; fsm.is_follower(); fsm.tick()) {
        // keep ticking until the node leaves the follower state
    }
    BOOST_CHECK(fsm.is_candidate());
    out = fsm.get_output();
    // Feed the node's own vote request back to it as if it came from the peer.
    auto own_request = std::get<raft::vote_request>(out.messages.back().second);
    fsm.step(peer, std::move(own_request));
    // The fsm could work out that this round leads nowhere, but it simply
    // keeps waiting for a vote_reply.
    BOOST_CHECK(fsm.is_candidate());
    out = fsm.get_output();
    auto answer = std::get<raft::vote_reply>(out.messages.back().second);
    BOOST_CHECK(!answer.vote_granted);
}
|
|
|
|
// Four-member cluster: a node that knows a leader for the current term
// denies votes, and later needs two granted votes to become leader itself.
BOOST_AUTO_TEST_CASE(test_election_four_nodes) {
    discrete_failure_detector fd;

    server_id self = id(), peer2 = id(), peer3 = id(), peer4 = id();

    raft::configuration conf({self, peer2, peer3, peer4});
    raft::log lg{raft::snapshot_descriptor{.config = conf}};

    auto fsm = create_follower(self, std::move(lg), fd);

    // Every node starts out as a follower.
    BOOST_CHECK(fsm.is_follower());

    // An append request from peer4 announces a leader at a new term.
    fsm.step(peer4, raft::append_request{term_t{1}, index_t{1}, term_t{1}});

    (void) fsm.get_output();

    // A vote request for that very term has to be denied: although we did
    // not vote, we already know a leader for this term.
    fsm.step(peer3, raft::vote_request{term_t{1}, index_t{1}, term_t{1}});

    auto out = fsm.get_output();
    auto answer = std::get<raft::vote_reply>(out.messages.back().second);
    BOOST_CHECK(!answer.vote_granted);

    // Let this term run out and start a new election.
    fd.mark_all_dead();
    election_timeout(fsm);
    BOOST_CHECK(fsm.is_candidate());

    out = fsm.get_output();
    BOOST_CHECK(out.term_and_vote);
    auto election_term = out.term_and_vote->first;

    // The first granted vote is not enough for a quorum.
    fsm.step(peer2, raft::vote_reply{election_term, true});
    BOOST_CHECK(fsm.is_candidate());

    // The second one completes the quorum.
    fsm.step(peer3, raft::vote_reply{election_term, true});
    BOOST_CHECK(fsm.is_leader());
}
|
|
|
|
// Single-node election with prevoting switched on.
BOOST_AUTO_TEST_CASE(test_election_single_node_prevote) {
    auto prevote_cfg = fsm_cfg;
    prevote_cfg.enable_prevoting = true;

    test_election_single_node_helper(prevote_cfg);
}
|
|
|
|
// Two-member cluster with prevoting enabled: checks the prevote candidate
// state, term handling and the replies to prevote requests carrying older
// and newer terms.
BOOST_AUTO_TEST_CASE(test_election_two_nodes_prevote) {
    auto prevote_cfg = fsm_cfg;
    prevote_cfg.enable_prevoting = true;

    server_id self = id(), peer = id();

    raft::configuration conf({self, peer});
    raft::log lg{raft::snapshot_descriptor{.config = conf}};

    raft::fsm fsm(self, term_t{}, server_id{}, std::move(lg), trivial_failure_detector, prevote_cfg);

    // Every node starts out as a follower.
    BOOST_CHECK(fsm.is_follower());

    // The election timeout turns the follower into a prevote candidate...
    election_timeout(fsm);
    BOOST_CHECK(fsm.is_prevote_candidate());
    // ...without touching the term.
    BOOST_CHECK_EQUAL(fsm.get_current_term(), term_t{});

    // While nobody answers, the state and the term remain as they are.
    election_timeout(fsm);
    BOOST_CHECK(fsm.is_prevote_candidate());
    BOOST_CHECK_EQUAL(fsm.get_current_term(), term_t{});

    auto out = fsm.get_output();
    BOOST_CHECK(!out.term_and_vote);
    // A granted prevote completes the quorum of two, which makes us a
    // regular candidate.
    fsm.step(peer, raft::vote_reply{fsm.get_current_term(), true, true});
    BOOST_CHECK(fsm.is_candidate() && !fsm.is_prevote_candidate());
    // This time the term is bumped.
    BOOST_CHECK_EQUAL(fsm.get_current_term(), term_t{1});

    election_timeout(fsm);
    // A rejected prevote that carries a higher term sends the node back
    // to the follower state at that term.
    fsm.step(peer, raft::vote_reply{term_t{2}, false, true});
    BOOST_CHECK(fsm.is_follower());
    BOOST_CHECK_EQUAL(fsm.get_current_term(), term_t{2});

    election_timeout(fsm);
    (void)fsm.get_output();
    // A prevote request with a smaller term is rejected, and the reject
    // carries our newer term.
    fsm.step(peer, raft::vote_request{term_t{1}, index_t{}, term_t{}, true});
    out = fsm.get_output();
    BOOST_CHECK_EQUAL(out.messages.size(), 1);
    auto answer = std::get<raft::vote_reply>(out.messages.back().second);
    BOOST_CHECK(answer.current_term == term_t{2} && !answer.vote_granted);

    // A prevote request with a higher term is answered with that future
    // term, while the local term is left alone.
    // First become a follower again.
    fsm.step(peer, raft::vote_reply{term_t{3}, false, true});
    BOOST_CHECK(fsm.is_follower());
    // Then deliver the prevote request with the higher term.
    fsm.step(peer, raft::vote_request{term_t{4}, index_t{}, term_t{}, true});
    out = fsm.get_output();
    BOOST_CHECK_EQUAL(out.messages.size(), 1);
    // The reply echoes the term of the request...
    answer = std::get<raft::vote_reply>(out.messages.back().second);
    BOOST_CHECK(answer.current_term == term_t{4} && answer.vote_granted);
    // ...but the term of the fsm is unchanged.
    BOOST_CHECK_EQUAL(fsm.get_current_term(), term_t{3});
}
|
|
|
|
// Four-member cluster with prevoting enabled: prevotes are denied while a
// leader is known, two granted prevotes are needed to become a regular
// candidate, and a prevote for a future term is answered even after a
// regular vote request has been handled.
BOOST_AUTO_TEST_CASE(test_election_four_nodes_prevote) {
    auto prevote_cfg = fsm_cfg;
    prevote_cfg.enable_prevoting = true;

    discrete_failure_detector fd;

    server_id self = id(), peer2 = id(), peer3 = id(), peer4 = id();

    raft::configuration conf({self, peer2, peer3, peer4});
    raft::log lg{raft::snapshot_descriptor{.config = conf}};

    raft::fsm fsm(self, term_t{}, server_id{}, std::move(lg), fd, prevote_cfg);

    // Every node starts out as a follower.
    BOOST_CHECK(fsm.is_follower());

    // An append request from peer4 announces a leader at a new term.
    fsm.step(peer4, raft::append_request{term_t{1}, index_t{1}, term_t{1}});

    (void) fsm.get_output();

    // A prevote request for that very term has to be denied: although we
    // did not vote, we already know a leader for this term.
    fsm.step(peer3, raft::vote_request{term_t{1}, index_t{1}, term_t{1}, true});

    auto out = fsm.get_output();
    auto answer = std::get<raft::vote_reply>(out.messages.back().second);
    BOOST_CHECK(!answer.vote_granted && answer.is_prevote);

    // Let this term run out and start a new election round.
    fd.mark_all_dead();
    election_timeout(fsm);
    BOOST_CHECK(fsm.is_candidate() && fsm.is_prevote_candidate());

    out = fsm.get_output();
    BOOST_CHECK(!out.term_and_vote);
    // The first granted prevote is not enough for a quorum.
    fsm.step(peer2, raft::vote_reply{fsm.get_current_term() + term_t{1}, true, true});
    BOOST_CHECK(fsm.is_candidate() && fsm.is_prevote_candidate());

    // The second one completes the quorum.
    fsm.step(peer3, raft::vote_reply{fsm.get_current_term() + term_t{1}, true, true});
    BOOST_CHECK(fsm.is_candidate() && !fsm.is_prevote_candidate());

    // A prevote for a future term must be answered even though we have
    // voted already.
    // Deliver a regular vote request first...
    fsm.step(peer2, raft::vote_request{fsm.get_current_term(), index_t{1}, term_t{1}, false});
    // ...throw away the messages it produced...
    (void)fsm.get_output();
    // ...and then ask for a prevote at a future term.
    fsm.step(peer3, raft::vote_request{fsm.get_current_term() + term_t{1}, index_t{1}, term_t{1}, true});
    out = fsm.get_output();
    BOOST_CHECK_EQUAL(out.messages.size(), 1);
    answer = std::get<raft::vote_reply>(out.messages.back().second);
    BOOST_CHECK(answer.vote_granted && answer.is_prevote);
}
|
|
|
|
// Checks when a follower grants its vote depending on how the candidate's
// last log index/term compare with the follower's own log
// (snapshot at index 999, one entry with term 10 at index 1000).
BOOST_AUTO_TEST_CASE(test_log_matching_rule) {
    server_id self = id(), candidate = id(), other = id();

    raft::configuration conf({self, candidate, other});
    raft::log lg(raft::snapshot_descriptor{.idx = index_t{999}, .config = conf});

    lg.emplace_back(seastar::make_lw_shared<raft::log_entry>(raft::log_entry{term_t{10}, index_t{1000}}));
    lg.stable_to(lg.last_idx());

    raft::fsm fsm(self, term_t{10}, server_id{}, std::move(lg), trivial_failure_detector, fsm_cfg);

    // Every node starts out as a follower.
    BOOST_CHECK(fsm.is_follower());

    (void) fsm.get_output();

    // A request whose term is older than ours produces no reply at all.
    fsm.step(candidate, raft::vote_request{term_t{9}, index_t{1001}, term_t{11}});
    auto stale = fsm.get_output();
    BOOST_CHECK(stale.messages.empty());

    // Delivers a vote request from the candidate and hands back the last
    // message the fsm produced, interpreted as a vote_reply.
    auto ask_for_vote = [&](term_t term, index_t last_log_idx, term_t last_log_term) -> raft::vote_reply {
        fsm.step(candidate, raft::vote_request{term, last_log_idx, last_log_term});
        auto produced = fsm.get_output();
        return std::get<raft::vote_reply>(produced.messages.back().second);
    };

    // Candidate's last index is too small: denied.
    BOOST_CHECK(!ask_for_vote(term_t{11}, index_t{999}, term_t{10}).vote_granted);
    // Candidate's last term is too small: denied.
    BOOST_CHECK(!ask_for_vote(term_t{12}, index_t{1002}, term_t{9}).vote_granted);
    // Candidate's last term and index equal the voter's: granted.
    BOOST_CHECK(ask_for_vote(term_t{13}, index_t{1000}, term_t{10}).vote_granted);
    // Same last term, greater index than the voter's: granted.
    BOOST_CHECK(ask_for_vote(term_t{14}, index_t{1001}, term_t{10}).vote_granted);
    // Both last term and index greater than the voter's: granted.
    BOOST_CHECK(ask_for_vote(term_t{15}, index_t{1001}, term_t{11}).vote_granted);
}
|
|
|
|
// Adds a third node to a two-node cluster through the leader's fsm and
// follows the configuration change: entering the joint configuration,
// leaving it, rejecting concurrent changes while one is in progress, and
// finally accepting a new configuration change once the previous one is done.
BOOST_AUTO_TEST_CASE(test_confchange_add_node) {

    server_id id1 = id(), id2 = id(), id3 = id();

    raft::configuration cfg({id1, id2});
    raft::log log(raft::snapshot_descriptor{.idx = index_t{100}, .config = cfg});

    auto fsm = create_follower(id1, std::move(log));

    // Initial state is follower
    BOOST_CHECK(fsm.is_follower());

    // Turn to a leader
    election_timeout(fsm);
    BOOST_CHECK(fsm.is_candidate());
    auto output = fsm.get_output();
    BOOST_CHECK(output.term_and_vote);
    fsm.step(id2, raft::vote_reply{output.term_and_vote->first, true});
    BOOST_CHECK(fsm.is_leader());

    output = fsm.get_output();
    // A new leader applies one dummy entry
    BOOST_CHECK_EQUAL(output.log_entries.size(), 1);
    if (output.log_entries.size()) {
        BOOST_CHECK(std::holds_alternative<raft::log_entry::dummy>(output.log_entries[0]->data));
    }
    BOOST_CHECK(output.committed.empty());
    // accept dummy entry, otherwise no more entries will be sent
    BOOST_CHECK_EQUAL(output.messages.size(), 1);
    auto msg = std::get<raft::append_request>(output.messages.back().second);
    auto idx = msg.entries.back()->idx;
    fsm.step(id2, raft::append_reply{msg.current_term, idx, raft::append_reply::accepted{idx}});

    raft::configuration newcfg({id1, id2, id3});
    // Suggest a confchange.
    fsm.add_entry(newcfg);
    // Can't have two confchanges in progress.
    BOOST_CHECK_THROW(fsm.add_entry(newcfg), raft::conf_change_in_progress);
    // Entered joint configuration immediately.
    BOOST_CHECK(fsm.get_configuration().is_joint());
    BOOST_CHECK_EQUAL(fsm.get_configuration().previous.size(), 2);
    BOOST_CHECK_EQUAL(fsm.get_configuration().current.size(), 3);
    output = fsm.get_output();
    // The output contains a log entry to be committed.
    // Once it's committed, it will be replicated.
    // The output must contain messages both for id2 and id3
    BOOST_CHECK_EQUAL(output.log_entries.size(), 1);
    // Append entry for id2 and id3
    BOOST_CHECK_EQUAL(output.messages.size(), 2);
    msg = std::get<raft::append_request>(output.messages.back().second);
    idx = msg.entries.back().get()->idx;
    // In order to accept a configuration change
    // we need one ACK, since there is a quorum overlap.
    // Strictly speaking the new node needs to install a snapshot,
    // first, for simplicity let's assume it's happened already.

    fsm.step(id2, raft::append_reply{msg.current_term, idx, raft::append_reply::accepted{idx}});
    // One reply is enough to commit the joint configuration,
    // since there is a quorum overlap between the two
    // configurations.
    BOOST_CHECK(! fsm.get_configuration().is_joint());
    // Still can't have two confchanges in progress, even though
    // we left joint already, the final configuration is not
    // committed yet.
    BOOST_CHECK_THROW(fsm.add_entry(newcfg), raft::conf_change_in_progress);
    output = fsm.get_output();
    // A log entry for the final configuration
    BOOST_CHECK_EQUAL(output.log_entries.size(), 1);
    // AppendEntries messages for the final configuration
    BOOST_CHECK(output.messages.size() >= 1);
    msg = std::get<raft::append_request>(output.messages.back().second);
    idx = msg.entries.back().get()->idx;
    // Ack AppendEntries for the final configuration
    fsm.step(id2, raft::append_reply{msg.current_term, idx, raft::append_reply::accepted{idx}});
    BOOST_CHECK_EQUAL(fsm.get_configuration().current.size(), 3);
    fsm.step(id3, raft::append_reply{msg.current_term, idx, raft::append_reply::accepted{idx}});
    // Check that we can start a new confchange. Submit the freshly built
    // configuration (previously it was constructed but never used, and the
    // already applied one was re-submitted instead).
    raft::configuration newcfg2({id1, id2});
    fsm.add_entry(newcfg2);
}
|
|
|
|
BOOST_AUTO_TEST_CASE(test_confchange_remove_node) {
    // Drives a single fsm (id1) through shrinking the configuration from
    // {id1, id2, id3} to {id1, id2}, checking the joint and the final
    // configuration entries it emits on the way.

    server_id id1 = id(), id2 = id(), id3 = id();

    raft::configuration cfg({id1, id2, id3});
    raft::log log(raft::snapshot_descriptor{.idx = index_t{100}, .config = cfg});

    auto fsm = create_follower(id1, std::move(log));

    // Initial state is follower
    BOOST_CHECK(fsm.is_follower());

    // Turn to a leader
    election_timeout(fsm);
    BOOST_CHECK(fsm.is_candidate());
    auto output = fsm.get_output();
    // Vote requests to id2 and id3
    BOOST_CHECK_EQUAL(output.messages.size(), 2);
    if (output.messages.size() > 0) {
        BOOST_CHECK(std::holds_alternative<raft::vote_request>(output.messages[0].second));
    }
    if (output.messages.size() > 1) {
        BOOST_CHECK(std::holds_alternative<raft::vote_request>(output.messages[1].second));
    }

    BOOST_CHECK(output.term_and_vote);
    fsm.step(id2, raft::vote_reply{output.term_and_vote->first, true});
    BOOST_CHECK(fsm.is_leader());
    output = fsm.get_output();
    // A new leader emits exactly one entry, and it is a dummy.
    BOOST_CHECK_EQUAL(output.log_entries.size(), 1);
    if (output.log_entries.size()) {
        BOOST_CHECK(std::holds_alternative<raft::log_entry::dummy>(output.log_entries[0]->data));
    }
    // accept dummy entry, otherwise no more entries will be sent
    BOOST_CHECK_EQUAL(output.messages.size(), 2);
    auto msg = std::get<raft::append_request>(output.messages.back().second);
    auto idx = msg.entries.back()->idx;
    fsm.step(id2, raft::append_reply{msg.current_term, idx, raft::append_reply::accepted{idx}});
    fsm.step(id3, raft::append_reply{msg.current_term, idx, raft::append_reply::accepted{idx}});

    raft::configuration newcfg({id1, id2});
    // Suggest a confchange.
    fsm.add_entry(newcfg);
    // Entered joint configuration immediately.
    BOOST_CHECK(fsm.get_configuration().is_joint());
    BOOST_CHECK_EQUAL(fsm.get_configuration().current.size(), 2);
    BOOST_CHECK_EQUAL(fsm.get_configuration().previous.size(), 3);
    output = fsm.get_output();
    // The output contains a log entry to be committed.
    // Once it's committed, it will be replicated.
    BOOST_CHECK_EQUAL(output.log_entries.size(), 1);
    if (output.log_entries.size()) {
        BOOST_CHECK(std::holds_alternative<raft::configuration>(output.log_entries[0]->data));
    }
    BOOST_CHECK_EQUAL(output.messages.size(), 2); // Configuration change sent to id2 and id3
    BOOST_REQUIRE_NO_THROW(msg = std::get<raft::append_request>(output.messages[0].second));
    BOOST_CHECK_EQUAL(msg.entries.size(), 1);
    BOOST_CHECK(std::holds_alternative<raft::configuration>(msg.entries[0]->data));
    idx = msg.entries.back()->idx;
    // Snapshot is at 100, so the joint configuration is expected at 102.
    BOOST_CHECK_EQUAL(idx, 102);
    // Ack AppendEntries for the joint configuration.
    // In order to accept a configuration change
    // we need one ACK, since there is a quorum overlap.
    fsm.step(id2, raft::append_reply{msg.current_term, idx, raft::append_reply::accepted{idx}});

    // Final configuration is proposed
    output = fsm.get_output();
    // AppendEntries messages for the final configuration
    BOOST_CHECK_EQUAL(output.messages.size(), 1);

    BOOST_REQUIRE_NO_THROW(msg = std::get<raft::append_request>(output.messages[0].second));
    // A log entry for the final configuration
    BOOST_CHECK_EQUAL(msg.entries.size(), 1);
    // Guard on the container that is indexed below (msg.entries), not on
    // output.log_entries.
    if (!msg.entries.empty()) {
        BOOST_CHECK(std::holds_alternative<raft::configuration>(msg.entries[0]->data));
    }

    idx = msg.entries.back()->idx;
    BOOST_CHECK_EQUAL(idx, 103);

    BOOST_CHECK_EQUAL(fsm.get_configuration().current.size(), 2);
    BOOST_CHECK(!fsm.get_configuration().is_joint());

    // Ack AppendEntries for final configuration
    fsm.step(id2, raft::append_reply{msg.current_term, idx, raft::append_reply::accepted{idx}});

    // Check that we can start a new confchange
    // NOTE(review): newcfg2 is constructed but never submitted; the entry
    // added below is newcfg again, and the size check matches newcfg.
    // Confirm which configuration was meant to be proposed here.
    raft::configuration newcfg2({id1, id2, id3});
    fsm.add_entry(newcfg);
    BOOST_CHECK_EQUAL(fsm.get_configuration().current.size(), 2);
}
|
|
|
|
BOOST_AUTO_TEST_CASE(test_confchange_replace_node) {
    // Single-fsm scenario: id1 leads {id1, id2, id3} and swaps id3 for id4,
    // passing through a joint configuration on the way.
    server_id id1 = id();
    server_id id2 = id();
    server_id id3 = id();
    server_id id4 = id();

    raft::configuration old_cfg({id1, id2, id3});
    raft::log start_log(raft::snapshot_descriptor{.idx = index_t{100}, .config = old_cfg});

    auto fsm = create_follower(id1, std::move(start_log));

    // A freshly created fsm starts out as a follower.
    BOOST_CHECK(fsm.is_follower());

    // Campaign, then win the election with id2's vote.
    election_timeout(fsm);
    BOOST_CHECK(fsm.is_candidate());
    auto out = fsm.get_output();
    BOOST_CHECK(out.term_and_vote);
    fsm.step(id2, raft::vote_reply{out.term_and_vote->first, true});
    BOOST_CHECK(fsm.is_leader());

    // The first output of the new leader carries one dummy entry and
    // nothing committed.
    out = fsm.get_output();
    BOOST_CHECK_EQUAL(out.log_entries.size(), 1);
    if (!out.log_entries.empty()) {
        BOOST_CHECK(std::holds_alternative<raft::log_entry::dummy>(out.log_entries[0]->data));
    }
    BOOST_CHECK(out.committed.empty());
    BOOST_CHECK_EQUAL(out.messages.size(), 2);

    auto req = std::get<raft::append_request>(out.messages.back().second);
    auto acked = req.entries.back()->idx;
    // Accepts `acked` on behalf of `from`, using the term of the latest `req`.
    auto accept_from = [&](server_id from) {
        fsm.step(from, raft::append_reply{req.current_term, acked, raft::append_reply::accepted{acked}});
    };

    // Accept the dummy entry, otherwise no more entries will be sent.
    accept_from(id2);
    accept_from(id3);

    // Propose the replacement; the fsm is in joint configuration right away,
    // with three members on each side.
    raft::configuration replacement({id1, id2, id4});
    fsm.add_entry(replacement);
    BOOST_CHECK(fsm.get_configuration().is_joint());
    BOOST_CHECK_EQUAL(fsm.get_configuration().current.size(), 3);
    BOOST_CHECK_EQUAL(fsm.get_configuration().previous.size(), 3);

    out = fsm.get_output();
    BOOST_REQUIRE_NO_THROW(req = std::get<raft::append_request>(out.messages[0].second));
    acked = req.entries.back()->idx;
    // A single ack from id2 is delivered here; after it the fsm is
    // expected to have left the joint configuration.
    accept_from(id2);
    BOOST_CHECK(!fsm.get_configuration().is_joint());

    // The final configuration shows up as a new log entry...
    out = fsm.get_output();
    BOOST_CHECK_EQUAL(out.log_entries.size(), 1);
    if (!out.log_entries.empty()) {
        BOOST_CHECK(std::holds_alternative<raft::configuration>(out.log_entries[0]->data));
    }
    // ...and in at least one AppendEntries message.
    BOOST_CHECK(out.messages.size() >= 1);
    req = std::get<raft::append_request>(out.messages.back().second);
    acked = req.entries.back()->idx;

    // Ack AppendEntries for the final configuration.
    accept_from(id2);
    BOOST_CHECK_EQUAL(fsm.get_configuration().current.size(), 3);
    BOOST_CHECK(!fsm.get_configuration().is_joint());
}
|
|
|
|
|
|
BOOST_AUTO_TEST_CASE(test_leader_stepdown) {
    // Exercises leadership transfer (timeout_now) on a single fsm: explicit
    // transfer_leadership() calls, and stepdown after the leader has been
    // removed from the configuration.

    server_id id1 = id(), id2 = id(), id3 = id();

    // id3 is the non-voting member in the first part of the test.
    raft::configuration cfg({{id1}, {id2}, {id3, false}});
    raft::log log(raft::snapshot_descriptor{.config = cfg});

    raft::fsm fsm(id1, term_t{1}, /* voted for */ server_id{}, std::move(log), trivial_failure_detector, fsm_cfg);

    // Check that we move to candidate state on timeout_now message
    fsm.step(id2, raft::timeout_now{fsm.get_current_term()});
    BOOST_CHECK(fsm.is_candidate());
    auto output = fsm.get_output();
    auto vote_request = std::get<raft::vote_request>(output.messages.back().second);
    // Check that vote_request has `force` flag set.
    BOOST_CHECK(vote_request.force);

    // Turn to a leader
    fsm.step(id2, raft::vote_reply{fsm.get_current_term(), true});
    BOOST_CHECK(fsm.is_leader());

    // make id2's match idx to be up-to-date
    output = fsm.get_output();
    auto append = std::get<raft::append_request>(output.messages.back().second);
    auto idx = append.entries.back()->idx;
    fsm.step(id2, raft::append_reply{fsm.get_current_term(), index_t{}, raft::append_reply::accepted{idx}});

    // start leadership transfer while there is a fully up-to-date follower
    fsm.transfer_leadership();

    // Check that timeout_now message is sent
    output = fsm.get_output();
    BOOST_CHECK_EQUAL(output.messages.size(), 1);
    BOOST_CHECK(std::holds_alternative<raft::timeout_now>(output.messages.back().second));

    // Turn to a leader again
    // ... first turn to a follower
    fsm.step(id2, raft::vote_request{fsm.get_current_term() + term_t{1}, index_t{10}, term_t{}, false, true});
    BOOST_CHECK(fsm.is_follower());
    (void)fsm.get_output();
    // ... and now leader
    election_timeout(fsm);
    BOOST_CHECK(fsm.is_candidate());
    output = fsm.get_output();
    fsm.step(id2, raft::vote_reply{fsm.get_current_term(), true});
    BOOST_CHECK(fsm.is_leader());
    output = fsm.get_output();
    append = std::get<raft::append_request>(output.messages.back().second);
    idx = append.entries.back()->idx;

    // start leadership transfer while there is no fully up-to-date follower
    // (dummy entry appended by become_leader is not replicated yet)
    fsm.transfer_leadership();

    // check that no timeout_now message was sent
    output = fsm.get_output();
    BOOST_CHECK_EQUAL(output.messages.size(), 0);

    // Now make non voting follower match the log and see that timeout_now is not sent
    fsm.step(id3, raft::append_reply{fsm.get_current_term(), index_t{}, raft::append_reply::accepted{idx}});
    output = fsm.get_output();
    BOOST_CHECK_EQUAL(output.messages.size(), 0);

    // Now make voting follower match the log and see that timeout_now is sent
    fsm.step(id2, raft::append_reply{fsm.get_current_term(), index_t{}, raft::append_reply::accepted{idx}});
    output = fsm.get_output();
    BOOST_CHECK_EQUAL(output.messages.size(), 1);
    BOOST_CHECK(std::holds_alternative<raft::timeout_now>(output.messages.back().second));

    // Turn to a leader yet again
    // ... first turn to a follower
    fsm.step(id2, raft::vote_request{fsm.get_current_term() + term_t{1}, index_t{10}, term_t{}, false, true});
    BOOST_CHECK(fsm.is_follower());
    (void)fsm.get_output();
    // ... and now leader
    election_timeout(fsm);
    BOOST_CHECK(fsm.is_candidate());
    output = fsm.get_output();
    fsm.step(id2, raft::vote_reply{fsm.get_current_term(), true});
    BOOST_CHECK(fsm.is_leader());
    // Commit dummy entry
    output = fsm.get_output();
    append = std::get<raft::append_request>(output.messages.back().second);
    idx = append.entries.back()->idx;
    fsm.step(id2, raft::append_reply{fsm.get_current_term(), idx, raft::append_reply::accepted{idx}});

    // Drop the leader from the current config and see that stepdown message is sent
    raft::configuration newcfg({{id2}, {id3, false}});
    fsm.add_entry(newcfg);
    output = fsm.get_output();
    append = std::get<raft::append_request>(output.messages.back().second);
    idx = append.entries.back()->idx;
    // Accept joint config entry on id2
    fsm.step(id2, raft::append_reply{fsm.get_current_term(), idx, raft::append_reply::accepted{idx}});
    // fsm added new config to the log
    output = fsm.get_output();
    append = std::get<raft::append_request>(output.messages.back().second);
    idx = append.entries.back()->idx;
    // Accept new config entry on id2
    fsm.step(id2, raft::append_reply{fsm.get_current_term(), idx, raft::append_reply::accepted{idx}});

    // And check that the deposed leader sent timeout_now
    output = fsm.get_output();
    BOOST_CHECK_EQUAL(output.messages.size(), 1);
    BOOST_CHECK(std::holds_alternative<raft::timeout_now>(output.messages.back().second));


    /// Check that leader stepdown works when the leader is removed from the config and there are entries above C_new in its log
    // This scenario starts from its own configuration in which all three
    // servers are listed without the `false` flag; seed the log with cfg2
    // (previously cfg2 was built but the log was created from cfg).
    raft::configuration cfg2({{id1}, {id2}, {id3}});
    raft::log log2(raft::snapshot_descriptor{.config = cfg2});

    raft::fsm fsm2(id1, term_t{1}, /* voted for */ server_id{}, std::move(log2), trivial_failure_detector, fsm_cfg);

    election_timeout(fsm2);
    // Turn to a leader
    fsm2.step(id2, raft::vote_reply{fsm2.get_current_term(), true});
    BOOST_CHECK(fsm2.is_leader());
    output = fsm2.get_output();
    append = std::get<raft::append_request>(output.messages.back().second);
    idx = append.entries.back()->idx;
    // Accept the dummy on id2
    fsm2.step(id2, raft::append_reply{fsm2.get_current_term(), idx, raft::append_reply::accepted{idx}});
    // Accept the dummy on id3
    fsm2.step(id3, raft::append_reply{fsm2.get_current_term(), idx, raft::append_reply::accepted{idx}});

    // Drop the leader from the current config and see that stepdown message is sent
    raft::configuration newcfg2({{id2}, {id3}});
    fsm2.add_entry(newcfg2);
    output = fsm2.get_output();
    append = std::get<raft::append_request>(output.messages.back().second);
    idx = append.entries.back()->idx;
    // Accept joint config entry on id2
    fsm2.step(id2, raft::append_reply{fsm2.get_current_term(), idx, raft::append_reply::accepted{idx}});
    // Accept joint config entry on id3
    fsm2.step(id3, raft::append_reply{fsm2.get_current_term(), idx, raft::append_reply::accepted{idx}});
    // fsm added new config entry
    output = fsm2.get_output();
    append = std::get<raft::append_request>(output.messages.back().second);
    idx = append.entries.back()->idx;

    fsm2.add_entry(raft::command{}); // add one more command that will be not replicated yet

    // Accept new config entry on id2
    fsm2.step(id2, raft::append_reply{fsm2.get_current_term(), idx, raft::append_reply::accepted{idx}});
    // Accept new config entry on id3
    fsm2.step(id3, raft::append_reply{fsm2.get_current_term(), idx, raft::append_reply::accepted{idx}});
    // C_new is now committed
    output = fsm2.get_output(); // this sends out the entry submitted after C_new
    append = std::get<raft::append_request>(output.messages.back().second);
    idx = append.entries.back()->idx;
    // Accept the entry
    fsm2.step(id2, raft::append_reply{fsm2.get_current_term(), idx, raft::append_reply::accepted{idx}});
    // And check that the deposed leader sent timeout_now
    output = fsm2.get_output();
    BOOST_CHECK_EQUAL(output.messages.size(), 1);
    BOOST_CHECK(std::holds_alternative<raft::timeout_now>(output.messages.back().second));
    /// End test
}
|
|
|
|
BOOST_AUTO_TEST_CASE(test_empty_configuration) {
    // A server joining an existing cluster starts with an empty
    // configuration and receives the real one from the leader via
    // AppendEntries or ApplySnapshot. This test covers that scenario.

    server_id id1 = id();

    raft::configuration empty_cfg({});
    raft::log empty_log(raft::snapshot_descriptor{.idx = index_t{0}, .config = empty_cfg});
    auto follower = create_follower(id1, std::move(empty_log));

    // The joining server is a follower, and an election timeout does not
    // change that: it produces no entries, no messages, and stays at term 0.
    BOOST_CHECK(follower.is_follower());
    election_timeout(follower);
    BOOST_CHECK(follower.is_follower());
    auto idle_out = follower.get_output();
    BOOST_CHECK_EQUAL(idle_out.log_entries.size(), 0);
    BOOST_CHECK_EQUAL(idle_out.messages.size(), 0);
    BOOST_CHECK_EQUAL(follower.get_current_term(), 0);

    // A second server whose configuration contains only itself becomes
    // leader after an election timeout.
    server_id id2 = id();
    raft::log leader_log(raft::snapshot_descriptor{.idx = index_t{0}, .config = raft::configuration({id2})});
    auto leader = create_follower(id2, std::move(leader_log));
    election_timeout(leader);
    BOOST_CHECK(leader.is_leader());

    // Transitioning to an empty configuration is not supported.
    BOOST_CHECK_THROW(leader.add_entry(raft::configuration({})), std::invalid_argument);
    // Adding the joining server is.
    leader.add_entry(raft::configuration({id1, id2}));

    communicate(leader, follower);
    BOOST_CHECK_EQUAL(follower.get_current_term(), 1);
    BOOST_CHECK_EQUAL(follower.in_memory_log_size(), leader.in_memory_log_size());
    BOOST_CHECK_EQUAL(leader.get_configuration().is_joint(), false);
}
|
|
|
|
BOOST_AUTO_TEST_CASE(test_confchange_a_to_b) {
    // Moves a single-server configuration {A} to a different single-server
    // configuration {B}, then moves it back while restarting the leader in
    // the middle of the change.

    server_id A_id = id();
    server_id B_id = id();

    raft::log shared_log(raft::snapshot_descriptor{.idx = index_t{0}, .config = raft::configuration{A_id}});
    auto A = create_follower(A_id, shared_log);
    election_timeout(A);
    BOOST_CHECK(A.is_leader());
    // Give A a non-empty log.
    A.add_entry(log_entry::dummy{});

    auto B = create_follower(B_id, shared_log);

    A.add_entry(raft::configuration({B_id}));

    communicate(A, B);
    BOOST_CHECK_EQUAL(A.get_current_term(), 1);
    // A is no longer part of the configuration and has become a follower.
    BOOST_CHECK(A.is_follower());
    BOOST_CHECK(B.is_leader());
    BOOST_CHECK_EQUAL(B.get_current_term(), 2);
    BOOST_CHECK_EQUAL(B.get_configuration().is_joint(), false);
    BOOST_CHECK_EQUAL(B.get_configuration().current.size(), 1);
    BOOST_CHECK(B.get_configuration().current.contains(raft::server_address{B_id}));

    // Same kind of change in the other direction, but this time the leader
    // is restarted after the joint configuration has been persisted.
    shared_log = raft::log(raft::snapshot_descriptor{.idx = B.log_last_idx(), .term = B.log_last_term(),
            .config = B.get_configuration()});
    // A somewhat awkward way to obtain B's log for the restart: append the
    // entry that B.add_entry() returns and mark the whole log stable.
    shared_log.emplace_back(make_lw_shared<raft::log_entry>(B.add_entry(raft::configuration({A_id}))));
    shared_log.stable_to(shared_log.last_idx());
    raft::fsm B_1(B_id, B.get_current_term(), B_id, std::move(shared_log), trivial_failure_detector, fsm_cfg);
    election_timeout(B_1);
    communicate(A, B_1);
    BOOST_CHECK(B_1.is_follower());
    election_timeout(A);
    BOOST_CHECK(A.is_leader());

    // B_1 must stay quiet after an election timeout and must not disrupt
    // the new configuration.
    election_timeout(B_1);
    BOOST_CHECK(B_1.is_follower());
    BOOST_CHECK_EQUAL(B_1.get_output().messages.size(), 0);
}
|
|
|
|
|
|
BOOST_AUTO_TEST_CASE(test_confchange_ab_to_cd) {
    // Like the A -> B change, but with several nodes on each side, so a
    // member of C_new has to campaign after the configuration change.
    server_id A_id = id();
    server_id B_id = id();
    server_id C_id = id();
    server_id D_id = id();

    raft::log seed_log(raft::snapshot_descriptor{.idx = index_t{0}, .config = raft::configuration{A_id, B_id}});
    auto A = create_follower(A_id, seed_log);
    auto B = create_follower(B_id, seed_log);
    election_timeout(A);
    communicate(A, B);
    BOOST_CHECK(A.is_leader());

    auto C = create_follower(C_id, seed_log);
    auto D = create_follower(D_id, seed_log);

    // Replace {A, B} with {C, D}.
    A.add_entry(raft::configuration({C_id, D_id}));
    communicate(A, B, C, D);

    BOOST_CHECK_EQUAL(A.get_current_term(), 1);
    // A and B are no longer part of the configuration.
    BOOST_CHECK(A.is_follower());
    BOOST_CHECK(B.is_follower());

    // C campaigns and is expected to end up leading the new configuration.
    election_timeout(C);
    election_threshold(D);
    communicate(A, B, C, D);
    BOOST_CHECK_EQUAL(C.get_current_term(), 2);
    BOOST_CHECK(C.is_leader());
    BOOST_CHECK_EQUAL(C.get_configuration().is_joint(), false);
    BOOST_CHECK_EQUAL(C.get_configuration().current.size(), 2);
}
|
|
|
|
|
|
BOOST_AUTO_TEST_CASE(test_confchange_abc_to_cde) {
    // Configuration change where C_old and C_new share no quorum, combined
    // with a leader change while the configuration change is in flight.
    discrete_failure_detector detector;
    server_id A_id = id();
    server_id B_id = id();
    server_id C_id = id();
    server_id D_id = id();
    server_id E_id = id();

    raft::log seed_log(raft::snapshot_descriptor{.idx = index_t{0}, .config = raft::configuration{A_id, B_id, C_id}});
    auto A = create_follower(A_id, seed_log);
    auto B = create_follower(B_id, seed_log, detector);
    auto C = create_follower(C_id, seed_log, detector);
    election_timeout(A);
    communicate(A, B, C);
    BOOST_CHECK(A.is_leader());

    auto D = create_follower(D_id, seed_log);
    auto E = create_follower(E_id, seed_log);

    A.add_entry(raft::configuration({C_id, D_id, E_id}));
    // Make sure C gets the new (joint) configuration entry. It is stable
    // but not committed, because D or E is needed to commit it.
    communicate(A, B, C);

    // Leader change while the joint configuration is being committed.
    detector.mark_dead(A_id);
    election_timeout(C);
    BOOST_CHECK(C.is_candidate());
    // Ticking B for election_threshold is necessary for B to vote for C
    // without becoming a candidate itself.
    election_threshold(B);
    communicate(B, C, D, E);

    // A was left out of the last exchange and still has its old role/term.
    BOOST_CHECK(A.is_leader());
    BOOST_CHECK_EQUAL(A.get_current_term(), 1);
    BOOST_CHECK(B.is_follower());
    BOOST_CHECK(C.is_leader());
    BOOST_CHECK(D.is_follower());
    BOOST_CHECK(E.is_follower());

    BOOST_CHECK(C.get_current_term() >= 2);
    BOOST_CHECK_EQUAL(C.get_configuration().is_joint(), false);
    BOOST_CHECK_EQUAL(C.get_configuration().current.size(), 3);
}
|
|
|
|
|
|
BOOST_AUTO_TEST_CASE(test_confchange_abcdef_to_abcgh) {
    // Configuration change in the presence of down nodes in C_old.
    discrete_failure_detector detector;
    server_id A_id = id();
    server_id B_id = id();
    server_id C_id = id();
    server_id D_id = id();
    server_id E_id = id();
    server_id F_id = id();
    server_id G_id = id();
    server_id H_id = id();

    raft::log seed_log(raft::snapshot_descriptor{.idx = index_t{0},
            .config = raft::configuration{A_id, B_id, C_id, D_id, E_id, F_id}});
    auto A = create_follower(A_id, seed_log, detector);
    auto B = create_follower(B_id, seed_log, detector);
    auto C = create_follower(C_id, seed_log, detector);
    auto D = create_follower(D_id, seed_log, detector);
    auto E = create_follower(E_id, seed_log, detector);
    auto F = create_follower(F_id, seed_log, detector);
    // D wins the election talking to A, E and F only.
    election_timeout(D);
    communicate(A, D, E, F);
    BOOST_CHECK(D.is_leader());

    auto G = create_follower(G_id, seed_log);
    auto H = create_follower(H_id, seed_log);

    D.add_entry(raft::configuration({A_id, B_id, C_id, G_id, H_id}));
    // We can't transition to C_new in the absence of a C_old majority.
    communicate(B, C, D, G, H);
    BOOST_CHECK(D.is_leader());
    BOOST_CHECK(D.get_configuration().is_joint());

    // With E added to the exchange the change is expected to go through,
    // after which D is a follower.
    D.tick();
    communicate(B, C, E, D, G, H);
    BOOST_CHECK(D.is_follower());
    auto leader = select_leader(A, B, C, G, H);
    BOOST_CHECK_EQUAL(leader->get_configuration().is_joint(), false);
    BOOST_CHECK_EQUAL(leader->get_configuration().current.size(), 5);

    // Election timeouts on D and A with every node marked dead must leave
    // the selected leader in place.
    detector.mark_all_dead();
    election_timeout(D);
    election_timeout(A);
    communicate(A, B, C, D, E, F, G, H);
    BOOST_CHECK(leader->is_leader());
}
|
|
|
|
BOOST_AUTO_TEST_CASE(test_confchange_abcde_abcdefg) {
    // Configuration changes must work fine with many nodes down.
    discrete_failure_detector detector;
    server_id A_id = id();
    server_id B_id = id();
    server_id C_id = id();
    server_id D_id = id();
    server_id E_id = id();
    server_id F_id = id();
    server_id G_id = id();

    raft::log seed_log(raft::snapshot_descriptor{.idx = index_t{0},
            .config = raft::configuration{A_id, B_id, C_id, D_id, E_id}});
    auto A = create_follower(A_id, seed_log, detector);
    auto B = create_follower(B_id, seed_log, detector);
    auto C = create_follower(C_id, seed_log, detector);
    auto D = create_follower(D_id, seed_log, detector);
    auto E = create_follower(E_id, seed_log, detector);
    // A is elected talking to D and E only.
    election_timeout(A);
    communicate(A, D, E);
    BOOST_CHECK(A.is_leader());

    auto F = create_follower(F_id, seed_log);
    auto G = create_follower(G_id, seed_log);

    // Wrap the configuration entry into some traffic.
    A.add_entry(log_entry::dummy{});
    A.add_entry(raft::configuration({A_id, B_id, C_id, D_id, E_id, F_id, G_id}));
    A.add_entry(log_entry::dummy{});
    // Without tick() A won't re-try communication with the nodes it
    // believes are down (B, C).
    A.tick();
    // 4 is enough to transition to the new configuration.
    communicate(A, B, C, G);
    BOOST_CHECK(A.is_leader());
    BOOST_CHECK_EQUAL(A.get_configuration().is_joint(), false);
    BOOST_CHECK_EQUAL(A.get_configuration().current.size(), 7);

    // Once everybody takes part, all logs must end at the same index as A's.
    A.tick();
    communicate(A, B, C, D, E, F, G);
    BOOST_CHECK_EQUAL(A.log_last_idx(), B.log_last_idx());
    BOOST_CHECK_EQUAL(A.log_last_idx(), C.log_last_idx());
    BOOST_CHECK_EQUAL(A.log_last_idx(), D.log_last_idx());
    BOOST_CHECK_EQUAL(A.log_last_idx(), E.log_last_idx());
    BOOST_CHECK_EQUAL(A.log_last_idx(), F.log_last_idx());
    BOOST_CHECK_EQUAL(A.log_last_idx(), G.log_last_idx());
    BOOST_CHECK(A.is_leader());
    BOOST_CHECK_EQUAL(A.get_configuration().is_joint(), false);
    BOOST_CHECK_EQUAL(A.get_configuration().current.size(), 7);
}
|
|
|
|
BOOST_AUTO_TEST_CASE(test_election_during_confchange) {
    // Elections taking place while {A, B, C} is being changed to {C, D, E}.
    server_id A_id = id();
    server_id B_id = id();
    server_id C_id = id();
    server_id D_id = id();
    server_id E_id = id();

    raft::log seed_log(raft::snapshot_descriptor{.idx = index_t{0}, .config = raft::configuration{A_id, B_id, C_id}});

    // The joint configuration has reached the old majority; the next
    // leader comes from the new majority.
    discrete_failure_detector detector;
    auto A = create_follower(A_id, seed_log, detector);
    auto B = create_follower(B_id, seed_log, detector);
    auto C = create_follower(C_id, seed_log, detector);
    election_timeout(A);
    communicate(A, B, C);
    A.add_entry(raft::configuration({C_id, D_id, E_id}));
    communicate(A, B, C);
    detector.mark_dead(A_id);
    auto D = create_follower(D_id, seed_log, detector);
    auto E = create_follower(E_id, seed_log, detector);

    // C takes over and is still in the joint configuration at that point.
    election_timeout(C);
    election_threshold(B);
    communicate_until([&C] { return C.is_leader(); }, B, C, D, E);
    BOOST_CHECK_EQUAL(C.get_configuration().is_joint(), true);

    detector.mark_alive(A.id());
    communicate(D, A, B, E);
    detector.mark_alive(C.id());
    // Keep exchanging messages until C has left the joint configuration.
    communicate_until([&C] { return !C.get_configuration().is_joint(); }, B, C, D, E);
    communicate(C, D);

    detector.mark_dead(C.id());
    election_timeout(D);
    // E may still be in joint. It must vote for D anyway. D is in C_new
    // and will replicate C_new to E after becoming a leader.
    election_threshold(E);
    A.tick();
    communicate(A, D, E);
    BOOST_CHECK(D.is_leader());
    BOOST_CHECK(A.is_follower());
    BOOST_CHECK_EQUAL(D.get_configuration().is_joint(), false);
    BOOST_CHECK_EQUAL(D.get_configuration().current.size(), 3);
}
|
|
|
|
BOOST_AUTO_TEST_CASE(test_reply_from_removed_follower) {
    // Replies from followers may be delayed. Delivering them after the
    // follower has been removed by a configuration change must not upset
    // the leader.

    server_id A_id = id();
    server_id B_id = id();

    raft::log seed_log(raft::snapshot_descriptor{.idx = index_t{0}, .config = raft::configuration{A_id, B_id}});
    auto A = create_follower(A_id, seed_log);
    auto B = create_follower(B_id, seed_log);
    election_timeout(A);
    communicate(A, B);

    // Shrink the configuration to {A}.
    A.add_entry(raft::configuration({A_id}));
    communicate(A, B);
    BOOST_CHECK(A.is_leader());
    BOOST_CHECK_EQUAL(A.get_configuration().is_joint(), false);
    BOOST_CHECK_EQUAL(A.get_configuration().current.size(), 1);

    // Late accept, reject and snapshot replies arriving from B.
    auto last = A.log_last_idx();
    A.step(B.id(), raft::append_reply{A.get_current_term(), last, raft::append_reply::accepted{last}});
    A.step(B.id(), raft::append_reply{A.get_current_term(), last, raft::append_reply::rejected{last}});
    A.step(B.id(), raft::snapshot_reply{A.get_current_term(), true});
    BOOST_CHECK(A.is_leader());
}
|
|
|
|
BOOST_AUTO_TEST_CASE(test_leader_ignores_messages_with_current_term) {
    // The leader must properly handle InstallSnapshot, AppendRequest and
    // VoteRequest messages that carry its own term.
    discrete_failure_detector detector;
    server_id A_id = id();
    server_id B_id = id();

    raft::log seed_log(raft::snapshot_descriptor{.idx = index_t{0},
            .config = raft::configuration{A_id, B_id}});
    auto A = create_follower(A_id, seed_log, detector);
    auto B = create_follower(B_id, seed_log, detector);
    election_timeout(A);
    communicate(A, B);
    BOOST_CHECK(A.is_leader());

    // InstallSnapshot with the current term gets a negative reply.
    A.step(B.id(), raft::install_snapshot{A.get_current_term()});
    auto out = A.get_output();
    BOOST_CHECK_EQUAL(out.messages.size(), 1);
    raft::snapshot_reply snap_reply;
    BOOST_REQUIRE_NO_THROW(snap_reply = std::get<raft::snapshot_reply>(out.messages[0].second));
    BOOST_CHECK(!snap_reply.success);

    // AppendRequest with the current term is ignored by the leader.
    A.step(B.id(), raft::append_request{A.get_current_term()});
    out = A.get_output();
    BOOST_CHECK_EQUAL(out.messages.size(), 0);

    // VoteRequest with the current term is not granted.
    A.step(B.id(), raft::vote_request{A.get_current_term(), index_t{}, term_t{}, false, false});
    out = A.get_output();
    BOOST_CHECK_EQUAL(out.messages.size(), 1);
    raft::vote_reply vote_answer;
    BOOST_REQUIRE_NO_THROW(vote_answer = std::get<raft::vote_reply>(out.messages[0].second));
    BOOST_CHECK(!vote_answer.vote_granted);
}
|
|
|
|
BOOST_AUTO_TEST_CASE(test_leader_read_quorum) {
    // The leader keeps its role across election timeouts for as long as a
    // quorum of voting members is alive, and gives it up once it is not.
    discrete_failure_detector detector;
    server_id A_id = id();
    server_id B_id = id();
    server_id C_id = id();
    server_id D_id = id();

    // 4 nodes: 3 voting, 1 non voting (quorum is 2).
    raft::server_address_set members{raft::server_address{A_id}, raft::server_address{B_id},
            raft::server_address{C_id}, raft::server_address{D_id, false}};

    raft::log seed_log(raft::snapshot_descriptor{.idx = index_t{0}, .config = raft::configuration(members)});
    auto A = create_follower(A_id, seed_log, detector);
    auto B = create_follower(B_id, seed_log, detector);
    auto C = create_follower(C_id, seed_log, detector);
    auto D = create_follower(D_id, seed_log, detector);
    election_timeout(A);
    communicate(A, B, C, D);
    BOOST_CHECK(A.is_leader());

    // A timeout alone does not make the leader step down while a quorum
    // of nodes is alive.
    election_timeout(A);
    BOOST_CHECK(A.is_leader());

    // One voting member dies; a quorum is still alive, so no stepdown.
    detector.mark_dead(C_id);
    election_timeout(A);
    BOOST_CHECK(A.is_leader());

    // The non-voting member dies; two voting members remain, so still
    // no stepdown.
    detector.mark_dead(D_id);
    election_timeout(A);
    BOOST_CHECK(A.is_leader());

    // One more voting member dies and the leader becomes a follower.
    detector.mark_dead(B_id);
    election_timeout(A);
    BOOST_CHECK(!A.is_leader());
}
|
|
|
|
BOOST_AUTO_TEST_CASE(test_zero) {
    // A value-initialized server_id must be rejected with
    // std::invalid_argument both by raft::configuration and by
    // create_follower.
    server_id zero_id{};
    BOOST_CHECK_THROW(raft::configuration cfg({zero_id}), std::invalid_argument);
    BOOST_CHECK_THROW(raft::configuration cfg(raft::server_address_set{raft::server_address{zero_id}}), std::invalid_argument);
    BOOST_CHECK_THROW(create_follower(zero_id, raft::log(raft::snapshot_descriptor{})), std::invalid_argument);
}
|
|
|
|
BOOST_AUTO_TEST_CASE(test_reordered_reject) {
    // Delivers two rejects for the same append out of order: the second
    // reject reaches the leader only after a later append was accepted.
    auto id1 = id();
    raft::fsm fsm1(id1, term_t{1}, server_id{},
            raft::log{raft::snapshot_descriptor{.config = {{{id1}}}}},
            trivial_failure_detector, fsm_cfg);

    // Tick the single-node fsm until it has elected itself.
    for (; !fsm1.is_leader(); ) {
        fsm1.tick();
    }

    fsm1.add_entry(log_entry::dummy{});
    (void)fsm1.get_output();

    // The second fsm starts with an empty configuration.
    auto id2 = id();
    raft::fsm fsm2(id2, term_t{1}, server_id{},
            raft::log{raft::snapshot_descriptor{.config = raft::configuration{}}},
            trivial_failure_detector, fsm_cfg);

    raft_routing_map route_map{{fsm1.id(), &fsm1}, {fsm2.id(), &fsm2}};

    fsm1.add_entry(raft::configuration{{{fsm1.id()}, {fsm2.id()}}});
    fsm1.tick();

    // fsm1 sends append_entries with idx=2 to fsm2
    auto first_append = fsm1.get_output();

    fsm1.tick();

    // fsm1 sends append_entries with idx=2 to fsm2 (again)
    auto second_append = fsm1.get_output();

    raft::logger.trace("delivering first append idx=2");
    deliver(route_map, fsm1.id(), std::move(first_append.messages));

    // fsm2 rejects the first idx=2 append
    auto first_reject = fsm2.get_output();

    raft::logger.trace("delivering second append idx=2");
    deliver(route_map, fsm1.id(), std::move(second_append.messages));

    // fsm2 rejects the second idx=2 append
    auto second_reject = fsm2.get_output();

    raft::logger.trace("delivering first reject");
    deliver(route_map, fsm2.id(), std::move(first_reject.messages));

    // fsm1 sends append_entries with idx=1 to fsm2
    auto retry_append = fsm1.get_output();

    raft::logger.trace("delivering append idx=1");
    deliver(route_map, fsm1.id(), std::move(retry_append.messages));

    // fsm2 accepts the idx=1 append
    auto accept_out = fsm2.get_output();

    raft::logger.trace("delivering accept for append idx=1");
    deliver(route_map, fsm2.id(), std::move(accept_out.messages));

    // The stale second reject arrives last.
    raft::logger.trace("delivering second reject");
    deliver(route_map, fsm2.id(), std::move(second_reject.messages));
}
|
|
|
|
// Checks that the leader's progress tracking for a follower stays in
// PIPELINE mode while that follower is switched from non-voter to voter and
// back again through configuration changes.
BOOST_AUTO_TEST_CASE(test_non_voter_stays_pipeline) {
    server_id A_id = id();
    server_id B_id = id();

    // A votes, B does not.
    raft::server_address_set addrset{raft::server_address{A_id}, raft::server_address{B_id, false}};
    raft::configuration cfg(addrset);
    raft::log log(raft::snapshot_descriptor{.idx = index_t{0}, .config = cfg});
    auto A = create_follower(A_id, log);
    auto B = create_follower(B_id, log);

    election_timeout(A);
    communicate(A);
    BOOST_CHECK(A.is_leader());
    BOOST_CHECK(A.get_progress(B_id).state == raft::follower_progress::state::PROBE);

    A.add_entry(log_entry::dummy{});
    // A probe has to travel from A to B so that B gets a chance to respond
    // and A can switch B to PIPELINE mode.
    A.tick();
    communicate(A, B);
    BOOST_CHECK(A.get_progress(B_id).state == raft::follower_progress::state::PIPELINE);

    // Promote B to a voter.
    raft::configuration newcfg({A_id, B_id});
    A.add_entry(newcfg);
    communicate(A, B);
    BOOST_CHECK_EQUAL(A.get_configuration().is_joint(), false);
    BOOST_CHECK_EQUAL(A.get_configuration().current.find(raft::server_address{B_id})->can_vote, true);
    BOOST_CHECK(A.get_progress(B_id).state == raft::follower_progress::state::PIPELINE);

    // Demote B back to a non-voter, ticking both nodes at random points
    // while messages are exchanged.
    A.add_entry(cfg);
    auto maybe_tick = [&A, &B]() {
        if (rolladice()) {
            A.tick();
            B.tick();
        }
        // Never asks communicate_until() to stop early.
        return false;
    };
    communicate_until(maybe_tick, A, B);
    BOOST_CHECK_EQUAL(A.get_configuration().is_joint(), false);
    BOOST_CHECK_EQUAL(A.get_configuration().current.find(raft::server_address{B_id})->can_vote, false);
    BOOST_CHECK(A.get_progress(B_id).state == raft::follower_progress::state::PIPELINE);
}
|
|
|
|
// Two-node cluster in which the leader is turned into a non-voter. Also
// checks that configurations leaving no voter at all are refused with
// std::invalid_argument.
BOOST_AUTO_TEST_CASE(test_leader_change_to_non_voter) {
    server_id A_id = id();
    server_id B_id = id();

    // Initially A is the only voter and B is a non-voter.
    raft::server_address_set oldset{raft::server_address{A_id, true}, raft::server_address{B_id, false}};
    raft::configuration cfg(oldset);
    raft::log log(raft::snapshot_descriptor{.idx = index_t{0}, .config = cfg});
    auto A = create_follower(A_id, log);
    auto B = create_follower(B_id, log);

    election_timeout(A);
    communicate(A, B);
    BOOST_CHECK(A.is_leader());

    // Swap the roles: A becomes a non-voter, B becomes the voter.
    raft::configuration swapped_cfg(raft::server_address_set{
            raft::server_address{A_id, false}, raft::server_address{B_id, true}});
    A.add_entry(swapped_cfg);
    A.tick();
    communicate(A, B);
    BOOST_CHECK(A.is_follower());
    BOOST_CHECK(B.is_leader());

    // Try to switch the leader to a non-voter, leaving no other voters.
    raft::configuration no_voters_cfg(raft::server_address_set{
            raft::server_address{A_id, false}, raft::server_address{B_id, false}});
    BOOST_CHECK_THROW(B.add_entry(no_voters_cfg), std::invalid_argument);

    // Try to remove the last remaining voter.
    raft::configuration lone_non_voter_cfg(raft::server_address_set{raft::server_address{B_id, false}});
    BOOST_CHECK_THROW(B.add_entry(lone_non_voter_cfg), std::invalid_argument);
}
|
|
|
|
// A non-voter that receives timeout_now must not start an election, and it
// must still be able to follow the leader afterwards. (According to the
// original author's note this may disturb the cluster a little, because the
// leader has to increase its term.)
BOOST_AUTO_TEST_CASE(test_non_voter_gets_timeout_now) {
    server_id A_id = id();
    server_id B_id = id();
    server_id C_id = id();

    // A and B vote, C is a non-voter.
    raft::configuration cfg(raft::server_address_set{
            raft::server_address{A_id},
            raft::server_address{B_id},
            raft::server_address{C_id, false}});

    raft::log log(raft::snapshot_descriptor{.idx = index_t{0}, .config = cfg});
    auto A = create_follower(A_id, log);
    auto B = create_follower(B_id, log);
    auto C = create_follower(C_id, log);

    election_timeout(A);
    communicate(A, B, C);
    BOOST_CHECK(A.is_leader());

    // Hand timeout_now to the non-voter and see that it stays quiet.
    C.step(A.id(), raft::timeout_now{.current_term = A.get_current_term()});
    C.tick();
    auto output = C.get_output();
    BOOST_CHECK(C.is_follower());
    BOOST_CHECK_EQUAL(output.messages.size(), 0);
    BOOST_CHECK(!output.term_and_vote);

    // Replication to C keeps working and A remains the leader.
    A.add_entry(log_entry::dummy{});
    communicate(A, B, C);
    BOOST_CHECK_EQUAL(A.log_last_idx(), C.log_last_idx());
    BOOST_CHECK_EQUAL(A.get_current_term(), C.get_current_term());
    BOOST_CHECK(A.is_leader());
}
|
|
|
|
// A non-voter must not start an election even when its election timeout
// expires and the failure detector reports every node as dead.
BOOST_AUTO_TEST_CASE(test_non_voter_election_timeout) {
    discrete_failure_detector fd;

    server_id A_id = id();
    server_id B_id = id();
    server_id C_id = id();

    // A and B vote, C is a non-voter.
    raft::configuration cfg(raft::server_address_set{
            raft::server_address{A_id},
            raft::server_address{B_id},
            raft::server_address{C_id, false}});

    raft::log log(raft::snapshot_descriptor{.idx = index_t{0}, .config = cfg});
    auto A = create_follower(A_id, log, fd);
    auto B = create_follower(B_id, log, fd);
    auto C = create_follower(C_id, log, fd);

    election_timeout(A);
    communicate(A, B, C);
    BOOST_CHECK(A.is_leader());

    fd.mark_all_dead();
    // Remember the term so we can verify that C did not bump it.
    auto C_term = C.get_current_term();
    election_timeout(C);
    BOOST_CHECK(C.is_follower());
    BOOST_CHECK_EQUAL(C_term, C.get_current_term());
}
|
|
|
|
// Repeatedly flips C between voter and non-voter, mixing in dummy entries
// and occasional log snapshots at random, and checks at the end that A is
// still the leader and C is in sync with it.
BOOST_AUTO_TEST_CASE(test_non_voter_voter_loop) {
    server_id A_id = id(), B_id = id(), C_id = id();

    raft::configuration cfg({A_id, B_id, C_id});
    raft::configuration cfg_with_non_voter(raft::server_address_set{
            raft::server_address{A_id},
            raft::server_address{B_id},
            raft::server_address{C_id, false}});

    raft::log log(raft::snapshot_descriptor{.idx = index_t{0}, .config = cfg});
    auto A = create_follower(A_id, log);
    auto B = create_follower(B_id, log);
    auto C = create_follower(C_id, log);
    election_timeout(A);
    communicate(A, B, C);
    BOOST_CHECK(A.is_leader());

    for (int i = 0; i < 100; ++i) {
        // Odd iterations demote C, even iterations promote it again.
        A.add_entry(i % 2 ? cfg_with_non_voter : cfg);
        if (rolladice()) {
            A.add_entry(log_entry::dummy{});
        }
        communicate(A, B, C);
        if (rolladice()) {
            A.add_entry(log_entry::dummy{});
            communicate(A, B, C);
        }
        // If iteration count is large, this helps save some memory.
        // Each server snapshots its own log at its own last index; the
        // previous code built B's and C's snapshots from A's log (and C's
        // from B's last index), which looks like a copy-paste slip.
        // NOTE(review): log_snapshot() is defined outside this view -
        // confirm it only reads the log it is given.
        if (rolladice(1./1000)) {
            A.get_log().apply_snapshot(log_snapshot(A.get_log(), A.log_last_idx()), 0);
        }
        if (rolladice(1./100)) {
            B.get_log().apply_snapshot(log_snapshot(B.get_log(), B.log_last_idx()), 0);
        }
        if (rolladice(1./5000)) {
            C.get_log().apply_snapshot(log_snapshot(C.get_log(), C.log_last_idx()), 0);
        }
    }

    BOOST_CHECK(A.is_leader());
    BOOST_CHECK_EQUAL(A.get_current_term(), C.get_current_term());
    BOOST_CHECK_EQUAL(A.log_last_idx(), C.log_last_idx());
}
|
|
|
|
// C is kept out of the communication while a configuration change is
// committed by A and B, and only catches up after A has taken a snapshot.
// First C is demoted to a non-voter this way, then promoted back to a voter,
// and its election behaviour is checked after each step.
BOOST_AUTO_TEST_CASE(test_non_voter_confchange_in_snapshot) {
    discrete_failure_detector fd;

    server_id A_id = id();
    server_id B_id = id();
    server_id C_id = id();

    raft::configuration cfg({A_id, B_id, C_id});
    raft::configuration cfg_with_non_voter(raft::server_address_set{
            raft::server_address{A_id},
            raft::server_address{B_id},
            raft::server_address{C_id, false}});

    raft::log log(raft::snapshot_descriptor{.idx = index_t{0}, .config = cfg});
    auto A = create_follower(A_id, log, fd);
    auto B = create_follower(B_id, log, fd);
    auto C = create_follower(C_id, log, fd);

    election_timeout(A);
    communicate(A, B, C);
    BOOST_CHECK(A.is_leader());

    // --- Part 1: voter -> non-voter ---
    A.add_entry(log_entry::dummy{});
    A.tick();
    A.add_entry(cfg_with_non_voter);
    A.tick();
    // Only the majority (A and B) commits the configuration change.
    communicate(A, B);
    BOOST_CHECK_EQUAL(A.get_configuration().is_joint(), false);
    BOOST_CHECK_EQUAL(A.get_configuration().current.find(raft::server_address{C_id})->can_vote, false);

    A.tick();
    raft::snapshot_descriptor demote_snp{.idx = A.log_last_idx(), .term = A.log_last_term(), .config = A.get_configuration()};
    A.apply_snapshot(demote_snp, 0, true);
    A.tick();
    communicate(A, B, C);
    BOOST_CHECK(A.is_leader());
    BOOST_CHECK_EQUAL(A.get_current_term(), C.get_current_term());
    BOOST_CHECK_EQUAL(A.log_last_idx(), C.log_last_idx());

    // A non-voter doesn't become a candidate on election timeout.
    fd.mark_all_dead();
    election_timeout(C);
    BOOST_CHECK(C.is_follower());

    // --- Part 2: non-voter -> voter, again by way of a snapshot ---
    fd.mark_all_alive();
    A.tick();
    for (int i = 0; i < 100; ++i) {
        A.add_entry(log_entry::dummy{});
    }
    A.add_entry(cfg);
    A.tick();
    // Only the majority (A and B) commits the configuration change.
    communicate(A, B);
    BOOST_CHECK_EQUAL(A.get_configuration().is_joint(), false);
    BOOST_CHECK_EQUAL(A.get_configuration().current.find(raft::server_address{C_id})->can_vote, true);

    A.tick();
    raft::snapshot_descriptor promote_snp{.idx = A.log_last_idx(), .term = A.log_last_term(), .config = A.get_configuration()};
    A.apply_snapshot(promote_snp, 0, true);
    A.tick();
    communicate(A, B, C);
    BOOST_CHECK(A.is_leader());
    BOOST_CHECK_EQUAL(A.get_current_term(), C.get_current_term());
    BOOST_CHECK_EQUAL(A.log_last_idx(), C.log_last_idx());

    // This time C does become a candidate on election timeout.
    fd.mark_all_dead();
    election_timeout(C);
    BOOST_CHECK(C.is_candidate());

    // Check that the former non-voter can become a leader alright
    // (LearnerPromotion).
    election_threshold(B);
    communicate(C, B);
    BOOST_CHECK(C.is_leader());
}
|
|
|
|
// A server that still believes it is a non-voter grants its vote when asked:
// it may simply not have learned yet about the configuration in which it is
// a voter.
BOOST_AUTO_TEST_CASE(test_non_voter_can_vote) {
    discrete_failure_detector fd;

    server_id A_id = id();
    server_id B_id = id();
    server_id C_id = id();

    // A and B vote, C starts as a non-voter.
    raft::configuration cfg(raft::server_address_set{
            raft::server_address{A_id},
            raft::server_address{B_id},
            raft::server_address{C_id, false}});

    raft::log log(raft::snapshot_descriptor{.idx = index_t{0}, .config = cfg});
    auto A = create_follower(A_id, log, fd);
    auto B = create_follower(B_id, log, fd);
    auto C = create_follower(C_id, log, fd);

    election_timeout(A);
    communicate(A, B, C);
    BOOST_CHECK(A.is_leader());

    // Promote C, but let only the majority (A and B) commit the change;
    // C is left out of this round of communication.
    raft::configuration cfg_all_voters({A_id, B_id, C_id});
    A.add_entry(cfg_all_voters);
    communicate(A, B);
    BOOST_CHECK_EQUAL(A.get_configuration().is_joint(), false);
    BOOST_CHECK_EQUAL(A.get_configuration().current.find(raft::server_address{C_id})->can_vote, true);
    BOOST_CHECK_EQUAL(A.log_last_idx(), B.log_last_idx());

    fd.mark_dead(A_id);
    election_timeout(B);
    election_threshold(C);
    // B and C are enough to elect B in the new configuration.
    communicate(B, C);
    BOOST_CHECK(B.is_leader());
    BOOST_CHECK_EQUAL(B.get_current_term(), C.get_current_term());
    BOOST_CHECK_EQUAL(B.log_last_idx(), C.log_last_idx());
}
|
|
|
|
BOOST_AUTO_TEST_CASE(test_leader_transferee_dies_upon_receiving_timeout_now) {
    /// 4-node cluster (A, B, C, D). A is initially elected a leader.
    /// The leader adds a new configuration entry, that removes it from the
    /// cluster (B, C, D).
    /// Communicate the cluster up to the point where A starts to resign
    /// its leadership (calls `transfer_leadership()`).
    /// At this point, A should send a `timeout_now` message to one of
    /// the remaining nodes (B, C or D) and the new configuration should be
    /// committed. But no node has actually received the `timeout_now` message
    /// yet.
    ///
    /// Determine on which node the message should arrive, accept the
    /// `timeout_now` message and disconnect the target from the rest of the
    /// group.
    ///
    /// Check that after that the cluster, which has only two live members,
    /// can progress and elect a new leader through a normal election process.

    discrete_failure_detector fd;

    raft::server_id A_id = id(), B_id = id(), C_id = id(), D_id = id();
    raft::log log(raft::snapshot_descriptor{.idx = raft::index_t{0},
            .config = raft::configuration({A_id, B_id, C_id, D_id})});
    auto A = create_follower(A_id, log, fd);
    auto B = create_follower(B_id, log, fd);
    auto C = create_follower(C_id, log, fd);
    auto D = create_follower(D_id, log, fd);

    raft_routing_map map;
    map.emplace(A_id, &A);
    map.emplace(B_id, &B);
    map.emplace(C_id, &C);
    map.emplace(D_id, &D);

    // A becomes leader
    election_timeout(A);
    communicate(A, B, C, D);
    BOOST_CHECK(A.is_leader());

    // Add a cfg entry on leader that removes it from the cluster ({B_id, C_id, D_id})
    raft::configuration newcfg({B_id, C_id, D_id});
    A.add_entry(newcfg);

    // Commit new config and stop communicating right after A steps down due to
    // starting leadership transfer.
    communicate_until([&A] { return !A.is_leader(); }, A, B, C, D);

    // At this point A should have a `timeout_now` message in its message queue.
    BOOST_CHECK(A.is_follower());
    // We cannot assume which node will be selected as the target for
    // `timeout_now` message, because the order in which A should test each
    // follower whether it's an eligible target for `timeout_now` is
    // unspecified. Let's call it X. X can be either B, C, or D.
    //
    // Maintain the routing map state since it will be used later to
    // determine which two nodes will remain in the cluster after partitioning
    // `timeout_now` target node (X) away.
    map.erase(A_id);

    // We don't really care on which node `timeout_now` message arrives so adapt
    // in a dynamic fashion.
    //
    // Check that A has sent the `timeout_now` message and determine to whom it was sent.
    // These are fatal checks: the code below dereferences messages.back(),
    // which would be undefined behaviour on an empty message list.
    auto output = A.get_output();
    BOOST_REQUIRE_EQUAL(output.messages.size(), 1);
    BOOST_REQUIRE(std::holds_alternative<raft::timeout_now>(output.messages.back().second));
    auto timeout_now_target_id = output.messages.back().first;
    auto timeout_now_msg = std::get<raft::timeout_now>(output.messages.back().second);

    // Accept the message on target node...
    // at() is used instead of operator[] so that an unexpected target id
    // fails loudly instead of inserting (and dereferencing) a null pointer.
    map.at(timeout_now_target_id)->step(A_id, std::move(timeout_now_msg));

    // ...and immediately cut it from the rest of the cluster so that others think it's dead.
    fd.mark_dead(timeout_now_target_id);
    map.erase(timeout_now_target_id);

    // Two more nodes should remain in the cluster.
    // Again, we don't care which node from these two would like to become a leader,
    // so just select the first one in the list of remaining nodes.
    //
    // Wait for standard election_timeout() on the first node, and for election_threshold()
    // on the second.
    // Then, check, that the new leader is elected among these two remaining nodes.
    BOOST_REQUIRE_EQUAL(map.size(), 2);
    auto first_fsm = map.begin();
    auto second_fsm = ++map.begin();
    election_timeout(*first_fsm->second);
    election_threshold(*second_fsm->second);
    communicate(B, C, D);
    auto final_leader = select_leader(B, C, D);
    // Stop here if no leader was found rather than dereferencing null below.
    BOOST_REQUIRE(final_leader != nullptr);
    BOOST_CHECK(final_leader->id() == first_fsm->first || final_leader->id() == second_fsm->first);
}
|
|
|
|
BOOST_AUTO_TEST_CASE(test_leader_transfer_lost_timeout_now) {
    /// 3-node cluster (A, B, C). A is initially elected a leader.
    /// The leader adds a new configuration entry, that removes it from the
    /// cluster (B, C).
    ///
    /// Wait until the former leader commits the new configuration and starts
    /// the leader transfer procedure, sending out the `timeout_now` message to
    /// one of the remaining nodes. At that point the target hasn't received
    /// it yet.
    ///
    /// Lose this message and verify that the rest of the cluster (B, C)
    /// can make progress and elect a new leader.

    raft::server_id A_id = id(), B_id = id(), C_id = id();
    raft::log log(raft::snapshot_descriptor{.idx = raft::index_t{0},
            .config = raft::configuration({A_id, B_id, C_id})});
    auto A = create_follower(A_id, log);
    auto B = create_follower(B_id, log);
    auto C = create_follower(C_id, log);

    // A becomes leader
    election_timeout(A);
    communicate(A, B, C);
    BOOST_CHECK(A.is_leader());

    // Add a cfg entry on leader that removes it from the cluster ({B_id, C_id})
    raft::configuration newcfg({B_id, C_id});
    A.add_entry(newcfg);

    // Commit new config and stop communicating right after A steps down due to
    // starting leadership transfer.
    communicate_until([&A] { return !A.is_leader(); }, A, B, C);

    // We don't really care on which node `timeout_now` message should arrive,
    // since it'll be lost, anyway.
    //
    // Check that the `timeout_now` message was sent...
    // These are fatal checks: messages.back() on an empty list would be
    // undefined behaviour.
    auto output = A.get_output();
    BOOST_REQUIRE_EQUAL(output.messages.size(), 1);
    BOOST_REQUIRE(std::holds_alternative<raft::timeout_now>(output.messages.back().second));
    auto timeout_now_msg = std::get<raft::timeout_now>(output.messages.back().second);
    (void)timeout_now_msg;
    // ... and lose it.

    // By now, B and C should both remain in the follower state.
    // Check that and attempt to go forward with a normal election process to see
    // that the cluster operates normally after `timeout_now` has been lost.
    BOOST_CHECK(B.is_follower());
    BOOST_CHECK(C.is_follower());

    // Elect B a leader and check that normal election proceeds as expected.
    election_timeout(B);
    election_threshold(C);
    communicate(B, C);
    BOOST_CHECK(B.is_leader());
}
|
|
|
|
BOOST_AUTO_TEST_CASE(test_leader_transfer_lost_force_vote_request) {
    /// 3-node cluster (A, B, C). A is initially elected a leader.
    /// The leader adds a new configuration entry, that removes it from the
    /// cluster (B, C).
    ///
    /// Wait until the former leader commits the new configuration and starts
    /// the leader transfer procedure, sending out the `timeout_now` message to
    /// one of the remaining nodes. At that point the target hasn't received
    /// it yet.
    ///
    /// Deliver the `timeout_now` message to the target but lose all the
    /// `vote_request(force)` messages it attempts to send.
    /// This should halt the election process.
    /// Then wait for election timeout so that candidate node starts another
    /// normal election (without `force` flag for vote requests).
    ///
    /// Check that this candidate further makes progress and is elected a
    /// leader.

    raft::server_id A_id = id(), B_id = id(), C_id = id();
    raft::log log(raft::snapshot_descriptor{.idx = raft::index_t{0},
            .config = raft::configuration({A_id, B_id, C_id})});
    auto A = create_follower(A_id, log);
    auto B = create_follower(B_id, log);
    auto C = create_follower(C_id, log);

    raft_routing_map map;
    map.emplace(A_id, &A);
    map.emplace(B_id, &B);
    map.emplace(C_id, &C);

    // A becomes leader
    election_timeout(A);
    communicate(A, B, C);
    BOOST_CHECK(A.is_leader());

    // Add a cfg entry on leader that removes it from the cluster ({B_id, C_id})
    raft::configuration newcfg({B_id, C_id});
    A.add_entry(newcfg);

    // Commit new config and stop communicating right after A steps down due to
    // starting leadership transfer.
    communicate_until([&A] { return !A.is_leader(); }, A, B, C);
    map.erase(A_id);

    // We don't really care on which node `timeout_now` message arrives so adapt
    // in a dynamic fashion.
    //
    // Check that A has sent the `timeout_now` message and determine to whom it was sent.
    // The size checks below are fatal because the following lines call
    // back()/front() on the message list, which is undefined behaviour when
    // the list is empty.
    auto output = A.get_output();
    BOOST_REQUIRE_EQUAL(output.messages.size(), 1);
    BOOST_REQUIRE(std::holds_alternative<raft::timeout_now>(output.messages.back().second));
    auto timeout_now_target_id = output.messages.back().first;
    auto timeout_now_msg = std::get<raft::timeout_now>(output.messages.back().second);

    // Accept the message on the node selected by A to be eligible for leadership transfer.
    // at() instead of operator[]: an unknown id must not silently insert a
    // null pointer that is then dereferenced.
    auto& timeout_now_target = *map.at(timeout_now_target_id);
    timeout_now_target.step(A_id, std::move(timeout_now_msg));
    // New candidate should've sent a vote_request with force flag set
    output = timeout_now_target.get_output();
    BOOST_REQUIRE_EQUAL(output.messages.size(), 1);
    BOOST_REQUIRE(std::holds_alternative<raft::vote_request>(output.messages.front().second));
    auto vote_req1 = std::get<raft::vote_request>(output.messages.front().second);
    BOOST_CHECK(vote_req1.force);

    // Lose the forced vote request so that the candidate's election is halted.
    // After election timeout has passed it should become a regular candidate and
    // then proceed with non-force vote requests to elect itself a leader through
    // the normal election process.
    election_timeout(timeout_now_target);
    output = timeout_now_target.get_output();
    BOOST_REQUIRE_EQUAL(output.messages.size(), 1);
    BOOST_REQUIRE(std::holds_alternative<raft::vote_request>(output.messages.front().second));
    // These requests will be sent after election threshold passes for other remaining nodes.
    auto vote_req1_regular = std::get<raft::vote_request>(output.messages.front().second);
    auto vote_req1_regular_target = output.messages.front().first;
    BOOST_CHECK(!vote_req1_regular.force);

    // Pass election threshold for remaining node and send pending regular vote request
    auto& regular_vote_target = *map.at(vote_req1_regular_target);
    election_threshold(regular_vote_target);
    regular_vote_target.step(timeout_now_target_id, std::move(vote_req1_regular));

    communicate(B, C);
    auto final_leader = select_leader(B, C);
    // Stop here if no leader was found rather than dereferencing null below.
    BOOST_REQUIRE(final_leader != nullptr);
    BOOST_CHECK(final_leader->id() == timeout_now_target_id);
}
|
|
|
|
// A follower should reject remote snapshots that are behind its current commit index.
|
|
BOOST_AUTO_TEST_CASE(test_reject_outdated_remote_snapshot) {
|
|
server_id A_id = id(), B_id = id();
|
|
raft::configuration cfg({A_id, B_id});
|
|
raft::log log(raft::snapshot_descriptor{.idx = index_t{0}, .config = cfg});
|
|
auto A = create_follower(A_id, log);
|
|
auto B = create_follower(B_id, log);
|
|
election_timeout(A);
|
|
communicate(A, B);
|
|
BOOST_CHECK(A.is_leader());
|
|
A.add_entry(log_entry::dummy{});
|
|
A.add_entry(log_entry::dummy{});
|
|
communicate(A, B);
|
|
|
|
auto snp_idx = index_t{1};
|
|
BOOST_CHECK(B.log_last_idx() > snp_idx);
|
|
auto snp_term = B.get_log().term_for(snp_idx);
|
|
BOOST_CHECK(snp_term);
|
|
auto snp = raft::snapshot_descriptor{.idx = index_t{1}, .term = *snp_term, .config = cfg};
|
|
BOOST_CHECK(!B.apply_snapshot(snp, 0, false));
|
|
// But it should apply this snapshot if it's locally generated
|
|
BOOST_CHECK(B.apply_snapshot(snp, 0, true));
|
|
}
|
|
|
|
// A server should sometimes become a candidate even though it is outside the current configuration,
|
|
// for example if it's the only server that can become a leader (due to log lengths).
|
|
BOOST_AUTO_TEST_CASE(test_candidate_outside_configuration) {
|
|
server_id A_id = id(), B_id = id();
|
|
raft::server_address_set addrset{raft::server_address{A_id}, raft::server_address{B_id}};
|
|
raft::configuration cfg(addrset);
|
|
raft::log log(raft::snapshot_descriptor{.idx = index_t{0}, .config = cfg});
|
|
discrete_failure_detector fd;
|
|
auto A = create_follower(A_id, log, fd);
|
|
auto B = create_follower(B_id, log, fd);
|
|
election_timeout(A);
|
|
communicate(A, B);
|
|
BOOST_CHECK(A.is_leader());
|
|
raft::configuration newcfg({B_id});
|
|
A.add_entry(newcfg);
|
|
BOOST_CHECK(!B.get_log().get_configuration().is_joint());
|
|
communicate_until([&A, &B] () { return !A.get_configuration().is_joint() && B.get_log().get_configuration().is_joint(); }, A, B);
|
|
BOOST_CHECK(!A.get_configuration().is_joint());
|
|
BOOST_CHECK(B.get_log().get_configuration().is_joint());
|
|
fd.mark_dead(B_id);
|
|
election_timeout(A);
|
|
// A steps down because it cannot communicate with a quorum in the current configuration ({B}).
|
|
BOOST_CHECK(!A.is_leader());
|
|
fd.mark_alive(B_id);
|
|
election_timeout(A);
|
|
// A should become a candidate - it is the only server that can become a leader;
|
|
// B's configuration is joint and it can't receive a vote from A due to shorter log.
|
|
BOOST_CHECK(A.is_candidate());
|
|
communicate_until([&A] () { return A.is_leader(); }, A, B);
|
|
BOOST_CHECK(A.is_leader());
|
|
communicate(A, B);
|
|
BOOST_CHECK(B.is_leader());
|
|
}
|
|
|
|
// Exercises the read barrier protocol on the leader: who may start a
// barrier, how read_quorum is (re)broadcast, how acknowledgements are counted
// in a simple and in a joint configuration, how read_quorum messages with a
// different term affect leadership, and the single-node case.
BOOST_AUTO_TEST_CASE(test_read_barrier) {
    raft::server_id A_id = id(), B_id = id(), C_id = id(), D_id = id(), E_id = id();
    raft::log log(raft::snapshot_descriptor{.idx = raft::index_t{0},
            .config = raft::configuration({A_id, B_id, C_id, D_id})});
    auto A = create_follower(A_id, log);
    auto B = create_follower(B_id, log);
    auto C = create_follower(C_id, log);
    auto D = create_follower(D_id, log);
    auto E = create_follower(E_id, log);

    // A becomes leader
    election_timeout(A);
    communicate(A, B, C, D);
    BOOST_CHECK(A.is_leader());
    // propagate commit index
    A.tick();
    communicate(A, B, C, D);

    // Check that a node outside of config cannot start read barrier
    BOOST_CHECK_THROW(A.start_read_barrier(E_id), std::runtime_error);

    // start read barrier
    // (fatal check: rid is dereferenced further down)
    auto rid = A.start_read_barrier(A_id);
    BOOST_REQUIRE(rid);

    // Check that read_quorum was broadcasted to other nodes
    // (fatal size check: the messages are indexed right below)
    auto output = A.get_output();
    BOOST_REQUIRE_EQUAL(output.messages.size(), 3);
    BOOST_CHECK(std::holds_alternative<raft::read_quorum>(output.messages[0].second));
    BOOST_CHECK(std::holds_alternative<raft::read_quorum>(output.messages[1].second));
    BOOST_CHECK(std::holds_alternative<raft::read_quorum>(output.messages[2].second));

    // Check that it gets re-broadcasted on leader's tick
    A.tick();
    output = A.get_output();
    BOOST_REQUIRE_EQUAL(output.messages.size(), 3);
    BOOST_REQUIRE(std::holds_alternative<raft::read_quorum>(output.messages[0].second));
    BOOST_CHECK(std::holds_alternative<raft::read_quorum>(output.messages[1].second));
    BOOST_CHECK(std::holds_alternative<raft::read_quorum>(output.messages[2].second));

    auto read_quorum_msg = std::get<raft::read_quorum>(output.messages[0].second);
    // check that read id is correct
    BOOST_CHECK_EQUAL(read_quorum_msg.id, rid->first);

    // Check that a leader ignores read_barrier with its own term.
    // A copy is passed because the same message is delivered to B below;
    // the previous code moved from it here and then used it again.
    A.step(B_id, raft::read_quorum(read_quorum_msg));
    output = A.get_output();
    BOOST_CHECK_EQUAL(output.messages.size(), 0);

    // Check that a follower replies to read_barrier with read_quorum_reply
    B.step(A_id, std::move(read_quorum_msg));
    output = B.get_output();
    BOOST_REQUIRE_EQUAL(output.messages.size(), 1);
    BOOST_REQUIRE(std::holds_alternative<raft::read_quorum_reply>(output.messages[0].second));

    auto read_quorum_reply_msg = std::get<raft::read_quorum_reply>(output.messages[0].second);

    // Ack barrier from B and check that this is not enough to complete a read.
    // The reply is delivered three times in total, so only the last delivery
    // may move from it; the earlier ones pass copies.
    A.step(B_id, raft::read_quorum_reply(read_quorum_reply_msg));
    output = A.get_output();
    BOOST_CHECK(!output.max_read_id_with_quorum);

    // Ack from B one more time and check that ack is not counted twice
    A.step(B_id, raft::read_quorum_reply(read_quorum_reply_msg));
    output = A.get_output();
    BOOST_CHECK(!output.max_read_id_with_quorum);

    // Ack from C and check that the read barrier is completed
    A.step(C_id, std::move(read_quorum_reply_msg));
    output = A.get_output();
    BOOST_CHECK(output.max_read_id_with_quorum);

    // Enter joint config
    raft::configuration newcfg({A_id, E_id});
    A.add_entry(newcfg);
    // Process log storing event and drop append_entries messages
    output = A.get_output();

    // start read barrier
    // (fatal check: rid is dereferenced further down)
    rid = A.start_read_barrier(A_id);
    BOOST_REQUIRE(rid);

    // check that read_barrier is broadcasted to all nodes
    output = A.get_output();
    BOOST_REQUIRE_EQUAL(output.messages.size(), 4);
    BOOST_CHECK(std::holds_alternative<raft::read_quorum>(output.messages[0].second));
    BOOST_CHECK(std::holds_alternative<raft::read_quorum>(output.messages[1].second));
    BOOST_CHECK(std::holds_alternative<raft::read_quorum>(output.messages[2].second));
    BOOST_CHECK(std::holds_alternative<raft::read_quorum>(output.messages[3].second));

    // Ack in only old quorum and check that the read is not completed
    A.step(B_id, read_quorum_reply{A.get_current_term(), index_t{0}, rid->first});
    A.step(C_id, read_quorum_reply{A.get_current_term(), index_t{0}, rid->first});
    A.step(D_id, read_quorum_reply{A.get_current_term(), index_t{0}, rid->first});
    output = A.get_output();
    BOOST_CHECK(!output.max_read_id_with_quorum);

    // Ack in new config as well and see that it is committed now
    A.step(E_id, read_quorum_reply{A.get_current_term(), index_t{0}, rid->first});
    output = A.get_output();
    BOOST_CHECK(output.max_read_id_with_quorum);

    // check that read_barrier with lower term does not depose the leader
    A.step(E_id, read_quorum{term_t{A.get_current_term() - 1}, index_t{10}, rid->first});
    BOOST_CHECK(A.is_leader());

    // check that read_barrier with higher term leads to leader
    // step down
    A.step(E_id, read_quorum{term_t{A.get_current_term() + 1}, index_t{10}, rid->first});
    BOOST_CHECK(!A.is_leader());

    // create one node cluster
    raft::log log1(raft::snapshot_descriptor{.idx = raft::index_t{0}, .config = raft::configuration({A_id})});
    auto AA = create_follower(A_id, log1);
    // Make AA a leader
    election_timeout(AA);
    BOOST_CHECK(AA.is_leader());
    output = AA.get_output();

    // execute read barrier
    rid = AA.start_read_barrier(A_id);
    BOOST_CHECK(rid);

    // check that it completes immediately
    output = AA.get_output();
    BOOST_CHECK(output.max_read_id_with_quorum);
}
|
|
|
|
// Steps messages between A, B and C by hand so that C snapshots its log
// after receiving an append whose reply is dropped, then checks that C's log
// is not empty after one more entry has been added and communicated.
BOOST_AUTO_TEST_CASE(test_append_entry_inside_snapshot) {
    server_id A_id = id(), B_id = id(), C_id = id();

    raft::log log(raft::snapshot_descriptor{.idx = index_t{0}, .config = raft::configuration{A_id, B_id, C_id}});

    auto A = create_follower(A_id, log);
    auto B = create_follower(B_id, log);
    auto C = create_follower(C_id, log);
    election_timeout(A);
    communicate(A, B, C);
    A.add_entry(log_entry::dummy{});
    A.add_entry(log_entry::dummy{});
    A.add_entry(log_entry::dummy{});
    communicate(A, B, C);

    // Add new entry and commit it with B.
    // All message-count checks in this test are fatal: each is followed by
    // messages.back(), which is undefined behaviour on an empty list.
    A.add_entry(log_entry::dummy{});
    auto output = A.get_output();
    BOOST_REQUIRE_EQUAL(output.messages.size(), 2);
    auto append = std::get<raft::append_request>(output.messages.back().second);
    B.step(A_id, std::move(append));
    output = B.get_output();
    BOOST_REQUIRE_EQUAL(output.messages.size(), 1);
    auto reply = std::get<raft::append_reply>(output.messages.back().second);
    A.step(B_id, std::move(reply)); // A commits last entry here

    // propagate commit index to B
    A.tick();
    communicate(A, B);

    // generate new message for C, first one will be empty
    // so feed it back to A and get next one
    A.tick();
    output = A.get_output();
    BOOST_REQUIRE_EQUAL(output.messages.size(), 1);
    append = std::get<raft::append_request>(output.messages.back().second);
    C.step(A_id, std::move(append));
    output = C.get_output();
    BOOST_REQUIRE_EQUAL(output.messages.size(), 1);
    reply = std::get<raft::append_reply>(output.messages.back().second);
    A.step(C_id, std::move(reply));
    output = A.get_output();
    BOOST_REQUIRE_EQUAL(output.messages.size(), 1);
    append = std::get<raft::append_request>(output.messages.back().second);

    // Now send it to C and ignore the reply
    C.step(A_id, std::move(append));
    (void)C.get_output();
    // C snapshots the log
    C.apply_snapshot(log_snapshot(C.get_log(), C.log_last_idx()), 0, true);

    // Try to add one more entry
    A.add_entry(log_entry::dummy{});
    A.tick();
    communicate(A, B, C);
    BOOST_CHECK(!C.get_log().empty());
}
|
|
|
|
// Verifies the "ping leader" mechanism: a node that has lost track of the
// current leader and receives no traffic from it stays ignorant until it
// explicitly requests a leader ping, after which one tick plus a round of
// communication is enough for it to learn the leader again.
BOOST_AUTO_TEST_CASE(test_ping_leader) {
    discrete_failure_detector detector;

    server_id A_id = id();
    server_id B_id = id();
    server_id C_id = id();

    // A and B are regular members; C is added with `false` as its second
    // field (the non-voter in this scenario).
    raft::server_address_set members{
        raft::server_address{A_id},
        raft::server_address{B_id},
        raft::server_address{C_id, false}};
    raft::configuration config(std::move(members));

    raft::log initial_log(raft::snapshot_descriptor{.idx = index_t{0}, .config = config});

    auto A = create_follower(A_id, initial_log, detector);
    auto B = create_follower(B_id, initial_log, detector);
    auto C = create_follower(C_id, initial_log, detector);

    // Elect A.
    election_timeout(A);
    communicate(A, B, C);
    BOOST_CHECK(A.is_leader());

    // With every node reported dead, an election timeout makes the non-voter
    // forget its leader. This is a property of the current implementation
    // rather than a requirement.
    detector.mark_all_dead();
    election_timeout(C);
    BOOST_CHECK(!C.current_leader());

    // Repairing the network is not enough on its own: with no new input
    // nothing tells C who the leader is.
    detector.mark_all_alive();
    communicate(A, B, C);
    BOOST_CHECK(!C.current_leader());

    // Once a leader ping is requested, C discovers the leader after talking
    // to the rest of the cluster.
    C.ping_leader();
    C.tick();
    communicate(A, B, C);
    BOOST_CHECK(C.current_leader());
}
|