raft: replication test: elect_new_leader wait log and pause

Do wait_log() for the next leader always in elect_new_leader.

Only wait log for new leader if it's connected to the old leader.

Pause and restart tickers when creating a candidate to avoid another
node to become a dueling candidate.

Remove pause and restart tickers around calls to elect_new_leader.

Signed-off-by: Alejo Sanchez <alejo.sanchez@scylladb.com>
This commit is contained in:
Alejo Sanchez
2021-05-08 21:53:19 -04:00
parent 52188016af
commit e9df914692

View File

@@ -748,6 +748,11 @@ future<> raft_cluster::elect_new_leader(size_t new_leader) {
if (new_leader != _leader) {
do {
if ((*_connected)(to_raft_id(_leader), to_raft_id(new_leader))) {
co_await wait_log(new_leader);
}
pause_tickers();
// Leader could be already partially disconnected, save current connectivity state
struct connected prev_disconnected = *_connected;
// Disconnect current leader from everyone
@@ -761,7 +766,9 @@ future<> raft_cluster::elect_new_leader(size_t new_leader) {
_connected->connect(to_raft_id(_leader));
// Disconnect old leader from all nodes except new leader
_connected->disconnect(to_raft_id(_leader), to_raft_id(new_leader));
restart_tickers();
co_await _servers[new_leader].server->wait_election_done();
// Restore connections to the original setting
*_connected = prev_disconnected;
} while (!_servers[new_leader].server->is_leader());
@@ -945,10 +952,7 @@ future<> run_test(test_case test, bool prevote, bool packet_drops) {
co_await rafts.add_entries(n);
} else if (std::holds_alternative<new_leader>(update)) {
unsigned next_leader = std::get<new_leader>(update).id;
co_await rafts.wait_log(next_leader);
rafts.pause_tickers();
co_await rafts.elect_new_leader(next_leader);
rafts.restart_tickers();
} else if (std::holds_alternative<partition>(update)) {
co_await rafts.partition(std::get<partition>(update));
} else if (std::holds_alternative<set_config>(update)) {
@@ -1344,9 +1348,7 @@ SEASTAR_TEST_CASE(rpc_leader_election) {
// Elect 2nd node a leader
constexpr size_t new_leader = 1;
rafts.pause_tickers();
co_await rafts.elect_new_leader(new_leader);
rafts.restart_tickers();
// Check that no attempts to update RPC were made.
for (size_t s = 0; s < rafts.size(); ++s) {
@@ -1446,9 +1448,7 @@ SEASTAR_TEST_CASE(rpc_configuration_truncate_restore_from_snp) {
BOOST_CHECK(rafts[0].rpc->known_peers() == extended_conf);
// Elect B as leader
rafts.pause_tickers();
co_await rafts.elect_new_leader(1);
rafts.restart_tickers();
// Heal network partition.
connected->connect_all();
@@ -1550,9 +1550,7 @@ SEASTAR_TEST_CASE(rpc_configuration_truncate_restore_from_log) {
BOOST_CHECK(rafts[0].rpc->known_peers() == committed_conf);
// Elect B as leader
rafts.pause_tickers();
co_await rafts.elect_new_leader(1);
rafts.restart_tickers();
// Heal network partition.
connected->connect_all();
@@ -1575,9 +1573,7 @@ SEASTAR_TEST_CASE(rpc_configuration_truncate_restore_from_log) {
//
// Elect A leader again.
rafts.pause_tickers();
co_await rafts.elect_new_leader(initial_leader);
rafts.restart_tickers();
co_await rafts.wait_log_all();
@@ -1609,9 +1605,7 @@ SEASTAR_TEST_CASE(rpc_configuration_truncate_restore_from_log) {
BOOST_CHECK(rafts[0].rpc->known_peers() == all_nodes);
// Elect B as leader
rafts.pause_tickers();
co_await rafts.elect_new_leader(1);
rafts.restart_tickers();
// Heal network partition.
connected->connect_all();