From 586ef8fc23375c46a7e42192aeba23acbe680eab Mon Sep 17 00:00:00 2001 From: Kamil Braun Date: Tue, 1 Feb 2022 17:34:49 +0100 Subject: [PATCH] service: raft: discovery: stop returning peer_list from `request` after becoming leader In `raft_group0::discover_group0`, when we detect that we became a leader, we destroy the `discovery` object, create a group 0, and respond with the group 0 information to all further requests. However there is a small time window after becoming a leader but before destroying the `discovery` object where we still answer to discovery requests by returning peer lists, without informing the requester that we become a leader. This is unsafe, and the algorithm specification does not allow this. For example, consider the seed graph 0 --> 1. It satisfies the property required by the algorithm, i.e. that there exists a vertex reachable from every other vertex. Now `1` can become a leader before `0` contacts it. When `0` contacts `1`, it should learn from `1` that `1` created a group 0, so `0` does not become a leader itself and create another group 0. However, with the current implementation, it may happen that `0` contacts `1` and receives a peer list (instead of group 0 information), and also becomes a leader because it has the smallest ID, so we end up with two group 0s. The correct thing to do is to stop returning peer lists to requests immediately after becoming a leader. This is what we fix in this commit. --- service/raft/discovery.cc | 5 ++++- service/raft/discovery.hh | 8 ++++---- service/raft/raft_group0.cc | 7 ++++++- test/raft/discovery_test.cc | 4 +++- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/service/raft/discovery.cc b/service/raft/discovery.cc index 6dd0faf0bd..312acae8f0 100644 --- a/service/raft/discovery.cc +++ b/service/raft/discovery.cc @@ -93,8 +93,11 @@ void discovery::maybe_become_leader() { } } -discovery::peer_list discovery::request(const peer_list& peers) { +std::optional discovery::request(const peer_list& peers) { step(peers); + if (_is_leader) { + return std::nullopt; + } return _peer_list; } diff --git a/service/raft/discovery.hh b/service/raft/discovery.hh index 59cc3eb497..ef5efdba12 100644 --- a/service/raft/discovery.hh +++ b/service/raft/discovery.hh @@ -86,10 +86,10 @@ public: discovery(raft::server_address self, const peer_list& seeds); // To be used on the receiving peer to generate a reply - // while the discovery protocol is in progress. Always - // returns a peer list, even if this node is a leader, - // since leader state must be persisted first. - peer_list request(const peer_list& peers); + // while the discovery protocol is in progress. + // Until the node becomes a leader, returns the list of known peers. + // When (if) the node becomes a leader, returns std::nullopt. + std::optional request(const peer_list& peers); // Submit a reply from one of the peers to this discovery // state machine. If this node is a leader, resposne is diff --git a/service/raft/raft_group0.cc b/service/raft/raft_group0.cc index 750d28374e..47e4686594 100644 --- a/service/raft/raft_group0.cc +++ b/service/raft/raft_group0.cc @@ -299,7 +299,12 @@ future raft_group0::peer_exchange(discovery::peer_list pee co_return group0_peer_exchange{std::monostate{}}; } else if constexpr (std::is_same_v) { // Use discovery to produce a response - co_return group0_peer_exchange{d.request(std::move(peers))}; + if (auto response = d.request(std::move(peers))) { + co_return group0_peer_exchange{std::move(*response)}; + } + // We just became a leader. + // Eventually we'll answer with group0_info. + co_return group0_peer_exchange{std::monostate{}}; } else if constexpr (std::is_same_v) { // Even if in follower state, return own address: the // incoming RPC will then be bounced to the leader. diff --git a/test/raft/discovery_test.cc b/test/raft/discovery_test.cc index 31d4cce50e..f9ff12b94c 100644 --- a/test/raft/discovery_test.cc +++ b/test/raft/discovery_test.cc @@ -34,7 +34,9 @@ run_discovery_impl(discovery_network& network) { continue; } discovery& to = *(it->second); - from.response(m.first, to.request(m.second)); + if (auto peer_list = to.request(m.second)) { + from.response(m.first, std::move(*peer_list)); + } } } }