mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-02 22:25:48 +00:00
Introduce a special state machine used to find a leader of an existing Raft cluster or create a new cluster. This state machine should be used when a new Scylla node has no persisted Raft Group 0 configuration. The algorithm is initialized with a list of seed IP addresses, the IP address of this server, and this server's Raft server id. The IP addresses are used to construct an initial list of peers. Then, the algorithm tries to contact each peer (excluding self) from its peer list and share the peer list with this peer, as well as get the peer's peer list. If this peer is already part of some Raft cluster, this information is also shared. On a response from a peer, the current peer's peer list is updated. The algorithm stops when all peers have exchanged peer information or one of the peers responds with the id of a Raft group and the Raft server address of the group leader. (If any of the peers fails to respond, the algorithm retries ad infinitum with a timeout.) More formally, the algorithm stops when one of the following is true: - it finds an instance with initialized Raft Group 0, with a leader - all the peers have been contacted, and this server's Raft server id is the smallest among all contacted peers.
147 lines
4.3 KiB
C++
147 lines
4.3 KiB
C++
/*
|
|
* Copyright (C) 2021-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* This file is part of Scylla.
|
|
*
|
|
* Scylla is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU Affero General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* Scylla is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
#include "service/raft/discovery.hh"
|
|
|
|
namespace service {
|
|
|
|
// Reject a peer whose address information (`info`) is empty.
// Throws std::logic_error in that case; otherwise does nothing.
void check_peer(const raft::server_address& peer) {
    const bool has_address = peer.info.size() != 0;
    if (has_address) {
        return;
    }
    throw std::logic_error("Discovery requires peer internet address to be set");
}
|
|
|
|
// Set up discovery for this server and feed it the initial seed list.
//
// self  - address of this server; must pass check_peer().
// seeds - initial peers to contact; each one must pass check_peer().
//
// check_peer() throws std::logic_error for any address with empty `info`.
discovery::discovery(raft::server_address self, const peer_list& seeds)
        : _self(std::move(self)) {
    // Validate everything up front, before any state is modified.
    check_peer(_self);
    for (const auto& seed : seeds) {
        check_peer(seed);
    }

    // The list we hand out to others always contains this server.
    _peer_list.push_back(_self);

    // Process the seeds exactly like a peer list received from the network.
    step(seeds);
}
|
|
|
|
void discovery::step(const peer_list& peers) {
|
|
|
|
if (_is_leader) {
|
|
return;
|
|
}
|
|
|
|
peer_set new_peers;
|
|
// Set to true if we learned about a new peer or
|
|
// received Raft server ID for one of the seeds.
|
|
bool refresh_peer_list = false;
|
|
|
|
for (const auto& addr : peers) {
|
|
// peer must have a non-empty Internet address
|
|
if (addr.info == _self.info) {
|
|
// do not include _self into _peers
|
|
continue;
|
|
}
|
|
auto it = _peers.find(addr);
|
|
// Update peer information if it's a new peer or provides
|
|
// a Raft ID for an existing peer.
|
|
if (it == _peers.end() || it->id == raft::server_id{}) {
|
|
refresh_peer_list = true;
|
|
if (it == _peers.end()) {
|
|
_peers.emplace(addr);
|
|
new_peers.emplace(addr);
|
|
} else {
|
|
// Update Raft ID
|
|
_peers.erase(it);
|
|
_peers.emplace(addr);
|
|
}
|
|
} else {
|
|
// If we have this peer, its ID must be the
|
|
// same as we know (with the exceptions of seeds,
|
|
// for which servers might not know ids at first).
|
|
assert(it == _peers.end() || it->id == addr.id || addr.id == raft::server_id{});
|
|
}
|
|
}
|
|
if (refresh_peer_list) {
|
|
_peer_list = {_peers.begin(), _peers.end()};
|
|
_peer_list.push_back(_self);
|
|
}
|
|
maybe_become_leader();
|
|
if (_is_leader) {
|
|
return;
|
|
}
|
|
for (const auto& peer : new_peers) {
|
|
_requests.push_back(std::make_pair(peer, _peer_list));
|
|
}
|
|
}
|
|
|
|
void discovery::maybe_become_leader() {
|
|
/*
|
|
* _responded is a subset of _peers.
|
|
* When all contacted peers have responded, we're ready
|
|
* to choose a node with the smallest id for the leader.
|
|
*/
|
|
if (_responded.size() < _peers.size()) {
|
|
return;
|
|
}
|
|
auto min_id = std::min_element(_peer_list.begin(), _peer_list.end());
|
|
if (min_id != _peer_list.end() && min_id->id == _self.id) {
|
|
_is_leader = true;
|
|
}
|
|
}
|
|
|
|
// Handle a peer list sent to us by another server: merge it into the
// local state via step() and answer with a copy of our own peer list.
discovery::peer_list discovery::request(const peer_list& peers) {
    step(peers);
    peer_list reply = _peer_list;
    return reply;
}
|
|
|
|
// Handle a reply to one of our requests.
//
// from  - the peer that replied; asserted to be a member of _peers.
// peers - the peer list it sent back, merged via step().
void discovery::response(raft::server_address from, const peer_list& peers) {
    assert(_peers.find(from) != _peers.end());
    // Record the reply before step(), which re-evaluates leadership using
    // the number of peers that have responded.
    _responded.emplace(from);
    step(peers);
}
|
|
|
|
// Report what the caller should do next:
//  - i_am_leader{} when this server has been marked leader;
//  - the queued requests (moved out of _requests) when there are any;
//  - pause{} otherwise, after queueing a fresh batch of requests that a
//    later call will hand out.
discovery::output discovery::get_output() {
    if (_is_leader) {
        return i_am_leader{};
    }

    if (!_requests.empty()) {
        return std::move(_requests);
    }

    // Nothing queued: prepare the next round before asking for a pause.
    const bool everyone_answered = _responded.size() == _peers.size();
    for (const auto& peer : _peers) {
        if (everyone_answered) {
            // All have responded, but we're not the leader. Ask every
            // peer again to find out who is, with an empty peer list so
            // no traffic is wasted on it.
            _requests.push_back(std::make_pair(peer, peer_list{}));
        } else if (!_responded.contains(peer)) {
            // This peer has not answered yet: send it our peer list.
            _requests.push_back(std::make_pair(peer, _peer_list));
        }
    }
    return pause{};
}
|
|
|
|
} // end of namespace service
|