raft: refactor the voters api to allow enabling voters
The raft voters api implementation only allowed to make a node to be a non-voter, but for the "limited voters" feature we need to also have the option to make the node a voter (from within the topology coordinator). Modifying the api to allow both adding and removing voters. This in particular tries to simplify the API by not having to add another set of new functions to make a voter, but having a single setter that allows to modify the node configuration to either become a voter or a non-voter. Fixes: scylladb/scylladb#21914 Refs: scylladb/scylladb#18793 Closes scylladb/scylladb#21899
This commit is contained in:
committed by
Kamil Braun
parent
d719f423e5
commit
115005d863
@@ -547,17 +547,17 @@ future<> raft_group0::join_group0(std::vector<gms::inet_address> seeds, shared_p
|
||||
group0_log.info("server {} joined group 0 with group id {}", my_id, group0_id);
|
||||
}
|
||||
|
||||
shared_ptr<service::group0_handshaker> raft_group0::make_legacy_handshaker(bool can_vote) {
|
||||
shared_ptr<service::group0_handshaker> raft_group0::make_legacy_handshaker(can_vote can_vote) {
|
||||
struct legacy_handshaker : public group0_handshaker {
|
||||
service::raft_group0& _group0;
|
||||
netw::messaging_service& _ms;
|
||||
bool _can_vote;
|
||||
service::can_vote _can_vote;
|
||||
|
||||
legacy_handshaker(service::raft_group0& group0, netw::messaging_service& ms, bool can_vote)
|
||||
: _group0(group0)
|
||||
, _ms(ms)
|
||||
, _can_vote(can_vote)
|
||||
{}
|
||||
legacy_handshaker(service::raft_group0& group0, netw::messaging_service& ms, service::can_vote can_vote)
|
||||
: _group0(group0)
|
||||
, _ms(ms)
|
||||
, _can_vote(can_vote) {
|
||||
}
|
||||
|
||||
future<> pre_server_start(const group0_info& info) override {
|
||||
// Nothing to do in this step
|
||||
@@ -569,7 +569,8 @@ shared_ptr<service::group0_handshaker> raft_group0::make_legacy_handshaker(bool
|
||||
auto my_id = _group0.load_my_id();
|
||||
raft::server_address my_addr{my_id, {}};
|
||||
try {
|
||||
co_await ser::group0_rpc_verbs::send_group0_modify_config(&_ms, locator::host_id{g0_info.id.uuid()}, timeout, g0_info.group0_id, {{my_addr, _can_vote}}, {});
|
||||
co_await ser::group0_rpc_verbs::send_group0_modify_config(
|
||||
&_ms, locator::host_id{g0_info.id.uuid()}, timeout, g0_info.group0_id, {{my_addr, static_cast<bool>(_can_vote)}}, {});
|
||||
co_return true;
|
||||
} catch (std::runtime_error& e) {
|
||||
group0_log.warn("failed to modify config at peer {}: {}. Retrying.", g0_info.id, e.what());
|
||||
@@ -795,30 +796,31 @@ future<> raft_group0::become_nonvoter(abort_source& as, std::optional<raft_timeo
|
||||
auto my_id = load_my_id();
|
||||
group0_log.info("becoming a non-voter (my id = {})...", my_id);
|
||||
|
||||
co_await make_raft_config_nonvoter({my_id}, as, timeout);
|
||||
co_await modify_raft_voter_status({my_id}, can_vote::no, as, timeout);
|
||||
group0_log.info("became a non-voter.", my_id);
|
||||
}
|
||||
|
||||
future<> raft_group0::make_nonvoter(raft::server_id node, abort_source& as, std::optional<raft_timeout> timeout) {
|
||||
co_return co_await make_nonvoters({node}, as, timeout);
|
||||
future<> raft_group0::set_voter_status(raft::server_id node, can_vote can_vote, abort_source& as, std::optional<raft_timeout> timeout) {
|
||||
co_return co_await set_voters_status({node}, can_vote, as, timeout);
|
||||
}
|
||||
|
||||
future<> raft_group0::make_nonvoters(const std::unordered_set<raft::server_id>& nodes, abort_source& as,
|
||||
std::optional<raft_timeout> timeout)
|
||||
{
|
||||
future<> raft_group0::set_voters_status(
|
||||
const std::unordered_set<raft::server_id>& nodes, can_vote can_vote, abort_source& as, std::optional<raft_timeout> timeout) {
|
||||
if (!(co_await raft_upgrade_complete())) {
|
||||
on_internal_error(group0_log, "called make_nonvoters before Raft upgrade finished");
|
||||
on_internal_error(group0_log, "called set_voter_status before Raft upgrade finished");
|
||||
}
|
||||
|
||||
if (nodes.empty()) {
|
||||
co_return;
|
||||
}
|
||||
|
||||
group0_log.info("making servers {} non-voters...", nodes);
|
||||
const std::string_view status_str = can_vote ? "voters" : "non-voters";
|
||||
|
||||
co_await make_raft_config_nonvoter(nodes, as, timeout);
|
||||
group0_log.info("making servers {} {}...", nodes, status_str);
|
||||
|
||||
group0_log.info("servers {} are now non-voters.", nodes);
|
||||
co_await modify_raft_voter_status(nodes, can_vote, as, timeout);
|
||||
|
||||
group0_log.info("servers {} are now {}.", nodes, status_str);
|
||||
}
|
||||
|
||||
future<> raft_group0::leave_group0() {
|
||||
@@ -904,9 +906,8 @@ future<bool> raft_group0::wait_for_raft() {
|
||||
co_return true;
|
||||
}
|
||||
|
||||
future<> raft_group0::make_raft_config_nonvoter(const std::unordered_set<raft::server_id>& ids, abort_source& as,
|
||||
std::optional<raft_timeout> timeout)
|
||||
{
|
||||
future<> raft_group0::modify_raft_voter_status(
|
||||
const std::unordered_set<raft::server_id>& ids, can_vote can_vote, abort_source& as, std::optional<raft_timeout> timeout) {
|
||||
static constexpr auto max_retry_period = std::chrono::seconds{1};
|
||||
auto retry_period = std::chrono::milliseconds{10};
|
||||
|
||||
@@ -915,14 +916,15 @@ future<> raft_group0::make_raft_config_nonvoter(const std::unordered_set<raft::s
|
||||
|
||||
std::vector<raft::config_member> add;
|
||||
add.reserve(ids.size());
|
||||
std::transform(ids.begin(), ids.end(), std::back_inserter(add),
|
||||
[] (raft::server_id id) { return raft::config_member{{id, {}}, false}; });
|
||||
std::transform(ids.begin(), ids.end(), std::back_inserter(add), [can_vote] (raft::server_id id) {
|
||||
return raft::config_member{{id, {}}, static_cast<bool>(can_vote)};
|
||||
});
|
||||
|
||||
try {
|
||||
co_await _raft_gr.group0_with_timeouts().modify_config(std::move(add), {}, &as, timeout);
|
||||
co_return;
|
||||
} catch (const raft::commit_status_unknown& e) {
|
||||
group0_log.info("make_raft_config_nonvoter({}): modify_config returned \"{}\", retrying", ids, e);
|
||||
group0_log.info("modify_raft_voter_config({}): modify_config returned \"{}\", retrying", ids, e);
|
||||
}
|
||||
retry_period *= 2;
|
||||
if (retry_period > max_retry_period) {
|
||||
@@ -1649,7 +1651,7 @@ future<> raft_group0::do_upgrade_to_group0(group0_upgrade_state start_state, ser
|
||||
|
||||
if (!joined_group0()) {
|
||||
upgrade_log.info("Joining group 0...");
|
||||
auto handshaker = make_legacy_handshaker(true); // Voter
|
||||
auto handshaker = make_legacy_handshaker(can_vote::yes); // Voter
|
||||
co_await join_group0(co_await _sys_ks.load_peers(), std::move(handshaker), ss, qp, mm, _sys_ks, topology_change_enabled);
|
||||
} else {
|
||||
upgrade_log.info(
|
||||
|
||||
@@ -27,6 +27,9 @@ class migration_manager;
|
||||
class raft_group0_client;
|
||||
class storage_service;
|
||||
|
||||
struct can_vote_tag {};
|
||||
using can_vote = bool_class<can_vote_tag>;
|
||||
|
||||
// Wrapper for `discovery` which persists the learned peers on disk.
|
||||
class persistent_discovery {
|
||||
discovery _discovery;
|
||||
@@ -220,18 +223,17 @@ public:
|
||||
// `wait_for_raft` must've also been called earlier and returned `true`.
|
||||
future<> become_nonvoter(abort_source& as, std::optional<raft_timeout> timeout = std::nullopt);
|
||||
|
||||
// Make the given server, other than us, a non-voter in group 0.
|
||||
// Set the voter status of the given server, other than us, in group 0.
|
||||
//
|
||||
// Assumes we've finished the startup procedure (`setup_group0()` finished earlier).
|
||||
// `wait_for_raft` must've also been called earlier and returned `true`.
|
||||
future<> make_nonvoter(raft::server_id, abort_source&, std::optional<raft_timeout> timeout = std::nullopt);
|
||||
future<> set_voter_status(raft::server_id, can_vote, abort_source&, std::optional<raft_timeout> timeout = std::nullopt);
|
||||
|
||||
// Make the given servers, other than us, a non-voter in group 0.
|
||||
// Set the voter status of the given servers, other than us, in group 0.
|
||||
//
|
||||
// Assumes we've finished the startup procedure (`setup_group0()` finished earlier).
|
||||
// `wait_for_raft` must've also been called earlier and returned `true`.
|
||||
future<> make_nonvoters(const std::unordered_set<raft::server_id>&, abort_source&,
|
||||
std::optional<raft_timeout> timeout = std::nullopt);
|
||||
future<> set_voters_status(const std::unordered_set<raft::server_id>&, can_vote, abort_source&, std::optional<raft_timeout> timeout = std::nullopt);
|
||||
|
||||
// Remove ourselves from group 0.
|
||||
//
|
||||
@@ -271,7 +273,7 @@ public:
|
||||
// It is meant to be used as a fallback when a proper handshake procedure
|
||||
// cannot be used (e.g. when completing the upgrade or group0 procedures
|
||||
// or when joining an old cluster which does not support JOIN_NODE RPC).
|
||||
shared_ptr<group0_handshaker> make_legacy_handshaker(bool can_vote);
|
||||
shared_ptr<group0_handshaker> make_legacy_handshaker(can_vote can_vote);
|
||||
|
||||
// Waits until all upgrade to raft group 0 finishes and all nodes switched
|
||||
// to use_post_raft_procedures.
|
||||
@@ -376,9 +378,10 @@ private:
|
||||
// (we could then try restarting the server internally).
|
||||
future<> start_server_for_group0(raft::group_id group0_id, service::storage_service& ss, cql3::query_processor& qp, service::migration_manager& mm, bool topology_change_enabled);
|
||||
|
||||
// Make the given server a non-voter in Raft group 0 configuration.
|
||||
// Modify the given server voter status in Raft group 0 configuration.
|
||||
// Retries on raft::commit_status_unknown.
|
||||
future<> make_raft_config_nonvoter(const std::unordered_set<raft::server_id>&, abort_source& as, std::optional<raft_timeout> timeout = std::nullopt);
|
||||
future<> modify_raft_voter_status(
|
||||
const std::unordered_set<raft::server_id>&, can_vote, abort_source& as, std::optional<raft_timeout> timeout = std::nullopt);
|
||||
|
||||
// Returns true if raft is enabled
|
||||
future<bool> use_raft();
|
||||
|
||||
@@ -1812,7 +1812,7 @@ future<> storage_service::join_topology(sharded<db::system_distributed_keyspace>
|
||||
// if the node is bootstrapped the function will do nothing since we already created group0 in main.cc
|
||||
::shared_ptr<group0_handshaker> handshaker = raft_topology_change_enabled()
|
||||
? ::make_shared<join_node_rpc_handshaker>(*this, join_params)
|
||||
: _group0->make_legacy_handshaker(false);
|
||||
: _group0->make_legacy_handshaker(can_vote::no);
|
||||
co_await _group0->setup_group0(_sys_ks.local(), initial_contact_nodes, std::move(handshaker),
|
||||
raft_replace_info, *this, _qp, _migration_manager.local(), raft_topology_change_enabled());
|
||||
|
||||
@@ -4115,7 +4115,7 @@ future<> storage_service::raft_removenode(locator::host_id host_id, locator::hos
|
||||
}
|
||||
try {
|
||||
// Make non voter during request submission for better HA
|
||||
co_await _group0->make_nonvoters(ignored_ids, _group0_as, raft_timeout{});
|
||||
co_await _group0->set_voters_status(ignored_ids, can_vote::no, _group0_as, raft_timeout{});
|
||||
co_await _group0->client().add_entry(std::move(g0_cmd), std::move(guard), _group0_as, raft_timeout{});
|
||||
} catch (group0_concurrent_modification&) {
|
||||
rtlogger.info("removenode: concurrent operation is detected, retrying.");
|
||||
@@ -4198,7 +4198,7 @@ future<> storage_service::removenode(locator::host_id host_id, locator::host_id_
|
||||
// but before removing it group 0, group 0's availability won't be reduced.
|
||||
if (is_group0_member && ss._group0->is_member(raft_id, true)) {
|
||||
slogger.info("removenode[{}]: making node {} a non-voter in group 0", uuid, raft_id);
|
||||
ss._group0->make_nonvoter(raft_id, ss._group0_as).get();
|
||||
ss._group0->set_voter_status(raft_id, can_vote::no, ss._group0_as).get();
|
||||
slogger.info("removenode[{}]: made node {} a non-voter in group 0", uuid, raft_id);
|
||||
}
|
||||
|
||||
@@ -6859,7 +6859,7 @@ future<join_node_request_result> storage_service::join_node_request_handler(join
|
||||
|
||||
try {
|
||||
// Make replaced node and ignored nodes non voters earlier for better HA
|
||||
co_await _group0->make_nonvoters(ignored_nodes_from_join_params(params), _group0_as, raft_timeout{});
|
||||
co_await _group0->set_voters_status(ignored_nodes_from_join_params(params), can_vote::no, _group0_as, raft_timeout{});
|
||||
co_await _group0->client().add_entry(std::move(g0_cmd), std::move(guard), _group0_as, raft_timeout{});
|
||||
break;
|
||||
} catch (group0_concurrent_modification&) {
|
||||
|
||||
@@ -2139,7 +2139,7 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
// FIXME: removenode may be aborted and the already dead node can be resurrected. We should consider
|
||||
// restoring its voter state on the recovery path.
|
||||
if (node.rs->state == node_state::removing) {
|
||||
co_await _group0.make_nonvoter(node.id, _as);
|
||||
co_await _group0.set_voter_status(node.id, can_vote::no, _as);
|
||||
}
|
||||
|
||||
// If we decommission a node when the number of nodes is even, we make it a non-voter early.
|
||||
@@ -2156,7 +2156,7 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
"giving up leadership");
|
||||
co_await step_down_as_nonvoter();
|
||||
} else {
|
||||
co_await _group0.make_nonvoter(node.id, _as);
|
||||
co_await _group0.set_voter_status(node.id, can_vote::no, _as);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2164,7 +2164,7 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
// We make a replaced node a non-voter early, just like a removed node.
|
||||
auto replaced_node_id = parse_replaced_node(node.req_param);
|
||||
if (_group0.is_member(replaced_node_id, true)) {
|
||||
co_await _group0.make_nonvoter(replaced_node_id, _as);
|
||||
co_await _group0.set_voter_status(replaced_node_id, can_vote::no, _as);
|
||||
}
|
||||
}
|
||||
utils::get_local_injector().inject("crash_coordinator_before_stream", [] { abort(); });
|
||||
|
||||
Reference in New Issue
Block a user