mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-23 16:22:15 +00:00
The only thing it does not change a bootstrapping node to become a voter in case the cluster does not support the limited voters feature. But the feature was introduced in 2025.2, and a direct upgrade from 2025.1 to a version newer than 2026.1 is not supported. But even if such an upgrade is done, the removed code has an effect only during bootstrap, not during regular boot. Also remove the upgrade test, since after the patch, suppressing the feature on the first boot will no longer behave correctly.
1036 lines
44 KiB
C++
1036 lines
44 KiB
C++
/*
|
|
* Copyright (C) 2021-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
|
|
*/
|
|
#include <iterator>
|
|
#include <source_location>
|
|
#include <fmt/ranges.h>
|
|
|
|
#include "mutation/async_utils.hh"
|
|
#include "raft/raft.hh"
|
|
#include "service/raft/group0_fwd.hh"
|
|
#include "service/raft/raft_group0.hh"
|
|
#include "service/raft/raft_rpc.hh"
|
|
#include "service/raft/raft_sys_table_storage.hh"
|
|
#include "service/raft/group0_state_machine.hh"
|
|
#include "service/raft/raft_group0_client.hh"
|
|
|
|
#include "message/messaging_service.hh"
|
|
#include "cql3/query_processor.hh"
|
|
#include "cql3/untyped_result_set.hh"
|
|
#include "service/storage_proxy.hh"
|
|
#include "service/storage_service.hh"
|
|
#include "service/migration_manager.hh"
|
|
#include "service/direct_failure_detector/failure_detector.hh"
|
|
#include "gms/gossiper.hh"
|
|
#include "gms/feature_service.hh"
|
|
#include "db/config.hh"
|
|
#include "db/system_keyspace.hh"
|
|
#include "replica/database.hh"
|
|
#include "service/topology_mutation.hh"
|
|
#include "utils/assert.hh"
|
|
#include "utils/error_injection.hh"
|
|
|
|
#include <seastar/core/smp.hh>
|
|
#include <seastar/core/sleep.hh>
|
|
#include <seastar/core/coroutine.hh>
|
|
#include <seastar/core/with_scheduling_group.hh>
|
|
#include <seastar/coroutine/as_future.hh>
|
|
#include <seastar/util/log.hh>
|
|
#include <seastar/util/defer.hh>
|
|
#include <seastar/rpc/rpc_types.hh>
|
|
#include <stdexcept>
|
|
#include <csignal>
|
|
#include <unordered_set>
|
|
|
|
#include "idl/group0.dist.hh"
|
|
#include "idl/migration_manager.dist.hh"
|
|
|
|
// Used to implement 'wait for any task to finish'.
|
|
//
|
|
// Pass a copy of this object to each task in a set of tasks.
|
|
// Once a task finishes, it should call `set_value` or `set_exception`.
|
|
//
|
|
// Call `get()` to wait for the result of the first task that finishes.
|
|
// Note that the results of all other tasks will be lost.
|
|
//
|
|
// There can be at most one `get()` call.
|
|
//
|
|
// Make sure that there is at least one task that reaches `set_value` or `set_exception`;
|
|
// otherwise `get()` would hang indefinitely.
|
|
template <typename T>
|
|
requires std::is_nothrow_move_constructible_v<T>
|
|
class tracker {
|
|
struct shared {
|
|
bool is_set{false};
|
|
promise<T> p{};
|
|
};
|
|
|
|
lw_shared_ptr<shared> _shared{make_lw_shared<shared>()};
|
|
|
|
public:
|
|
bool finished() {
|
|
return _shared->is_set;
|
|
}
|
|
|
|
void set_value(T&& v) {
|
|
if (!_shared->is_set) {
|
|
_shared->p.set_value(std::move(v));
|
|
_shared->is_set = true;
|
|
}
|
|
}
|
|
|
|
void set_exception(std::exception_ptr ep) {
|
|
if (!_shared->is_set) {
|
|
_shared->p.set_exception(std::move(ep));
|
|
_shared->is_set = true;
|
|
}
|
|
}
|
|
|
|
future<T> get() {
|
|
return _shared->p.get_future();
|
|
}
|
|
};
|
|
|
|
namespace service {
|
|
|
|
// Logger used for group 0 messages throughout this file.
static logging::logger group0_log("raft_group0");
|
|
// Logger registered under the name "raft_group0_upgrade".
static logging::logger upgrade_log("raft_group0_upgrade");
|
|
|
|
namespace {
|
|
|
|
constexpr std::chrono::milliseconds default_retry_period{10}; // 10 milliseconds
|
|
constexpr std::chrono::seconds default_max_retry_period{1}; // 1 second
|
|
constexpr std::chrono::seconds default_max_total_timeout{300}; // 5 minutes
|
|
|
|
enum class operation_result : uint8_t { success, failure };
|
|
|
|
future<> run_op_with_retry(abort_source& as, auto&& op, const sstring op_name,
|
|
const std::optional<std::chrono::seconds> max_total_timeout = default_max_total_timeout, std::chrono::milliseconds retry_period = default_retry_period,
|
|
const std::chrono::seconds max_retry_period = default_max_retry_period) {
|
|
const auto start = lowres_clock::now();
|
|
while (true) {
|
|
as.check();
|
|
const operation_result result = co_await op();
|
|
if (result == operation_result::success) {
|
|
co_return;
|
|
}
|
|
|
|
if (max_total_timeout) {
|
|
const auto elapsed = lowres_clock::now() - start;
|
|
if (elapsed > *max_total_timeout) {
|
|
on_internal_error(group0_log,
|
|
format("{} timed out after retrying for {} seconds", op_name, std::chrono::duration_cast<std::chrono::seconds>(elapsed).count()));
|
|
}
|
|
}
|
|
|
|
retry_period *= 2;
|
|
if (retry_period > max_retry_period) {
|
|
retry_period = max_retry_period;
|
|
}
|
|
co_await sleep_abortable(retry_period, as);
|
|
}
|
|
std::unreachable();
|
|
}
|
|
|
|
} // namespace
|
|
|
|
// TODO: change the links from master to stable/5.2 after 5.2 is released
|
|
// Documentation link: verifying that the internal Raft upgrade procedure finished successfully.
const char* const raft_upgrade_doc = "https://docs.scylladb.com/master/architecture/raft.html#verifying-that-the-internal-raft-upgrade-procedure-finished-successfully";
|
|
// Documentation link: the manual Raft recovery procedure. Embedded in an error message of join_group0().
static const auto raft_manual_recovery_doc = "https://docs.scylladb.com/master/architecture/raft.html#raft-manual-recovery-procedure";
|
|
|
|
// {{{ group0_rpc Maintain failure detector subscription whenever
|
|
// group 0 configuration changes.
|
|
|
|
class group0_rpc: public service::raft_rpc {
|
|
direct_failure_detector::failure_detector& _direct_fd;
|
|
public:
|
|
explicit group0_rpc(direct_failure_detector::failure_detector& direct_fd,
|
|
raft_state_machine& sm, netw::messaging_service& ms,
|
|
shared_ptr<raft::failure_detector> raft_fd, raft::group_id gid, raft::server_id srv_id)
|
|
: raft_rpc(sm, ms, std::move(raft_fd), gid, srv_id)
|
|
, _direct_fd(direct_fd)
|
|
{}
|
|
|
|
virtual void on_configuration_change(raft::server_address_set add, raft::server_address_set del) override {
|
|
for (const auto& addr: add) {
|
|
// Notify the direct failure detector that it should track
|
|
// (or liveness of a specific raft server id.
|
|
if (addr != _my_id) {
|
|
// No need to ping self to know it's alive
|
|
_direct_fd.add_endpoint(addr.id.id);
|
|
}
|
|
}
|
|
for (const auto& addr: del) {
|
|
_direct_fd.remove_endpoint(addr.id.id);
|
|
}
|
|
}
|
|
};
|
|
|
|
// }}} group0_rpc
|
|
|
|
// Stores references to the collaborating services, names the shutdown gate,
// starts with the `normal` monitoring status and registers metrics.
raft_group0::raft_group0(seastar::abort_source& abort_source,
        raft_group_registry& raft_gr,
        sharded<netw::messaging_service>& ms,
        gms::gossiper& gs,
        gms::feature_service& feat,
        raft_group0_client& client,
        seastar::scheduling_group sg)
    : _shutdown_gate("raft_group0::shutdown")
    , _abort_source(abort_source)
    , _raft_gr(raft_gr)
    , _ms(ms)
    , _gossiper(gs)
    , _feat(feat)
    , _client(client)
    , _sg(sg)
    , _status_for_monitoring(status_for_monitoring::normal)
{
    register_metrics();
}
|
|
|
|
// Registers the group 0 RPC verbs on every shard; each handler is given
// a reference to this instance.
future<> raft_group0::start() {
    return smp::invoke_on_all([owner = this] {
        init_rpc_verbs(*owner);
    });
}
|
|
|
|
// Registers the group 0 RPC handlers with the messaging service instance of
// the calling shard. Both handlers forward the actual work to shard 0, where
// `shard0_this` is used.
void raft_group0::init_rpc_verbs(raft_group0& shard0_this) {
    auto& local_ms = shard0_this._ms.local();

    // Discovery: exchange of known peers.
    ser::group0_rpc_verbs::register_group0_peer_exchange(&local_ms,
        [&shard0_this] (const rpc::client_info&, rpc::opt_time_point, discovery::peer_list peers) {
            return smp::submit_to(0, [&shard0_this, peers = std::move(peers)]() mutable {
                return shard0_this.peer_exchange(std::move(peers));
            });
        });

    // Configuration change requested by a remote node.
    ser::group0_rpc_verbs::register_group0_modify_config(&local_ms,
        [&shard0_this] (const rpc::client_info&, rpc::opt_time_point, raft::group_id gid, std::vector<raft::config_member> add, std::vector<raft::server_id> del) {
            return smp::submit_to(0, [&shard0_this, gid, add = std::move(add), del = std::move(del)]() mutable {
                return shard0_this._raft_gr.get_server(gid).modify_config(std::move(add), std::move(del), nullptr);
            });
        });
}
|
|
|
|
// Unregisters both group 0 RPC verbs from `ms`; the returned future resolves
// when both unregistrations have succeeded.
future<> raft_group0::uninit_rpc_verbs(netw::messaging_service& ms) {
    auto peer_exchange_gone = ser::group0_rpc_verbs::unregister_group0_peer_exchange(&ms);
    auto modify_config_gone = ser::group0_rpc_verbs::unregister_group0_modify_config(&ms);
    return when_all_succeed(std::move(peer_exchange_gone), std::move(modify_config_gone)).discard_result();
}
|
|
|
|
// Returns this node's Raft server id as known to the Raft group registry.
const raft::server_id& raft_group0::load_my_id() {
    const raft::server_id& my_raft_id = _raft_gr.get_my_raft_id();
    return my_raft_id;
}
|
|
|
|
// Assembles (but does not start) the Raft server for group 0: the state
// machine, the RPC module, the system-table persistence, the server itself
// and the timer that ticks it. References to the individual components are
// returned alongside the server in the raft_server_for_group bundle.
raft_server_for_group raft_group0::create_server_for_group0(raft::group_id gid, raft::server_id my_id, service::storage_service& ss, cql3::query_processor& qp,
        service::migration_manager& mm) {
    auto state_machine = std::make_unique<group0_state_machine>(
            _client, mm, qp.proxy(), ss, _gossiper, _feat);
    auto& state_machine_ref = *state_machine;

    auto rpc = std::make_unique<group0_rpc>(_raft_gr.direct_fd(), *state_machine, _ms.local(), _raft_gr.failure_detector(), gid, my_id);
    // Keep a reference to a specific RPC class.
    auto& rpc_ref = *rpc;

    auto storage = std::make_unique<raft_sys_table_storage>(qp, gid, my_id);
    auto& persistence_ref = *storage;

    auto srv_config = raft::server::configuration {
        .on_background_error = [gid, this](std::exception_ptr e) {
            // The future will be waited indirectly in raft_group0::abort_and_drain.
            (void)_raft_gr.abort_server(gid, fmt::format("background error, {}", e));
            _status_for_monitoring = status_for_monitoring::aborted;
        }
    };

    auto* schema_cl = qp.proxy().get_db().local().schema_commitlog();
    if (schema_cl) {
        // Dividing by two is to protect against paddings that the
        // commit log can add for each mutation, as well as
        // against different commit log limits on different nodes.
        srv_config.max_command_size = schema_cl->max_record_size() / 2;
        srv_config.max_log_size = 3 * srv_config.max_command_size;
        srv_config.snapshot_threshold_log_size = srv_config.max_log_size / 2;
        srv_config.snapshot_trailing_size = srv_config.snapshot_threshold_log_size / 2;
    }

    auto server = raft::create_server(my_id, std::move(rpc), std::move(state_machine),
            std::move(storage), _raft_gr.failure_detector(), srv_config);

    // initialize the corresponding timer to tick the raft server instance
    auto ticker = std::make_unique<raft_ticker_type>([srv = server.get()] { srv->tick(); });

    return raft_server_for_group{
        .gid = std::move(gid),
        .server = std::move(server),
        .ticker = std::move(ticker),
        .rpc = rpc_ref,
        .persistence = persistence_ref,
        .state_machine = state_machine_ref,
        .default_op_timeout_in_ms = qp.proxy().get_db().local().get_config().group0_raft_op_timeout_in_ms
    };
}
|
|
|
|
// Runs the persistent discovery algorithm and returns the group 0 information
// it produces.
//
// The initial peer list is `seeds` without our own broadcast address. While
// discovery is running, `_group0` holds the persistent_discovery object; once
// it finishes (successfully or not) discovery is stopped and `_group0` is
// reset to std::monostate.
future<group0_info>
raft_group0::discover_group0(const std::vector<gms::inet_address>& seeds, cql3::query_processor& qp) {
    auto my_id = load_my_id();

    discovery::peer_list initial_peers;
    for (auto& seed: seeds) {
        if (seed != _gossiper.get_broadcast_address()) {
            initial_peers.emplace_back(discovery_peer{raft::server_id{}, seed});
        }
    }
    discovery_peer my_addr = {my_id, _gossiper.get_broadcast_address()};

    auto& p_discovery = _group0.emplace<persistent_discovery>(co_await persistent_discovery::make(my_addr, std::move(initial_peers), qp));

    // The cleanup step is a coroutine; its arguments are bound with
    // std::bind_front instead of being captured by the lambda.
    auto cleanup = std::bind_front([] (raft_group0& self, persistent_discovery& p_discovery) -> future<> {
        co_await p_discovery.stop();
        self._group0 = std::monostate{};
    }, std::ref(*this), std::ref(p_discovery));

    co_return co_await futurize_invoke([this, &p_discovery, my_addr = std::move(my_addr)] () mutable {
        return p_discovery.run(_ms.local(), _shutdown_gate.hold(), _abort_source, std::move(my_addr));
    }).finally(std::move(cleanup));
}
|
|
|
|
// Partition key value under which discovered peers are loaded from and stored to the discovery table.
static constexpr auto DISCOVERY_KEY = "peers";
|
|
|
|
// Reads all peers stored in the discovery system table (under DISCOVERY_KEY)
// and returns them as a peer list.
static future<discovery::peer_list> load_discovered_peers(cql3::query_processor& qp) {
    static const auto load_cql = format(
            "SELECT ip_addr, raft_server_id FROM system.{} WHERE key = '{}'",
            db::system_keyspace::DISCOVERY, DISCOVERY_KEY);

    auto result_set = co_await qp.execute_internal(load_cql, cql3::query_processor::cache_internal::yes);
    SCYLLA_ASSERT(result_set);

    discovery::peer_list loaded;
    for (auto& row: *result_set) {
        loaded.push_back(discovery_peer{
            raft::server_id{row.get_as<utils::UUID>("raft_server_id")},
            gms::inet_address{row.get_as<net::inet_address>("ip_addr")}
        });
    }

    co_return loaded;
}
|
|
|
|
// Builds one mutation of the discovery table (partition DISCOVERY_KEY) with a
// clustered row per peer: the row is keyed by the peer's IP address and its
// "raft_server_id" cell holds the peer's Raft id. All writes share a single
// freshly generated timestamp.
static mutation make_discovery_mutation(discovery::peer_list peers) {
    auto schema = db::system_keyspace::discovery();
    auto write_ts = api::new_timestamp();
    auto id_column = schema->get_column_definition("raft_server_id");
    SCYLLA_ASSERT(id_column);

    mutation result(schema, partition_key::from_singular(*schema, DISCOVERY_KEY));
    for (auto& peer: peers) {
        auto ckey = clustering_key::from_singular(*schema, data_value(peer.ip_addr));
        auto& row = result.partition().clustered_row(*schema, ckey);
        row.apply(row_marker(write_ts));
        row.cells().apply(*id_column, atomic_cell::make_live(*id_column->type, write_ts, id_column->type->decompose(peer.id.id)));
    }

    return result;
}
|
|
|
|
// Persists `peers` by applying the discovery mutation locally, without tracing.
static future<> store_discovered_peers(cql3::query_processor& qp, discovery::peer_list peers) {
    auto& proxy = qp.proxy();
    return proxy.mutate_locally({make_discovery_mutation(std::move(peers))}, tracing::trace_state_ptr{});
}
|
|
|
|
// Drives the discovery algorithm until group 0 information is available.
//
// Send peer information to all known peers. If replies
// discover new peers, send peer information to them as well.
// As soon as we get a Raft Group 0 member information from
// any peer, return it. If there is no Group 0, collect
// replies from all peers, then, if this server has the smallest
// id, make a new Group 0 with this server as the only member.
// Otherwise sleep and keep pinging peers till some other node
// creates a group and shares its group 0 id and peer address
// with us.
future<group0_info> persistent_discovery::run(
        netw::messaging_service& ms,
        gate::holder pause_shutdown,
        abort_source& as,
        discovery_peer my_addr) {
    for (;;) {
        auto output = co_await tick();

        if (std::holds_alternative<discovery::i_am_leader>(output)) {
            co_return group0_info{
                // Time-based ordering for groups identifiers may be
                // useful to provide linearisability between group
                // operations. Currently it's unused.
                .group0_id = raft::group_id{utils::UUID_gen::get_time_UUID()},
                .id = my_addr.id,
                .ip_addr = my_addr.ip_addr
            };
        }

        if (std::holds_alternative<discovery::pause>(output)) {
            group0_log.trace("server {} pausing discovery...", my_addr.id);
            co_await seastar::sleep_abortable(std::chrono::milliseconds{100}, as);
            continue;
        }

        // Remaining alternative: a list of requests to send. They are sent by
        // a detached fiber which reports the first outcome through the tracker.
        ::tracker<std::optional<group0_info>> tracker;
        (void)[] (persistent_discovery& self, netw::messaging_service& ms, gate::holder pause_shutdown,
                  discovery::request_list requests, ::tracker<std::optional<group0_info>> first_outcome) -> future<> {
            auto deadline = db::timeout_clock::now() + std::chrono::milliseconds{1000};
            co_await parallel_for_each(requests, [&] (std::pair<discovery_peer, discovery::peer_list>& req) -> future<> {
                netw::msg_addr peer(req.first.ip_addr);
                group0_log.trace("sending discovery message to {}", peer);
                try {
                    auto reply = co_await ser::group0_rpc_verbs::send_group0_peer_exchange(&ms, peer, deadline, std::move(req.second));

                    if (first_outcome.finished()) {
                        // Another peer was used to discover group 0 before us.
                        co_return;
                    }

                    if (auto peer_list = std::get_if<discovery::peer_list>(&reply.info)) {
                        // `first_outcome.finished()` is false so `run_discovery` hasn't exited yet, still safe to access `self`.
                        self.response(req.first, std::move(*peer_list));
                    } else if (auto g0_info = std::get_if<group0_info>(&reply.info)) {
                        first_outcome.set_value(std::move(*g0_info));
                    }
                } catch (std::exception& e) {
                    // Runtime errors are only traced; any other standard
                    // exception becomes the outcome of this round.
                    if (dynamic_cast<std::runtime_error*>(&e)) {
                        group0_log.trace("failed to send message: {}", e);
                    } else {
                        first_outcome.set_exception(std::current_exception());
                    }
                }
            });

            // In case we haven't discovered group 0 yet - need to run another iteration.
            first_outcome.set_value(std::nullopt);
        }(std::ref(*this), ms, pause_shutdown, std::move(std::get<discovery::request_list>(output)), tracker);

        if (auto g0_info = co_await tracker.get()) {
            co_return *g0_info;
        }
    }
}
|
|
|
|
// Starts the abort-and-drain procedure on the first call and returns a future
// for its completion; later calls return a future for that same procedure.
future<> raft_group0::abort_and_drain() {
    if (!_aborted) {
        // Async lambdas are destroyed at the first co_await,
        // so accessing lambda-local state (like 'this') afterward would result
        // in use-after-free. To avoid that, the lambda is not a coroutine
        // itself: it only delegates to the helper do_abort_and_drain().
        auto run_abort = [this]() { return do_abort_and_drain(); };
        _aborted = futurize_invoke(std::move(run_abort));
    }
    return _aborted->get_future();
}
|
|
|
|
// Performs the actual shutdown of the group 0 service:
//  1. unregisters the group 0 RPC verbs on all shards,
//  2. requests abort of the leadership monitor,
//  3. closes the shutdown gate,
//  4. waits for the leadership monitor fiber,
//  5. aborts the group 0 Raft server, if `_group0` holds a group id.
//
// A failure in steps 1-4 is logged as a warning and does not prevent step 5.
future<> raft_group0::do_abort_and_drain() {
    group0_log.debug("Aborting raft group0 service...");

    // abort_server() may already be running in the background if triggered by the
    // on_background_error callback. We wait for that to complete. This code
    // shouldn't normally throw, but we wrap it in try/catch just in case, to ensure
    // we still wait for the background abort.

    try {
        co_await smp::invoke_on_all([this]() {
            return uninit_rpc_verbs(_ms.local());
        });

        _leadership_monitor_as.request_abort();

        co_await _shutdown_gate.close();

        co_await std::move(_leadership_monitor);
    } catch (...) {
        // Report through this file's own logger, like every other message of
        // this function, so the warning shows up next to the surrounding
        // "Aborting..." / "...aborted" debug messages.
        group0_log.warn("Failed to abort raft group0: {}", std::current_exception());
    }

    if (auto* group0_id = std::get_if<raft::group_id>(&_group0)) {
        co_await _raft_gr.abort_server(*group0_id, "raft group0 is aborted");
    }

    group0_log.debug("Raft group0 service aborted");
}
|
|
|
|
void raft_group0::destroy() {
|
|
if (auto* group0_id = std::get_if<raft::group_id>(&_group0)) {
|
|
_raft_gr.destroy_server(*group0_id);
|
|
}
|
|
}
|
|
|
|
// Creates the group 0 Raft server, starts it (in the `_sg` scheduling group),
// records the group id in `_group0` and enables the in-memory state machine.
// Afterwards, if the persisted snapshot has index 0, triggers creation of a
// new snapshot.
future<> raft_group0::start_server_for_group0(raft::group_id group0_id, service::storage_service& ss, cql3::query_processor& qp, service::migration_manager& mm) {
    SCYLLA_ASSERT(group0_id != raft::group_id{});
    // The address map may miss our own id in case we connect
    // to an existing Raft Group 0 leader.
    auto my_id = load_my_id();
    group0_log.info("Server {} is starting group 0 with id {}", my_id, group0_id);

    auto srv_for_group0 = create_server_for_group0(group0_id, my_id, ss, qp, mm);
    // Grab references to the components now: the bundle itself is moved into
    // the registry below.
    auto& persistence = srv_for_group0.persistence;
    auto& server = *srv_for_group0.server;

    co_await with_scheduling_group(_sg, [this, &srv_for_group0, group0_id] (this auto self) -> future<> {
        auto& state_machine = dynamic_cast<group0_state_machine&>(srv_for_group0.state_machine);
        co_await _raft_gr.start_server_for_group(std::move(srv_for_group0));
        // Set _group0 immediately after the server is registered in _raft_gr._servers.
        // This ensures abort_and_drain()/destroy() can find and clean up the server
        // even if enable_in_memory_state_machine() or later steps throw.
        _group0.emplace<raft::group_id>(group0_id);
        co_await state_machine.enable_in_memory_state_machine();
    });

    // Fix for scylladb/scylladb#16683:
    // If the snapshot index is 0, trigger creation of a new snapshot
    // so bootstrapping nodes will receive a snapshot transfer.
    auto snap = co_await persistence.load_snapshot_descriptor();
    if (snap.idx != raft::index_t{0}) {
        co_return;
    }

    group0_log.info("Detected snapshot with index=0, id={}, triggering new snapshot", snap.id);
    const bool created = co_await server.trigger_snapshot(&_abort_source);
    if (!created) {
        group0_log.warn("Could not create new snapshot, there are no entries applied");
        co_return;
    }
    snap = co_await persistence.load_snapshot_descriptor();
    group0_log.info("New snapshot created, index={} id={}", snap.idx, snap.id);
}
|
|
|
|
// Background fiber that keeps `_leadership_observable` in sync with the
// leadership status of the group 0 server: it is set to true when this node
// is the leader and back to false after the next state change.
//
// The fiber holds the group 0 gate while running. It ends when waiting for a
// state change throws, e.g. after `_leadership_monitor_as` is aborted, which
// also happens when the main abort source fires. The exception is logged at
// debug level and swallowed.
future<> raft_group0::leadership_monitor_fiber() {
    try {
        // Propagate abort of the main abort source to the monitor's own one.
        auto sub = _abort_source.subscribe([this] () noexcept {
            if (!_leadership_monitor_as.abort_requested()) {
                _leadership_monitor_as.request_abort();
            }
        });

        auto holder = hold_group0_gate();
        for (;;) {
            // Wait until we become the leader...
            while (!group0_server().is_leader()) {
                co_await group0_server().wait_for_state_change(&_leadership_monitor_as);
            }
            group0_log.info("gaining leadership");
            _leadership_observable.set(true);

            // ...and then until the state changes again.
            co_await group0_server().wait_for_state_change(&_leadership_monitor_as);
            group0_log.info("losing leadership");
            _leadership_observable.set(false);
        }
    } catch (...) {
        group0_log.debug("leadership_monitor_fiber aborted with {}", std::current_exception());
    }
}
|
|
|
|
// Subscribes `cb` to leadership changes of this node in group 0.
//
// If this node is currently marked as the leader, `cb(true)` is invoked
// immediately, before the subscription is created. The returned observer
// represents the subscription.
utils::observer<bool> raft_group0::observe_leadership(std::function<void(bool)> cb) {
    if (_leadership_observable.get()) {
        cb(true);
    }
    // `cb` is a by-value sink parameter and this is its last use: hand it
    // over by move instead of copying the std::function once more.
    return _leadership_observable.observe(std::move(cb));
}
|
|
|
|
// Makes this node a member of group 0. Must run on shard 0 and only before
// group 0 was joined.
//
// If a group 0 id is already persisted, the existing server is simply started.
// Otherwise discovery is run (repeatedly, if needed) using `seeds`; on the
// first successful discovery the local Raft server is bootstrapped and
// started, and `handshaker` is used to get this node added to the group.
// Finally the group 0 id is persisted, stored in `_group0` and published to
// the gossiper on all shards.
future<> raft_group0::join_group0(std::vector<gms::inet_address> seeds, shared_ptr<service::group0_handshaker> handshaker, service::storage_service& ss, cql3::query_processor& qp, service::migration_manager& mm,
        db::system_keyspace& sys_ks, const join_node_request_params& params) {
    SCYLLA_ASSERT(this_shard_id() == 0);
    SCYLLA_ASSERT(!joined_group0());

    auto group0_id = raft::group_id{co_await sys_ks.get_raft_group0_id()};
    if (group0_id) {
        // Group 0 ID present means we've already joined group 0 before.
        co_await start_server_for_group0(group0_id, ss, qp, mm);
        co_return;
    }

    raft::server* server = nullptr;
    auto my_id = load_my_id();
    group0_log.info("server {} found no local group 0. Discovering...", my_id);
    for (;;) {
        auto g0_info = co_await discover_group0(seeds, qp);
        group0_log.info("server {} found group 0 with group id {}, leader {}", my_id, g0_info.group0_id, g0_info.id);

        if (server && group0_id != g0_info.group0_id) {
            // `server` is not `nullptr` so we finished discovery in an earlier iteration and found a group 0 ID.
            // But in this iteration it's different. That shouldn't be possible.
            on_internal_error(group0_log, format(
                "The Raft discovery algorithm returned two different group IDs on subsequent runs: {} and {}."
                " Cannot proceed due to possible inconsistency problems."
                " If you're bootstrapping a fresh cluster, make sure that every node uses the same seeds configuration, then retry."
                " If this is happening after upgrade, please report a bug, then try following the manual recovery procedure: {}",
                group0_id, g0_info.group0_id, raft_manual_recovery_doc));
        }
        group0_id = g0_info.group0_id;
        raft::server_address my_addr{my_id, {}};

        bool starting_server_as_follower = false;
        if (!server) {
            // This is the first time discovery is run. Create and start a Raft server for group 0 on this node.
            raft::configuration initial_configuration;
            bool nontrivial_snapshot = false;
            const bool chosen_as_leader = g0_info.id == my_id;
            if (chosen_as_leader) {
                // We were chosen as the discovery leader.
                // We should start a new group with this node as voter.
                group0_log.info("Server {} chosen as discovery leader; bootstrapping group 0 from scratch", my_id);
                initial_configuration.current.emplace(my_addr, raft::is_voter::yes);

                // Initializes system tables for the first group 0 member. Nodes joining group 0 henceforth would apply them via snapshots.
                // We should not change system tables on the recovery leader (the discovery leader of the new group 0
                // created in the Raft-based recovery procedure). The persistent topology state is present on that node
                // when it creates the new group 0. Also, it joins the new group 0 using legacy_handshaker, so there is
                // no need to create a join request.
                if (!qp.db().get_config().recovery_leader.is_set()) {
                    co_await ss.raft_initialize_discovery_leader(params);
                }

                // Force snapshot transfer from us to subsequently joining servers.
                // This is important for upgrade and recovery, where the group 0 state machine
                // (schema tables in particular) is nonempty.
                // In case of fresh cluster with raft topology enabled, this will trigger a snapshot transfer which propagates initial
                // topology state (created in raft_initialize_discovery_leader above). Otherwise, with raft topology disabled, this will
                // trigger an empty snapshot transfer.
                nontrivial_snapshot = true;
            } else {
                starting_server_as_follower = true;
                co_await handshaker->pre_server_start(g0_info);
            }

            utils::get_local_injector().inject("stop_after_sending_join_node_request",
                [] { std::raise(SIGSTOP); });

            // Populates correct upgrade state value before starting raft server, so that reads always get correct values.
            co_await ss.initialize_done_topology_upgrade_state();

            // Bootstrap the initial configuration
            co_await raft_sys_table_storage(qp, group0_id, my_id)
                    .bootstrap(std::move(initial_configuration), nontrivial_snapshot);

            utils::get_local_injector().inject("stop_after_bootstrapping_initial_raft_configuration",
                [] { std::raise(SIGSTOP); });

            co_await start_server_for_group0(group0_id, ss, qp, mm);
            server = &_raft_gr.group0();
            // FIXME if we crash now or after getting added to the config but before storing group 0 ID,
            // we'll end with a bootstrapped server that possibly added some entries, but we won't remember that we have such a server
            // after we restart. Then we'll call `persistence.bootstrap` again after restart which will overwrite our snapshot, leading to
            // possibly incorrect state. One way of handling this may be changing `persistence.bootstrap` so it checks if any persistent
            // state is present, and if it is, do nothing.
        }

        SCYLLA_ASSERT(server);
        co_await utils::get_local_injector().inject("join_group0_pause_before_config_check",
            utils::wait_for_message(std::chrono::minutes{5}));
        if (!starting_server_as_follower && server->get_configuration().contains(my_id)) {
            // True if we started a new group or completed a configuration change initiated earlier.
            group0_log.info("server {} already in group 0 (id {}) as {}", my_id, group0_id,
                    server->get_configuration().can_vote(my_id)? "voter" : "non-voter");
            break;
        }

        if (co_await handshaker->post_server_start(g0_info, _abort_source)) {
            break;
        }

        // Try again after a pause
        co_await seastar::sleep_abortable(std::chrono::milliseconds{1000}, _abort_source);
    }

    co_await sys_ks.set_raft_group0_id(group0_id.id);
    // Allow peer_exchange() RPC to access group 0 only after group0_id is persisted.

    _group0 = group0_id;

    co_await _gossiper.container().invoke_on_all([group0_id = group0_id.uuid()] (auto& gossiper) {
        gossiper.set_group0_id(group0_id);
        return make_ready_future<>();
    });

    group0_log.info("server {} joined group 0 with group id {}", my_id, group0_id);
}
|
|
|
|
// Creates a handshaker that joins group 0 by sending a modify_config RPC to
// the node found by discovery, asking to be added with the voter status
// `can_vote`.
shared_ptr<service::group0_handshaker> raft_group0::make_legacy_handshaker(raft::is_voter can_vote) {
    struct legacy_handshaker : public group0_handshaker {
        service::raft_group0& _group0;
        netw::messaging_service& _ms;
        raft::is_voter _can_vote;

        legacy_handshaker(service::raft_group0& group0, netw::messaging_service& ms, raft::is_voter can_vote)
            : _group0(group0)
            , _ms(ms)
            , _can_vote(can_vote)
        {}

        future<> pre_server_start(const group0_info& info) override {
            // Nothing to do in this step
            co_return;
        }

        // Sends a single modify_config request (with a 1000 ms timeout).
        // Returns true on success; a std::runtime_error is logged and
        // reported as false so that the caller can retry.
        future<bool> post_server_start(const group0_info& g0_info, abort_source& as) override {
            auto deadline = db::timeout_clock::now() + std::chrono::milliseconds{1000};
            auto my_id = _group0.load_my_id();
            raft::server_address my_addr{my_id, {}};
            try {
                co_await ser::group0_rpc_verbs::send_group0_modify_config(
                    &_ms, locator::host_id{g0_info.id.uuid()}, deadline, g0_info.group0_id, {{my_addr, _can_vote}}, {});
                co_return true;
            } catch (std::runtime_error& e) {
                group0_log.warn("failed to modify config at peer {}: {}. Retrying.", g0_info.id, e.what());
                co_return false;
            }
        }
    };

    return make_shared<legacy_handshaker>(*this, _ms.local(), can_vote);
}
|
|
|
|
struct group0_members {
|
|
const raft::server& _group0_server;
|
|
|
|
raft::config_member_set get_members() const {
|
|
return _group0_server.get_configuration().current;
|
|
}
|
|
|
|
std::vector<locator::host_id> get_host_ids() const {
|
|
return _group0_server.get_configuration().current |
|
|
std::views::transform([] (const auto& m) { return locator::host_id(m.addr.id.uuid()); }) |
|
|
std::ranges::to<std::vector<locator::host_id>>();
|
|
}
|
|
|
|
bool is_joint() const {
|
|
return _group0_server.get_configuration().is_joint();
|
|
}
|
|
};
|
|
|
|
bool raft_group0::maintenance_mode() {
|
|
SCYLLA_ASSERT(this_shard_id() == 0);
|
|
return _client.maintenance_mode();
|
|
}
|
|
|
|
// Starts the existing group 0 server if a group 0 id is persisted locally.
//
// Without a persisted id the call is a no-op when the `recovery_leader`
// option is set, and an error (std::runtime_error) otherwise.
future<> raft_group0::setup_group0_if_exist(db::system_keyspace& sys_ks, service::storage_service& ss, cql3::query_processor& qp, service::migration_manager& mm) {
    auto group0_id = raft::group_id{co_await sys_ks.get_raft_group0_id()};

    if (!group0_id) {
        if (qp.db().get_config().recovery_leader.is_set()) {
            // Recovery mode, no group0 to start
            co_return;
        }
        throw std::runtime_error("The node is bootstrapped already but Raft group0 is not present. This means that you try to upgrade"
                " a node of a cluster that is not using Raft yet. This is no longer supported. Please first complete the upgrade of the cluster to use Raft");
    }

    // Group 0 ID is present => we've already joined group 0 earlier.
    group0_log.info("setup_group0: group 0 ID present. Starting existing Raft server.");
    co_await start_server_for_group0(group0_id, ss, qp, mm);

    // Start group 0 leadership monitor fiber.
    _leadership_monitor = leadership_monitor_fiber();
}
|
|
|
|
// Joins group 0 using `initial_contact_nodes` as discovery seeds, starts the
// leadership monitor fiber and persists the "use_post_raft_procedures"
// upgrade state.
//
// Reaching this point is possible only in two cases:
// - the node is bootstrapping,
// - the node is restarting in the Raft-based recovery procedure and has not joined the new group 0 yet.
future<> raft_group0::setup_group0(
        db::system_keyspace& sys_ks, const std::unordered_set<gms::inet_address>& initial_contact_nodes, shared_ptr<group0_handshaker> handshaker,
        service::storage_service& ss, cql3::query_processor& qp, service::migration_manager& mm,
        const join_node_request_params& params) {
    std::vector<gms::inet_address> seeds(initial_contact_nodes.begin(), initial_contact_nodes.end());

    group0_log.info("setup_group0: joining group 0...");
    co_await join_group0(std::move(seeds), std::move(handshaker), ss, qp, mm, sys_ks, params);
    group0_log.info("setup_group0: successfully joined group 0.");

    // Start group 0 leadership monitor fiber.
    _leadership_monitor = leadership_monitor_fiber();

    // Test hook: fail right after joining.
    utils::get_local_injector().inject("stop_after_joining_group0", [&] {
        throw std::runtime_error{"injection: stop_after_joining_group0"};
    });

    group0_log.info("setup_group0: the cluster is ready to use Raft. Finishing.");
    co_await sys_ks.save_group0_upgrade_state("use_post_raft_procedures");
}
|
|
|
|
// Tells whether `id` is part of the group 0 configuration. With
// `include_voters_only` set, the member must additionally be able to vote.
// Calling this before group 0 was joined is an internal error.
bool raft_group0::is_member(raft::server_id id, bool include_voters_only) {
    if (!joined_group0()) {
        on_internal_error(group0_log, "called is_member before we joined group 0");
    }

    auto cfg = _raft_gr.group0().get_configuration();
    if (!cfg.contains(id)) {
        return false;
    }
    if (!include_voters_only) {
        return true;
    }
    return cfg.can_vote(id);
}
|
|
|
|
// Makes the servers in `voters_add` voters and the servers in `voters_del`
// non-voters, logging before and after the change. Does nothing when both
// sets are empty. A server present in both sets is an internal error.
future<> raft_group0::modify_voters(const std::unordered_set<raft::server_id>& voters_add, const std::unordered_set<raft::server_id>& voters_del,
        abort_source& as, std::optional<raft_timeout> timeout) {
    if (voters_add.empty() && voters_del.empty()) {
        co_return;
    }

    // Ensure that we're not trying to add and remove the same node.
    const auto also_in = [] (const auto& other) {
        return [&other] (auto id) {
            return other.contains(id);
        };
    };
    auto in_both_sets = voters_add | std::views::filter(also_in(voters_del));
    if (!in_both_sets.empty()) {
        on_internal_error(group0_log, "called modify_voters with the same node in both voters and non-voters sets");
    }

    if (!voters_add.empty()) {
        group0_log.info("making servers {} voters ...", voters_add);
    }
    if (!voters_del.empty()) {
        group0_log.info("making servers {} non-voters ...", voters_del);
    }

    co_await modify_raft_voter_status(voters_add, voters_del, as, timeout);

    if (!voters_add.empty()) {
        group0_log.info("servers {} are now voters.", voters_add);
    }
    if (!voters_del.empty()) {
        group0_log.info("servers {} are now non-voters.", voters_del);
    }
}
|
|
|
|
// Submits a single group 0 configuration change that marks the servers in
// `voters_add` as voters and the servers in `voters_del` as non-voters.
//
// Servers that are not members of the current group 0 configuration are skipped
// with a warning. The change is issued through `modify_config()`; when it ends
// with `raft::commit_status_unknown` the attempt is reported as a failure to
// `run_op_with_retry()`, which is expected to retry it.
//
// `voters_add`, `voters_del` and `as` are captured by reference, so they must
// stay alive until the returned future resolves.
future<> raft_group0::modify_raft_voter_status(const std::unordered_set<raft::server_id>& voters_add, const std::unordered_set<raft::server_id>& voters_del,
        abort_source& as, std::optional<raft_timeout> timeout) {
    return run_op_with_retry(as, [this, &voters_add, &voters_del, timeout, &as] -> future<operation_result> {
        // The member list is rebuilt on every attempt, so membership is
        // re-checked against the configuration seen at retry time.
        std::vector<raft::config_member> add;
        add.reserve(voters_add.size() + voters_del.size());

        for (const auto& id: voters_add) {
            if (is_member(id, false)) {
                add.push_back(raft::config_member{{id, {}}, raft::is_voter::yes});
            } else {
                group0_log.warn("modify_raft_voter_status({}, {}): tried to mark non-member {} as a voter, ignoring",
                        voters_add, voters_del, id);
            }
        }

        for (const auto& id: voters_del) {
            if (is_member(id, false)) {
                add.push_back(raft::config_member{{id, {}}, raft::is_voter::no});
            } else {
                group0_log.warn("modify_raft_voter_status({}, {}): tried to mark non-member {} as a non-voter, ignoring",
                        voters_add, voters_del, id);
            }
        }

        try {
            // Existing members are passed in the "add" list with an updated voter
            // flag; nothing is removed from the configuration.
            co_await _raft_gr.group0_with_timeouts().modify_config(std::move(add), {}, &as, timeout);
        } catch (const raft::commit_status_unknown& e) {
            group0_log.info("modify_raft_voter_status({}, {}): modify_config returned \"{}\", retrying", voters_add, voters_del, e);
            co_return operation_result::failure;
        }
        co_return operation_result::success;
    }, "modify_raft_voter_status->modify_config");
}
|
|
|
|
// Removes server `id` from the group 0 configuration.
//
// The removal goes through `modify_config()` using this object's own abort
// source and a default-constructed `raft_timeout`. An attempt that ends with
// `raft::commit_status_unknown` is reported as a failure to
// `run_op_with_retry()`, which is expected to retry it.
future<> raft_group0::remove_from_raft_config(raft::server_id id) {
    auto attempt = [this, id] -> future<operation_result> {
        try {
            co_await _raft_gr.group0_with_timeouts().modify_config({}, {id}, &_abort_source, raft_timeout{});
            co_return operation_result::success;
        } catch (const raft::commit_status_unknown& e) {
            group0_log.info("remove_from_raft_config({}): modify_config returned \"{}\", retrying", id, e);
        }
        co_return operation_result::failure;
    };
    return run_op_with_retry(_abort_source, std::move(attempt), "remove_from_raft_config->modify_config");
}
|
|
|
|
bool raft_group0::joined_group0() const {
|
|
return std::holds_alternative<raft::group_id>(_group0);
|
|
}
|
|
|
|
// Answers a discovery peer-exchange request according to the current content of `_group0`:
//  - `std::monostate`: discovery has not started (or the leader information
//    is being persisted locally); reply with an empty `std::monostate`.
//  - `persistent_discovery`: forward the peers to discovery and reply with the
//    peer list it produces, or with `std::monostate` if it produces none.
//  - `raft::group_id`: group 0 is already joined; reply with `group0_info`
//    naming this node.
future<group0_peer_exchange> raft_group0::peer_exchange(discovery::peer_list peers) {
    // NOTE(review): the visitor is a coroutine whose captures live in the lambda
    // object. Nothing captured is read after the `co_await` below; keep it that
    // way unless the lambda's lifetime is extended.
    auto visitor = [this, peers = std::move(peers)] (auto&& state) mutable -> future<group0_peer_exchange> {
        using state_t = std::decay_t<decltype(state)>;

        if constexpr (std::is_same_v<state_t, std::monostate>) {
            // Discovery not started or we're persisting the
            // leader information locally.
            co_return group0_peer_exchange{std::monostate{}};
        } else if constexpr (std::is_same_v<state_t, persistent_discovery>) {
            // Let discovery compute the reply.
            auto response = co_await state.request(std::move(peers));
            if (response) {
                co_return group0_peer_exchange{std::move(*response)};
            }
            // We just became a leader.
            // Eventually we'll answer with group0_info.
            co_return group0_peer_exchange{std::monostate{}};
        } else if constexpr (std::is_same_v<state_t, raft::group_id>) {
            // Even if in follower state, return own address: the
            // incoming RPC will then be bounced to the leader.
            co_return group0_peer_exchange{group0_info{
                .group0_id = std::get<raft::group_id>(_group0),
                // Advertise ourselves as the leader - modify_config() is
                // a forwarding API so we'll be able to forward
                // the request when it arrives.
                .id = _raft_gr.group0().id(),
                .ip_addr = _gossiper.get_broadcast_address(),
            }};
        }
    };
    return std::visit(std::move(visitor), _group0);
}
|
|
|
|
// Creates a `persistent_discovery`, resuming from previously persisted peers when there are any.
//
// The peer list is first loaded with `load_discovered_peers()`. When it is
// non-empty, discovery is being restarted: the loaded list already contains the
// seeds from scylla.yaml that were used originally, so the `seeds` parameter is
// ignored. Only when nothing was persisted is `seeds` used as the initial list.
//
// Open question kept from the original author: should `seeds` be used instead
// of, or together with, the loaded peers?
//
// If a node crashes or stops during discovery, either of the following two
// options is safe:
//  - restart the node; discovery resumes where it stopped, with the persisted
//    seeds,
//  - erase the data directory, possibly update scylla.yaml, and start a new
//    boot.
// Updating scylla.yaml with a new set of seeds while keeping the old data
// directory is something DBAs can potentially do, but the intent would be
// unclear at best: the old seeds cannot safely be ignored, since they may have
// learned about this node already, so it is not safe to progress while they
// are unreachable. As long as the old seeds have to be reached, adding more
// seeds is not very useful.
//
// This could be detected and rejected with an exception, but since the whole
// case is a bit made up, the scylla.yaml seeds are simply ignored once they are
// known to be persisted in the discovery table.
future<persistent_discovery> persistent_discovery::make(discovery_peer my_addr, peer_list seeds, cql3::query_processor& qp) {
    auto initial_peers = co_await load_discovered_peers(qp);
    if (initial_peers.empty()) {
        // Nothing persisted yet: this is a fresh discovery, start from the given seeds.
        initial_peers = std::move(seeds);
    }
    // discovery::step() will automatically exclude my_addr and skip
    // duplicates in the list.
    co_return persistent_discovery{std::move(my_addr), initial_peers, qp};
}
|
|
|
|
// Handles an incoming discovery request carrying the sender's `peers`.
//
// Returns `std::nullopt` right away when `_gate` is already closed (discovery
// was stopped and is about to be destroyed). Otherwise the gate is held for the
// duration of the call, the request is passed to the underlying discovery
// object, the resulting peer list is persisted, and the discovery's response is
// returned.
future<std::optional<discovery::peer_list>> persistent_discovery::request(peer_list peers) {
    for (const auto& peer : peers) {
        group0_log.debug("discovery: request peer: id={}, ip={}", peer.id, peer.ip_addr);
    }

    if (_gate.is_closed()) {
        // We stopped discovery, about to destroy it.
        co_return std::nullopt;
    }
    // Keeps the gate open until this coroutine finishes.
    auto gate_holder = _gate.hold();

    auto reply = _discovery.request(peers);
    // Persist whatever the discovery state knows after handling the request.
    co_await store_discovered_peers(_qp, _discovery.get_peer_list());

    co_return reply;
}
|
|
|
|
// Feeds the response received from peer `from` (the list of `peers` it knows)
// into the underlying discovery object.
//
// Nothing is persisted here: the peers learned from this response are stored on
// the next `request` or `tick`.
void persistent_discovery::response(discovery_peer from, const peer_list& peers) {
    for (const auto& peer : peers) {
        group0_log.debug("discovery: response peer: id={}, ip={}", peer.id, peer.ip_addr);
    }
    _discovery.response(std::move(from), peers);
}
|
|
|
|
// Advances the underlying discovery by one tick, persists the current peer
// list, and returns the tick's output.
//
// `_gate` is deliberately not entered: `stop` must be called after all calls to
// `tick` (and before the object is destroyed).
future<discovery::tick_output> persistent_discovery::tick() {
    auto output = _discovery.tick();
    co_await store_discovered_peers(_qp, _discovery.get_peer_list());
    co_return output;
}
|
|
|
|
// Stops discovery by closing `_gate`; the returned future is the one produced
// by the gate's `close()`. Once the gate is closed, `request()` answers with
// `std::nullopt`.
future<> persistent_discovery::stop() {
    return _gate.close();
}
|
|
|
|
persistent_discovery::persistent_discovery(discovery_peer my_addr, const peer_list& seeds, cql3::query_processor& qp)
|
|
: _discovery{std::move(my_addr), seeds}
|
|
, _qp{qp}
|
|
, _gate("raft_group0::persistent_discovery")
|
|
{
|
|
for (auto& addr: seeds) {
|
|
group0_log.debug("discovery: seed peer: id={}, info={}", addr.id, addr.ip_addr);
|
|
}
|
|
}
|
|
|
|
|
|
// Given a function `fun` that takes an `abort_source&` as parameter,
|
|
// call `fun` with an internally constructed abort source which is aborted after the given time duration.
|
|
//
|
|
// The internal abort source also subscribes to the provided `abort_source& as` so the function will also react
|
|
// to top-level aborts.
|
|
//
|
|
// `abort_requested_exception` thrown by `fun` is translated to `timed_out_error` exception
|
|
// unless `as` requested abort or we didn't reach timeout yet.
|
|
template <std::invocable<abort_source&> F>
|
|
static futurize_t<std::invoke_result_t<F, abort_source&>>
|
|
with_timeout(abort_source& as, db::timeout_clock::duration d, F&& fun) {
|
|
using future_t = futurize_t<std::invoke_result_t<F, abort_source&>>;
|
|
|
|
// FIXME: using lambda as workaround for clang bug #50345 (miscompiling coroutine templates).
|
|
auto impl = [] (abort_source& as, db::timeout_clock::duration d, F&& fun) -> future_t {
|
|
abort_source timeout_src;
|
|
auto sub = as.subscribe([&timeout_src] () noexcept {
|
|
if (!timeout_src.abort_requested()) {
|
|
timeout_src.request_abort();
|
|
}
|
|
});
|
|
if (!sub) {
|
|
throw abort_requested_exception{};
|
|
}
|
|
|
|
// Using lambda here as workaround for seastar#1005
|
|
future_t f = futurize_invoke([fun = std::move(fun)]
|
|
(abort_source& s) mutable { return std::forward<F>(fun)(s); }, timeout_src);
|
|
|
|
auto sleep_and_abort = [] (db::timeout_clock::duration d, abort_source& timeout_src) -> future<> {
|
|
co_await sleep_abortable(d, timeout_src);
|
|
if (!timeout_src.abort_requested()) {
|
|
// We resolved before `f`. Abort the operation.
|
|
timeout_src.request_abort();
|
|
}
|
|
}(d, timeout_src);
|
|
|
|
f = co_await coroutine::as_future(std::move(f));
|
|
|
|
if (!timeout_src.abort_requested()) {
|
|
// `f` has already resolved, but abort the sleep.
|
|
timeout_src.request_abort();
|
|
}
|
|
|
|
// Wait on the sleep as well (it should return shortly, being aborted) so we don't discard the future.
|
|
try {
|
|
co_await std::move(sleep_and_abort);
|
|
} catch (const sleep_aborted&) {
|
|
// Expected (if `f` resolved first or we were externally aborted).
|
|
} catch (...) {
|
|
// There should be no other exceptions, but just in case, catch and discard.
|
|
// we want to propagate exceptions from `f`, not from sleep.
|
|
group0_log.error("unexpected exception from sleep_and_abort: {}", std::current_exception());
|
|
}
|
|
|
|
// Translate aborts caused by timeout to `timed_out_error`.
|
|
// Top-level aborts (from `as`) are not translated.
|
|
try {
|
|
co_return co_await std::move(f);
|
|
} catch (abort_requested_exception&) {
|
|
if (as.abort_requested()) {
|
|
// Assume the abort was caused by `as` (it may have been our timeout abort - doesn't matter)
|
|
// and don't translate.
|
|
throw;
|
|
}
|
|
|
|
if (!timeout_src.abort_requested()) {
|
|
// Neither `as` nor `timeout_src` requested abort.
|
|
// This must be another abort source internal to `fun`.
|
|
// Don't translate.
|
|
throw;
|
|
}
|
|
|
|
throw seastar::timed_out_error{};
|
|
}
|
|
};
|
|
|
|
return impl(as, d, std::forward<F>(fun));
|
|
}
|
|
|
|
// A helper class to sleep in a loop with an exponentially
|
|
// increasing retry period.
|
|
struct sleep_with_exponential_backoff {
|
|
std::chrono::seconds _retry_period{1};
|
|
static constexpr std::chrono::seconds _max_retry_period{16};
|
|
future<> operator()(abort_source& as,
|
|
std::source_location loc = std::source_location::current()) {
|
|
upgrade_log.info("{}: sleeping for {} seconds before retrying...", loc.function_name(), _retry_period);
|
|
co_await sleep_abortable(_retry_period, as);
|
|
_retry_period = std::min(_retry_period * 2, _max_retry_period);
|
|
}
|
|
};
|
|
|
|
void raft_group0::register_metrics() {
|
|
namespace sm = seastar::metrics;
|
|
_metrics.add_group("raft_group0", {
|
|
sm::make_gauge("status", [this] { return static_cast<uint8_t>(_status_for_monitoring); },
|
|
sm::description("status of the raft group, 1 - normal, 2 - aborted"))
|
|
});
|
|
}
|
|
|
|
} // end of namespace service
|
|
|
|
|