Files
scylladb/raft/tracker.cc
Kefu Chai 168ade72f8 treewide: replace formatter<std::string_view> with formatter<string_view>
in {fmt} before v10, it provides the specialization of `fmt::formatter<..>`
for `std::string_view` as well as the specialization of `fmt::formatter<..>`
for `fmt::string_view` which is an implementation builtin in {fmt} for
compatibility of pre-C++17. and this type is used even if the code is
compiled with a C++ standard greater than or equal to C++17. also, before v10,
the `fmt::formatter<std::string_view>::format()` is defined so it accepts
`std::string_view`. after v10, `fmt::formatter<std::string_view>` still
exists, but it is now defined using `format_as()` machinery, so its
`format()` method does not actually accept `std::string_view`, it
accepts `fmt::string_view`, as the former can be converted to
`fmt::string_view`.

this is why we can inherit from `fmt::formatter<std::string_view>` and
use `formatter<std::string_view>::format(foo, ctx);` to implement the
`format()` method with {fmt} v9, but we cannot do this with {fmt} v10,
and we would have the following compilation failure:

```
FAILED: service/CMakeFiles/service.dir/RelWithDebInfo/topology_state_machine.cc.o
/home/kefu/.local/bin/clang++ -DFMT_DEPRECATED_OSTREAM -DFMT_SHARED -DSCYLLA_BUILD_MODE=release -DSEASTAR_API_LEVEL=7 -DSEASTAR_LOGGER_COMPILE_TIME_FMT -DSEASTAR_LOGGER_TYPE_STDOUT -DSEASTAR_SCHEDULING_GROUPS_COUNT=16 -DSEASTAR_SSTRING -DXXH_PRIVATE_API -DCMAKE_INTDIR=\"RelWithDebInfo\" -I/home/kefu/dev/scylladb -I/home/kefu/dev/scylladb/build/gen -I/home/kefu/dev/scylladb/seastar/include -I/home/kefu/dev/scylladb/build/seastar/gen/include -I/home/kefu/dev/scylladb/build/seastar/gen/src -ffunction-sections -fdata-sections -O3 -g -gz -std=gnu++20 -fvisibility=hidden -Wall -Werror -Wextra -Wno-error=deprecated-declarations -Wimplicit-fallthrough -Wno-c++11-narrowing -Wno-deprecated-copy -Wno-mismatched-tags -Wno-missing-field-initializers -Wno-overloaded-virtual -Wno-unsupported-friend -Wno-enum-constexpr-conversion -Wno-unused-parameter -ffile-prefix-map=/home/kefu/dev/scylladb=. -march=westmere -mllvm -inline-threshold=2500 -fno-slp-vectorize -U_FORTIFY_SOURCE -Werror=unused-result -MD -MT service/CMakeFiles/service.dir/RelWithDebInfo/topology_state_machine.cc.o -MF service/CMakeFiles/service.dir/RelWithDebInfo/topology_state_machine.cc.o.d -o service/CMakeFiles/service.dir/RelWithDebInfo/topology_state_machine.cc.o -c /home/kefu/dev/scylladb/service/topology_state_machine.cc
/home/kefu/dev/scylladb/service/topology_state_machine.cc:254:41: error: no matching member function for call to 'format'
  254 |     return formatter<std::string_view>::format(it->second, ctx);
      |            ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~
/usr/include/fmt/core.h:2759:22: note: candidate function template not viable: no known conversion from 'seastar::basic_sstring<char, unsigned int, 15>' to 'const fmt::basic_string_view<char>' for 1st argument
 2759 |   FMT_CONSTEXPR auto format(const T& val, FormatContext& ctx) const
      |                      ^      ~~~~~~~~~~~~
```

because the inherited `format()` method actually comes from
`fmt::formatter<fmt::string_view>`. to reduce the confusion, in this
change, we just inherit from `fmt::formatter<string_view>`, where
`string_view` is actually `fmt::string_view`. this follows
the document at
https://fmt.dev/latest/api.html#formatting-user-defined-types,
and since there is less indirection under the hood -- we do not
use the specialization created by `FMT_FORMAT_AS` which inherits
from `formatter<fmt::string_view>`, hopefully this can improve
the compilation speed a little bit. also, this change addresses
the build failure with {fmt} v10.

Signed-off-by: Kefu Chai <kefu.chai@scylladb.com>

Closes scylladb/scylladb#18299
2024-04-19 07:44:07 +03:00

294 lines
10 KiB
C++

/*
* Copyright (C) 2020-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#include "tracker.hh"
#include <seastar/core/coroutine.hh>
namespace raft {
// Tells whether a rejected append reply is "stray", i.e. contradicts what
// we already know about the follower and therefore must be ignored.
//
// Precondition: we are the leader and `rejected.current_term` equals our term.
bool follower_progress::is_stray_reject(const append_reply::rejected& rejected) {
    // By definition of `match_idx`, at some point every entry up to and
    // including `match_idx` was identical in our log and in the follower's log.
    // Two kinds of reject contradict that knowledge:
    //
    //  * `rejected.non_matching_idx <= match_idx`: the reply claims that an
    //    entry which once matched is now different. A follower cannot change
    //    an entry without entering a different term, but the reply's term is
    //    equal to ours.
    //
    //  * `rejected.last_idx < match_idx`: this implies
    //    `rejected.last_idx + 1 <= match_idx`, so the follower once had an
    //    entry with index `rejected.last_idx + 1`, yet the reply claims it has
    //    no such entry now. A follower cannot truncate a suffix of its log
    //    without entering a different term, but the reply's term is equal to ours.
    //
    // In both cases the reply must be stray.
    if (rejected.non_matching_idx <= match_idx || rejected.last_idx < match_idx) {
        return true;
    }

    switch (state) {
    case follower_progress::state::PIPELINE:
        return false;
    case follower_progress::state::PROBE:
        // While probing we send a single append request `req` with
        // `req.prev_log_idx == next_idx - 1`, and a follower rejecting it
        // answers with `non_matching_idx = req.prev_log_idx`. Any reject
        // carrying a different index is therefore stray.
        return rejected.non_matching_idx != index_t(next_idx - 1);
    case follower_progress::state::SNAPSHOT:
        // Every reject received during a snapshot transfer is stray.
        return true;
    default:
        assert(false);
    }
    return false;
}
// Puts the follower into PROBE state and marks that no probe has been sent yet.
void follower_progress::become_probe() {
    probe_sent = false;
    state = state::PROBE;
}
// Switches the follower to PIPELINE state; does nothing if it is already there.
void follower_progress::become_pipeline() {
    if (state == state::PIPELINE) {
        return;
    }
    // A previous request was accepted, so the follower's log state is
    // now known: start pipelining with no requests in flight.
    state = state::PIPELINE;
    in_flight = 0;
}
// Switches the follower to SNAPSHOT state for a snapshot taken at `snp_idx`.
void follower_progress::become_snapshot(index_t snp_idx) {
    // On a successful transfer replication resumes right after the
    // snapshot; on failure the follower's index is learned again
    // by sending a probe request.
    next_idx = snp_idx + index_t{1};
    state = state::SNAPSHOT;
}
// Reports whether another append request may be sent to this follower
// given its current replication state.
bool follower_progress::can_send_to() {
    switch (state) {
    case state::SNAPSHOT:
        // The log is not synced until the snapshot
        // transfer completes, so nothing is sent meanwhile.
        return false;
    case state::PIPELINE:
        // Up to `max_in_flight` outstanding indexes are allowed.
        // FIXME: make it smarter
        return in_flight < follower_progress::max_in_flight;
    case state::PROBE:
        // Only one probe may be outstanding at a time.
        return !probe_sent;
    }
    assert(false);
    return false;
}
// If this is called when a tracker is just created, the current
// progress is empty and we should simply create an instance for
// each follower.
// When switching configurations, we should preserve progress
// for existing followers, create progress for new, and remove
// progress for non-members (to make sure we don't send noise
// messages to them).
void tracker::set_configuration(const configuration& configuration, index_t next_idx) {
_current_voters.clear();
_previous_voters.clear();
// Swap out the current progress and then re-add
// only those entries which are still present.
progress old_progress = std::move(*this);
auto emplace_simple_config = [&](const config_member_set& config, std::unordered_set<server_id>& voter_ids) {
for (const auto& s : config) {
if (s.can_vote) {
voter_ids.emplace(s.addr.id);
}
auto newp = this->progress::find(s.addr.id);
if (newp != this->progress::end()) {
// Processing joint configuration and already added
// an entry for this id.
continue;
}
auto oldp = old_progress.find(s.addr.id);
if (oldp != old_progress.end()) {
newp = this->progress::emplace(s.addr.id, std::move(oldp->second)).first;
} else {
newp = this->progress::emplace(s.addr.id, follower_progress{s.addr.id, next_idx}).first;
}
newp->second.can_vote = s.can_vote;
}
};
emplace_simple_config(configuration.current, _current_voters);
if (configuration.is_joint()) {
emplace_simple_config(configuration.previous, _previous_voters);
}
}
// A collection of node match indexes used to find the pivot
// which serves as the commit index of the group.
//
// The elements are not kept sorted: commit_idx() partially orders
// them with std::nth_element to select the pivot.
template<typename Index>
class match_vector {
    std::vector<Index> _match;
    // How many elements in the match array have a match index
    // larger than the previous commit index.
    size_t _count = 0;
    Index _prev_commit_idx;
public:
    // `prev_commit_idx` - the previously known commit index, used as the
    //                     threshold for counting advanced nodes.
    // `reserve_size`    - expected number of elements, used to
    //                     pre-allocate storage.
    explicit match_vector(Index prev_commit_idx, size_t reserve_size)
            : _prev_commit_idx(prev_commit_idx) {
        _match.reserve(reserve_size);
    }

    // Adds the match index of one node.
    void push_back(Index match_idx) {
        if (match_idx > _prev_commit_idx) {
            _count++;
        }
        _match.push_back(match_idx);
    }

    // Returns true if a majority of the added nodes have a match index
    // larger than the previous commit index. Always false when empty.
    bool committed() const {
        return _count >= _match.size()/2 + 1;
    }

    // Returns the match index of the pivot node. Reorders the
    // underlying vector. Must not be called on an empty vector.
    Index commit_idx() {
        logger.trace("{}: check committed count {} cluster size {}", std::is_same_v<Index, index_t> ? "commit" : "read barrier", _count, _match.size());
        // `(_match.size() - 1) / 2` would wrap around for an empty vector and
        // index out of bounds. Callers are expected to check committed()
        // first, which is never true for an empty vector.
        assert(!_match.empty());
        // The index of the pivot node is selected so that all nodes
        // with a larger match index plus the pivot form a majority,
        // for example:
        // cluster size  pivot node  majority
        // 1             0           1
        // 2             0           2
        // 3             1           2
        // 4             1           3
        // 5             2           3
        //
        auto pivot = (_match.size() - 1) / 2;
        std::nth_element(_match.begin(), _match.begin() + pivot, _match.end());
        return _match[pivot];
    }
};
template<typename T>
T tracker::committed(T prev_commit_idx) {
auto push_idx = [] (match_vector<T>& v, const follower_progress& p) {
if constexpr (std::is_same_v<T, index_t>) {
v.push_back(p.match_idx);
} else {
v.push_back(p.max_acked_read);
}
};
match_vector<T> current(prev_commit_idx, _current_voters.size());
if (!_previous_voters.empty()) {
match_vector<T> previous(prev_commit_idx, _previous_voters.size());
for (const auto& [id, p] : *this) {
if (_current_voters.contains(p.id)) {
push_idx(current, p);
}
if (_previous_voters.contains(p.id)) {
push_idx(previous, p);
}
}
if (!current.committed() || !previous.committed()) {
return prev_commit_idx;
}
return std::min(current.commit_idx(), previous.commit_idx());
} else {
for (const auto& [id, p] : *this) {
if (_current_voters.contains(p.id)) {
push_idx(current, p);
}
}
if (!current.committed()) {
return prev_commit_idx;
}
return current.commit_idx();
}
}
template index_t tracker::committed(index_t);
template read_id tracker::committed(read_id);
// Builds the vote bookkeeping for an election held under `configuration`.
votes::votes(configuration configuration)
        : _current(configuration.current) {
    // Remembers the address of every member of `members` that can vote.
    const auto add_voters = [this] (const auto& members) {
        for (const auto& member : members) {
            if (member.can_vote) {
                _voters.insert(member.addr);
            }
        }
    };
    add_voters(configuration.previous);
    add_voters(configuration.current);

    // The previous configuration is tracked only for a joint configuration.
    if (configuration.is_joint()) {
        _previous.emplace(configuration.previous);
    }
}
// Records a vote from `from` in the current tracker and, if present,
// in the previous one as well.
void votes::register_vote(server_id from, bool granted) {
    const bool in_current = _current.register_vote(from, granted);
    const bool in_previous = _previous && _previous->register_vote(from, granted);

    // An outdated vote may arrive from a node that has since become
    // a non-voting member; such a vote is ignored.
    if (!in_current && !in_previous) {
        logger.info("Got a vote from unregistered server {} during election", from);
    }
}
// Returns the election outcome. When a previous tracker is present,
// its result is returned unless it is WON; in that case, or when there is
// no previous tracker, the current tracker's result is returned.
vote_result votes::tally_votes() const {
    if (!_previous) {
        return _current.tally_votes();
    }
    const auto previous_result = _previous->tally_votes();
    if (previous_result == vote_result::WON) {
        return _current.tally_votes();
    }
    return previous_result;
}
} // end of namespace raft
// Formats an election tracker as the number of responses and the granted count.
auto fmt::formatter<raft::election_tracker>::format(const raft::election_tracker& v, fmt::format_context& ctx) const
        -> decltype(ctx.out()) {
    const auto responded_count = v._responded.size();
    return fmt::format_to(ctx.out(), "responded: {}, granted: {}", responded_count, v._granted);
}
// Formats the current tracker and, when present, the previous one,
// each on its own line.
auto fmt::formatter<raft::votes>::format(const raft::votes& v, fmt::format_context& ctx) const
        -> decltype(ctx.out()) {
    auto it = fmt::format_to(ctx.out(), "current: {}\n", v._current);
    if (v._previous.has_value()) {
        it = fmt::format_to(it, "previous: {}\n", *v._previous);
    }
    return it;
}
// Formats a vote result as its enumerator name; a value outside the
// known enumerators is formatted as an empty string.
auto fmt::formatter<raft::vote_result>::format(const raft::vote_result& v, fmt::format_context& ctx) const
        -> decltype(ctx.out()) {
    using enum raft::vote_result;
    const auto name = [v] () -> std::string_view {
        switch (v) {
        case UNKNOWN:
            return "UNKNOWN";
        case WON:
            return "WON";
        case LOST:
            return "LOST";
        }
        return {};
    }();
    return formatter<string_view>::format(name, ctx);
}