mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-19 16:15:07 +00:00
Merge 'raft: remake the read barrier optimization' from Patryk Jędrzejczak
The approach taken in 1ae2ae50a6 turned
out to be incorrect. The Raft member requesting a read barrier could
incorrectly advance its commit_idx and break linearizability. We revert that
commit in this PR.
We also remake the read barrier optimization with a completely new approach.
We make the leader replicate to the non-voting requester of a read barrier if
its `commit_idx` is behind.
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-998
No backport: the issue is present only in master.
Closes scylladb/scylladb#29216
* github.com:scylladb/scylladb:
raft: speed up read barrier requested by non-voters
Revert "raft: read_barrier: update local commit_idx to read_idx when it's safe"
This commit is contained in:
23
raft/fsm.cc
23
raft/fsm.cc
@@ -1098,7 +1098,8 @@ std::optional<std::pair<read_id, index_t>> fsm::start_read_barrier(server_id req
|
||||
|
||||
// Make sure that only a leader or a node that is part of the config can request read barrier
|
||||
// Nodes outside of the config may never get the data, so they will not be able to read it.
|
||||
if (requester != _my_id && leader_state().tracker.find(requester) == nullptr) {
|
||||
follower_progress* opt_progress = leader_state().tracker.find(requester);
|
||||
if (requester != _my_id && opt_progress == nullptr) {
|
||||
throw std::runtime_error(fmt::format("Read barrier requested by a node outside of the configuration {}", requester));
|
||||
}
|
||||
|
||||
@@ -1109,19 +1110,23 @@ std::optional<std::pair<read_id, index_t>> fsm::start_read_barrier(server_id req
|
||||
return {};
|
||||
}
|
||||
|
||||
// Optimization for read barriers requested on non-voters. A non-voter doesn't receive the read_quorum message, so
|
||||
// it might update its commit index only after another leader tick, which would slow down wait_for_apply() at the
|
||||
// end of the read barrier. Prevent that by replicating to the non-voting requester here.
|
||||
if (requester != _my_id && opt_progress->commit_idx < _commit_idx && opt_progress->match_idx == _log.last_idx()
|
||||
&& !opt_progress->can_vote) {
|
||||
logger.trace("start_read_barrier[{}]: replicate to {} because follower commit_idx={} < commit_idx={}, "
|
||||
"follower match_idx={} == last_idx={}, and follower can_vote={}",
|
||||
_my_id, requester, opt_progress->commit_idx, _commit_idx, opt_progress->match_idx,
|
||||
_log.last_idx(), opt_progress->can_vote);
|
||||
replicate_to(*opt_progress, true);
|
||||
}
|
||||
|
||||
read_id id = next_read_id();
|
||||
logger.trace("start_read_barrier[{}] starting read barrier with id {}", _my_id, id);
|
||||
return std::make_pair(id, _commit_idx);
|
||||
}
|
||||
|
||||
void fsm::maybe_update_commit_idx_for_read(index_t read_idx) {
|
||||
// read_idx from the leader might not be replicated to the local node yet.
|
||||
const bool in_local_log = read_idx <= _log.last_idx();
|
||||
if (in_local_log && log_term_for(read_idx) == get_current_term()) {
|
||||
advance_commit_idx(read_idx);
|
||||
}
|
||||
}
|
||||
|
||||
void fsm::stop() {
|
||||
if (is_leader()) {
|
||||
// Become follower to stop accepting requests
|
||||
|
||||
@@ -480,15 +480,6 @@ public:
|
||||
|
||||
std::optional<std::pair<read_id, index_t>> start_read_barrier(server_id requester);
|
||||
|
||||
// Update the commit index to the read index (a read barrier result from the leader) if the local entry with the
|
||||
// read index belongs to the current term.
|
||||
//
|
||||
// Satisfying the condition above guarantees that the local log matches the current leader's log up to the read
|
||||
// index (the Log Matching Property), so the current leader won't drop the local entry with the read index.
|
||||
// Moreover, this entry has been committed by the leader, so future leaders also won't drop it (the Leader
|
||||
// Completeness Property). Hence, updating the commit index is safe.
|
||||
void maybe_update_commit_idx_for_read(index_t read_idx);
|
||||
|
||||
size_t in_memory_log_size() const {
|
||||
return _log.in_memory_size();
|
||||
}
|
||||
|
||||
@@ -1571,7 +1571,6 @@ future<> server_impl::read_barrier(seastar::abort_source* as) {
|
||||
co_return stop_iteration::no;
|
||||
}
|
||||
read_idx = std::get<index_t>(res);
|
||||
_fsm->maybe_update_commit_idx_for_read(read_idx);
|
||||
co_return stop_iteration::yes;
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user