diff --git a/raft/fsm.cc b/raft/fsm.cc index 4e4025db64..0bbeec5ec7 100644 --- a/raft/fsm.cc +++ b/raft/fsm.cc @@ -1098,7 +1098,8 @@ std::optional> fsm::start_read_barrier(server_id req // Make sure that only a leader or a node that is part of the config can request read barrier // Nodes outside of the config may never get the data, so they will not be able to read it. - if (requester != _my_id && leader_state().tracker.find(requester) == nullptr) { + follower_progress* opt_progress = leader_state().tracker.find(requester); + if (requester != _my_id && opt_progress == nullptr) { throw std::runtime_error(fmt::format("Read barrier requested by a node outside of the configuration {}", requester)); } @@ -1109,19 +1110,23 @@ std::optional> fsm::start_read_barrier(server_id req return {}; } + // Optimization for read barriers requested on non-voters. A non-voter doesn't receive the read_quorum message, so + // it might update its commit index only after another leader tick, which would slow down wait_for_apply() at the + // end of the read barrier. Prevent that by replicating to the non-voting requester here. + if (requester != _my_id && opt_progress->commit_idx < _commit_idx && opt_progress->match_idx == _log.last_idx() + && !opt_progress->can_vote) { + logger.trace("start_read_barrier[{}]: replicate to {} because follower commit_idx={} < commit_idx={}, " + "follower match_idx={} == last_idx={}, and follower can_vote={}", + _my_id, requester, opt_progress->commit_idx, _commit_idx, opt_progress->match_idx, + _log.last_idx(), opt_progress->can_vote); + replicate_to(*opt_progress, true); + } + read_id id = next_read_id(); logger.trace("start_read_barrier[{}] starting read barrier with id {}", _my_id, id); return std::make_pair(id, _commit_idx); } -void fsm::maybe_update_commit_idx_for_read(index_t read_idx) { - // read_idx from the leader might not be replicated to the local node yet. - const bool in_local_log = read_idx <= _log.last_idx(); - if (in_local_log && log_term_for(read_idx) == get_current_term()) { - advance_commit_idx(read_idx); - } -} - void fsm::stop() { if (is_leader()) { // Become follower to stop accepting requests diff --git a/raft/fsm.hh b/raft/fsm.hh index 6e14140d86..9d1ab9980f 100644 --- a/raft/fsm.hh +++ b/raft/fsm.hh @@ -480,15 +480,6 @@ public: std::optional> start_read_barrier(server_id requester); - // Update the commit index to the read index (a read barrier result from the leader) if the local entry with the - // read index belongs to the current term. - // - // Satisfying the condition above guarantees that the local log matches the current leader's log up to the read - // index (the Log Matching Property), so the current leader won't drop the local entry with the read index. - // Moreover, this entry has been committed by the leader, so future leaders also won't drop it (the Leader - // Completeness Property). Hence, updating the commit index is safe. - void maybe_update_commit_idx_for_read(index_t read_idx); - size_t in_memory_log_size() const { return _log.in_memory_size(); } diff --git a/raft/server.cc b/raft/server.cc index 86c6712df5..dbe5d1f513 100644 --- a/raft/server.cc +++ b/raft/server.cc @@ -1571,7 +1571,6 @@ future<> server_impl::read_barrier(seastar::abort_source* as) { co_return stop_iteration::no; } read_idx = std::get(res); - _fsm->maybe_update_commit_idx_for_read(read_idx); co_return stop_iteration::yes; });