Files
scylladb/test/raft/raft_server_test.cc
Patryk Jędrzejczak ccd92c0b6b test/raft: add tests for add_entry snapshot interactions
Add six tests covering add_entry with wait_type::applied and
wait_type::committed for three snapshot scenarios affected in the
previous commit:

1. Snapshot at the entry's index (wait_for_entry, term_for returns
   snapshot term).

2. Snapshot past the entry's index (wait_for_entry, term_for returns
   nullopt).

3. Follower's waiter is resolved via drop_waiters when a snapshot
   is loaded.

Without the fix in the previous commit, 4 of 6 tests fail:
all 3 wait_type::applied tests and the wait_type::committed
drop_waiters test. The remaining two tests pass because the changes
don't affect them.

We don't write tests covering the scenarios when add_entry should
still throw commit_status_unknown (that is, when the entry's term
doesn't match the snapshot's term) because:
- these tests would be very complicated,
- a bug that would make these tests fail should also make the
  nemesis tests fail, as there would be an issue with linearizability.
2026-05-08 11:18:02 +02:00

386 lines
16 KiB
C++

#include <fmt/std.h>
#include "raft/raft.hh"
#include "replication.hh"
#include "utils/error_injection.hh"
#include <seastar/util/defer.hh>
// Tick delay handed to every raft_cluster constructed in this file.
#ifdef SEASTAR_DEBUG
// Debug builds get a longer tick so that they have time to process messages.
const std::chrono::milliseconds tick_delay{200};
#else
const std::chrono::milliseconds tick_delay{100};
#endif
// Builds the raft_cluster setup shared by most tests in this file.
// The word "default" means "usually used by the tests here".
//
// `test_config` is moved into the cluster. All remaining constructor
// arguments are fixed: the global ::apply_changes callback, three zero
// values (the first of them is apply_entries), `false`, the file-wide
// tick_delay and a default-constructed rpc_config.
//
// The cluster is returned as a prvalue, so callers can initialize a local
// from it directly.
template <typename clock_type = std::chrono::steady_clock>
static raft_cluster<clock_type> get_default_cluster(test_case test_config) {
return raft_cluster<clock_type>{
std::move(test_config),
::apply_changes,
0,
0,
0, false, tick_delay, rpc_config{}
};
}
// Client-facing calls made on a server that was stopped with a reason must
// all fail with raft::stopped_error whose message carries that reason.
SEASTAR_THREAD_TEST_CASE(test_check_abort_on_client_api) {
    raft_cluster<std::chrono::steady_clock> cluster(
            test_case { .nodes = 1 },
            // Stub apply callback: ignores its arguments and always returns 0.
            [] (raft::server_id id, const std::vector<raft::command_cref>& commands, lw_shared_ptr<hasher_int> hasher) {
                return 0;
            },
            0, 0, 0, false, tick_delay, rpc_config{});

    cluster.start_all().get();
    cluster.stop_server(0, "test crash").get();

    // Accepts only the stopped_error produced by the stop_server call above.
    auto stopped_by_test_crash = [] (const raft::stopped_error& e) {
        return std::string_view(e.what()) == "Raft instance is stopped, reason: \"test crash\"";
    };

    auto& stopped_server = cluster.get_server(0);
    BOOST_CHECK_EXCEPTION(cluster.add_entries(1, 0).get(), raft::stopped_error, stopped_by_test_crash);
    BOOST_CHECK_EXCEPTION(stopped_server.modify_config({}, {to_raft_id(0)}, nullptr).get(), raft::stopped_error, stopped_by_test_crash);
    BOOST_CHECK_EXCEPTION(stopped_server.read_barrier(nullptr).get(), raft::stopped_error, stopped_by_test_crash);
    BOOST_CHECK_EXCEPTION(stopped_server.set_configuration({}, nullptr).get(), raft::stopped_error, stopped_by_test_crash);
}
// An add_entry call that fails must give back the log memory it reserved.
// The log limit is set to exactly one command, and a one-shot error
// injection makes the first add_entry throw; the second add_entry can only
// complete if the first one released its memory.
SEASTAR_THREAD_TEST_CASE(test_release_memory_if_add_entry_throws) {
#ifndef SCYLLA_ENABLE_ERROR_INJECTION
    std::cerr << "Skipping test as it depends on error injection. Please run in mode where it's enabled (debug,dev).\n";
#else
    const size_t command_size = sizeof(size_t);
    raft::server::configuration srv_config {
        .snapshot_threshold_log_size = 0,
        .snapshot_trailing_size = 0,
        .max_log_size = command_size,
        .max_command_size = command_size
    };
    auto cluster = get_default_cluster(test_case {
        .nodes = 1,
        .config = std::vector<raft::server::configuration>({srv_config})
    });
    cluster.start_all().get();
    auto stop_cluster = defer([&cluster] { cluster.stop_all().get(); });

    // One-shot injection: only the first add_entry is made to fail.
    utils::get_local_injector().enable("fsm::add_entry/test-failure", true);
    auto is_injected_failure = [] (const std::runtime_error& e) {
        return std::string_view(e.what()) == "fsm::add_entry/test-failure";
    };
    BOOST_CHECK_EXCEPTION(cluster.add_entries(1, 0).get(), std::runtime_error, is_injected_failure);

    // If the failed add_entry above had kept its memory, this call
    // would block forever.
    cluster.add_entries(1, 0).get();
    cluster.read(read_value{0, 1}).get();
#endif
}
// Basic contract of `wait_for_state_change` with respect to abort sources:
// * requesting an abort on the passed abort_source aborts the wait, so the
//   returned future gets resolved;
// * the future then holds an exception of type `raft::request_aborted`.
// Reproduces SCYLLADB-665.
SEASTAR_THREAD_TEST_CASE(test_aborting_wait_for_state_change) {
    auto cluster = get_default_cluster(test_case{ .nodes = 1 });
    cluster.start_all().get();
    auto stop_cluster = defer([&cluster] { cluster.stop_all().get(); });

    auto& srv = cluster.get_server(0);
    srv.wait_for_leader(nullptr).get();

    abort_source as;
    // This future cannot be ready right away. The leader election awaited
    // above cannot affect it: the promises for "leader known" and "state
    // changed" are resolved within the same call, one after the other
    // (cf. server_impl::process_fsm_output).
    future<> state_change = srv.wait_for_state_change(&as);

    as.request_abort();
    BOOST_CHECK_THROW((void) state_change.get(), raft::request_aborted);
}
// Runs `func` against servers that were started and then aborted, and checks
// that it always fails with raft::stopped_error carrying the right message.
//
// Two servers of a 2-node cluster (each configured with `config`) are used:
// the first is aborted without a reason, the second with a custom reason.
// For each of them `func` is invoked with a null abort_source, with an idle
// one, and with one that has already been triggered.
static void test_func_on_aborted_server_aux(
        std::function<future<>(raft::server&, abort_source*)> func,
        const raft::server::configuration& config = raft::server::configuration{})
{
    const size_t node_count = 2;
    auto cluster = get_default_cluster(test_case {
        .nodes = node_count,
        .config = std::vector<raft::server::configuration>(node_count, config)
    });

    constexpr std::string_view abort_reason = "some unfunny error message";
    const auto expected_custom_message = fmt::format("Raft instance is stopped, reason: \"{}\"", abort_reason);

    auto has_default_message = [] (const raft::stopped_error& e) {
        return std::string_view(e.what()) == "Raft instance is stopped";
    };
    auto has_custom_message = [&expected_custom_message] (const raft::stopped_error& e) {
        return std::string_view(e.what()) == expected_custom_message;
    };

    // Whatever the state of the abort_source handed to `func`, the outcome
    // must be raft::stopped_error with the expected message.
    auto expect_stopped = [&func] (raft::server& srv, const auto& message_matches) {
        abort_source as;
        BOOST_CHECK_EXCEPTION((void) func(srv, nullptr).get(), raft::stopped_error, message_matches);
        BOOST_CHECK_EXCEPTION((void) func(srv, &as).get(), raft::stopped_error, message_matches);
        as.request_abort();
        BOOST_CHECK_EXCEPTION((void) func(srv, &as).get(), raft::stopped_error, message_matches);
    };

    // Case 1. Default error message.
    {
        auto& first = cluster.get_server(0);
        first.start().get();
        first.abort().get();
        expect_stopped(first, has_default_message);
    }

    // Case 2. Custom error message.
    {
        auto& second = cluster.get_server(1);
        second.start().get();
        second.abort(sstring(abort_reason)).get();
        expect_stopped(second, has_custom_message);
    }
}
static void test_add_entry_on_aborted_server_aux(const bool enable_forwarding) {
raft::server::configuration config { .enable_forwarding = enable_forwarding };
int val = 0;
auto add_entry = [&val] (raft::server& server, abort_source* as) {
return server.add_entry(create_command(val++), raft::wait_type::committed, as);
};
test_func_on_aborted_server_aux(add_entry, config);
}
static void test_modify_config_on_aborted_server_aux(const bool enable_forwarding) {
raft::server::configuration config { .enable_forwarding = enable_forwarding };
auto modify_config = [] (raft::server& server, abort_source* as) {
return server.modify_config({}, {}, as);
};
test_func_on_aborted_server_aux(modify_config, config);
}
// Reproducers of SCYLLADB-841. Once raft::server had been aborted, add_entry
// and modify_config used to fail with raft::not_a_leader carrying a null ID
// when forwarding was disabled.
//
// These tests check that this no longer happens. They also check that
// raft::stopped_error wins over raft::request_aborted when both would apply,
// which is the more natural outcome.
SEASTAR_THREAD_TEST_CASE(test_add_entry_on_aborted_server_disabled_forwarding) {
    constexpr bool enable_forwarding = false;
    test_add_entry_on_aborted_server_aux(enable_forwarding);
}
// add_entry on an aborted server, with forwarding enabled.
SEASTAR_THREAD_TEST_CASE(test_add_entry_on_aborted_server_enabled_forwarding) {
    constexpr bool enable_forwarding = true;
    test_add_entry_on_aborted_server_aux(enable_forwarding);
}
// modify_config on an aborted server, with forwarding disabled.
SEASTAR_THREAD_TEST_CASE(test_modify_config_on_aborted_server_disabled_forwarding) {
    constexpr bool enable_forwarding = false;
    test_modify_config_on_aborted_server_aux(enable_forwarding);
}
// modify_config on an aborted server, with forwarding enabled.
SEASTAR_THREAD_TEST_CASE(test_modify_config_on_aborted_server_enabled_forwarding) {
    constexpr bool enable_forwarding = true;
    test_modify_config_on_aborted_server_aux(enable_forwarding);
}
// raft::server::wait_for_leader called on an aborted server must complete
// with raft::stopped_error, no matter what state the passed abort_source
// is in.
// Reproducer of SCYLLADB-841.
SEASTAR_THREAD_TEST_CASE(test_wait_for_leader_on_aborted_server) {
    test_func_on_aborted_server_aux([] (raft::server& srv, abort_source* as) {
        return srv.wait_for_leader(as);
    });
}
// raft::server::wait_for_state_change called on an aborted server must
// complete with raft::stopped_error, no matter what state the passed
// abort_source is in.
// Reproducer of SCYLLADB-841.
SEASTAR_THREAD_TEST_CASE(test_wait_for_state_change_on_aborted_server) {
    test_func_on_aborted_server_aux([] (raft::server& srv, abort_source* as) {
        return srv.wait_for_state_change(as);
    });
}
// Helper for tests of add_entry where a snapshot covering the entry being
// added is taken before wait_for_entry runs.
//
// A 1-node cluster with aggressive snapshotting is used together with an
// error injection point that pauses add_entry after the entry has been added
// to the log but before wait_for_entry checks its status. While add_entry is
// paused, the entry gets committed and applied, and a snapshot is taken.
//
// `type` is the wait type passed to the paused add_entry.
//
// `advance_snapshot_past_entry` selects the scenario:
// * true:  one more entry is added first, so the snapshot ends up past the
//          first entry's index and the entry is fully truncated from the log
//          (term_for returns nullopt);
// * false: the snapshot is taken at the entry's index (term_for returns the
//          snapshot's term).
//
// Either way wait_for_entry is expected to succeed for both wait types: the
// snapshot's term matching the entry's term proves that the entry was
// committed and is part of the snapshot.
static void test_add_entry_load_snapshot_before_wait_aux(raft::wait_type type, bool advance_snapshot_past_entry) {
#ifndef SCYLLA_ENABLE_ERROR_INJECTION
    std::cerr << "Skipping test as it depends on error injection. Please run in mode where it's enabled (debug,dev).\n";
    return;
#endif
    // Injection point that holds add_entry right before wait_for_entry.
    constexpr const char* pause_point = "block_raft_add_entry_before_wait_for_entry";
    const size_t command_size = sizeof(size_t);

    // Snapshot after every entry; truncate aggressively.
    raft::server::configuration srv_config {
        .snapshot_threshold = 1,
        .snapshot_threshold_log_size = 1,
        .snapshot_trailing = 0,
        .snapshot_trailing_size = 0,
        .max_log_size = 10 * (command_size + sizeof(raft::log_entry)),
        .enable_forwarding = false,
        .max_command_size = command_size
    };

    // apply_entries has to exceed the number of entries added during the
    // test; otherwise the state machine's done promise fires prematurely.
    raft_cluster<std::chrono::steady_clock> cluster{
        test_case {
            .nodes = 1,
            .config = std::vector<raft::server::configuration>({srv_config})
        },
        ::apply_changes,
        100, // apply_entries
        0,
        0, false, tick_delay, rpc_config{}
    };
    cluster.start_all().get();
    auto stop_cluster = defer([&cluster] { cluster.stop_all().get(); });

    cluster.add_entries(5, 0).get();

    // The injection is one-shot: it pauses only the first add_entry, and a
    // second one (if any) passes straight through.
    utils::get_local_injector().enable(pause_point, true);

    auto& srv = cluster.get_server(0);
    auto paused_add = srv.add_entry(create_command(42), type, nullptr);

    // Spin until add_entry(42) has reached the injection point.
    while (utils::get_local_injector().is_enabled(pause_point)) {
        seastar::thread::yield();
    }

    // Make sure the entry has been applied.
    srv.read_barrier(nullptr).get();

    if (advance_snapshot_past_entry) {
        // One more entry moves the snapshot past the first one, which is then
        // fully truncated from the log (term_for returns nullopt). The
        // one-shot injection has already been consumed, so this call is not
        // paused.
        srv.add_entry(create_command(43), raft::wait_type::applied, nullptr).get();
    }

    // Snapshot now, truncating the entry from the log.
    srv.trigger_snapshot(nullptr).get();

    // Let the paused add_entry continue into wait_for_entry.
    utils::get_local_injector().receive_message(pause_point);

    // Success is expected for both wait types: the snapshot's term equals the
    // entry's term, so the entry was committed and is in the snapshot.
    BOOST_CHECK_NO_THROW(paused_add.get());
}
// Snapshot taken exactly at the entry's index, so term_for(eid.idx) returns
// the snapshot's term. This exercises the place in wait_for_entry where the
// removed `applied` check used to throw commit_status_unknown.
// Variant: wait_type::applied.
SEASTAR_THREAD_TEST_CASE(test_add_entry_applied_load_snapshot_at_entry) {
    constexpr bool advance_snapshot_past_entry = false;
    test_add_entry_load_snapshot_before_wait_aux(raft::wait_type::applied, advance_snapshot_past_entry);
}
// Snapshot taken exactly at the entry's index; variant: wait_type::committed.
SEASTAR_THREAD_TEST_CASE(test_add_entry_committed_load_snapshot_at_entry) {
    constexpr bool advance_snapshot_past_entry = false;
    test_add_entry_load_snapshot_before_wait_aux(raft::wait_type::committed, advance_snapshot_past_entry);
}
// Snapshot taken past the entry's index, so term_for(eid.idx) returns
// nullopt. This exercises the `!term` branch of wait_for_entry, where
// `snap_term == eid.term` now leads to success for both wait types.
// Variant: wait_type::applied.
SEASTAR_THREAD_TEST_CASE(test_add_entry_applied_load_snapshot_past_entry) {
    constexpr bool advance_snapshot_past_entry = true;
    test_add_entry_load_snapshot_before_wait_aux(raft::wait_type::applied, advance_snapshot_past_entry);
}
// Snapshot taken past the entry's index; variant: wait_type::committed.
SEASTAR_THREAD_TEST_CASE(test_add_entry_committed_load_snapshot_past_entry) {
    constexpr bool advance_snapshot_past_entry = true;
    test_add_entry_load_snapshot_before_wait_aux(raft::wait_type::committed, advance_snapshot_past_entry);
}
// Helper for tests of add_entry where a follower obtains the entry through
// a snapshot (load_snapshot) rather than by applying it locally.
//
// Scenario, on a 3-node cluster:
// * node 1 (a follower) stops receiving messages from node 0 (the leader)
//   but can still send to it;
// * node 1 forwards add_entry to the leader, which commits the entry
//   together with node 2, applies it and takes a snapshot;
// * node 1 is reconnected, and since the log entries are truncated the
//   leader sends it a snapshot;
// * node 1 loads the snapshot via load_snapshot(), which calls
//   drop_waiters().
//
// The waiter pending for the forwarded entry is expected to be resolved
// successfully for the given wait `type`, because the snapshot's term
// matches the entry's term.
static void test_add_entry_wait_resolved_via_drop_waiters_aux(raft::wait_type type) {
    const size_t node_count = 3;
    const size_t command_size = sizeof(size_t);
    raft::server::configuration srv_config {
        .snapshot_threshold = 1,
        .snapshot_threshold_log_size = 1,
        .snapshot_trailing = 0,
        .snapshot_trailing_size = 0,
        .max_log_size = 10 * (command_size + sizeof(raft::log_entry)),
        .max_command_size = command_size
    };

    // apply_entries has to exceed the number of entries added during the
    // test; otherwise the state machine's done promise fires prematurely.
    raft_cluster<std::chrono::steady_clock> cluster{
        test_case {
            .nodes = node_count,
            .config = std::vector<raft::server::configuration>(node_count, srv_config)
        },
        ::apply_changes,
        100, // apply_entries
        0,
        0, false, tick_delay, rpc_config{}
    };
    cluster.start_all().get();
    auto stop_cluster = defer([&cluster] { cluster.stop_all().get(); });

    // A few initial entries bring all nodes up to date.
    cluster.add_entries(5, 0).get();

    // Cut delivery from node 0 (leader) to node 1. The opposite direction
    // keeps working, so node 1 can still forward requests.
    cluster.block_receive(1, 0);

    // Node 1 forwards add_entry to node 0, which commits it with node 2,
    // applies it and takes a snapshot. Node 1 registers a waiter, yet never
    // gets the entry through append entries.
    auto& follower_srv = cluster.get_server(1);
    auto pending_add = follower_srv.add_entry(create_command(42), type, nullptr);

    // Let the leader commit, apply and snapshot the entry.
    auto& leader_srv = cluster.get_server(0);
    leader_srv.read_barrier(nullptr).get();
    leader_srv.trigger_snapshot(nullptr).get();

    // Reconnect node 1. With snapshot_trailing = 0 the log entries are
    // truncated, so the leader will send a snapshot.
    cluster.connect_all();

    // drop_waiters must resolve the waiter successfully: the snapshot's term
    // equals the entry's term, which proves the entry was committed.
    BOOST_CHECK_NO_THROW(pending_add.get());
}
// Follower's waiter resolved through drop_waiters; variant: wait_type::applied.
SEASTAR_THREAD_TEST_CASE(test_add_entry_applied_wait_resolved_via_drop_waiters) {
    constexpr auto wait = raft::wait_type::applied;
    test_add_entry_wait_resolved_via_drop_waiters_aux(wait);
}
// Follower's waiter resolved through drop_waiters; variant: wait_type::committed.
SEASTAR_THREAD_TEST_CASE(test_add_entry_committed_wait_resolved_via_drop_waiters) {
    constexpr auto wait = raft::wait_type::committed;
    test_add_entry_wait_resolved_via_drop_waiters_aux(wait);
}